about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/yt_dlp_plugins/extractor/radiko.py
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp_plugins/extractor/radiko.py')
-rw-r--r-- yt_dlp_plugins/extractor/radiko.py 106
1 files changed, 83 insertions, 23 deletions
diff --git a/yt_dlp_plugins/extractor/radiko.py b/yt_dlp_plugins/extractor/radiko.py
index 2996290..ad5c77f 100644
--- a/yt_dlp_plugins/extractor/radiko.py
+++ b/yt_dlp_plugins/extractor/radiko.py
@@ -87,13 +87,14 @@ class _RadikoBaseIE(InfoExtractor):
_DELIVERED_ONDEMAND = ('radiko.jp',)
_DOESNT_WORK_WITH_FFMPEG = ('tf-f-rpaa-radiko.smartstream.ne.jp', 'si-f-radiko.smartstream.ne.jp', 'alliance-stream-radiko.smartstream.ne.jp')
+ _AD_INSERTION = ('si-f-radiko.smartstream.ne.jp', )
_has_tf30 = None
def _index_regions(self):
region_data = {}
- tree = self._download_xml("https://radiko.jp/v3/station/region/full.xml", None, note="Indexing regions")
+ tree = self._download_xml("https://radiko.jp/v3/station/region/full.xml", None, note="Indexing station regions")
for stations in tree:
for station in stations:
area = station.find("area_id").text
@@ -255,8 +256,7 @@ class _RadikoBaseIE(InfoExtractor):
config_device = traverse_obj(self._configuration_arg('device', casesense=True, ie_key="rajiko"), 0)
if not use_pc_html5:
- device = config_device or "aSmartPhone7a" # this device only gives us the on-demand one for timefree
- # that's good imo - we just get the one that works, and don't bother with probing the rest as well
+ device = config_device or "aSmartPhone7a" # still has the radiko.jp on-demand one for timefree
else:
device = config_device or "pc_html5" # the on-demand one doesnt work with timefree30 stuff sadly
# so just use pc_html5 which has everything
@@ -272,6 +272,7 @@ class _RadikoBaseIE(InfoExtractor):
do_as_live_chunks = not len(self._configuration_arg("no_as_live_chunks", ie_key="rajiko")) > 0
for element in url_data.findall(f".//url[@timefree='{timefree_int}'][@areafree='0']/playlist_create_url"):
# find <url>s with matching timefree and no areafree, then get their <playlist_create_url>
+ # we don't want areafree here because we should always be in-region
url = element.text
if url in seen_urls: # there are always dupes, even with ^ specific filtering
continue
@@ -299,15 +300,22 @@ class _RadikoBaseIE(InfoExtractor):
delivered_live = True
preference = -1
entry_protocol = 'm3u8'
+ format_note=[]
- if domain in self._DOESNT_WORK_WITH_FFMPEG and do_blacklist_streams:
+ if timefree and domain in self._DOESNT_WORK_WITH_FFMPEG and do_blacklist_streams:
+ # TODO: remove this completely
+ # https://github.com/garret1317/yt-dlp-rajiko/issues/29
self.write_debug(f"skipping {domain} (known not working)")
continue
if domain in self._DELIVERED_ONDEMAND:
# override the defaults for delivered as on-demand
delivered_live = False
- preference = 1
+ preference += 2
entry_protocol = None
+ if domain in self._AD_INSERTION:
+ preference -= 3
+ format_note.append("Ad insertion")
+
auth_headers = auth_data["token"]
@@ -317,22 +325,31 @@ class _RadikoBaseIE(InfoExtractor):
self, playlist_url, start_at, end_at, domain, auth_headers
)
- formats.append({
+ m3u8_formats = [{
"format_id": join_nonempty(domain, "chunked"),
"hls_media_playlist_data": chunks_playlist,
"preference": preference,
"ext": "m4a",
+ "vcodec": "none",
# fallback to live for ffmpeg etc
"url": playlist_url,
"http_headers": auth_headers,
- })
+ }]
+ format_note.append("Chunked")
else:
- formats += self._extract_m3u8_formats(
+ m3u8_formats = self._extract_m3u8_formats(
playlist_url, station, m3u8_id=domain, fatal=False, headers=auth_headers,
live=delivered_live, preference=preference, entry_protocol=entry_protocol,
note=f"Downloading m3u8 information from {domain}")
+
+ for f in m3u8_formats:
+ # ffmpeg sends a Range header, which some streams reject, so disable seeking here (and stop ffmpeg requesting ICY/Icecast metadata too)
+ f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0', '-http_seekable', '0', '-icy', '0']}
+ f['format_note'] = ", ".join(format_note)
+ formats.append(f)
+
return formats
@@ -422,7 +439,7 @@ class RadikoLiveIE(_RadikoBaseIE):
class RadikoTimeFreeIE(_RadikoBaseIE):
_NETRC_MACHINE = "rajiko"
_VALID_URL = r"https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-_]+)/(?P<id>\d+)"
- # TESTS use a custom-ish script that updates the airdates automatically, see misc/test_extractors.py
+ # TESTS use a custom-ish script that updates the airdates automatically, see contrib/test_extractors.py
def _perform_login(self, username, password):
try:
@@ -527,9 +544,13 @@ class RadikoTimeFreeIE(_RadikoBaseIE):
region = self._get_station_region(station)
station_meta = self._get_station_meta(region, station)
- chapters = self._extract_chapters(station, start, end, video_id=meta["id"])
- auth_data = self._auth(region, need_tf30=need_tf30)
- formats = self._get_station_formats(station, True, auth_data, start_at=start, end_at=end, use_pc_html5=need_tf30)
+ if live_status == "was_live":
+ chapters = self._extract_chapters(station, start, end, video_id=meta["id"])
+ auth_data = self._auth(region, need_tf30=need_tf30)
+ formats = self._get_station_formats(station, True, auth_data, start_at=start, end_at=end, use_pc_html5=need_tf30)
+ else:
+ chapters = None
+ formats = None
return {
**station_meta,
@@ -718,19 +739,58 @@ class RadikoPersonIE(InfoExtractor):
def entries():
key_station_only = len(self._configuration_arg("key_station_only", ie_key="rajiko")) > 0
for episode in person_api.get("data"):
- if key_station_only and episode.get("key_station_id") != episode.get("station_id"):
- continue
- share_url = traverse_obj(episode, ("radiko_url", ("pc", "sp", "android", "ios", "app"),
- {url_or_none}), get_all=False)
- # they're all identical share links at the moment (5th aug 2024) but they might not be in the future
+ station = episode.get("station_id")
+ if key_station_only and episode.get("key_station_id") != station:
+ continue
- # predictions:
- # pc will probably stay the same
- # don't know what sp is, possibly "SmartPhone"?, anyway seems reasonably generic
- # android is easier for me to reverse-engineer than ios (no ithing)
- # i assume "app" would be some internal tell-it-to-do-something link, not a regular web link
+ start = episode.get("start_at")
+ timestring = rtime.RadikoTime.fromisoformat(start).timestring()
- yield self.url_result(share_url, ie=RadikoShareIE, video_title=episode.get("title"))
+ timefree_id = join_nonempty(station, timestring)
+ timefree_url = f"https://radiko.jp/#!/ts/{station}/{timestring}"
+ yield self.url_result(timefree_url, ie=RadikoTimeFreeIE, video_id=timefree_id)
return self.playlist_result(entries(), playlist_id=join_nonempty("person", person_id))
+
+
+class RadikoRSeasonsIE(InfoExtractor):
+    """Playlist extractor for radiko ``r_seasons`` pages.
+
+    Reads the page's Next.js data and hands every entry of ``pastPrograms``
+    to RadikoTimeFreeIE as a ``#!/ts/<station>/<timestring>`` URL.
+    """
+
+    # the `$` anchors the whole URL, so it lives outside the `id` capture group
+    _VALID_URL = r"https?://(?:www\.)?radiko\.jp/(?:mobile/)?r_seasons/(?P<id>\d+)$"
+    _TESTS = [{
+        "url": "https://radiko.jp/r_seasons/10012302",
+        "playlist_mincount": 4,
+        "info_dict": {
+            "id": '10012302',
+            "title": '山下達郎の楽天カード サンデー・ソングブック',
+        }
+    }, {
+        "url": "https://radiko.jp/r_seasons/10002831",
+        "playlist_mincount": 4,
+        "info_dict": {
+            "id": "10002831",
+            "title": "Tokyo Moon",
+        }
+    }]
+
+    def _real_extract(self, url):
+        """Return a playlist of time-free URL results for the season at `url`."""
+        season_id = self._match_id(url)
+        html = self._download_webpage(url, season_id)
+        page_props = self._search_nextjs_data(html, season_id)["props"]["pageProps"]
+        # prefer the id reported by the page itself, fall back to the one from the URL
+        season_id = traverse_obj(page_props, ("rSeason", "id")) or season_id
+
+        def entries():
+            # `pastPrograms` may be absent or null; treat that as an empty playlist
+            # instead of failing with "NoneType is not iterable"
+            for episode in page_props.get("pastPrograms") or []:
+                station = traverse_obj(episode, "stationId")
+                start = traverse_obj(episode, ("startAt", "seconds"))
+                if not station or start is None:
+                    # a time-free URL needs both parts; skip the broken entry
+                    # rather than aborting the whole playlist
+                    continue
+
+                timestring = rtime.RadikoTime.fromtimestamp(start, tz=rtime.JST).timestring()
+                timefree_id = join_nonempty(station, timestring)
+                timefree_url = f"https://radiko.jp/#!/ts/{station}/{timestring}"
+
+                yield self.url_result(timefree_url, ie=RadikoTimeFreeIE, video_id=timefree_id)
+
+        return self.playlist_result(
+            entries(),
+            playlist_id=season_id,
+            playlist_title=traverse_obj(page_props, ("rSeason", "rSeasonName")),
+        )