diff options
Diffstat (limited to 'yt_dlp_plugins/extractor/radiko.py')
-rw-r--r-- | yt_dlp_plugins/extractor/radiko.py | 106 |
1 files changed, 83 insertions, 23 deletions
diff --git a/yt_dlp_plugins/extractor/radiko.py b/yt_dlp_plugins/extractor/radiko.py index 2996290..ad5c77f 100644 --- a/yt_dlp_plugins/extractor/radiko.py +++ b/yt_dlp_plugins/extractor/radiko.py @@ -87,13 +87,14 @@ class _RadikoBaseIE(InfoExtractor): _DELIVERED_ONDEMAND = ('radiko.jp',) _DOESNT_WORK_WITH_FFMPEG = ('tf-f-rpaa-radiko.smartstream.ne.jp', 'si-f-radiko.smartstream.ne.jp', 'alliance-stream-radiko.smartstream.ne.jp') + _AD_INSERTION = ('si-f-radiko.smartstream.ne.jp', ) _has_tf30 = None def _index_regions(self): region_data = {} - tree = self._download_xml("https://radiko.jp/v3/station/region/full.xml", None, note="Indexing regions") + tree = self._download_xml("https://radiko.jp/v3/station/region/full.xml", None, note="Indexing station regions") for stations in tree: for station in stations: area = station.find("area_id").text @@ -255,8 +256,7 @@ class _RadikoBaseIE(InfoExtractor): config_device = traverse_obj(self._configuration_arg('device', casesense=True, ie_key="rajiko"), 0) if not use_pc_html5: - device = config_device or "aSmartPhone7a" # this device only gives us the on-demand one for timefree - # that's good imo - we just get the one that works, and don't bother with probing the rest as well + device = config_device or "aSmartPhone7a" # still has the radiko.jp on-demand one for timefree else: device = config_device or "pc_html5" # the on-demand one doesnt work with timefree30 stuff sadly # so just use pc_html5 which has everything @@ -272,6 +272,7 @@ class _RadikoBaseIE(InfoExtractor): do_as_live_chunks = not len(self._configuration_arg("no_as_live_chunks", ie_key="rajiko")) > 0 for element in url_data.findall(f".//url[@timefree='{timefree_int}'][@areafree='0']/playlist_create_url"): # find <url>s with matching timefree and no areafree, then get their <playlist_create_url> + # we don't want areafree here because we should always be in-region url = element.text if url in seen_urls: # there are always dupes, even with ^ specific filtering continue @@ -299,15 +300,22 @@ class _RadikoBaseIE(InfoExtractor): delivered_live = True preference = -1 entry_protocol = 'm3u8' + format_note=[] - if domain in self._DOESNT_WORK_WITH_FFMPEG and do_blacklist_streams: + if timefree and domain in self._DOESNT_WORK_WITH_FFMPEG and do_blacklist_streams: + # TODO: remove this completely + # https://github.com/garret1317/yt-dlp-rajiko/issues/29 self.write_debug(f"skipping {domain} (known not working)") continue if domain in self._DELIVERED_ONDEMAND: # override the defaults for delivered as on-demand delivered_live = False - preference = 1 + preference += 2 entry_protocol = None + if domain in self._AD_INSERTION: + preference -= 3 + format_note.append("Ad insertion") + auth_headers = auth_data["token"] @@ -317,22 +325,31 @@ class _RadikoBaseIE(InfoExtractor): self, playlist_url, start_at, end_at, domain, auth_headers ) - formats.append({ + m3u8_formats = [{ "format_id": join_nonempty(domain, "chunked"), "hls_media_playlist_data": chunks_playlist, "preference": preference, "ext": "m4a", + "vcodec": "none", # fallback to live for ffmpeg etc "url": playlist_url, "http_headers": auth_headers, - }) + }] + format_note.append("Chunked") else: - formats += self._extract_m3u8_formats( + m3u8_formats = self._extract_m3u8_formats( playlist_url, station, m3u8_id=domain, fatal=False, headers=auth_headers, live=delivered_live, preference=preference, entry_protocol=entry_protocol, note=f"Downloading m3u8 information from {domain}") + + for f in m3u8_formats: + # ffmpeg sends a Range header which some streams reject. here we disable that (and also some icecast header as well) + f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0', '-http_seekable', '0', '-icy', '0']} + f['format_note'] = ", ".join(format_note) + formats.append(f) + return formats @@ -422,7 +439,7 @@ class RadikoLiveIE(_RadikoBaseIE): class RadikoTimeFreeIE(_RadikoBaseIE): _NETRC_MACHINE = "rajiko" _VALID_URL = r"https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-_]+)/(?P<id>\d+)" - # TESTS use a custom-ish script that updates the airdates automatically, see misc/test_extractors.py + # TESTS use a custom-ish script that updates the airdates automatically, see contrib/test_extractors.py def _perform_login(self, username, password): try: @@ -527,9 +544,13 @@ class RadikoTimeFreeIE(_RadikoBaseIE): region = self._get_station_region(station) station_meta = self._get_station_meta(region, station) - chapters = self._extract_chapters(station, start, end, video_id=meta["id"]) - auth_data = self._auth(region, need_tf30=need_tf30) - formats = self._get_station_formats(station, True, auth_data, start_at=start, end_at=end, use_pc_html5=need_tf30) + if live_status == "was_live": + chapters = self._extract_chapters(station, start, end, video_id=meta["id"]) + auth_data = self._auth(region, need_tf30=need_tf30) + formats = self._get_station_formats(station, True, auth_data, start_at=start, end_at=end, use_pc_html5=need_tf30) + else: + chapters = None + formats = None return { **station_meta, @@ -718,19 +739,58 @@ class RadikoPersonIE(InfoExtractor): def entries(): key_station_only = len(self._configuration_arg("key_station_only", ie_key="rajiko")) > 0 for episode in person_api.get("data"): - if key_station_only and episode.get("key_station_id") != episode.get("station_id"): - continue - share_url = traverse_obj(episode, ("radiko_url", ("pc", "sp", "android", "ios", "app"), - {url_or_none}), get_all=False) - # they're all identical share links at the moment (5th aug 2024) but they might not be in the future + station = episode.get("station_id") + if key_station_only and episode.get("key_station_id") != station: + continue - # predictions: - # pc will probably stay the same - # don't know what sp is, possibly "SmartPhone"?, anyway seems reasonably generic - # android is easier for me to reverse-engineer than ios (no ithing) - # i assume "app" would be some internal tell-it-to-do-something link, not a regular web link + start = episode.get("start_at") + timestring = rtime.RadikoTime.fromisoformat(start).timestring() - yield self.url_result(share_url, ie=RadikoShareIE, video_title=episode.get("title")) + timefree_id = join_nonempty(station, timestring) + timefree_url = f"https://radiko.jp/#!/ts/{station}/{timestring}" + yield self.url_result(timefree_url, ie=RadikoTimeFreeIE, video_id=timefree_id) return self.playlist_result(entries(), playlist_id=join_nonempty("person", person_id)) + + +class RadikoRSeasonsIE(InfoExtractor): + _VALID_URL = r"https?://(?:www\.)?radiko\.jp/(?:mobile/)?r_seasons/(?P<id>\d+$)" + _TESTS = [{ + "url": "https://radiko.jp/r_seasons/10012302", + "playlist_mincount": 4, + "info_dict": { + "id": '10012302', + "title": '山下達郎の楽天カード サンデー・ソングブック', + } + }, { + "url": "https://radiko.jp/r_seasons/10002831", + "playlist_mincount": 4, + "info_dict": { + "id": "10002831", + "title": "Tokyo Moon", + } + }] + + def _real_extract(self, url): + season_id = self._match_id(url) + html = self._download_webpage(url, season_id) + pageProps = self._search_nextjs_data(html, season_id)["props"]["pageProps"] + season_id = traverse_obj(pageProps, ("rSeason", "id")) or season_id + + def entries(): + for episode in pageProps.get("pastPrograms"): + station = traverse_obj(episode, ("stationId")) + start = traverse_obj(episode, ("startAt", "seconds")) + timestring = rtime.RadikoTime.fromtimestamp(start, tz=rtime.JST).timestring() + + timefree_id = join_nonempty(station, timestring) + timefree_url = f"https://radiko.jp/#!/ts/{station}/{timestring}" + + yield self.url_result(timefree_url, ie=RadikoTimeFreeIE, video_id=timefree_id) + + return self.playlist_result( + entries(), + playlist_id=season_id, + playlist_title=traverse_obj(pageProps, ("rSeason", "rSeasonName")), + ) |