diff options
Diffstat (limited to 'yt_dlp_plugins/extractor/radiko.py')
-rw-r--r-- | yt_dlp_plugins/extractor/radiko.py | 272 |
1 files changed, 137 insertions, 135 deletions
diff --git a/yt_dlp_plugins/extractor/radiko.py b/yt_dlp_plugins/extractor/radiko.py index d12f203..2996290 100644 --- a/yt_dlp_plugins/extractor/radiko.py +++ b/yt_dlp_plugins/extractor/radiko.py @@ -6,18 +6,22 @@ import urllib.parse import pkgutil from yt_dlp.extractor.common import InfoExtractor +from yt_dlp.networking.exceptions import HTTPError from yt_dlp.utils import ( + ExtractorError, OnDemandPagedList, clean_html, int_or_none, join_nonempty, parse_qs, traverse_obj, + urlencode_postdata, url_or_none, update_url_query, ) import yt_dlp_plugins.extractor.radiko_time as rtime +import yt_dlp_plugins.extractor.radiko_hacks as hacks class _RadikoBaseIE(InfoExtractor): @@ -82,7 +86,9 @@ class _RadikoBaseIE(InfoExtractor): _APP_VERSIONS = ["7.5.0", "7.4.17", "7.4.16", "7.4.15", "7.4.14", "7.4.13", "7.4.12", "7.4.11", "7.4.10", "7.4.9", "7.4.8", "7.4.7", "7.4.6", "7.4.5", "7.4.4", "7.4.3", "7.4.2", "7.4.1", "7.4.0", "7.3.8", "7.3.7", "7.3.6", "7.3.1", "7.3.0", "7.2.11", "7.2.10"] _DELIVERED_ONDEMAND = ('radiko.jp',) - _DOESNT_WORK_WITH_FFMPEG = ('tf-f-rpaa-radiko.smartstream.ne.jp', 'si-f-radiko.smartstream.ne.jp') + _DOESNT_WORK_WITH_FFMPEG = ('tf-f-rpaa-radiko.smartstream.ne.jp', 'si-f-radiko.smartstream.ne.jp', 'alliance-stream-radiko.smartstream.ne.jp') + + _has_tf30 = None def _index_regions(self): region_data = {} @@ -178,16 +184,21 @@ class _RadikoBaseIE(InfoExtractor): "X-Radiko-AuthToken": auth_token, }, "user": auth2_headers["X-Radiko-User"], + "has_tf30": self._has_tf30, } if not region_mismatch: self.cache.store("rajiko", station_region, auth_data) return auth_data - def _auth(self, station_region): + def _auth(self, station_region, need_tf30=False): cachedata = self.cache.load("rajiko", station_region) self.write_debug(cachedata) if cachedata is not None: + if need_tf30 and not cachedata.get("has_tf30"): + self.write_debug("Cached token doesn't have timefree 30, getting a new one") + return self._negotiate_token(station_region) + auth_headers = cachedata.get("token") response = self._download_webpage("https://radiko.jp/v2/api/auth_check", station_region, "Checking cached token", headers=auth_headers, expected_status=401) @@ -205,6 +216,17 @@ class _RadikoBaseIE(InfoExtractor): station = region.find(f'.//station/id[.="{station_id}"]/..') # a <station> with an <id> of our station_id station_name = station.find("name").text station_url = url_or_none(station.find("href").text) + + thumbnails = [] + for logo in station.findall("logo"): + thumbnails.append({ + "url": logo.text, + **traverse_obj(logo.attrib, ({ + "width": ("width", {int_or_none}), + "height": ("height", {int_or_none}), + })) + }) + meta = { "id": station_id, "title": station_name, @@ -218,7 +240,7 @@ class _RadikoBaseIE(InfoExtractor): "uploader_id": station_id, "uploader_url": station_url, - "thumbnail": url_or_none(station.find("banner").text), + "thumbnails": thumbnails, } self.cache.store("rajiko", station_id, { "expiry": (now + datetime.timedelta(days=1)).timestamp(), @@ -229,8 +251,16 @@ class _RadikoBaseIE(InfoExtractor): self.to_screen(f"{station_id}: Using cached station metadata") return cachedata.get("meta") - def _get_station_formats(self, station, timefree, auth_data, start_at=None, end_at=None): - device = self._configuration_arg('device', ['aSmartPhone7a'], casesense=True, ie_key="rajiko")[0] # aSmartPhone7a formats = always happy path + def _get_station_formats(self, station, timefree, auth_data, start_at=None, end_at=None, use_pc_html5=False): + config_device = traverse_obj(self._configuration_arg('device', casesense=True, ie_key="rajiko"), 0) + + if not use_pc_html5: + device = config_device or "aSmartPhone7a" # this device only gives us the on-demand one for timefree + # that's good imo - we just get the one that works, and don't bother with probing the rest as well + else: + device = config_device or "pc_html5" # the on-demand one doesnt work with timefree30 stuff sadly + # so just use pc_html5 which has everything + url_data = self._download_xml(f"https://radiko.jp/v3/station/stream/{device}/{station}.xml", station, note=f"Downloading {device} stream information") @@ -238,6 +268,8 @@ class _RadikoBaseIE(InfoExtractor): formats = [] timefree_int = 1 if timefree else 0 + do_blacklist_streams = not len(self._configuration_arg("no_stream_blacklist", ie_key="rajiko")) > 0 + do_as_live_chunks = not len(self._configuration_arg("no_as_live_chunks", ie_key="rajiko")) > 0 for element in url_data.findall(f".//url[@timefree='{timefree_int}'][@areafree='0']/playlist_create_url"): # find <url>s with matching timefree and no areafree, then get their <playlist_create_url> url = element.text @@ -249,7 +281,7 @@ class _RadikoBaseIE(InfoExtractor): "station_id": station, "l": "15", # l = length, ie how many seconds in the live m3u8 (max 300) "lsid": auth_data["user"], - "type": "b", # it is a mystery + "type": "b", # a/b = in-region, c = areafree }) if timefree: @@ -268,7 +300,7 @@ class _RadikoBaseIE(InfoExtractor): preference = -1 entry_protocol = 'm3u8' - if domain in self._DOESNT_WORK_WITH_FFMPEG: + if domain in self._DOESNT_WORK_WITH_FFMPEG and do_blacklist_streams: self.write_debug(f"skipping {domain} (known not working)") continue if domain in self._DELIVERED_ONDEMAND: @@ -277,10 +309,30 @@ class _RadikoBaseIE(InfoExtractor): preference = 1 entry_protocol = None - formats += self._extract_m3u8_formats( - playlist_url, station, m3u8_id=domain, fatal=False, headers=auth_data["token"], - live=delivered_live, preference=preference, entry_protocol=entry_protocol, - note=f"Downloading m3u8 information from {domain}") + auth_headers = auth_data["token"] + + if delivered_live and timefree and do_as_live_chunks: + + chunks_playlist = hacks._generate_as_live_playlist( + self, playlist_url, start_at, end_at, domain, auth_headers + ) + + formats.append({ + "format_id": join_nonempty(domain, "chunked"), + "hls_media_playlist_data": chunks_playlist, + "preference": preference, + "ext": "m4a", + + # fallback to live for ffmpeg etc + "url": playlist_url, + "http_headers": auth_headers, + }) + else: + + formats += self._extract_m3u8_formats( + playlist_url, station, m3u8_id=domain, fatal=False, headers=auth_headers, + live=delivered_live, preference=preference, entry_protocol=entry_protocol, + note=f"Downloading m3u8 information from {domain}") return formats @@ -299,7 +351,7 @@ class RadikoLiveIE(_RadikoBaseIE): "id": "FMT", "title": "re:^TOKYO FM.+$", "alt_title": "TOKYO FM", - "thumbnail": "https://radiko.jp/res/banner/FMT/20220512162447.jpg", + "thumbnail": "https://radiko.jp/v2/static/station/logo/FMT/lrtrim/688x160.png", "channel": "TOKYO FM", "channel_id": "FMT", @@ -319,7 +371,7 @@ class RadikoLiveIE(_RadikoBaseIE): "id": "NORTHWAVE", "title": "re:^FM NORTH WAVE.+$", "alt_title": "FM NORTH WAVE", - "thumbnail": "https://radiko.jp/res/banner/NORTHWAVE/20150731161543.png", + "thumbnail": "https://radiko.jp/v2/static/station/logo/NORTHWAVE/lrtrim/688x160.png", "uploader": "FM NORTH WAVE", "uploader_url": "https://www.fmnorth.co.jp/", @@ -340,7 +392,7 @@ class RadikoLiveIE(_RadikoBaseIE): "id": "RN1", "title": "re:^ラジオNIKKEI第1.+$", "alt_title": "RADIONIKKEI", - "thumbnail": "https://radiko.jp/res/banner/RN1/20120802154152.png", + "thumbnail": "https://radiko.jp/v2/static/station/logo/RN1/lrtrim/688x160.png", "channel": "ラジオNIKKEI第1", "channel_url": "http://www.radionikkei.jp/", @@ -357,7 +409,7 @@ class RadikoLiveIE(_RadikoBaseIE): region = self._get_station_region(station) station_meta = self._get_station_meta(region, station) auth_data = self._auth(region) - formats = self._get_station_formats(station, False, auth_data) + formats = self._get_station_formats(station, False, auth_data, use_pc_html5=True) return { "is_live": True, @@ -368,71 +420,36 @@ class RadikoLiveIE(_RadikoBaseIE): class RadikoTimeFreeIE(_RadikoBaseIE): + _NETRC_MACHINE = "rajiko" _VALID_URL = r"https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-_]+)/(?P<id>\d+)" - _TESTS = [{ - "url": "https://radiko.jp/#!/ts/INT/20240809230000", - "info_dict": { - "live_status": "was_live", - "ext": "m4a", - "id": "INT-20240809230000", - - "title": "TOKYO MOON", - "series": "Tokyo Moon", - "description": "md5:20e68d2f400a391fa34d4e7c8c702cb8", - "chapters": "count:14", - "thumbnail": "https://program-static.cf.radiko.jp/ehwtw6mcvy.jpg", - - "upload_date": "20240809", - "timestamp": 1723212000.0, - "release_date": "20240809", - "release_timestamp": 1723215600.0, - "duration": 3600, - - "channel": "interfm", - "channel_id": "INT", - "channel_url": "https://www.interfm.co.jp/", - "uploader": "interfm", - "uploader_id": "INT", - "uploader_url": "https://www.interfm.co.jp/", - - "cast": ["松浦\u3000俊夫"], - "tags": ["松浦俊夫"], - }, - }, { - # late-night/early-morning show to test broadcast day checking - "url": "https://radiko.jp/#!/ts/TBS/20240810033000", - "info_dict": { - "live_status": "was_live", - "ext": "m4a", - "id": "TBS-20240810033000", - - "title": "CITY CHILL CLUB", - "series": "CITY CHILL CLUB", - "description": "md5:3fba2c1125059bed27247c0be90e58fa", - "chapters": "count:22", - "thumbnail": "https://program-static.cf.radiko.jp/ku7t4ztnaq.jpg", - - "upload_date": "20240809", - "timestamp": 1723228200.0, - "release_date": "20240809", - "release_timestamp": 1723233600.0, - "duration": 5400, - - "channel": "TBSラジオ", - "channel_url": "https://www.tbsradio.jp/", - "channel_id": "TBS", - "uploader": "TBSラジオ", - "uploader_url": "https://www.tbsradio.jp/", - "uploader_id": "TBS", - - "tags": ["CCC905", "音楽との出会いが楽しめる", "人気アーティストトーク", "音楽プロデューサー出演", "ドライブ中におすすめ", "寝る前におすすめ", "学生におすすめ"], - "cast": ["PES"], - }, - }] + # TESTS use a custom-ish script that updates the airdates automatically, see misc/test_extractors.py + + def _perform_login(self, username, password): + try: + login_info = self._download_json('https://radiko.jp/ap/member/webapi/member/login', None, note='Logging in', + data=urlencode_postdata({'mail': username, 'pass': password})) + self._has_tf30 = '2' in login_info.get('privileges') + # areafree = 1, timefree30 = 2, double plan = both + self.write_debug({**login_info, "radiko_session": "PRIVATE", "member_ukey": "PRIVATE"}) + except ExtractorError as error: + if isinstance(error.cause, HTTPError) and error.cause.status == 401: + raise ExtractorError('Invalid username and/or password', expected=True) + raise + + def _check_tf30(self): + if self._has_tf30 is not None: + return self._has_tf30 + if self._get_cookies('https://radiko.jp').get('radiko_session') is None: + return + account_info = self._download_json('https://radiko.jp/ap/member/webapi/v2/member/login/check', + None, note='Checking account status from cookies', expected_status=400) + self.write_debug({**account_info, "user_key": "PRIVATE"}) + self._has_tf30 = account_info.get('timefreeplus') == '1' + return self._has_tf30 def _get_programme_meta(self, station_id, url_time): day = url_time.broadcast_day_string() - meta = self._download_json(f"https://radiko.jp/v4/program/station/date/{day}/{station_id}.json", station_id, + meta = self._download_json(f"https://api.radiko.jp/program/v4/date/{day}/station/{station_id}.json", station_id, note="Downloading programme data") programmes = traverse_obj(meta, ("stations", lambda _, v: v["station_id"] == station_id, "programs", "program"), get_all=False) @@ -467,10 +484,12 @@ class RadikoTimeFreeIE(_RadikoBaseIE): "start_time_gte": start.isoformat(), "end_time_lt": end.isoformat(), }) - data = self._download_json(api_url, video_id, note="Downloading tracklist").get("data") + data_json = self._download_json( + api_url, video_id, note="Downloading tracklist", errnote="Downloading tracklist", fatal=False + ) chapters = [] - for track in data: + for track in traverse_obj(data_json, "data") or []: artist = traverse_obj(track, ("artist", "name")) or track.get("artist_name") chapters.append({ "title": join_nonempty(artist, track.get("title"), delim=" - "), @@ -492,9 +511,13 @@ class RadikoTimeFreeIE(_RadikoBaseIE): start = times[0] end = times[1] now = datetime.datetime.now(tz=rtime.JST) + expiry_free, expiry_tf30 = end.expiry() - if end.expiry(False) < now: + if expiry_tf30 < now: self.raise_no_formats("Programme is no longer available.", video_id=meta["id"], expected=True) + need_tf30 = expiry_free < now + if need_tf30 and not self._check_tf30(): + self.raise_login_required("Programme is only available with a Timefree 30 subscription") elif start > now: self.raise_no_formats("Programme has not aired yet.", video_id=meta["id"], expected=True) live_status = "is_upcoming" @@ -505,12 +528,14 @@ class RadikoTimeFreeIE(_RadikoBaseIE): region = self._get_station_region(station) station_meta = self._get_station_meta(region, station) chapters = self._extract_chapters(station, start, end, video_id=meta["id"]) - auth_data = self._auth(region) - formats = self._get_station_formats(station, True, auth_data, start_at=start, end_at=end) + auth_data = self._auth(region, need_tf30=need_tf30) + formats = self._get_station_formats(station, True, auth_data, start_at=start, end_at=end, use_pc_html5=need_tf30) return { **station_meta, - "alt_title": None, + "alt_title": None, # override from station metadata + "thumbnails": None, + **meta, "chapters": chapters, "formats": formats, @@ -548,22 +573,37 @@ class RadikoSearchIE(InfoExtractor): }] def _strip_date(self, date): + # lazy way of making a timestring (from eg 2025-05-20 01:00:00) return date.replace(" ", "").replace("-", "").replace(":", "") def _pagefunc(self, url, idx): url = update_url_query(url, {"page_idx": idx}) data = self._download_json(url, None, note=f"Downloading page {idx+1}") - return [self.url_result("https://radiko.jp/#!/ts/{station}/{time}".format( - station = i.get("station_id"), time = self._strip_date(i.get("start_time")))) - for i in data.get("data")] + results = [] + for r in data.get("data"): + station = r.get("station_id") + timestring = self._strip_date(r.get("start_time")) + + results.append( + self.url_result( + f"https://radiko.jp/#!/ts/{station}/{timestring}", + id=join_nonempty(station, timestring) + ) + ) + return results def _real_extract(self, url): url = url.replace("/#!/", "/!/", 1) # urllib.parse interprets the path as just one giant fragment because of the #, so we hack it away queries = parse_qs(url) - search_url = update_url_query("https://radiko.jp/v3/api/program/search", { + if queries.get("cul_area_id"): + queries["cur_area_id"] = queries.pop("cul_area_id") + # site used to use "cul_area_id" in the search url, now it uses "cur_area_id" (with an r) + # and outright rejects the old one with HTTP Error 415: Unsupported Media Type + + search_url = update_url_query("https://api.annex-cf.radiko.jp/v1/programs/legacy/perl/program/search", { **queries, "uid": "".join(random.choices("0123456789abcdef", k=32)), "app_id": "pc", @@ -588,44 +628,16 @@ class RadikoSearchIE(InfoExtractor): class RadikoShareIE(InfoExtractor): _VALID_URL = r"https?://(?:www\.)?radiko\.jp/share/" - _TESTS = [{ - # 29-hour time -> 24-hour time - "url": "http://radiko.jp/share/?sid=FMT&t=20240802240000", - "info_dict": { - "live_status": "was_live", - "ext": "m4a", - "id": "FMT-20240803000000", # the time given (24:00) works out to 00:00 the next day - - "title": "JET STREAM", - "series": "JET STREAM", - "description": "md5:c1a2172036ebb7a54eeafb47e0a08a50", - "chapters": "count:9", - "thumbnail": "https://program-static.cf.radiko.jp/greinlrspi.jpg", - - "upload_date": "20240802", - "timestamp": 1722610800.0, - "release_date": "20240802", - "release_timestamp": 1722614100.0, - "duration": 3300, - - "channel": "TOKYO FM", - "channel_id": "FMT", - "channel_url": "https://www.tfm.co.jp/", - "uploader": "TOKYO FM", - "uploader_id": "FMT", - "uploader_url": "https://www.tfm.co.jp/", - - "cast": ["福山雅治"], - "tags": ["福山雅治", "夜間飛行", "音楽との出会いが楽しめる", "朗読を楽しめる", "寝る前に聴きたい"], - } - }] def _real_extract(self, url): queries = parse_qs(url) station = traverse_obj(queries, ("sid", 0)) time = traverse_obj(queries, ("t", 0)) time = rtime.RadikoShareTime(time).timestring() - return self.url_result(f"https://radiko.jp/#!/ts/{station}/{time}", RadikoTimeFreeIE) + return self.url_result( + f"https://radiko.jp/#!/ts/{station}/{time}", RadikoTimeFreeIE, + id=join_nonempty(station, time) + ) class RadikoStationButtonIE(InfoExtractor): @@ -638,19 +650,9 @@ class RadikoStationButtonIE(InfoExtractor): "info_dict": { "ext": "m4a", 'live_status': 'is_live', - "id": "QRR", - "title": "re:^文化放送.+$", - 'alt_title': 'JOQR BUNKA HOSO', - 'thumbnail': 'https://radiko.jp/res/banner/QRR/20240423144553.png', - 'channel': '文化放送', - 'channel_id': 'QRR', - 'channel_url': 'http://www.joqr.co.jp/', - 'uploader': '文化放送', - 'uploader_id': 'QRR', - 'uploader_url': 'http://www.joqr.co.jp/', - - } + }, + 'only_matching': True, }] _WEBPAGE_TESTS = [{ @@ -661,7 +663,7 @@ class RadikoStationButtonIE(InfoExtractor): 'id': 'CCL', "title": "re:^FM COCOLO.+$", 'alt_title': 'FM COCOLO', - 'thumbnail': 'https://radiko.jp/res/banner/CCL/20161014144826.png', + 'thumbnail': 'https://radiko.jp/v2/static/station/logo/CCL/lrtrim/688x160.png', 'channel': 'FM COCOLO', 'channel_id': 'CCL', @@ -690,7 +692,7 @@ class RadikoPersonIE(InfoExtractor): },{ "url": "https://radiko.jp/persons/11421", "params": {'extractor_args': {'rajiko': {'key_station_only': ['']}}}, - "playlist_count": 1, + "playlist_mincount": 1, "info_dict": { "id": "person-11421", }, @@ -701,9 +703,9 @@ class RadikoPersonIE(InfoExtractor): now = rtime.RadikoTime.now(tz=rtime.JST) - min_start = rtime.earliest_available(False) - # we set the earliest time as the earliest we can get, - # so, the start of the broadcast day 1 week ago + min_start = (now - datetime.timedelta(days=30)).broadcast_day_start() + # we set the earliest time as the earliest we can get (or at least, that it's possible to get), + # so, the start of the broadcast day 30 days ago # that way we can get everything we can actually download, including stuff that aired at eg "26:00" person_api_url = update_url_query("https://api.radiko.jp/program/api/v1/programs", { |