import re

from yt_dlp.utils import (
    clean_html,
    join_nonempty,
    traverse_obj,
    update_url_query,
)

import yt_dlp_plugins.extractor.radiko_time as rtime
from yt_dlp_plugins.extractor.radiko import _RadikoBaseIE, RadikoTimeFreeIE


class _RadikoMobileBaseIE(_RadikoBaseIE):
    """Shared helpers for extractors that read radiko's mobile (Next.js) pages."""

    _MOBILE_USER_AGENT = 'Mozilla/5.0 (Linux; Android 10; Pixel 4 XL) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Mobile Safari/537.36'

    def _get_nextjs_data(self, url, video_id):
        """Download ``url`` with the mobile user agent and parse its Next.js data.

        Returns a ``(page_props, data)`` tuple, where ``data`` is the full
        Next.js payload and ``page_props`` is its ``props.pageProps`` entry
        (``None`` when that path is absent).
        """
        html = self._download_webpage(
            url, video_id, headers={"User-Agent": self._MOBILE_USER_AGENT})
        data = self._search_nextjs_data(html, video_id)
        return traverse_obj(data, ("props", "pageProps")), data

    def _get_station_region(self, station):
        """Return the ``prefecturesList`` of ``station`` from the stations API.

        Only the first ``stationList`` entry whose ``id`` equals ``station``
        is used; ``None`` is returned when there is no such entry.
        """
        api_url = update_url_query(
            "https://radiko.jp/api/stations/batchGetStations", {"stationId": station})
        data = self._download_json(api_url, station, note="Getting station regions")
        return traverse_obj(
            data,
            ("stationList", lambda _, v: v["id"] == station, "prefecturesList"),
            get_all=False)

    # Reuse the chapter extraction implemented by the timefree extractor.
    _extract_chapters = RadikoTimeFreeIE._extract_chapters

    def _get_programme_meta(self, program, actors=None):
        """Build the metadata part of an info dict from a ``program`` object.

        ``program`` is the programme dict taken from the page data. When
        ``actors`` is given, the cast is resolved by looking up each id in
        ``program["actorIdsList"]`` among ``actors["actorsList"]``; otherwise
        it is split out of the free-text ``performer`` field.

        ``cast`` is ``None`` when no names could be determined.
        """
        if actors is not None:
            cast = []
            # A missing "actorIdsList" is treated as an empty list rather than
            # raising TypeError on iteration.
            for actor_id in program.get("actorIdsList") or []:
                name = traverse_obj(
                    actors,
                    ("actorsList", lambda _, v: v["key"] == actor_id, "name"),
                    get_all=False)
                # Ids without a matching actor entry are skipped so that the
                # cast list never contains None.
                if name:
                    cast.append(name)
            cast = cast or None
        else:
            cast = traverse_obj(program, (
                'performer', {lambda x: re.split(r'[//、 ,,]', x)}, ..., {str.strip})) or None

        start = traverse_obj(program, ("startAt", "seconds"))
        end = traverse_obj(program, ("endAt", "seconds"))
        # The previous archive id format was "<station id>-<start timestring>".
        old_timestring = rtime.RadikoTime.fromtimestamp(start, tz=rtime.JST).timestring()

        return {
            **traverse_obj(program, {
                "id": "id",
                "title": "title",
                "series": "rSeasonName",
                "tags": "tagsList",
                "thumbnail": "imageUrl",

                "channel": "stationName",
                "channel_id": "stationId",
                "uploader": "stationName",
                "uploader_id": "stationId",
            }),
            "description": clean_html(
                join_nonempty("summary", "description", from_dict=program, delim="\n")),
            "cast": cast,
            "timestamp": start,
            "release_timestamp": end,
            "duration": end - start,
            "_old_archive_ids": [join_nonempty(program.get("stationId"), old_timestring)],
        }

    def _extract_episode(self, program, actors=None):
        """Return a complete info dict (metadata, chapters, formats) for ``program``."""
        meta = self._get_programme_meta(program, actors)
        station = meta.get("channel_id")
        start = rtime.RadikoTime.fromtimestamp(meta.get("timestamp"), tz=rtime.JST)
        end = rtime.RadikoTime.fromtimestamp(meta.get("release_timestamp"), tz=rtime.JST)

        chapters = self._extract_chapters(station, start, end, video_id=meta["id"])
        # Authenticate against the first region listed for the station.
        area = self._get_station_region(station)[0]
        auth_data = self._auth(area)
        formats = self._get_station_formats(station, True, auth_data, start_at=start, end_at=end)

        return {
            **meta,
            "chapters": chapters,
            "formats": formats,
            "live_status": "was_live",
            "container": "m4a_dash",  # force fixup, AAC-only HLS
        }


class RadikoMobileEventIE(_RadikoMobileBaseIE):
    """Extractor for single programmes at ``radiko.jp/mobile/events/<id>``."""

    # The named group must be called "id" for _match_id() to work.
    _VALID_URL = r"https?://(?:www\.)?radiko\.jp/mobile/events/(?P<id>[0-9-]+$)"
    _TESTS = [{
        "url": "https://radiko.jp/mobile/events/10282949",
        "info_dict": {
            "live_status": "was_live",
            "ext": "m4a",
            "id": "10282949",
            "_old_archive_ids": ["INT-20240802230000"],

            "title": "TOKYO MOON",
            "series": "Tokyo Moon",
            "description": "md5:20e68d2f400a391fa34d4e7c8c702cb8",
            "chapters": "count:15",
            "thumbnail": "https://program-static.cf.radiko.jp/ehwtw6mcvy.jpg",

            "upload_date": "20240802",
            "timestamp": 1722607200.0,
            "release_date": "20240802",
            "release_timestamp": 1722610800.0,
            "duration": 3600,

            "channel": "interfm",
            "channel_id": "INT",
            "uploader": "interfm",
            "uploader_id": "INT",

            "cast": ["松浦俊夫"],
            "tags": ["松浦俊夫"],
        },
    }]

    def _real_extract(self, url):
        """Extract the programme described by the event page at ``url``."""
        event_id = self._match_id(url)
        page_props, _ = self._get_nextjs_data(url, event_id)
        return self._extract_episode(page_props.get("program"), page_props.get("actors"))


class RadikoMobileSeasonsIE(_RadikoMobileBaseIE):
    """Playlist extractor for ``radiko.jp/r_seasons/<id>`` (optionally under ``/mobile``)."""

    # The named group must be called "id" for _match_id() to work.
    _VALID_URL = r"https?://(?:www\.)?radiko\.jp/(?:mobile/)?r_seasons/(?P<id>\d+$)"

    def _real_extract(self, url):
        """Return a playlist with one entry per programme in ``pastPrograms``."""
        season_id = self._match_id(url)
        page_props, _ = self._get_nextjs_data(url, season_id)
        # Read the list once; a season page without "pastPrograms" yields an
        # empty playlist instead of a TypeError.
        past_programs = page_props.get("pastPrograms") or []
        actors = page_props.get("actors")

        def entries():
            for episode in past_programs:
                yield self._extract_episode(episode, actors)

        return self.playlist_result(
            entries(), playlist_id=season_id, playlist_count=len(past_programs))