# Reconstructed from a flattened git patch:
#   commit e886d693df1f6676601e119a7408db864d7eed43
#   From: garret
#   Date: Tue, 6 Aug 2024 08:36:15 +0100
#   Subject: add support for episodes from mobile site
#
#   auth is a bit hmm at the minute, i haven't implemented the multi-area
#   process yet
#
#   but, it works! a
#
#   Adds yt_dlp_plugins/extractor/radiko_mobile.py (new file, 84 lines in the
#   original patch; blob index 0000000..d3793d0).
#   Patch rendered by cgit v1.2.3-70-g09d2.
"""Extractor for programme ("event") pages on the radiko mobile site."""

import re

from yt_dlp.utils import (
    ExtractorError,
    clean_html,
    join_nonempty,
    traverse_obj,
    update_url_query,
)
import yt_dlp_plugins.extractor.radiko_time as rtime
from yt_dlp_plugins.extractor.radiko import _RadikoBaseIE, RadikoTimeFreeIE


class _RadikoMobileBaseIE(_RadikoBaseIE):
    """Helpers shared by extractors that read the radiko mobile site."""

    _MOBILE_USER_AGENT = 'Mozilla/5.0 (Linux; Android 10; Pixel 4 XL) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Mobile Safari/537.36'

    def _get_nextjs_data(self, url, video_id):
        """Fetch ``url`` with the mobile user agent and parse its Next.js data.

        Returns a ``(page_props, data)`` tuple: ``data`` is whatever
        ``_search_nextjs_data`` found in the page, and ``page_props`` is its
        ``props.pageProps`` member (``None`` when that path is absent).
        """
        html = self._download_webpage(url, video_id, headers={"User-Agent": self._MOBILE_USER_AGENT})
        data = self._search_nextjs_data(html, video_id)
        return traverse_obj(data, ("props", "pageProps")), data

    def _get_station_region(self, station):
        """Return the ``prefecturesList`` of ``station`` from the stations API.

        Only the first entry of ``stationList`` whose ``id`` equals
        ``station`` is used; ``None`` is returned when there is no match.
        """
        api_url = update_url_query("https://radiko.jp/api/stations/batchGetStations", {"stationId": station})
        data = self._download_json(api_url, station, note="Getting station regions")
        return traverse_obj(data, ("stationList", lambda _, v: v["id"] == station, "prefecturesList"), get_all=False)

    # Reuse the time-free extractor's chapter logic instead of duplicating it.
    _extract_chapters = RadikoTimeFreeIE._extract_chapters


class RadikoMobileEventIE(_RadikoMobileBaseIE):
    """Extract a single programme from ``radiko.jp/mobile/events/<id>``."""

    # The named group must be called ``id`` for ``_match_id`` to work.
    _VALID_URL = r"https?://(?:www\.)?radiko\.jp/mobile/events/(?P<id>[0-9-]+$)"

    def _get_programme_meta(self, program, actors=None):
        """Build the metadata part of the info dict from a ``program`` object.

        ``program`` is the programme dict taken from the page data. When
        ``actors`` is given, cast names are looked up in its ``actorsList``
        by the ids in ``program["actorIdsList"]``; otherwise the free-text
        ``performer`` field is split on common list delimiters.

        ``timestamp`` is the start and ``release_timestamp`` the end of the
        programme, both as given by the ``seconds`` members of
        ``startAt``/``endAt``. ``duration`` is their difference, or ``None``
        when either is missing.
        """
        if actors is not None:
            cast = []
            for actor_id in program.get("actorIdsList") or []:
                name = traverse_obj(
                    actors, ("actorsList", lambda _, v: v["key"] == actor_id, "name"), get_all=False)
                # Skip ids that have no matching actor rather than emitting None.
                if name:
                    cast.append(name)
            cast = cast or None
        else:
            # Delimiters: "/" and fullwidth "/", ideographic comma, ideographic
            # space, "," and fullwidth ",". Written as escapes so the fullwidth
            # forms cannot be silently folded to ASCII (which would make names
            # split on ordinary spaces).
            cast = traverse_obj(program, (
                'performer', {lambda x: re.split(r'[/\uFF0F\u3001\u3000,\uFF0C]', x)}, ..., {str.strip})) or None

        start = traverse_obj(program, ("startAt", "seconds"))
        end = traverse_obj(program, ("endAt", "seconds"))
        duration = end - start if start is not None and end is not None else None

        return {
            **traverse_obj(program, {
                "id": "id",
                "title": "title",
                "series": "rSeasonName",
                "tags": "tagsList",
                "thumbnail": "imageUrl",

                "channel": "stationName",
                "channel_id": "stationId",
                "uploader": "stationName",
                "uploader_id": "stationId",
            }),
            "description": clean_html(join_nonempty("summary", "description", from_dict=program, delim="\n")),
            "cast": cast,
            "timestamp": start,
            "release_timestamp": end,
            "duration": duration,
        }

    def _real_extract(self, url):
        """Return the info dict for the programme page at ``url``.

        Raises ``ExtractorError`` when the page data lacks the programme, its
        station or broadcast times, or when the station's region cannot be
        determined.
        """
        event_id = self._match_id(url)
        page_props, _ = self._get_nextjs_data(url, event_id)

        program = traverse_obj(page_props, "program")
        if not program:
            raise ExtractorError("Unable to find programme data in the page", video_id=event_id)

        meta = self._get_programme_meta(program, traverse_obj(page_props, "actors"))
        # NOTE(review): falling back to the URL id assumes it is an acceptable
        # identifier when the programme object carries none - confirm.
        meta.setdefault("id", event_id)

        station = meta.get("channel_id")
        if not station or meta.get("timestamp") is None or meta.get("release_timestamp") is None:
            raise ExtractorError("Programme data is missing its station or broadcast times", video_id=event_id)

        start = rtime.RadikoTime.fromtimestamp(meta.get("timestamp"), tz=rtime.JST)
        end = rtime.RadikoTime.fromtimestamp(meta.get("release_timestamp"), tz=rtime.JST)

        chapters = self._extract_chapters(station, start, end, video_id=meta["id"])

        regions = self._get_station_region(station)
        if not regions:
            raise ExtractorError(f"Unable to determine the region of station {station}", video_id=event_id)
        # Only the first listed region is used; multi-area auth is not implemented yet.
        area = regions[0]
        auth_data = self._auth(area)
        formats = self._get_station_formats(station, True, auth_data, start_at=start, end_at=end)

        return {
            **meta,
            "chapters": chapters,
            "formats": formats,
            "live_status": "was_live",
            "container": "m4a_dash",  # force fixup, AAC-only HLS
        }