# Provenance: commit 794ec2b5256a8219a0e50f3e14165f0914189db4 by garret1317,
# Thu, 10 Jul 2025 22:12:28 +0100 -- "Add basic radiko podcast extractors".
# New file: yt_dlp_plugins/extractor/radiko_podcast.py

from yt_dlp.extractor.common import InfoExtractor
from yt_dlp.utils import (
    clean_html,
    traverse_obj,
    url_or_none,
    str_or_none,
)

# nice simple one for a change...
# the app uses a similar system to regular programmes, thankfully the site doesn't
# but it does need protobufs to get more than 20 items...


class _RadikoPodcastBaseIE(InfoExtractor):
    """Shared helpers for the radiko podcast extractors in this module."""

    def _extract_episode(self, episode_info):
        """Build an info dict from one podcast episode object.

        `episode_info` is the episode mapping taken from the page's Next.js
        data (either the single-episode page or one item of a channel's
        episode list). Fields that are absent are simply omitted by
        `traverse_obj`.
        """
        return {
            **traverse_obj(episode_info, {
                "id": ("id", {str_or_none}),
                "url": ("audio", "url"),
                "duration": ("audio", "durationSec"),

                "title": "title",
                "description": ("description", {clean_html}),
                "timestamp": ("startAt", "seconds"),

                "series": "channelTitle",
                "series_id": "channelId",
                "channel": "stationName",
                "uploader": "stationName",
            }),
            # Prefer the episode's own artwork, fall back to the channel's.
            "thumbnail": (
                traverse_obj(episode_info, ("imageUrl", {url_or_none}))
                or traverse_obj(episode_info, ("channelImageUrl", {url_or_none}))
            ),

            # so that --download-archive still works if you download from the playlist page
            "webpage_url": f"https://radiko.jp/podcast/episodes/{episode_info.get('id')}",
            'extractor_key': RadikoPodcastEpisodeIE.ie_key(),
            'extractor': 'RadikoPodcastEpisode',
        }


class RadikoPodcastEpisodeIE(_RadikoPodcastBaseIE):
    """Extractor for a single radiko podcast episode page."""

    # The named group must be called `id`: `_match_id` reads it.
    _VALID_URL = r"https?://radiko\.jp/podcast/episodes/(?P<id>[a-f0-9-]+)"

    _TESTS = [{
        "url": "https://radiko.jp/podcast/episodes/cc8cf709-a50b-4846-aa0e-91ab10cf8bff",
        "info_dict": {
            "id": "cc8cf709-a50b-4846-aa0e-91ab10cf8bff",
            "ext": "mp3",
            'title': '2025.6.26 おしゃべり技術くん',
            'description': 'md5:1c4048025f68d6da053dd879a5d62304',
            'duration': 717,
            'thumbnail': 'https://podcast-static.cf.radiko.jp/09f27a48-ae04-4ce7-a024-572460e46eb7-20240214160012.png',
            'series': 'おしゃべり技術くん',
            'series_id': '09f27a48-ae04-4ce7-a024-572460e46eb7',
            'timestamp': 1751554800,
            'upload_date': '20250703',
        },
    }]

    def _real_extract(self, url):
        """Download the episode page and extract the episode from its Next.js data."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        next_data = self._search_nextjs_data(webpage, video_id)["props"]["pageProps"]

        episode_info = next_data["podcastEpisode"]

        return self._extract_episode(episode_info)


class RadikoPodcastChannelIE(_RadikoPodcastBaseIE):
    """Extractor for a radiko podcast channel page, returned as a playlist.

    Only the episodes embedded in the page's Next.js data are returned; a
    warning is emitted when the site reports that more pages exist.
    """

    # The named group must be called `id`: `_match_id` reads it.
    _VALID_URL = r"https?://radiko\.jp/podcast/channels/(?P<id>[a-f0-9-]+)"

    _TESTS = [{
        "url": "https://radiko.jp/podcast/channels/09f27a48-ae04-4ce7-a024-572460e46eb7",
        "info_dict": {
            "id": "09f27a48-ae04-4ce7-a024-572460e46eb7",
        },
        'playlist_mincount': 20,
        'expected_warnings': ['Currently this extractor can only extract the latest 20 episodes'],
    }]

    def _real_extract(self, url):
        """Download the channel page and return its embedded episodes as a playlist."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        next_data = self._search_nextjs_data(webpage, video_id)["props"]["pageProps"]

        channel_info = next_data["podcastChannel"]
        episode_list_response = next_data["listPodcastEpisodesResponse"]

        def entries():
            # Lazily yield the episodes that were embedded in the page.
            for episode in episode_list_response["episodesList"]:
                yield self._extract_episode(episode)

            # Warn only after the available episodes have been yielded.
            if traverse_obj(episode_list_response, "hasNextPage"):
                self.report_warning(f'Currently this extractor can only extract the latest {len(episode_list_response["episodesList"])} episodes')

                # TODO: GRPC/protobuf stuff to get the next page
                # https://api.annex.radiko.jp/radiko.PodcastService/ListPodcastEpisodes
                # see the "さらに表示" ("Show more") button on site

        return {
            "_type": "playlist",
            "id": video_id,
            # NOTE(review): presumably yt-dlp fills the playlist_* fields of
            # entries from the playlist's own `title`/`id` keys -- confirm
            # whether these should be `title`/`description`/`thumbnail`.
            **traverse_obj(channel_info, {
                "playlist_title": "title",
                "playlist_id": "id",
                "playlist_description": ("description", {clean_html}),
                "playlist_thumbnail": ("imageUrl", {url_or_none}),
            }),
            "entries": entries(),
        }