From 794ec2b5256a8219a0e50f3e14165f0914189db4 Mon Sep 17 00:00:00 2001 From: garret1317 Date: Thu, 10 Jul 2025 22:12:28 +0100 Subject: Add basic radiko podcast extractors --- yt_dlp_plugins/extractor/radiko_podcast.py | 114 +++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 yt_dlp_plugins/extractor/radiko_podcast.py (limited to 'yt_dlp_plugins/extractor/radiko_podcast.py') diff --git a/yt_dlp_plugins/extractor/radiko_podcast.py b/yt_dlp_plugins/extractor/radiko_podcast.py new file mode 100644 index 0000000..93e1408 --- /dev/null +++ b/yt_dlp_plugins/extractor/radiko_podcast.py @@ -0,0 +1,114 @@ +from yt_dlp.extractor.common import InfoExtractor +from yt_dlp.utils import ( + clean_html, + traverse_obj, + url_or_none, + str_or_none, +) + +# nice simple one for a change... +# the app uses a similar system to regular programmes, thankfully the site doesn't +# but it does need protobufs to get more than 20 items... + +class _RadikoPodcastBaseIE(InfoExtractor): + + def _extract_episode(self, episode_info): + return { + **traverse_obj(episode_info, { + "id": ("id", {str_or_none}), + "url": ("audio", "url"), + "duration": ("audio", "durationSec"), + + "title": "title", + "description": ("description", {clean_html}), + "timestamp": ("startAt", "seconds"), + + "series": "channelTitle", + "series_id": "channelId", + "channel": "stationName", + "uploader": "stationName", + }), + "thumbnail": traverse_obj(episode_info, ("imageUrl", {url_or_none})) + or traverse_obj(episode_info, ("channelImageUrl", {url_or_none})), + + # so that --download-archive still works if you download from the playlist page + "webpage_url": "https://radiko.jp/podcast/episodes/{id}".format(id=episode_info.get("id")), + 'extractor_key': RadikoPodcastEpisodeIE.ie_key(), + 'extractor': 'RadikoPodcastEpisode', + } + + +class RadikoPodcastEpisodeIE(_RadikoPodcastBaseIE): + _VALID_URL = r"https?://radiko\.jp/podcast/episodes/(?P<id>[a-f0-9-]+)" + + _TESTS = [{ + "url":
"https://radiko.jp/podcast/episodes/cc8cf709-a50b-4846-aa0e-91ab10cf8bff", + "info_dict": { + "id": "cc8cf709-a50b-4846-aa0e-91ab10cf8bff", + "ext": "mp3", + 'title': '2025.6.26 おしゃべり技術くん', + 'description': 'md5:1c4048025f68d6da053dd879a5d62304', + 'duration': 717, + 'thumbnail': 'https://podcast-static.cf.radiko.jp/09f27a48-ae04-4ce7-a024-572460e46eb7-20240214160012.png', + 'series': 'おしゃべり技術くん', + 'series_id': '09f27a48-ae04-4ce7-a024-572460e46eb7', + 'timestamp': 1751554800, + 'upload_date': '20250703', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + next_data = self._search_nextjs_data(webpage, video_id)["props"]["pageProps"] + + episode_info = next_data["podcastEpisode"] + + return self._extract_episode(episode_info) + + +class RadikoPodcastChannelIE(_RadikoPodcastBaseIE): + _VALID_URL = r"https?://radiko\.jp/podcast/channels/(?P<id>[a-f0-9-]+)" + + _TESTS = [{ + "url": "https://radiko.jp/podcast/channels/09f27a48-ae04-4ce7-a024-572460e46eb7", + "info_dict": { + "id": "09f27a48-ae04-4ce7-a024-572460e46eb7" + }, + 'playlist_mincount': 20, + 'expected_warnings': ['Currently this extractor can only extract the latest 20 episodes'], + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + next_data = self._search_nextjs_data(webpage, video_id)["props"]["pageProps"] + + channel_info = next_data["podcastChannel"] + episode_list_response = next_data["listPodcastEpisodesResponse"] + + + def entries(): + for episode in episode_list_response["episodesList"]: + yield self._extract_episode(episode) + + if traverse_obj(episode_list_response, "hasNextPage"): + self.report_warning(f'Currently this extractor can only extract the latest {len(episode_list_response["episodesList"])} episodes') + + # TODO: GRPC/protobuf stuff to get the next page + # https://api.annex.radiko.jp/radiko.PodcastService/ListPodcastEpisodes + # see さらに表示
button on site + + + return { + "_type": "playlist", + "id": video_id, + **traverse_obj(channel_info, { + "playlist_title": "title", + "playlist_id": "id", + "playlist_description": ("description", {clean_html}), + "playlist_thumbnail": ("imageUrl", {url_or_none}), + + }), + "entries": entries(), + } -- cgit v1.2.3-70-g09d2 From fad5614bf07dae50be5850f28e845af7893967dc Mon Sep 17 00:00:00 2001 From: garret1317 Date: Thu, 10 Jul 2025 23:13:39 +0100 Subject: fix channel/uploader field in podcast extractor --- yt_dlp_plugins/extractor/radiko_podcast.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'yt_dlp_plugins/extractor/radiko_podcast.py') diff --git a/yt_dlp_plugins/extractor/radiko_podcast.py b/yt_dlp_plugins/extractor/radiko_podcast.py index 93e1408..af66f6a 100644 --- a/yt_dlp_plugins/extractor/radiko_podcast.py +++ b/yt_dlp_plugins/extractor/radiko_podcast.py @@ -25,8 +25,8 @@ class _RadikoPodcastBaseIE(InfoExtractor): "series": "channelTitle", "series_id": "channelId", - "channel": "stationName", - "uploader": "stationName", + "channel": "channelStationName", + "uploader": "channelStationName", }), "thumbnail": traverse_obj(episode_info, ("imageUrl", {url_or_none})) or traverse_obj(episode_info, ("channelImageUrl", {url_or_none})), @@ -54,6 +54,8 @@ class RadikoPodcastEpisodeIE(_RadikoPodcastBaseIE): 'series_id': '09f27a48-ae04-4ce7-a024-572460e46eb7', 'timestamp': 1751554800, 'upload_date': '20250703', + 'uploader': 'IBCラジオ', + 'channel': 'IBCラジオ', }, }] -- cgit v1.2.3-70-g09d2 From ac94bad6ed14f32adfeceac35cc60d39680508dd Mon Sep 17 00:00:00 2001 From: garret1317 Date: Mon, 11 Aug 2025 09:22:50 +0100 Subject: Implement multi-page podcasts with protobug needs core change in upstream to work (traverse_obj doesnt work with dataclasses) --- yt_dlp_plugins/extractor/radiko_podcast.py | 39 ++++++++++++++++++------------ 1 file changed, 24 insertions(+), 15 deletions(-) (limited to 'yt_dlp_plugins/extractor/radiko_podcast.py') diff --git 
a/yt_dlp_plugins/extractor/radiko_podcast.py b/yt_dlp_plugins/extractor/radiko_podcast.py index af66f6a..904bb62 100644 --- a/yt_dlp_plugins/extractor/radiko_podcast.py +++ b/yt_dlp_plugins/extractor/radiko_podcast.py @@ -6,9 +6,12 @@ from yt_dlp.utils import ( str_or_none, ) -# nice simple one for a change... -# the app uses a similar system to regular programmes, thankfully the site doesn't -# but it does need protobufs to get more than 20 items... +try: + import protobug + import yt_dlp_plugins.extractor.radiko_protobufs as pb +except ImportError: + protobug = None + class _RadikoPodcastBaseIE(InfoExtractor): @@ -32,7 +35,7 @@ class _RadikoPodcastBaseIE(InfoExtractor): or traverse_obj(episode_info, ("channelImageUrl", {url_or_none})), # so that --download-archive still works if you download from the playlist page - "webpage_url": "https://radiko.jp/podcast/episodes/{id}".format(id=episode_info.get("id")), + "webpage_url": "https://radiko.jp/podcast/episodes/{id}".format(id=traverse_obj(episode_info, "id")), 'extractor_key': RadikoPodcastEpisodeIE.ie_key(), 'extractor': 'RadikoPodcastEpisode', } @@ -82,29 +85,35 @@ class RadikoPodcastChannelIE(_RadikoPodcastBaseIE): }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - next_data = self._search_nextjs_data(webpage, video_id)["props"]["pageProps"] + channel_id = self._match_id(url) + webpage = self._download_webpage(url, channel_id) + next_data = self._search_nextjs_data(webpage, channel_id)["props"]["pageProps"] channel_info = next_data["podcastChannel"] episode_list_response = next_data["listPodcastEpisodesResponse"] def entries(): + has_next_page = episode_list_response.get("hasNextPage") for episode in episode_list_response["episodesList"]: + cursor = episode.get("id") yield self._extract_episode(episode) - if traverse_obj(episode_list_response, "hasNextPage"): - self.report_warning(f'Currently this extractor can only extract the latest 
{len(episode_list_response["episodesList"])} episodes') - - # TODO: GRPC/protobuf stuff to get the next page - # https://api.annex.radiko.jp/radiko.PodcastService/ListPodcastEpisodes - # see さらに表示 button on site - + if has_next_page: + if protobug: + userservice_token = pb.auth_userservice(self) + while has_next_page: + page = pb.get_podcast_episodes(self, channel_id, userservice_token, cursor) + has_next_page = page.hasNextPage + for episode in page.episodes: + cursor = episode.id + yield self._extract_episode(episode) + else: + self.report_warning(f'Only extracting the latest {len(episode_list_response["episodesList"])} episodes. Install protobug for more.') return { "_type": "playlist", - "id": video_id, + "id": channel_id, **traverse_obj(channel_info, { "playlist_title": "title", "playlist_id": "id", -- cgit v1.2.3-70-g09d2 From d5f824093b0748889916a1ba820398aecaa184c8 Mon Sep 17 00:00:00 2001 From: garret1317 Date: Wed, 13 Aug 2025 02:49:46 +0100 Subject: convert protobug obj to dict, for traverse_obj --- yt_dlp_plugins/extractor/radiko_podcast.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'yt_dlp_plugins/extractor/radiko_podcast.py') diff --git a/yt_dlp_plugins/extractor/radiko_podcast.py b/yt_dlp_plugins/extractor/radiko_podcast.py index 904bb62..84bc288 100644 --- a/yt_dlp_plugins/extractor/radiko_podcast.py +++ b/yt_dlp_plugins/extractor/radiko_podcast.py @@ -6,6 +6,7 @@ from yt_dlp.utils import ( str_or_none, ) +import dataclasses try: import protobug import yt_dlp_plugins.extractor.radiko_protobufs as pb @@ -107,7 +108,7 @@ class RadikoPodcastChannelIE(_RadikoPodcastBaseIE): has_next_page = page.hasNextPage for episode in page.episodes: cursor = episode.id - yield self._extract_episode(episode) + yield self._extract_episode(dataclasses.asdict(episode)) else: self.report_warning(f'Only extracting the latest {len(episode_list_response["episodesList"])} episodes. 
Install protobug for more.') -- cgit v1.2.3-70-g09d2 From 8337d2b164759181777f64f12b985e4fad769ab7 Mon Sep 17 00:00:00 2001 From: garret1317 Date: Sun, 14 Sep 2025 17:28:08 +0100 Subject: Add support for bundled protobug library github: closes #29 --- yt_dlp_plugins/extractor/radiko_dependencies.py | 29 +++++++++++++++++++++++++ yt_dlp_plugins/extractor/radiko_podcast.py | 7 +++--- yt_dlp_plugins/extractor/radiko_protobufs.py | 7 ++---- 3 files changed, 34 insertions(+), 9 deletions(-) create mode 100644 yt_dlp_plugins/extractor/radiko_dependencies.py (limited to 'yt_dlp_plugins/extractor/radiko_podcast.py') diff --git a/yt_dlp_plugins/extractor/radiko_dependencies.py b/yt_dlp_plugins/extractor/radiko_dependencies.py new file mode 100644 index 0000000..769a5e3 --- /dev/null +++ b/yt_dlp_plugins/extractor/radiko_dependencies.py @@ -0,0 +1,29 @@ +# Bundle importing code Copyright (c) 2021-2022 Grub4K, from yt-dont-lock-p. +# https://github.com/Grub4K/yt-dont-lock-p/blob/ff3b6e1d42ce8584153ae27544d2c05b50ab5954/yt_dlp_plugins/postprocessor/yt_dont_lock_p/__init__.py#L23-L46 +# Used under 0BSD with permission + +# https://discord.com/channels/807245652072857610/1112613156934668338/1416816007732920430 (yt-dlp discord server, https://discord.gg/H5MNcFW63r ) +# [17:00] garret1317: @Grub4K can i pinch your MIT-licensed dependency bundling code to use in my 0BSD-licensed plugin? 
+# I will credit of course but i can't require that anyone else does the same +# (Any response to this message will be considered a written consent or refusal of the request) +# [17:04] Grub4K: Feel free to use that part under 0BSD +# [17:05] garret1317: 👍 cheers + +try: + import protobug +except ImportError: + import sys + from pathlib import Path + + # Try importing from zip file bundle + search_path = str(Path(__file__).parent.parent) + sys.path.append(search_path) + try: + import protobug + except ImportError: + protobug = None + except Exception: + protobug = None + + finally: + sys.path.remove(search_path) diff --git a/yt_dlp_plugins/extractor/radiko_podcast.py b/yt_dlp_plugins/extractor/radiko_podcast.py index 84bc288..a984be3 100644 --- a/yt_dlp_plugins/extractor/radiko_podcast.py +++ b/yt_dlp_plugins/extractor/radiko_podcast.py @@ -7,11 +7,10 @@ from yt_dlp.utils import ( ) import dataclasses -try: - import protobug + +from yt_dlp_plugins.extractor.radiko_dependencies import protobug +if protobug: import yt_dlp_plugins.extractor.radiko_protobufs as pb -except ImportError: - protobug = None class _RadikoPodcastBaseIE(InfoExtractor): diff --git a/yt_dlp_plugins/extractor/radiko_protobufs.py b/yt_dlp_plugins/extractor/radiko_protobufs.py index 4eb4f8b..a8bbec1 100755 --- a/yt_dlp_plugins/extractor/radiko_protobufs.py +++ b/yt_dlp_plugins/extractor/radiko_protobufs.py @@ -1,12 +1,9 @@ #!/usr/bin/env python3 -try: - import protobug -except ImportError: - protobug = None - import struct import random +from yt_dlp_plugins.extractor.radiko_dependencies import protobug + if protobug: # i suppose it works lmao -- cgit v1.2.3-70-g09d2 From 68660d1dfe550be845033cd0301cd2e46144afdd Mon Sep 17 00:00:00 2001 From: garret1317 Date: Sun, 14 Sep 2025 17:33:57 +0100 Subject: Add instructions for obtaining protobug in the warning message --- yt_dlp_plugins/extractor/radiko_podcast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 
'yt_dlp_plugins/extractor/radiko_podcast.py') diff --git a/yt_dlp_plugins/extractor/radiko_podcast.py b/yt_dlp_plugins/extractor/radiko_podcast.py index a984be3..10579b5 100644 --- a/yt_dlp_plugins/extractor/radiko_podcast.py +++ b/yt_dlp_plugins/extractor/radiko_podcast.py @@ -109,7 +109,7 @@ class RadikoPodcastChannelIE(_RadikoPodcastBaseIE): cursor = episode.id yield self._extract_episode(dataclasses.asdict(episode)) else: - self.report_warning(f'Only extracting the latest {len(episode_list_response["episodesList"])} episodes. Install protobug for more.') + self.report_warning(f'protobug is required to extract more than the latest {len(episode_list_response["episodesList"])} episodes.\nIf you installed yt-dlp-rajiko manually, use the .zip bundle instead. If you installed with pip, install protobug as well.') return { "_type": "playlist", -- cgit v1.2.3-70-g09d2 From 25d4c20be556825a5b3e2b6a66c719ad2c5d8c1f Mon Sep 17 00:00:00 2001 From: garret1317 Date: Thu, 18 Sep 2025 23:11:24 +0100 Subject: clarify protobug error message --- yt_dlp_plugins/extractor/radiko_podcast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'yt_dlp_plugins/extractor/radiko_podcast.py') diff --git a/yt_dlp_plugins/extractor/radiko_podcast.py b/yt_dlp_plugins/extractor/radiko_podcast.py index 10579b5..21e3dfd 100644 --- a/yt_dlp_plugins/extractor/radiko_podcast.py +++ b/yt_dlp_plugins/extractor/radiko_podcast.py @@ -109,7 +109,7 @@ class RadikoPodcastChannelIE(_RadikoPodcastBaseIE): cursor = episode.id yield self._extract_episode(dataclasses.asdict(episode)) else: - self.report_warning(f'protobug is required to extract more than the latest {len(episode_list_response["episodesList"])} episodes.\nIf you installed yt-dlp-rajiko manually, use the .zip bundle instead. 
If you installed with pip, install protobug as well.') + self.report_warning(f'protobug is required to extract more than the latest {len(episode_list_response["episodesList"])} episodes.\nIf you installed yt-dlp-rajiko manually (with the .whl), use the .zip bundle instead. If you installed with pip, pip install protobug .') return { "_type": "playlist", -- cgit v1.2.3-70-g09d2 From ca5cf0703d7a3171c7c9e5b1a9c89a96e0b8d6e8 Mon Sep 17 00:00:00 2001 From: garret1317 Date: Fri, 19 Sep 2025 00:48:09 +0100 Subject: add podcast search IE --- yt_dlp_plugins/extractor/radiko_podcast.py | 41 ++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'yt_dlp_plugins/extractor/radiko_podcast.py') diff --git a/yt_dlp_plugins/extractor/radiko_podcast.py b/yt_dlp_plugins/extractor/radiko_podcast.py index 21e3dfd..67d6475 100644 --- a/yt_dlp_plugins/extractor/radiko_podcast.py +++ b/yt_dlp_plugins/extractor/radiko_podcast.py @@ -1,12 +1,16 @@ from yt_dlp.extractor.common import InfoExtractor from yt_dlp.utils import ( clean_html, + OnDemandPagedList, + parse_qs, traverse_obj, + update_url_query, url_or_none, str_or_none, ) import dataclasses +import random from yt_dlp_plugins.extractor.radiko_dependencies import protobug if protobug: @@ -123,3 +127,40 @@ class RadikoPodcastChannelIE(_RadikoPodcastBaseIE): }), "entries": entries(), } + + +class RadikoPodcastSearchIE(InfoExtractor): + _VALID_URL = r"https?://(?:www\.)?radiko\.jp/#!/search/podcast/(?:timeshift|live)\?" 
+ + def _pagefunc(self, url, idx): + url = update_url_query(url, {"pageIdx": idx}) + data = self._download_json(url, None, note=f"Downloading page {idx+1}") + + results = [] + for channel in data.get("channels"): + results.append( + self.url_result( + channel.get("channelUrl"), + id=channel.get("id"), + ie=RadikoPodcastChannelIE, + ) + ) + return results + + + def _real_extract(self, url): + # hack away the # so urllib.parse will work (same as normal RadikoSearchIE) + url = url.replace("/#!/", "/!/", 1) + queries = parse_qs(url) + + keywords = traverse_obj(queries, ("key", 0)) + search_url = update_url_query("https://api.annex-cf.radiko.jp/v1/podcasts/channels/search_with_keywords_by_offset", { + "keywords": keywords, + "uid": "".join(random.choices("0123456789abcdef", k=32)), + "limit": 50, # result limit. the actual limit before the api errors is 5000, but that seems a bit rude so i'll leave as 50 like the radio one + }) + + return self.playlist_result( + OnDemandPagedList(lambda idx: self._pagefunc(search_url, idx), 50), + title=keywords, + ) -- cgit v1.2.3-70-g09d2 From b8807109ba7e4515ffc5d6d3d0fcf8888acd13a3 Mon Sep 17 00:00:00 2001 From: garret1317 Date: Fri, 19 Sep 2025 01:28:17 +0100 Subject: Add PodcastSearch test --- contrib/test_extractors.py | 4 ++-- yt_dlp_plugins/extractor/radiko_podcast.py | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'yt_dlp_plugins/extractor/radiko_podcast.py') diff --git a/contrib/test_extractors.py b/contrib/test_extractors.py index 1ef63d0..0b505b8 100755 --- a/contrib/test_extractors.py +++ b/contrib/test_extractors.py @@ -49,7 +49,7 @@ from yt_dlp_plugins.extractor.radiko import ( ) from yt_dlp_plugins.extractor.radiko_podcast import ( - RadikoPodcastEpisodeIE, RadikoPodcastChannelIE, + RadikoPodcastEpisodeIE, RadikoPodcastChannelIE, RadikoPodcastSearchIE, ) RadikoTimeFreeIE._TESTS = [] @@ -148,7 +148,7 @@ IEs = [ RadikoTimeFreeIE, RadikoShareIE, RadikoLiveIE, RadikoPersonIE, 
RadikoStationButtonIE, RadikoPodcastEpisodeIE, RadikoPodcastChannelIE, - RadikoSearchIE, RadikoRSeasonsIE, + RadikoSearchIE, RadikoPodcastSearchIE, RadikoRSeasonsIE, ] import test.helper as th diff --git a/yt_dlp_plugins/extractor/radiko_podcast.py b/yt_dlp_plugins/extractor/radiko_podcast.py index 67d6475..27b91ad 100644 --- a/yt_dlp_plugins/extractor/radiko_podcast.py +++ b/yt_dlp_plugins/extractor/radiko_podcast.py @@ -131,6 +131,14 @@ class RadikoPodcastChannelIE(_RadikoPodcastBaseIE): class RadikoPodcastSearchIE(InfoExtractor): _VALID_URL = r"https?://(?:www\.)?radiko\.jp/#!/search/podcast/(?:timeshift|live)\?" + _TESTS = [{ + "url": "https://radiko.jp/#!/search/podcast/live?key=ドラマ", + "playlist_mincount": 51, + "info_dict": { + "id": "ドラマ", + "title": "ドラマ", + }, + }] def _pagefunc(self, url, idx): url = update_url_query(url, {"pageIdx": idx}) @@ -163,4 +171,5 @@ class RadikoPodcastSearchIE(InfoExtractor): return self.playlist_result( OnDemandPagedList(lambda idx: self._pagefunc(search_url, idx), 50), title=keywords, + id=keywords, # i have to put some kind of id or the tests fail ) -- cgit v1.2.3-70-g09d2