author     garret1317 <garret@airmail.cc>  2025-08-11 09:22:50 +0100
committer  garret1317 <garret@airmail.cc>  2025-08-11 09:22:50 +0100
commit     ac94bad6ed14f32adfeceac35cc60d39680508dd (patch)
tree       ffbbd862960e43bd887cbc31161b5b66110ad29e
parent     42cd450ab7578f723a5590003f751052d0a83ad7 (diff)
Implement multi-page podcasts with protobug (branch: podcast-protobuf)
needs a core change in upstream to work (traverse_obj doesn't work with dataclasses)
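
A minimal sketch of the limitation, assuming current upstream traverse_obj behaviour (Episode here is a hypothetical stand-in for a protobug message dataclass):

    from dataclasses import dataclass
    from yt_dlp.utils import traverse_obj

    @dataclass
    class Episode:
        id: str = "abc123"

    traverse_obj({"id": "abc123"}, "id")  # -> "abc123": dict traversal works
    traverse_obj(Episode(), "id")         # -> None: dataclass fields are not traversed
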
-rw-r--r--  yt_dlp_plugins/extractor/radiko_podcast.py | 39
1 file changed, 24 insertions(+), 15 deletions(-)
diff --git a/yt_dlp_plugins/extractor/radiko_podcast.py b/yt_dlp_plugins/extractor/radiko_podcast.py
index af66f6a..904bb62 100644
--- a/yt_dlp_plugins/extractor/radiko_podcast.py
+++ b/yt_dlp_plugins/extractor/radiko_podcast.py
@@ -6,9 +6,12 @@ from yt_dlp.utils import (
 	str_or_none,
 )
 
-# nice simple one for a change...
-# the app uses a similar system to regular programmes, thankfully the site doesn't
-# but it does need protobufs to get more than 20 items...
+try:
+	import protobug
+	import yt_dlp_plugins.extractor.radiko_protobufs as pb
+except ImportError:
+	protobug = None
+
 
 
 class _RadikoPodcastBaseIE(InfoExtractor):
@@ -32,7 +35,7 @@ class _RadikoPodcastBaseIE(InfoExtractor):
 				or traverse_obj(episode_info, ("channelImageUrl", {url_or_none})),
 
 			# so that --download-archive still works if you download from the playlist page
-			"webpage_url": "https://radiko.jp/podcast/episodes/{id}".format(id=episode_info.get("id")),
+			"webpage_url": "https://radiko.jp/podcast/episodes/{id}".format(id=traverse_obj(episode_info, "id")),
 			'extractor_key': RadikoPodcastEpisodeIE.ie_key(),
 			'extractor': 'RadikoPodcastEpisode',
 		}
@@ -82,29 +85,35 @@ class RadikoPodcastChannelIE(_RadikoPodcastBaseIE):
 	}]
 
 	def _real_extract(self, url):
-		video_id = self._match_id(url)
-		webpage = self._download_webpage(url, video_id)
-		next_data = self._search_nextjs_data(webpage, video_id)["props"]["pageProps"]
+		channel_id = self._match_id(url)
+		webpage = self._download_webpage(url, channel_id)
+		next_data = self._search_nextjs_data(webpage, channel_id)["props"]["pageProps"]
 
 		channel_info = next_data["podcastChannel"]
 		episode_list_response = next_data["listPodcastEpisodesResponse"]
 
 		def entries():
+			has_next_page = episode_list_response.get("hasNextPage")
 			for episode in episode_list_response["episodesList"]:
+				cursor = episode.get("id")
 				yield self._extract_episode(episode)
-			if traverse_obj(episode_list_response, "hasNextPage"):
-				self.report_warning(f'Currently this extractor can only extract the latest {len(episode_list_response["episodesList"])} episodes')
-
-				# TODO: GRPC/protobuf stuff to get the next page
-				# https://api.annex.radiko.jp/radiko.PodcastService/ListPodcastEpisodes
-				# see さらに表示 ("show more") button on site
-
+			if has_next_page:
+				if protobug:
+					userservice_token = pb.auth_userservice(self)
+					while has_next_page:
+						page = pb.get_podcast_episodes(self, channel_id, userservice_token, cursor)
+						has_next_page = page.hasNextPage
+						for episode in page.episodes:
+							cursor = episode.id
+							yield self._extract_episode(episode)
+				else:
+					self.report_warning(f'Only extracting the latest {len(episode_list_response["episodesList"])} episodes. Install protobug for more.')
 
 		return {
 			"_type": "playlist",
-			"id": video_id,
+			"id": channel_id,
 			**traverse_obj(channel_info, {
 				"playlist_title": "title",
 				"playlist_id": "id",