diff options
author | garret1317 <garret@airmail.cc> | 2025-08-11 09:22:50 +0100 |
---|---|---|
committer | garret1317 <garret@airmail.cc> | 2025-08-11 09:22:50 +0100 |
commit | ac94bad6ed14f32adfeceac35cc60d39680508dd (patch) | |
tree | ffbbd862960e43bd887cbc31161b5b66110ad29e | |
parent | 42cd450ab7578f723a5590003f751052d0a83ad7 (diff) | |
download | yt-dlp-rajiko-podcast-protobuf.tar.gz yt-dlp-rajiko-podcast-protobuf.tar.bz2 yt-dlp-rajiko-podcast-protobuf.zip |
Implement multi-page podcasts with protobug (branch: podcast-protobuf)
Needs a core change upstream to work (traverse_obj doesn't work with dataclasses).
-rw-r--r-- | yt_dlp_plugins/extractor/radiko_podcast.py | 39 |
1 file changed, 24 insertions(+), 15 deletions(-)
diff --git a/yt_dlp_plugins/extractor/radiko_podcast.py b/yt_dlp_plugins/extractor/radiko_podcast.py index af66f6a..904bb62 100644 --- a/yt_dlp_plugins/extractor/radiko_podcast.py +++ b/yt_dlp_plugins/extractor/radiko_podcast.py @@ -6,9 +6,12 @@ from yt_dlp.utils import ( str_or_none, ) -# nice simple one for a change... -# the app uses a similar system to regular programmes, thankfully the site doesn't -# but it does need protobufs to get more than 20 items... +try: + import protobug + import yt_dlp_plugins.extractor.radiko_protobufs as pb +except ImportError: + protobug = None + class _RadikoPodcastBaseIE(InfoExtractor): @@ -32,7 +35,7 @@ class _RadikoPodcastBaseIE(InfoExtractor): or traverse_obj(episode_info, ("channelImageUrl", {url_or_none})), # so that --download-archive still works if you download from the playlist page - "webpage_url": "https://radiko.jp/podcast/episodes/{id}".format(id=episode_info.get("id")), + "webpage_url": "https://radiko.jp/podcast/episodes/{id}".format(id=traverse_obj(episode_info, "id")), 'extractor_key': RadikoPodcastEpisodeIE.ie_key(), 'extractor': 'RadikoPodcastEpisode', } @@ -82,29 +85,35 @@ class RadikoPodcastChannelIE(_RadikoPodcastBaseIE): }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - next_data = self._search_nextjs_data(webpage, video_id)["props"]["pageProps"] + channel_id = self._match_id(url) + webpage = self._download_webpage(url, channel_id) + next_data = self._search_nextjs_data(webpage, channel_id)["props"]["pageProps"] channel_info = next_data["podcastChannel"] episode_list_response = next_data["listPodcastEpisodesResponse"] def entries(): + has_next_page = episode_list_response.get("hasNextPage") for episode in episode_list_response["episodesList"]: + cursor = episode.get("id") yield self._extract_episode(episode) - if traverse_obj(episode_list_response, "hasNextPage"): - self.report_warning(f'Currently this extractor can only extract the latest 
{len(episode_list_response["episodesList"])} episodes') - - # TODO: GRPC/protobuf stuff to get the next page - # https://api.annex.radiko.jp/radiko.PodcastService/ListPodcastEpisodes - # see さらに表示 button on site - + if has_next_page: + if protobug: + userservice_token = pb.auth_userservice(self) + while has_next_page: + page = pb.get_podcast_episodes(self, channel_id, userservice_token, cursor) + has_next_page = page.hasNextPage + for episode in page.episodes: + cursor = episode.id + yield self._extract_episode(episode) + else: + self.report_warning(f'Only extracting the latest {len(episode_list_response["episodesList"])} episodes. Install protobug for more.') return { "_type": "playlist", - "id": video_id, + "id": channel_id, **traverse_obj(channel_info, { "playlist_title": "title", "playlist_id": "id", |