From 42cd450ab7578f723a5590003f751052d0a83ad7 Mon Sep 17 00:00:00 2001
From: garret1317 <garret@airmail.cc>
Date: Mon, 11 Aug 2025 09:10:14 +0100
Subject: Add podcast protobufs + functions to use them

---
 yt_dlp_plugins/extractor/radiko_protobufs.py | 152 +++++++++++++++++++++++++++
 1 file changed, 152 insertions(+)
 create mode 100755 yt_dlp_plugins/extractor/radiko_protobufs.py

(limited to 'yt_dlp_plugins/extractor')

diff --git a/yt_dlp_plugins/extractor/radiko_protobufs.py b/yt_dlp_plugins/extractor/radiko_protobufs.py
new file mode 100755
index 0000000..2336f10
--- /dev/null
+++ b/yt_dlp_plugins/extractor/radiko_protobufs.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+try:
+	import protobug
+except ImportError:
+	protobug = None
+
+import base64
+import struct
+
+import random
+import requests
+
+if protobug:  # i suppose it works lmao
+
+
+	def add_grpc_header(protobuf_data):
+		"""Return protobuf_data prefixed with a 5-byte gRPC frame header: a zero compression-flag byte, then the length."""
+		# '>BI' packs big-endian: one unsigned byte followed by one unsigned 32-bit integer
+		frame_header = struct.pack('>BI', 0, len(protobuf_data))
+		return frame_header + protobuf_data
+
+	def strip_grpc_response(response):  # -> payload bytes of the first frame; b"" if the body is too short or starts with a trailer frame
+		return b"" if len(response) < 5 or response[0] & 0x80 else response[5:5 + struct.unpack('>I', response[1:5])[0]]  # header = 1 flag byte (0x80 bit: trailers) + 4-byte big-endian payload length
+
+	def _download_grpc(self, url_or_request, video_id, response_message, note="Downloading GRPC information", *args, **kwargs):
+		"""Send kwargs['data'] (a protobug message) as a gRPC-web frame to url_or_request; return the reply parsed as response_message, or None when the reply payload is empty."""
+		# kwargs['headers'] is optional and is merged over the gRPC-web defaults; remaining args/kwargs go to self._request_webpage unchanged
+		urlh = self._request_webpage(url_or_request, video_id,
+			headers={
+				'Content-Type': 'application/grpc-web+proto',
+				'X-User-Agent': 'grpc-web-javascript/0.1',
+				'X-Grpc-Web': '1',
+				**kwargs.pop('headers', {})
+			},
+			data=add_grpc_header(protobug.dumps(kwargs.pop('data'))), note=note,
+			*args, **kwargs,
+		)
+		protobuf = strip_grpc_response(urlh.read())
+		if protobuf:
+			return protobug.loads(protobuf, response_message)
+
+
+	@protobug.message
+	class SignUpRequest:  # request message that sign_up() sends to radiko.UserService/SignUp
+		lsid: protobug.String = protobug.field(1)  # the client-generated ID being registered (sign_up() uses 32 random hex characters)
+
+	def sign_up(self):
+		"""Generate a random lsid, register it with radiko.UserService/SignUp and return it."""
+		lsid = ''.join(random.choices('0123456789abcdef', k=32))  # 32 random lowercase hex characters
+		_download_grpc(self, "https://api.annex.radiko.jp/radiko.UserService/SignUp",
+			"UserService", None, note="Registering ID", headers={'Origin': 'https://radiko.jp'},
+			data=SignUpRequest(lsid=lsid),
+		)
+		# You're meant to do the sign-up above only once and then keep your lsid for later,
+		# so that you can sign in and get the token the API needs to work.
+		return lsid
+
+
+	@protobug.message
+	class SignInRequest:  # request message that sign_in() sends to radiko.UserService/SignIn
+		lsid: protobug.String = protobug.field(2)  # the ID registered earlier via SignUp; note the field numbers here start at 2
+		area: protobug.String = protobug.field(3)  # area string; presumably a radiko area code such as "JP13" -- TODO confirm
+
+	@protobug.message
+	class SignInResponse:  # reply message decoded from radiko.UserService/SignIn
+		jwt: protobug.String = protobug.field(1)  # token that get_podcast_episodes() sends as "Authorization: Bearer <jwt>"
+
+
+	def sign_in(self, lsid, area="JP13"):
+		"""Sign in to radiko.UserService with a registered lsid and return the reply's jwt; area fills the request's area field. Raises AttributeError if _download_grpc returns None (empty reply)."""
+		response = _download_grpc(self, "https://api.annex.radiko.jp/radiko.UserService/SignIn",
+			"UserService", SignInResponse, note="Getting auth token", headers={'Origin': 'https://radiko.jp'},
+			data=SignInRequest(lsid=lsid, area=area))
+		return response.jwt
+
+
+	def auth_userservice(self):
+		"""Return a UserService JWT, reusing the lsid cached under ("rajiko", "UserService") or signing up (and caching) when none is usable."""
+		cachedata = self.cache.load("rajiko", "UserService")
+		lsid = cachedata.get("lsid") if cachedata else None
+		if not lsid:
+			# nothing cached, or the cached entry has no lsid: register a fresh one and remember it
+			lsid = sign_up(self)
+			self.cache.store("rajiko", "UserService", {"lsid": lsid})
+		return sign_in(self, lsid)
+
+
+	@protobug.message
+	class ListPodcastEpisodesRequest:
+		channel_id: protobug.String = protobug.field(1)
+		dontknow: protobug.Int32 = protobug.field(2)
+		page_length: protobug.Int32 = protobug.field(4)
+		cursor: protobug.String = protobug.field(5, default=None)
+
+
+	@protobug.message
+	class Audio:  # nested message type of PodcastEpisode.audio (that message's field 8)
+		revision: protobug.Int32 = protobug.field(1)
+		url: protobug.String = protobug.field(2)  # presumably the URL of the audio file itself -- TODO confirm
+		fileSize: protobug.Int64 = protobug.field(3)  # presumably in bytes -- TODO confirm
+		durationSec: protobug.Int64 = protobug.field(4)  # duration in seconds, going by the name
+		transcoded: protobug.Bool = protobug.field(5)
+
+	@protobug.message
+	class EpisodeStartAt:  # message type of PodcastEpisode.startAt; a seconds + nanos pair, presumably an epoch timestamp -- TODO confirm
+		seconds: protobug.UInt64 = protobug.field(1)
+		nanos: protobug.UInt64 = protobug.field(2, default=0)  # default=0; presumably applies when the field is absent from the message
+
+
+	@protobug.message
+	class PodcastEpisode:  # element type of ListPodcastEpisodesResponse.episodes; camelCase names presumably mirror the site's JSON keys -- TODO confirm
+		id: protobug.String = protobug.field(1)  # the value used as the paging cursor ("id of the last episode you've seen")
+		workspaceId: protobug.String = protobug.field(2)
+		channelId: protobug.String = protobug.field(3)
+		title: protobug.String = protobug.field(4)
+		description: protobug.String = protobug.field(5)
+		# field 7 (imageUrl) is declared at the end of the class; field numbers 6 and 9-15 are not declared here
+		audio: Audio = protobug.field(8)
+		channelImageUrl: protobug.String = protobug.field(16)
+		channelTitle: protobug.String = protobug.field(17)
+		channelStationName: protobug.String = protobug.field(18)
+		channelAuthor: protobug.String = protobug.field(19)
+		# field 20 (thumbnailImageUrl) is declared at the end of the class
+		channelThumbnailImageUrl: protobug.String = protobug.field(21)
+		channelStationType: protobug.UInt32 = protobug.field(22)
+		startAt: EpisodeStartAt = protobug.field(27)
+		isEnabled: protobug.Bool = protobug.field(29)
+		hasTranscription: protobug.Bool = protobug.field(32)
+		# the two fields with defaults come last, out of field-number order -- presumably because defaulted dataclass fields must follow non-defaulted ones
+		imageUrl: protobug.String = protobug.field(7, default=None)
+		thumbnailImageUrl: protobug.String = protobug.field(20, default=None)
+
+	@protobug.message
+	class ListPodcastEpisodesResponse:  # reply message that get_podcast_episodes() asks _download_grpc to decode: one page of episodes
+		episodes: list[PodcastEpisode] = protobug.field(1)
+		hasNextPage: protobug.Bool = protobug.field(2, default=False)  # default=False; presumably applies when the server omits the field
+
+
+	def get_podcast_episodes(self, channel_id, jwt, cursor, page_length=20):
+		# Requests one page of a channel's episode list; returns what _download_grpc decodes as ListPodcastEpisodesResponse (None if the reply is empty).
+		# page_length defaults to 20 because the site uses 20 items; jwt is sent as the Bearer token.
+		# cursor is the id of the last episode you've seen in the list.
+		return _download_grpc(self, 'https://api.annex.radiko.jp/radiko.PodcastService/ListPodcastEpisodes',
+			channel_id, ListPodcastEpisodesResponse, note="Downloading episode listings",
+			headers={'Authorization': f'Bearer {jwt}'},
+			data=ListPodcastEpisodesRequest(
+				channel_id=channel_id,
+				dontknow=1,
+				page_length=page_length,
+				cursor=cursor,
+			)
+		)
-- 
cgit v1.2.3-70-g09d2


From ac94bad6ed14f32adfeceac35cc60d39680508dd Mon Sep 17 00:00:00 2001
From: garret1317 <garret@airmail.cc>
Date: Mon, 11 Aug 2025 09:22:50 +0100
Subject: Implement multi-page podcasts with protobug

Needs a core change upstream to work (traverse_obj doesn't work with dataclasses).
---
 yt_dlp_plugins/extractor/radiko_podcast.py | 39 ++++++++++++++++++------------
 1 file changed, 24 insertions(+), 15 deletions(-)

(limited to 'yt_dlp_plugins/extractor')

diff --git a/yt_dlp_plugins/extractor/radiko_podcast.py b/yt_dlp_plugins/extractor/radiko_podcast.py
index af66f6a..904bb62 100644
--- a/yt_dlp_plugins/extractor/radiko_podcast.py
+++ b/yt_dlp_plugins/extractor/radiko_podcast.py
@@ -6,9 +6,12 @@ from yt_dlp.utils import (
 	str_or_none,
 )
 
-# nice simple one for a change...
-# the app uses a similar system to regular programmes, thankfully the site doesn't
-# but it does need protobufs to get more than 20 items...
+try:
+	import protobug
+	import yt_dlp_plugins.extractor.radiko_protobufs as pb
+except ImportError:
+	protobug = None
+
 
 class _RadikoPodcastBaseIE(InfoExtractor):
 
@@ -32,7 +35,7 @@ class _RadikoPodcastBaseIE(InfoExtractor):
 				or traverse_obj(episode_info, ("channelImageUrl", {url_or_none})),
 
 			# so that --download-archive still works if you download from the playlist page
-			"webpage_url": "https://radiko.jp/podcast/episodes/{id}".format(id=episode_info.get("id")),
+			"webpage_url": "https://radiko.jp/podcast/episodes/{id}".format(id=traverse_obj(episode_info, "id")),
 			'extractor_key': RadikoPodcastEpisodeIE.ie_key(),
 			'extractor': 'RadikoPodcastEpisode',
 		}
@@ -82,29 +85,35 @@ class RadikoPodcastChannelIE(_RadikoPodcastBaseIE):
 	}]
 
 	def _real_extract(self, url):
-		video_id = self._match_id(url)
-		webpage = self._download_webpage(url, video_id)
-		next_data = self._search_nextjs_data(webpage, video_id)["props"]["pageProps"]
+		channel_id = self._match_id(url)
+		webpage = self._download_webpage(url, channel_id)
+		next_data = self._search_nextjs_data(webpage, channel_id)["props"]["pageProps"]
 
 		channel_info = next_data["podcastChannel"]
 		episode_list_response = next_data["listPodcastEpisodesResponse"]
 
 
 		def entries():
+			has_next_page = episode_list_response.get("hasNextPage")
 			for episode in episode_list_response["episodesList"]:
+				cursor = episode.get("id")
 				yield self._extract_episode(episode)
 
-		if traverse_obj(episode_list_response, "hasNextPage"):
-			self.report_warning(f'Currently this extractor can only extract the latest {len(episode_list_response["episodesList"])} episodes')
-
-		# TODO: GRPC/protobuf stuff to get the next page
-		# https://api.annex.radiko.jp/radiko.PodcastService/ListPodcastEpisodes
-		# see さらに表示 button on site
-
+			if has_next_page:
+				if protobug:
+					userservice_token = pb.auth_userservice(self)
+					while has_next_page:
+						page = pb.get_podcast_episodes(self, channel_id, userservice_token, cursor)
+						has_next_page = page.hasNextPage
+						for episode in page.episodes:
+							cursor = episode.id
+							yield self._extract_episode(episode)
+				else:
+					self.report_warning(f'Only extracting the latest {len(episode_list_response["episodesList"])} episodes. Install protobug for more.')
 
 		return {
 			"_type": "playlist",
-			"id": video_id,
+			"id": channel_id,
 			**traverse_obj(channel_info, {
 				"playlist_title": "title",
 				"playlist_id": "id",
-- 
cgit v1.2.3-70-g09d2


From d5f824093b0748889916a1ba820398aecaa184c8 Mon Sep 17 00:00:00 2001
From: garret1317 <garret@airmail.cc>
Date: Wed, 13 Aug 2025 02:49:46 +0100
Subject: convert protobug obj to dict, for traverse_obj

---
 yt_dlp_plugins/extractor/radiko_podcast.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'yt_dlp_plugins/extractor')

diff --git a/yt_dlp_plugins/extractor/radiko_podcast.py b/yt_dlp_plugins/extractor/radiko_podcast.py
index 904bb62..84bc288 100644
--- a/yt_dlp_plugins/extractor/radiko_podcast.py
+++ b/yt_dlp_plugins/extractor/radiko_podcast.py
@@ -6,6 +6,7 @@ from yt_dlp.utils import (
 	str_or_none,
 )
 
+import dataclasses
 try:
 	import protobug
 	import yt_dlp_plugins.extractor.radiko_protobufs as pb
@@ -107,7 +108,7 @@ class RadikoPodcastChannelIE(_RadikoPodcastBaseIE):
 						has_next_page = page.hasNextPage
 						for episode in page.episodes:
 							cursor = episode.id
-							yield self._extract_episode(episode)
+							yield self._extract_episode(dataclasses.asdict(episode))
 				else:
 					self.report_warning(f'Only extracting the latest {len(episode_list_response["episodesList"])} episodes. Install protobug for more.')
 
-- 
cgit v1.2.3-70-g09d2


From 9e91cb5ee32a47eb05dc2d3885e13d274cdadd03 Mon Sep 17 00:00:00 2001
From: garret1317 <garret@airmail.cc>
Date: Wed, 13 Aug 2025 07:29:19 +0100
Subject: ListPodcastEpisodesRequest: dontknow -> sort_by_latest

---
 yt_dlp_plugins/extractor/radiko_protobufs.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'yt_dlp_plugins/extractor')

diff --git a/yt_dlp_plugins/extractor/radiko_protobufs.py b/yt_dlp_plugins/extractor/radiko_protobufs.py
index 2336f10..ff4531e 100755
--- a/yt_dlp_plugins/extractor/radiko_protobufs.py
+++ b/yt_dlp_plugins/extractor/radiko_protobufs.py
@@ -88,7 +88,7 @@ if protobug:  # i suppose it works lmao
 	@protobug.message
 	class ListPodcastEpisodesRequest:
 		channel_id: protobug.String = protobug.field(1)
-		dontknow: protobug.Int32 = protobug.field(2)
+		sort_by_latest: protobug.Bool = protobug.field(2)
 		page_length: protobug.Int32 = protobug.field(4)
 		cursor: protobug.String = protobug.field(5, default=None)
 
@@ -145,7 +145,7 @@ if protobug:  # i suppose it works lmao
 			headers={'Authorization': f'Bearer {jwt}'},
 			data=ListPodcastEpisodesRequest(
 				channel_id=channel_id,
-				dontknow=1,
+				sort_by_latest=True,
 				page_length=page_length,
 				cursor=cursor,
 			)
-- 
cgit v1.2.3-70-g09d2