Merge branch 'podcast-protobuf'

author: garret1317 <garret@airmail.cc> 2025-09-14 15:31:47 +0100
committer: garret1317 <garret@airmail.cc> 2025-09-14 15:31:47 +0100
commit: 0f4d6c073de8e161014cf62eaa44cfc2fdb236dd (patch)
tree: 58f1d341062dfca50ac6c042cebb2a033e2c8632
parent: bcb5df38d71f2b6d2092797201ad62638b6d3ef0 (diff)
parent: c229a64db275ddc7b87f5c23a8570a10e3e2cbd3 (diff)
download: yt-dlp-rajiko-0f4d6c073de8e161014cf62eaa44cfc2fdb236dd.tar.gz
yt-dlp-rajiko-0f4d6c073de8e161014cf62eaa44cfc2fdb236dd.tar.bz2
yt-dlp-rajiko-0f4d6c073de8e161014cf62eaa44cfc2fdb236dd.zip
3 files changed, 180 insertions, 15 deletions
diff --git a/pyproject.toml b/pyproject.toml
index 2a13f3f..d92abe7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,6 +21,9 @@ Homepage = "https://427738.xyz/yt-dlp-rajiko/"
 "Source Code" = "https://github.com/garret1317/yt-dlp-rajiko/"
 "Release Notes" = "https://427738.xyz/yt-dlp-rajiko/CHANGELOG.xml"
 
+[project.optional-dependencies]
+protobuf = ["protobug"]
+
 [build-system]
 requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"
diff --git a/yt_dlp_plugins/extractor/radiko_podcast.py b/yt_dlp_plugins/extractor/radiko_podcast.py
index af66f6a..84bc288 100644
--- a/yt_dlp_plugins/extractor/radiko_podcast.py
+++ b/yt_dlp_plugins/extractor/radiko_podcast.py
@@ -6,9 +6,13 @@ from yt_dlp.utils import (
 	str_or_none,
 )
 
-# nice simple one for a change...
-# the app uses a similar system to regular programmes, thankfully the site doesn't
-# but it does need protobufs to get more than 20 items...
+import dataclasses
+try:
+	import protobug
+	import yt_dlp_plugins.extractor.radiko_protobufs as pb
+except ImportError:
+	protobug = None
+
 
 class _RadikoPodcastBaseIE(InfoExtractor):
 
@@ -32,7 +36,7 @@ class _RadikoPodcastBaseIE(InfoExtractor):
 				or traverse_obj(episode_info, ("channelImageUrl", {url_or_none})),
 
 			# so that --download-archive still works if you download from the playlist page
-			"webpage_url": "https://radiko.jp/podcast/episodes/{id}".format(id=episode_info.get("id")),
+			"webpage_url": "https://radiko.jp/podcast/episodes/{id}".format(id=traverse_obj(episode_info, "id")),
 			'extractor_key': RadikoPodcastEpisodeIE.ie_key(),
 			'extractor': 'RadikoPodcastEpisode',
 		}
@@ -82,29 +86,35 @@ class RadikoPodcastChannelIE(_RadikoPodcastBaseIE):
 	}]
 
 	def _real_extract(self, url):
-		video_id = self._match_id(url)
-		webpage = self._download_webpage(url, video_id)
-		next_data = self._search_nextjs_data(webpage, video_id)["props"]["pageProps"]
+		channel_id = self._match_id(url)
+		webpage = self._download_webpage(url, channel_id)
+		next_data = self._search_nextjs_data(webpage, channel_id)["props"]["pageProps"]
 
 		channel_info = next_data["podcastChannel"]
 		episode_list_response = next_data["listPodcastEpisodesResponse"]
 
 
 		def entries():
+			has_next_page = episode_list_response.get("hasNextPage")
 			for episode in episode_list_response["episodesList"]:
+				cursor = episode.get("id")
 				yield self._extract_episode(episode)
 
-		if traverse_obj(episode_list_response, "hasNextPage"):
-			self.report_warning(f'Currently this extractor can only extract the latest {len(episode_list_response["episodesList"])} episodes')
-
-		# TODO: GRPC/protobuf stuff to get the next page
-		# https://api.annex.radiko.jp/radiko.PodcastService/ListPodcastEpisodes
-		# see さらに表示 button on site
-
+			if has_next_page:
+				if protobug:
+					userservice_token = pb.auth_userservice(self)
+					while has_next_page:
+						page = pb.get_podcast_episodes(self, channel_id, userservice_token, cursor)
+						has_next_page = page.hasNextPage
+						for episode in page.episodes:
+							cursor = episode.id
+							yield self._extract_episode(dataclasses.asdict(episode))
+				else:
+					self.report_warning(f'Only extracting the latest {len(episode_list_response["episodesList"])} episodes. Install protobug for more.')
 
 		return {
 			"_type": "playlist",
-			"id": video_id,
+			"id": channel_id,
 			**traverse_obj(channel_info, {
 				"playlist_title": "title",
 				"playlist_id": "id",
diff --git a/yt_dlp_plugins/extractor/radiko_protobufs.py b/yt_dlp_plugins/extractor/radiko_protobufs.py
new file mode 100755
index 0000000..ff4531e
--- /dev/null
+++ b/yt_dlp_plugins/extractor/radiko_protobufs.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+try:
+	import protobug
+except ImportError:
+	protobug = None
+
+import base64
+import struct
+
+import random
+import requests
+
+if protobug:  # i suppose it works lmao
+
+
+	def add_grpc_header(protobuf_data):
+		# gRPC message framing: a 1-byte compression flag (0, i.e. not compressed)
+		# and the payload length as a 4-byte big-endian unsigned integer
+		frame_prefix = struct.pack('>BI', 0, len(protobuf_data))
+		return frame_prefix + protobuf_data
+
+	def strip_grpc_response(response):  # -> payload of the leading gRPC-web data frame, or b"" if the body does not start with one
+		return response[5:5 + int.from_bytes(response[1:5], 'big')] if response[:1] == b'\x00' else b''  # cut by the frame's own length prefix so the trailer frame's 5-byte header never leaks into the message
+
+	def _download_grpc(self, url_or_request, video_id, response_message, note="Downloading GRPC information", *args, **kwargs):
+		"""POST kwargs['data'] (a protobug message) as a gRPC-web call; return the reply decoded as response_message, or None if it has no payload."""
+		urlh = self._request_webpage(url_or_request, video_id,
+			headers={
+				'Content-Type': 'application/grpc-web+proto',
+				'X-User-Agent': 'grpc-web-javascript/0.1',
+				'X-Grpc-Web': '1',
+				**kwargs.pop('headers', {}),  # caller headers are optional and override the defaults above
+			},
+			data=add_grpc_header(protobug.dumps(kwargs.pop('data'))), note=note,
+			*args, **kwargs,
+		)
+		# an empty payload leaves nothing to decode, so fall through and return None
+		protobuf = strip_grpc_response(urlh.read())
+		if protobuf:
+			return protobug.loads(protobuf, response_message)
+
+
+	@protobug.message
+	class SignUpRequest:  # request message that sign_up() posts to radiko.UserService/SignUp
+		lsid: protobug.String = protobug.field(1)  # the client-generated ID being registered
+
+	def sign_up(self):
+		# register a fresh random lsid (32 hex digits) with the UserService and return it
+		lsid = ''.join(random.choices('0123456789abcdef', k=32))
+		_download_grpc(self, "https://api.annex.radiko.jp/radiko.UserService/SignUp",
+			"UserService", None, note="Registering ID", headers={'Origin': 'https://radiko.jp'},
+			data=SignUpRequest(lsid=lsid),
+		)
+		# you're meant to sign up only once and keep the lsid afterwards,
+		# because signing in with it is what yields the token the API needs
+		return lsid
+
+
+	@protobug.message
+	class SignInRequest:  # request message that sign_in() posts to radiko.UserService/SignIn
+		lsid: protobug.String = protobug.field(2)  # the ID handed to sign_in() by auth_userservice()
+		area: protobug.String = protobug.field(3)  # sign_in() always sends "JP13"; presumably a radiko area code - TODO confirm
+
+	@protobug.message
+	class SignInResponse:  # response message that sign_in() decodes from radiko.UserService/SignIn
+		jwt: protobug.String = protobug.field(1)  # the token sign_in() returns to its caller
+
+
+	def sign_in(self, lsid):
+		# exchange a registered lsid for the JWT carried in the SignIn response
+		response = _download_grpc(self, "https://api.annex.radiko.jp/radiko.UserService/SignIn",
+			"UserService", SignInResponse, note="Getting auth token", headers={'Origin': 'https://radiko.jp'},
+			data=SignInRequest(lsid=lsid, area="JP13"))
+		return response.jwt
+
+
+	def auth_userservice(self):
+		"""Return a UserService JWT, signing up (and caching the new lsid) first when no lsid is cached."""
+		cachedata = self.cache.load("rajiko", "UserService")
+		# a cache entry without a usable "lsid" must not reach sign_in() as None
+		lsid = cachedata.get("lsid") if cachedata else None
+		if not lsid:
+			lsid = sign_up(self)
+			self.cache.store("rajiko", "UserService", {"lsid": lsid})
+		return sign_in(self, lsid)
+
+
+	@protobug.message
+	class ListPodcastEpisodesRequest:  # request message that get_podcast_episodes() posts to radiko.PodcastService/ListPodcastEpisodes
+		channel_id: protobug.String = protobug.field(1)
+		sort_by_latest: protobug.Bool = protobug.field(2)  # get_podcast_episodes() always sends True
+		page_length: protobug.Int32 = protobug.field(4)  # get_podcast_episodes() defaults this to 20, the value the site uses
+		cursor: protobug.String = protobug.field(5, default=None)  # id of the last episode already seen in the list
+
+
+	@protobug.message
+	class Audio:  # nested message stored in PodcastEpisode.audio
+		revision: protobug.Int32 = protobug.field(1)
+		url: protobug.String = protobug.field(2)
+		fileSize: protobug.Int64 = protobug.field(3)  # presumably a size in bytes - TODO confirm
+		durationSec: protobug.Int64 = protobug.field(4)  # presumably the length in seconds, going by the name - TODO confirm
+		transcoded: protobug.Bool = protobug.field(5)
+
+	@protobug.message
+	class EpisodeStartAt:  # nested message stored in PodcastEpisode.startAt
+		seconds: protobug.UInt64 = protobug.field(1)  # NOTE(review): seconds/nanos resembles a protobuf Timestamp (epoch seconds) - confirm
+		nanos: protobug.UInt64 = protobug.field(2, default=0)  # declared with default 0
+
+
+	@protobug.message
+	class PodcastEpisode:  # one entry of ListPodcastEpisodesResponse.episodes
+		id: protobug.String = protobug.field(1)  # get_podcast_episodes() takes the last-seen episode's id as its cursor
+		workspaceId: protobug.String = protobug.field(2)
+		channelId: protobug.String = protobug.field(3)
+		title: protobug.String = protobug.field(4)
+		description: protobug.String = protobug.field(5)
+
+		audio: Audio = protobug.field(8)  # nested Audio message
+		channelImageUrl: protobug.String = protobug.field(16)
+		channelTitle: protobug.String = protobug.field(17)
+		channelStationName: protobug.String = protobug.field(18)
+		channelAuthor: protobug.String = protobug.field(19)
+
+		channelThumbnailImageUrl: protobug.String = protobug.field(21)
+		channelStationType: protobug.UInt32 = protobug.field(22)
+		startAt: EpisodeStartAt = protobug.field(27)  # nested EpisodeStartAt message
+		isEnabled: protobug.Bool = protobug.field(29)
+		hasTranscription: protobug.Bool = protobug.field(32)
+
+		imageUrl: protobug.String = protobug.field(7, default=None)  # declared with default=None, unlike the fields above
+		thumbnailImageUrl: protobug.String = protobug.field(20, default=None)  # declared with default=None, unlike the fields above
+
+	@protobug.message
+	class ListPodcastEpisodesResponse:  # response message that get_podcast_episodes() decodes
+		episodes: list[PodcastEpisode] = protobug.field(1)
+		hasNextPage: protobug.Bool = protobug.field(2, default=False)  # declared default False; presumably applies when the reply omits the field - TODO confirm
+
+
+	def get_podcast_episodes(self, channel_id, jwt, cursor, page_length=20):
+		# Fetch one page of a channel's episode list (sort_by_latest is always set).
+		# cursor is the id of the last episode you've seen in the list;
+		# the default page_length of 20 is what the site itself uses.
+		request = ListPodcastEpisodesRequest(
+			channel_id=channel_id,
+			sort_by_latest=True,
+			page_length=page_length,
+			cursor=cursor,
+		)
+		return _download_grpc(self, 'https://api.annex.radiko.jp/radiko.PodcastService/ListPodcastEpisodes',
+			channel_id, ListPodcastEpisodesResponse, note="Downloading episode listings",
+			headers={'Authorization': f'Bearer {jwt}'}, data=request,
+		)
author	garret1317 <garret@airmail.cc>	2025-09-14 15:31:47 +0100
committer	garret1317 <garret@airmail.cc>	2025-09-14 15:31:47 +0100
commit	0f4d6c073de8e161014cf62eaa44cfc2fdb236dd (patch)
tree	58f1d341062dfca50ac6c042cebb2a033e2c8632
parent	bcb5df38d71f2b6d2092797201ad62638b6d3ef0 (diff)
parent	c229a64db275ddc7b87f5c23a8570a10e3e2cbd3 (diff)
download	yt-dlp-rajiko-0f4d6c073de8e161014cf62eaa44cfc2fdb236dd.tar.gz yt-dlp-rajiko-0f4d6c073de8e161014cf62eaa44cfc2fdb236dd.tar.bz2 yt-dlp-rajiko-0f4d6c073de8e161014cf62eaa44cfc2fdb236dd.zip