about summary refs log tree commit diff stats homepage
diff options
context:
space:
mode:
author garret1317 <garret@airmail.cc> 2025-09-14 15:31:47 +0100
committer garret1317 <garret@airmail.cc> 2025-09-14 15:31:47 +0100
commit 0f4d6c073de8e161014cf62eaa44cfc2fdb236dd (patch)
tree 58f1d341062dfca50ac6c042cebb2a033e2c8632
parent bcb5df38d71f2b6d2092797201ad62638b6d3ef0 (diff)
parent c229a64db275ddc7b87f5c23a8570a10e3e2cbd3 (diff)
download yt-dlp-rajiko-0f4d6c073de8e161014cf62eaa44cfc2fdb236dd.tar.gz
yt-dlp-rajiko-0f4d6c073de8e161014cf62eaa44cfc2fdb236dd.tar.bz2
yt-dlp-rajiko-0f4d6c073de8e161014cf62eaa44cfc2fdb236dd.zip
Merge branch 'podcast-protobuf'
-rw-r--r-- pyproject.toml 3
-rw-r--r-- yt_dlp_plugins/extractor/radiko_podcast.py 40
-rwxr-xr-x yt_dlp_plugins/extractor/radiko_protobufs.py 152
3 files changed, 180 insertions, 15 deletions
diff --git a/pyproject.toml b/pyproject.toml
index 2a13f3f..d92abe7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,6 +21,9 @@ Homepage = "https://427738.xyz/yt-dlp-rajiko/"
"Source Code" = "https://github.com/garret1317/yt-dlp-rajiko/"
"Release Notes" = "https://427738.xyz/yt-dlp-rajiko/CHANGELOG.xml"
+[project.optional-dependencies]
+protobuf = ["protobug"]
+
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
diff --git a/yt_dlp_plugins/extractor/radiko_podcast.py b/yt_dlp_plugins/extractor/radiko_podcast.py
index af66f6a..84bc288 100644
--- a/yt_dlp_plugins/extractor/radiko_podcast.py
+++ b/yt_dlp_plugins/extractor/radiko_podcast.py
@@ -6,9 +6,13 @@ from yt_dlp.utils import (
str_or_none,
)
-# nice simple one for a change...
-# the app uses a similar system to regular programmes, thankfully the site doesn't
-# but it does need protobufs to get more than 20 items...
+import dataclasses
+try:
+ import protobug
+ import yt_dlp_plugins.extractor.radiko_protobufs as pb
+except ImportError:
+ protobug = None
+
class _RadikoPodcastBaseIE(InfoExtractor):
@@ -32,7 +36,7 @@ class _RadikoPodcastBaseIE(InfoExtractor):
or traverse_obj(episode_info, ("channelImageUrl", {url_or_none})),
# so that --download-archive still works if you download from the playlist page
- "webpage_url": "https://radiko.jp/podcast/episodes/{id}".format(id=episode_info.get("id")),
+ "webpage_url": "https://radiko.jp/podcast/episodes/{id}".format(id=traverse_obj(episode_info, "id")),
'extractor_key': RadikoPodcastEpisodeIE.ie_key(),
'extractor': 'RadikoPodcastEpisode',
}
@@ -82,29 +86,35 @@ class RadikoPodcastChannelIE(_RadikoPodcastBaseIE):
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- next_data = self._search_nextjs_data(webpage, video_id)["props"]["pageProps"]
+ channel_id = self._match_id(url)
+ webpage = self._download_webpage(url, channel_id)
+ next_data = self._search_nextjs_data(webpage, channel_id)["props"]["pageProps"]
channel_info = next_data["podcastChannel"]
episode_list_response = next_data["listPodcastEpisodesResponse"]
def entries():
+ has_next_page = episode_list_response.get("hasNextPage")
for episode in episode_list_response["episodesList"]:
+ cursor = episode.get("id")
yield self._extract_episode(episode)
- if traverse_obj(episode_list_response, "hasNextPage"):
- self.report_warning(f'Currently this extractor can only extract the latest {len(episode_list_response["episodesList"])} episodes')
-
- # TODO: GRPC/protobuf stuff to get the next page
- # https://api.annex.radiko.jp/radiko.PodcastService/ListPodcastEpisodes
- # see さらに表示 button on site
-
+ if has_next_page:
+ if protobug:
+ userservice_token = pb.auth_userservice(self)
+ while has_next_page:
+ page = pb.get_podcast_episodes(self, channel_id, userservice_token, cursor)
+ has_next_page = page.hasNextPage
+ for episode in page.episodes:
+ cursor = episode.id
+ yield self._extract_episode(dataclasses.asdict(episode))
+ else:
+ self.report_warning(f'Only extracting the latest {len(episode_list_response["episodesList"])} episodes. Install protobug for more.')
return {
"_type": "playlist",
- "id": video_id,
+ "id": channel_id,
**traverse_obj(channel_info, {
"playlist_title": "title",
"playlist_id": "id",
diff --git a/yt_dlp_plugins/extractor/radiko_protobufs.py b/yt_dlp_plugins/extractor/radiko_protobufs.py
new file mode 100755
index 0000000..ff4531e
--- /dev/null
+++ b/yt_dlp_plugins/extractor/radiko_protobufs.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+try:
+ import protobug
+except ImportError:
+ protobug = None
+
+import base64
+import struct
+
+import random
+import requests
+
+if protobug: # i suppose it works lmao
+
+
+ def add_grpc_header(protobuf_data):  # frame a serialized message: 5-byte prefix followed by the payload
+ compression_flag = 0  # first prefix byte; always written as 0 here
+ message_length = len(protobuf_data)
+ header = struct.pack('>BI', compression_flag, message_length)  # 1-byte flag, then 4-byte big-endian length
+ return header + protobuf_data
+
+ def strip_grpc_response(response):  # payload of the first frame, sized by its 4-byte length prefix; b"" when there is none
+ return b"" if response[:1] >= b"\x80" else response[5:5 + int.from_bytes(response[1:5], "big")]  # flag MSB set = trailer frame, no message
+
+ def _download_grpc(self, url_or_request, video_id, response_message, note="Downloading GRPC information", *args, **kwargs):  # send one framed message, decode the reply as response_message
+ urlh = self._request_webpage(url_or_request, video_id,
+ headers={
+ 'Content-Type': 'application/grpc-web+proto',
+ 'X-User-Agent': 'grpc-web-javascript/0.1',
+ 'X-Grpc-Web': '1',
+ **kwargs.pop('headers')  # NOTE(review): raises KeyError when the caller passes no headers=
+ },
+ data=add_grpc_header(protobug.dumps(kwargs.pop('data'))), note=note,  # data= is mandatory too; it is serialized with protobug.dumps
+ *args, **kwargs,
+ )
+ response = urlh.read()
+
+ protobuf = strip_grpc_response(response)
+ if len(protobuf) > 0:  # an empty payload falls through, so the function returns None
+ return protobug.loads(protobuf, response_message)
+
+
+ @protobug.message
+ class SignUpRequest:  # request body that sign_up() sends to radiko.UserService/SignUp
+ lsid: protobug.String = protobug.field(1)
+
+ def sign_up(self):  # register a freshly generated lsid with UserService/SignUp and return it
+ lsid = ''.join(random.choices('0123456789abcdef', k=32))  # 32 random lowercase hex characters
+
+ signup = _download_grpc(self, "https://api.annex.radiko.jp/radiko.UserService/SignUp",  # the result is never read
+ "UserService", None, note="Registering ID", headers={'Origin': 'https://radiko.jp'},
+ data=SignUpRequest(lsid=lsid),
+ )
+ # You're meant to do the sign-up above only once and keep your lsid afterwards,
+ # so that you can sign in with it later and get the token the API needs.
+ return lsid
+
+
+ @protobug.message
+ class SignInRequest:  # request body that sign_in() sends to radiko.UserService/SignIn
+ lsid: protobug.String = protobug.field(2)
+ area: protobug.String = protobug.field(3)
+
+ @protobug.message
+ class SignInResponse:  # reply type sign_in() decodes; only jwt is declared and read
+ jwt: protobug.String = protobug.field(1)
+
+
+ def sign_in(self, lsid):  # exchange an lsid for the jwt string carried in the SignIn reply
+ sign_in = _download_grpc(self, "https://api.annex.radiko.jp/radiko.UserService/SignIn",
+ "UserService", SignInResponse, note="Getting auth token", headers={'Origin': 'https://radiko.jp'},
+ data=SignInRequest(lsid=lsid, area="JP13"),  # area is hard-coded to "JP13"
+ )
+ return sign_in.jwt  # NOTE(review): _download_grpc returns None for an empty payload, which would raise AttributeError here
+
+
+ def auth_userservice(self):  # return a jwt, taking the lsid from the ("rajiko", "UserService") cache entry when one exists
+ cachedata = self.cache.load("rajiko", "UserService")
+ if cachedata is not None:
+ lsid = cachedata.get("lsid")  # NOTE(review): None if the cached dict has no "lsid" key
+ else:
+ lsid = sign_up(self)  # nothing cached: register a new lsid
+ self.cache.store("rajiko", "UserService", {"lsid": lsid})
+ jwt = sign_in(self, lsid)
+ return jwt
+
+
+ @protobug.message
+ class ListPodcastEpisodesRequest:  # request body built by get_podcast_episodes()
+ channel_id: protobug.String = protobug.field(1)
+ sort_by_latest: protobug.Bool = protobug.field(2)
+ page_length: protobug.Int32 = protobug.field(4)
+ cursor: protobug.String = protobug.field(5, default=None)
+
+
+ @protobug.message
+ class Audio:  # nested message used as the type of PodcastEpisode.audio
+ revision: protobug.Int32 = protobug.field(1)
+ url: protobug.String = protobug.field(2)
+ fileSize: protobug.Int64 = protobug.field(3)
+ durationSec: protobug.Int64 = protobug.field(4)
+ transcoded: protobug.Bool = protobug.field(5)
+
+ @protobug.message
+ class EpisodeStartAt:  # nested message used as the type of PodcastEpisode.startAt
+ seconds: protobug.UInt64 = protobug.field(1)
+ nanos: protobug.UInt64 = protobug.field(2, default=0)
+
+
+ @protobug.message
+ class PodcastEpisode:  # element type of ListPodcastEpisodesResponse.episodes
+ id: protobug.String = protobug.field(1)
+ workspaceId: protobug.String = protobug.field(2)
+ channelId: protobug.String = protobug.field(3)
+ title: protobug.String = protobug.field(4)
+ description: protobug.String = protobug.field(5)
+
+ audio: Audio = protobug.field(8)
+ channelImageUrl: protobug.String = protobug.field(16)
+ channelTitle: protobug.String = protobug.field(17)
+ channelStationName: protobug.String = protobug.field(18)
+ channelAuthor: protobug.String = protobug.field(19)
+
+ channelThumbnailImageUrl: protobug.String = protobug.field(21)
+ channelStationType: protobug.UInt32 = protobug.field(22)
+ startAt: EpisodeStartAt = protobug.field(27)
+ isEnabled: protobug.Bool = protobug.field(29)
+ hasTranscription: protobug.Bool = protobug.field(32)
+
+ imageUrl: protobug.String = protobug.field(7, default=None)  # the two fields declared with default=None come last
+ thumbnailImageUrl: protobug.String = protobug.field(20, default=None)
+
+ @protobug.message
+ class ListPodcastEpisodesResponse:  # reply type that get_podcast_episodes() decodes into
+ episodes: list[PodcastEpisode] = protobug.field(1)
+ hasNextPage: protobug.Bool = protobug.field(2, default=False)
+
+
+ def get_podcast_episodes(self, channel_id, jwt, cursor, page_length=20):  # request one page of a channel's episodes; returns what _download_grpc returns
+ # The site uses 20 items per page.
+ # cursor is the id of the last episode you've seen in the list.
+
+ return _download_grpc(self, 'https://api.annex.radiko.jp/radiko.PodcastService/ListPodcastEpisodes',
+ channel_id, ListPodcastEpisodesResponse, note="Downloading episode listings",
+ headers={'Authorization': f'Bearer {jwt}'},  # jwt is sent as a Bearer token
+ data=ListPodcastEpisodesRequest(
+ channel_id=channel_id,
+ sort_by_latest=True,
+ page_length=page_length,
+ cursor=cursor,
+ )
+ )