#!/usr/bin/env python3
"""gRPC-Web plumbing and protobuf messages for radiko's podcast API.

The framing helpers need only the standard library.  Everything that encodes
or decodes protobuf messages is defined only when the optional ``protobug``
package can be imported; otherwise ``protobug`` is ``None`` and those names
do not exist.
"""
try:
    import protobug
except ImportError:
    protobug = None

import base64
import random
import secrets
import struct

# Nothing in this module uses requests.  Import it best-effort so that a
# missing package cannot make the whole module unimportable.
try:
    import requests
except ImportError:
    requests = None

# Frame header: one flag byte, then the payload length as a big-endian
# unsigned 32-bit integer.
_GRPC_FRAME_HEADER = struct.Struct('>BI')
# In gRPC-Web the high bit of the flag byte marks a trailers frame
# ("grpc-status:..." text) instead of a protobuf message.
_GRPC_TRAILERS_FLAG = 0x80


def add_grpc_header(protobuf_data):
    """Return *protobuf_data* prefixed with its 5-byte gRPC frame header.

    The flag byte is always 0, i.e. the payload is sent uncompressed.
    """
    return _GRPC_FRAME_HEADER.pack(0, len(protobuf_data)) + protobuf_data


def strip_grpc_response(response):
    """Return the payload of the first message frame in a gRPC-Web body.

    Frames are walked using their length prefix, so the trailers frame (and
    its own 5-byte header) never leaks into the returned bytes, and a body
    that carries no trailers still yields its message.

    Returns ``b''`` when the body holds no message frame, e.g. an empty body
    or a trailers-only (error) response.
    """
    offset = 0
    while offset + _GRPC_FRAME_HEADER.size <= len(response):
        flags, length = _GRPC_FRAME_HEADER.unpack_from(response, offset)
        offset += _GRPC_FRAME_HEADER.size
        if not flags & _GRPC_TRAILERS_FLAG:
            # NOTE(review): a set compression bit (0x01) is not handled; the
            # requests sent here never ask for compression.
            return response[offset:offset + length]
        offset += length
    return b''


if protobug:

    def _download_grpc(self, url_or_request, video_id, response_message,
                       note="Downloading GRPC information", *args, **kwargs):
        """POST a protobuf message over gRPC-Web and decode the reply.

        ``self`` is the extractor whose ``_request_webpage`` performs the
        request.  The message to send is taken from the required ``data=``
        keyword; an optional ``headers=`` mapping is merged over the gRPC-Web
        defaults.  Remaining arguments are forwarded to ``_request_webpage``.

        Returns the reply decoded as *response_message*, or ``None`` when the
        request failed non-fatally, the reply carried no message, or
        *response_message* is ``None`` (the caller does not want the reply).

        Raises ``KeyError`` if ``data=`` is missing.
        """
        headers = {
            'Content-Type': 'application/grpc-web+proto',
            'X-User-Agent': 'grpc-web-javascript/0.1',
            'X-Grpc-Web': '1',
            # headers= is optional; tolerate both a missing and a None value
            **(kwargs.pop('headers', None) or {}),
        }
        body = add_grpc_header(protobug.dumps(kwargs.pop('data')))
        urlh = self._request_webpage(
            url_or_request, video_id, *args,
            headers=headers, data=body, note=note, **kwargs)
        if urlh is False:
            # presumably only reachable with fatal=False - there is then
            # nothing to read
            return None

        protobuf = strip_grpc_response(urlh.read())
        if not protobuf or response_message is None:
            return None
        return protobug.loads(protobuf, response_message)

    @protobug.message
    class SignUpRequest:
        """Request body for ``UserService/SignUp``."""
        lsid: protobug.String = protobug.field(1)

    def sign_up(self):
        """Register a freshly generated lsid with the UserService and return it.

        Sign-up is meant to happen only once: keep the returned lsid and reuse
        it with ``sign_in`` to obtain the token the API needs.
        """
        # 32 lowercase hex characters, from a cryptographically strong source
        # because the lsid is all that is needed to sign in later.
        lsid = secrets.token_hex(16)

        _download_grpc(
            self, "https://api.annex.radiko.jp/radiko.UserService/SignUp",
            "UserService", None,
            note="Registering ID", headers={'Origin': 'https://radiko.jp'},
            data=SignUpRequest(lsid=lsid),
        )
        return lsid

    @protobug.message
    class SignInRequest:
        """Request body for ``UserService/SignIn``."""
        lsid: protobug.String = protobug.field(2)
        area: protobug.String = protobug.field(3)

    @protobug.message
    class SignInResponse:
        """Reply of ``UserService/SignIn``."""
        jwt: protobug.String = protobug.field(1)

    def sign_in(self, lsid, area="JP13"):
        """Exchange a registered *lsid* for a JWT.

        *area* is sent as the request's area field and defaults to ``"JP13"``.

        Raises ``AttributeError`` if the server returned no message.
        """
        response = _download_grpc(
            self, "https://api.annex.radiko.jp/radiko.UserService/SignIn",
            "UserService", SignInResponse,
            note="Getting auth token", headers={'Origin': 'https://radiko.jp'},
            data=SignInRequest(lsid=lsid, area=area),
        )
        return response.jwt

    def auth_userservice(self):
        """Return a JWT for the UserService, signing up first if needed.

        The lsid is kept in the extractor cache under ("rajiko",
        "UserService").  A missing or unusable cache entry triggers a fresh
        sign-up whose lsid is then stored.
        """
        cachedata = self.cache.load("rajiko", "UserService")
        lsid = cachedata.get("lsid") if isinstance(cachedata, dict) else None
        if not lsid:
            lsid = sign_up(self)
            self.cache.store("rajiko", "UserService", {"lsid": lsid})
        return sign_in(self, lsid)

    @protobug.message
    class ListPodcastEpisodesRequest:
        """Request body for ``PodcastService/ListPodcastEpisodes``."""
        channel_id: protobug.String = protobug.field(1)
        # NOTE(review): meaning not established here; always sent as 1.
        dontknow: protobug.Int32 = protobug.field(2)
        page_length: protobug.Int32 = protobug.field(4)
        # id of the last episode already seen; None when omitted
        cursor: protobug.String = protobug.field(5, default=None)

    @protobug.message
    class Audio:
        """Audio file description nested in ``PodcastEpisode``."""
        revision: protobug.Int32 = protobug.field(1)
        url: protobug.String = protobug.field(2)
        fileSize: protobug.Int64 = protobug.field(3)
        durationSec: protobug.Int64 = protobug.field(4)
        transcoded: protobug.Bool = protobug.field(5)

    @protobug.message
    class EpisodeStartAt:
        """Seconds/nanos pair nested in ``PodcastEpisode.startAt``."""
        seconds: protobug.UInt64 = protobug.field(1)
        nanos: protobug.UInt64 = protobug.field(2, default=0)

    @protobug.message
    class PodcastEpisode:
        """One episode as returned by ``ListPodcastEpisodes``."""
        id: protobug.String = protobug.field(1)
        workspaceId: protobug.String = protobug.field(2)
        channelId: protobug.String = protobug.field(3)
        title: protobug.String = protobug.field(4)
        description: protobug.String = protobug.field(5)

        audio: Audio = protobug.field(8)
        channelImageUrl: protobug.String = protobug.field(16)
        channelTitle: protobug.String = protobug.field(17)
        channelStationName: protobug.String = protobug.field(18)
        channelAuthor: protobug.String = protobug.field(19)

        channelThumbnailImageUrl: protobug.String = protobug.field(21)
        channelStationType: protobug.UInt32 = protobug.field(22)
        startAt: EpisodeStartAt = protobug.field(27)
        isEnabled: protobug.Bool = protobug.field(29)
        hasTranscription: protobug.Bool = protobug.field(32)

        # Fields with defaults are declared last, out of field-number order.
        imageUrl: protobug.String = protobug.field(7, default=None)
        thumbnailImageUrl: protobug.String = protobug.field(20, default=None)

    @protobug.message
    class ListPodcastEpisodesResponse:
        """Reply of ``PodcastService/ListPodcastEpisodes``."""
        episodes: list[PodcastEpisode] = protobug.field(1)
        hasNextPage: protobug.Bool = protobug.field(2, default=False)

    def get_podcast_episodes(self, channel_id, jwt, cursor=None, page_length=20):
        """Fetch one page of a channel's episode list.

        *jwt* is sent as a Bearer token.  *cursor* is the id of the last
        episode already seen in the list; ``None`` leaves it out of the
        request.  *page_length* defaults to 20, the page size the site uses.

        Returns a ``ListPodcastEpisodesResponse``, or ``None`` if the reply
        carried no message.
        """
        return _download_grpc(
            self, 'https://api.annex.radiko.jp/radiko.PodcastService/ListPodcastEpisodes',
            channel_id, ListPodcastEpisodesResponse,
            note="Downloading episode listings",
            headers={'Authorization': f'Bearer {jwt}'},
            data=ListPodcastEpisodesRequest(
                channel_id=channel_id,
                dontknow=1,
                page_length=page_length,
                cursor=cursor,
            ),
        )
a/yt_dlp_plugins/extractor/radiko_podcast.py +++ b/yt_dlp_plugins/extractor/radiko_podcast.py @@ -6,9 +6,12 @@ from yt_dlp.utils import ( str_or_none, ) -# nice simple one for a change... -# the app uses a similar system to regular programmes, thankfully the site doesn't -# but it does need protobufs to get more than 20 items... +try: + import protobug + import yt_dlp_plugins.extractor.radiko_protobufs as pb +except ImportError: + protobug = None + class _RadikoPodcastBaseIE(InfoExtractor): @@ -32,7 +35,7 @@ class _RadikoPodcastBaseIE(InfoExtractor): or traverse_obj(episode_info, ("channelImageUrl", {url_or_none})), # so that --download-archive still works if you download from the playlist page - "webpage_url": "https://radiko.jp/podcast/episodes/{id}".format(id=episode_info.get("id")), + "webpage_url": "https://radiko.jp/podcast/episodes/{id}".format(id=traverse_obj(episode_info, "id")), 'extractor_key': RadikoPodcastEpisodeIE.ie_key(), 'extractor': 'RadikoPodcastEpisode', } @@ -82,29 +85,35 @@ class RadikoPodcastChannelIE(_RadikoPodcastBaseIE): }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - next_data = self._search_nextjs_data(webpage, video_id)["props"]["pageProps"] + channel_id = self._match_id(url) + webpage = self._download_webpage(url, channel_id) + next_data = self._search_nextjs_data(webpage, channel_id)["props"]["pageProps"] channel_info = next_data["podcastChannel"] episode_list_response = next_data["listPodcastEpisodesResponse"] def entries(): + has_next_page = episode_list_response.get("hasNextPage") for episode in episode_list_response["episodesList"]: + cursor = episode.get("id") yield self._extract_episode(episode) - if traverse_obj(episode_list_response, "hasNextPage"): - self.report_warning(f'Currently this extractor can only extract the latest {len(episode_list_response["episodesList"])} episodes') - - # TODO: GRPC/protobuf stuff to get the next page - # 
https://api.annex.radiko.jp/radiko.PodcastService/ListPodcastEpisodes - # see さらに表示 button on site - + if has_next_page: + if protobug: + userservice_token = pb.auth_userservice(self) + while has_next_page: + page = pb.get_podcast_episodes(self, channel_id, userservice_token, cursor) + has_next_page = page.hasNextPage + for episode in page.episodes: + cursor = episode.id + yield self._extract_episode(episode) + else: + self.report_warning(f'Only extracting the latest {len(episode_list_response["episodesList"])} episodes. Install protobug for more.') return { "_type": "playlist", - "id": video_id, + "id": channel_id, **traverse_obj(channel_info, { "playlist_title": "title", "playlist_id": "id", -- cgit v1.2.3-70-g09d2 From d5f824093b0748889916a1ba820398aecaa184c8 Mon Sep 17 00:00:00 2001 From: garret1317 Date: Wed, 13 Aug 2025 02:49:46 +0100 Subject: convert protobug obj to dict, for traverse_obj --- yt_dlp_plugins/extractor/radiko_podcast.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'yt_dlp_plugins/extractor') diff --git a/yt_dlp_plugins/extractor/radiko_podcast.py b/yt_dlp_plugins/extractor/radiko_podcast.py index 904bb62..84bc288 100644 --- a/yt_dlp_plugins/extractor/radiko_podcast.py +++ b/yt_dlp_plugins/extractor/radiko_podcast.py @@ -6,6 +6,7 @@ from yt_dlp.utils import ( str_or_none, ) +import dataclasses try: import protobug import yt_dlp_plugins.extractor.radiko_protobufs as pb @@ -107,7 +108,7 @@ class RadikoPodcastChannelIE(_RadikoPodcastBaseIE): has_next_page = page.hasNextPage for episode in page.episodes: cursor = episode.id - yield self._extract_episode(episode) + yield self._extract_episode(dataclasses.asdict(episode)) else: self.report_warning(f'Only extracting the latest {len(episode_list_response["episodesList"])} episodes. 
Install protobug for more.') -- cgit v1.2.3-70-g09d2 From 9e91cb5ee32a47eb05dc2d3885e13d274cdadd03 Mon Sep 17 00:00:00 2001 From: garret1317 Date: Wed, 13 Aug 2025 07:29:19 +0100 Subject: ListPodcastEpisodesRequest: dontknow -> sort_by_latest --- yt_dlp_plugins/extractor/radiko_protobufs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'yt_dlp_plugins/extractor') diff --git a/yt_dlp_plugins/extractor/radiko_protobufs.py b/yt_dlp_plugins/extractor/radiko_protobufs.py index 2336f10..ff4531e 100755 --- a/yt_dlp_plugins/extractor/radiko_protobufs.py +++ b/yt_dlp_plugins/extractor/radiko_protobufs.py @@ -88,7 +88,7 @@ if protobug: # i suppose it works lmao @protobug.message class ListPodcastEpisodesRequest: channel_id: protobug.String = protobug.field(1) - dontknow: protobug.Int32 = protobug.field(2) + sort_by_latest: protobug.Bool = protobug.field(2) page_length: protobug.Int32 = protobug.field(4) cursor: protobug.String = protobug.field(5, default=None) @@ -145,7 +145,7 @@ if protobug: # i suppose it works lmao headers={'Authorization': f'Bearer {jwt}'}, data=ListPodcastEpisodesRequest( channel_id=channel_id, - dontknow=1, + sort_by_latest=True, page_length=page_length, cursor=cursor, ) -- cgit v1.2.3-70-g09d2