aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/yt_dlp_plugins
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp_plugins')
-rw-r--r--yt_dlp_plugins/extractor/radiko.py399
-rw-r--r--yt_dlp_plugins/extractor/radiko_dependencies.py29
-rw-r--r--yt_dlp_plugins/extractor/radiko_hacks.py65
-rw-r--r--yt_dlp_plugins/extractor/radiko_podcast.py175
-rwxr-xr-xyt_dlp_plugins/extractor/radiko_protobufs.py146
5 files changed, 662 insertions, 152 deletions
diff --git a/yt_dlp_plugins/extractor/radiko.py b/yt_dlp_plugins/extractor/radiko.py
index c6cea37..3fd19d9 100644
--- a/yt_dlp_plugins/extractor/radiko.py
+++ b/yt_dlp_plugins/extractor/radiko.py
@@ -6,18 +6,23 @@ import urllib.parse
import pkgutil
from yt_dlp.extractor.common import InfoExtractor
+from yt_dlp.networking.exceptions import HTTPError
from yt_dlp.utils import (
+ ExtractorError,
OnDemandPagedList,
clean_html,
int_or_none,
join_nonempty,
parse_qs,
traverse_obj,
+ urlencode_postdata,
url_or_none,
update_url_query,
)
+from yt_dlp_plugins.extractor.radiko_podcast import RadikoPodcastSearchIE
import yt_dlp_plugins.extractor.radiko_time as rtime
+import yt_dlp_plugins.extractor.radiko_hacks as hacks
class _RadikoBaseIE(InfoExtractor):
@@ -82,12 +87,15 @@ class _RadikoBaseIE(InfoExtractor):
_APP_VERSIONS = ["7.5.0", "7.4.17", "7.4.16", "7.4.15", "7.4.14", "7.4.13", "7.4.12", "7.4.11", "7.4.10", "7.4.9", "7.4.8", "7.4.7", "7.4.6", "7.4.5", "7.4.4", "7.4.3", "7.4.2", "7.4.1", "7.4.0", "7.3.8", "7.3.7", "7.3.6", "7.3.1", "7.3.0", "7.2.11", "7.2.10"]
_DELIVERED_ONDEMAND = ('radiko.jp',)
- _DOESNT_WORK_WITH_FFMPEG = ('tf-f-rpaa-radiko.smartstream.ne.jp', 'si-f-radiko.smartstream.ne.jp')
+ _DOESNT_WORK_WITH_FFMPEG = ('tf-f-rpaa-radiko.smartstream.ne.jp', 'si-f-radiko.smartstream.ne.jp', 'alliance-stream-radiko.smartstream.ne.jp')
+ _AD_INSERTION = ('si-f-radiko.smartstream.ne.jp', )
+
+ _has_tf30 = None
def _index_regions(self):
region_data = {}
- tree = self._download_xml("https://radiko.jp/v3/station/region/full.xml", None, note="Indexing regions")
+ tree = self._download_xml("https://radiko.jp/v3/station/region/full.xml", None, note="Indexing station regions")
for stations in tree:
for station in stations:
area = station.find("area_id").text
@@ -178,16 +186,21 @@ class _RadikoBaseIE(InfoExtractor):
"X-Radiko-AuthToken": auth_token,
},
"user": auth2_headers["X-Radiko-User"],
+ "has_tf30": self._has_tf30,
}
if not region_mismatch:
self.cache.store("rajiko", station_region, auth_data)
return auth_data
- def _auth(self, station_region):
+ def _auth(self, station_region, need_tf30=False):
cachedata = self.cache.load("rajiko", station_region)
self.write_debug(cachedata)
if cachedata is not None:
+ if need_tf30 and not cachedata.get("has_tf30"):
+ self.write_debug("Cached token doesn't have timefree 30, getting a new one")
+ return self._negotiate_token(station_region)
+
auth_headers = cachedata.get("token")
response = self._download_webpage("https://radiko.jp/v2/api/auth_check", station_region, "Checking cached token",
headers=auth_headers, expected_status=401)
@@ -205,6 +218,17 @@ class _RadikoBaseIE(InfoExtractor):
station = region.find(f'.//station/id[.="{station_id}"]/..') # a <station> with an <id> of our station_id
station_name = station.find("name").text
station_url = url_or_none(station.find("href").text)
+
+ thumbnails = []
+ for logo in station.findall("logo"):
+ thumbnails.append({
+ "url": logo.text,
+ **traverse_obj(logo.attrib, ({
+ "width": ("width", {int_or_none}),
+ "height": ("height", {int_or_none}),
+ }))
+ })
+
meta = {
"id": station_id,
"title": station_name,
@@ -218,7 +242,7 @@ class _RadikoBaseIE(InfoExtractor):
"uploader_id": station_id,
"uploader_url": station_url,
- "thumbnail": url_or_none(station.find("banner").text),
+ "thumbnails": thumbnails,
}
self.cache.store("rajiko", station_id, {
"expiry": (now + datetime.timedelta(days=1)).timestamp(),
@@ -229,8 +253,15 @@ class _RadikoBaseIE(InfoExtractor):
self.to_screen(f"{station_id}: Using cached station metadata")
return cachedata.get("meta")
- def _get_station_formats(self, station, timefree, auth_data, start_at=None, end_at=None):
- device = self._configuration_arg('device', ['aSmartPhone7a'], casesense=True, ie_key="rajiko")[0] # aSmartPhone7a formats = always happy path
+ def _get_station_formats(self, station, timefree, auth_data, start_at=None, end_at=None, use_pc_html5=False):
+ config_device = traverse_obj(self._configuration_arg('device', casesense=True, ie_key="rajiko"), 0)
+
+ if not use_pc_html5:
+ device = config_device or "aSmartPhone7a" # still has the radiko.jp on-demand one for timefree
+ else:
+ device = config_device or "pc_html5" # the on-demand one doesnt work with timefree30 stuff sadly
+ # so just use pc_html5 which has everything
+
url_data = self._download_xml(f"https://radiko.jp/v3/station/stream/{device}/{station}.xml",
station, note=f"Downloading {device} stream information")
@@ -238,8 +269,11 @@ class _RadikoBaseIE(InfoExtractor):
formats = []
timefree_int = 1 if timefree else 0
+ do_blacklist_streams = not len(self._configuration_arg("no_stream_blacklist", ie_key="rajiko")) > 0
+ do_as_live_chunks = not len(self._configuration_arg("no_as_live_chunks", ie_key="rajiko")) > 0
for element in url_data.findall(f".//url[@timefree='{timefree_int}'][@areafree='0']/playlist_create_url"):
# find <url>s with matching timefree and no areafree, then get their <playlist_create_url>
+ # we don't want areafree here because we should always be in-region
url = element.text
if url in seen_urls: # there are always dupes, even with ^ specific filtering
continue
@@ -249,7 +283,7 @@ class _RadikoBaseIE(InfoExtractor):
"station_id": station,
"l": "15", # l = length, ie how many seconds in the live m3u8 (max 300)
"lsid": auth_data["user"],
- "type": "b", # it is a mystery
+ "type": "b", # a/b = in-region, c = areafree
})
if timefree:
@@ -267,20 +301,56 @@ class _RadikoBaseIE(InfoExtractor):
delivered_live = True
preference = -1
entry_protocol = 'm3u8'
+ format_note=[]
- if domain in self._DOESNT_WORK_WITH_FFMPEG:
+ if timefree and domain in self._DOESNT_WORK_WITH_FFMPEG and do_blacklist_streams:
+ # TODO: remove this completely
+ # https://github.com/garret1317/yt-dlp-rajiko/issues/29
self.write_debug(f"skipping {domain} (known not working)")
continue
if domain in self._DELIVERED_ONDEMAND:
# override the defaults for delivered as on-demand
delivered_live = False
- preference = 1
+ preference += 2
entry_protocol = None
+ if domain in self._AD_INSERTION:
+ preference -= 3
+ format_note.append("Ad insertion")
+
+
+ auth_headers = auth_data["token"]
+
+ if delivered_live and timefree and do_as_live_chunks:
+
+ chunks_playlist = hacks._generate_as_live_playlist(
+ self, playlist_url, start_at, end_at, domain, auth_headers
+ )
+
+ m3u8_formats = [{
+ "format_id": join_nonempty(domain, "chunked"),
+ "hls_media_playlist_data": chunks_playlist,
+ "preference": preference,
+ "ext": "m4a",
+ "vcodec": "none",
+
+ # fallback to live for ffmpeg etc
+ "url": playlist_url,
+ "http_headers": auth_headers,
+ }]
+ format_note.append("Chunked")
+ else:
+
+ m3u8_formats = self._extract_m3u8_formats(
+ playlist_url, station, m3u8_id=domain, fatal=False, headers=auth_headers,
+ live=delivered_live, preference=preference, entry_protocol=entry_protocol,
+ note=f"Downloading m3u8 information from {domain}")
+
+ for f in m3u8_formats:
+ # ffmpeg sends a Range header which some streams reject. here we disable that (and also some icecast header as well)
+ f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0', '-http_seekable', '0', '-icy', '0']}
+ f['format_note'] = ", ".join(format_note)
+ formats.append(f)
- formats += self._extract_m3u8_formats(
- playlist_url, station, m3u8_id=domain, fatal=False, headers=auth_data["token"],
- live=delivered_live, preference=preference, entry_protocol=entry_protocol,
- note=f"Downloading m3u8 information from {domain}")
return formats
@@ -299,7 +369,7 @@ class RadikoLiveIE(_RadikoBaseIE):
"id": "FMT",
"title": "re:^TOKYO FM.+$",
"alt_title": "TOKYO FM",
- "thumbnail": "https://radiko.jp/res/banner/FMT/20220512162447.jpg",
+ "thumbnail": "https://radiko.jp/v2/static/station/logo/FMT/lrtrim/688x160.png",
"channel": "TOKYO FM",
"channel_id": "FMT",
@@ -319,7 +389,7 @@ class RadikoLiveIE(_RadikoBaseIE):
"id": "NORTHWAVE",
"title": "re:^FM NORTH WAVE.+$",
"alt_title": "FM NORTH WAVE",
- "thumbnail": "https://radiko.jp/res/banner/NORTHWAVE/20150731161543.png",
+ "thumbnail": "https://radiko.jp/v2/static/station/logo/NORTHWAVE/lrtrim/688x160.png",
"uploader": "FM NORTH WAVE",
"uploader_url": "https://www.fmnorth.co.jp/",
@@ -340,7 +410,7 @@ class RadikoLiveIE(_RadikoBaseIE):
"id": "RN1",
"title": "re:^ラジオNIKKEI第1.+$",
"alt_title": "RADIONIKKEI",
- "thumbnail": "https://radiko.jp/res/banner/RN1/20120802154152.png",
+ "thumbnail": "https://radiko.jp/v2/static/station/logo/RN1/lrtrim/688x160.png",
"channel": "ラジオNIKKEI第1",
"channel_url": "http://www.radionikkei.jp/",
@@ -357,7 +427,7 @@ class RadikoLiveIE(_RadikoBaseIE):
region = self._get_station_region(station)
station_meta = self._get_station_meta(region, station)
auth_data = self._auth(region)
- formats = self._get_station_formats(station, False, auth_data)
+ formats = self._get_station_formats(station, False, auth_data, use_pc_html5=True)
return {
"is_live": True,
@@ -368,71 +438,36 @@ class RadikoLiveIE(_RadikoBaseIE):
class RadikoTimeFreeIE(_RadikoBaseIE):
+ _NETRC_MACHINE = "rajiko"
_VALID_URL = r"https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-_]+)/(?P<id>\d+)"
- _TESTS = [{
- "url": "https://radiko.jp/#!/ts/INT/20240809230000",
- "info_dict": {
- "live_status": "was_live",
- "ext": "m4a",
- "id": "INT-20240809230000",
-
- "title": "TOKYO MOON",
- "series": "Tokyo Moon",
- "description": "md5:20e68d2f400a391fa34d4e7c8c702cb8",
- "chapters": "count:14",
- "thumbnail": "https://program-static.cf.radiko.jp/ehwtw6mcvy.jpg",
-
- "upload_date": "20240809",
- "timestamp": 1723212000.0,
- "release_date": "20240809",
- "release_timestamp": 1723215600.0,
- "duration": 3600,
-
- "channel": "interfm",
- "channel_id": "INT",
- "channel_url": "https://www.interfm.co.jp/",
- "uploader": "interfm",
- "uploader_id": "INT",
- "uploader_url": "https://www.interfm.co.jp/",
-
- "cast": ["松浦\u3000俊夫"],
- "tags": ["松浦俊夫"],
- },
- }, {
- # late-night/early-morning show to test broadcast day checking
- "url": "https://radiko.jp/#!/ts/TBS/20240810033000",
- "info_dict": {
- "live_status": "was_live",
- "ext": "m4a",
- "id": "TBS-20240810033000",
-
- "title": "CITY CHILL CLUB",
- "series": "CITY CHILL CLUB",
- "description": "md5:3fba2c1125059bed27247c0be90e58fa",
- "chapters": "count:22",
- "thumbnail": "https://program-static.cf.radiko.jp/ku7t4ztnaq.jpg",
-
- "upload_date": "20240809",
- "timestamp": 1723228200.0,
- "release_date": "20240809",
- "release_timestamp": 1723233600.0,
- "duration": 5400,
-
- "channel": "TBSラジオ",
- "channel_url": "https://www.tbsradio.jp/",
- "channel_id": "TBS",
- "uploader": "TBSラジオ",
- "uploader_url": "https://www.tbsradio.jp/",
- "uploader_id": "TBS",
-
- "tags": ["CCC905", "音楽との出会いが楽しめる", "人気アーティストトーク", "音楽プロデューサー出演", "ドライブ中におすすめ", "寝る前におすすめ", "学生におすすめ"],
- "cast": ["PES"],
- },
- }]
+ # TESTS use a custom-ish script that updates the airdates automatically, see contrib/test_extractors.py
+
+ def _perform_login(self, username, password):
+ try:
+ login_info = self._download_json('https://radiko.jp/ap/member/webapi/member/login', None, note='Logging in',
+ data=urlencode_postdata({'mail': username, 'pass': password}))
+ self._has_tf30 = '2' in login_info.get('privileges')
+ # areafree = 1, timefree30 = 2, double plan = both
+ self.write_debug({**login_info, "radiko_session": "PRIVATE", "member_ukey": "PRIVATE"})
+ except ExtractorError as error:
+ if isinstance(error.cause, HTTPError) and error.cause.status == 401:
+ raise ExtractorError('Invalid username and/or password', expected=True)
+ raise
+
+ def _check_tf30(self):
+ if self._has_tf30 is not None:
+ return self._has_tf30
+ if self._get_cookies('https://radiko.jp').get('radiko_session') is None:
+ return
+ account_info = self._download_json('https://radiko.jp/ap/member/webapi/v2/member/login/check',
+ None, note='Checking account status from cookies', expected_status=400)
+ self.write_debug({**account_info, "user_key": "PRIVATE"})
+ self._has_tf30 = account_info.get('timefreeplus') == '1'
+ return self._has_tf30
def _get_programme_meta(self, station_id, url_time):
day = url_time.broadcast_day_string()
- meta = self._download_json(f"https://radiko.jp/v4/program/station/date/{day}/{station_id}.json", station_id,
+ meta = self._download_json(f"https://api.radiko.jp/program/v4/date/{day}/station/{station_id}.json", station_id,
note="Downloading programme data")
programmes = traverse_obj(meta, ("stations", lambda _, v: v["station_id"] == station_id,
"programs", "program"), get_all=False)
@@ -467,10 +502,12 @@ class RadikoTimeFreeIE(_RadikoBaseIE):
"start_time_gte": start.isoformat(),
"end_time_lt": end.isoformat(),
})
- data = self._download_json(api_url, video_id, note="Downloading tracklist").get("data")
+ data_json = self._download_json(
+ api_url, video_id, note="Downloading tracklist", errnote="Downloading tracklist", fatal=False
+ )
chapters = []
- for track in data:
+ for track in traverse_obj(data_json, "data") or []:
artist = traverse_obj(track, ("artist", "name")) or track.get("artist_name")
chapters.append({
"title": join_nonempty(artist, track.get("title"), delim=" - "),
@@ -493,11 +530,11 @@ class RadikoTimeFreeIE(_RadikoBaseIE):
end = times[1]
now = datetime.datetime.now(tz=rtime.JST)
expiry_free, expiry_tf30 = end.expiry()
- have_tf30 = False
if expiry_tf30 < now:
self.raise_no_formats("Programme is no longer available.", video_id=meta["id"], expected=True)
- elif not have_tf30 and expiry_free < now:
+ need_tf30 = expiry_free < now
+ if need_tf30 and not self._check_tf30():
self.raise_login_required("Programme is only available with a Timefree 30 subscription")
elif start > now:
self.raise_no_formats("Programme has not aired yet.", video_id=meta["id"], expected=True)
@@ -508,13 +545,19 @@ class RadikoTimeFreeIE(_RadikoBaseIE):
region = self._get_station_region(station)
station_meta = self._get_station_meta(region, station)
- chapters = self._extract_chapters(station, start, end, video_id=meta["id"])
- auth_data = self._auth(region)
- formats = self._get_station_formats(station, True, auth_data, start_at=start, end_at=end)
+ if live_status == "was_live":
+ chapters = self._extract_chapters(station, start, end, video_id=meta["id"])
+ auth_data = self._auth(region, need_tf30=need_tf30)
+ formats = self._get_station_formats(station, True, auth_data, start_at=start, end_at=end, use_pc_html5=need_tf30)
+ else:
+ chapters = None
+ formats = None
return {
**station_meta,
- "alt_title": None,
+ "alt_title": None, # override from station metadata
+ "thumbnails": None,
+
**meta,
"chapters": chapters,
"formats": formats,
@@ -524,7 +567,7 @@ class RadikoTimeFreeIE(_RadikoBaseIE):
class RadikoSearchIE(InfoExtractor):
- _VALID_URL = r"https?://(?:www\.)?radiko\.jp/#!/search/(?:timeshift|live|history)\?"
+ _VALID_URL = r"https?://(?:www\.)?radiko\.jp/#!/search/(?:radio/)?(?:timeshift|live|history)\?"
_TESTS = [{
# timefree, specific area
"url": "https://radiko.jp/#!/search/live?key=city%20chill%20club&filter=past&start_day=&end_day=&region_id=&area_id=JP13&cul_area_id=JP13&page_idx=0",
@@ -549,26 +592,70 @@ class RadikoSearchIE(InfoExtractor):
"id": "ニュース-all-all",
"title": "ニュース"
},
+ 'expected_warnings': ['Skipping podcasts. If you really want EVERY EPISODE of EVERY RESULT, set your search filter to Podcasts only.'],
}]
def _strip_date(self, date):
+ # lazy way of making a timestring (from eg 2025-05-20 01:00:00)
return date.replace(" ", "").replace("-", "").replace(":", "")
def _pagefunc(self, url, idx):
url = update_url_query(url, {"page_idx": idx})
data = self._download_json(url, None, note=f"Downloading page {idx+1}")
- return [self.url_result("https://radiko.jp/#!/ts/{station}/{time}".format(
- station = i.get("station_id"), time = self._strip_date(i.get("start_time"))))
- for i in data.get("data")]
+ results = []
+ for r in data.get("data"):
+ station = r.get("station_id")
+ timestring = self._strip_date(r.get("start_time"))
+
+ results.append(
+ self.url_result(
+ f"https://radiko.jp/#!/ts/{station}/{timestring}",
+ id=join_nonempty(station, timestring),
+ ie=RadikoTimeFreeIE,
+ )
+ )
+ return results
def _real_extract(self, url):
- url = url.replace("/#!/", "/!/", 1)
# urllib.parse interprets the path as just one giant fragment because of the #, so we hack it away
+ url = url.replace("/#!/", "/!/", 1)
queries = parse_qs(url)
+ key = traverse_obj(queries, ("key", 0))
- search_url = update_url_query("https://radiko.jp/v3/api/program/search", {
+ # site used to use "cul_area_id" in the search url, now it uses "cur_area_id" (with an r)
+ # and outright rejects the old one with HTTP Error 415: Unsupported Media Type
+ if queries.get("cul_area_id"):
+ queries["cur_area_id"] = queries.pop("cul_area_id")
+
+ if queries.get("filter"):
+ filter_set = set(queries["filter"][0].split("|"))
+ del queries["filter"]
+ else:
+ filter_set = {"future", "past", "channel"}
+
+ if filter_set == {"channel"}:
+ podcast_search_url = update_url_query(
+ "https://radiko.jp/!/search/podcast/live", {"key": key}
+ ).replace("!", "#!", 1) # same shit with urllib.parse
+ return self.url_result(podcast_search_url, ie=RadikoPodcastSearchIE)
+
+ if "channel" in filter_set:
+ self.report_warning("Skipping podcasts. If you really want EVERY EPISODE of EVERY RESULT, set your search filter to Podcasts only.")
+ filter_set.discard("channel")
+
+ if filter_set == {"future", "past"}:
+ filter_str = ""
+ else:
+ filter_str = "|".join(filter_set) # there should be only one filter now, so this should be the same as filter_set[0]
+ # but if there's more than one, then we should at least try to pass it through as-is, in the hope that it works
+ if len(filter_set) != 1:
+ # but also kick up a stink about it so it's clear it probably won't
+ self.report_warning("Your search has an unknown combination of filters, so this request will probably fail!")
+
+ search_url = update_url_query("https://api.annex-cf.radiko.jp/v1/programs/legacy/perl/program/search", {
**queries,
+ "filter": filter_str,
"uid": "".join(random.choices("0123456789abcdef", k=32)),
"app_id": "pc",
"row_limit": 50, # higher row_limit = more results = less requests = more good
@@ -576,60 +663,32 @@ class RadikoSearchIE(InfoExtractor):
results = OnDemandPagedList(lambda idx: self._pagefunc(search_url, idx), 50)
- key = traverse_obj(queries, ("key", 0))
day = traverse_obj(queries, ("start_day", 0)) or "all"
region = traverse_obj(queries, ("region_id", 0)) or traverse_obj(queries, ("area_id", 0))
- status_filter = traverse_obj(queries, ("filter", 0)) or "all"
+ status_filter = filter_str or "all"
playlist_id = join_nonempty(key, status_filter, day, region)
return {
"_type": "playlist",
- "title": traverse_obj(queries, ("key", 0)),
+ "title": key,
"id": playlist_id,
"entries": results,
}
+
class RadikoShareIE(InfoExtractor):
_VALID_URL = r"https?://(?:www\.)?radiko\.jp/share/"
- _TESTS = [{
- # 29-hour time -> 24-hour time
- "url": "http://radiko.jp/share/?sid=FMT&t=20240802240000",
- "info_dict": {
- "live_status": "was_live",
- "ext": "m4a",
- "id": "FMT-20240803000000", # the time given (24:00) works out to 00:00 the next day
-
- "title": "JET STREAM",
- "series": "JET STREAM",
- "description": "md5:c1a2172036ebb7a54eeafb47e0a08a50",
- "chapters": "count:9",
- "thumbnail": "https://program-static.cf.radiko.jp/greinlrspi.jpg",
-
- "upload_date": "20240802",
- "timestamp": 1722610800.0,
- "release_date": "20240802",
- "release_timestamp": 1722614100.0,
- "duration": 3300,
-
- "channel": "TOKYO FM",
- "channel_id": "FMT",
- "channel_url": "https://www.tfm.co.jp/",
- "uploader": "TOKYO FM",
- "uploader_id": "FMT",
- "uploader_url": "https://www.tfm.co.jp/",
-
- "cast": ["福山雅治"],
- "tags": ["福山雅治", "夜間飛行", "音楽との出会いが楽しめる", "朗読を楽しめる", "寝る前に聴きたい"],
- }
- }]
def _real_extract(self, url):
queries = parse_qs(url)
station = traverse_obj(queries, ("sid", 0))
time = traverse_obj(queries, ("t", 0))
time = rtime.RadikoShareTime(time).timestring()
- return self.url_result(f"https://radiko.jp/#!/ts/{station}/{time}", RadikoTimeFreeIE)
+ return self.url_result(
+ f"https://radiko.jp/#!/ts/{station}/{time}", RadikoTimeFreeIE,
+ id=join_nonempty(station, time)
+ )
class RadikoStationButtonIE(InfoExtractor):
@@ -642,19 +701,9 @@ class RadikoStationButtonIE(InfoExtractor):
"info_dict": {
"ext": "m4a",
'live_status': 'is_live',
-
"id": "QRR",
- "title": "re:^文化放送.+$",
- 'alt_title': 'JOQR BUNKA HOSO',
- 'thumbnail': 'https://radiko.jp/res/banner/QRR/20240423144553.png',
- 'channel': '文化放送',
- 'channel_id': 'QRR',
- 'channel_url': 'http://www.joqr.co.jp/',
- 'uploader': '文化放送',
- 'uploader_id': 'QRR',
- 'uploader_url': 'http://www.joqr.co.jp/',
-
- }
+ },
+ 'only_matching': True,
}]
_WEBPAGE_TESTS = [{
@@ -665,7 +714,7 @@ class RadikoStationButtonIE(InfoExtractor):
'id': 'CCL',
"title": "re:^FM COCOLO.+$",
'alt_title': 'FM COCOLO',
- 'thumbnail': 'https://radiko.jp/res/banner/CCL/20161014144826.png',
+ 'thumbnail': 'https://radiko.jp/v2/static/station/logo/CCL/lrtrim/688x160.png',
'channel': 'FM COCOLO',
'channel_id': 'CCL',
@@ -694,7 +743,7 @@ class RadikoPersonIE(InfoExtractor):
},{
"url": "https://radiko.jp/persons/11421",
"params": {'extractor_args': {'rajiko': {'key_station_only': ['']}}},
- "playlist_count": 1,
+ "playlist_mincount": 1,
"info_dict": {
"id": "person-11421",
},
@@ -720,19 +769,65 @@ class RadikoPersonIE(InfoExtractor):
def entries():
key_station_only = len(self._configuration_arg("key_station_only", ie_key="rajiko")) > 0
for episode in person_api.get("data"):
- if key_station_only and episode.get("key_station_id") != episode.get("station_id"):
- continue
- share_url = traverse_obj(episode, ("radiko_url", ("pc", "sp", "android", "ios", "app"),
- {url_or_none}), get_all=False)
- # they're all identical share links at the moment (5th aug 2024) but they might not be in the future
+ station = episode.get("station_id")
+ if key_station_only and episode.get("key_station_id") != station:
+ continue
- # predictions:
- # pc will probably stay the same
- # don't know what sp is, possibly "SmartPhone"?, anyway seems reasonably generic
- # android is easier for me to reverse-engineer than ios (no ithing)
- # i assume "app" would be some internal tell-it-to-do-something link, not a regular web link
+ start = episode.get("start_at")
+ timestring = rtime.RadikoTime.fromisoformat(start).timestring()
- yield self.url_result(share_url, ie=RadikoShareIE, video_title=episode.get("title"))
+ timefree_id = join_nonempty(station, timestring)
+ timefree_url = f"https://radiko.jp/#!/ts/{station}/{timestring}"
+ yield self.url_result(timefree_url, ie=RadikoTimeFreeIE, video_id=timefree_id)
return self.playlist_result(entries(), playlist_id=join_nonempty("person", person_id))
+
+
+class RadikoRSeasonsIE(InfoExtractor):
+ _VALID_URL = r"https?://(?:www\.)?radiko\.jp/(?:mobile/)?r_seasons/(?P<id>\d+$)"
+ _TESTS = [{
+ "url": "https://radiko.jp/r_seasons/10012302",
+ "playlist_mincount": 4,
+ "info_dict": {
+ "id": '10012302',
+ "title": '山下達郎の楽天カード サンデー・ソングブック',
+ 'thumbnail': 'https://program-static.cf.radiko.jp/935a87fc-4a52-48e5-9468-7b2ef9448d9f.jpeg',
+ }
+ }, {
+ "url": "https://radiko.jp/r_seasons/10002831",
+ "playlist_mincount": 4,
+ "info_dict": {
+ "id": "10002831",
+ "title": "Tokyo Moon",
+ 'description': 'md5:3eef525003bbe96ccf33ec647c43d904',
+ 'thumbnail': 'https://program-static.cf.radiko.jp/0368ee85-5d5f-41c9-8ee1-6c1035d87b3f.jpeg',
+ }
+ }]
+
+ def _real_extract(self, url):
+ season_id = self._match_id(url)
+ html = self._download_webpage(url, season_id)
+ pageProps = self._search_nextjs_data(html, season_id)["props"]["pageProps"]
+ season_id = traverse_obj(pageProps, ("rSeason", "id")) or season_id
+
+ def entries():
+ for episode in pageProps.get("pastPrograms"):
+ station = traverse_obj(episode, ("stationId"))
+ start = traverse_obj(episode, ("startAt", "seconds"))
+ timestring = rtime.RadikoTime.fromtimestamp(start, tz=rtime.JST).timestring()
+
+ timefree_id = join_nonempty(station, timestring)
+ timefree_url = f"https://radiko.jp/#!/ts/{station}/{timestring}"
+
+ yield self.url_result(timefree_url, ie=RadikoTimeFreeIE, video_id=timefree_id)
+
+ return self.playlist_result(
+ entries(),
+ playlist_id=season_id,
+ **traverse_obj(pageProps, ("rSeason", {
+ "playlist_title": "rSeasonName",
+ "thumbnail": "backgroundImageUrl",
+ "description": ("summary", filter),
+ })),
+ )
diff --git a/yt_dlp_plugins/extractor/radiko_dependencies.py b/yt_dlp_plugins/extractor/radiko_dependencies.py
new file mode 100644
index 0000000..769a5e3
--- /dev/null
+++ b/yt_dlp_plugins/extractor/radiko_dependencies.py
@@ -0,0 +1,29 @@
+# Bundle importing code Copyright (c) 2021-2022 Grub4K, from yt-dont-lock-p.
+# https://github.com/Grub4K/yt-dont-lock-p/blob/ff3b6e1d42ce8584153ae27544d2c05b50ab5954/yt_dlp_plugins/postprocessor/yt_dont_lock_p/__init__.py#L23-L46
+# Used under 0BSD with permission
+
+# https://discord.com/channels/807245652072857610/1112613156934668338/1416816007732920430 (yt-dlp discord server, https://discord.gg/H5MNcFW63r )
+# [17:00] garret1317: @Grub4K can i pinch your MIT-licensed dependency bundling code to use in my 0BSD-licensed plugin?
+# I will credit of course but i can't require that anyone else does the same
+# (Any response to this message will be considered a written consent or refusal of the request)
+# [17:04] Grub4K: Feel free to use that part under 0BSD
+# [17:05] garret1317: 👍 cheers
+
+try:
+ import protobug
+except ImportError:
+ import sys
+ from pathlib import Path
+
+ # Try importing from zip file bundle
+ search_path = str(Path(__file__).parent.parent)
+ sys.path.append(search_path)
+ try:
+ import protobug
+ except ImportError:
+ protobug = None
+ except Exception:
+ protobug = None
+
+ finally:
+ sys.path.remove(search_path)
diff --git a/yt_dlp_plugins/extractor/radiko_hacks.py b/yt_dlp_plugins/extractor/radiko_hacks.py
new file mode 100644
index 0000000..6486034
--- /dev/null
+++ b/yt_dlp_plugins/extractor/radiko_hacks.py
@@ -0,0 +1,65 @@
+import datetime
+import re
+
+from yt_dlp.extractor.common import InfoExtractor
+from yt_dlp.utils import (
+ join_nonempty,
+ update_url_query,
+ traverse_obj,
+)
+
+# "hacks" as in great jank/schizo shit that works anyway
+
+def _generate_as_live_playlist(self, playlist_url, start_at, end_at, domain, headers={}):
+ playlist = ""
+ chunk_length = 300 # max the api allows
+
+ duration = int(end_at.timestamp() - start_at.timestamp())
+ cursor = 0
+ chunk_num = 1
+ while cursor < duration:
+ chunk_length = min(chunk_length, duration - cursor)
+
+ chunk_start = start_at + datetime.timedelta(seconds=cursor)
+ chunk_url = update_url_query(playlist_url, {
+ "seek": chunk_start.timestring(),
+ "l": chunk_length,
+ })
+
+ chunk_playlist, real_chunk_length = _get_chunk_playlist(self, chunk_url, domain, chunk_num, headers)
+
+ playlist += chunk_playlist
+ cursor += real_chunk_length
+ chunk_num += 1
+
+ return playlist
+
+def _get_chunk_playlist(self, chunk_url, src_id, chunk_num, headers={}):
+ EXTINF_duration = re.compile(r"^#EXTINF:([\d.]+),", flags=re.MULTILINE)
+
+ playlist = ""
+ chunk_id = join_nonempty(src_id, chunk_num)
+ base_format = self._extract_m3u8_formats(
+ chunk_url, chunk_id, fatal=False, headers=headers,
+ note=f"Preparing {src_id} chunk {chunk_num}"
+ )
+ m3u8_url = traverse_obj(base_format, (..., "url",), get_all=False)
+ playlist = self._download_webpage(m3u8_url, chunk_id, note=f"Getting {src_id} chunk {chunk_num} fragments")
+
+ real_duration = 0
+ for i in EXTINF_duration.findall(playlist):
+ real_duration += float(i)
+ real_duration = round(real_duration)
+
+ # playlists can sometimes be longer than they should
+ # wowza stream does some strange things
+ # it goes along fine with every fragment 5s long as normal
+ # and then during the ad break it does one with a different length (2s here)
+ # i assume so they have a clean split to do ad insertion in? idk
+
+ # but anyway now the chunks aren't always a clean 5mins long
+ # and we get a repeated fragment going into the next chunk
+
+ # so to work around this, we track the real duration from the #EXTINF tags
+
+ return playlist, real_duration
diff --git a/yt_dlp_plugins/extractor/radiko_podcast.py b/yt_dlp_plugins/extractor/radiko_podcast.py
new file mode 100644
index 0000000..27b91ad
--- /dev/null
+++ b/yt_dlp_plugins/extractor/radiko_podcast.py
@@ -0,0 +1,175 @@
+from yt_dlp.extractor.common import InfoExtractor
+from yt_dlp.utils import (
+ clean_html,
+ OnDemandPagedList,
+ parse_qs,
+ traverse_obj,
+ update_url_query,
+ url_or_none,
+ str_or_none,
+)
+
+import dataclasses
+import random
+
+from yt_dlp_plugins.extractor.radiko_dependencies import protobug
+if protobug:
+ import yt_dlp_plugins.extractor.radiko_protobufs as pb
+
+
+class _RadikoPodcastBaseIE(InfoExtractor):
+
+ def _extract_episode(self, episode_info):
+ return {
+ **traverse_obj(episode_info, {
+ "id": ("id", {str_or_none}),
+ "url": ("audio", "url"),
+ "duration": ("audio", "durationSec"),
+
+ "title": "title",
+ "description": ("description", {clean_html}),
+ "timestamp": ("startAt", "seconds"),
+
+ "series": "channelTitle",
+ "series_id": "channelId",
+ "channel": "channelStationName",
+ "uploader": "channelStationName",
+ }),
+ "thumbnail": traverse_obj(episode_info, ("imageUrl", {url_or_none}))
+ or traverse_obj(episode_info, ("channelImageUrl", {url_or_none})),
+
+ # so that --download-archive still works if you download from the playlist page
+ "webpage_url": "https://radiko.jp/podcast/episodes/{id}".format(id=traverse_obj(episode_info, "id")),
+ 'extractor_key': RadikoPodcastEpisodeIE.ie_key(),
+ 'extractor': 'RadikoPodcastEpisode',
+ }
+
+
+class RadikoPodcastEpisodeIE(_RadikoPodcastBaseIE):
+ _VALID_URL = r"https?://radiko\.jp/podcast/episodes/(?P<id>[a-f0-9-]+)"
+
+ _TESTS = [{
+ "url": "https://radiko.jp/podcast/episodes/cc8cf709-a50b-4846-aa0e-91ab10cf8bff",
+ "info_dict": {
+ "id": "cc8cf709-a50b-4846-aa0e-91ab10cf8bff",
+ "ext": "mp3",
+ 'title': '2025.6.26 おしゃべり技術くん',
+ 'description': 'md5:1c4048025f68d6da053dd879a5d62304',
+ 'duration': 717,
+ 'thumbnail': 'https://podcast-static.cf.radiko.jp/09f27a48-ae04-4ce7-a024-572460e46eb7-20240214160012.png',
+ 'series': 'おしゃべり技術くん',
+ 'series_id': '09f27a48-ae04-4ce7-a024-572460e46eb7',
+ 'timestamp': 1751554800,
+ 'upload_date': '20250703',
+ 'uploader': 'IBCラジオ',
+ 'channel': 'IBCラジオ',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ next_data = self._search_nextjs_data(webpage, video_id)["props"]["pageProps"]
+
+ episode_info = next_data["podcastEpisode"]
+
+ return self._extract_episode(episode_info)
+
+
+class RadikoPodcastChannelIE(_RadikoPodcastBaseIE):
+ _VALID_URL = r"https?://radiko\.jp/podcast/channels/(?P<id>[a-f0-9-]+)"
+
+ _TESTS = [{
+ "url": "https://radiko.jp/podcast/channels/09f27a48-ae04-4ce7-a024-572460e46eb7",
+ "info_dict": {
+ "id": "09f27a48-ae04-4ce7-a024-572460e46eb7"
+ },
+ 'playlist_mincount': 20,
+ 'expected_warnings': ['Currently this extractor can only extract the latest 20 episodes'],
+ }]
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ webpage = self._download_webpage(url, channel_id)
+ next_data = self._search_nextjs_data(webpage, channel_id)["props"]["pageProps"]
+
+ channel_info = next_data["podcastChannel"]
+ episode_list_response = next_data["listPodcastEpisodesResponse"]
+
+
+ def entries():
+ has_next_page = episode_list_response.get("hasNextPage")
+ for episode in episode_list_response["episodesList"]:
+ cursor = episode.get("id")
+ yield self._extract_episode(episode)
+
+ if has_next_page:
+ if protobug:
+ userservice_token = pb.auth_userservice(self)
+ while has_next_page:
+ page = pb.get_podcast_episodes(self, channel_id, userservice_token, cursor)
+ has_next_page = page.hasNextPage
+ for episode in page.episodes:
+ cursor = episode.id
+ yield self._extract_episode(dataclasses.asdict(episode))
+ else:
+ self.report_warning(f'protobug is required to extract more than the latest {len(episode_list_response["episodesList"])} episodes.\nIf you installed yt-dlp-rajiko manually (with the .whl), use the .zip bundle instead. If you installed with pip, pip install protobug .')
+
+ return {
+ "_type": "playlist",
+ "id": channel_id,
+ **traverse_obj(channel_info, {
+ "playlist_title": "title",
+ "playlist_id": "id",
+ "playlist_description": ("description", {clean_html}),
+ "playlist_thumbnail": ("imageUrl", {url_or_none}),
+
+ }),
+ "entries": entries(),
+ }
+
+
+class RadikoPodcastSearchIE(InfoExtractor):
+ _VALID_URL = r"https?://(?:www\.)?radiko\.jp/#!/search/podcast/(?:timeshift|live)\?"
+ _TESTS = [{
+ "url": "https://radiko.jp/#!/search/podcast/live?key=ドラマ",
+ "playlist_mincount": 51,
+ "info_dict": {
+ "id": "ドラマ",
+ "title": "ドラマ",
+ },
+ }]
+
+ def _pagefunc(self, url, idx):
+ url = update_url_query(url, {"pageIdx": idx})
+ data = self._download_json(url, None, note=f"Downloading page {idx+1}")
+
+ results = []
+ for channel in data.get("channels"):
+ results.append(
+ self.url_result(
+ channel.get("channelUrl"),
+ id=channel.get("id"),
+ ie=RadikoPodcastChannelIE,
+ )
+ )
+ return results
+
+
+ def _real_extract(self, url):
+ # hack away the # so urllib.parse will work (same as normal RadikoSearchIE)
+ url = url.replace("/#!/", "/!/", 1)
+ queries = parse_qs(url)
+
+ keywords = traverse_obj(queries, ("key", 0))
+ search_url = update_url_query("https://api.annex-cf.radiko.jp/v1/podcasts/channels/search_with_keywords_by_offset", {
+ "keywords": keywords,
+ "uid": "".join(random.choices("0123456789abcdef", k=32)),
+ "limit": 50, # result limit. the actual limit before the api errors is 5000, but that seems a bit rude so i'll leave as 50 like the radio one
+ })
+
+ return self.playlist_result(
+ OnDemandPagedList(lambda idx: self._pagefunc(search_url, idx), 50),
+ title=keywords,
+ id=keywords, # i have to put some kind of id or the tests fail
+ )
diff --git a/yt_dlp_plugins/extractor/radiko_protobufs.py b/yt_dlp_plugins/extractor/radiko_protobufs.py
new file mode 100755
index 0000000..a8bbec1
--- /dev/null
+++ b/yt_dlp_plugins/extractor/radiko_protobufs.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+import struct
+import random
+
+from yt_dlp_plugins.extractor.radiko_dependencies import protobug
+
+if protobug: # i suppose it works lmao
+
+
def add_grpc_header(protobuf_data):
	"""Frame a serialized protobuf message for grpc-web transport.

	Prepends the 5-byte frame header: one compression-flag byte (0x00,
	uncompressed) followed by the message length as a big-endian uint32.
	"""
	return struct.pack('>BI', 0, len(protobuf_data)) + protobuf_data
+
def strip_grpc_response(response):
	"""Extract the raw protobuf payload from a grpc-web response body.

	Drops the 5-byte frame header, then cuts everything from the final
	"grpc-status:" trailer onwards. Returns b"" when no trailer is present.
	"""
	framed = response[5:]
	payload, _sep, _trailer = framed.rpartition(b"grpc-status:")
	return payload
+
def _download_grpc(self, url_or_request, video_id, response_message, note="Downloading GRPC information", *args, **kwargs):
	"""Perform a grpc-web request and decode the protobuf response.

	self -- an InfoExtractor instance (provides _request_webpage).
	response_message -- protobug message class to decode the reply into,
	                    or None when no response body is expected.
	Keyword args: 'data' (required) is the protobug request message to
	serialize; 'headers' (optional) are merged over the grpc-web defaults.
	Remaining args/kwargs pass through to _request_webpage.
	Returns the decoded message, or None when the reply has no payload.
	"""
	urlh = self._request_webpage(url_or_request, video_id,
		headers={
			'Content-Type': 'application/grpc-web+proto',
			'X-User-Agent': 'grpc-web-javascript/0.1',
			'X-Grpc-Web': '1',
			# default to {} so callers without extra headers don't KeyError
			**kwargs.pop('headers', {}),
		},
		data=add_grpc_header(protobug.dumps(kwargs.pop('data'))), note=note,
		*args, **kwargs,
	)
	response = urlh.read()

	protobuf = strip_grpc_response(response)
	if len(protobuf) > 0:
		return protobug.loads(protobuf, response_message)
+
+
@protobug.message
class SignUpRequest:
	"""radiko.UserService/SignUp request body."""
	# lsid: client-generated 32-char hex device id (see sign_up below)
	lsid: protobug.String = protobug.field(1)
+
def sign_up(self):
	"""Register a freshly generated random lsid with the UserService.

	This is meant to be done only once; keep the returned lsid so that
	sign_in can later exchange it for an API auth token.
	"""
	lsid = ''.join(random.choices('0123456789abcdef', k=32))
	# the response body is irrelevant here -- only the registration side
	# effect matters, so the return value is deliberately discarded
	_download_grpc(self, "https://api.annex.radiko.jp/radiko.UserService/SignUp",
		"UserService", None, note="Registering ID", headers={'Origin': 'https://radiko.jp'},
		data=SignUpRequest(lsid=lsid),
	)
	return lsid
+
+
@protobug.message
class SignInRequest:
	"""radiko.UserService/SignIn request body."""
	# lsid: the id previously registered via SignUp
	lsid: protobug.String = protobug.field(2)
	# area: radiko area code, e.g. "JP13" (Tokyo)
	area: protobug.String = protobug.field(3)

@protobug.message
class SignInResponse:
	"""radiko.UserService/SignIn response body."""
	# jwt: bearer token used to authorize later PodcastService calls
	jwt: protobug.String = protobug.field(1)
+
+
def sign_in(self, lsid):
	"""Exchange a registered lsid for a JWT bearer token.

	The area is fixed to "JP13" (Tokyo) for the sign-in request.
	"""
	response = _download_grpc(self, "https://api.annex.radiko.jp/radiko.UserService/SignIn",
		"UserService", SignInResponse, note="Getting auth token", headers={'Origin': 'https://radiko.jp'},
		data=SignInRequest(lsid=lsid, area="JP13"),
	)
	return response.jwt
+
+
def auth_userservice(self):
	"""Return a JWT for the annex UserService API, signing up on first use.

	The lsid is persisted in yt-dlp's cache ("rajiko"/"UserService") so the
	one-time SignUp only ever happens once per machine.
	"""
	cached = self.cache.load("rajiko", "UserService")
	if cached is None:
		lsid = sign_up(self)
		self.cache.store("rajiko", "UserService", {"lsid": lsid})
	else:
		lsid = cached.get("lsid")
	return sign_in(self, lsid)
+
+
@protobug.message
class ListPodcastEpisodesRequest:
	"""radiko.PodcastService/ListPodcastEpisodes request body."""
	channel_id: protobug.String = protobug.field(1)
	sort_by_latest: protobug.Bool = protobug.field(2)
	page_length: protobug.Int32 = protobug.field(4)
	# cursor: id of the last episode already seen; None requests the first page
	cursor: protobug.String = protobug.field(5, default=None)
+
+
@protobug.message
class Audio:
	"""Audio-file metadata attached to a PodcastEpisode."""
	revision: protobug.Int32 = protobug.field(1)
	url: protobug.String = protobug.field(2)
	fileSize: protobug.Int64 = protobug.field(3)  # presumably bytes -- TODO confirm
	durationSec: protobug.Int64 = protobug.field(4)
	transcoded: protobug.Bool = protobug.field(5)
+
@protobug.message
class EpisodeStartAt:
	"""Timestamp split into seconds + nanoseconds (protobuf Timestamp style).

	NOTE(review): presumably seconds since the Unix epoch -- confirm against
	actual API responses.
	"""
	seconds: protobug.UInt64 = protobug.field(1)
	nanos: protobug.UInt64 = protobug.field(2, default=0)
+
+
@protobug.message
class PodcastEpisode:
	"""One episode entry from radiko.PodcastService/ListPodcastEpisodes."""
	id: protobug.String = protobug.field(1)
	workspaceId: protobug.String = protobug.field(2)
	channelId: protobug.String = protobug.field(3)
	title: protobug.String = protobug.field(4)
	description: protobug.String = protobug.field(5)

	audio: Audio = protobug.field(8)
	# channel-level metadata duplicated onto every episode by the API
	channelImageUrl: protobug.String = protobug.field(16)
	channelTitle: protobug.String = protobug.field(17)
	channelStationName: protobug.String = protobug.field(18)
	channelAuthor: protobug.String = protobug.field(19)

	channelThumbnailImageUrl: protobug.String = protobug.field(21)
	channelStationType: protobug.UInt32 = protobug.field(22)
	startAt: EpisodeStartAt = protobug.field(27)
	isEnabled: protobug.Bool = protobug.field(29)
	hasTranscription: protobug.Bool = protobug.field(32)

	# per-episode artwork; None when the episode has no image of its own
	imageUrl: protobug.String = protobug.field(7, default=None)
	thumbnailImageUrl: protobug.String = protobug.field(20, default=None)
+
@protobug.message
class ListPodcastEpisodesResponse:
	"""radiko.PodcastService/ListPodcastEpisodes response body."""
	episodes: list[PodcastEpisode] = protobug.field(1)
	# hasNextPage: whether another page can be fetched with a new cursor
	hasNextPage: protobug.Bool = protobug.field(2, default=False)
+
+
def get_podcast_episodes(self, channel_id, jwt, cursor, page_length=20):
	"""Fetch one page of a podcast channel's episode list.

	The website requests 20 items per page (the default here). cursor is
	the id of the last episode already seen, or None for the first page.
	Returns a ListPodcastEpisodesResponse, or None on an empty reply.
	"""
	request = ListPodcastEpisodesRequest(
		channel_id=channel_id,
		sort_by_latest=True,
		page_length=page_length,
		cursor=cursor,
	)
	return _download_grpc(self, 'https://api.annex.radiko.jp/radiko.PodcastService/ListPodcastEpisodes',
		channel_id, ListPodcastEpisodesResponse, note="Downloading episode listings",
		headers={'Authorization': f'Bearer {jwt}'},
		data=request,
	)