aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/yt_dlp_plugins/extractor/radiko.py
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp_plugins/extractor/radiko.py')
-rw-r--r--yt_dlp_plugins/extractor/radiko.py272
1 files changed, 137 insertions, 135 deletions
diff --git a/yt_dlp_plugins/extractor/radiko.py b/yt_dlp_plugins/extractor/radiko.py
index d12f203..2996290 100644
--- a/yt_dlp_plugins/extractor/radiko.py
+++ b/yt_dlp_plugins/extractor/radiko.py
@@ -6,18 +6,22 @@ import urllib.parse
import pkgutil
from yt_dlp.extractor.common import InfoExtractor
+from yt_dlp.networking.exceptions import HTTPError
from yt_dlp.utils import (
+ ExtractorError,
OnDemandPagedList,
clean_html,
int_or_none,
join_nonempty,
parse_qs,
traverse_obj,
+ urlencode_postdata,
url_or_none,
update_url_query,
)
import yt_dlp_plugins.extractor.radiko_time as rtime
+import yt_dlp_plugins.extractor.radiko_hacks as hacks
class _RadikoBaseIE(InfoExtractor):
@@ -82,7 +86,9 @@ class _RadikoBaseIE(InfoExtractor):
_APP_VERSIONS = ["7.5.0", "7.4.17", "7.4.16", "7.4.15", "7.4.14", "7.4.13", "7.4.12", "7.4.11", "7.4.10", "7.4.9", "7.4.8", "7.4.7", "7.4.6", "7.4.5", "7.4.4", "7.4.3", "7.4.2", "7.4.1", "7.4.0", "7.3.8", "7.3.7", "7.3.6", "7.3.1", "7.3.0", "7.2.11", "7.2.10"]
_DELIVERED_ONDEMAND = ('radiko.jp',)
- _DOESNT_WORK_WITH_FFMPEG = ('tf-f-rpaa-radiko.smartstream.ne.jp', 'si-f-radiko.smartstream.ne.jp')
+ _DOESNT_WORK_WITH_FFMPEG = ('tf-f-rpaa-radiko.smartstream.ne.jp', 'si-f-radiko.smartstream.ne.jp', 'alliance-stream-radiko.smartstream.ne.jp')
+
+ _has_tf30 = None
def _index_regions(self):
region_data = {}
@@ -178,16 +184,21 @@ class _RadikoBaseIE(InfoExtractor):
"X-Radiko-AuthToken": auth_token,
},
"user": auth2_headers["X-Radiko-User"],
+ "has_tf30": self._has_tf30,
}
if not region_mismatch:
self.cache.store("rajiko", station_region, auth_data)
return auth_data
- def _auth(self, station_region):
+ def _auth(self, station_region, need_tf30=False):
cachedata = self.cache.load("rajiko", station_region)
self.write_debug(cachedata)
if cachedata is not None:
+ if need_tf30 and not cachedata.get("has_tf30"):
+ self.write_debug("Cached token doesn't have timefree 30, getting a new one")
+ return self._negotiate_token(station_region)
+
auth_headers = cachedata.get("token")
response = self._download_webpage("https://radiko.jp/v2/api/auth_check", station_region, "Checking cached token",
headers=auth_headers, expected_status=401)
@@ -205,6 +216,17 @@ class _RadikoBaseIE(InfoExtractor):
station = region.find(f'.//station/id[.="{station_id}"]/..') # a <station> with an <id> of our station_id
station_name = station.find("name").text
station_url = url_or_none(station.find("href").text)
+
+ thumbnails = []
+ for logo in station.findall("logo"):
+ thumbnails.append({
+ "url": logo.text,
+ **traverse_obj(logo.attrib, ({
+ "width": ("width", {int_or_none}),
+ "height": ("height", {int_or_none}),
+ }))
+ })
+
meta = {
"id": station_id,
"title": station_name,
@@ -218,7 +240,7 @@ class _RadikoBaseIE(InfoExtractor):
"uploader_id": station_id,
"uploader_url": station_url,
- "thumbnail": url_or_none(station.find("banner").text),
+ "thumbnails": thumbnails,
}
self.cache.store("rajiko", station_id, {
"expiry": (now + datetime.timedelta(days=1)).timestamp(),
@@ -229,8 +251,16 @@ class _RadikoBaseIE(InfoExtractor):
self.to_screen(f"{station_id}: Using cached station metadata")
return cachedata.get("meta")
- def _get_station_formats(self, station, timefree, auth_data, start_at=None, end_at=None):
- device = self._configuration_arg('device', ['aSmartPhone7a'], casesense=True, ie_key="rajiko")[0] # aSmartPhone7a formats = always happy path
+ def _get_station_formats(self, station, timefree, auth_data, start_at=None, end_at=None, use_pc_html5=False):
+ config_device = traverse_obj(self._configuration_arg('device', casesense=True, ie_key="rajiko"), 0)
+
+ if not use_pc_html5:
+ device = config_device or "aSmartPhone7a" # this device only gives us the on-demand one for timefree
+ # that's good imo - we just get the one that works, and don't bother with probing the rest as well
+ else:
+ device = config_device or "pc_html5" # the on-demand one doesnt work with timefree30 stuff sadly
+ # so just use pc_html5 which has everything
+
url_data = self._download_xml(f"https://radiko.jp/v3/station/stream/{device}/{station}.xml",
station, note=f"Downloading {device} stream information")
@@ -238,6 +268,8 @@ class _RadikoBaseIE(InfoExtractor):
formats = []
timefree_int = 1 if timefree else 0
+ do_blacklist_streams = not len(self._configuration_arg("no_stream_blacklist", ie_key="rajiko")) > 0
+ do_as_live_chunks = not len(self._configuration_arg("no_as_live_chunks", ie_key="rajiko")) > 0
for element in url_data.findall(f".//url[@timefree='{timefree_int}'][@areafree='0']/playlist_create_url"):
# find <url>s with matching timefree and no areafree, then get their <playlist_create_url>
url = element.text
@@ -249,7 +281,7 @@ class _RadikoBaseIE(InfoExtractor):
"station_id": station,
"l": "15", # l = length, ie how many seconds in the live m3u8 (max 300)
"lsid": auth_data["user"],
- "type": "b", # it is a mystery
+ "type": "b", # a/b = in-region, c = areafree
})
if timefree:
@@ -268,7 +300,7 @@ class _RadikoBaseIE(InfoExtractor):
preference = -1
entry_protocol = 'm3u8'
- if domain in self._DOESNT_WORK_WITH_FFMPEG:
+ if domain in self._DOESNT_WORK_WITH_FFMPEG and do_blacklist_streams:
self.write_debug(f"skipping {domain} (known not working)")
continue
if domain in self._DELIVERED_ONDEMAND:
@@ -277,10 +309,30 @@ class _RadikoBaseIE(InfoExtractor):
preference = 1
entry_protocol = None
- formats += self._extract_m3u8_formats(
- playlist_url, station, m3u8_id=domain, fatal=False, headers=auth_data["token"],
- live=delivered_live, preference=preference, entry_protocol=entry_protocol,
- note=f"Downloading m3u8 information from {domain}")
+ auth_headers = auth_data["token"]
+
+ if delivered_live and timefree and do_as_live_chunks:
+
+ chunks_playlist = hacks._generate_as_live_playlist(
+ self, playlist_url, start_at, end_at, domain, auth_headers
+ )
+
+ formats.append({
+ "format_id": join_nonempty(domain, "chunked"),
+ "hls_media_playlist_data": chunks_playlist,
+ "preference": preference,
+ "ext": "m4a",
+
+ # fallback to live for ffmpeg etc
+ "url": playlist_url,
+ "http_headers": auth_headers,
+ })
+ else:
+
+ formats += self._extract_m3u8_formats(
+ playlist_url, station, m3u8_id=domain, fatal=False, headers=auth_headers,
+ live=delivered_live, preference=preference, entry_protocol=entry_protocol,
+ note=f"Downloading m3u8 information from {domain}")
return formats
@@ -299,7 +351,7 @@ class RadikoLiveIE(_RadikoBaseIE):
"id": "FMT",
"title": "re:^TOKYO FM.+$",
"alt_title": "TOKYO FM",
- "thumbnail": "https://radiko.jp/res/banner/FMT/20220512162447.jpg",
+ "thumbnail": "https://radiko.jp/v2/static/station/logo/FMT/lrtrim/688x160.png",
"channel": "TOKYO FM",
"channel_id": "FMT",
@@ -319,7 +371,7 @@ class RadikoLiveIE(_RadikoBaseIE):
"id": "NORTHWAVE",
"title": "re:^FM NORTH WAVE.+$",
"alt_title": "FM NORTH WAVE",
- "thumbnail": "https://radiko.jp/res/banner/NORTHWAVE/20150731161543.png",
+ "thumbnail": "https://radiko.jp/v2/static/station/logo/NORTHWAVE/lrtrim/688x160.png",
"uploader": "FM NORTH WAVE",
"uploader_url": "https://www.fmnorth.co.jp/",
@@ -340,7 +392,7 @@ class RadikoLiveIE(_RadikoBaseIE):
"id": "RN1",
"title": "re:^ラジオNIKKEI第1.+$",
"alt_title": "RADIONIKKEI",
- "thumbnail": "https://radiko.jp/res/banner/RN1/20120802154152.png",
+ "thumbnail": "https://radiko.jp/v2/static/station/logo/RN1/lrtrim/688x160.png",
"channel": "ラジオNIKKEI第1",
"channel_url": "http://www.radionikkei.jp/",
@@ -357,7 +409,7 @@ class RadikoLiveIE(_RadikoBaseIE):
region = self._get_station_region(station)
station_meta = self._get_station_meta(region, station)
auth_data = self._auth(region)
- formats = self._get_station_formats(station, False, auth_data)
+ formats = self._get_station_formats(station, False, auth_data, use_pc_html5=True)
return {
"is_live": True,
@@ -368,71 +420,36 @@ class RadikoLiveIE(_RadikoBaseIE):
class RadikoTimeFreeIE(_RadikoBaseIE):
+ _NETRC_MACHINE = "rajiko"
_VALID_URL = r"https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-_]+)/(?P<id>\d+)"
- _TESTS = [{
- "url": "https://radiko.jp/#!/ts/INT/20240809230000",
- "info_dict": {
- "live_status": "was_live",
- "ext": "m4a",
- "id": "INT-20240809230000",
-
- "title": "TOKYO MOON",
- "series": "Tokyo Moon",
- "description": "md5:20e68d2f400a391fa34d4e7c8c702cb8",
- "chapters": "count:14",
- "thumbnail": "https://program-static.cf.radiko.jp/ehwtw6mcvy.jpg",
-
- "upload_date": "20240809",
- "timestamp": 1723212000.0,
- "release_date": "20240809",
- "release_timestamp": 1723215600.0,
- "duration": 3600,
-
- "channel": "interfm",
- "channel_id": "INT",
- "channel_url": "https://www.interfm.co.jp/",
- "uploader": "interfm",
- "uploader_id": "INT",
- "uploader_url": "https://www.interfm.co.jp/",
-
- "cast": ["松浦\u3000俊夫"],
- "tags": ["松浦俊夫"],
- },
- }, {
- # late-night/early-morning show to test broadcast day checking
- "url": "https://radiko.jp/#!/ts/TBS/20240810033000",
- "info_dict": {
- "live_status": "was_live",
- "ext": "m4a",
- "id": "TBS-20240810033000",
-
- "title": "CITY CHILL CLUB",
- "series": "CITY CHILL CLUB",
- "description": "md5:3fba2c1125059bed27247c0be90e58fa",
- "chapters": "count:22",
- "thumbnail": "https://program-static.cf.radiko.jp/ku7t4ztnaq.jpg",
-
- "upload_date": "20240809",
- "timestamp": 1723228200.0,
- "release_date": "20240809",
- "release_timestamp": 1723233600.0,
- "duration": 5400,
-
- "channel": "TBSラジオ",
- "channel_url": "https://www.tbsradio.jp/",
- "channel_id": "TBS",
- "uploader": "TBSラジオ",
- "uploader_url": "https://www.tbsradio.jp/",
- "uploader_id": "TBS",
-
- "tags": ["CCC905", "音楽との出会いが楽しめる", "人気アーティストトーク", "音楽プロデューサー出演", "ドライブ中におすすめ", "寝る前におすすめ", "学生におすすめ"],
- "cast": ["PES"],
- },
- }]
+ # TESTS use a custom-ish script that updates the airdates automatically, see misc/test_extractors.py
+
+ def _perform_login(self, username, password):
+ try:
+ login_info = self._download_json('https://radiko.jp/ap/member/webapi/member/login', None, note='Logging in',
+ data=urlencode_postdata({'mail': username, 'pass': password}))
+ self._has_tf30 = '2' in login_info.get('privileges')
+ # areafree = 1, timefree30 = 2, double plan = both
+ self.write_debug({**login_info, "radiko_session": "PRIVATE", "member_ukey": "PRIVATE"})
+ except ExtractorError as error:
+ if isinstance(error.cause, HTTPError) and error.cause.status == 401:
+ raise ExtractorError('Invalid username and/or password', expected=True)
+ raise
+
+ def _check_tf30(self):
+ if self._has_tf30 is not None:
+ return self._has_tf30
+ if self._get_cookies('https://radiko.jp').get('radiko_session') is None:
+ return
+ account_info = self._download_json('https://radiko.jp/ap/member/webapi/v2/member/login/check',
+ None, note='Checking account status from cookies', expected_status=400)
+ self.write_debug({**account_info, "user_key": "PRIVATE"})
+ self._has_tf30 = account_info.get('timefreeplus') == '1'
+ return self._has_tf30
def _get_programme_meta(self, station_id, url_time):
day = url_time.broadcast_day_string()
- meta = self._download_json(f"https://radiko.jp/v4/program/station/date/{day}/{station_id}.json", station_id,
+ meta = self._download_json(f"https://api.radiko.jp/program/v4/date/{day}/station/{station_id}.json", station_id,
note="Downloading programme data")
programmes = traverse_obj(meta, ("stations", lambda _, v: v["station_id"] == station_id,
"programs", "program"), get_all=False)
@@ -467,10 +484,12 @@ class RadikoTimeFreeIE(_RadikoBaseIE):
"start_time_gte": start.isoformat(),
"end_time_lt": end.isoformat(),
})
- data = self._download_json(api_url, video_id, note="Downloading tracklist").get("data")
+ data_json = self._download_json(
+ api_url, video_id, note="Downloading tracklist", errnote="Downloading tracklist", fatal=False
+ )
chapters = []
- for track in data:
+ for track in traverse_obj(data_json, "data") or []:
artist = traverse_obj(track, ("artist", "name")) or track.get("artist_name")
chapters.append({
"title": join_nonempty(artist, track.get("title"), delim=" - "),
@@ -492,9 +511,13 @@ class RadikoTimeFreeIE(_RadikoBaseIE):
start = times[0]
end = times[1]
now = datetime.datetime.now(tz=rtime.JST)
+ expiry_free, expiry_tf30 = end.expiry()
- if end.expiry(False) < now:
+ if expiry_tf30 < now:
self.raise_no_formats("Programme is no longer available.", video_id=meta["id"], expected=True)
+ need_tf30 = expiry_free < now
+ if need_tf30 and not self._check_tf30():
+ self.raise_login_required("Programme is only available with a Timefree 30 subscription")
elif start > now:
self.raise_no_formats("Programme has not aired yet.", video_id=meta["id"], expected=True)
live_status = "is_upcoming"
@@ -505,12 +528,14 @@ class RadikoTimeFreeIE(_RadikoBaseIE):
region = self._get_station_region(station)
station_meta = self._get_station_meta(region, station)
chapters = self._extract_chapters(station, start, end, video_id=meta["id"])
- auth_data = self._auth(region)
- formats = self._get_station_formats(station, True, auth_data, start_at=start, end_at=end)
+ auth_data = self._auth(region, need_tf30=need_tf30)
+ formats = self._get_station_formats(station, True, auth_data, start_at=start, end_at=end, use_pc_html5=need_tf30)
return {
**station_meta,
- "alt_title": None,
+ "alt_title": None, # override from station metadata
+ "thumbnails": None,
+
**meta,
"chapters": chapters,
"formats": formats,
@@ -548,22 +573,37 @@ class RadikoSearchIE(InfoExtractor):
}]
def _strip_date(self, date):
+ # lazy way of making a timestring (from eg 2025-05-20 01:00:00)
return date.replace(" ", "").replace("-", "").replace(":", "")
def _pagefunc(self, url, idx):
url = update_url_query(url, {"page_idx": idx})
data = self._download_json(url, None, note=f"Downloading page {idx+1}")
- return [self.url_result("https://radiko.jp/#!/ts/{station}/{time}".format(
- station = i.get("station_id"), time = self._strip_date(i.get("start_time"))))
- for i in data.get("data")]
+ results = []
+ for r in data.get("data"):
+ station = r.get("station_id")
+ timestring = self._strip_date(r.get("start_time"))
+
+ results.append(
+ self.url_result(
+ f"https://radiko.jp/#!/ts/{station}/{timestring}",
+ id=join_nonempty(station, timestring)
+ )
+ )
+ return results
def _real_extract(self, url):
url = url.replace("/#!/", "/!/", 1)
# urllib.parse interprets the path as just one giant fragment because of the #, so we hack it away
queries = parse_qs(url)
- search_url = update_url_query("https://radiko.jp/v3/api/program/search", {
+ if queries.get("cul_area_id"):
+ queries["cur_area_id"] = queries.pop("cul_area_id")
+ # site used to use "cul_area_id" in the search url, now it uses "cur_area_id" (with an r)
+ # and outright rejects the old one with HTTP Error 415: Unsupported Media Type
+
+ search_url = update_url_query("https://api.annex-cf.radiko.jp/v1/programs/legacy/perl/program/search", {
**queries,
"uid": "".join(random.choices("0123456789abcdef", k=32)),
"app_id": "pc",
@@ -588,44 +628,16 @@ class RadikoSearchIE(InfoExtractor):
class RadikoShareIE(InfoExtractor):
_VALID_URL = r"https?://(?:www\.)?radiko\.jp/share/"
- _TESTS = [{
- # 29-hour time -> 24-hour time
- "url": "http://radiko.jp/share/?sid=FMT&t=20240802240000",
- "info_dict": {
- "live_status": "was_live",
- "ext": "m4a",
- "id": "FMT-20240803000000", # the time given (24:00) works out to 00:00 the next day
-
- "title": "JET STREAM",
- "series": "JET STREAM",
- "description": "md5:c1a2172036ebb7a54eeafb47e0a08a50",
- "chapters": "count:9",
- "thumbnail": "https://program-static.cf.radiko.jp/greinlrspi.jpg",
-
- "upload_date": "20240802",
- "timestamp": 1722610800.0,
- "release_date": "20240802",
- "release_timestamp": 1722614100.0,
- "duration": 3300,
-
- "channel": "TOKYO FM",
- "channel_id": "FMT",
- "channel_url": "https://www.tfm.co.jp/",
- "uploader": "TOKYO FM",
- "uploader_id": "FMT",
- "uploader_url": "https://www.tfm.co.jp/",
-
- "cast": ["福山雅治"],
- "tags": ["福山雅治", "夜間飛行", "音楽との出会いが楽しめる", "朗読を楽しめる", "寝る前に聴きたい"],
- }
- }]
def _real_extract(self, url):
queries = parse_qs(url)
station = traverse_obj(queries, ("sid", 0))
time = traverse_obj(queries, ("t", 0))
time = rtime.RadikoShareTime(time).timestring()
- return self.url_result(f"https://radiko.jp/#!/ts/{station}/{time}", RadikoTimeFreeIE)
+ return self.url_result(
+ f"https://radiko.jp/#!/ts/{station}/{time}", RadikoTimeFreeIE,
+ id=join_nonempty(station, time)
+ )
class RadikoStationButtonIE(InfoExtractor):
@@ -638,19 +650,9 @@ class RadikoStationButtonIE(InfoExtractor):
"info_dict": {
"ext": "m4a",
'live_status': 'is_live',
-
"id": "QRR",
- "title": "re:^文化放送.+$",
- 'alt_title': 'JOQR BUNKA HOSO',
- 'thumbnail': 'https://radiko.jp/res/banner/QRR/20240423144553.png',
- 'channel': '文化放送',
- 'channel_id': 'QRR',
- 'channel_url': 'http://www.joqr.co.jp/',
- 'uploader': '文化放送',
- 'uploader_id': 'QRR',
- 'uploader_url': 'http://www.joqr.co.jp/',
-
- }
+ },
+ 'only_matching': True,
}]
_WEBPAGE_TESTS = [{
@@ -661,7 +663,7 @@ class RadikoStationButtonIE(InfoExtractor):
'id': 'CCL',
"title": "re:^FM COCOLO.+$",
'alt_title': 'FM COCOLO',
- 'thumbnail': 'https://radiko.jp/res/banner/CCL/20161014144826.png',
+ 'thumbnail': 'https://radiko.jp/v2/static/station/logo/CCL/lrtrim/688x160.png',
'channel': 'FM COCOLO',
'channel_id': 'CCL',
@@ -690,7 +692,7 @@ class RadikoPersonIE(InfoExtractor):
},{
"url": "https://radiko.jp/persons/11421",
"params": {'extractor_args': {'rajiko': {'key_station_only': ['']}}},
- "playlist_count": 1,
+ "playlist_mincount": 1,
"info_dict": {
"id": "person-11421",
},
@@ -701,9 +703,9 @@ class RadikoPersonIE(InfoExtractor):
now = rtime.RadikoTime.now(tz=rtime.JST)
- min_start = rtime.earliest_available(False)
- # we set the earliest time as the earliest we can get,
- # so, the start of the broadcast day 1 week ago
+ min_start = (now - datetime.timedelta(days=30)).broadcast_day_start()
+ # we set the earliest time as the earliest we can get (or at least, that it's possible to get),
+ # so, the start of the broadcast day 30 days ago
# that way we can get everything we can actually download, including stuff that aired at eg "26:00"
person_api_url = update_url_query("https://api.radiko.jp/program/api/v1/programs", {