1 files changed, 137 insertions, 135 deletions
diff --git a/yt_dlp_plugins/extractor/radiko.py b/yt_dlp_plugins/extractor/radiko.py
index d12f203..2996290 100644
--- a/yt_dlp_plugins/extractor/radiko.py
+++ b/yt_dlp_plugins/extractor/radiko.py
@@ -6,18 +6,22 @@ import urllib.parse
 import pkgutil
 
 from yt_dlp.extractor.common import InfoExtractor
+from yt_dlp.networking.exceptions import HTTPError
 from yt_dlp.utils import (
+	ExtractorError,
 	OnDemandPagedList,
 	clean_html,
 	int_or_none,
 	join_nonempty,
 	parse_qs,
 	traverse_obj,
+	urlencode_postdata,
 	url_or_none,
 	update_url_query,
 )
 
 import yt_dlp_plugins.extractor.radiko_time as rtime
+import yt_dlp_plugins.extractor.radiko_hacks as hacks
 
 
 class _RadikoBaseIE(InfoExtractor):
@@ -82,7 +86,9 @@ class _RadikoBaseIE(InfoExtractor):
 	_APP_VERSIONS = ["7.5.0", "7.4.17", "7.4.16", "7.4.15", "7.4.14", "7.4.13", "7.4.12", "7.4.11", "7.4.10", "7.4.9", "7.4.8", "7.4.7", "7.4.6", "7.4.5", "7.4.4", "7.4.3", "7.4.2", "7.4.1", "7.4.0", "7.3.8", "7.3.7", "7.3.6", "7.3.1", "7.3.0", "7.2.11", "7.2.10"]
 
 	_DELIVERED_ONDEMAND = ('radiko.jp',)
-	_DOESNT_WORK_WITH_FFMPEG = ('tf-f-rpaa-radiko.smartstream.ne.jp', 'si-f-radiko.smartstream.ne.jp')
+	_DOESNT_WORK_WITH_FFMPEG = ('tf-f-rpaa-radiko.smartstream.ne.jp', 'si-f-radiko.smartstream.ne.jp', 'alliance-stream-radiko.smartstream.ne.jp')
+
+	_has_tf30 = None
 
 	def _index_regions(self):
 		region_data = {}
@@ -178,16 +184,21 @@ class _RadikoBaseIE(InfoExtractor):
 				"X-Radiko-AuthToken": auth_token,
 			},
 			"user": auth2_headers["X-Radiko-User"],
+			"has_tf30": self._has_tf30,
 		}
 
 		if not region_mismatch:
 			self.cache.store("rajiko", station_region, auth_data)
 		return auth_data
 
-	def _auth(self, station_region):
+	def _auth(self, station_region, need_tf30=False):
 		cachedata = self.cache.load("rajiko", station_region)
 		self.write_debug(cachedata)
 		if cachedata is not None:
+			if need_tf30 and not cachedata.get("has_tf30"):
+				self.write_debug("Cached token doesn't have timefree 30, getting a new one")
+				return self._negotiate_token(station_region)
+
 			auth_headers = cachedata.get("token")
 			response = self._download_webpage("https://radiko.jp/v2/api/auth_check", station_region, "Checking cached token",
 				headers=auth_headers, expected_status=401)
@@ -205,6 +216,17 @@ class _RadikoBaseIE(InfoExtractor):
 			station = region.find(f'.//station/id[.="{station_id}"]/..')  # a <station> with an <id> of our station_id
 			station_name = station.find("name").text
 			station_url = url_or_none(station.find("href").text)
+
+			thumbnails = []
+			for logo in station.findall("logo"):
+				thumbnails.append({
+					"url": logo.text,
+					**traverse_obj(logo.attrib, ({
+						"width": ("width", {int_or_none}),
+						"height": ("height", {int_or_none}),
+					}))
+				})
+
 			meta = {
 				"id": station_id,
 				"title": station_name,
@@ -218,7 +240,7 @@ class _RadikoBaseIE(InfoExtractor):
 				"uploader_id": station_id,
 				"uploader_url": station_url,
 
-				"thumbnail": url_or_none(station.find("banner").text),
+				"thumbnails": thumbnails,
 			}
 			self.cache.store("rajiko", station_id, {
 				"expiry": (now + datetime.timedelta(days=1)).timestamp(),
@@ -229,8 +251,16 @@ class _RadikoBaseIE(InfoExtractor):
 		self.to_screen(f"{station_id}: Using cached station metadata")
 		return cachedata.get("meta")
 
-	def _get_station_formats(self, station, timefree, auth_data, start_at=None, end_at=None):
-		device = self._configuration_arg('device', ['aSmartPhone7a'], casesense=True, ie_key="rajiko")[0]  # aSmartPhone7a formats = always happy path
+	def _get_station_formats(self, station, timefree, auth_data, start_at=None, end_at=None, use_pc_html5=False):
+		config_device = traverse_obj(self._configuration_arg('device', casesense=True, ie_key="rajiko"), 0)
+
+		if not use_pc_html5:
+			device = config_device or "aSmartPhone7a"  # this device only gives us the on-demand one for timefree
+			# that's good imo - we just get the one that works, and don't bother with probing the rest as well
+		else:
+			device = config_device or "pc_html5" # the on-demand one doesnt work with timefree30 stuff sadly
+			# so just use pc_html5 which has everything
+
 		url_data = self._download_xml(f"https://radiko.jp/v3/station/stream/{device}/{station}.xml",
 			station, note=f"Downloading {device} stream information")
 
@@ -238,6 +268,8 @@ class _RadikoBaseIE(InfoExtractor):
 		formats = []
 
 		timefree_int = 1 if timefree else 0
+		do_blacklist_streams = not len(self._configuration_arg("no_stream_blacklist", ie_key="rajiko")) > 0
+		do_as_live_chunks = not len(self._configuration_arg("no_as_live_chunks", ie_key="rajiko")) > 0
 		for element in url_data.findall(f".//url[@timefree='{timefree_int}'][@areafree='0']/playlist_create_url"):
 		# find <url>s with matching timefree and no areafree, then get their <playlist_create_url>
 			url = element.text
@@ -249,7 +281,7 @@ class _RadikoBaseIE(InfoExtractor):
 					"station_id": station,
 					"l": "15",  # l = length, ie how many seconds in the live m3u8 (max 300)
 					"lsid": auth_data["user"],
-					"type": "b",  # it is a mystery
+					"type": "b",  # a/b = in-region, c = areafree
 				})
 
 			if timefree:
@@ -268,7 +300,7 @@ class _RadikoBaseIE(InfoExtractor):
 			preference = -1
 			entry_protocol = 'm3u8'
 
-			if domain in self._DOESNT_WORK_WITH_FFMPEG:
+			if domain in self._DOESNT_WORK_WITH_FFMPEG and do_blacklist_streams:
 				self.write_debug(f"skipping {domain} (known not working)")
 				continue
 			if domain in self._DELIVERED_ONDEMAND:
@@ -277,10 +309,30 @@ class _RadikoBaseIE(InfoExtractor):
 				preference = 1
 				entry_protocol = None
 
-			formats += self._extract_m3u8_formats(
-				playlist_url, station, m3u8_id=domain, fatal=False, headers=auth_data["token"],
-				live=delivered_live, preference=preference, entry_protocol=entry_protocol,
-				note=f"Downloading m3u8 information from {domain}")
+			auth_headers = auth_data["token"]
+
+			if delivered_live and timefree and do_as_live_chunks:
+
+				chunks_playlist = hacks._generate_as_live_playlist(
+					self, playlist_url, start_at, end_at, domain, auth_headers
+				)
+
+				formats.append({
+					"format_id": join_nonempty(domain, "chunked"),
+					"hls_media_playlist_data": chunks_playlist,
+					"preference": preference,
+					"ext": "m4a",
+
+					# fallback to live for ffmpeg etc
+					"url": playlist_url,
+					"http_headers": auth_headers,
+				})
+			else:
+
+				formats += self._extract_m3u8_formats(
+					playlist_url, station, m3u8_id=domain, fatal=False, headers=auth_headers,
+					live=delivered_live, preference=preference, entry_protocol=entry_protocol,
+					note=f"Downloading m3u8 information from {domain}")
 		return formats
 
 
@@ -299,7 +351,7 @@ class RadikoLiveIE(_RadikoBaseIE):
 			"id": "FMT",
 			"title": "re:^TOKYO FM.+$",
 			"alt_title": "TOKYO FM",
-			"thumbnail": "https://radiko.jp/res/banner/FMT/20220512162447.jpg",
+			"thumbnail": "https://radiko.jp/v2/static/station/logo/FMT/lrtrim/688x160.png",
 
 			"channel": "TOKYO FM",
 			"channel_id": "FMT",
@@ -319,7 +371,7 @@ class RadikoLiveIE(_RadikoBaseIE):
 			"id": "NORTHWAVE",
 			"title": "re:^FM NORTH WAVE.+$",
 			"alt_title": "FM NORTH WAVE",
-			"thumbnail": "https://radiko.jp/res/banner/NORTHWAVE/20150731161543.png",
+			"thumbnail": "https://radiko.jp/v2/static/station/logo/NORTHWAVE/lrtrim/688x160.png",
 
 			"uploader": "FM NORTH WAVE",
 			"uploader_url": "https://www.fmnorth.co.jp/",
@@ -340,7 +392,7 @@ class RadikoLiveIE(_RadikoBaseIE):
 			"id": "RN1",
 			"title": "re:^ラジオNIKKEI第1.+$",
 			"alt_title": "RADIONIKKEI",
-			"thumbnail": "https://radiko.jp/res/banner/RN1/20120802154152.png",
+			"thumbnail": "https://radiko.jp/v2/static/station/logo/RN1/lrtrim/688x160.png",
 
 			"channel": "ラジオNIKKEI第1",
 			"channel_url": "http://www.radionikkei.jp/",
@@ -357,7 +409,7 @@ class RadikoLiveIE(_RadikoBaseIE):
 		region = self._get_station_region(station)
 		station_meta = self._get_station_meta(region, station)
 		auth_data = self._auth(region)
-		formats = self._get_station_formats(station, False, auth_data)
+		formats = self._get_station_formats(station, False, auth_data, use_pc_html5=True)
 
 		return {
 			"is_live": True,
@@ -368,71 +420,36 @@ class RadikoLiveIE(_RadikoBaseIE):
 
 
 class RadikoTimeFreeIE(_RadikoBaseIE):
+	_NETRC_MACHINE = "rajiko"
 	_VALID_URL = r"https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-_]+)/(?P<id>\d+)"
-	_TESTS = [{
-		"url": "https://radiko.jp/#!/ts/INT/20240809230000",
-		"info_dict": {
-			"live_status": "was_live",
-			"ext": "m4a",
-			"id": "INT-20240809230000",
-
-			"title": "TOKYO MOON",
-			"series": "Tokyo Moon",
-			"description": "md5:20e68d2f400a391fa34d4e7c8c702cb8",
-			"chapters": "count:14",
-			"thumbnail": "https://program-static.cf.radiko.jp/ehwtw6mcvy.jpg",
-
-			"upload_date": "20240809",
-			"timestamp": 1723212000.0,
-			"release_date": "20240809",
-			"release_timestamp": 1723215600.0,
-			"duration": 3600,
-
-			"channel": "interfm",
-			"channel_id": "INT",
-			"channel_url": "https://www.interfm.co.jp/",
-			"uploader": "interfm",
-			"uploader_id": "INT",
-			"uploader_url": "https://www.interfm.co.jp/",
-
-			"cast": ["松浦\u3000俊夫"],
-			"tags": ["松浦俊夫"],
-		},
-	}, {
-		# late-night/early-morning show to test broadcast day checking
-		"url": "https://radiko.jp/#!/ts/TBS/20240810033000",
-		"info_dict": {
-			"live_status": "was_live",
-			"ext": "m4a",
-			"id": "TBS-20240810033000",
-
-			"title": "CITY CHILL CLUB",
-			"series": "CITY CHILL CLUB",
-			"description": "md5:3fba2c1125059bed27247c0be90e58fa",
-			"chapters": "count:22",
-			"thumbnail": "https://program-static.cf.radiko.jp/ku7t4ztnaq.jpg",
-
-			"upload_date": "20240809",
-			"timestamp": 1723228200.0,
-			"release_date": "20240809",
-			"release_timestamp": 1723233600.0,
-			"duration": 5400,
-
-			"channel": "TBSラジオ",
-			"channel_url": "https://www.tbsradio.jp/",
-			"channel_id": "TBS",
-			"uploader": "TBSラジオ",
-			"uploader_url": "https://www.tbsradio.jp/",
-			"uploader_id": "TBS",
-
-			"tags": ["CCC905", "音楽との出会いが楽しめる", "人気アーティストトーク", "音楽プロデューサー出演", "ドライブ中におすすめ", "寝る前におすすめ", "学生におすすめ"],
-			"cast": ["PES"],
-		},
-	}]
+	# TESTS use a custom-ish script that updates the airdates automatically, see misc/test_extractors.py
+
+	def _perform_login(self, username, password):  # log in with mail/password and record whether the account has timefree 30
+		try:
+			login_info = self._download_json('https://radiko.jp/ap/member/webapi/member/login', None, note='Logging in',
+				data=urlencode_postdata({'mail': username, 'pass': password}))
+			self._has_tf30 = '2' in (login_info.get('privileges') or ())  # a missing/null privileges field means "no timefree 30", not a TypeError
+			# privilege codes: areafree = 1, timefree30 = 2, double plan = both
+			self.write_debug({**login_info, "radiko_session": "PRIVATE", "member_ukey": "PRIVATE"})  # mask the session secrets before logging
+		except ExtractorError as error:
+			if isinstance(error.cause, HTTPError) and error.cause.status == 401:  # 401 is reported as bad credentials
+				raise ExtractorError('Invalid username and/or password', expected=True)
+			raise  # anything else is re-raised untouched
+
+	def _check_tf30(self):  # returns True/False for timefree 30 access, or None when there is no session cookie to check
+		if self._has_tf30 is not None:  # already determined (by _perform_login or an earlier call), reuse it
+			return self._has_tf30
+		if self._get_cookies('https://radiko.jp').get('radiko_session') is None:
+			return  # no radiko_session cookie: give up without a request, the falsy None is left uncached
+		account_info = self._download_json('https://radiko.jp/ap/member/webapi/v2/member/login/check',
+			None, note='Checking account status from cookies', expected_status=400)  # NOTE(review): a 400 is accepted here, presumably the "not logged in" answer - confirm
+		self.write_debug({**account_info, "user_key": "PRIVATE"})  # mask user_key before logging
+		self._has_tf30 = account_info.get('timefreeplus') == '1'  # only the string '1' counts as having timefree 30
+		return self._has_tf30
 
 	def _get_programme_meta(self, station_id, url_time):
 		day = url_time.broadcast_day_string()
-		meta = self._download_json(f"https://radiko.jp/v4/program/station/date/{day}/{station_id}.json", station_id,
+		meta = self._download_json(f"https://api.radiko.jp/program/v4/date/{day}/station/{station_id}.json", station_id,
 			note="Downloading programme data")
 		programmes = traverse_obj(meta, ("stations", lambda _, v: v["station_id"] == station_id,
 			"programs", "program"), get_all=False)
@@ -467,10 +484,12 @@ class RadikoTimeFreeIE(_RadikoBaseIE):
 			"start_time_gte": start.isoformat(),
 			"end_time_lt": end.isoformat(),
 		})
-		data = self._download_json(api_url, video_id, note="Downloading tracklist").get("data")
+		data_json = self._download_json(
+			api_url, video_id, note="Downloading tracklist", errnote="Downloading tracklist", fatal=False
+		)
 
 		chapters = []
-		for track in data:
+		for track in traverse_obj(data_json, "data") or []:
 			artist = traverse_obj(track, ("artist", "name")) or track.get("artist_name")
 			chapters.append({
 				"title": join_nonempty(artist, track.get("title"), delim=" - "),
@@ -492,9 +511,13 @@ class RadikoTimeFreeIE(_RadikoBaseIE):
 		start = times[0]
 		end = times[1]
 		now = datetime.datetime.now(tz=rtime.JST)
+		expiry_free, expiry_tf30 = end.expiry()
 
-		if end.expiry(False) < now:
+		if expiry_tf30 < now:
 			self.raise_no_formats("Programme is no longer available.", video_id=meta["id"], expected=True)
+		need_tf30 = expiry_free < now
+		if need_tf30 and not self._check_tf30():
+			self.raise_login_required("Programme is only available with a Timefree 30 subscription")
 		elif start > now:
 			self.raise_no_formats("Programme has not aired yet.", video_id=meta["id"], expected=True)
 			live_status = "is_upcoming"
@@ -505,12 +528,14 @@ class RadikoTimeFreeIE(_RadikoBaseIE):
 		region = self._get_station_region(station)
 		station_meta = self._get_station_meta(region, station)
 		chapters = self._extract_chapters(station, start, end, video_id=meta["id"])
-		auth_data = self._auth(region)
-		formats = self._get_station_formats(station, True, auth_data, start_at=start, end_at=end)
+		auth_data = self._auth(region, need_tf30=need_tf30)
+		formats = self._get_station_formats(station, True, auth_data, start_at=start, end_at=end, use_pc_html5=need_tf30)
 
 		return {
 			**station_meta,
-			"alt_title": None,
+			"alt_title": None,  # override from station metadata
+			"thumbnails": None,
+
 			**meta,
 			"chapters": chapters,
 			"formats": formats,
@@ -548,22 +573,37 @@ class RadikoSearchIE(InfoExtractor):
 	}]
 
 	def _strip_date(self, date):
+		# lazy way of making a timestring: drop the spaces, dashes and colons (eg "2025-05-20 01:00:00" -> "20250520010000")
 		return date.replace(" ", "").replace("-", "").replace(":", "")
 
 	def _pagefunc(self, url, idx):
 		url = update_url_query(url, {"page_idx": idx})
 		data = self._download_json(url, None, note=f"Downloading page {idx+1}")
 
-		return [self.url_result("https://radiko.jp/#!/ts/{station}/{time}".format(
-				station = i.get("station_id"), time = self._strip_date(i.get("start_time"))))
-			for i in data.get("data")]
+		results = []  # one url_result per search hit on this page
+		for r in data.get("data") or []:  # a response without "data" yields an empty page instead of a TypeError
+			station = r.get("station_id")
+			timestring = self._strip_date(r.get("start_time"))
+
+			results.append(
+				self.url_result(
+					f"https://radiko.jp/#!/ts/{station}/{timestring}",
+					id=join_nonempty(station, timestring)
+				)
+			)
+		return results
 
 	def _real_extract(self, url):
 		url = url.replace("/#!/", "/!/", 1)
 		# urllib.parse interprets the path as just one giant fragment because of the #, so we hack it away
 		queries = parse_qs(url)
 
-		search_url = update_url_query("https://radiko.jp/v3/api/program/search", {
+		if queries.get("cul_area_id"):
+			queries["cur_area_id"] =  queries.pop("cul_area_id")
+		# site used to use "cul_area_id" in the search url, now it uses "cur_area_id" (with an r)
+		# and outright rejects the old one with HTTP Error 415: Unsupported Media Type
+
+		search_url = update_url_query("https://api.annex-cf.radiko.jp/v1/programs/legacy/perl/program/search", {
 			**queries,
 			"uid": "".join(random.choices("0123456789abcdef", k=32)),
 			"app_id": "pc",
@@ -588,44 +628,16 @@ class RadikoSearchIE(InfoExtractor):
 
 class RadikoShareIE(InfoExtractor):
 	_VALID_URL = r"https?://(?:www\.)?radiko\.jp/share/"
-	_TESTS = [{
-		# 29-hour time -> 24-hour time
-		"url": "http://radiko.jp/share/?sid=FMT&t=20240802240000",
-		"info_dict": {
-			"live_status": "was_live",
-			"ext": "m4a",
-			"id": "FMT-20240803000000",  # the time given (24:00) works out to 00:00 the next day
-
-			"title": "JET STREAM",
-			"series": "JET STREAM",
-			"description": "md5:c1a2172036ebb7a54eeafb47e0a08a50",
-			"chapters": "count:9",
-			"thumbnail": "https://program-static.cf.radiko.jp/greinlrspi.jpg",
-
-			"upload_date": "20240802",
-			"timestamp": 1722610800.0,
-			"release_date": "20240802",
-			"release_timestamp": 1722614100.0,
-			"duration": 3300,
-
-			"channel": "TOKYO FM",
-			"channel_id": "FMT",
-			"channel_url": "https://www.tfm.co.jp/",
-			"uploader": "TOKYO FM",
-			"uploader_id": "FMT",
-			"uploader_url": "https://www.tfm.co.jp/",
-
-			"cast": ["福山雅治"],
-			"tags": ["福山雅治", "夜間飛行", "音楽との出会いが楽しめる", "朗読を楽しめる", "寝る前に聴きたい"],
-		}
-	}]
 
 	def _real_extract(self, url):
 		queries = parse_qs(url)
 		station = traverse_obj(queries, ("sid", 0))
 		time = traverse_obj(queries, ("t", 0))
 		time = rtime.RadikoShareTime(time).timestring()
-		return self.url_result(f"https://radiko.jp/#!/ts/{station}/{time}", RadikoTimeFreeIE)
+		return self.url_result(  # hand the rebuilt timefree URL over to RadikoTimeFreeIE
+			f"https://radiko.jp/#!/ts/{station}/{time}", RadikoTimeFreeIE,
+			id=join_nonempty(station, time)  # id built from station + timestring, same construction RadikoSearchIE._pagefunc uses
+		)
 
 
 class RadikoStationButtonIE(InfoExtractor):
@@ -638,19 +650,9 @@ class RadikoStationButtonIE(InfoExtractor):
 		"info_dict": {
 			"ext": "m4a",
 			'live_status': 'is_live',
-
 			"id": "QRR",
-			"title": "re:^文化放送.+$",
-			'alt_title': 'JOQR BUNKA HOSO',
-			'thumbnail': 'https://radiko.jp/res/banner/QRR/20240423144553.png',
-			'channel': '文化放送',
-			'channel_id': 'QRR',
-			'channel_url': 'http://www.joqr.co.jp/',
-			'uploader': '文化放送',
-			'uploader_id': 'QRR',
-			'uploader_url': 'http://www.joqr.co.jp/',
-
-		}
+		},
+		'only_matching': True,
 	}]
 
 	_WEBPAGE_TESTS = [{
@@ -661,7 +663,7 @@ class RadikoStationButtonIE(InfoExtractor):
 			'id': 'CCL',
 			"title": "re:^FM COCOLO.+$",
 			'alt_title': 'FM COCOLO',
-			'thumbnail': 'https://radiko.jp/res/banner/CCL/20161014144826.png',
+			'thumbnail': 'https://radiko.jp/v2/static/station/logo/CCL/lrtrim/688x160.png',
 
 			'channel': 'FM COCOLO',
 			'channel_id': 'CCL',
@@ -690,7 +692,7 @@ class RadikoPersonIE(InfoExtractor):
 	},{
 		"url": "https://radiko.jp/persons/11421",
 		"params": {'extractor_args': {'rajiko': {'key_station_only': ['']}}},
-		"playlist_count": 1,
+		"playlist_mincount": 1,
 		"info_dict": {
 			"id": "person-11421",
 		},
@@ -701,9 +703,9 @@ class RadikoPersonIE(InfoExtractor):
 
 		now = rtime.RadikoTime.now(tz=rtime.JST)
 
-		min_start = rtime.earliest_available(False)
-		# we set the earliest time as the earliest we can get,
-		# so, the start of the broadcast day 1 week ago
+		min_start = (now - datetime.timedelta(days=30)).broadcast_day_start()
+		# we set the earliest time as the earliest we can get (or at least, that it's possible to get),
+		# so, the start of the broadcast day 30 days ago
 		# that way we can get everything we can actually download, including stuff that aired at eg "26:00"
 
 		person_api_url = update_url_query("https://api.radiko.jp/program/api/v1/programs", {