diff options
Diffstat (limited to 'yt_dlp_plugins/extractor')
-rwxr-xr-x | yt_dlp_plugins/extractor/radiko.py | 84 | ||||
-rw-r--r-- | yt_dlp_plugins/extractor/radiko_time.py | 114 |
2 files changed, 142 insertions, 56 deletions
diff --git a/yt_dlp_plugins/extractor/radiko.py b/yt_dlp_plugins/extractor/radiko.py index 4bda6aa..696ccae 100755 --- a/yt_dlp_plugins/extractor/radiko.py +++ b/yt_dlp_plugins/extractor/radiko.py @@ -12,11 +12,12 @@ from yt_dlp.utils import ( join_nonempty, parse_qs, traverse_obj, - unified_timestamp, url_or_none, update_url_query, ) +import yt_dlp_plugins.extractor.radiko_time as rtime + class _RadikoBaseIE(InfoExtractor): _FULL_KEY = base64.b64decode(""" @@ -540,10 +541,10 @@ class _RadikoBaseIE(InfoExtractor): }) if timefree: playlist_url = update_url_query(playlist_url, { - "start_at": start_at, - "ft": start_at, - "end_at": end_at, - "to": end_at, + "start_at": start_at.timestring(), + "ft": start_at.timestring(), + "end_at": end_at.timestring(), + "to": end_at.timestring(), }) domain = urllib.parse.urlparse(playlist_url).netloc formats += self._extract_m3u8_formats( @@ -722,46 +723,26 @@ class RadikoTimeFreeIE(_RadikoBaseIE): }, }] - _JST = datetime.timezone(datetime.timedelta(hours=9)) - - def _timestring_to_datetime(self, time): - return datetime.datetime(int(time[:4]), int(time[4:6]), int(time[6:8]), - hour=int(time[8:10]), minute=int(time[10:12]), second=int(time[12:14]), tzinfo=self._JST) - - def _unfuck_day(self, time): - # api counts 05:00 -> 28:59 (04:59 next day) as all the same day - # like the 30-hour day, 06:00 -> 29:59 (05:59) - # https://en.wikipedia.org/wiki/Date_and_time_notation_in_Japan#Time - # but ends earlier, presumably so the early morning programmes dont look like late night ones - # this means we have to shift back by a day so we can use the right api - hour_mins = int(time[8:]) - if hour_mins < 50000: # 050000 - 5AM - date = self._timestring_to_datetime(time) - date -= datetime.timedelta(days=1) - time = date.strftime("%Y%m%d") - - return time - return time[:8] - - def _get_programme_meta(self, station_id, start_time): - day = self._unfuck_day(start_time) + def _get_programme_meta(self, station_id, url_time): + day = url_time.broadcast_day() meta = self._download_json(f"https://radiko.jp/v4/program/station/date/{day}/{station_id}.json", station_id, note="Downloading programme data") programmes = traverse_obj(meta, ("stations", lambda _, v: v["station_id"] == station_id, "programs", "program"), get_all=False) for prog in programmes: - if prog["ft"] <= start_time < prog["to"]: - actual_start = prog["ft"] + if prog["ft"] <= url_time.timestring() < prog["to"]: + actual_start = rtime.RadikoSiteTime(prog["ft"]) + actual_end = rtime.RadikoSiteTime(prog["to"]) if len(prog.get("person")) > 0: cast = [person.get("name") for person in prog.get("person")] else: cast = [prog.get("performer")] return { - "id": join_nonempty(station_id, actual_start), - "timestamp": unified_timestamp(f"{actual_start}+0900"), # hack to account for timezone - "release_timestamp": unified_timestamp(f"{prog['to']}+0900"), + "id": join_nonempty(station_id, actual_start.timestring()), + "timestamp": actual_start.timestamp(), + "release_timestamp": actual_end.timestamp(), "cast": cast, "description": clean_html(join_nonempty("summary", "description", from_dict=prog, delim="\n")), **traverse_obj(prog, { @@ -771,7 +752,7 @@ class RadikoTimeFreeIE(_RadikoBaseIE): "series": "season_name", "tags": "tag", } - )}, (prog.get("ft"), prog.get("to")), int_or_none(prog.get("ts_in_ng")) != 2 + )}, (actual_start, actual_end), int_or_none(prog.get("ts_in_ng")) != 2 def _extract_chapters(self, station, start, end, video_id=None): start_str = urllib.parse.quote(start.isoformat()) @@ -784,39 +765,40 @@ class RadikoTimeFreeIE(_RadikoBaseIE): artist = traverse_obj(track, ("artist", "name")) or track.get("artist_name") chapters.append({ "title": join_nonempty(artist, track.get("title"), delim=" - "), - "start_time": (datetime.datetime.fromisoformat(track.get("displayed_start_time")) - start).total_seconds(), + "start_time": (datetime.datetime.fromisoformat(track.get("displayed_start_time")) - start.datetime).total_seconds(), }) return chapters def _real_extract(self, url): - station, start_time = self._match_valid_url(url).group("station", "id") - meta, times, available = self._get_programme_meta(station, start_time) + station, timestring = self._match_valid_url(url).group("station", "id") + url_time = rtime.RadikoSiteTime(timestring) + meta, times, available = self._get_programme_meta(station, url_time) live_status = "was_live" if not available: self.raise_no_formats("This programme is not available. If this is an NHK station, you may wish to try NHK Radiru.", video_id=meta["id"], expected=True) - start_datetime = self._timestring_to_datetime(times[0]) - end_datetime = self._timestring_to_datetime(times[1]) + start = times[0] + end = times[1] - now = datetime.datetime.now(tz=self._JST) + now = datetime.datetime.now(tz=rtime.JST) - if end_datetime < now - datetime.timedelta(days=7): + if end < now - datetime.timedelta(days=7): self.raise_no_formats("Programme is no longer available.", video_id=meta["id"], expected=True) - elif start_datetime > now: + elif start > now: self.raise_no_formats("Programme has not aired yet.", video_id=meta["id"], expected=True) live_status = "is_upcoming" - elif start_datetime <= now < end_datetime: + elif start <= now < end: live_status = "is_upcoming" self.raise_no_formats("Programme has not finished airing yet.", video_id=meta["id"], expected=True) region = self._get_station_region(station) station_meta = self._get_station_meta(region, station) - chapters = self._extract_chapters(station, start_datetime, end_datetime, video_id=meta["id"]) + chapters = self._extract_chapters(station, start, end, video_id=meta["id"]) auth_data = self._auth(region) - formats = self._get_station_formats(station, True, auth_data, start_at=times[0], end_at=times[1]) + formats = self._get_station_formats(station, True, auth_data, start_at=start, end_at=end) return { **station_meta, @@ -929,17 +911,7 @@ class RadikoShareIE(_RadikoBaseIE): queries = parse_qs(url) station = traverse_obj(queries, ("sid", 0)) time = traverse_obj(queries, ("t", 0)) - - hour = int(time[8:10]) - if hour >= 24: # 29-hour time is valid here, see _unfuck_day in RadikoTimeFreeIE - hour = hour - 24 # move back by a day - - date = datetime.datetime(int(time[:4]), int(time[4:6]), int(time[6:8]), - hour=hour, minute=int(time[10:12]), second=int(time[12:14])) - - date += datetime.timedelta(days=1) # move forward a day in datetime to compensate - time = date.strftime("%Y%m%d%H%M%S") - + time = rtime.RadikoShareTime(time).timestring() return self.url_result(f"https://radiko.jp/#!/ts/{station}/{time}", RadikoTimeFreeIE) diff --git a/yt_dlp_plugins/extractor/radiko_time.py b/yt_dlp_plugins/extractor/radiko_time.py new file mode 100644 index 0000000..441085e --- /dev/null +++ b/yt_dlp_plugins/extractor/radiko_time.py @@ -0,0 +1,114 @@ +import datetime + +JST = datetime.timezone(datetime.timedelta(hours=9)) + +class RadikoTime(): + datetime = None + + def timestring(self): + return self.datetime.strftime("%Y%m%d%H%M%S") + + def broadcast_day(self): + # timetable api counts 05:00 -> 28:59 (04:59 next day) as all the same day + # like the 30-hour day, 06:00 -> 29:59 (05:59) + # https://en.wikipedia.org/wiki/Date_and_time_notation_in_Japan#Time + # but ends earlier, presumably so the early morning programmes dont look like late night ones + # this means we have to shift back by a day so we can get the right programme + + dt = self.datetime + if dt.hour < 5: + dt -= datetime.timedelta(days=1) + return dt.strftime("%Y%m%d") + + def timestamp(self): + return self.datetime.timestamp() + def isoformat(self): + return self.datetime.isoformat() + + def __str__(self): + return str(self.datetime) + def __eq__(self, other): + return self.datetime == other + def __ne__(self, other): + return self.datetime != other + def __lt__(self, other): + return self.datetime < other + def __gt__(self, other): + return self.datetime > other + def __le__(self, other): + return self.datetime <= other + def __ge__(self, other): + return self.datetime >= other + + +class RadikoSiteTime(RadikoTime): + + def __init__(self, timestring): + + timestring = str(timestring) + year = int(timestring[:4]); month = int(timestring[4:6]); day = int(timestring[6:8]) + hour = min(int(timestring[8:10]), 24) + minute = min(int(timestring[10:12]), 59) + second = timestring[12:14] + + # edge cases + next_day = False # hour is 24, meaning 00 the next day + no_second = second == "" # there's no second, meaning we have to -1 second for some reason + + if hour > 23: + hour = hour - 24 + next_day = True + if not no_second: + second = min(int(second), 59) + else: + second = 0 + + self.datetime = datetime.datetime(year, month, day, hour, minute, second, tzinfo = JST) + + if next_day: + self.datetime += datetime.timedelta(days=1) + if no_second: + self.datetime -= datetime.timedelta(seconds=1) + +if __name__ == "__main__": + # normal + assert RadikoSiteTime('20230823180000').timestring() == "20230823180000" + # seconds (clamped to 59) + assert RadikoSiteTime('20230819105563').timestring() == "20230819105559" + # minutes (clamped to 59) + assert RadikoSiteTime('20230819106200').timestring() == "20230819105900" + # hours (clamped to 23) + assert RadikoSiteTime('20230819240000').timestring() == "20230820000000" + # cursed (no seconds) - seems to do -1s + assert RadikoSiteTime('202308240100').timestring() == "20230824005959" + # broadcast day starts at 05:00, ends at 04:59 (29:59) + assert RadikoSiteTime('20230824030000').broadcast_day() == '20230823' + # checking timezone + assert RadikoSiteTime('20230823090000').datetime.timestamp() == 1692748800 + +class RadikoShareTime(RadikoTime): + + def __init__(self, timestring): + + timestring = str(timestring) + year = int(timestring[:4]); month = int(timestring[4:6]); day = int(timestring[6:8]) + hour = int(timestring[8:10]); minute = int(timestring[10:12]); second = int(timestring[12:14]) + + minutes_to_add = second // 60 + second = second % 60 + minute += minutes_to_add + hours_to_add = minute // 60 + minute = minute % 60 + hour += hours_to_add + + days_to_add = hour // 24 + hour = hour % 24 + + # XXX: doesnt handle day invalid for month (the site actually works with this) + + self.datetime = datetime.datetime(year, month, day, hour, minute, second, tzinfo = JST) + self.datetime += datetime.timedelta(days=days_to_add) + +if __name__ == "__main__": + assert RadikoShareTime('20230630296200').timestring() == '20230701060200' + assert RadikoShareTime('20230630235960').timestring() == '20230701000000' |