aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/yt_dlp_plugins/extractor
diff options
context:
space:
mode:
authorgarret <garret@airmail.cc>2023-08-24 23:18:03 +0100
committergarret <garret@airmail.cc>2023-08-24 23:18:03 +0100
commitf6e242278535317f15548cf5da55692afe8b09e9 (patch)
tree80381f2a9d88c5db87c0929f4d2b90ef5bff4d0c /yt_dlp_plugins/extractor
parent321c7b69afc3f987f3a94160711ca0023d4841cd (diff)
downloadyt-dlp-rajiko-f6e242278535317f15548cf5da55692afe8b09e9.tar.gz
yt-dlp-rajiko-f6e242278535317f15548cf5da55692afe8b09e9.tar.bz2
yt-dlp-rajiko-f6e242278535317f15548cf5da55692afe8b09e9.zip
Migrate to unified time handler thing
Now only one thing gets passed around, and it has most everything we need. Closes #11.
Diffstat (limited to 'yt_dlp_plugins/extractor')
-rwxr-xr-xyt_dlp_plugins/extractor/radiko.py84
-rw-r--r--yt_dlp_plugins/extractor/radiko_time.py114
2 files changed, 142 insertions, 56 deletions
diff --git a/yt_dlp_plugins/extractor/radiko.py b/yt_dlp_plugins/extractor/radiko.py
index 4bda6aa..696ccae 100755
--- a/yt_dlp_plugins/extractor/radiko.py
+++ b/yt_dlp_plugins/extractor/radiko.py
@@ -12,11 +12,12 @@ from yt_dlp.utils import (
join_nonempty,
parse_qs,
traverse_obj,
- unified_timestamp,
url_or_none,
update_url_query,
)
+import yt_dlp_plugins.extractor.radiko_time as rtime
+
class _RadikoBaseIE(InfoExtractor):
_FULL_KEY = base64.b64decode("""
@@ -540,10 +541,10 @@ class _RadikoBaseIE(InfoExtractor):
})
if timefree:
playlist_url = update_url_query(playlist_url, {
- "start_at": start_at,
- "ft": start_at,
- "end_at": end_at,
- "to": end_at,
+ "start_at": start_at.timestring(),
+ "ft": start_at.timestring(),
+ "end_at": end_at.timestring(),
+ "to": end_at.timestring(),
})
domain = urllib.parse.urlparse(playlist_url).netloc
formats += self._extract_m3u8_formats(
@@ -722,46 +723,26 @@ class RadikoTimeFreeIE(_RadikoBaseIE):
},
}]
- _JST = datetime.timezone(datetime.timedelta(hours=9))
-
- def _timestring_to_datetime(self, time):
- return datetime.datetime(int(time[:4]), int(time[4:6]), int(time[6:8]),
- hour=int(time[8:10]), minute=int(time[10:12]), second=int(time[12:14]), tzinfo=self._JST)
-
- def _unfuck_day(self, time):
- # api counts 05:00 -> 28:59 (04:59 next day) as all the same day
- # like the 30-hour day, 06:00 -> 29:59 (05:59)
- # https://en.wikipedia.org/wiki/Date_and_time_notation_in_Japan#Time
- # but ends earlier, presumably so the early morning programmes dont look like late night ones
- # this means we have to shift back by a day so we can use the right api
- hour_mins = int(time[8:])
- if hour_mins < 50000: # 050000 - 5AM
- date = self._timestring_to_datetime(time)
- date -= datetime.timedelta(days=1)
- time = date.strftime("%Y%m%d")
-
- return time
- return time[:8]
-
- def _get_programme_meta(self, station_id, start_time):
- day = self._unfuck_day(start_time)
+ def _get_programme_meta(self, station_id, url_time):
+ day = url_time.broadcast_day()
meta = self._download_json(f"https://radiko.jp/v4/program/station/date/{day}/{station_id}.json", station_id,
note="Downloading programme data")
programmes = traverse_obj(meta, ("stations", lambda _, v: v["station_id"] == station_id,
"programs", "program"), get_all=False)
for prog in programmes:
- if prog["ft"] <= start_time < prog["to"]:
- actual_start = prog["ft"]
+ if prog["ft"] <= url_time.timestring() < prog["to"]:
+ actual_start = rtime.RadikoSiteTime(prog["ft"])
+ actual_end = rtime.RadikoSiteTime(prog["to"])
if len(prog.get("person")) > 0:
cast = [person.get("name") for person in prog.get("person")]
else:
cast = [prog.get("performer")]
return {
- "id": join_nonempty(station_id, actual_start),
- "timestamp": unified_timestamp(f"{actual_start}+0900"), # hack to account for timezone
- "release_timestamp": unified_timestamp(f"{prog['to']}+0900"),
+ "id": join_nonempty(station_id, actual_start.timestring()),
+ "timestamp": actual_start.timestamp(),
+ "release_timestamp": actual_end.timestamp(),
"cast": cast,
"description": clean_html(join_nonempty("summary", "description", from_dict=prog, delim="\n")),
**traverse_obj(prog, {
@@ -771,7 +752,7 @@ class RadikoTimeFreeIE(_RadikoBaseIE):
"series": "season_name",
"tags": "tag",
}
- )}, (prog.get("ft"), prog.get("to")), int_or_none(prog.get("ts_in_ng")) != 2
+ )}, (actual_start, actual_end), int_or_none(prog.get("ts_in_ng")) != 2
def _extract_chapters(self, station, start, end, video_id=None):
start_str = urllib.parse.quote(start.isoformat())
@@ -784,39 +765,40 @@ class RadikoTimeFreeIE(_RadikoBaseIE):
artist = traverse_obj(track, ("artist", "name")) or track.get("artist_name")
chapters.append({
"title": join_nonempty(artist, track.get("title"), delim=" - "),
- "start_time": (datetime.datetime.fromisoformat(track.get("displayed_start_time")) - start).total_seconds(),
+ "start_time": (datetime.datetime.fromisoformat(track.get("displayed_start_time")) - start.datetime).total_seconds(),
})
return chapters
def _real_extract(self, url):
- station, start_time = self._match_valid_url(url).group("station", "id")
- meta, times, available = self._get_programme_meta(station, start_time)
+ station, timestring = self._match_valid_url(url).group("station", "id")
+ url_time = rtime.RadikoSiteTime(timestring)
+ meta, times, available = self._get_programme_meta(station, url_time)
live_status = "was_live"
if not available:
self.raise_no_formats("This programme is not available. If this is an NHK station, you may wish to try NHK Radiru.",
video_id=meta["id"], expected=True)
- start_datetime = self._timestring_to_datetime(times[0])
- end_datetime = self._timestring_to_datetime(times[1])
+ start = times[0]
+ end = times[1]
- now = datetime.datetime.now(tz=self._JST)
+ now = datetime.datetime.now(tz=rtime.JST)
- if end_datetime < now - datetime.timedelta(days=7):
+ if end < now - datetime.timedelta(days=7):
self.raise_no_formats("Programme is no longer available.", video_id=meta["id"], expected=True)
- elif start_datetime > now:
+ elif start > now:
self.raise_no_formats("Programme has not aired yet.", video_id=meta["id"], expected=True)
live_status = "is_upcoming"
- elif start_datetime <= now < end_datetime:
+ elif start <= now < end:
live_status = "is_upcoming"
self.raise_no_formats("Programme has not finished airing yet.", video_id=meta["id"], expected=True)
region = self._get_station_region(station)
station_meta = self._get_station_meta(region, station)
- chapters = self._extract_chapters(station, start_datetime, end_datetime, video_id=meta["id"])
+ chapters = self._extract_chapters(station, start, end, video_id=meta["id"])
auth_data = self._auth(region)
- formats = self._get_station_formats(station, True, auth_data, start_at=times[0], end_at=times[1])
+ formats = self._get_station_formats(station, True, auth_data, start_at=start, end_at=end)
return {
**station_meta,
@@ -929,17 +911,7 @@ class RadikoShareIE(_RadikoBaseIE):
queries = parse_qs(url)
station = traverse_obj(queries, ("sid", 0))
time = traverse_obj(queries, ("t", 0))
-
- hour = int(time[8:10])
- if hour >= 24: # 29-hour time is valid here, see _unfuck_day in RadikoTimeFreeIE
- hour = hour - 24 # move back by a day
-
- date = datetime.datetime(int(time[:4]), int(time[4:6]), int(time[6:8]),
- hour=hour, minute=int(time[10:12]), second=int(time[12:14]))
-
- date += datetime.timedelta(days=1) # move forward a day in datetime to compensate
- time = date.strftime("%Y%m%d%H%M%S")
-
+ time = rtime.RadikoShareTime(time).timestring()
return self.url_result(f"https://radiko.jp/#!/ts/{station}/{time}", RadikoTimeFreeIE)
diff --git a/yt_dlp_plugins/extractor/radiko_time.py b/yt_dlp_plugins/extractor/radiko_time.py
new file mode 100644
index 0000000..441085e
--- /dev/null
+++ b/yt_dlp_plugins/extractor/radiko_time.py
@@ -0,0 +1,114 @@
+import datetime
+
+JST = datetime.timezone(datetime.timedelta(hours=9))
+
class RadikoTime():
    """Thin wrapper around a ``datetime.datetime`` with radiko-specific helpers.

    The base class does not build the wrapped value itself; subclasses are
    expected to assign ``self.datetime`` in their ``__init__``.

    Instances compare against plain ``datetime.datetime`` objects as well as
    against other ``RadikoTime`` instances, and hash like the wrapped datetime
    so that equal values collapse in sets and dict keys.
    """

    # The wrapped datetime.datetime (None until a subclass assigns it).
    # This attribute only shadows the module name inside the class body;
    # within methods, the bare name `datetime` still refers to the module.
    datetime = None

    @staticmethod
    def _unwrap(other):
        """Return the datetime held by *other* if it is a RadikoTime, else *other*."""
        return other.datetime if isinstance(other, RadikoTime) else other

    def timestring(self):
        """Return the time formatted as ``YYYYmmddHHMMSS``."""
        return self.datetime.strftime("%Y%m%d%H%M%S")

    def broadcast_day(self):
        """Return the ``YYYYmmdd`` day that the timetable api files this time under."""
        # timetable api counts 05:00 -> 28:59 (04:59 next day) as all the same day
        # like the 30-hour day, 06:00 -> 29:59 (05:59)
        # https://en.wikipedia.org/wiki/Date_and_time_notation_in_Japan#Time
        # but ends earlier, presumably so the early morning programmes don't look like late night ones
        # this means we have to shift back by a day so we can get the right programme
        dt = self.datetime
        if dt.hour < 5:
            dt -= datetime.timedelta(days=1)
        return dt.strftime("%Y%m%d")

    def timestamp(self):
        """Return the POSIX timestamp of the wrapped datetime."""
        return self.datetime.timestamp()

    def isoformat(self):
        """Return the ISO 8601 representation of the wrapped datetime."""
        return self.datetime.isoformat()

    def __str__(self):
        return str(self.datetime)

    def __repr__(self):
        return f"{type(self).__name__}({self.datetime!r})"

    def __hash__(self):
        # Defining __eq__ alone would set __hash__ to None and make instances
        # unhashable; hashing the wrapped value keeps hash consistent with __eq__.
        return hash(self.datetime)

    def __eq__(self, other):
        return self.datetime == self._unwrap(other)

    def __ne__(self, other):
        return self.datetime != self._unwrap(other)

    def __lt__(self, other):
        return self.datetime < self._unwrap(other)

    def __gt__(self, other):
        return self.datetime > self._unwrap(other)

    def __le__(self, other):
        return self.datetime <= self._unwrap(other)

    def __ge__(self, other):
        return self.datetime >= self._unwrap(other)
+
+
class RadikoSiteTime(RadikoTime):
    """Time parsed from a ``YYYYmmddHHMM[SS]`` timestring.

    Out-of-range fields are clamped rather than rejected:
    the hour is capped at 24 (and 24 becomes 00:00 on the following day),
    minutes and seconds are capped at 59, and a timestring with no seconds
    field ends up one second earlier than the stated minute.
    """

    def __init__(self, timestring):
        raw = str(timestring)

        year, month, day = int(raw[:4]), int(raw[4:6]), int(raw[6:8])
        hour = min(int(raw[8:10]), 24)
        minute = min(int(raw[10:12]), 59)
        seconds_text = raw[12:14]

        # edge case: hour is 24, meaning 00 the next day
        rolls_over = hour > 23
        if rolls_over:
            hour -= 24

        # edge case: there's no second, meaning we have to -1 second for some reason
        missing_seconds = seconds_text == ""
        second = 0 if missing_seconds else min(int(seconds_text), 59)

        moment = datetime.datetime(year, month, day, hour, minute, second, tzinfo=JST)
        if rolls_over:
            moment += datetime.timedelta(days=1)
        if missing_seconds:
            moment -= datetime.timedelta(seconds=1)

        self.datetime = moment
+
if __name__ == "__main__":
    # Quick self-checks for RadikoSiteTime, run only when this module is executed directly.
    _site_timestring_cases = (
        # normal
        ("20230823180000", "20230823180000"),
        # seconds (clamped to 59)
        ("20230819105563", "20230819105559"),
        # minutes (clamped to 59)
        ("20230819106200", "20230819105900"),
        # hours (24 rolls over to 00:00 on the next day)
        ("20230819240000", "20230820000000"),
        # cursed (no seconds) - seems to do -1s
        ("202308240100", "20230824005959"),
    )
    for _given, _expected in _site_timestring_cases:
        assert RadikoSiteTime(_given).timestring() == _expected

    # broadcast day starts at 05:00, ends at 04:59 (28:59)
    assert RadikoSiteTime("20230824030000").broadcast_day() == "20230823"
    # checking timezone
    assert RadikoSiteTime("20230823090000").datetime.timestamp() == 1692748800
+
class RadikoShareTime(RadikoTime):
    """Time parsed from a ``YYYYmmddHHMMSS`` timestring whose fields may overflow.

    Instead of clamping, excess seconds carry into minutes, excess minutes
    into hours, and excess hours into days (so hour 29 is 05:00 the next day).
    """

    def __init__(self, timestring):
        raw = str(timestring)

        year, month, day = int(raw[:4]), int(raw[4:6]), int(raw[6:8])
        hour, minute, second = int(raw[8:10]), int(raw[10:12]), int(raw[12:14])

        # carry overflow upwards: seconds -> minutes -> hours -> days
        carry, second = divmod(second, 60)
        minute += carry
        carry, minute = divmod(minute, 60)
        hour += carry
        extra_days, hour = divmod(hour, 24)

        # XXX: doesn't handle day invalid for month (the site actually works with this)

        moment = datetime.datetime(year, month, day, hour, minute, second, tzinfo=JST)
        self.datetime = moment + datetime.timedelta(days=extra_days)
+
if __name__ == "__main__":
    # Quick self-checks for RadikoShareTime: overflowing fields carry into the next unit.
    _share_timestring_cases = (
        # hour 29 and minute 62 carry over to 06:02 on the next day
        ("20230630296200", "20230701060200"),
        # second 60 carries all the way up to midnight of the next day
        ("20230630235960", "20230701000000"),
    )
    for _given, _expected in _share_timestring_cases:
        assert RadikoShareTime(_given).timestring() == _expected