diff options
| author | garret1317 <garret@airmail.cc> | 2025-11-06 21:26:59 +0000 |
|---|---|---|
| committer | garret1317 <garret@airmail.cc> | 2025-11-07 00:17:47 +0000 |
| commit | 92918e2ba1e5c745aff2dcf046c82a3c1f9529af (patch) | |
| tree | cdeac22ba1c823f9ddb33a5f6fbb7bd8f1a836bd /yt_dlp_plugins/extractor | |
| parent | 68632864f32e47a5f0961d7de32da054d21bf24f (diff) | |
| download | yt-dlp-rajiko-92918e2ba1e5c745aff2dcf046c82a3c1f9529af.tar.gz yt-dlp-rajiko-92918e2ba1e5c745aff2dcf046c82a3c1f9529af.tar.bz2 yt-dlp-rajiko-92918e2ba1e5c745aff2dcf046c82a3c1f9529af.zip | |
Implement lazy chunk downloading
using `http_dash_segments_generator` as seen in youtube live-from-start
this way it only goes through the chunks etc once a format has actually been selected
so we don't have it downloading e.g. 20 chunks of every stream only for most to be discarded
closes #29
Diffstat (limited to 'yt_dlp_plugins/extractor')
| -rw-r--r-- | yt_dlp_plugins/extractor/radiko.py | 11 | ||||
| -rw-r--r-- | yt_dlp_plugins/extractor/radiko_hacks.py | 49 |
2 files changed, 40 insertions, 20 deletions
diff --git a/yt_dlp_plugins/extractor/radiko.py b/yt_dlp_plugins/extractor/radiko.py index 6718201..2ec363a 100644 --- a/yt_dlp_plugins/extractor/radiko.py +++ b/yt_dlp_plugins/extractor/radiko.py @@ -310,13 +310,15 @@ class _RadikoBaseIE(InfoExtractor): if delivered_live and timefree and do_as_live_chunks: - chunks_playlist = hacks._generate_as_live_playlist( - self, playlist_url, start_at, end_at, domain, auth_headers - ) + def fragments_generator(_): + return hacks._generate_as_live_fragments( + self, playlist_url, start_at, end_at, domain, auth_headers + ) m3u8_formats = [{ "format_id": join_nonempty(domain, "chunked"), - "hls_media_playlist_data": chunks_playlist, + "fragments": fragments_generator, + "protocol": "http_dash_segments_generator", "preference": preference, "ext": "m4a", "vcodec": "none", @@ -324,6 +326,7 @@ class _RadikoBaseIE(InfoExtractor): # fallback to live for ffmpeg etc "url": playlist_url, "http_headers": auth_headers, + "is_live": "yesn't", }] format_note.append("Chunked") else: diff --git a/yt_dlp_plugins/extractor/radiko_hacks.py b/yt_dlp_plugins/extractor/radiko_hacks.py index 6486034..418aa70 100644 --- a/yt_dlp_plugins/extractor/radiko_hacks.py +++ b/yt_dlp_plugins/extractor/radiko_hacks.py @@ -10,8 +10,8 @@ from yt_dlp.utils import ( # "hacks" as in great jank/schizo shit that works anyway -def _generate_as_live_playlist(self, playlist_url, start_at, end_at, domain, headers={}): - playlist = "" +def _generate_as_live_fragments(self, playlist_base_url, start_at, end_at, domain, headers={}): + playlist = [] chunk_length = 300 # max the api allows duration = int(end_at.timestamp() - start_at.timestamp()) @@ -21,18 +21,19 @@ def _generate_as_live_playlist(self, playlist_url, start_at, end_at, domain, hea chunk_length = min(chunk_length, duration - cursor) chunk_start = start_at + datetime.timedelta(seconds=cursor) - chunk_url = update_url_query(playlist_url, { + chunk_url = update_url_query(playlist_base_url, { "seek": 
chunk_start.timestring(), "l": chunk_length, }) - chunk_playlist, real_chunk_length = _get_chunk_playlist(self, chunk_url, domain, chunk_num, headers) + chunk_fragments, real_chunk_length = _get_chunk_playlist(self, chunk_url, domain, chunk_num, headers) - playlist += chunk_playlist - cursor += real_chunk_length + cursor += round(real_chunk_length) chunk_num += 1 - return playlist + for frag in chunk_fragments: + yield frag + def _get_chunk_playlist(self, chunk_url, src_id, chunk_num, headers={}): EXTINF_duration = re.compile(r"^#EXTINF:([\d.]+),", flags=re.MULTILINE) @@ -41,20 +42,23 @@ def _get_chunk_playlist(self, chunk_url, src_id, chunk_num, headers={}): chunk_id = join_nonempty(src_id, chunk_num) base_format = self._extract_m3u8_formats( chunk_url, chunk_id, fatal=False, headers=headers, - note=f"Preparing {src_id} chunk {chunk_num}" +# note=f"Preparing {src_id} chunk {chunk_num}" + note=False, + errnote=f"Failed to get {src_id} chunk {chunk_num} base format", ) m3u8_url = traverse_obj(base_format, (..., "url",), get_all=False) - playlist = self._download_webpage(m3u8_url, chunk_id, note=f"Getting {src_id} chunk {chunk_num} fragments") + playlist = self._download_webpage(m3u8_url, chunk_id, note=False, errnote=f"Failed to get {src_id} chunk {chunk_num} playlist") + #note=f"Getting {src_id} chunk {chunk_num} fragments") + + return _parse_hls(playlist) - real_duration = 0 - for i in EXTINF_duration.findall(playlist): - real_duration += float(i) - real_duration = round(real_duration) +def _parse_hls(m3u8_doc): + fragments = [] # playlists can sometimes be longer than they should - # wowza stream does some strange things + # e.g. wowza stream does some strange things # it goes along fine with every fragment 5s long as normal - # and then during the ad break it does one with a different length (2s here) + # and then during the ad break it does one with a different length (eg 2s) # i assume so they have a clean split to do ad insertion in? 
idk # but anyway now the chunks aren't always a clean 5mins long @@ -62,4 +66,17 @@ def _get_chunk_playlist(self, chunk_url, src_id, chunk_num, headers={}): # so to work around this, we track the real duration from the #EXTINF tags - return playlist, real_duration + playlist_duration = 0 + fragment_duration = None + for line in m3u8_doc.splitlines(): + if line.startswith("#EXTINF:"): + fragment_duration = float(line[len('#EXTINF:'):].split(',')[0]) # from common._parse_m3u8_vod_duration + continue + elif line.startswith("#"): + continue + + fragments.append({"url": line, "duration": fragment_duration}) + playlist_duration += fragment_duration or 0 + fragment_duration = None + + return fragments, playlist_duration |