aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/yt_dlp_plugins/extractor
diff options
context:
space:
mode:
author: garret1317 <garret@airmail.cc> 2025-11-06 21:26:59 +0000
committer: garret1317 <garret@airmail.cc> 2025-11-07 00:17:47 +0000
commit: 92918e2ba1e5c745aff2dcf046c82a3c1f9529af (patch)
tree: cdeac22ba1c823f9ddb33a5f6fbb7bd8f1a836bd /yt_dlp_plugins/extractor
parent: 68632864f32e47a5f0961d7de32da054d21bf24f (diff)
download: yt-dlp-rajiko-92918e2ba1e5c745aff2dcf046c82a3c1f9529af.tar.gz
yt-dlp-rajiko-92918e2ba1e5c745aff2dcf046c82a3c1f9529af.tar.bz2
yt-dlp-rajiko-92918e2ba1e5c745aff2dcf046c82a3c1f9529af.zip
Implement lazy chunk downloading
using `http_dash_segments_generator`, as seen in YouTube live-from-start. This way it only goes through the chunks etc. once a format has actually been selected, so we don't have it downloading e.g. 20 chunks of every stream only for most to be discarded. Closes #29
Diffstat (limited to 'yt_dlp_plugins/extractor')
-rw-r--r-- yt_dlp_plugins/extractor/radiko.py 11
-rw-r--r-- yt_dlp_plugins/extractor/radiko_hacks.py 49
2 files changed, 40 insertions, 20 deletions
diff --git a/yt_dlp_plugins/extractor/radiko.py b/yt_dlp_plugins/extractor/radiko.py
index 6718201..2ec363a 100644
--- a/yt_dlp_plugins/extractor/radiko.py
+++ b/yt_dlp_plugins/extractor/radiko.py
@@ -310,13 +310,15 @@ class _RadikoBaseIE(InfoExtractor):
if delivered_live and timefree and do_as_live_chunks:
- chunks_playlist = hacks._generate_as_live_playlist(
- self, playlist_url, start_at, end_at, domain, auth_headers
- )
+ def fragments_generator(_):
+ return hacks._generate_as_live_fragments(
+ self, playlist_url, start_at, end_at, domain, auth_headers
+ )
m3u8_formats = [{
"format_id": join_nonempty(domain, "chunked"),
- "hls_media_playlist_data": chunks_playlist,
+ "fragments": fragments_generator,
+ "protocol": "http_dash_segments_generator",
"preference": preference,
"ext": "m4a",
"vcodec": "none",
@@ -324,6 +326,7 @@ class _RadikoBaseIE(InfoExtractor):
# fallback to live for ffmpeg etc
"url": playlist_url,
"http_headers": auth_headers,
+ "is_live": "yesn't",
}]
format_note.append("Chunked")
else:
diff --git a/yt_dlp_plugins/extractor/radiko_hacks.py b/yt_dlp_plugins/extractor/radiko_hacks.py
index 6486034..418aa70 100644
--- a/yt_dlp_plugins/extractor/radiko_hacks.py
+++ b/yt_dlp_plugins/extractor/radiko_hacks.py
@@ -10,8 +10,8 @@ from yt_dlp.utils import (
# "hacks" as in great jank/schizo shit that works anyway
-def _generate_as_live_playlist(self, playlist_url, start_at, end_at, domain, headers={}):
- playlist = ""
+def _generate_as_live_fragments(self, playlist_base_url, start_at, end_at, domain, headers={}):
+ playlist = []
chunk_length = 300 # max the api allows
duration = int(end_at.timestamp() - start_at.timestamp())
@@ -21,18 +21,19 @@ def _generate_as_live_playlist(self, playlist_url, start_at, end_at, domain, hea
chunk_length = min(chunk_length, duration - cursor)
chunk_start = start_at + datetime.timedelta(seconds=cursor)
- chunk_url = update_url_query(playlist_url, {
+ chunk_url = update_url_query(playlist_base_url, {
"seek": chunk_start.timestring(),
"l": chunk_length,
})
- chunk_playlist, real_chunk_length = _get_chunk_playlist(self, chunk_url, domain, chunk_num, headers)
+ chunk_fragments, real_chunk_length = _get_chunk_playlist(self, chunk_url, domain, chunk_num, headers)
- playlist += chunk_playlist
- cursor += real_chunk_length
+ cursor += round(real_chunk_length)
chunk_num += 1
- return playlist
+ for frag in chunk_fragments:
+ yield frag
+
def _get_chunk_playlist(self, chunk_url, src_id, chunk_num, headers={}):
EXTINF_duration = re.compile(r"^#EXTINF:([\d.]+),", flags=re.MULTILINE)
@@ -41,20 +42,23 @@ def _get_chunk_playlist(self, chunk_url, src_id, chunk_num, headers={}):
chunk_id = join_nonempty(src_id, chunk_num)
base_format = self._extract_m3u8_formats(
chunk_url, chunk_id, fatal=False, headers=headers,
- note=f"Preparing {src_id} chunk {chunk_num}"
+# note=f"Preparing {src_id} chunk {chunk_num}"
+ note=False,
+ errnote=f"Failed to get {src_id} chunk {chunk_num} base format",
)
m3u8_url = traverse_obj(base_format, (..., "url",), get_all=False)
- playlist = self._download_webpage(m3u8_url, chunk_id, note=f"Getting {src_id} chunk {chunk_num} fragments")
+ playlist = self._download_webpage(m3u8_url, chunk_id, note=False, errnote=f"Failed to get {src_id} chunk {chunk_num} playlist")
+ #note=f"Getting {src_id} chunk {chunk_num} fragments")
+
+ return _parse_hls(playlist)
- real_duration = 0
- for i in EXTINF_duration.findall(playlist):
- real_duration += float(i)
- real_duration = round(real_duration)
+def _parse_hls(m3u8_doc):
+ fragments = []
# playlists can sometimes be longer than they should
- # wowza stream does some strange things
+ # e.g. wowza stream does some strange things
# it goes along fine with every fragment 5s long as normal
- # and then during the ad break it does one with a different length (2s here)
+ # and then during the ad break it does one with a different length (eg 2s)
# i assume so they have a clean split to do ad insertion in? idk
# but anyway now the chunks aren't always a clean 5mins long
@@ -62,4 +66,17 @@ def _get_chunk_playlist(self, chunk_url, src_id, chunk_num, headers={}):
# so to work around this, we track the real duration from the #EXTINF tags
- return playlist, real_duration
+ playlist_duration = 0
+ fragment_duration = None
+ for line in m3u8_doc.splitlines():
+ if line.startswith("#EXTINF:"):
+ fragment_duration = float(line[len('#EXTINF:'):].split(',')[0]) # from common._parse_m3u8_vod_duration
+ continue
+ elif line.startswith("#"):
+ continue
+
+ fragments.append({"url": line, "duration": fragment_duration})
+ playlist_duration += fragment_duration or 0
+ fragment_duration = None
+
+ return fragments, playlist_duration