diff options
-rw-r--r-- | .github/workflows/download.yml | 4 | ||||
-rwxr-xr-x | contrib/generate_html.py (renamed from misc/generate_html.py) | 0 | ||||
-rw-r--r-- | contrib/how to do a release (renamed from misc/how to do a release) | 0 | ||||
-rwxr-xr-x | contrib/old_generate_changelog.py (renamed from misc/old_generate_changelog.py) | 0 | ||||
-rwxr-xr-x | contrib/protostuff.py (renamed from misc/protostuff.py) | 0 | ||||
-rwxr-xr-x | contrib/randominfo.py (renamed from misc/randominfo.py) | 0 | ||||
-rwxr-xr-x | contrib/streammon.py (renamed from misc/streammon.py) | 29 | ||||
-rwxr-xr-x | contrib/test_areas.py (renamed from misc/test_areas.py) | 0 | ||||
-rwxr-xr-x | contrib/test_extractors.py (renamed from misc/test_extractors.py) | 6 | ||||
-rw-r--r-- | yt_dlp_plugins/extractor/radiko.py | 106 |
10 files changed, 111 insertions, 34 deletions
diff --git a/.github/workflows/download.yml b/.github/workflows/download.yml index 66274b3..115ae4e 100644 --- a/.github/workflows/download.yml +++ b/.github/workflows/download.yml @@ -30,6 +30,8 @@ jobs: - name: install ffmpeg uses: AnimMouse/setup-ffmpeg@v1 + with: + version: master - name: get yt-dlp source (for the test_download script we override) uses: actions/checkout@v4 @@ -43,4 +45,4 @@ jobs: - name: Run tests env: PYTHONPATH: ${{ github.workspace }}/yt-dlp${{ runner.os == 'Windows' && ';' || ':' }}${{ env.PYTHONPATH }} - run: python ./yt-dlp-plugins/yt-dlp-rajiko/misc/test_extractors.py + run: python ./yt-dlp-plugins/yt-dlp-rajiko/contrib/test_extractors.py diff --git a/misc/generate_html.py b/contrib/generate_html.py index 0e15d6a..0e15d6a 100755 --- a/misc/generate_html.py +++ b/contrib/generate_html.py diff --git a/misc/how to do a release b/contrib/how to do a release index 6e91e14..6e91e14 100644 --- a/misc/how to do a release +++ b/contrib/how to do a release diff --git a/misc/old_generate_changelog.py b/contrib/old_generate_changelog.py index 1bce073..1bce073 100755 --- a/misc/old_generate_changelog.py +++ b/contrib/old_generate_changelog.py diff --git a/misc/protostuff.py b/contrib/protostuff.py index 7ef0e95..7ef0e95 100755 --- a/misc/protostuff.py +++ b/contrib/protostuff.py diff --git a/misc/randominfo.py b/contrib/randominfo.py index bdb7660..bdb7660 100755 --- a/misc/randominfo.py +++ b/contrib/randominfo.py diff --git a/misc/streammon.py b/contrib/streammon.py index 4051833..8f52bb4 100755 --- a/misc/streammon.py +++ b/contrib/streammon.py @@ -5,6 +5,7 @@ import difflib import os import sys +import xml.etree.ElementTree as ET from datetime import datetime import requests @@ -22,10 +23,25 @@ else: devices = ('pc_html5', 'aSmartPhone7a', 'aSmartPhone8') stations = ('FMT', 'CCL', 'NORTHWAVE', 'TBS') +def format_xml(txt): + root = ET.fromstring(txt) + res = "" + for el in root.findall("url"): + res += el.find("playlist_create_url").text + for k, v in el.attrib.items(): + res += f" {k}:{v}" + + res += "\n" + return res + for device in devices: for station in stations: url = STREAMS_API.format(device=device, station=station) - now = s.get(url).text + now_response = s.get(url) + now = now_response.text + now_modified = now_response.headers["last-modified"] + now_datetime = datetime.strptime(now_modified, "%a, %d %b %Y %H:%M:%S %Z") + filename = f"{PATH}{station}-{device}.xml" with open(filename, "a+") as f: @@ -34,9 +50,9 @@ for device in devices: modtime = datetime.fromtimestamp(os.path.getmtime(filename)) diff = difflib.unified_diff( - past.splitlines(), now.splitlines(), + format_xml(past).splitlines(), format_xml(now).splitlines(), fromfile=url, tofile=url, - fromfiledate=str(modtime), tofiledate=str(datetime.now()), + fromfiledate=str(modtime), tofiledate=str(now_datetime.now()), ) diff_str = "\n".join(diff) @@ -45,9 +61,6 @@ for device in devices: f.write(now) s.post(DISCORD_WEBHOOK, json={ - "embeds": [{ - "type": "rich", - "title": f"Streams changed: {station} {device}", - "description": "\n".join(("```diff", diff_str, "```")) - }] + "content": f"**Streams changed: {station} {device}**\n" + "\n".join(("```diff", diff_str, "```")), }) + os.utime(filename, (now_datetime.timestamp(), now_datetime.timestamp())) diff --git a/misc/test_areas.py b/contrib/test_areas.py index ba6475f..ba6475f 100755 --- a/misc/test_areas.py +++ b/contrib/test_areas.py diff --git a/misc/test_extractors.py b/contrib/test_extractors.py index 9289498..21800c5 100755 --- a/misc/test_extractors.py +++ b/contrib/test_extractors.py @@ -44,7 +44,8 @@ def get_test_timefields(airtime, release_time): from yt_dlp_plugins.extractor.radiko import ( RadikoTimeFreeIE, RadikoShareIE, - RadikoLiveIE, RadikoPersonIE, RadikoStationButtonIE + RadikoLiveIE, RadikoPersonIE, RadikoStationButtonIE, + RadikoRSeasonsIE ) from yt_dlp_plugins.extractor.radiko_podcast import ( @@ -125,7 +126,7 @@ RadikoShareIE._TESTS = [{ "title": "JET STREAM", "series": "JET STREAM", - "description": r"re:^JET STREAM・・・作家が描く世界への旅。[\s\S]+https://www.tfm.co.jp/f/jetstream/message$", + "description": r"re:^JET STREAM・・・[\s\S]+https://www.tfm.co.jp/f/jetstream/message$", "chapters": list, "thumbnail": "https://program-static.cf.radiko.jp/greinlrspi.jpg", @@ -147,6 +148,7 @@ IEs = [ RadikoTimeFreeIE, RadikoShareIE, RadikoLiveIE, RadikoPersonIE, RadikoStationButtonIE, RadikoPodcastEpisodeIE, RadikoPodcastChannelIE, + RadikoRSeasonsIE, ] import test.helper as th diff --git a/yt_dlp_plugins/extractor/radiko.py b/yt_dlp_plugins/extractor/radiko.py index 2996290..ad5c77f 100644 --- a/yt_dlp_plugins/extractor/radiko.py +++ b/yt_dlp_plugins/extractor/radiko.py @@ -87,13 +87,14 @@ class _RadikoBaseIE(InfoExtractor): _DELIVERED_ONDEMAND = ('radiko.jp',) _DOESNT_WORK_WITH_FFMPEG = ('tf-f-rpaa-radiko.smartstream.ne.jp', 'si-f-radiko.smartstream.ne.jp', 'alliance-stream-radiko.smartstream.ne.jp') + _AD_INSERTION = ('si-f-radiko.smartstream.ne.jp', ) _has_tf30 = None def _index_regions(self): region_data = {} - tree = self._download_xml("https://radiko.jp/v3/station/region/full.xml", None, note="Indexing regions") + tree = self._download_xml("https://radiko.jp/v3/station/region/full.xml", None, note="Indexing station regions") for stations in tree: for station in stations: area = station.find("area_id").text @@ -255,8 +256,7 @@ class _RadikoBaseIE(InfoExtractor): config_device = traverse_obj(self._configuration_arg('device', casesense=True, ie_key="rajiko"), 0) if not use_pc_html5: - device = config_device or "aSmartPhone7a" # this device only gives us the on-demand one for timefree - # that's good imo - we just get the one that works, and don't bother with probing the rest as well + device = config_device or "aSmartPhone7a" # still has the radiko.jp on-demand one for timefree else: device = config_device or "pc_html5" # the on-demand one doesnt work with timefree30 stuff sadly # so just use pc_html5 which has everything @@ -272,6 +272,7 @@ class _RadikoBaseIE(InfoExtractor): do_as_live_chunks = not len(self._configuration_arg("no_as_live_chunks", ie_key="rajiko")) > 0 for element in url_data.findall(f".//url[@timefree='{timefree_int}'][@areafree='0']/playlist_create_url"): # find <url>s with matching timefree and no areafree, then get their <playlist_create_url> + # we don't want areafree here because we should always be in-region url = element.text if url in seen_urls: # there are always dupes, even with ^ specific filtering continue @@ -299,15 +300,22 @@ class _RadikoBaseIE(InfoExtractor): delivered_live = True preference = -1 entry_protocol = 'm3u8' + format_note=[] - if domain in self._DOESNT_WORK_WITH_FFMPEG and do_blacklist_streams: + if timefree and domain in self._DOESNT_WORK_WITH_FFMPEG and do_blacklist_streams: + # TODO: remove this completely + # https://github.com/garret1317/yt-dlp-rajiko/issues/29 self.write_debug(f"skipping {domain} (known not working)") continue if domain in self._DELIVERED_ONDEMAND: # override the defaults for delivered as on-demand delivered_live = False - preference = 1 + preference += 2 entry_protocol = None + if domain in self._AD_INSERTION: + preference -= 3 + format_note.append("Ad insertion") + auth_headers = auth_data["token"] @@ -317,22 +325,31 @@ class _RadikoBaseIE(InfoExtractor): self, playlist_url, start_at, end_at, domain, auth_headers ) - formats.append({ + m3u8_formats = [{ "format_id": join_nonempty(domain, "chunked"), "hls_media_playlist_data": chunks_playlist, "preference": preference, "ext": "m4a", + "vcodec": "none", # fallback to live for ffmpeg etc "url": playlist_url, "http_headers": auth_headers, - }) + }] + format_note.append("Chunked") else: - formats += self._extract_m3u8_formats( + m3u8_formats = self._extract_m3u8_formats( playlist_url, station, m3u8_id=domain, fatal=False, headers=auth_headers, live=delivered_live, preference=preference, entry_protocol=entry_protocol, note=f"Downloading m3u8 information from {domain}") + + for f in m3u8_formats: + # ffmpeg sends a Range header which some streams reject. here we disable that (and also some icecast header as well) + f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0', '-http_seekable', '0', '-icy', '0']} + f['format_note'] = ", ".join(format_note) + formats.append(f) + return formats @@ -422,7 +439,7 @@ class RadikoLiveIE(_RadikoBaseIE): class RadikoTimeFreeIE(_RadikoBaseIE): _NETRC_MACHINE = "rajiko" _VALID_URL = r"https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-_]+)/(?P<id>\d+)" - # TESTS use a custom-ish script that updates the airdates automatically, see misc/test_extractors.py + # TESTS use a custom-ish script that updates the airdates automatically, see contrib/test_extractors.py def _perform_login(self, username, password): try: @@ -527,9 +544,13 @@ class RadikoTimeFreeIE(_RadikoBaseIE): region = self._get_station_region(station) station_meta = self._get_station_meta(region, station) - chapters = self._extract_chapters(station, start, end, video_id=meta["id"]) - auth_data = self._auth(region, need_tf30=need_tf30) - formats = self._get_station_formats(station, True, auth_data, start_at=start, end_at=end, use_pc_html5=need_tf30) + if live_status == "was_live": + chapters = self._extract_chapters(station, start, end, video_id=meta["id"]) + auth_data = self._auth(region, need_tf30=need_tf30) + formats = self._get_station_formats(station, True, auth_data, start_at=start, end_at=end, use_pc_html5=need_tf30) + else: + chapters = None + formats = None return { **station_meta, @@ -718,19 +739,58 @@ class RadikoPersonIE(InfoExtractor): def entries(): key_station_only = len(self._configuration_arg("key_station_only", ie_key="rajiko")) > 0 for episode in person_api.get("data"): - if key_station_only and episode.get("key_station_id") != episode.get("station_id"): - continue - share_url = traverse_obj(episode, ("radiko_url", ("pc", "sp", "android", "ios", "app"), - {url_or_none}), get_all=False) - # they're all identical share links at the moment (5th aug 2024) but they might not be in the future + station = episode.get("station_id") + if key_station_only and episode.get("key_station_id") != station: + continue - # predictions: - # pc will probably stay the same - # don't know what sp is, possibly "SmartPhone"?, anyway seems reasonably generic - # android is easier for me to reverse-engineer than ios (no ithing) - # i assume "app" would be some internal tell-it-to-do-something link, not a regular web link + start = episode.get("start_at") + timestring = rtime.RadikoTime.fromisoformat(start).timestring() - yield self.url_result(share_url, ie=RadikoShareIE, video_title=episode.get("title")) + timefree_id = join_nonempty(station, timestring) + timefree_url = f"https://radiko.jp/#!/ts/{station}/{timestring}" + yield self.url_result(timefree_url, ie=RadikoTimeFreeIE, video_id=timefree_id) return self.playlist_result(entries(), playlist_id=join_nonempty("person", person_id)) + + +class RadikoRSeasonsIE(InfoExtractor): + _VALID_URL = r"https?://(?:www\.)?radiko\.jp/(?:mobile/)?r_seasons/(?P<id>\d+$)" + _TESTS = [{ + "url": "https://radiko.jp/r_seasons/10012302", + "playlist_mincount": 4, + "info_dict": { + "id": '10012302', + "title": '山下達郎の楽天カード サンデー・ソングブック', + } + }, { + "url": "https://radiko.jp/r_seasons/10002831", + "playlist_mincount": 4, + "info_dict": { + "id": "10002831", + "title": "Tokyo Moon", + } + }] + + def _real_extract(self, url): + season_id = self._match_id(url) + html = self._download_webpage(url, season_id) + pageProps = self._search_nextjs_data(html, season_id)["props"]["pageProps"] + season_id = traverse_obj(pageProps, ("rSeason", "id")) or season_id + + def entries(): + for episode in pageProps.get("pastPrograms"): + station = traverse_obj(episode, ("stationId")) + start = traverse_obj(episode, ("startAt", "seconds")) + timestring = rtime.RadikoTime.fromtimestamp(start, tz=rtime.JST).timestring() + + timefree_id = join_nonempty(station, timestring) + timefree_url = f"https://radiko.jp/#!/ts/{station}/{timestring}" + + yield self.url_result(timefree_url, ie=RadikoTimeFreeIE, video_id=timefree_id) + + return self.playlist_result( + entries(), + playlist_id=season_id, + playlist_title=traverse_obj(pageProps, ("rSeason", "rSeasonName")), + ) |