aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/download.yml48
-rw-r--r--.gitignore4
-rw-r--r--LICENCE2
-rw-r--r--README.md44
-rwxr-xr-xcontrib/bundle.sh18
-rwxr-xr-xcontrib/generate_html.py102
-rw-r--r--contrib/how to do a release46
-rwxr-xr-xcontrib/old_generate_changelog.py (renamed from misc/generate_changelog.py)0
-rwxr-xr-xcontrib/protostuff.py154
-rwxr-xr-xcontrib/randominfo.py (renamed from misc/randominfo.py)0
-rwxr-xr-xcontrib/streammon.py66
-rwxr-xr-xcontrib/test_areas.py (renamed from misc/test-tokens.py)0
-rwxr-xr-xcontrib/test_extractors.py183
-rw-r--r--misc/how to do a release47
-rw-r--r--pyproject.toml18
-rw-r--r--yt_dlp_plugins/extractor/radiko.py444
-rw-r--r--yt_dlp_plugins/extractor/radiko_dependencies.py29
-rw-r--r--yt_dlp_plugins/extractor/radiko_hacks.py90
-rw-r--r--yt_dlp_plugins/extractor/radiko_podcast.py175
-rwxr-xr-xyt_dlp_plugins/extractor/radiko_protobufs.py146
20 files changed, 1354 insertions, 262 deletions
diff --git a/.github/workflows/download.yml b/.github/workflows/download.yml
new file mode 100644
index 0000000..6094947
--- /dev/null
+++ b/.github/workflows/download.yml
@@ -0,0 +1,48 @@
+name: Tests
+on: [push, pull_request]
+permissions:
+ contents: read
+
+jobs:
+ full:
+ name: Tests
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: true
+ matrix:
+ os: [ubuntu-latest]
+ python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10]
+ include:
+ # at least one of each CPython/PyPy test must run on Windows
+ - os: windows-latest
+ python-version: '3.10'
+ - os: windows-latest
+ python-version: pypy-3.10
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ path: './yt-dlp-plugins/yt-dlp-rajiko/'
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: install ffmpeg
+ uses: AnimMouse/setup-ffmpeg@v1
+ with:
+ version: master
+
+ - name: get yt-dlp source (for the test_download script we override)
+ uses: actions/checkout@v4
+ with:
+ path: './yt-dlp/'
+ repository: yt-dlp/yt-dlp
+
+ - name: Install yt-dlp from source (editable mode)
+ run: pip install -e ./yt-dlp/
+
+ - name: Run tests
+ env:
+ PYTHONPATH: ${{ github.workspace }}/yt-dlp${{ runner.os == 'Windows' && ';' || ':' }}${{ env.PYTHONPATH }}
+ run: python ./yt-dlp-plugins/yt-dlp-rajiko/contrib/test_extractors.py
diff --git a/.gitignore b/.gitignore
index 04a43b4..cd9177d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,7 @@ __pycache__
*.pyc
wiki/
dist/
+build/
+bundle/
+*.m4a*
+*.m3u8
diff --git a/LICENCE b/LICENCE
index 9041052..ba3d837 100644
--- a/LICENCE
+++ b/LICENCE
@@ -1,6 +1,6 @@
BSD Zero Clause License
-Copyright (c) 2023, 2024 garret1317
+Copyright (c) 2023, 2024, 2025 garret1317
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted.
diff --git a/README.md b/README.md
index 31b8601..41b7ff9 100644
--- a/README.md
+++ b/README.md
@@ -2,54 +2,40 @@
yt-dlp-rajiko is an improved [radiko.jp](https://radiko.jp) extractor plugin for yt-dlp.
+### [日本語](https://427738.xyz/yt-dlp-rajiko/index.ja.html)
+
## Installation
-[Download the Python wheel](https://427738.xyz/yt-dlp-rajiko/dl/yt_dlp_rajiko-latest.whl) or `pip install
---extra-index-url https://427738.xyz/yt-dlp-rajiko/pip/ yt-dlp-rajiko`
+[Download the plugin bundle](https://427738.xyz/yt-dlp-rajiko/dl/yt_dlp_rajiko-latest.bundle.zip) or `pip install yt-dlp-rajiko`
-Requires yt-dlp 2023.06.22 or above.
+Requires yt-dlp 2025.02.19 or above.
Use the pip command if you installed yt-dlp with pip. If you installed
-yt-dlp with `pipx`, use `pipx inject --index-url
-https://427738.xyz/yt-dlp-rajiko/pip/ yt-dlp yt-dlp-rajiko` to install
+yt-dlp with `pipx`, use `pipx inject yt-dlp yt-dlp-rajiko` to install
the plugin in yt-dlp's environment.
-Otherwise, download the wheel, and place it in one of these locations:
+Otherwise, download the plugin bundle, and place it in one of these locations:
- `~/.config/yt-dlp/plugins/` (on Linux and Mac)
- `%appdata%/yt-dlp/plugins/` (on Windows)
+ - a `yt-dlp-plugins` folder next to your `yt-dlp.exe` (<a href="https://427738.xyz/yt-dlp-rajiko/i/installation-pluginsfolder.png">like this</a>)
- anywhere else listed in [the yt-dlp
documentation](https://github.com/yt-dlp/yt-dlp#installing-plugins).
You'll have to create those folders if they don't already exist.
+There is no need to unzip the plugin bundle.
More information about yt-dlp plugins is available from [yt-dlp's documentation](https://github.com/yt-dlp/yt-dlp#plugins).
-## Usage
-
-simply:
-
- # timefree download
- yt-dlp 'https://radiko.jp/#!/ts/INT/20240308230000'
- # live recording
- yt-dlp 'https://radiko.jp/#!/live/CCL'
- # live shorthand
- yt-dlp 'https://radiko.jp/#FMT'
-
-You can somewhat automate downloading programmes by using the search
-page.
+## More info
- # all programmes related to Toshiki Kadomatsu
- yt-dlp 'https://radiko.jp/#!/search/live?key=角松敏生&filter=past&region_id=all'
- # specific programme from Osaka
- yt-dlp 'https://radiko.jp/#!/search/live?key=world%20jazz%20warehouse&filter=past&area_id=JP27'
+[Please see the website for more information](https://427738.xyz/yt-dlp-rajiko/) (usage, contact methods, etc)
-Just copying from the browser URL bar should work with no changes.
+[日本語訳もあります](https://427738.xyz/yt-dlp-rajiko/index.ja.html)
-----
+## notes about this repository
-[Please see the website for more information](https://427738.xyz/yt-dlp-rajiko/).
-If the website is down, an archived copy may be available on [the Internet Archive's Wayback Machine](https://web.archive.org/web/*/https://427738.xyz/yt-dlp-rajiko/).
+this is just where the source code and bug tracker live. most of the info is on the website.
-[日本語訳は公式サイトをご覧ください。](https://427738.xyz/yt-dlp-rajiko/index.ja.html)
-サイトが無くなった場合は、多分[Internet ArchiveのWayback Machineにアーカイブされています](https://web.archive.org/web/*/https://427738.xyz/yt-dlp-rajiko/index.ja.html)。
+Generally you should use the release versions.
+`master` branch usually works, but should be considered experimental and may have bugs.
diff --git a/contrib/bundle.sh b/contrib/bundle.sh
new file mode 100755
index 0000000..cef3f79
--- /dev/null
+++ b/contrib/bundle.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+version="$(uv tool run hatch version)"
+mkdir bundle/
+uv pip install --python-version 3.9 --python-platform linux --requirements pyproject.toml --target bundle/yt_dlp_plugins/
+uv pip install --python-version 3.9 --python-platform linux --no-deps --target bundle/ .
+
+for lib_distinfo in bundle/yt_dlp_plugins/*.dist-info bundle/*.dist-info; do
+ lib=$(basename "$lib_distinfo" .dist-info)
+ for licence_file in "$lib_distinfo"/licenses/*; do
+ licence_filename=$(basename "$licence_file")
+ cp "${licence_file}" "bundle/${lib}_${licence_filename}"
+ done
+done
+
+rm -rf bundle/yt_dlp_plugins/*.dist-info bundle/*.dist-info bundle/yt_dlp_plugins/bin
+
+mkdir -p dist/
+(cd bundle/ && zip -9 --recurse-paths ../dist/yt_dlp_rajiko-"${version}".bundle.zip .)
diff --git a/contrib/generate_html.py b/contrib/generate_html.py
new file mode 100755
index 0000000..10f4335
--- /dev/null
+++ b/contrib/generate_html.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python3
+import os
+import hashlib
+import re
+
+pip_index = open("index.html", "w")
+
+pip_index.write("""<!DOCTYPE HTML>
+<html lang="en-GB">
+<head>
+ <title>yt-dlp-rajiko pip index</title>
+ <link rel="canonical" href="https://427738.xyz/yt-dlp-rajiko/pip/yt-dlp-rajiko/">
+</head>
+<body>
+
+<ul>
+""")
+
+site_sha256 = []
+
+tarballs = []
+wheels = []
+bundles = []
+
+def sortkey(item):
+ if os.path.islink(item):
+ return [0]
+ ver = re.search(r"\d+\.\d+", item)
+ if not ver:
+ return [0]
+
+ ver=ver.group()
+ return list(map(int, ver.split(".")))
+
+
+for item in sorted(os.listdir(), key=sortkey):
+ if os.path.islink(item):
+ continue
+
+ if item.endswith(".tar.gz"):
+ tarballs.append(item)
+ elif item.endswith(".whl"):
+ wheels.append(item)
+ elif item.endswith(".bundle.zip"):
+ bundles.append(item)
+ else:
+ continue
+
+ pip_index.write("\t<li>")
+ pip_index.write('<a href="')
+ pip_index.write(item)
+
+ with open(item, "rb") as f:
+ checksum = hashlib.sha256(f.read()).hexdigest()
+
+ pip_index.write("#sha256=")
+ pip_index.write(checksum)
+ pip_index.write('">')
+ pip_index.write(item)
+ pip_index.write("</a>\n")
+
+ site_string = checksum + " " + '<a href="dl/' + item + '">' + item + "</a>"
+ site_sha256.append(site_string)
+
+pip_index.write("""</ul>
+
+</body>
+</html>""")
+
+latest_tarball = tarballs[-1]
+latest_wheel = wheels[-1]
+latest_bundle = bundles[-1]
+print(latest_tarball, latest_wheel, latest_bundle)
+
+os.remove("yt_dlp_rajiko-latest.tar.gz")
+os.symlink(latest_tarball, "yt_dlp_rajiko-latest.tar.gz")
+
+os.remove("yt_dlp_rajiko-latest.whl")
+os.symlink(latest_wheel, "yt_dlp_rajiko-latest.whl")
+
+os.remove("yt_dlp_rajiko-latest.bundle.zip")
+os.symlink(latest_bundle, "yt_dlp_rajiko-latest.bundle.zip")
+
+site_sha256.reverse()
+
+latest_list = site_sha256[:3]
+previous_list = site_sha256[3:]
+
+latest = "\n".join(["<!-- LATEST SHA256 START -->", "<pre>", "\n".join(latest_list), "</pre>", "<!-- LATEST SHA256 END -->"])
+
+previous = "\n".join(["<!-- PREVIOUS SHA256 START -->", "<code>", "\n".join(previous_list), "</code>", "<!-- PREVIOUS SHA256 END -->"])
+
+for i in ["../../index.html", "../../index.ja.html"]:
+ with open(i, "r+") as f:
+ page = f.read()
+
+ page = re.sub(r"<!-- LATEST SHA256 START -->.+<!-- LATEST SHA256 END -->", latest, page, flags=re.DOTALL)
+ page = re.sub(r"<!-- PREVIOUS SHA256 START -->.+<!-- PREVIOUS SHA256 END -->", previous, page, flags=re.DOTALL)
+
+ f.seek(0)
+ f.truncate(0)
+ f.write(page)
diff --git a/contrib/how to do a release b/contrib/how to do a release
new file mode 100644
index 0000000..ba27910
--- /dev/null
+++ b/contrib/how to do a release
@@ -0,0 +1,46 @@
+putting this here because i'll forget how to do it otherwise
+
+update the pyproject.toml
+tag it in git, eg v1.0
+
+## build the builds
+
+WHEEL + SOURCE TARBALL
+python3 -m build
+
+ZIP BUNDLE
+contrib/bundle.sh
+
+and then put ALL items from `dist` into the pip index dir - ~/site2/yt-dlp-rajiko/pip/yt-dlp-rajiko/
+because without the .whl pip has to "build" it itself, with all the stuff that needs to be installed for that to work
+
+run script to update the pip index html and the dl/ "latest" symlinks
+this also updates the sha256 blocks on the site
+
+## update the changelog file
+
+write in html, paste into the feed xml like <![CDATA[
+stuff
+]]>
+make sure to set the link, date
+to get date use:
+git log --pretty --date=rfc2822
+
+include the pip instructions, sha256sum etc
+
+now push to the server
+
+NOW UPLOAD TO PYPI AS WELL
+
+go to dl/ dir and do like
+twine upload yt_dlp_rajiko-1.x-py3-none-any.whl yt_dlp_rajiko-1.x.tar.gz
+
+
+## update github
+
+paste the changelog output into a github release, upload the new builds
+change link at the bottom to just "below"
+
+post in the radiko thread on 5ch if i can be bothered
+
+and thats probably all
diff --git a/misc/generate_changelog.py b/contrib/old_generate_changelog.py
index 1bce073..1bce073 100755
--- a/misc/generate_changelog.py
+++ b/contrib/old_generate_changelog.py
diff --git a/contrib/protostuff.py b/contrib/protostuff.py
new file mode 100755
index 0000000..7ef0e95
--- /dev/null
+++ b/contrib/protostuff.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python3
+
+import protobug
+import base64
+import struct
+
+import random
+import requests
+
+@protobug.message
+class SignInRequest:
+ lsid: protobug.String = protobug.field(2)
+ area: protobug.String = protobug.field(3)
+
+@protobug.message
+class SignInResponse:
+ jwt: protobug.String = protobug.field(1)
+
+
+
+@protobug.message
+class ListPodcastEpisodesRequest:
+ channel_id: protobug.String = protobug.field(1)
+ dontknow: protobug.Int32 = protobug.field(2)
+ page_length: protobug.Int32 = protobug.field(4)
+ cursor: protobug.String = protobug.field(5, default=None)
+
+def add_grpc_header(protobuf_data):
+ compression_flag = 0
+ message_length = len(protobuf_data)
+ header = struct.pack('>BI', compression_flag, message_length)
+ return header + protobuf_data
+
+def strip_grpc_response(response):
+ return response[5:].rpartition(b"grpc-status:")[0]
+
+print("SIGNUP")
+# why do they have to make it so bloody complicated
+
+lsid = ''.join(random.choices('0123456789abcdef', k=32))
+big_funny = ("\n " + lsid).encode()
+
+signup = requests.post("https://api.annex.radiko.jp/radiko.UserService/SignUp", headers={
+ 'Origin': 'https://radiko.jp',
+ 'Content-Type': 'application/grpc-web+proto',
+ 'X-User-Agent': 'grpc-web-javascript/0.1',
+ 'X-Grpc-Web': '1',
+ }, data=( add_grpc_header(big_funny)),
+)
+
+print(signup.content)
+
+# youre meant to only do the sign up ^ once and then keep your id for later
+# so that you can V sign in and get the token for the API to work
+
+print("SIGNIN")
+
+si=add_grpc_header(protobug.dumps(SignInRequest(
+ lsid=lsid,
+ area="JP13",
+)))
+
+print(si)
+print(base64.b64encode(si))
+
+signin = requests.post("https://api.annex.radiko.jp/radiko.UserService/SignIn", headers={
+ 'Origin': 'https://radiko.jp',
+ 'Content-Type': 'application/grpc-web+proto',
+ 'X-User-Agent': 'grpc-web-javascript/0.1',
+ 'X-Grpc-Web': '1',
+}, data=si)
+
+print(signin.content)
+
+signin_result = protobug.loads(strip_grpc_response(signin.content), SignInResponse)
+
+
+headers = {
+ 'Origin': 'https://radiko.jp',
+ 'Authorization': f'Bearer {signin_result.jwt}',
+ 'x-annex-proto-version': '1.0.0',
+ 'Content-Type': 'application/grpc-web+proto',
+ 'X-User-Agent': 'grpc-web-javascript/0.1',
+ 'X-Grpc-Web': '1',
+}
+
+response = requests.post('https://api.annex.radiko.jp/radiko.PodcastService/ListPodcastEpisodes', headers=headers,
+ data=add_grpc_header(protobug.dumps(ListPodcastEpisodesRequest(
+ channel_id="0ce1d2d7-5e07-4ec5-901a-d0eacdacc332",
+ dontknow=1,
+ page_length=200, # site uses 20
+# cursor="ef693874-0ad2-48cc-8c52-ac4de31cbf54" # here you put the id of the last episode you've seen in the list
+ )))
+)
+
+print(response)
+
+episodes = strip_grpc_response(response.content)
+
+
+with open("ListPodcastEpisodes.bin", "wb") as f:
+ f.write(episodes)
+
+
+@protobug.message
+class Audio:
+ revision: protobug.Int32 = protobug.field(1)
+ url: protobug.String = protobug.field(2)
+ fileSize: protobug.Int64 = protobug.field(3)
+ durationSec: protobug.Int64 = protobug.field(4)
+ transcoded: protobug.Bool = protobug.field(5)
+
+@protobug.message
+class EpisodeStartAt:
+ seconds: protobug.UInt64 = protobug.field(1)
+ nanos: protobug.UInt64 = protobug.field(2, default=0)
+
+
+@protobug.message
+class PodcastEpisode:
+ id: protobug.String = protobug.field(1)
+ workspaceId: protobug.String = protobug.field(2)
+ channelId: protobug.String = protobug.field(3)
+ title: protobug.String = protobug.field(4)
+ description: protobug.String = protobug.field(5)
+
+ audio: Audio = protobug.field(8)
+ channelImageUrl: protobug.String = protobug.field(16)
+ channelTitle: protobug.String = protobug.field(17)
+ channelStationName: protobug.String = protobug.field(18)
+ channelAuthor: protobug.String = protobug.field(19)
+
+ channelThumbnailImageUrl: protobug.String = protobug.field(21)
+ channelStationType: protobug.UInt32 = protobug.field(22)
+ startAt: EpisodeStartAt = protobug.field(27)
+ isEnabled: protobug.Bool = protobug.field(29)
+ hasTranscription: protobug.Bool = protobug.field(32)
+
+ imageUrl: protobug.String = protobug.field(7, default=None)
+ thumbnailImageUrl: protobug.String = protobug.field(20, default=None)
+
+@protobug.message
+class ListPodcastEpisodesResponse:
+ episodes: list[PodcastEpisode] = protobug.field(1)
+ hasNextPage: protobug.Bool = protobug.field(2, default=False)
+
+
+episodes_response = protobug.loads(episodes, ListPodcastEpisodesResponse)
+
+print(episodes_response)
+
+for e in episodes_response.episodes:
+ print(e.title, e.id)
+print(episodes_response.hasNextPage)
diff --git a/misc/randominfo.py b/contrib/randominfo.py
index bdb7660..bdb7660 100755
--- a/misc/randominfo.py
+++ b/contrib/randominfo.py
diff --git a/contrib/streammon.py b/contrib/streammon.py
new file mode 100755
index 0000000..8f52bb4
--- /dev/null
+++ b/contrib/streammon.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+# monitor stream APIs for any changes, so I can check they don't break anything
+# run via cronjob every now and then
+
+import difflib
+import os
+import sys
+import xml.etree.ElementTree as ET
+from datetime import datetime
+
+import requests
+
+s = requests.Session()
+
+DISCORD_WEBHOOK = "PUT WEBHOOK HERE"
+STREAMS_API = "https://radiko.jp/v3/station/stream/{device}/{station}.xml"
+
+if len(sys.argv) > 1:
+ PATH = sys.argv[1]
+else:
+ PATH = ""
+
+devices = ('pc_html5', 'aSmartPhone7a', 'aSmartPhone8')
+stations = ('FMT', 'CCL', 'NORTHWAVE', 'TBS')
+
+def format_xml(txt):
+ root = ET.fromstring(txt)
+ res = ""
+ for el in root.findall("url"):
+ res += el.find("playlist_create_url").text
+ for k, v in el.attrib.items():
+ res += f" {k}:{v}"
+
+ res += "\n"
+ return res
+
+for device in devices:
+ for station in stations:
+ url = STREAMS_API.format(device=device, station=station)
+ now_response = s.get(url)
+ now = now_response.text
+ now_modified = now_response.headers["last-modified"]
+ now_datetime = datetime.strptime(now_modified, "%a, %d %b %Y %H:%M:%S %Z")
+
+
+ filename = f"{PATH}{station}-{device}.xml"
+ with open(filename, "a+") as f:
+ f.seek(0)
+ past = f.read()
+
+ modtime = datetime.fromtimestamp(os.path.getmtime(filename))
+ diff = difflib.unified_diff(
+ format_xml(past).splitlines(), format_xml(now).splitlines(),
+ fromfile=url, tofile=url,
+ fromfiledate=str(modtime), tofiledate=str(now_datetime.now()),
+ )
+
+ diff_str = "\n".join(diff)
+ if diff_str != "":
+ f.truncate(0)
+ f.write(now)
+
+ s.post(DISCORD_WEBHOOK, json={
+ "content": f"**Streams changed: {station} {device}**\n" + "\n".join(("```diff", diff_str, "```")),
+ })
+ os.utime(filename, (now_datetime.timestamp(), now_datetime.timestamp()))
diff --git a/misc/test-tokens.py b/contrib/test_areas.py
index ba6475f..ba6475f 100755
--- a/misc/test-tokens.py
+++ b/contrib/test_areas.py
diff --git a/contrib/test_extractors.py b/contrib/test_extractors.py
new file mode 100755
index 0000000..0b505b8
--- /dev/null
+++ b/contrib/test_extractors.py
@@ -0,0 +1,183 @@
+#!/usr/bin/env python3
+
+# programmes expire, so i have to update the times in the tests every time i run them
+# but thats a massive ballache, so i end up just not running them, which leads to cockups
+# so, this script has the tests automatically use the latest episode as you run it, by setting dynamically generated time values
+# everything else is always the same so it should be fine lol
+
+
+import datetime
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+sys.path.insert(0, "/home/g/Downloads/yt-dlp/") # TODO: un-hardcode. has to be the source/git repo because pip doesnt carry the tests
+
+from yt_dlp_plugins.extractor import radiko_time as rtime
+
+MON, TUE, WED, THU, FRI, SAT, SUN = range(7)
+weekdays = {0: "MON", 1: "TUE", 2: "WED", 3: "THU", 4: "FRI", 5: "SAT", 6: "SUN"}
+
+now = rtime.RadikoTime.now(tz = rtime.JST)
+UTC = datetime.timezone.utc
+
+def get_latest_airtimes(now, weekday, hour, minute, duration):
+ days_after_weekday = (7 - (now.weekday() - weekday)) % 7
+ latest_airdate = (now + datetime.timedelta(days=days_after_weekday)).replace(hour=hour, minute=minute, second=0, microsecond=0)
+ if (latest_airdate + duration) > now:
+ latest_airdate -= datetime.timedelta(days=7)
+ return latest_airdate, latest_airdate + duration
+
+def get_test_timefields(airtime, release_time):
+ return {
+ "timestamp": airtime.timestamp(),
+ "release_timestamp": release_time.timestamp(),
+ "upload_date": airtime.astimezone(UTC).strftime("%Y%m%d"),
+ "release_date": release_time.astimezone(UTC).strftime("%Y%m%d"),
+
+ "duration": (release_time - airtime).total_seconds(),
+ }
+
+
+
+
+from yt_dlp_plugins.extractor.radiko import (
+ RadikoTimeFreeIE, RadikoShareIE,
+ RadikoLiveIE, RadikoPersonIE, RadikoStationButtonIE,
+ RadikoSearchIE, RadikoRSeasonsIE
+)
+
+from yt_dlp_plugins.extractor.radiko_podcast import (
+ RadikoPodcastEpisodeIE, RadikoPodcastChannelIE, RadikoPodcastSearchIE,
+)
+RadikoTimeFreeIE._TESTS = []
+
+
+
+# TOKYO MOON - interfm - EVERY FRI 2300
+airtime, release_time = get_latest_airtimes(now, FRI, 23, 0, datetime.timedelta(hours=1))
+RadikoTimeFreeIE._TESTS.append({
+ "url": f"https://radiko.jp/#!/ts/INT/{airtime.timestring()}",
+ "info_dict": {
+ "ext": "m4a",
+ "id": f"INT-{airtime.timestring()}",
+
+ **get_test_timefields(airtime, release_time),
+
+ 'title': 'TOKYO MOON',
+ 'description': r're:[\S\s]+Xハッシュタグは「#tokyomoon」$',
+ 'uploader': 'interfm',
+ 'uploader_id': 'INT',
+ 'uploader_url': 'https://www.interfm.co.jp/',
+ 'channel': 'interfm',
+ 'channel_id': 'INT',
+ 'channel_url': 'https://www.interfm.co.jp/',
+ 'thumbnail': 'https://program-static.cf.radiko.jp/ehwtw6mcvy.jpg',
+ 'chapters': list,
+ 'tags': ['松浦俊夫', 'ジャズの魅力を楽しめる'],
+ 'cast': ['松浦\u3000俊夫'],
+ 'series': 'Tokyo Moon',
+ 'live_status': 'was_live',
+ }
+})
+
+
+# late-night/v. early morning show, to test broadcast day handling
+# this should be monday 27:00 / tuesday 03:00
+airtime, release_time = get_latest_airtimes(now, TUE, 3, 0, datetime.timedelta(hours=2))
+RadikoTimeFreeIE._TESTS.append({
+ "url": f"https://radiko.jp/#!/ts/TBS/{airtime.timestring()}",
+ "info_dict": {
+ "ext": "m4a",
+ "id": f"TBS-{airtime.timestring()}",
+
+ **get_test_timefields(airtime, release_time),
+ 'title': 'CITY CHILL CLUB',
+ 'description': r"re:^目を閉じて…リラックスして[\S\s]+chill@tbs.co.jp$",
+ 'uploader': 'TBSラジオ',
+ 'uploader_id': 'TBS',
+ 'uploader_url': 'https://www.tbsradio.jp/',
+ 'channel': 'TBSラジオ',
+ 'channel_id': 'TBS',
+ 'channel_url': 'https://www.tbsradio.jp/',
+ 'thumbnail': 'https://program-static.cf.radiko.jp/nrf8fowbjo.jpg',
+ 'chapters': list,
+ 'tags': ['CCC905', '音楽との出会いが楽しめる', '人気アーティストトーク', '音楽プロデューサー出演', 'ドライブ中におすすめ', '寝る前におすすめ', '学生におすすめ'],
+ 'cast': list,
+ 'series': 'CITY CHILL CLUB',
+ 'live_status': 'was_live',
+ },
+})
+
+
+# testing 29-hour clock handling
+airtime, release_time = get_latest_airtimes(now, WED, 0, 0, datetime.timedelta(minutes=55))
+share_timestring = (airtime - datetime.timedelta(days=1)).strftime("%Y%m%d") + "240000"
+
+RadikoShareIE._TESTS = [{
+ "url": f"http://radiko.jp/share/?sid=FMT&t={share_timestring}",
+ "info_dict": {
+ "live_status": "was_live",
+ "ext": "m4a",
+ "id": f"FMT-{airtime.timestring()}",
+
+ **get_test_timefields(airtime, release_time),
+
+ "title": "JET STREAM",
+ "series": "JET STREAM",
+ "description": r"re:^JET STREAM・・・[\s\S]+https://www.tfm.co.jp/f/jetstream/message$",
+ "chapters": list,
+ "thumbnail": "https://program-static.cf.radiko.jp/greinlrspi.jpg",
+
+ "channel": "TOKYO FM",
+ "channel_id": "FMT",
+ "channel_url": "https://www.tfm.co.jp/",
+ "uploader": "TOKYO FM",
+ "uploader_id": "FMT",
+ "uploader_url": "https://www.tfm.co.jp/",
+
+ "cast": ["福山雅治"],
+ "tags": ["福山雅治", "夜間飛行", "音楽との出会いが楽しめる", "朗読を楽しめる", "寝る前に聴きたい"],
+ },
+ }]
+
+
+
+IEs = [
+ RadikoTimeFreeIE, RadikoShareIE,
+ RadikoLiveIE, RadikoPersonIE, RadikoStationButtonIE,
+ RadikoPodcastEpisodeIE, RadikoPodcastChannelIE,
+ RadikoSearchIE, RadikoPodcastSearchIE, RadikoRSeasonsIE,
+]
+
+import test.helper as th
+
+# override to only get testcases from our IEs
+
+def _new_gettestcases(include_onlymatching=False):
+ import yt_dlp.plugins as plugins
+ plugins.load_all_plugins()
+
+ for ie in IEs:
+ yield from ie.get_testcases(include_onlymatching)
+
+def _new_getwebpagetestcases():
+ import yt_dlp.plugins as plugins
+ plugins.load_all_plugins()
+
+ for ie in IEs:
+ for tc in ie.get_webpage_testcases():
+ tc.setdefault('add_ie', []).append('Generic')
+ yield tc
+
+th.gettestcases = _new_gettestcases
+th.getwebpagetestcases = _new_getwebpagetestcases
+
+import test.test_download as td
+
+class TestDownload(td.TestDownload):
+ pass
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/misc/how to do a release b/misc/how to do a release
deleted file mode 100644
index f5eb85f..0000000
--- a/misc/how to do a release
+++ /dev/null
@@ -1,47 +0,0 @@
-putting this here because i'll forget how to do it otherwise
-
-update the pyproject.toml
-tag it in git, eg v1.0
-
-## build the builds
-python3 -m build
-
-and then put BOTH items from `dist` into the pip index dir - ~/site2/yt-dlp-rajiko/pip/yt-dlp-rajiko/
-because without the .whl pip has to "build" it itself, with all the stuff that needs to be installed for that to work
-update the pip index html
-update the dl/ "latest" symlinks
-
-## update the changelog file
-
-~/site2/yt-dlp-rajiko/CHANGELOG
-
-```
-version number
-date (git log v1.0 --pretty --date=rfc2822)
-url: whl download link
-sha256: sha256 of the whl
-brief summary of the release
-can span multiple lines
-
-bullet points of changes, 1 per line
-simple present tense, third person singular - continue "this release...", eg..
-fixes a bug where the computer would explode
-makes downloading 5000x faster
-```
-
-./generate_changelog.py to make the new rss feed
-
-## update the website
-
-move the previous release into the "Previous releases" <details>
-update the sha256 (just sha256 command in the pip dir)
-update the whl link
-repeat for japanese version
-
-now push to the server
-
-## update github
-
-paste the changelog output into a github release, upload the new builds
-
-and thats probably all
diff --git a/pyproject.toml b/pyproject.toml
index a0bcc03..500e2c3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,18 +1,30 @@
[project]
name = "yt-dlp-rajiko"
-version = "1.1"
-description = "improved radiko.jp extractor for yt-dlp"
+version = "1.10"
+description = "improved radiko.jp extractor for yt-dlp (fast and areafree)"
+
+readme = "README.md"
+license = "0BSD"
+license-files = ["LICENCE"]
+
authors = [
{ name="garret1317" },
]
+
requires-python = ">=3.8"
classifiers = [
- "License :: OSI Approved :: Zero-Clause BSD (0BSD)",
"Environment :: Plugins",
]
+dependencies = [
+ "protobug"
+]
+
[project.urls]
Homepage = "https://427738.xyz/yt-dlp-rajiko/"
+"日本語" = "https://427738.xyz/yt-dlp-rajiko/index.ja.html"
+"Source Code" = "https://github.com/garret1317/yt-dlp-rajiko/"
+"Release Notes" = "https://427738.xyz/yt-dlp-rajiko/CHANGELOG.xml"
[build-system]
requires = ["setuptools>=61.0"]
diff --git a/yt_dlp_plugins/extractor/radiko.py b/yt_dlp_plugins/extractor/radiko.py
index c6cea37..62a2489 100644
--- a/yt_dlp_plugins/extractor/radiko.py
+++ b/yt_dlp_plugins/extractor/radiko.py
@@ -6,18 +6,24 @@ import urllib.parse
import pkgutil
from yt_dlp.extractor.common import InfoExtractor
+from yt_dlp.networking.exceptions import HTTPError
from yt_dlp.utils import (
+ ExtractorError,
OnDemandPagedList,
clean_html,
int_or_none,
join_nonempty,
+ make_archive_id,
parse_qs,
traverse_obj,
+ urlencode_postdata,
url_or_none,
update_url_query,
)
+from yt_dlp_plugins.extractor.radiko_podcast import RadikoPodcastSearchIE
import yt_dlp_plugins.extractor.radiko_time as rtime
+import yt_dlp_plugins.extractor.radiko_hacks as hacks
class _RadikoBaseIE(InfoExtractor):
@@ -82,12 +88,14 @@ class _RadikoBaseIE(InfoExtractor):
_APP_VERSIONS = ["7.5.0", "7.4.17", "7.4.16", "7.4.15", "7.4.14", "7.4.13", "7.4.12", "7.4.11", "7.4.10", "7.4.9", "7.4.8", "7.4.7", "7.4.6", "7.4.5", "7.4.4", "7.4.3", "7.4.2", "7.4.1", "7.4.0", "7.3.8", "7.3.7", "7.3.6", "7.3.1", "7.3.0", "7.2.11", "7.2.10"]
_DELIVERED_ONDEMAND = ('radiko.jp',)
- _DOESNT_WORK_WITH_FFMPEG = ('tf-f-rpaa-radiko.smartstream.ne.jp', 'si-f-radiko.smartstream.ne.jp')
+ _AD_INSERTION = ('si-f-radiko.smartstream.ne.jp', )
+
+ _has_tf30 = None
def _index_regions(self):
region_data = {}
- tree = self._download_xml("https://radiko.jp/v3/station/region/full.xml", None, note="Indexing regions")
+ tree = self._download_xml("https://radiko.jp/v3/station/region/full.xml", None, note="Indexing station regions")
for stations in tree:
for station in stations:
area = station.find("area_id").text
@@ -178,16 +186,21 @@ class _RadikoBaseIE(InfoExtractor):
"X-Radiko-AuthToken": auth_token,
},
"user": auth2_headers["X-Radiko-User"],
+ "has_tf30": self._has_tf30,
}
if not region_mismatch:
self.cache.store("rajiko", station_region, auth_data)
return auth_data
- def _auth(self, station_region):
+ def _auth(self, station_region, need_tf30=False):
cachedata = self.cache.load("rajiko", station_region)
self.write_debug(cachedata)
if cachedata is not None:
+ if need_tf30 and not cachedata.get("has_tf30"):
+ self.write_debug("Cached token doesn't have timefree 30, getting a new one")
+ return self._negotiate_token(station_region)
+
auth_headers = cachedata.get("token")
response = self._download_webpage("https://radiko.jp/v2/api/auth_check", station_region, "Checking cached token",
headers=auth_headers, expected_status=401)
@@ -205,6 +218,17 @@ class _RadikoBaseIE(InfoExtractor):
station = region.find(f'.//station/id[.="{station_id}"]/..') # a <station> with an <id> of our station_id
station_name = station.find("name").text
station_url = url_or_none(station.find("href").text)
+
+ thumbnails = []
+ for logo in station.findall("logo"):
+ thumbnails.append({
+ "url": logo.text,
+ **traverse_obj(logo.attrib, ({
+ "width": ("width", {int_or_none}),
+ "height": ("height", {int_or_none}),
+ }))
+ })
+
meta = {
"id": station_id,
"title": station_name,
@@ -218,7 +242,7 @@ class _RadikoBaseIE(InfoExtractor):
"uploader_id": station_id,
"uploader_url": station_url,
- "thumbnail": url_or_none(station.find("banner").text),
+ "thumbnails": thumbnails,
}
self.cache.store("rajiko", station_id, {
"expiry": (now + datetime.timedelta(days=1)).timestamp(),
@@ -229,8 +253,10 @@ class _RadikoBaseIE(InfoExtractor):
self.to_screen(f"{station_id}: Using cached station metadata")
return cachedata.get("meta")
- def _get_station_formats(self, station, timefree, auth_data, start_at=None, end_at=None):
- device = self._configuration_arg('device', ['aSmartPhone7a'], casesense=True, ie_key="rajiko")[0] # aSmartPhone7a formats = always happy path
+ def _get_station_formats(self, station, timefree, auth_data, start_at=None, end_at=None, use_pc_html5=False):
+ config_device = traverse_obj(self._configuration_arg('device', casesense=True, ie_key="rajiko"), 0)
+ device = config_device or "pc_html5"
+
url_data = self._download_xml(f"https://radiko.jp/v3/station/stream/{device}/{station}.xml",
station, note=f"Downloading {device} stream information")
@@ -238,8 +264,10 @@ class _RadikoBaseIE(InfoExtractor):
formats = []
timefree_int = 1 if timefree else 0
+ do_as_live_chunks = not len(self._configuration_arg("no_as_live_chunks", ie_key="rajiko")) > 0
for element in url_data.findall(f".//url[@timefree='{timefree_int}'][@areafree='0']/playlist_create_url"):
# find <url>s with matching timefree and no areafree, then get their <playlist_create_url>
+ # we don't want areafree here because we should always be in-region
url = element.text
if url in seen_urls: # there are always dupes, even with ^ specific filtering
continue
@@ -249,7 +277,7 @@ class _RadikoBaseIE(InfoExtractor):
"station_id": station,
"l": "15", # l = length, ie how many seconds in the live m3u8 (max 300)
"lsid": auth_data["user"],
- "type": "b", # it is a mystery
+ "type": "b", # a/b = in-region, c = areafree
})
if timefree:
@@ -259,6 +287,7 @@ class _RadikoBaseIE(InfoExtractor):
"end_at": end_at.timestring(),
"to": end_at.timestring(),
+ "l": 300,
})
domain = urllib.parse.urlparse(playlist_url).netloc
@@ -267,20 +296,57 @@ class _RadikoBaseIE(InfoExtractor):
delivered_live = True
preference = -1
entry_protocol = 'm3u8'
+ format_note=[]
- if domain in self._DOESNT_WORK_WITH_FFMPEG:
- self.write_debug(f"skipping {domain} (known not working)")
- continue
if domain in self._DELIVERED_ONDEMAND:
# override the defaults for delivered as on-demand
delivered_live = False
- preference = 1
+ preference += 2
entry_protocol = None
+ if domain in self._AD_INSERTION:
+ preference -= 3
+ format_note.append("Ad insertion")
+
+
+ auth_headers = auth_data["token"]
+
+ m3u8_formats = self._extract_m3u8_formats(
+ playlist_url, station, m3u8_id=domain, fatal=False, headers=auth_headers,
+ live=delivered_live, preference=preference, entry_protocol=entry_protocol,
+ note=f"Downloading m3u8 information from {domain}"
+ )
+
+ if delivered_live and timefree and do_as_live_chunks:
+
+ first_chunk = traverse_obj(m3u8_formats, (..., "url",), get_all=False)
+ # we have this so that we can still return a semi-useful `url` for use in mpv etc
+
+ def fragments_generator(_):
+ return hacks._generate_as_live_fragments(
+ self, playlist_url, start_at, end_at, domain, auth_headers, first_chunk
+ )
+
+ m3u8_formats = [{
+ "format_id": join_nonempty(domain, "chunked"),
+ "fragments": fragments_generator,
+ "protocol": "http_dash_segments_generator",
+ "preference": preference,
+ "ext": "m4a",
+ "vcodec": "none",
+
+ # fallback to live for ffmpeg etc
+ "url": first_chunk,
+ "http_headers": auth_headers,
+ "is_from_start": True,
+ }]
+ format_note.append("Chunked")
+
+ for f in m3u8_formats:
+ # ffmpeg sends a Range header which some streams reject. here we disable that (and the Icecast metadata request header too)
+ f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0', '-http_seekable', '0', '-icy', '0']}
+ f['format_note'] = ", ".join(format_note)
+ formats.append(f)
- formats += self._extract_m3u8_formats(
- playlist_url, station, m3u8_id=domain, fatal=False, headers=auth_data["token"],
- live=delivered_live, preference=preference, entry_protocol=entry_protocol,
- note=f"Downloading m3u8 information from {domain}")
return formats
@@ -299,7 +365,7 @@ class RadikoLiveIE(_RadikoBaseIE):
"id": "FMT",
"title": "re:^TOKYO FM.+$",
"alt_title": "TOKYO FM",
- "thumbnail": "https://radiko.jp/res/banner/FMT/20220512162447.jpg",
+ "thumbnail": "https://radiko.jp/v2/static/station/logo/FMT/lrtrim/688x160.png",
"channel": "TOKYO FM",
"channel_id": "FMT",
@@ -319,7 +385,7 @@ class RadikoLiveIE(_RadikoBaseIE):
"id": "NORTHWAVE",
"title": "re:^FM NORTH WAVE.+$",
"alt_title": "FM NORTH WAVE",
- "thumbnail": "https://radiko.jp/res/banner/NORTHWAVE/20150731161543.png",
+ "thumbnail": "https://radiko.jp/v2/static/station/logo/NORTHWAVE/lrtrim/688x160.png",
"uploader": "FM NORTH WAVE",
"uploader_url": "https://www.fmnorth.co.jp/",
@@ -340,7 +406,7 @@ class RadikoLiveIE(_RadikoBaseIE):
"id": "RN1",
"title": "re:^ラジオNIKKEI第1.+$",
"alt_title": "RADIONIKKEI",
- "thumbnail": "https://radiko.jp/res/banner/RN1/20120802154152.png",
+ "thumbnail": "https://radiko.jp/v2/static/station/logo/RN1/lrtrim/688x160.png",
"channel": "ラジオNIKKEI第1",
"channel_url": "http://www.radionikkei.jp/",
@@ -357,7 +423,7 @@ class RadikoLiveIE(_RadikoBaseIE):
region = self._get_station_region(station)
station_meta = self._get_station_meta(region, station)
auth_data = self._auth(region)
- formats = self._get_station_formats(station, False, auth_data)
+ formats = self._get_station_formats(station, False, auth_data, use_pc_html5=True)
return {
"is_live": True,
@@ -368,71 +434,39 @@ class RadikoLiveIE(_RadikoBaseIE):
class RadikoTimeFreeIE(_RadikoBaseIE):
- _VALID_URL = r"https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-_]+)/(?P<id>\d+)"
- _TESTS = [{
- "url": "https://radiko.jp/#!/ts/INT/20240809230000",
- "info_dict": {
- "live_status": "was_live",
- "ext": "m4a",
- "id": "INT-20240809230000",
-
- "title": "TOKYO MOON",
- "series": "Tokyo Moon",
- "description": "md5:20e68d2f400a391fa34d4e7c8c702cb8",
- "chapters": "count:14",
- "thumbnail": "https://program-static.cf.radiko.jp/ehwtw6mcvy.jpg",
-
- "upload_date": "20240809",
- "timestamp": 1723212000.0,
- "release_date": "20240809",
- "release_timestamp": 1723215600.0,
- "duration": 3600,
-
- "channel": "interfm",
- "channel_id": "INT",
- "channel_url": "https://www.interfm.co.jp/",
- "uploader": "interfm",
- "uploader_id": "INT",
- "uploader_url": "https://www.interfm.co.jp/",
-
- "cast": ["松浦\u3000俊夫"],
- "tags": ["松浦俊夫"],
- },
- }, {
- # late-night/early-morning show to test broadcast day checking
- "url": "https://radiko.jp/#!/ts/TBS/20240810033000",
- "info_dict": {
- "live_status": "was_live",
- "ext": "m4a",
- "id": "TBS-20240810033000",
-
- "title": "CITY CHILL CLUB",
- "series": "CITY CHILL CLUB",
- "description": "md5:3fba2c1125059bed27247c0be90e58fa",
- "chapters": "count:22",
- "thumbnail": "https://program-static.cf.radiko.jp/ku7t4ztnaq.jpg",
-
- "upload_date": "20240809",
- "timestamp": 1723228200.0,
- "release_date": "20240809",
- "release_timestamp": 1723233600.0,
- "duration": 5400,
-
- "channel": "TBSラジオ",
- "channel_url": "https://www.tbsradio.jp/",
- "channel_id": "TBS",
- "uploader": "TBSラジオ",
- "uploader_url": "https://www.tbsradio.jp/",
- "uploader_id": "TBS",
-
- "tags": ["CCC905", "音楽との出会いが楽しめる", "人気アーティストトーク", "音楽プロデューサー出演", "ドライブ中におすすめ", "寝る前におすすめ", "学生におすすめ"],
- "cast": ["PES"],
- },
- }]
+ _NETRC_MACHINE = "rajiko"
+ _VALID_URL = [
+ r"https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-_]+)/(?P<id>\d+)",
+ r"rdk://(?P<station>[A-Z0-9-_]+)-(?P<id>\d+)",
+ ]
+ # TESTS use a custom-ish script that updates the airdates automatically, see contrib/test_extractors.py
+
+ def _perform_login(self, username, password):
+ try:
+ login_info = self._download_json('https://radiko.jp/ap/member/webapi/member/login', None, note='Logging in',
+ data=urlencode_postdata({'mail': username, 'pass': password}))
+ self._has_tf30 = '2' in login_info.get('privileges')
+ # areafree = 1, timefree30 = 2, double plan = both
+ self.write_debug({**login_info, "radiko_session": "PRIVATE", "member_ukey": "PRIVATE"})
+ except ExtractorError as error:
+ if isinstance(error.cause, HTTPError) and error.cause.status == 401:
+ raise ExtractorError('Invalid username and/or password', expected=True)
+ raise
+
+ def _check_tf30(self):
+ if self._has_tf30 is not None:
+ return self._has_tf30
+ if self._get_cookies('https://radiko.jp').get('radiko_session') is None:
+ return
+ account_info = self._download_json('https://radiko.jp/ap/member/webapi/v2/member/login/check',
+ None, note='Checking account status from cookies', expected_status=400)
+ self.write_debug({**account_info, "user_key": "PRIVATE"})
+ self._has_tf30 = account_info.get('timefreeplus') == '1'
+ return self._has_tf30
def _get_programme_meta(self, station_id, url_time):
day = url_time.broadcast_day_string()
- meta = self._download_json(f"https://radiko.jp/v4/program/station/date/{day}/{station_id}.json", station_id,
+ meta = self._download_json(f"https://api.radiko.jp/program/v4/date/{day}/station/{station_id}.json", station_id,
note="Downloading programme data")
programmes = traverse_obj(meta, ("stations", lambda _, v: v["station_id"] == station_id,
"programs", "program"), get_all=False)
@@ -467,10 +501,12 @@ class RadikoTimeFreeIE(_RadikoBaseIE):
"start_time_gte": start.isoformat(),
"end_time_lt": end.isoformat(),
})
- data = self._download_json(api_url, video_id, note="Downloading tracklist").get("data")
+ data_json = self._download_json(
+ api_url, video_id, note="Downloading tracklist", errnote="Downloading tracklist", fatal=False
+ )
chapters = []
- for track in data:
+ for track in traverse_obj(data_json, "data") or []:
artist = traverse_obj(track, ("artist", "name")) or track.get("artist_name")
chapters.append({
"title": join_nonempty(artist, track.get("title"), delim=" - "),
@@ -493,11 +529,11 @@ class RadikoTimeFreeIE(_RadikoBaseIE):
end = times[1]
now = datetime.datetime.now(tz=rtime.JST)
expiry_free, expiry_tf30 = end.expiry()
- have_tf30 = False
if expiry_tf30 < now:
self.raise_no_formats("Programme is no longer available.", video_id=meta["id"], expected=True)
- elif not have_tf30 and expiry_free < now:
+ need_tf30 = expiry_free < now
+ if need_tf30 and not self._check_tf30():
self.raise_login_required("Programme is only available with a Timefree 30 subscription")
elif start > now:
self.raise_no_formats("Programme has not aired yet.", video_id=meta["id"], expected=True)
@@ -508,13 +544,19 @@ class RadikoTimeFreeIE(_RadikoBaseIE):
region = self._get_station_region(station)
station_meta = self._get_station_meta(region, station)
- chapters = self._extract_chapters(station, start, end, video_id=meta["id"])
- auth_data = self._auth(region)
- formats = self._get_station_formats(station, True, auth_data, start_at=start, end_at=end)
+ if live_status == "was_live":
+ chapters = self._extract_chapters(station, start, end, video_id=meta["id"])
+ auth_data = self._auth(region, need_tf30=need_tf30)
+ formats = self._get_station_formats(station, True, auth_data, start_at=start, end_at=end, use_pc_html5=need_tf30)
+ else:
+ chapters = None
+ formats = None
return {
**station_meta,
- "alt_title": None,
+ "alt_title": None, # override from station metadata
+ "thumbnails": None,
+
**meta,
"chapters": chapters,
"formats": formats,
@@ -524,7 +566,7 @@ class RadikoTimeFreeIE(_RadikoBaseIE):
class RadikoSearchIE(InfoExtractor):
- _VALID_URL = r"https?://(?:www\.)?radiko\.jp/#!/search/(?:timeshift|live|history)\?"
+ _VALID_URL = r"https?://(?:www\.)?radiko\.jp/#!/search/(?:radio/)?(?:timeshift|live|history)\?"
_TESTS = [{
# timefree, specific area
"url": "https://radiko.jp/#!/search/live?key=city%20chill%20club&filter=past&start_day=&end_day=&region_id=&area_id=JP13&cul_area_id=JP13&page_idx=0",
@@ -549,26 +591,70 @@ class RadikoSearchIE(InfoExtractor):
"id": "ニュース-all-all",
"title": "ニュース"
},
+ 'expected_warnings': ['Skipping podcasts. If you really want EVERY EPISODE of EVERY RESULT, set your search filter to Podcasts only.'],
}]
def _strip_date(self, date):
+ # lazy way of making a timestring (from eg 2025-05-20 01:00:00)
return date.replace(" ", "").replace("-", "").replace(":", "")
def _pagefunc(self, url, idx):
url = update_url_query(url, {"page_idx": idx})
data = self._download_json(url, None, note=f"Downloading page {idx+1}")
- return [self.url_result("https://radiko.jp/#!/ts/{station}/{time}".format(
- station = i.get("station_id"), time = self._strip_date(i.get("start_time"))))
- for i in data.get("data")]
+ results = []
+ for r in data.get("data"):
+ station = r.get("station_id")
+ timestring = self._strip_date(r.get("start_time"))
+
+ results.append(
+ self.url_result(
+ f"https://radiko.jp/#!/ts/{station}/{timestring}",
+ id=join_nonempty(station, timestring),
+ ie=RadikoTimeFreeIE,
+ )
+ )
+ return results
def _real_extract(self, url):
- url = url.replace("/#!/", "/!/", 1)
# urllib.parse interprets the path as just one giant fragment because of the #, so we hack it away
+ url = url.replace("/#!/", "/!/", 1)
queries = parse_qs(url)
+ key = traverse_obj(queries, ("key", 0))
- search_url = update_url_query("https://radiko.jp/v3/api/program/search", {
+ # site used to use "cul_area_id" in the search url, now it uses "cur_area_id" (with an r)
+ # and outright rejects the old one with HTTP Error 415: Unsupported Media Type
+ if queries.get("cul_area_id"):
+ queries["cur_area_id"] = queries.pop("cul_area_id")
+
+ if queries.get("filter"):
+ filter_set = set(queries["filter"][0].split("|"))
+ del queries["filter"]
+ else:
+ filter_set = {"future", "past", "channel"}
+
+ if filter_set == {"channel"}:
+ podcast_search_url = update_url_query(
+ "https://radiko.jp/!/search/podcast/live", {"key": key}
+ ).replace("!", "#!", 1) # same shit with urllib.parse
+ return self.url_result(podcast_search_url, ie=RadikoPodcastSearchIE)
+
+ if "channel" in filter_set:
+ self.report_warning("Skipping podcasts. If you really want EVERY EPISODE of EVERY RESULT, set your search filter to Podcasts only.")
+ filter_set.discard("channel")
+
+ if filter_set == {"future", "past"}:
+ filter_str = ""
+ else:
+ filter_str = "|".join(filter_set) # there should be only one filter left now, so this is just that single element (filter_set is a set, so it can't be indexed)
+ # but if there's more than one, then we should at least try to pass it through as-is, in the hope that it works
+ if len(filter_set) != 1:
+ # but also kick up a stink about it so it's clear it probably won't
+ self.report_warning("Your search has an unknown combination of filters, so this request will probably fail!")
+
+ search_url = update_url_query("https://api.annex-cf.radiko.jp/v1/programs/legacy/perl/program/search", {
**queries,
+ "filter": filter_str,
"uid": "".join(random.choices("0123456789abcdef", k=32)),
"app_id": "pc",
"row_limit": 50, # higher row_limit = more results = less requests = more good
@@ -576,60 +662,32 @@ class RadikoSearchIE(InfoExtractor):
results = OnDemandPagedList(lambda idx: self._pagefunc(search_url, idx), 50)
- key = traverse_obj(queries, ("key", 0))
day = traverse_obj(queries, ("start_day", 0)) or "all"
region = traverse_obj(queries, ("region_id", 0)) or traverse_obj(queries, ("area_id", 0))
- status_filter = traverse_obj(queries, ("filter", 0)) or "all"
+ status_filter = filter_str or "all"
playlist_id = join_nonempty(key, status_filter, day, region)
return {
"_type": "playlist",
- "title": traverse_obj(queries, ("key", 0)),
+ "title": key,
"id": playlist_id,
"entries": results,
}
+
class RadikoShareIE(InfoExtractor):
_VALID_URL = r"https?://(?:www\.)?radiko\.jp/share/"
- _TESTS = [{
- # 29-hour time -> 24-hour time
- "url": "http://radiko.jp/share/?sid=FMT&t=20240802240000",
- "info_dict": {
- "live_status": "was_live",
- "ext": "m4a",
- "id": "FMT-20240803000000", # the time given (24:00) works out to 00:00 the next day
-
- "title": "JET STREAM",
- "series": "JET STREAM",
- "description": "md5:c1a2172036ebb7a54eeafb47e0a08a50",
- "chapters": "count:9",
- "thumbnail": "https://program-static.cf.radiko.jp/greinlrspi.jpg",
-
- "upload_date": "20240802",
- "timestamp": 1722610800.0,
- "release_date": "20240802",
- "release_timestamp": 1722614100.0,
- "duration": 3300,
-
- "channel": "TOKYO FM",
- "channel_id": "FMT",
- "channel_url": "https://www.tfm.co.jp/",
- "uploader": "TOKYO FM",
- "uploader_id": "FMT",
- "uploader_url": "https://www.tfm.co.jp/",
-
- "cast": ["福山雅治"],
- "tags": ["福山雅治", "夜間飛行", "音楽との出会いが楽しめる", "朗読を楽しめる", "寝る前に聴きたい"],
- }
- }]
def _real_extract(self, url):
queries = parse_qs(url)
station = traverse_obj(queries, ("sid", 0))
time = traverse_obj(queries, ("t", 0))
time = rtime.RadikoShareTime(time).timestring()
- return self.url_result(f"https://radiko.jp/#!/ts/{station}/{time}", RadikoTimeFreeIE)
+ return self.url_result(
+ f"https://radiko.jp/#!/ts/{station}/{time}", RadikoTimeFreeIE,
+ id=join_nonempty(station, time)
+ )
class RadikoStationButtonIE(InfoExtractor):
@@ -642,19 +700,9 @@ class RadikoStationButtonIE(InfoExtractor):
"info_dict": {
"ext": "m4a",
'live_status': 'is_live',
-
"id": "QRR",
- "title": "re:^文化放送.+$",
- 'alt_title': 'JOQR BUNKA HOSO',
- 'thumbnail': 'https://radiko.jp/res/banner/QRR/20240423144553.png',
- 'channel': '文化放送',
- 'channel_id': 'QRR',
- 'channel_url': 'http://www.joqr.co.jp/',
- 'uploader': '文化放送',
- 'uploader_id': 'QRR',
- 'uploader_url': 'http://www.joqr.co.jp/',
-
- }
+ },
+ 'only_matching': True,
}]
_WEBPAGE_TESTS = [{
@@ -665,7 +713,7 @@ class RadikoStationButtonIE(InfoExtractor):
'id': 'CCL',
"title": "re:^FM COCOLO.+$",
'alt_title': 'FM COCOLO',
- 'thumbnail': 'https://radiko.jp/res/banner/CCL/20161014144826.png',
+ 'thumbnail': 'https://radiko.jp/v2/static/station/logo/CCL/lrtrim/688x160.png',
'channel': 'FM COCOLO',
'channel_id': 'CCL',
@@ -683,56 +731,88 @@ class RadikoStationButtonIE(InfoExtractor):
return self.url_result(f"https://radiko.jp/#!/live/{station}", RadikoLiveIE)
-class RadikoPersonIE(InfoExtractor):
+class _RadikoMobileWebBaseIE(InfoExtractor):
+
+ def _download_pageprops(self, url, video_id):
+ html = self._download_webpage(url, video_id)
+ return self._search_nextjs_data(html, video_id)["props"]["pageProps"]
+
+ def _programs_entries(self, Programs):
+ for episode in Programs:
+ station = traverse_obj(episode, ("stationId"))
+ start = traverse_obj(episode, ("startAt", "seconds"))
+ timestring = rtime.RadikoTime.fromtimestamp(start, tz=rtime.JST).timestring()
+
+ timefree_id = join_nonempty(station, timestring)
+ timefree_url = f"https://radiko.jp/#!/ts/{station}/{timestring}"
+
+ yield self.url_result(timefree_url, ie=RadikoTimeFreeIE, video_id=timefree_id)
+
+
+class RadikoPersonIE(_RadikoMobileWebBaseIE):
_VALID_URL = r"https?://(?:www\.)?radiko\.jp/persons/(?P<id>\d+)"
_TESTS = [{
"url": "https://radiko.jp/persons/11421",
- "playlist_mincount": 10,
+ "playlist_mincount": 1,
"info_dict": {
- "id": "person-11421",
- },
- },{
- "url": "https://radiko.jp/persons/11421",
- "params": {'extractor_args': {'rajiko': {'key_station_only': ['']}}},
- "playlist_count": 1,
- "info_dict": {
- "id": "person-11421",
+ "id": "11421",
+ 'title': '森山良子',
+ 'description': 'md5:bbf061fc22c6a740927cfa7186d984d2',
+ 'thumbnail': 'https://ac-static.cf.radiko.jp/509_resized_logo_L.jpg',
+ '_old_archive_ids': ['radikoperson person-11421'],
},
}]
def _real_extract(self, url):
person_id = self._match_id(url)
- now = rtime.RadikoTime.now(tz=rtime.JST)
+ person_info = self._download_pageprops(url, person_id)["data"]
+ person_id = traverse_obj(person_info, "id") or person_id
- min_start = (now - datetime.timedelta(days=30)).broadcast_day_start()
- # we set the earliest time as the earliest we can get (or at least, that it's possible to get),
- # so, the start of the broadcast day 30 days ago
- # that way we can get everything we can actually download, including stuff that aired at eg "26:00"
-
- person_api_url = update_url_query("https://api.radiko.jp/program/api/v1/programs", {
- "person_id": person_id,
- "start_at_gte": min_start.isoformat(),
- "start_at_lt": now.isoformat(),
- })
- person_api = self._download_json(person_api_url, person_id)
+ return self.playlist_result(
+ self._programs_entries(person_info.get("pastPrograms")),
+ playlist_id=person_id,
+ **traverse_obj(person_info, {
+ "playlist_title": "name",
+ "thumbnail": "imageUrl",
+ "description": "description",
+ }),
+ _old_archive_ids=[make_archive_id(self, join_nonempty("person", person_id))]
+ )
- def entries():
- key_station_only = len(self._configuration_arg("key_station_only", ie_key="rajiko")) > 0
- for episode in person_api.get("data"):
- if key_station_only and episode.get("key_station_id") != episode.get("station_id"):
- continue
- share_url = traverse_obj(episode, ("radiko_url", ("pc", "sp", "android", "ios", "app"),
- {url_or_none}), get_all=False)
- # they're all identical share links at the moment (5th aug 2024) but they might not be in the future
-
- # predictions:
- # pc will probably stay the same
- # don't know what sp is, possibly "SmartPhone"?, anyway seems reasonably generic
- # android is easier for me to reverse-engineer than ios (no ithing)
- # i assume "app" would be some internal tell-it-to-do-something link, not a regular web link
-
- yield self.url_result(share_url, ie=RadikoShareIE, video_title=episode.get("title"))
+class RadikoRSeasonsIE(_RadikoMobileWebBaseIE):
+ _VALID_URL = r"https?://(?:www\.)?radiko\.jp/(?:mobile/)?r_seasons/(?P<id>\d+$)"
+ _TESTS = [{
+ "url": "https://radiko.jp/r_seasons/10012302",
+ "playlist_mincount": 4,
+ "info_dict": {
+ "id": '10012302',
+ "title": '山下達郎の楽天カード サンデー・ソングブック',
+ 'thumbnail': 'https://program-static.cf.radiko.jp/935a87fc-4a52-48e5-9468-7b2ef9448d9f.jpeg',
+ }
+ }, {
+ "url": "https://radiko.jp/r_seasons/10002831",
+ "playlist_mincount": 4,
+ "info_dict": {
+ "id": "10002831",
+ "title": "Tokyo Moon",
+ 'description': 'md5:3eef525003bbe96ccf33ec647c43d904',
+ 'thumbnail': 'https://program-static.cf.radiko.jp/0368ee85-5d5f-41c9-8ee1-6c1035d87b3f.jpeg',
+ }
+ }]
- return self.playlist_result(entries(), playlist_id=join_nonempty("person", person_id))
+ def _real_extract(self, url):
+ season_id = self._match_id(url)
+ pageProps = self._download_pageprops(url, season_id)
+ season_id = traverse_obj(pageProps, ("rSeason", "id")) or season_id
+
+ return self.playlist_result(
+ self._programs_entries(pageProps.get("pastPrograms")),
+ playlist_id=season_id,
+ **traverse_obj(pageProps, ("rSeason", {
+ "playlist_title": "rSeasonName",
+ "thumbnail": "backgroundImageUrl",
+ "description": ("summary", filter),
+ })),
+ )
diff --git a/yt_dlp_plugins/extractor/radiko_dependencies.py b/yt_dlp_plugins/extractor/radiko_dependencies.py
new file mode 100644
index 0000000..769a5e3
--- /dev/null
+++ b/yt_dlp_plugins/extractor/radiko_dependencies.py
@@ -0,0 +1,29 @@
+# Bundle importing code Copyright (c) 2021-2022 Grub4K, from yt-dont-lock-p.
+# https://github.com/Grub4K/yt-dont-lock-p/blob/ff3b6e1d42ce8584153ae27544d2c05b50ab5954/yt_dlp_plugins/postprocessor/yt_dont_lock_p/__init__.py#L23-L46
+# Used under 0BSD with permission
+
+# https://discord.com/channels/807245652072857610/1112613156934668338/1416816007732920430 (yt-dlp discord server, https://discord.gg/H5MNcFW63r )
+# [17:00] garret1317: @Grub4K can i pinch your MIT-licensed dependency bundling code to use in my 0BSD-licensed plugin?
+# I will credit of course but i can't require that anyone else does the same
+# (Any response to this message will be considered a written consent or refusal of the request)
+# [17:04] Grub4K: Feel free to use that part under 0BSD
+# [17:05] garret1317: 👍 cheers
+
+try:
+ import protobug
+except ImportError:
+ import sys
+ from pathlib import Path
+
+ # Try importing from zip file bundle
+ search_path = str(Path(__file__).parent.parent)
+ sys.path.append(search_path)
+ try:
+ import protobug
+ except ImportError:
+ protobug = None
+ except Exception:
+ protobug = None
+
+ finally:
+ sys.path.remove(search_path)
diff --git a/yt_dlp_plugins/extractor/radiko_hacks.py b/yt_dlp_plugins/extractor/radiko_hacks.py
new file mode 100644
index 0000000..1d2d1df
--- /dev/null
+++ b/yt_dlp_plugins/extractor/radiko_hacks.py
@@ -0,0 +1,90 @@
+import datetime
+import re
+
+from yt_dlp.extractor.common import InfoExtractor
+from yt_dlp.utils import (
+ join_nonempty,
+ update_url_query,
+ traverse_obj,
+)
+
+# "hacks" as in great jank/schizo shit that works anyway
+
+def _generate_as_live_fragments(self, playlist_base_url, start_at, end_at, domain, headers={}, first_chunk=None):
+ playlist = []
+ chunk_length = 300 # max the api allows
+
+ duration = int(end_at.timestamp() - start_at.timestamp())
+ cursor = 0
+ chunk_num = 1
+ while cursor < duration:
+ chunk_length = min(chunk_length, duration - cursor)
+
+ chunk_start = start_at + datetime.timedelta(seconds=cursor)
+ chunk_url = update_url_query(playlist_base_url, {
+ "seek": chunk_start.timestring(),
+ "l": chunk_length,
+ })
+
+ chunk_fragments, real_chunk_length = _get_chunk_playlist(self, chunk_url, domain, chunk_num, headers, first_chunk)
+
+ cursor += round(real_chunk_length)
+ chunk_num += 1
+ first_chunk = None
+
+ for frag in chunk_fragments:
+ yield frag
+
+
+def _get_chunk_playlist(self, chunk_url, src_id, chunk_num, headers={}, first_chunk=None):
+ EXTINF_duration = re.compile(r"^#EXTINF:([\d.]+),", flags=re.MULTILINE)
+
+ playlist = ""
+ chunk_id = join_nonempty(src_id, chunk_num)
+
+ if first_chunk:
+ m3u8_url = first_chunk
+ else:
+ self.write_debug(f"Preparing {src_id} chunk {chunk_num}")
+ base_formats = self._extract_m3u8_formats(
+ chunk_url, chunk_id, fatal=False, headers=headers,
+ # note=f"Preparing {src_id} chunk {chunk_num}"
+ note=False,
+ errnote=f"Failed to get {src_id} chunk {chunk_num} base format",
+ )
+ m3u8_url = traverse_obj(base_formats, (..., "url",), get_all=False)
+
+ self.write_debug(f"Getting {src_id} chunk {chunk_num} playlist")
+ playlist = self._download_webpage(m3u8_url, chunk_id, note=False, errnote=f"Failed to get {src_id} chunk {chunk_num} playlist")
+ #note=f"Getting {src_id} chunk {chunk_num} fragments")
+
+ return _parse_hls(playlist)
+
+def _parse_hls(m3u8_doc):
+ fragments = []
+
+ # playlists can sometimes be longer than they should
+ # e.g. wowza stream does some strange things
+ # it goes along fine with every fragment 5s long as normal
+ # and then during the ad break it does one with a different length (eg 2s)
+ # i assume so they have a clean split to do ad insertion in? idk
+
+ # but anyway now the chunks aren't always a clean 5mins long
+ # and we get a repeated fragment going into the next chunk
+
+ # so to work around this, we track the real duration from the #EXTINF tags
+
+ playlist_duration = 0
+ fragment_duration = None
+ for line in m3u8_doc.splitlines():
+ if line.startswith("#EXTINF:"):
+ fragment_duration = float(line[len('#EXTINF:'):].split(',')[0]) # from common._parse_m3u8_vod_duration
+ continue
+ elif line.startswith("#"):
+ continue
+
+ fragments.append({"url": line, "duration": fragment_duration})
+ playlist_duration += fragment_duration or 0
+ fragment_duration = None
+
+ return fragments, playlist_duration
diff --git a/yt_dlp_plugins/extractor/radiko_podcast.py b/yt_dlp_plugins/extractor/radiko_podcast.py
new file mode 100644
index 0000000..27b91ad
--- /dev/null
+++ b/yt_dlp_plugins/extractor/radiko_podcast.py
@@ -0,0 +1,175 @@
+from yt_dlp.extractor.common import InfoExtractor
+from yt_dlp.utils import (
+ clean_html,
+ OnDemandPagedList,
+ parse_qs,
+ traverse_obj,
+ update_url_query,
+ url_or_none,
+ str_or_none,
+)
+
+import dataclasses
+import random
+
+from yt_dlp_plugins.extractor.radiko_dependencies import protobug
+if protobug:
+ import yt_dlp_plugins.extractor.radiko_protobufs as pb
+
+
+class _RadikoPodcastBaseIE(InfoExtractor):
+
+ def _extract_episode(self, episode_info):
+ return {
+ **traverse_obj(episode_info, {
+ "id": ("id", {str_or_none}),
+ "url": ("audio", "url"),
+ "duration": ("audio", "durationSec"),
+
+ "title": "title",
+ "description": ("description", {clean_html}),
+ "timestamp": ("startAt", "seconds"),
+
+ "series": "channelTitle",
+ "series_id": "channelId",
+ "channel": "channelStationName",
+ "uploader": "channelStationName",
+ }),
+ "thumbnail": traverse_obj(episode_info, ("imageUrl", {url_or_none}))
+ or traverse_obj(episode_info, ("channelImageUrl", {url_or_none})),
+
+ # so that --download-archive still works if you download from the playlist page
+ "webpage_url": "https://radiko.jp/podcast/episodes/{id}".format(id=traverse_obj(episode_info, "id")),
+ 'extractor_key': RadikoPodcastEpisodeIE.ie_key(),
+ 'extractor': 'RadikoPodcastEpisode',
+ }
+
+
+class RadikoPodcastEpisodeIE(_RadikoPodcastBaseIE):
+ _VALID_URL = r"https?://radiko\.jp/podcast/episodes/(?P<id>[a-f0-9-]+)"
+
+ _TESTS = [{
+ "url": "https://radiko.jp/podcast/episodes/cc8cf709-a50b-4846-aa0e-91ab10cf8bff",
+ "info_dict": {
+ "id": "cc8cf709-a50b-4846-aa0e-91ab10cf8bff",
+ "ext": "mp3",
+ 'title': '2025.6.26 おしゃべり技術くん',
+ 'description': 'md5:1c4048025f68d6da053dd879a5d62304',
+ 'duration': 717,
+ 'thumbnail': 'https://podcast-static.cf.radiko.jp/09f27a48-ae04-4ce7-a024-572460e46eb7-20240214160012.png',
+ 'series': 'おしゃべり技術くん',
+ 'series_id': '09f27a48-ae04-4ce7-a024-572460e46eb7',
+ 'timestamp': 1751554800,
+ 'upload_date': '20250703',
+ 'uploader': 'IBCラジオ',
+ 'channel': 'IBCラジオ',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ next_data = self._search_nextjs_data(webpage, video_id)["props"]["pageProps"]
+
+ episode_info = next_data["podcastEpisode"]
+
+ return self._extract_episode(episode_info)
+
+
+class RadikoPodcastChannelIE(_RadikoPodcastBaseIE):
+ _VALID_URL = r"https?://radiko\.jp/podcast/channels/(?P<id>[a-f0-9-]+)"
+
+ _TESTS = [{
+ "url": "https://radiko.jp/podcast/channels/09f27a48-ae04-4ce7-a024-572460e46eb7",
+ "info_dict": {
+ "id": "09f27a48-ae04-4ce7-a024-572460e46eb7"
+ },
+ 'playlist_mincount': 20,
+ 'expected_warnings': ['Currently this extractor can only extract the latest 20 episodes'],
+ }]
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ webpage = self._download_webpage(url, channel_id)
+ next_data = self._search_nextjs_data(webpage, channel_id)["props"]["pageProps"]
+
+ channel_info = next_data["podcastChannel"]
+ episode_list_response = next_data["listPodcastEpisodesResponse"]
+
+
+ def entries():
+ has_next_page = episode_list_response.get("hasNextPage")
+ for episode in episode_list_response["episodesList"]:
+ cursor = episode.get("id")
+ yield self._extract_episode(episode)
+
+ if has_next_page:
+ if protobug:
+ userservice_token = pb.auth_userservice(self)
+ while has_next_page:
+ page = pb.get_podcast_episodes(self, channel_id, userservice_token, cursor)
+ has_next_page = page.hasNextPage
+ for episode in page.episodes:
+ cursor = episode.id
+ yield self._extract_episode(dataclasses.asdict(episode))
+ else:
+ self.report_warning(f'protobug is required to extract more than the latest {len(episode_list_response["episodesList"])} episodes.\nIf you installed yt-dlp-rajiko manually (with the .whl), use the .zip bundle instead. If you installed with pip, pip install protobug .')
+
+ return {
+ "_type": "playlist",
+ "id": channel_id,
+ **traverse_obj(channel_info, {
+ "playlist_title": "title",
+ "playlist_id": "id",
+ "playlist_description": ("description", {clean_html}),
+ "playlist_thumbnail": ("imageUrl", {url_or_none}),
+
+ }),
+ "entries": entries(),
+ }
+
+
+class RadikoPodcastSearchIE(InfoExtractor):
+ _VALID_URL = r"https?://(?:www\.)?radiko\.jp/#!/search/podcast/(?:timeshift|live)\?"
+ _TESTS = [{
+ "url": "https://radiko.jp/#!/search/podcast/live?key=ドラマ",
+ "playlist_mincount": 51,
+ "info_dict": {
+ "id": "ドラマ",
+ "title": "ドラマ",
+ },
+ }]
+
+ def _pagefunc(self, url, idx):
+ url = update_url_query(url, {"pageIdx": idx})
+ data = self._download_json(url, None, note=f"Downloading page {idx+1}")
+
+ results = []
+ for channel in data.get("channels"):
+ results.append(
+ self.url_result(
+ channel.get("channelUrl"),
+ id=channel.get("id"),
+ ie=RadikoPodcastChannelIE,
+ )
+ )
+ return results
+
+
+ def _real_extract(self, url):
+ # hack away the # so urllib.parse will work (same as normal RadikoSearchIE)
+ url = url.replace("/#!/", "/!/", 1)
+ queries = parse_qs(url)
+
+ keywords = traverse_obj(queries, ("key", 0))
+ search_url = update_url_query("https://api.annex-cf.radiko.jp/v1/podcasts/channels/search_with_keywords_by_offset", {
+ "keywords": keywords,
+ "uid": "".join(random.choices("0123456789abcdef", k=32)),
+ "limit": 50, # result limit. the actual limit before the api errors is 5000, but that seems a bit rude so i'll leave as 50 like the radio one
+ })
+
+ return self.playlist_result(
+ OnDemandPagedList(lambda idx: self._pagefunc(search_url, idx), 50),
+ title=keywords,
+ id=keywords, # i have to put some kind of id or the tests fail
+ )
diff --git a/yt_dlp_plugins/extractor/radiko_protobufs.py b/yt_dlp_plugins/extractor/radiko_protobufs.py
new file mode 100755
index 0000000..a8bbec1
--- /dev/null
+++ b/yt_dlp_plugins/extractor/radiko_protobufs.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+import struct
+import random
+
+from yt_dlp_plugins.extractor.radiko_dependencies import protobug
+
+if protobug: # i suppose it works lmao
+
+
def add_grpc_header(protobuf_data):
	# Frame a serialized protobuf message for grpc-web:
	# 1-byte compression flag (0 = uncompressed) + 4-byte big-endian length,
	# followed by the message bytes themselves.
	frame_prefix = struct.pack('>BI', 0, len(protobuf_data))
	return frame_prefix + protobuf_data
+
def strip_grpc_response(response):
	# Undo grpc-web framing: skip the 5-byte header, then keep only the bytes
	# before the final "grpc-status:" trailer. If no trailer is present this
	# yields b"" (rpartition puts everything in the tail), which callers treat
	# as "no payload".
	payload = response[5:]
	message, _sep, _trailer = payload.rpartition(b"grpc-status:")
	return message
+
def _download_grpc(self, url_or_request, video_id, response_message, note="Downloading GRPC information", *args, **kwargs):
	"""POST a protobug message over grpc-web and decode the response.

	response_message is the protobug message class to decode into; the
	request message is taken from the required "data" keyword argument.
	Returns the decoded message, or None when the response has no payload
	(e.g. the server only sent trailers).
	"""
	# default to {} so callers are not forced to pass a headers override
	# (the original kwargs.pop('headers') raised KeyError when absent)
	extra_headers = kwargs.pop('headers', {})
	request_message = kwargs.pop('data')
	urlh = self._request_webpage(url_or_request, video_id,
		headers={
			'Content-Type': 'application/grpc-web+proto',
			'X-User-Agent': 'grpc-web-javascript/0.1',
			'X-Grpc-Web': '1',
			**extra_headers,
		},
		data=add_grpc_header(protobug.dumps(request_message)), note=note,
		*args, **kwargs,
	)
	response = urlh.read()

	protobuf = strip_grpc_response(response)
	if len(protobuf) > 0:
		return protobug.loads(protobuf, response_message)
+
+
@protobug.message
class SignUpRequest:
	# Request body for radiko.UserService/SignUp.
	# lsid: client-generated 32-hex-char identifier (see sign_up below)
	lsid: protobug.String = protobug.field(1)
+
def sign_up(self):
	"""Register a freshly generated lsid with the UserService and return it.

	Registration is a one-time step: the lsid should be kept (cached) so that
	subsequent sign_in() calls can exchange it for an API token.
	"""
	new_lsid = ''.join(random.choices('0123456789abcdef', k=32))

	_download_grpc(
		self, "https://api.annex.radiko.jp/radiko.UserService/SignUp",
		"UserService", None,
		note="Registering ID",
		headers={'Origin': 'https://radiko.jp'},
		data=SignUpRequest(lsid=new_lsid),
	)
	return new_lsid
+
+
@protobug.message
class SignInRequest:
	# Request body for radiko.UserService/SignIn.
	# NOTE: field numbers start at 2; field 1 is not used by this client.
	lsid: protobug.String = protobug.field(2)  # lsid previously registered via SignUp
	area: protobug.String = protobug.field(3)  # area id; sign_in() below always sends "JP13"
+
@protobug.message
class SignInResponse:
	# Response from radiko.UserService/SignIn.
	# jwt: bearer token sent in the Authorization header of later API calls
	jwt: protobug.String = protobug.field(1)
+
+
def sign_in(self, lsid):
	"""Exchange a previously registered lsid for a JWT auth token."""
	response = _download_grpc(
		self, "https://api.annex.radiko.jp/radiko.UserService/SignIn",
		"UserService", SignInResponse,
		note="Getting auth token",
		headers={'Origin': 'https://radiko.jp'},
		data=SignInRequest(lsid=lsid, area="JP13"),
	)
	return response.jwt
+
+
def auth_userservice(self):
	"""Return a JWT for the annex (podcast) API, registering an lsid on first use.

	The lsid is cached across runs under ("rajiko", "UserService"); sign-in runs
	on every call to obtain a fresh token.
	"""
	cachedata = self.cache.load("rajiko", "UserService")
	lsid = cachedata.get("lsid") if cachedata else None
	if not lsid:
		# no cache entry, or an entry without a usable lsid (previously this
		# would have passed lsid=None straight to sign_in) - register afresh
		lsid = sign_up(self)
		self.cache.store("rajiko", "UserService", {"lsid": lsid})
	jwt = sign_in(self, lsid)
	return jwt
+
+
@protobug.message
class ListPodcastEpisodesRequest:
	# Request body for radiko.PodcastService/ListPodcastEpisodes.
	channel_id: protobug.String = protobug.field(1)
	sort_by_latest: protobug.Bool = protobug.field(2)
	page_length: protobug.Int32 = protobug.field(4)  # the site uses 20 (see get_podcast_episodes)
	cursor: protobug.String = protobug.field(5, default=None)  # id of the last episode already seen; None for the first page
+
+
@protobug.message
class Audio:
	# Audio file metadata attached to a PodcastEpisode.
	revision: protobug.Int32 = protobug.field(1)
	url: protobug.String = protobug.field(2)  # direct media URL
	fileSize: protobug.Int64 = protobug.field(3)  # presumably bytes - TODO confirm
	durationSec: protobug.Int64 = protobug.field(4)  # duration in seconds (per field name)
	transcoded: protobug.Bool = protobug.field(5)
+
@protobug.message
class EpisodeStartAt:
	# Timestamp in protobuf Timestamp style: whole seconds plus nanoseconds.
	# NOTE(review): presumably seconds are Unix epoch - confirm against API output.
	seconds: protobug.UInt64 = protobug.field(1)
	nanos: protobug.UInt64 = protobug.field(2, default=0)
+
+
@protobug.message
class PodcastEpisode:
	# One episode as returned by radiko.PodcastService/ListPodcastEpisodes.
	# Field numbers are sparse; numbers not listed here are simply not decoded.
	id: protobug.String = protobug.field(1)
	workspaceId: protobug.String = protobug.field(2)
	channelId: protobug.String = protobug.field(3)
	title: protobug.String = protobug.field(4)
	description: protobug.String = protobug.field(5)

	audio: Audio = protobug.field(8)  # media info: url, file size, duration (see Audio)
	channelImageUrl: protobug.String = protobug.field(16)
	channelTitle: protobug.String = protobug.field(17)
	channelStationName: protobug.String = protobug.field(18)
	channelAuthor: protobug.String = protobug.field(19)

	channelThumbnailImageUrl: protobug.String = protobug.field(21)
	channelStationType: protobug.UInt32 = protobug.field(22)
	startAt: protobug.field(27) and EpisodeStartAt = protobug.field(27)  # seconds/nanos pair (see EpisodeStartAt)
	isEnabled: protobug.Bool = protobug.field(29)
	hasTranscription: protobug.Bool = protobug.field(32)

	# per-episode artwork; optional, None when the episode has none
	imageUrl: protobug.String = protobug.field(7, default=None)
	thumbnailImageUrl: protobug.String = protobug.field(20, default=None)
+
@protobug.message
class ListPodcastEpisodesResponse:
	# Response from radiko.PodcastService/ListPodcastEpisodes.
	episodes: list[PodcastEpisode] = protobug.field(1)
	hasNextPage: protobug.Bool = protobug.field(2, default=False)  # True when another page exists (fetch it with cursor = last episode id)
+
+
def get_podcast_episodes(self, channel_id, jwt, cursor, page_length=20):
	"""Fetch one page of a podcast channel's episode list.

	cursor is the id of the last episode already seen (the site pages with
	20-item pages, hence the page_length default); jwt comes from sign_in/
	auth_userservice and is sent as a Bearer token.
	"""
	request = ListPodcastEpisodesRequest(
		channel_id=channel_id,
		sort_by_latest=True,
		page_length=page_length,
		cursor=cursor,
	)
	return _download_grpc(
		self, 'https://api.annex.radiko.jp/radiko.PodcastService/ListPodcastEpisodes',
		channel_id, ListPodcastEpisodesResponse,
		note="Downloading episode listings",
		headers={'Authorization': f'Bearer {jwt}'},
		data=request,
	)