about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/yt_dlp_plugins/extractor/radiko.py
diff options
context:
space:
mode:
author: garret <garret@airmail.cc> 2023-08-21 00:24:07 +0100
committer: garret <garret@airmail.cc> 2023-08-21 00:24:07 +0100
commit: c809d78e52b886235e28d8d48fc7531c4bd77c43 (patch)
tree: 821841d41309ca98f0c562392fa80003e78952b2 /yt_dlp_plugins/extractor/radiko.py
parent: 1f25017782f42dafcf1a7612fce8b28fe23ff76c (diff)
download: yt-dlp-rajiko-c809d78e52b886235e28d8d48fc7531c4bd77c43.tar.gz
yt-dlp-rajiko-c809d78e52b886235e28d8d48fc7531c4bd77c43.tar.bz2
yt-dlp-rajiko-c809d78e52b886235e28d8d48fc7531c4bd77c43.zip
Escape HTML in tracklist entries [branch: tracklist-html-escape]
I got a chapter titled "角松敏生 - GO &amp; SEE MY LOVE" in a recent download. The &amp; shows up on the actual site [1][2], and I've only seen it on this station (NORTHWAVE); on another (INT) it's fine [3], so this is probably just NORTH WAVE doing something odd. Either way, it's not much bother to unescape, and it makes the metadata better. Hopefully there's not an actual band/song with HTML escapes in the name lmao

[1] https://files.catbox.moe/uk2ucp.png
[2] https://files.catbox.moe/cuibi3.png
[3] https://files.catbox.moe/l1ap39.png
Diffstat (limited to 'yt_dlp_plugins/extractor/radiko.py')
-rwxr-xr-x  yt_dlp_plugins/extractor/radiko.py  5
1 files changed, 3 insertions, 2 deletions
diff --git a/yt_dlp_plugins/extractor/radiko.py b/yt_dlp_plugins/extractor/radiko.py
index 2dc7380..2c9112f 100755
--- a/yt_dlp_plugins/extractor/radiko.py
+++ b/yt_dlp_plugins/extractor/radiko.py
@@ -12,6 +12,7 @@ from yt_dlp.utils import (
join_nonempty,
parse_qs,
traverse_obj,
+ unescapeHTML,
unified_timestamp,
url_or_none,
update_url_query,
@@ -773,9 +774,9 @@ class RadikoTimeFreeIE(_RadikoBaseIE):
chapters = []
for track in data:
- artist = traverse_obj(track, ("artist", "name")) or track.get("artist_name")
+ artist = unescapeHTML(traverse_obj(track, ("artist", "name")) or track.get("artist_name"))
chapters.append({
- "title": join_nonempty(artist, track.get("title"), delim=" - "),
+ "title": unescapeHTML(join_nonempty(artist, track.get("title"), delim=" - ")),
"start_time": (datetime.datetime.fromisoformat(track.get("displayed_start_time")) - start).total_seconds(),
})