From c809d78e52b886235e28d8d48fc7531c4bd77c43 Mon Sep 17 00:00:00 2001 From: garret Date: Mon, 21 Aug 2023 00:24:07 +0100 Subject: Escape HTML in tracklist entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I got a chapter titled "角松敏生 - GO &amp; SEE MY LOVE" in a recent download. The &amp; shows up on the actual site[1][2], and I've only seen it on this station (NORTHWAVE); on another (INT) it's fine[3] so this is probably just NORTH WAVE doing fucky things. Either way, it's not much bother to unescape and it makes the metadata better. Hopefully there's not an actual band/song with HTML escapes in the name lmao [1] https://files.catbox.moe/uk2ucp.png [2] https://files.catbox.moe/cuibi3.png [3] https://files.catbox.moe/l1ap39.png --- yt_dlp_plugins/extractor/radiko.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'yt_dlp_plugins/extractor') diff --git a/yt_dlp_plugins/extractor/radiko.py b/yt_dlp_plugins/extractor/radiko.py index 2dc7380..2c9112f 100755 --- a/yt_dlp_plugins/extractor/radiko.py +++ b/yt_dlp_plugins/extractor/radiko.py @@ -12,6 +12,7 @@ from yt_dlp.utils import ( join_nonempty, parse_qs, traverse_obj, + unescapeHTML, unified_timestamp, url_or_none, update_url_query, @@ -773,9 +774,9 @@ class RadikoTimeFreeIE(_RadikoBaseIE): chapters = [] for track in data: - artist = traverse_obj(track, ("artist", "name")) or track.get("artist_name") + artist = unescapeHTML(traverse_obj(track, ("artist", "name")) or track.get("artist_name")) chapters.append({ - "title": join_nonempty(artist, track.get("title"), delim=" - "), + "title": unescapeHTML(join_nonempty(artist, track.get("title"), delim=" - ")), "start_time": (datetime.datetime.fromisoformat(track.get("displayed_start_time")) - start).total_seconds(), }) -- cgit v1.2.3-70-g09d2