From c809d78e52b886235e28d8d48fc7531c4bd77c43 Mon Sep 17 00:00:00 2001
From: garret <garret@airmail.cc>
Date: Mon, 21 Aug 2023 00:24:07 +0100
Subject: Unescape HTML in tracklist entries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I got a chapter titled "角松敏生 - GO &amp; SEE MY LOVE" in a recent
download. The &amp; shows up on the actual site[1][2], and I've only
seen it on this station (NORTHWAVE); on another (INT) it's fine[3], so
this is probably just NORTH WAVE doing fucky things. Either way, it's
not much bother to unescape and it makes the metadata better. Hopefully
there's not an actual band/song with HTML escapes in the name lmao

[1] https://files.catbox.moe/uk2ucp.png
[2] https://files.catbox.moe/cuibi3.png
[3] https://files.catbox.moe/l1ap39.png
---
 yt_dlp_plugins/extractor/radiko.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'yt_dlp_plugins')

diff --git a/yt_dlp_plugins/extractor/radiko.py b/yt_dlp_plugins/extractor/radiko.py
index 2dc7380..2c9112f 100755
--- a/yt_dlp_plugins/extractor/radiko.py
+++ b/yt_dlp_plugins/extractor/radiko.py
@@ -12,6 +12,7 @@ from yt_dlp.utils import (
 	join_nonempty,
 	parse_qs,
 	traverse_obj,
+	unescapeHTML,
 	unified_timestamp,
 	url_or_none,
 	update_url_query,
@@ -773,9 +774,9 @@ class RadikoTimeFreeIE(_RadikoBaseIE):
 
 		chapters = []
 		for track in data:
-			artist = traverse_obj(track, ("artist", "name")) or track.get("artist_name")
+			artist = unescapeHTML(traverse_obj(track, ("artist", "name")) or track.get("artist_name"))
 			chapters.append({
-				"title": join_nonempty(artist, track.get("title"), delim=" - "),
+				"title": unescapeHTML(join_nonempty(artist, track.get("title"), delim=" - ")),
 				"start_time": (datetime.datetime.fromisoformat(track.get("displayed_start_time")) - start).total_seconds(),
 			})
 
-- 
cgit v1.2.3-70-g09d2