#!/usr/bin/env python3
"""Publish the INFORMATION section of the 角松敏生 official site as RSS 2.0.

The script downloads the article listing, fetches every linked article
page, rewrites relative links inside each article to absolute URLs and
prints the resulting feed to stdout preceded by a ``Content-Type`` header
(CGI-style output).
"""
import logging
import sys
import threading  # noqa: F401  (retained from the original import list)
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from urllib.parse import urljoin

try:
    import cgi  # noqa: F401  (unused here; kept for backward compatibility)
except ImportError:
    # The ``cgi`` module was removed from the standard library in
    # Python 3.13 (PEP 594); nothing in this script needs it.
    cgi = None

import feedgenerator
import requests
from bs4 import BeautifulSoup

root = "https://www.toshiki-kadomatsu.jp/information/"

# Seconds to wait for the remote site; requests has no default timeout and
# would otherwise block forever on a stalled connection.
REQUEST_TIMEOUT = 30

# Upper bound on simultaneous article downloads.
MAX_WORKERS = 8

logger = logging.getLogger(__name__)

feed = feedgenerator.Rss201rev2Feed(
    title="角松敏生 OFFICIAL SITE",
    description="角松敏生 OFFICIAL SITE - INFORMATION section",
    link=root,
    language="ja",
)

s = requests.Session()
s.headers.update(
    {'User-Agent': 'kadomatsu-rss https://427738.xyz/kadomatsu-rss.html'}
)


def get_soup(url: str) -> BeautifulSoup:
    """Download *url* and return it parsed with the ``lxml`` parser.

    The response body is always decoded as UTF-8.

    Raises:
        requests.RequestException: on connection problems, timeouts or a
            non-success HTTP status.
    """
    response = s.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of parsing an error page as content.
    response.raise_for_status()
    response.encoding = "utf-8"
    return BeautifulSoup(response.text, "lxml")


def relative2absolute(rel: str) -> str:
    """Resolve *rel* against the site's information root URL."""
    return urljoin(root, rel)


def format_article(soup):
    """Make every link and image URL inside *soup* absolute, in place.

    ``<a>`` tags without ``href`` and ``<img>`` tags without ``src`` are
    left untouched.

    Returns:
        The same *soup* object that was passed in.
    """
    for anchor in soup.find_all("a", href=True):
        anchor["href"] = relative2absolute(anchor["href"])
    for image in soup.find_all("img", src=True):
        image["src"] = relative2absolute(image["src"])
    return soup


def get_article(url: str):
    """Fetch the article page at *url* and return its body element.

    The body is the ``<li id="Detail">`` element, with its links and
    images rewritten to absolute URLs.

    Raises:
        ValueError: if the page contains no ``<li id="Detail">`` element.
        requests.RequestException: if the page cannot be downloaded.
    """
    article = get_soup(url).find("li", id="Detail")
    if article is None:
        raise ValueError(f'no <li id="Detail"> element found at {url}')
    return format_article(article)


def _build_item(entry) -> dict:
    """Return ``feed.add_item`` keyword arguments for one listing entry.

    *entry* is a ``<dl>`` element of the listing; its first ``<a>``
    supplies the title (first text node) and article URL, and its
    ``<time datetime="YYYY-MM-DD">`` supplies the publication date.
    """
    link = entry.find("a")
    # Only the first text node of the anchor is used as the title.
    title = str(next(link.strings, ""))
    date = datetime.strptime(entry.find("time")["datetime"], "%Y-%m-%d")
    content_url = relative2absolute(link["href"])
    return {
        "title": title,
        "pubdate": date,
        "link": content_url,
        # Serialise the article element to an HTML string for the feed.
        "description": str(get_article(content_url)),
    }


def _try_build_item(entry):
    """Build one feed item, returning ``None`` (and logging) on failure."""
    try:
        return _build_item(entry)
    except Exception:
        # Worker boundary: one broken article must not take down the
        # whole feed, so it is logged and skipped.
        logger.exception("skipping listing entry that could not be processed")
        return None


def add_article(i):
    """Fetch the article referenced by listing entry *i* and add it to ``feed``."""
    feed.add_item(**_build_item(i))


def make_feed(soup):
    """Populate ``feed`` from the parsed listing page *soup*.

    Articles are downloaded concurrently, but added to the feed in the
    order they appear in the listing so the output is deterministic.
    Entries whose article cannot be fetched or parsed are skipped.

    Raises:
        ValueError: if the page contains no ``<ul id="List">`` element.
    """
    items = soup.find("ul", id="List")
    if items is None:
        raise ValueError('no <ul id="List"> element found in the listing page')

    entries = items.find_all("dl")
    if not entries:
        return

    workers = min(MAX_WORKERS, len(entries))
    with ThreadPoolExecutor(max_workers=workers) as pool:
        # ``map`` yields results in input order regardless of which
        # download finishes first.
        built = list(pool.map(_try_build_item, entries))

    for item in built:
        if item is not None:
            feed.add_item(**item)


def main():
    """Build the feed and write it to stdout with a ``Content-Type`` header."""
    make_feed(get_soup(root + "?select=all"))
    # Write bytes directly so the output is UTF-8 regardless of the
    # locale-derived encoding of sys.stdout.
    out = sys.stdout.buffer
    out.write(b"Content-Type: application/rss+xml; charset=UTF-8\n\n")
    out.write(feed.writeString("utf-8").encode("utf-8"))
    out.write(b"\n")
    out.flush()


if __name__ == "__main__":
    main()