#!/usr/bin/env python3
"""CGI script that republishes a site's INFORMATION listing as an RSS 2.0 feed.

The listing page is fetched, every ``<dl>`` entry in ``ul#List`` is turned
into a feed item, and each entry's detail page (``li#Detail``) is fetched and
embedded as the item description with its links made absolute.  The result is
written to standard output as a CGI response.
"""
import cgi
import cgitb
import sys
from datetime import datetime
from urllib.parse import urljoin

# Enabled before the third-party imports so that a failure while importing
# them is also reported through cgitb.
cgitb.enable()

import feedgenerator
import requests
from bs4 import BeautifulSoup

root = "http://www.toshiki-kadomatsu.jp/information/"

# Seconds to wait for the remote site before giving up; without a timeout a
# stalled connection would keep the CGI process alive indefinitely.
REQUEST_TIMEOUT = 30

feed = feedgenerator.Rss201rev2Feed(
    title="角松敏生 OFFICIAL SITE",
    description="角松敏生 OFFICIAL SITE - INFORMATION section",
    link=root,
    language="ja",
)


class Item:
    """One feed entry: title, publication date, HTML content and source URL."""

    def __init__(self, title, date, content, url):
        self.title = title
        self.date = date
        self.url = url
        self.content = content


# A single session is shared by all requests made while building the feed.
s = requests.Session()


def get_soup(url):
    """Fetch *url* and return it parsed with BeautifulSoup's lxml parser.

    The response body is always decoded as UTF-8, regardless of what the
    server declares.
    """
    response = s.get(url, timeout=REQUEST_TIMEOUT)
    response.encoding = "utf-8"
    return BeautifulSoup(response.text, "lxml")


def relative2absolute(rel):
    """Resolve *rel* against ``root`` and return the resulting URL."""
    return urljoin(root, rel)


def format_article(soup):
    """Rewrite link and image targets inside *soup* to absolute URLs.

    Only ``<a>`` tags that have an ``href`` and ``<img>`` tags that have a
    ``src`` are touched; tags lacking the attribute are left alone instead of
    raising ``KeyError``.  The tree is modified in place and returned.
    """
    for anchor in soup.find_all("a", href=True):
        anchor["href"] = relative2absolute(anchor["href"])
    for image in soup.find_all("img", src=True):
        image["src"] = relative2absolute(image["src"])
    return soup


def get_article(url):
    """Fetch *url* and return its ``li#Detail`` element with absolute links.

    Returns ``None`` when the page has no such element.
    """
    article = get_soup(url).find("li", id="Detail")
    if article is None:
        return None
    return format_article(article)


def get_rss_items(soup):
    """Build the list of ``Item`` objects for the listing page *soup*.

    Each ``<dl>`` inside ``ul#List`` yields one item.  Entries without a link
    or without a ``<time datetime=...>`` value are skipped.  An empty list is
    returned when the listing element is absent.

    Raises:
        ValueError: if a ``datetime`` attribute is not in ``%Y-%m-%d`` form.
    """
    listing = soup.find("ul", id="List")
    if listing is None:
        return []

    rss_items = []
    for entry in listing.find_all("dl"):
        link = entry.find("a", href=True)
        time_tag = entry.find("time")
        if link is None or time_tag is None or not time_tag.get("datetime"):
            continue

        # The title is the first text node of the link; fall back to an empty
        # title rather than failing when the link contains no text at all.
        title = next(link.strings, "")
        date = datetime.strptime(time_tag["datetime"], "%Y-%m-%d")
        content_url = relative2absolute(link["href"])

        # get_article() already returns the article with absolute URLs.
        article = get_article(content_url)
        content = article if article is not None else ""

        rss_items.append(Item(title, date, content, content_url))
    return rss_items


rss_items = get_rss_items(
    get_soup("http://www.toshiki-kadomatsu.jp/information/?select=all")
)

# Every item carries the date of the first listed entry as its updated date.
# NOTE(review): presumably the listing is newest-first -- confirm.
first_date = rss_items[0].date if rss_items else None
for item in rss_items:
    feed.add_item(
        title=item.title,
        link=item.url,
        pubdate=item.date,
        description=item.content,
        updateddate=first_date,
    )

# Emit the response as UTF-8 bytes so the output matches the declared charset
# even when the server runs the script with a non-UTF-8 stdout encoding.
response_text = (
    "Content-Type: application/rss+xml; charset=UTF-8\n"
    "\n"
    + feed.writeString("utf-8")
    + "\n"
)
sys.stdout.buffer.write(response_text.encode("utf-8"))
sys.stdout.buffer.flush()