#!/usr/bin/env python3
"""CGI script that republishes the Toshiki Kadomatsu information page as RSS.

The script downloads the site's information listing, follows every listed
entry to its own page to collect the article body, and writes an RSS 2.0
document (preceded by the CGI ``Content-Type`` header) to standard output.
"""
import cgi
import cgitb

# Kept ahead of the remaining imports, matching the original statement order,
# so cgitb's traceback reporting is already active while they are loaded.
cgitb.enable()

import sys
from datetime import datetime
from os import path
from urllib.parse import urljoin

import feedgenerator
import requests
from bs4 import BeautifulSoup

# Base URL of the information section; relative links are resolved against it.
root = "http://www.toshiki-kadomatsu.jp/information/"

# Upper bound, in seconds, for each HTTP request so that a stalled upstream
# server cannot keep the CGI process waiting forever.
REQUEST_TIMEOUT = 30

feed = feedgenerator.Rss201rev2Feed(
    title="角松敏生 OFFICIAL SITE",
    description="Recent 角松敏生 Information posts",
    link=root,
    language="ja",
)


class Item:
    """One scraped information entry, ready to be added to the feed."""

    def __init__(self, title, date, content, url):
        """Store the entry's fields.

        Args:
            title: Text taken from the entry's link in the listing.
            date: ``datetime`` parsed from the entry's ``<time>`` element.
            content: Result of ``get_article`` for the entry's page (a parsed
                element, or ``None`` when the page has no such element).
            url: Absolute URL of the entry's page.
        """
        self.title = title
        self.date = date
        self.url = url
        self.content = content


# A single session is shared by every request made by this script.
s = requests.Session()


def get_soup(url):
    """Download *url* and return the page parsed with BeautifulSoup.

    The response body is always decoded as UTF-8 and parsed with the ``lxml``
    parser.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        The ``BeautifulSoup`` tree of the downloaded page.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within ``REQUEST_TIMEOUT`` seconds.
    """
    response = s.get(url, timeout=REQUEST_TIMEOUT)
    response.encoding = "utf-8"
    return BeautifulSoup(response.text, "lxml")


# NOTE: for offline debugging, a saved copy of the listing can be parsed in
# place of a live download by passing an open "index.html" file object to
# BeautifulSoup(fp, "lxml").


def get_article(soup):
    """Return the first ``<li id="Detail">`` element of *soup*.

    Returns ``None`` when the page contains no such element.
    """
    return soup.find("li", id="Detail")


def get_rss_items(soup):
    """Build an ``Item`` for every ``<dl>`` inside the ``<ul id="List">``.

    For each entry the title is the first text string of its first ``<a>``,
    the date comes from the ``datetime`` attribute of its ``<time>`` element
    (``YYYY-MM-DD``), and the content is fetched from the linked page.

    Args:
        soup: Parsed listing page.

    Returns:
        A list of ``Item`` objects in the order the entries appear on the
        page.
    """
    listing = soup.find("ul", id="List")
    rss_items = []
    for entry in listing.find_all("dl"):
        anchor = entry.find("a")
        # Only the first text node is needed; an anchor with no text yields
        # an empty title.
        title = next(iter(anchor.strings), "")
        date = datetime.strptime(entry.find("time")["datetime"], "%Y-%m-%d")
        # urljoin applies URL resolution rules, so relative, root-relative
        # and absolute hrefs all produce a valid absolute URL.
        content_url = urljoin(root, anchor["href"])
        content = get_article(get_soup(content_url))
        rss_items.append(Item(title, date, content, content_url))
    return rss_items


rss_items = get_rss_items(get_soup(urljoin(root, "?select=all")))

for item in rss_items:
    feed.add_item(
        title=item.title,
        link=item.url,
        pubdate=item.date,
        description=item.content,
        # Every item carries the date of the first listed entry as its
        # updated date.
        updateddate=rss_items[0].date,
    )

# Emit the CGI header, a blank separator line and the feed as UTF-8 bytes.
# Writing through the binary layer keeps the output independent of the
# locale encoding that the text layer of stdout would otherwise apply.
response_text = (
    "Content-Type: application/rss+xml; charset=UTF-8"
    + "\n"
    + "\n"
    + feed.writeString("utf-8")
    + "\n"
)
sys.stdout.buffer.write(response_text.encode("utf-8"))
sys.stdout.buffer.flush()