diff options
author | garret <garret@airmail.cc> | 2022-10-01 15:22:02 +0100 |
---|---|---|
committer | garret <garret@airmail.cc> | 2022-10-01 15:22:02 +0100 |
commit | b46908e859f3dd14e6432505ec6d6f545a249619 (patch) | |
tree | b47ed7919202784e96f24841879cada315516845 | |
download | kadomatsu-rss-b46908e859f3dd14e6432505ec6d6f545a249619.tar.gz kadomatsu-rss-b46908e859f3dd14e6432505ec6d6f545a249619.tar.bz2 kadomatsu-rss-b46908e859f3dd14e6432505ec6d6f545a249619.zip |
initial commit
-rwxr-xr-x | kadomatsu-rss.py | 81 |
1 files changed, 81 insertions, 0 deletions
#!/usr/bin/env python3
"""CGI script: scrape the 角松敏生 (Toshiki Kadomatsu) official-site
"information" listing and re-serve the posts as an RSS 2.0 feed."""
import cgi  # noqa: F401 -- kept: part of the historical CGI setup
import cgitb

cgitb.enable()  # render tracebacks as HTML in the CGI response

from datetime import datetime, timedelta  # noqa: F401 -- timedelta is used by
# the commented-out requests_cache configuration below
from urllib.parse import urljoin

import feedgenerator
import requests

# import requests_cache
from bs4 import BeautifulSoup
from os import path  # noqa: F401 -- kept for compatibility; URL joining now uses urljoin

# urls_expire_after = {
#     '*?select=all': timedelta(minutes=15),
#     '*/detail.php*': -1
# }

# requests_cache.install_cache('kadomatsu', urls_expire_after=urls_expire_after)

# Base URL of the "information" (news) section of the site.
root = "http://www.toshiki-kadomatsu.jp/information/"

# Seconds to wait for the remote site before giving up; without a timeout a
# stalled upstream would hang the CGI process indefinitely.
REQUEST_TIMEOUT = 30


class Item:
    """One news post scraped from the information listing."""

    def __init__(self, title, date, content, url):
        self.title = title      # post title: first text node inside the link
        self.date = date        # naive datetime parsed from <time datetime=...>
        self.url = url          # absolute URL of the post's detail page
        self.content = content  # BeautifulSoup Tag with the post body, or None


def get_soup(url):
    """Fetch *url* and return it parsed as a BeautifulSoup (lxml) tree.

    Raises ``requests.HTTPError`` on a non-2xx response instead of silently
    parsing an error page. The encoding is forced to UTF-8 because the site
    does not reliably declare it.
    """
    site = requests.get(url, timeout=REQUEST_TIMEOUT)
    site.raise_for_status()
    site.encoding = "utf-8"
    return BeautifulSoup(site.text, "lxml")


def get_article(soup):
    """Return the post body (<li id="Detail">) from a detail page, or None."""
    return soup.find("li", id="Detail")


def get_rss_items(soup):
    """Scrape the listing page *soup* into a list of ``Item`` objects.

    Each <dl> entry on the listing is followed to its detail page so the
    full post body can be embedded in the feed.
    """
    listing = soup.find("ul", id="List")
    rss_items = []
    for entry in listing.find_all("dl"):
        link = entry.find("a")
        title = next(link.strings)  # first text node inside the link
        date = datetime.strptime(entry.find("time")["datetime"], "%Y-%m-%d")
        # urljoin (not os.path.join): handles absolute hrefs and query
        # strings correctly and is platform-independent.
        content_url = urljoin(root, link["href"])
        content = get_article(get_soup(content_url))
        rss_items.append(Item(title, date, content, content_url))
    return rss_items


def main():
    """Build the feed and write the full CGI response to stdout."""
    feed = feedgenerator.Rss201rev2Feed(
        title="角松敏生 OFFICIAL SITE",
        description="Recent 角松敏生 Information posts",
        link=root,
        language="ja",
    )

    for item in get_rss_items(get_soup(urljoin(root, "?select=all"))):
        feed.add_item(
            title=item.title,
            link=item.url,
            pubdate=item.date,
            # Serialize the Tag to an HTML string (feedgenerator expects str);
            # fall back to "" when the detail page had no <li id="Detail">.
            description="" if item.content is None else str(item.content),
            # Bug fix: was rss_items[0].date, which stamped every entry with
            # the first post's date; each entry carries its own date now.
            updateddate=item.date,
        )

    print("Content-Type: application/rss+xml; charset=UTF-8")
    print()
    print(feed.writeString("utf-8"))


if __name__ == "__main__":
    main()