author	garret <garret@airmail.cc>	2022-10-01 15:22:02 +0100
committer	garret <garret@airmail.cc>	2022-10-01 15:22:02 +0100
commit	b46908e859f3dd14e6432505ec6d6f545a249619 (patch)
tree	b47ed7919202784e96f24841879cada315516845 /kadomatsu-rss.py
initial commit
Diffstat (limited to 'kadomatsu-rss.py')
-rwxr-xr-x	kadomatsu-rss.py	81
1 file changed, 81 insertions(+), 0 deletions(-)
diff --git a/kadomatsu-rss.py b/kadomatsu-rss.py
new file mode 100755
index 0000000..c1fb4ff
--- /dev/null
+++ b/kadomatsu-rss.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+import cgitb
+
+# Enable CGI traceback reporting first, so any failure below
+# (including import errors) is rendered in the HTTP response.
+cgitb.enable()
+
+from datetime import datetime, timedelta
+from urllib.parse import urljoin
+
+import feedgenerator
+import requests
+
+# import requests_cache
+from bs4 import BeautifulSoup
+
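+# Optional on-disk HTTP cache (left disabled): listing pages would expire
+# after 15 minutes, while detail pages would be cached forever (-1).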
+# urls_expire_after = {
+# '*?select=all': timedelta(minutes=15),
+# '*/detail.php*': -1
+# }
+
+# requests_cache.install_cache('kadomatsu', urls_expire_after=urls_expire_after)
+
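+# Base URL of the Information section being scraped.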
+root = "http://www.toshiki-kadomatsu.jp/information/"
+
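+# Feed-level metadata; items are added per post below.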
+feed = feedgenerator.Rss201rev2Feed(
+ title="角松敏生 OFFICIAL SITE",
+ description="Recent 角松敏生 Information posts",
+ link=root,
+ language="ja",
+)
+
+
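+# Simple container for one scraped Information post.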
+class Item:
+ def __init__(self, title, date, content, url):
+ self.title = title
+ self.date = date
+ self.url = url
+ self.content = content
+
+
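+# Fetch a URL and return its parsed document tree.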
+def get_soup(url):
+    # Force UTF-8 rather than trusting requests' guessed encoding; the
+    # timeout keeps a slow upstream from hanging the CGI process.
+    site = requests.get(url, timeout=30)
+    site.encoding = "utf-8"
+    return BeautifulSoup(site.text, "lxml")
+
+
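+# Offline debugging alternative: parse a saved copy of the listing page.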
+# with open("index.html") as fp:
+# soup = BeautifulSoup(fp, "lxml")
+
+
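+# The body of a post lives in <li id="Detail"> on its detail page.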
+def get_article(soup):
+ return soup.find("li", id="Detail")
+
+
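+# Walk every <dl> entry on the listing page, fetching each post's
+# detail page for its full content.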
+def get_rss_items(soup):
+    rss_items = []
+    for i in soup.find("ul", id="List").find_all("dl"):
+        # The first string inside the link is the post title.
+        title = list(i.find("a").strings)[0]
+        date = datetime.strptime(i.find("time")["datetime"], "%Y-%m-%d")
+        content_url = urljoin(root, i.find("a")["href"])
+        # Stringify the bs4 tag so the feed stores plain HTML.
+        content = str(get_article(get_soup(content_url)))
+        rss_items.append(Item(title, date, content, content_url))
+    return rss_items
+
+
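+# Build the item list from the "?select=all" listing page.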
+rss_items = get_rss_items(get_soup(urljoin(root, "?select=all")))
+
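+# Populate the feed with one entry per post.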
+for i in rss_items:
+    feed.add_item(
+        title=i.title,
+        link=i.url,
+        pubdate=i.date,
+        description=i.content,
+        updateddate=i.date,
+    )
+
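+# Emit the CGI header, a blank separator line, then the feed XML.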
+print("Content-Type: application/rss+xml; charset=UTF-8")
+print()
+print(feed.writeString("utf-8"))