1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
|
#!/usr/bin/env python3
import cgi
import cgitb
cgitb.enable()
from datetime import datetime
import feedgenerator
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import threading
# Landing page of the site's INFORMATION section. It is fetched for the
# listing and is also the base against which relative URLs are resolved.
root = "https://www.toshiki-kadomatsu.jp/information/"

# Feed object (feedgenerator's Rss201rev2Feed) that scraped articles are
# added to before the document is written out.
feed = feedgenerator.Rss201rev2Feed(
    link=root,
    language="ja",
    title="角松敏生 OFFICIAL SITE",
    description="角松敏生 OFFICIAL SITE - INFORMATION section",
)
class Item:
    """Plain record describing one article destined for the feed.

    Attributes:
        title: Headline of the article.
        date: Publication date of the article.
        content: Body of the article.
        url: Address of the article page.
    """

    def __init__(self, title, date, content, url):
        # Values are stored exactly as given; no validation or conversion.
        self.title, self.date = title, date
        self.content, self.url = content, url
# Single requests session shared by every get_soup() call.
s = requests.Session()
def get_soup(url, *, timeout=30):
    """Download ``url`` through the shared session and parse it with lxml.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds handed to ``requests`` as the request timeout, so
            that a stalled server cannot hang the script indefinitely.

    Returns:
        A ``BeautifulSoup`` tree built from the response body, decoded as
        UTF-8.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    response = s.get(url, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error page.
    response.raise_for_status()
    # Force UTF-8 rather than relying on the encoding requests infers.
    response.encoding = "utf-8"
    return BeautifulSoup(response.text, "lxml")
def relative2absolute(rel):
    """Resolve ``rel`` against the module-level ``root`` URL.

    Args:
        rel: A URL reference, typically a relative path taken from an
            ``href`` or ``src`` attribute.

    Returns:
        The result of ``urljoin(root, rel)``.
    """
    absolute = urljoin(root, rel)
    return absolute
def format_article(soup):
    """Rewrite link and image URLs inside ``soup`` to absolute form.

    The tree is modified in place and the same object is returned so the
    call can be chained.

    Args:
        soup: A parsed element (or whole document) exposing ``find_all``.

    Returns:
        The ``soup`` argument, after its ``<a href>`` and ``<img src>``
        attributes have been passed through ``relative2absolute``.
    """
    # ``href=True`` / ``src=True`` match only tags that carry the attribute,
    # so e.g. a named anchor without ``href`` no longer raises KeyError.
    for anchor in soup.find_all("a", href=True):
        anchor["href"] = relative2absolute(anchor["href"])
    for image in soup.find_all("img", src=True):
        image["src"] = relative2absolute(image["src"])
    return soup
def get_article(url):
    """Fetch an article page and return its detail element.

    Args:
        url: Address of the article page.

    Returns:
        The page's ``<li id="Detail">`` element after it has been passed
        through ``format_article``.

    Raises:
        ValueError: If the page contains no ``<li id="Detail">`` element.
    """
    page = get_soup(url)
    article = page.find("li", id="Detail")
    if article is None:
        # Without this check the failure would surface later as an
        # AttributeError on None that does not say which page was at fault.
        raise ValueError(
            'no <li id="Detail"> element found at {}'.format(url)
        )
    return format_article(article)
# Module-level accumulator: make_item() appends one Item per listing entry
# (it is run in worker threads), and get_rss_items() returns this same list.
rss_items = []
def make_item(i):
    """Build an ``Item`` from one listing entry and append it to ``rss_items``.

    The entry's link is followed to download the full article, so this
    call performs a network request.

    Args:
        i: A listing entry element containing an ``<a>`` link to the
            article and a ``<time datetime="YYYY-MM-DD">`` tag.

    Returns:
        None. The new ``Item`` is appended to the module-level
        ``rss_items`` list.
    """
    link = i.find("a")  # looked up once; supplies both title and URL
    # The first text node of the link is the title. Fall back to an empty
    # title rather than dropping the entry when the link has no text.
    title = next(iter(link.strings), "")
    date = datetime.strptime(i.find("time")["datetime"], "%Y-%m-%d")
    content_url = relative2absolute(link["href"])
    # get_article() already returns the element with its URLs made
    # absolute, so no second format_article() pass is needed here.
    content = get_article(content_url)
    rss_items.append(Item(title, date, content, content_url))
def get_rss_items(soup):
    """Build an ``Item`` for every entry on the listing page, concurrently.

    Each ``<dl>`` inside ``<ul id="List">`` is handed to ``make_item`` in
    its own thread. Results accumulate in the module-level ``rss_items``
    list, so calling this more than once appends to earlier results.

    Args:
        soup: Parsed listing page.

    Returns:
        The module-level ``rss_items`` list, sorted newest first.

    Raises:
        ValueError: If the page contains no ``<ul id="List">`` element.
    """
    listing = soup.find("ul", id="List")
    if listing is None:
        raise ValueError('listing page has no <ul id="List"> element')

    workers = [
        threading.Thread(target=make_item, args=(entry,))
        for entry in listing.find_all("dl")
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    # Workers append in completion order, which varies from run to run.
    # Sort by date (URL as tie-breaker) so the output is deterministic.
    rss_items.sort(key=lambda item: (item.date, item.url), reverse=True)
    return rss_items
import sys

# Scrape the complete listing and register every article with the feed.
rss_items = get_rss_items(get_soup(root + "?select=all"))
for entry in rss_items:
    feed.add_item(
        title=entry.title,
        link=entry.url,
        pubdate=entry.date,
        description=entry.content,
    )

# CGI response: header line, blank line, then the document. The bytes are
# written directly so the body really is UTF-8, as the header declares.
# print() would encode with the process locale, which under a CGI
# environment may not be able to represent the Japanese text.
payload = feed.writeString("utf-8")
stdout = sys.stdout.buffer
stdout.write(b"Content-Type: application/rss+xml; charset=UTF-8\n\n")
stdout.write(payload.encode("utf-8"))
stdout.write(b"\n")
stdout.flush()
|