diff options
author | garret <garret@airmail.cc> | 2022-10-02 02:38:22 +0100 |
---|---|---|
committer | garret <garret@airmail.cc> | 2022-10-02 02:38:22 +0100 |
commit | c0ef4eb6303989d82672b12affa76efb879e59b7 (patch) | |
tree | d0d8b01b437879a482542015ec5df668fbec28f4 /kadomatsu-rss.py | |
parent | c826ef809832cbad66882e110c073af96e5b77ba (diff) | |
download | kadomatsu-rss-c0ef4eb6303989d82672b12affa76efb879e59b7.tar.gz kadomatsu-rss-c0ef4eb6303989d82672b12affa76efb879e59b7.tar.bz2 kadomatsu-rss-c0ef4eb6303989d82672b12affa76efb879e59b7.zip |
add threading
biggest performance increase, let's go
much spaghetti however
Diffstat (limited to 'kadomatsu-rss.py')
-rwxr-xr-x | kadomatsu-rss.py | 26 |
1 file changed, 19 insertions, 7 deletions
diff --git a/kadomatsu-rss.py b/kadomatsu-rss.py index 1d2b6aa..df1b824 100755 --- a/kadomatsu-rss.py +++ b/kadomatsu-rss.py @@ -11,6 +11,8 @@ import requests from bs4 import BeautifulSoup from urllib.parse import urljoin +import threading + root = "https://www.toshiki-kadomatsu.jp/information/" @@ -52,16 +54,26 @@ def get_article(url): article = soup.find("li", id="Detail") return format_article(article) +rss_items = [] # god this is a load of fucking spaghetti + +def make_item(i): + title = list(i.find("a").strings)[0] + date = datetime.strptime(i.find("time")["datetime"], "%Y-%m-%d") + content_url = relative2absolute(i.find("a")["href"]) + content = format_article(get_article(content_url)) + rss_item = Item(title, date, content, content_url) + rss_items.append(rss_item) + def get_rss_items(soup): items = soup.find("ul", id="List") - rss_items = [] + threads = [] + for i in items.find_all("dl"): - title = list(i.find("a").strings)[0] - date = datetime.strptime(i.find("time")["datetime"], "%Y-%m-%d") - content_url = relative2absolute(i.find("a")["href"]) - content = format_article(get_article(content_url)) - rss_item = Item(title, date, content, content_url) - rss_items.append(rss_item) + thread = threading.Thread(target=make_item, args=(i,)) + threads.append(thread) + thread.start() + for thread in threads: + thread.join() return rss_items |