# Threaded feed-item builder from kadomatsu-rss.py
# (post-image of commit c0ef4eb, "add threading").
import threading

# Kept at module level for backward compatibility: make_item() appends to it
# and get_rss_items() returns this same list object.
rss_items = []


def _build_item(entry):
    """Build a single feed Item from one ``<dl>`` entry of the listing.

    The title is the first string inside the entry's ``<a>``, the date is
    parsed from the ``datetime`` attribute of its ``<time>`` element
    (``%Y-%m-%d``), and the content is fetched from the linked article page.

    Raises whatever the lookups, the date parsing or the fetch raise; the
    callers decide what to do with a failed entry.
    """
    link = entry.find("a")
    title = list(link.strings)[0]
    date = datetime.strptime(entry.find("time")["datetime"], "%Y-%m-%d")
    content_url = relative2absolute(link["href"])
    # NOTE(review): the visible tail of get_article() already returns
    # format_article(article), so this outer format_article() call may be
    # redundant -- confirm against format_article before removing it.
    content = format_article(get_article(content_url))
    return Item(title, date, content, content_url)


def make_item(i):
    """Build the Item for entry ``i`` and append it to ``rss_items``.

    Retained for existing callers. get_rss_items() no longer goes through
    this function because appending from several threads yields items in
    completion order rather than page order.
    """
    rss_items.append(_build_item(i))


def get_rss_items(soup):
    """Return the feed items for every ``<dl>`` under ``<ul id="List">``.

    One thread is started per entry so the article fetches overlap. Each
    worker writes only to its own pre-allocated slot, so the result is in
    document order no matter which fetch finishes first. A worker that
    raises leaves its slot empty and that entry is omitted from the result.

    The module-level ``rss_items`` list is replaced in place on every call,
    so repeated calls do not accumulate items from earlier runs.
    """
    listing = soup.find("ul", id="List")
    entries = listing.find_all("dl")
    slots = [None] * len(entries)

    def fill_slot(index, entry):
        slots[index] = _build_item(entry)

    workers = []
    for index, entry in enumerate(entries):
        worker = threading.Thread(target=fill_slot, args=(index, entry))
        workers.append(worker)
        worker.start()
    for worker in workers:
        worker.join()

    # Slice assignment keeps the identity of the shared list while dropping
    # whatever a previous call left in it.
    rss_items[:] = [item for item in slots if item is not None]
    return rss_items