From a2a32acf8c1f5802b79e49069992e70dead7f25e Mon Sep 17 00:00:00 2001 From: garret Date: Fri, 5 Jul 2024 14:37:11 +0100 Subject: cache things into a database not done db stuff before so this is probably a bit jank --- fetch-status.py | 90 ++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 64 insertions(+), 26 deletions(-) diff --git a/fetch-status.py b/fetch-status.py index d87d6b8..7ba3934 100755 --- a/fetch-status.py +++ b/fetch-status.py @@ -5,6 +5,8 @@ from bs4 import BeautifulSoup import requests import os +import sqlite3 +import time print("Access-Control-Allow-Origin: *") # CORS shit, needed for styling iframes apparently @@ -23,35 +25,60 @@ if not profile_id.isdigit(): print(f"you have to put your user id as a query, like {current_url}?6498") quit() -headers = { - 'User-Agent': f'agora status fetcher - contact forum user "garret 427738" or https://427738.xyz/hate-mail.html - requested by {os.environ.get("REMOTE_ADDR")}', -} +con = sqlite3.connect(os.environ.get("AGORA_STATUS_DB")) +cur = con.cursor() -profile_html = requests.get("https://forum.agoraroad.com/index.php?members/" + profile_id, headers=headers).text -soup = BeautifulSoup(profile_html, 'lxml') +cur.execute(""" + CREATE TABLE IF NOT EXISTS statuses( + profile_id INTEGER PRIMARY KEY UNIQUE, + content TEXT, + last_checked REAL, + check_status TEXT + ) +""") +con.commit() -posts = soup.find_all("div", class_="message-content js-messageContent") +CACHE_TIMEOUT = 3600 -post_body = None +cached_result = cur.execute("""SELECT content, check_status FROM statuses + WHERE profile_id=? AND last_checked > ?""", + (profile_id, time.time() - CACHE_TIMEOUT) +).fetchone() -data_lb_id_regex = re.compile(r"profile\-post\-\d+") -for post in posts: - posted_by_user = post.find("a", attrs={ - "class": 'username', - "data-user-id": profile_id, - }) != None - if posted_by_user: - post_body = post.find("div", attrs={ - "class": "lbContainer js-lbContainer", - "data-lb-id": data_lb_id_regex, - }) - break +if cached_result == None: + headers = { + 'User-Agent': f'agora status fetcher - contact forum user "garret 427738" or https://427738.xyz/hate-mail.html - requested by {os.environ.get("REMOTE_ADDR")}', + } + profile_html = requests.get("https://forum.agoraroad.com/index.php?members/" + profile_id, headers=headers).text + soup = BeautifulSoup(profile_html, 'lxml') + posts = soup.find_all("div", class_="message-content js-messageContent") + + post_body = None + + data_lb_id_regex = re.compile(r"profile\-post\-\d+") + + for post in posts: + posted_by_user = post.find("a", attrs={ + "class": 'username', + "data-user-id": profile_id, + }) != None + if posted_by_user: + post_body = str(post.find("div", attrs={ + "class": "lbContainer js-lbContainer", + "data-lb-id": data_lb_id_regex, + })) + break + + is_loginwall = soup.find("html", class_='has-no-js template-login') != None + is_cloudflare_block = soup.find("title").text == "Just a moment..." +else: + post_body = cached_result[0] + is_loginwall = cached_result[1] == "schizo" + is_cloudflare_block = cached_result[1] == "cloudflare" -is_loginwall = soup.find("html", class_='has-no-js template-login') != None -is_cloudflare_block = soup.find("title").text == "Just a moment..." if post_body is None: print("Content-Type: text/plain; charset=utf-8") if is_loginwall: @@ -68,8 +95,19 @@ if post_body is None: print("Status: 500") print() print("it didn't work for some reason, possibly you've not made any posts on your profile") - quit() - -print("Content-Type: text/html; charset=utf-8") -print() -print(post_body) +else: + print("Content-Type: text/html; charset=utf-8") + print() + print(post_body) + +check_status = "success" +if is_loginwall: + check_status = "schizo" +elif is_cloudflare_block: + check_status = "cloudflare" +elif post_body is None: + check_status = "mystery" + +if not cached_result: + cur.execute("INSERT INTO statuses VALUES(?,?,?,?)", (profile_id, post_body, time.time(), check_status)) +con.commit() -- cgit v1.2.3-70-g09d2