diff options
| author | garret <garret@airmail.cc> | 2024-07-05 14:37:11 +0100 | 
|---|---|---|
| committer | garret <garret@airmail.cc> | 2024-07-05 14:37:11 +0100 | 
| commit | a2a32acf8c1f5802b79e49069992e70dead7f25e (patch) | |
| tree | b9da832f345506f46f6537ba070cba6223f9ee32 | |
| parent | fa18ed0238c23310c040c5d374d85b7938596734 (diff) | |
| download | agora-status-a2a32acf8c1f5802b79e49069992e70dead7f25e.tar.gz agora-status-a2a32acf8c1f5802b79e49069992e70dead7f25e.tar.bz2 agora-status-a2a32acf8c1f5802b79e49069992e70dead7f25e.zip  | |
cache things into a database
not done db stuff before so this is probably a bit jank
| -rwxr-xr-x | fetch-status.py | 90 | 
1 file changed, 64 insertions, 26 deletions
diff --git a/fetch-status.py b/fetch-status.py index d87d6b8..7ba3934 100755 --- a/fetch-status.py +++ b/fetch-status.py @@ -5,6 +5,8 @@ from bs4 import BeautifulSoup  import requests  import os +import sqlite3 +import time  print("Access-Control-Allow-Origin: *") # CORS shit, needed for styling iframes apparently @@ -23,35 +25,60 @@ if not profile_id.isdigit():  	print(f"you have to put your user id as a query, like {current_url}?6498")  	quit() -headers = { -	'User-Agent': f'agora status fetcher - contact forum user "garret 427738" or https://427738.xyz/hate-mail.html - requested by {os.environ.get("REMOTE_ADDR")}', -} +con = sqlite3.connect(os.environ.get("AGORA_STATUS_DB")) +cur = con.cursor() -profile_html = requests.get("https://forum.agoraroad.com/index.php?members/" + profile_id, headers=headers).text -soup = BeautifulSoup(profile_html, 'lxml') +cur.execute(""" +	CREATE TABLE IF NOT EXISTS statuses( +		profile_id INTEGER PRIMARY KEY UNIQUE, +		content TEXT, +		last_checked REAL, +		check_status TEXT +	) +""") +con.commit() -posts = soup.find_all("div", class_="message-content js-messageContent") +CACHE_TIMEOUT = 3600 -post_body = None +cached_result = cur.execute("""SELECT content, check_status FROM statuses +	WHERE profile_id=? 
AND last_checked > ?""", +	(profile_id, time.time() - CACHE_TIMEOUT) +).fetchone() -data_lb_id_regex = re.compile(r"profile\-post\-\d+") -for post in posts: -	posted_by_user = post.find("a", attrs={ -		"class": 'username', -		"data-user-id": profile_id, -	}) != None -	if posted_by_user: -		post_body = post.find("div", attrs={ -			"class": "lbContainer js-lbContainer", -			"data-lb-id": data_lb_id_regex, -		}) -		break +if cached_result == None: +	headers = { +		'User-Agent': f'agora status fetcher - contact forum user "garret 427738" or https://427738.xyz/hate-mail.html - requested by {os.environ.get("REMOTE_ADDR")}', +	} +	profile_html = requests.get("https://forum.agoraroad.com/index.php?members/" + profile_id, headers=headers).text +	soup = BeautifulSoup(profile_html, 'lxml') +	posts = soup.find_all("div", class_="message-content js-messageContent") + +	post_body = None + +	data_lb_id_regex = re.compile(r"profile\-post\-\d+") + +	for post in posts: +		posted_by_user = post.find("a", attrs={ +			"class": 'username', +			"data-user-id": profile_id, +		}) != None +		if posted_by_user: +			post_body = str(post.find("div", attrs={ +				"class": "lbContainer js-lbContainer", +				"data-lb-id": data_lb_id_regex, +			})) +			break + +	is_loginwall = soup.find("html", class_='has-no-js template-login') != None +	is_cloudflare_block = soup.find("title").text == "Just a moment..." +else: +	post_body = cached_result[0] +	is_loginwall = cached_result[1] == "schizo" +	is_cloudflare_block = cached_result[1] == "cloudflare" -is_loginwall = soup.find("html", class_='has-no-js template-login') != None -is_cloudflare_block = soup.find("title").text == "Just a moment..."  
if post_body is None:  	print("Content-Type: text/plain; charset=utf-8")  	if is_loginwall: @@ -68,8 +95,19 @@ if post_body is None:  		print("Status: 500")  		print()  		print("it didn't work for some reason, possibly you've not made any posts on your profile") -	quit() - -print("Content-Type: text/html; charset=utf-8") -print() -print(post_body) +else: +	print("Content-Type: text/html; charset=utf-8") +	print() +	print(post_body) + +check_status = "success" +if is_loginwall: +	check_status = "schizo" +elif is_cloudflare_block: +	check_status = "cloudflare" +elif post_body is None: +	check_status = "mystery" + +if not cached_result: +	cur.execute("INSERT INTO statuses VALUES(?,?,?,?)", (profile_id, post_body, time.time(), check_status)) +con.commit()  |