Properly escape post content that contains HTML tags; fix issue with scraping when metadata is in an unanticipated form

2026-01-30 19:42:40 -06:00 · 2026-01-30 19:42:40 -06:00 · b7d80002a1
parent 192d4e739e
commit b7d80002a1
3 changed files with 280 additions and 274 deletions
--- a/app/YARS/src/yars/yars.py
+++ b/app/YARS/src/yars/yars.py
@@ -278,6 +278,8 @@ class YARS:
 						id = item["media_id"]
 						if id in post_data["media_metadata"]:
 							metadata = post_data["media_metadata"][id]
+							if "p" not in metadata:
+								continue
 							m = re.search(r"redd\.it\/(.+)\?", metadata["p"][0]["u"])
 							if m:
 								media_urls.append(f"https://i.redd.it/{m.group(1)}")
@@ -286,6 +288,8 @@ class YARS:
 					media_urls = []
 					for id in post_data["media_metadata"]:
 						metadata = post_data["media_metadata"][id]
+						if "p" not in metadata:
+							continue
 						m = re.search(r"redd\.it\/(.+)\?", metadata["p"][0]["u"])
 						if m:
 							media_urls.append(f"https://i.redd.it/{m.group(1)}")
--- a/app/app.py
+++ b/app/app.py
@@ -4,6 +4,7 @@ from urllib.parse import urlparse
 import delete_posts
 import config
 import json
+import html
 import re
 import sqlite3
 import subprocess
@@ -415,6 +416,7 @@ def add_age_to_posts(posts):
 def reformat_body(posts):
 	for post in posts:
 		if "body" in post and post["body"] is not None:
+			post["body"] = html.escape(post["body"])
 			post["body"] = post["body"].rstrip().replace("\n", "<br>")
 			post["body"] = re.sub(r"\[(.*?)\]\((.*?)\)", r'<b><a href="\2" style="white-space: nowrap;" class="no-style-link">\1</a></b>', post["body"])
--- a/app/delete_posts.py
+++ b/app/delete_posts.py
@@ -36,7 +36,7 @@ def run():
 	print("Deleting old media db rows")
 	delete = "DELETE FROM media WHERE permalink NOT IN (SELECT permalink FROM post)"
 	cursor.execute(delete)
-	print("Deleving media db for read posts")
+	print("Deleting media db for read posts")
 	delete = "DELETE FROM media WHERE permalink IN (SELECT permalink FROM post WHERE hidden = ?)"
 	binds = [True]
 	cursor.execute(delete, binds)