From 3b98b5ea06fbd2cede65e7adb49840bd966b70dc Mon Sep 17 00:00:00 2001 From: John Stephani Date: Thu, 25 Dec 2025 17:13:14 -0600 Subject: [PATCH] Delete media for hidden posts, reset fetched status, only fetch media for visible posts --- app/config.py | 2 -- app/delete_posts.py | 12 +++++++++--- app/scrape_posts.py | 9 +++++---- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/app/config.py b/app/config.py index bf1a98f..4927480 100644 --- a/app/config.py +++ b/app/config.py @@ -11,7 +11,6 @@ subreddits = [ ("PoliticalCompassMemes", 100), ("meirl", 100), ("me_irl", 100), - ("AITAH", 100), ("Fauxmoi", 100), ("NoFilterNews", 100), ("linux", 100), @@ -19,7 +18,6 @@ subreddits = [ ("selfhosted", 100), ("HomeServer", 100), ("homelab", 100), - ("KidsAreFuckingStupid", 100), ("NonPoliticalTwitter", 100), ("all", 1000) ] diff --git a/app/delete_posts.py b/app/delete_posts.py index 1a58f5b..957c229 100644 --- a/app/delete_posts.py +++ b/app/delete_posts.py @@ -9,15 +9,21 @@ if __name__ == "__main__": cursor = connection.cursor() now = int(time.time()) max_created_utc = now - config.max_age_seconds - select = "SELECT count(*) FROM post WHERE created_utc < ?" - binds = [max_created_utc] - results = cursor.execute(select, binds) print("Deleting old posts") delete = "DELETE FROM post WHERE created_utc < ?" + binds = [max_created_utc] cursor.execute(delete, binds) print("Deleting old media db rows") delete = "DELETE FROM media WHERE permalink NOT IN (SELECT permalink FROM post)" cursor.execute(delete) + print("Deleting media db for read posts") + delete = "DELETE FROM media WHERE permalink IN (SELECT permalink FROM post WHERE hidden = ?)" + binds = [True] + cursor.execute(delete, binds) + print("Updating media_fetched for read posts") + update = "UPDATE post SET media_fetched = ? WHERE hidden = ?" 
+ binds = [False, True] + cursor.execute(update, binds) all_files_local = subprocess.run(["find", "/reddit/media", "-type", "f"], capture_output=True, text=True) all_files_local = set(all_files_local.stdout.splitlines()) select = "SELECT local from media" diff --git a/app/scrape_posts.py b/app/scrape_posts.py index e2034bb..fdef9c2 100755 --- a/app/scrape_posts.py +++ b/app/scrape_posts.py @@ -68,8 +68,8 @@ def save_posts_to_db(data, cursor): cursor.execute(upsert, binds) def download_media(cursor): - select = "SELECT post FROM post WHERE media_fetched = ?" - binds = [False] + select = "SELECT post FROM post WHERE media_fetched = ? AND hidden = ?" + binds = [False, False] results = cursor.execute(select, binds) post = results.fetchone() binds = [] @@ -79,8 +79,9 @@ def download_media(cursor): for url in post["media_urls"]: binds.append(post["permalink"]) binds.append(url) - binds.append(download_image(url, config.media_dir)) - print("image downloaded") + path = download_image(url, config.media_dir) + binds.append(path) + print(f"Downloaded {path}") post = results.fetchone() if len(binds)>0: