Delete media for hidden posts, reset fetched status, only fetch media for visible posts

This commit is contained in:
John Stephani 2025-12-25 17:13:14 -06:00
parent cdae48c98c
commit 3b98b5ea06
3 changed files with 14 additions and 9 deletions

View File

@ -11,7 +11,6 @@ subreddits = [
("PoliticalCompassMemes", 100),
("meirl", 100),
("me_irl", 100),
("AITAH", 100),
("Fauxmoi", 100),
("NoFilterNews", 100),
("linux", 100),
@ -19,7 +18,6 @@ subreddits = [
("selfhosted", 100),
("HomeServer", 100),
("homelab", 100),
("KidsAreFuckingStupid", 100),
("NonPoliticalTwitter", 100),
("all", 1000)
]

View File

@ -9,15 +9,21 @@ if __name__ == "__main__":
cursor = connection.cursor()
now = int(time.time())
max_created_utc = now - config.max_age_seconds
select = "SELECT count(*) FROM post WHERE created_utc < ?"
binds = [max_created_utc]
results = cursor.execute(select, binds)
print("Deleting old posts")
delete = "DELETE FROM post WHERE created_utc < ?"
binds = [max_created_utc]
cursor.execute(delete, binds)
print("Deleting old media db rows")
delete = "DELETE FROM media WHERE permalink NOT IN (SELECT permalink FROM post)"
cursor.execute(delete)
print("Deleting media db for read posts")
delete = "DELETE FROM media WHERE permalink IN (SELECT permalink FROM post WHERE hidden = ?)"
binds = [True]
cursor.execute(delete, binds)
print("Updating media_fetched for read posts")
update = "UPDATE post SET media_fetched = ? WHERE hidden = ?"
binds = [False, True]
cursor.execute(update, binds)
all_files_local = subprocess.run(["find", "/reddit/media", "-type", "f"], capture_output=True, text=True)
all_files_local = set(all_files_local.stdout.splitlines())
select = "SELECT local from media"

View File

@ -68,8 +68,8 @@ def save_posts_to_db(data, cursor):
cursor.execute(upsert, binds)
def download_media(cursor):
select = "SELECT post FROM post WHERE media_fetched = ?"
binds = [False]
select = "SELECT post FROM post WHERE media_fetched = ? AND hidden = ?"
binds = [False, False]
results = cursor.execute(select, binds)
post = results.fetchone()
binds = []
@ -79,8 +79,9 @@ def download_media(cursor):
for url in post["media_urls"]:
binds.append(post["permalink"])
binds.append(url)
binds.append(download_image(url, config.media_dir))
print("image downloaded")
path = download_image(url, config.media_dir)
binds.append(path)
print(f"Downloaded {path}")
post = results.fetchone()
if len(binds)>0: