Delete media for hidden posts, reset fetched status, only fetch media for visible posts
This commit is contained in:
parent
cdae48c98c
commit
3b98b5ea06
|
|
@ -11,7 +11,6 @@ subreddits = [
|
||||||
("PoliticalCompassMemes", 100),
|
("PoliticalCompassMemes", 100),
|
||||||
("meirl", 100),
|
("meirl", 100),
|
||||||
("me_irl", 100),
|
("me_irl", 100),
|
||||||
("AITAH", 100),
|
|
||||||
("Fauxmoi", 100),
|
("Fauxmoi", 100),
|
||||||
("NoFilterNews", 100),
|
("NoFilterNews", 100),
|
||||||
("linux", 100),
|
("linux", 100),
|
||||||
|
|
@ -19,7 +18,6 @@ subreddits = [
|
||||||
("selfhosted", 100),
|
("selfhosted", 100),
|
||||||
("HomeServer", 100),
|
("HomeServer", 100),
|
||||||
("homelab", 100),
|
("homelab", 100),
|
||||||
("KidsAreFuckingStupid", 100),
|
|
||||||
("NonPoliticalTwitter", 100),
|
("NonPoliticalTwitter", 100),
|
||||||
("all", 1000)
|
("all", 1000)
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -9,15 +9,21 @@ if __name__ == "__main__":
|
||||||
cursor = connection.cursor()
|
cursor = connection.cursor()
|
||||||
now = int(time.time())
|
now = int(time.time())
|
||||||
max_created_utc = now - config.max_age_seconds
|
max_created_utc = now - config.max_age_seconds
|
||||||
select = "SELECT count(*) FROM post WHERE created_utc < ?"
|
|
||||||
binds = [max_created_utc]
|
|
||||||
results = cursor.execute(select, binds)
|
|
||||||
print("Deleting old posts")
|
print("Deleting old posts")
|
||||||
delete = "DELETE FROM post WHERE created_utc < ?"
|
delete = "DELETE FROM post WHERE created_utc < ?"
|
||||||
|
binds = [max_created_utc]
|
||||||
cursor.execute(delete, binds)
|
cursor.execute(delete, binds)
|
||||||
print("Deleting old media db rows")
|
print("Deleting old media db rows")
|
||||||
delete = "DELETE FROM media WHERE permalink NOT IN (SELECT permalink FROM post)"
|
delete = "DELETE FROM media WHERE permalink NOT IN (SELECT permalink FROM post)"
|
||||||
cursor.execute(delete)
|
cursor.execute(delete)
|
||||||
|
print("Deleting media db for read posts")
|
||||||
|
delete = "DELETE FROM media WHERE permalink IN (SELECT permalink FROM post WHERE hidden = ?)"
|
||||||
|
binds = [True]
|
||||||
|
cursor.execute(delete, binds)
|
||||||
|
print("Updating media_fetched for read posts")
|
||||||
|
update = "UPDATE post SET media_fetched = ? WHERE hidden = ?"
|
||||||
|
binds = [False, True]
|
||||||
|
cursor.execute(update, binds)
|
||||||
all_files_local = subprocess.run(["find", "/reddit/media", "-type", "f"], capture_output=True, text=True)
|
all_files_local = subprocess.run(["find", "/reddit/media", "-type", "f"], capture_output=True, text=True)
|
||||||
all_files_local = set(all_files_local.stdout.splitlines())
|
all_files_local = set(all_files_local.stdout.splitlines())
|
||||||
select = "SELECT local from media"
|
select = "SELECT local from media"
|
||||||
|
|
|
||||||
|
|
@ -68,8 +68,8 @@ def save_posts_to_db(data, cursor):
|
||||||
cursor.execute(upsert, binds)
|
cursor.execute(upsert, binds)
|
||||||
|
|
||||||
def download_media(cursor):
|
def download_media(cursor):
|
||||||
select = "SELECT post FROM post WHERE media_fetched = ?"
|
select = "SELECT post FROM post WHERE media_fetched = ? AND hidden = ?"
|
||||||
binds = [False]
|
binds = [False, False]
|
||||||
results = cursor.execute(select, binds)
|
results = cursor.execute(select, binds)
|
||||||
post = results.fetchone()
|
post = results.fetchone()
|
||||||
binds = []
|
binds = []
|
||||||
|
|
@ -79,8 +79,9 @@ def download_media(cursor):
|
||||||
for url in post["media_urls"]:
|
for url in post["media_urls"]:
|
||||||
binds.append(post["permalink"])
|
binds.append(post["permalink"])
|
||||||
binds.append(url)
|
binds.append(url)
|
||||||
binds.append(download_image(url, config.media_dir))
|
path = download_image(url, config.media_dir)
|
||||||
print("image downloaded")
|
binds.append(path)
|
||||||
|
print(f"Downloaded {path}")
|
||||||
post = results.fetchone()
|
post = results.fetchone()
|
||||||
|
|
||||||
if len(binds)>0:
|
if len(binds)>0:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue