Add ability to block posts with certain words, e.g. Trump

This commit is contained in:
John Stephani 2025-12-28 23:33:12 -06:00
parent e83f646dd8
commit e6d6df6299
3 changed files with 23 additions and 9 deletions

View File

@ -196,6 +196,7 @@ def other_page():
post post
WHERE WHERE
hidden = ? AND hidden = ? AND
saved = ? AND
subreddit IN subreddit IN
( (
SELECT SELECT
@ -220,7 +221,7 @@ def other_page():
created_utc asc created_utc asc
LIMIT ? LIMIT ?
""" """
binds = [False, False, False, config.other_posts_cutoff, config.posts_per_page_load] binds = [False, False, False, False, config.other_posts_cutoff, config.posts_per_page_load]
posts = get_posts_from_select(cursor, select, binds) posts = get_posts_from_select(cursor, select, binds)
connection.close() connection.close()
return render_template('index.html', title=title, posts=posts, sidebar_links=sidebar_links) return render_template('index.html', title=title, posts=posts, sidebar_links=sidebar_links)

View File

@ -27,6 +27,12 @@ def run():
binds.append(False) binds.append(False)
delete = f"DELETE FROM post WHERE author IN ({bind_array}) AND saved = ?" delete = f"DELETE FROM post WHERE author IN ({bind_array}) AND saved = ?"
cursor.execute(delete, binds) cursor.execute(delete, binds)
print("Deleting posts with blocked words")
select = "SELECT name FROM block where name NOT LIKE '/u/%' AND name NOT LIKE '/r/%'"
binds = ["%"+row[0]+"%" for row in cursor.execute(select).fetchall()]
where_array = " OR ".join(["post LIKE ?"]*len(binds))
delete = f"DELETE FROM post WHERE {where_array}"
cursor.execute(delete, binds)
print("Deleting old media db rows") print("Deleting old media db rows")
delete = "DELETE FROM media WHERE permalink NOT IN (SELECT permalink FROM post)" delete = "DELETE FROM media WHERE permalink NOT IN (SELECT permalink FROM post)"
cursor.execute(delete) cursor.execute(delete)

View File

@ -18,9 +18,9 @@ from yars.utils import download_image
miner = YARS() miner = YARS()
# Function to scrape subreddit post details and save to JSON # Function to scrape subreddit post details and save to JSON
def scrape_subreddit_data(subreddit, minimum_score=100, pull_by="day", limit=5, blocked_subs=[], blocked_users=[]): def scrape_subreddit_data(subreddit, minimum_score=100, pull_by="day", limit=5, blocked_subs=[], blocked_users=[], blocked_words=[]):
ret = [] ret = []
print(f"Starting {subreddit} with min score {minimum_score}, by {pull_by}, limit {limit}") print(f"Starting {subreddit}")
empty = dict() empty = dict()
try: try:
subreddit_posts = miner.fetch_subreddit_posts( subreddit_posts = miner.fetch_subreddit_posts(
@ -42,12 +42,16 @@ def scrape_subreddit_data(subreddit, minimum_score=100, pull_by="day", limit=5,
"body": post.get("body", None), "body": post.get("body", None),
} }
if post_data["subreddit"] in blocked_subs: if post_data["subreddit"] in blocked_subs:
print(f'Ignoring post from {post_data["subreddit"]}')
continue continue
if post_data["author"] in blocked_users: if post_data["author"] in blocked_users:
print(f'Ignoring post from {post_data["author"]}')
continue continue
ret.append(post_data) for word in blocked_words:
if word in post_data["title"]:
break
if post_data["body"] is not None and word in post_data["body"]:
break
else:
ret.append(post_data)
print(f"Finished {subreddit}") print(f"Finished {subreddit}")
return ret return ret
except Exception as e: except Exception as e:
@ -121,12 +125,15 @@ def get_blocks(cursor):
blocks = [row[0] for row in cursor.execute(select)] blocks = [row[0] for row in cursor.execute(select)]
subs = [] subs = []
users = [] users = []
words = []
for block in blocks: for block in blocks:
if "/r/" in block: if "/r/" in block:
subs.append(block[3:]) subs.append(block[3:])
elif "/u/" in block: elif "/u/" in block:
users.append(block[3:]) users.append(block[3:])
return subs, users else:
words.append(block)
return subs, users, words
# Main execution # Main execution
if __name__ == "__main__": if __name__ == "__main__":
@ -143,9 +150,9 @@ if __name__ == "__main__":
subreddit subreddit
""" """
subreddits = cursor.execute(select).fetchall() subreddits = cursor.execute(select).fetchall()
blocked_subs, blocked_users = get_blocks(cursor) blocked_subs, blocked_users, blocked_words = get_blocks(cursor)
for subreddit in subreddits: for subreddit in subreddits:
post_data = scrape_subreddit_data(subreddit[0], subreddit[1], subreddit[2], subreddit[3], blocked_subs, blocked_users) post_data = scrape_subreddit_data(subreddit[0], subreddit[1], subreddit[2], subreddit[3], blocked_subs, blocked_users, blocked_words)
save_posts_to_db(post_data, cursor) save_posts_to_db(post_data, cursor)
connection.commit() connection.commit()
download_media(cursor) download_media(cursor)