From e6d6df629926ea29abd8d9b0bd1913bd7fc2feca Mon Sep 17 00:00:00 2001 From: John Stephani Date: Sun, 28 Dec 2025 23:33:12 -0600 Subject: [PATCH] Add ability to block posts with certain words, e.g. Trump --- app/app.py | 3 ++- app/delete_posts.py | 6 ++++++ app/scrape_posts.py | 23 +++++++++++++++-------- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/app/app.py b/app/app.py index 5a57b64..62f5ded 100755 --- a/app/app.py +++ b/app/app.py @@ -196,6 +196,7 @@ def other_page(): post WHERE hidden = ? AND + saved = ? AND subreddit IN ( SELECT @@ -220,7 +221,7 @@ def other_page(): created_utc asc LIMIT ? """ - binds = [False, False, False, config.other_posts_cutoff, config.posts_per_page_load] + binds = [False, False, False, False, config.other_posts_cutoff, config.posts_per_page_load] posts = get_posts_from_select(cursor, select, binds) connection.close() return render_template('index.html', title=title, posts=posts, sidebar_links=sidebar_links) diff --git a/app/delete_posts.py b/app/delete_posts.py index e888541..b25a94c 100644 --- a/app/delete_posts.py +++ b/app/delete_posts.py @@ -27,6 +27,12 @@ def run(): binds.append(False) delete = f"DELETE FROM post WHERE author IN ({bind_array}) AND saved = ?" 
cursor.execute(delete, binds) + print("Deleting posts with blocked words") + select = "SELECT name FROM block where name NOT LIKE '/u/%' AND name NOT LIKE '/r/%'" + binds = ["%"+row[0]+"%" for row in cursor.execute(select).fetchall()] + where_array = " OR ".join(["post LIKE ?"]*len(binds)) + delete = f"DELETE FROM post WHERE {where_array}" + cursor.execute(delete, binds) print("Deleting old media db rows") delete = "DELETE FROM media WHERE permalink NOT IN (SELECT permalink FROM post)" cursor.execute(delete) diff --git a/app/scrape_posts.py b/app/scrape_posts.py index caa5af5..b8d30da 100755 --- a/app/scrape_posts.py +++ b/app/scrape_posts.py @@ -18,9 +18,9 @@ from yars.utils import download_image miner = YARS() # Function to scrape subreddit post details and save to JSON -def scrape_subreddit_data(subreddit, minimum_score=100, pull_by="day", limit=5, blocked_subs=[], blocked_users=[]): +def scrape_subreddit_data(subreddit, minimum_score=100, pull_by="day", limit=5, blocked_subs=[], blocked_users=[], blocked_words=[]): ret = [] - print(f"Starting {subreddit} with min score {minimum_score}, by {pull_by}, limit {limit}") + print(f"Starting {subreddit}") empty = dict() try: subreddit_posts = miner.fetch_subreddit_posts( @@ -42,12 +42,16 @@ def scrape_subreddit_data(subreddit, minimum_score=100, pull_by="day", limit=5, "body": post.get("body", None), } if post_data["subreddit"] in blocked_subs: - print(f'Ignoring post from {post_data["subreddit"]}') continue if post_data["author"] in blocked_users: - print(f'Ignoring post from {post_data["author"]}') continue - ret.append(post_data) + for word in blocked_words: + if word in post_data["title"]: + break + if post_data["body"] is not None and word in post_data["body"]: + break + else: + ret.append(post_data) print(f"Finished {subreddit}") return ret except Exception as e: @@ -121,12 +125,15 @@ def get_blocks(cursor): blocks = [row[0] for row in cursor.execute(select)] subs = [] users = [] + words = [] for block in blocks: 
if "/r/" in block: subs.append(block[3:]) elif "/u/" in block: users.append(block[3:]) - return subs, users + else: + words.append(block) + return subs, users, words # Main execution if __name__ == "__main__": @@ -143,9 +150,9 @@ if __name__ == "__main__": subreddit """ subreddits = cursor.execute(select).fetchall() - blocked_subs, blocked_users = get_blocks(cursor) + blocked_subs, blocked_users, blocked_words = get_blocks(cursor) for subreddit in subreddits: - post_data = scrape_subreddit_data(subreddit[0], subreddit[1], subreddit[2], subreddit[3], blocked_subs, blocked_users) + post_data = scrape_subreddit_data(subreddit[0], subreddit[1], subreddit[2], subreddit[3], blocked_subs, blocked_users, blocked_words) save_posts_to_db(post_data, cursor) connection.commit() download_media(cursor)