From 9fc267ec6af18190dd50b4f4f1db3083f70cd3c8 Mon Sep 17 00:00:00 2001
From: John Stephani
Date: Fri, 26 Dec 2025 15:45:26 -0600
Subject: [PATCH] Add admin page to manage subreddit subscriptions

---
 README.md                |   6 +-
 app/app.py               |  63 +++++++++++
 app/config.py            |  23 ----
 app/make_db.py           |   1 +
 app/scrape_posts.py      | 184 ++++++++++++++++---------------
 app/templates/admin.html | 228 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 391 insertions(+), 114 deletions(-)
 create mode 100755 app/templates/admin.html

diff --git a/README.md b/README.md
index fa7064a..e9c76c3 100755
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ You can change the host port, host volume directories, how often reddit is scann
 
 ## ./app/config.py
 
-You can change how much data is pulled, from where, the minimum score to save it to your DB, and how long it is retained.
+You can change how many posts are displayed per page load and how long data is retained.
 
 ### Startup
 ```
@@ -14,13 +14,13 @@ docker compose build
 docker compose up
 ```
 
-The DB is created automatically. You will want to run
+The DB is created automatically. You will want to visit the /admin endpoint to set up your subreddits, then run
 
 ```
 docker exec -it reddit-web-1 sh -c "python3 /app/scrape_posts.py"
 ```
 
-to populate the DB with initial data, or you will have to wait for the scheduled task to get triggered for the web page to be usable.
+to populate the DB with initial data; otherwise you will have to wait for the scheduled task to run before posts start showing.
 
 ### Thanks
 
diff --git a/app/app.py b/app/app.py
index 020bcca..e447b01 100755
--- a/app/app.py
+++ b/app/app.py
@@ -40,8 +40,70 @@ def hide_post(permalink):
 
 @app.route('/')
 def index():
+    connection = sqlite3.connect(config.db_file)
+    cursor = connection.cursor()
+    select = """
+        SELECT
+            count(*)
+        FROM
+            subreddit
+    """
+    count = cursor.execute(select).fetchone()[0]
+    if count == 0:
+        return admin()
     return front_page()
 
+@app.route('/admin', methods=['GET', 'POST', 'DELETE'])
+def admin():
+    connection = sqlite3.connect(config.db_file)
+    cursor = connection.cursor()
+    if request.method == 'DELETE':
+        delete = """
+            DELETE FROM
+                subreddit
+            WHERE
+                subreddit = ?
+        """
+        binds = [request.args.get("name")]
+        cursor.execute(delete, binds)
+        connection.commit()
+        connection.close()
+        return ""
+    elif request.method == 'POST':
+        upsert = """
+            INSERT INTO
+                subreddit (subreddit, minimum_score, fetch_by, fetch_max)
+            VALUES
+                (?, ?, ?, ?)
+            ON CONFLICT
+                (subreddit)
+            DO UPDATE SET
+                minimum_score=excluded.minimum_score,
+                fetch_by=excluded.fetch_by,
+                fetch_max=excluded.fetch_max
+        """
+        binds = [
+            request.form.get("name"),
+            int(request.form.get("score")),
+            request.form.get("by"),
+            int(request.form.get("max"))
+        ]
+        cursor.execute(upsert, binds)
+        connection.commit()
+    post_subreddits = get_subreddits(cursor)
+    select = """
+        SELECT
+            subreddit,
+            minimum_score,
+            fetch_by,
+            fetch_max
+        FROM
+            subreddit
+    """
+    sub_subreddits = cursor.execute(select).fetchall()
+    connection.close()
+    return render_template('admin.html', post_subreddits=post_subreddits, sub_subreddits=sub_subreddits)
+
 @app.route('/r/all')
 def front_page():
     title = "/r/all"
@@ -156,6 +218,7 @@ def get_subreddits(cursor):
     subreddits = [f"/r/{sub[0]}" for sub in results]
     subreddits.insert(0, "/r/all")
     subreddits.append("/r/other")
+    subreddits.append("/admin")
     return subreddits
 
 def get_posts_from_select(cursor, select, binds):
diff --git a/app/config.py b/app/config.py
index eb1d488..f1881b6 100644
--- a/app/config.py
+++ b/app/config.py
@@ -1,27 +1,4 @@
 # Scheduler configuration
-max_posts_per_pull = 100
-pull_by = "day"
-subreddits = [
-    # name, minimum upvotes
-    ("pcgaming", 50),
-    ("gadgets", 10),
-    ("Nightreign", 100),
-    ("CuratedTumblr", 100),
-    ("196", 100),
-    ("PoliticalCompassMemes", 100),
-    ("meirl", 100),
-    ("me_irl", 100),
-    ("Fauxmoi", 100),
-    ("NoFilterNews", 100),
-    ("linux", 100),
-    ("linux4noobs", 100),
-    ("selfhosted", 100),
-    ("HomeServer", 100),
-    ("homelab", 100),
-    ("NonPoliticalTwitter", 100),
-    ("comics", 100),
-    ("all", 1000)
-]
 max_age_days = 30
 max_age_seconds = max_age_days * 24 * 60 * 60
 other_posts_cutoff = 1 #subreddits with this many unread posts or fewer are merged to /r/other
diff --git a/app/make_db.py b/app/make_db.py
index 8450450..6edc0f6 100755
--- a/app/make_db.py
+++ b/app/make_db.py
@@ -8,5 +8,6 @@ connection = sqlite3.connect(config.db_file)
 cursor = connection.cursor()
 cursor.execute("CREATE TABLE IF NOT EXISTS post(permalink primary key, subreddit, created_utc, score, media_fetched, post, hidden)")
 cursor.execute("CREATE TABLE IF NOT EXISTS media(permalink, url , local, PRIMARY KEY (permalink, url))")
+cursor.execute("CREATE TABLE IF NOT EXISTS subreddit(subreddit primary key, minimum_score, fetch_by, fetch_max)")
 connection.commit()
 connection.close()
\ No newline at end of file
diff --git a/app/scrape_posts.py b/app/scrape_posts.py
index fdef9c2..e04f8b7 100755
--- a/app/scrape_posts.py
+++ b/app/scrape_posts.py
@@ -18,102 +18,110 @@ from yars.utils import download_image
 miner = YARS()
 
 # Function to scrape subreddit post details and save to JSON
-def scrape_subreddit_data(subreddit, limit=5):
-    ret = []
-    subreddit_name = subreddit[0]
-    minimum_score = subreddit[1]
-    print(f"Starting {subreddit_name}")
-    empty = dict()
-    try:
-        subreddit_posts = miner.fetch_subreddit_posts(
-            subreddit_name, limit=limit, category="top", time_filter=config.pull_by
-        )
-        for i, post in enumerate(subreddit_posts, 1):
-            score = post.get("score", 0)
-            if score < minimum_score:
-                continue
-            post_data = {
-                "permalink": post.get("permalink"),
-                "title": post.get("title", ""),
-                "author": post.get("author", ""),
-                "created_utc": post.get("created_utc", ""),
-                "num_comments": post.get("num_comments", 0),
-                "score": post.get("score", 0),
-                "media_urls" : post.get("media_urls", []),
-                "body": post.get("body", None),
-            }
-            ret.append(post_data)
-        print(f"Finished {subreddit_name}")
-        return ret
-    except Exception as e:
-        print(f"Error occurred while scraping subreddit: {e}")
-        return ret
+def scrape_subreddit_data(subreddit, minimum_score=100, pull_by="day", limit=5):
+    ret = []
+    print(f"Starting {subreddit} with min score {minimum_score}, by {pull_by}, limit {limit}")
+    empty = dict()
+    try:
+        subreddit_posts = miner.fetch_subreddit_posts(
+            subreddit, limit=limit, category="top", time_filter=pull_by
+        )
+        for i, post in enumerate(subreddit_posts, 1):
+            score = post.get("score", 0)
+            if score < minimum_score:
+                break
+            post_data = {
+                "permalink": post.get("permalink"),
+                "title": post.get("title", ""),
+                "author": post.get("author", ""),
+                "created_utc": post.get("created_utc", ""),
+                "num_comments": post.get("num_comments", 0),
+                "score": post.get("score", 0),
+                "media_urls" : post.get("media_urls", []),
+                "body": post.get("body", None),
+            }
+            ret.append(post_data)
+        print(f"Finished {subreddit}")
+        return ret
+    except Exception as e:
+        print(f"Error occurred while scraping subreddit: {e}")
+        return ret
 
 def save_posts_to_db(data, cursor):
-    if len(data)==0:
-        return
-    upsert = "INSERT INTO post(permalink, subreddit, created_utc, score, media_fetched, post, hidden) VALUES "
-    upsert += ",".join(["(?,?,?,?,?,?,?)"] * len(data))
-    upsert += " ON CONFLICT(permalink) DO UPDATE SET score=excluded.score, post=excluded.post"
-    binds = []
-    for post in data:
-        binds.append(post["permalink"])
-        m = re.search(r"\/r\/([a-zA-Z0-9_]+)\/.*", post["permalink"])
-        binds.append(m.group(1)) #subreddit
-        binds.append(post["created_utc"])
-        binds.append(post["score"])
-        binds.append(False)
-        binds.append(json.dumps(post))
-        binds.append(False)
-    cursor.execute(upsert, binds)
+    if len(data)==0:
+        return
+    upsert = "INSERT INTO post(permalink, subreddit, created_utc, score, media_fetched, post, hidden) VALUES "
+    upsert += ",".join(["(?,?,?,?,?,?,?)"] * len(data))
+    upsert += " ON CONFLICT(permalink) DO UPDATE SET score=excluded.score, post=excluded.post"
+    binds = []
+    for post in data:
+        binds.append(post["permalink"])
+        m = re.search(r"\/r\/([a-zA-Z0-9_]+)\/.*", post["permalink"])
+        binds.append(m.group(1)) #subreddit
+        binds.append(post["created_utc"])
+        binds.append(post["score"])
+        binds.append(False)
+        binds.append(json.dumps(post))
+        binds.append(False)
+    cursor.execute(upsert, binds)
 
 def download_media(cursor):
-    select = "SELECT post FROM post WHERE media_fetched = ? AND hidden = ?"
-    binds = [False, False]
-    results = cursor.execute(select, binds)
-    post = results.fetchone()
-    binds = []
-    while post is not None:
-        post = json.loads(post[0])
-        if len(post["media_urls"])>0:
-            for url in post["media_urls"]:
-                binds.append(post["permalink"])
-                binds.append(url)
-                path = download_image(url, config.media_dir)
-                binds.append(path)
-                print(f"Downloaded {path}")
-        post = results.fetchone()
-
-    if len(binds)>0:
-        upsert = "INSERT INTO media(permalink, url, local) VALUES "
-        upsert += ",".join(["(?,?,?)"] * (len(binds)//3))
-        upsert += " ON CONFLICT(permalink, url) DO UPDATE SET local=excluded.local"
-        cursor.execute(upsert, binds)
+    select = "SELECT post FROM post WHERE media_fetched = ? AND hidden = ?"
+    binds = [False, False]
+    results = cursor.execute(select, binds)
+    post = results.fetchone()
+    binds = []
+    while post is not None:
+        post = json.loads(post[0])
+        if len(post["media_urls"])>0:
+            for url in post["media_urls"]:
+                binds.append(post["permalink"])
+                binds.append(url)
+                path = download_image(url, config.media_dir)
+                binds.append(path)
+                print(f"Downloaded {path}")
+        post = results.fetchone()
+
+    if len(binds)>0:
+        upsert = "INSERT INTO media(permalink, url, local) VALUES "
+        upsert += ",".join(["(?,?,?)"] * (len(binds)//3))
+        upsert += " ON CONFLICT(permalink, url) DO UPDATE SET local=excluded.local"
+        cursor.execute(upsert, binds)
 
-    update = "UPDATE post SET media_fetched = ? WHERE media_fetched = ?"
-    binds = [True, False]
-    cursor.execute(update, binds)
+    update = "UPDATE post SET media_fetched = ? WHERE media_fetched = ?"
+    binds = [True, False]
+    cursor.execute(update, binds)
 
 def download_comments_for_permalink(permalink, cursor):
-    # Currently unused
-    post_details = miner.scrape_post_details(permalink)
-    update = "UPDATE post SET body = ? WHERE permalink = ?"
-    binds = [post_details["body"], permalink]
-    cursor.execute(update, binds)
+    # Currently unused
+    post_details = miner.scrape_post_details(permalink)
+    update = "UPDATE post SET body = ? WHERE permalink = ?"
+    binds = [post_details["body"], permalink]
+    cursor.execute(update, binds)
 
-    upsert += "INSERT INTO comments(permalink, comments) VALUES (?, ?) ON CONFLICT(permalink) DO UPDATE SET comments=excluded.comments"
-    binds = [permalink, post_details["comments"]]
-    cursor.execute(upsert, binds)
+    upsert = "INSERT INTO comments(permalink, comments) VALUES (?, ?) ON CONFLICT(permalink) DO UPDATE SET comments=excluded.comments"
+    binds = [permalink, post_details["comments"]]
+    cursor.execute(upsert, binds)
 
 # Main execution
 if __name__ == "__main__":
-    os.makedirs(config.media_dir, exist_ok=True)
-    connection = sqlite3.connect(config.db_file)
-    cursor = connection.cursor()
-    for subreddit in config.subreddits:
-        post_data = scrape_subreddit_data(subreddit, config.max_posts_per_pull)
-        save_posts_to_db(post_data, cursor)
-        connection.commit()
-    download_media(cursor)
-    connection.commit()
-    connection.close()
\ No newline at end of file
+    os.makedirs(config.media_dir, exist_ok=True)
+    connection = sqlite3.connect(config.db_file)
+    cursor = connection.cursor()
+    select = """
+        SELECT
+            subreddit,
+            minimum_score,
+            fetch_by,
+            fetch_max
+        FROM
+            subreddit
+    """
+    subreddits = cursor.execute(select).fetchall()
+    for subreddit in subreddits:
+        post_data = scrape_subreddit_data(subreddit[0], subreddit[1], subreddit[2], subreddit[3])
+        save_posts_to_db(post_data, cursor)
+        connection.commit()
+    download_media(cursor)
+    connection.commit()
+    connection.close()
\ No newline at end of file
diff --git a/app/templates/admin.html b/app/templates/admin.html
new file mode 100755
index 0000000..77a9b58
--- /dev/null
+++ b/app/templates/admin.html
@@ -0,0 +1,228 @@
+<!DOCTYPE html>
+<html>
+    <head>
+        <meta charset="utf-8">
+        <meta name="viewport" content="width=device-width, initial-scale=1">
+        <title>Reddit, but better</title>
+    </head>
+    <body>
+        <nav>
+            {% for subreddit in post_subreddits %}
+            <a href="{{ subreddit }}">{{ subreddit }}</a>
+            {% endfor %}
+        </nav>
+        <h1>Admin Panel</h1>
+        <h2>Subreddits</h2>
+        <table>
+            <tr>
+                <th>Subreddit</th>
+                <th>Minimum Score</th>
+                <th>Fetch by</th>
+                <th>Fetch max</th>
+                <th>Update</th>
+            </tr>
+            {% for subreddit in sub_subreddits %}
+            <tr>
+                <td>/r/{{ subreddit[0] }}</td>
+                <td>{{ subreddit[1] }}</td>
+                <td>{{ subreddit[2] }}</td>
+                <td>{{ subreddit[3] }}</td>
+                <td><button type="button" onclick="deleteSubreddit('{{ subreddit[0] }}')">Delete</button></td>
+            </tr>
+            {% endfor %}
+            <tr>
+                <td>/r/<input type="text" name="name" form="subreddit-form" required></td>
+                <td><input type="number" name="score" form="subreddit-form" value="100"></td>
+                <td><input type="text" name="by" form="subreddit-form" value="day"></td>
+                <td><input type="number" name="max" form="subreddit-form" value="100"></td>
+                <td><button type="submit" form="subreddit-form">Update</button></td>
+            </tr>
+        </table>
+        <form id="subreddit-form" method="POST" action="/admin"></form>
+        <script>
+            function deleteSubreddit(name) {
+                fetch("/admin?name=" + encodeURIComponent(name), { method: "DELETE" })
+                    .then(() => window.location.reload());
+            }
+        </script>
+    </body>
+</html>
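
A note for existing deployments: this patch removes the hard-coded subreddit list from app/config.py, and scrape_posts.py now reads its targets from the new subreddit table, so previously configured subreddits must be re-entered via the /admin form or moved over with a one-off script. Below is a minimal sketch of such a script (not part of the patch), run from the app directory so it can import config; it assumes the former global defaults pull_by="day" and max_posts_per_pull=100 for each row's fetch_by and fetch_max, and the old_subreddits list is a trimmed stand-in for whatever tuples your old config.py held:

```
import sqlite3

import config  # run from the app directory alongside config.py

# Old config.py entries: (name, minimum upvotes). Trimmed example list;
# substitute the tuples from your previous config.
old_subreddits = [("pcgaming", 50), ("gadgets", 10), ("all", 1000)]

connection = sqlite3.connect(config.db_file)
cursor = connection.cursor()
for name, minimum_score in old_subreddits:
    # Same upsert the /admin endpoint performs, with the former global
    # defaults filled in for fetch_by and fetch_max.
    cursor.execute(
        """
        INSERT INTO subreddit (subreddit, minimum_score, fetch_by, fetch_max)
        VALUES (?, ?, ?, ?)
        ON CONFLICT (subreddit) DO UPDATE SET
            minimum_score=excluded.minimum_score,
            fetch_by=excluded.fetch_by,
            fetch_max=excluded.fetch_max
        """,
        (name, minimum_score, "day", 100),
    )
connection.commit()
connection.close()
```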
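
The /admin endpoint can also be driven without the HTML form, which is handy for scripting or testing. A sketch using the requests package (assumed installed); the base URL here is an assumption, so substitute whatever host port your compose file maps:

```
import requests

base = "http://localhost:5000"  # assumed port; check your compose file

# Add or update a subscription. Field names match what admin() in
# app/app.py reads from the form: name, score, by, max.
requests.post(f"{base}/admin", data={
    "name": "selfhosted",  # subreddit, without the /r/ prefix
    "score": 100,          # minimum score for a post to be saved
    "by": "day",           # time_filter handed to fetch_subreddit_posts
    "max": 100,            # per-scrape post limit
})

# Remove a subscription. DELETE takes the name from the query string.
requests.delete(f"{base}/admin", params={"name": "selfhosted"})
```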
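
One behavioral change buried in scrape_posts.py: when a post falls below minimum_score, the loop now breaks instead of continuing. That should be safe because fetch_subreddit_posts is called with category="top", and reddit's top listings come back highest-scored first within the time filter, so nothing after the first miss could pass the check. A toy illustration (hypothetical scores) of why break keeps the same posts while skipping the tail:

```
# Hypothetical listing, already score-descending as a "top" fetch returns it.
posts = [{"score": s} for s in (250, 120, 80, 40)]
minimum_score = 100

kept = []
for post in posts:
    if post["score"] < minimum_score:
        break  # safe only because the listing is sorted high to low
    kept.append(post)

assert [p["score"] for p in kept] == [250, 120]
```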