Add ability to block subreddits and users, add subreddit link and username to every post

This commit is contained in:
John Stephani 2025-12-27 20:02:27 -06:00
parent 9fc267ec6a
commit 7ecabdf2b7
8 changed files with 204 additions and 100 deletions

View File

@ -260,6 +260,7 @@ class YARS:
post_data = post["data"] post_data = post["data"]
post_info = { post_info = {
"title": post_data["title"], "title": post_data["title"],
"subreddit": post_data["subreddit"],
"author": post_data["author"], "author": post_data["author"],
"permalink": post_data["permalink"], "permalink": post_data["permalink"],
"score": post_data["score"], "score": post_data["score"],

View File

@ -58,18 +58,32 @@ def admin():
connection = sqlite3.connect(config.db_file) connection = sqlite3.connect(config.db_file)
cursor = connection.cursor() cursor = connection.cursor()
if request.method == 'DELETE': if request.method == 'DELETE':
type = request.args.get("type")
if type == "sub":
delete = """ delete = """
DELETE FROM DELETE FROM
subreddit subreddit
WHERE WHERE
subreddit = ? subreddit = ?
""" """
elif type == "block":
delete = """
DELETE FROM
block
WHERE
name = ?
"""
else:
connection.close()
return ""
binds = [request.args.get("name")] binds = [request.args.get("name")]
cursor.execute(delete, binds) cursor.execute(delete, binds)
connection.commit() connection.commit()
connection.close() connection.close()
return "" return ""
elif request.method == 'POST': elif request.method == 'POST':
type = request.form.get("type")
if type == "sub":
upsert = """ upsert = """
INSERT INTO INSERT INTO
subreddit (subreddit, minimum_score, fetch_by, fetch_max) subreddit (subreddit, minimum_score, fetch_by, fetch_max)
@ -88,9 +102,20 @@ def admin():
request.form.get("by"), request.form.get("by"),
int(request.form.get("max")) int(request.form.get("max"))
] ]
elif type == "block":
upsert = """
INSERT OR IGNORE INTO
block (name)
VALUES
(?)
"""
binds = [request.form.get("name")]
else:
connection.close()
return ""
cursor.execute(upsert, binds) cursor.execute(upsert, binds)
connection.commit() connection.commit()
post_subreddits = get_subreddits(cursor) sidebar_links = get_sidebar_links(cursor)
select = """ select = """
SELECT SELECT
subreddit, subreddit,
@ -100,16 +125,24 @@ def admin():
FROM FROM
subreddit subreddit
""" """
sub_subreddits = cursor.execute(select).fetchall() subreddits = cursor.execute(select).fetchall()
select = """
SELECT
name
FROM
block
"""
rows = cursor.execute(select).fetchall()
blocks = [row[0] for row in rows]
connection.close() connection.close()
return render_template('admin.html', post_subreddits=post_subreddits, sub_subreddits=sub_subreddits) return render_template('admin.html', sidebar_links=sidebar_links, subreddits=subreddits, blocks=blocks)
@app.route('/r/all') @app.route('/r/all')
def front_page(): def front_page():
title = "/r/all" title = "/r/all"
connection = sqlite3.connect(config.db_file) connection = sqlite3.connect(config.db_file)
cursor = connection.cursor() cursor = connection.cursor()
subreddits = get_subreddits(cursor) sidebar_links = get_sidebar_links(cursor)
select = """ select = """
SELECT SELECT
post post
@ -123,16 +156,15 @@ def front_page():
""" """
binds = [False, config.posts_per_page_load] binds = [False, config.posts_per_page_load]
posts = get_posts_from_select(cursor, select, binds) posts = get_posts_from_select(cursor, select, binds)
add_subreddits_to_posts(posts)
connection.close() connection.close()
return render_template('index.html', title=title, posts=posts, subreddits=subreddits) return render_template('index.html', title=title, posts=posts, sidebar_links=sidebar_links)
@app.route('/r/other') @app.route('/r/other')
def other_page(): def other_page():
title = "/r/other" title = "/r/other"
connection = sqlite3.connect(config.db_file) connection = sqlite3.connect(config.db_file)
cursor = connection.cursor() cursor = connection.cursor()
subreddits = get_subreddits(cursor) sidebar_links = get_sidebar_links(cursor)
select = """ select = """
SELECT SELECT
post post
@ -165,16 +197,15 @@ def other_page():
""" """
binds = [False, False, config.other_posts_cutoff, config.posts_per_page_load] binds = [False, False, config.other_posts_cutoff, config.posts_per_page_load]
posts = get_posts_from_select(cursor, select, binds) posts = get_posts_from_select(cursor, select, binds)
add_subreddits_to_posts(posts)
connection.close() connection.close()
return render_template('index.html', title=title, posts=posts, subreddits=subreddits) return render_template('index.html', title=title, posts=posts, sidebar_links=sidebar_links)
@app.route('/r/<path:subreddit>') @app.route('/r/<path:subreddit>')
def get_subreddit(subreddit): def get_subreddit(subreddit):
title = f"/r/{subreddit}" title = f"/r/{subreddit}"
connection = sqlite3.connect(config.db_file) connection = sqlite3.connect(config.db_file)
cursor = connection.cursor() cursor = connection.cursor()
subreddits = get_subreddits(cursor) sidebar_links = get_sidebar_links(cursor)
select = """ select = """
SELECT SELECT
post post
@ -190,9 +221,9 @@ def get_subreddit(subreddit):
binds = [subreddit, False, config.posts_per_page_load] binds = [subreddit, False, config.posts_per_page_load]
posts = get_posts_from_select(cursor, select, binds) posts = get_posts_from_select(cursor, select, binds)
connection.close() connection.close()
return render_template('index.html', title=title, posts=posts, subreddits=subreddits) return render_template('index.html', title=title, posts=posts, sidebar_links=sidebar_links)
def get_subreddits(cursor): def get_sidebar_links(cursor):
select = """ select = """
SELECT SELECT
subreddit subreddit
@ -215,16 +246,17 @@ def get_subreddits(cursor):
""" """
binds = [False, config.other_posts_cutoff] binds = [False, config.other_posts_cutoff]
results = cursor.execute(select, binds).fetchall() results = cursor.execute(select, binds).fetchall()
subreddits = [f"/r/{sub[0]}" for sub in results] links = [f"/r/{sub[0]}" for sub in results]
subreddits.insert(0, "/r/all") links.insert(0, "/r/all")
subreddits.append("/r/other") links.append("/r/other")
subreddits.append("/admin") links.append("/admin")
return subreddits return links
def get_posts_from_select(cursor, select, binds): def get_posts_from_select(cursor, select, binds):
results = cursor.execute(select, binds).fetchall() results = cursor.execute(select, binds).fetchall()
posts = [json.loads(post[0]) for post in results] posts = [json.loads(post[0]) for post in results]
add_media_html_to_posts(posts) add_media_html_to_posts(posts)
add_subreddits_to_posts(posts)
return posts return posts
def add_media_html_to_posts(posts): def add_media_html_to_posts(posts):
@ -239,9 +271,11 @@ def add_media_html_to_posts(posts):
post["media_html"] = media_html post["media_html"] = media_html
def add_subreddits_to_posts(posts): def add_subreddits_to_posts(posts):
# todo, remove after 30 days once subreddit is naturally a part of the post data
for post in posts: for post in posts:
if "subreddit" not in post:
m = re.search(r"\/r\/([a-zA-Z0-9_]+)\/.*", post["permalink"]) m = re.search(r"\/r\/([a-zA-Z0-9_]+)\/.*", post["permalink"])
post["subreddit"] = f"/r/{m.group(1)}" post["subreddit"] = m.group(1)
def get_media_html(file, priority=False): def get_media_html(file, priority=False):

View File

@ -1,10 +1,10 @@
# Scheduler configuration # Scheduler configuration
max_age_days = 30 max_age_days = 30
max_age_seconds = max_age_days * 24 * 60 * 60 max_age_seconds = max_age_days * 24 * 60 * 60
other_posts_cutoff = 1 #subreddits with this many unread posts or fewer are merged to /r/other other_posts_cutoff = 4 #subreddits with this many unread posts or fewer are merged to /r/other
# Webpage configuration # Webpage configuration
posts_per_page_load = 50 posts_per_page_load = 25
db_dir = "/reddit/db" db_dir = "/reddit/db"
media_dir = "/reddit/media" media_dir = "/reddit/media"

View File

@ -4,7 +4,7 @@ import time
import sqlite3 import sqlite3
import subprocess import subprocess
if __name__ == "__main__": def run():
connection = sqlite3.connect(config.db_file) connection = sqlite3.connect(config.db_file)
cursor = connection.cursor() cursor = connection.cursor()
now = int(time.time()) now = int(time.time())
@ -13,6 +13,12 @@ if __name__ == "__main__":
delete = "DELETE FROM post WHERE created_utc < ?" delete = "DELETE FROM post WHERE created_utc < ?"
binds = [max_created_utc] binds = [max_created_utc]
cursor.execute(delete, binds) cursor.execute(delete, binds)
print("Deleting posts from blocked subreddits")
select = "SELECT name FROM block WHERE name like '/r/%'"
binds = [row[0][3:] for row in cursor.execute(select).fetchall()]
bind_array = ",".join(["?"]*len(binds))
delete = f"DELETE FROM post WHERE subreddit IN ({bind_array})"
cursor.execute(delete, binds)
print("Deleting old media db rows") print("Deleting old media db rows")
delete = "DELETE FROM media WHERE permalink NOT IN (SELECT permalink FROM post)" delete = "DELETE FROM media WHERE permalink NOT IN (SELECT permalink FROM post)"
cursor.execute(delete) cursor.execute(delete)
@ -43,3 +49,5 @@ if __name__ == "__main__":
print(f"Removind dir {dir}") print(f"Removind dir {dir}")
os.rmdir(dir) os.rmdir(dir)
print("Done") print("Done")
run()

View File

@ -9,5 +9,6 @@ cursor = connection.cursor()
cursor.execute("CREATE TABLE IF NOT EXISTS post(permalink primary key, subreddit, created_utc, score, media_fetched, post, hidden)") cursor.execute("CREATE TABLE IF NOT EXISTS post(permalink primary key, subreddit, created_utc, score, media_fetched, post, hidden)")
cursor.execute("CREATE TABLE IF NOT EXISTS media(permalink, url , local, PRIMARY KEY (permalink, url))") cursor.execute("CREATE TABLE IF NOT EXISTS media(permalink, url , local, PRIMARY KEY (permalink, url))")
cursor.execute("CREATE TABLE IF NOT EXISTS subreddit(subreddit primary key, minimum_score, fetch_by, fetch_max)") cursor.execute("CREATE TABLE IF NOT EXISTS subreddit(subreddit primary key, minimum_score, fetch_by, fetch_max)")
cursor.execute("CREATE TABLE IF NOT EXISTS block(name primary key)")
connection.commit() connection.commit()
connection.close() connection.close()

View File

@ -18,7 +18,7 @@ from yars.utils import download_image
miner = YARS() miner = YARS()
# Function to scrape subreddit post details and save to JSON # Function to scrape subreddit post details and save to JSON
def scrape_subreddit_data(subreddit, minimum_score=100, pull_by="day", limit=5): def scrape_subreddit_data(subreddit, minimum_score=100, pull_by="day", limit=5, blocked_subs=[], blocked_users=[]):
ret = [] ret = []
print(f"Starting {subreddit} with min score {minimum_score}, by {pull_by}, limit {limit}") print(f"Starting {subreddit} with min score {minimum_score}, by {pull_by}, limit {limit}")
empty = dict() empty = dict()
@ -32,6 +32,7 @@ def scrape_subreddit_data(subreddit, minimum_score=100, pull_by="day", limit=5):
break break
post_data = { post_data = {
"permalink": post.get("permalink"), "permalink": post.get("permalink"),
"subreddit": post.get("subreddit"),
"title": post.get("title", ""), "title": post.get("title", ""),
"author": post.get("author", ""), "author": post.get("author", ""),
"created_utc": post.get("created_utc", ""), "created_utc": post.get("created_utc", ""),
@ -40,6 +41,12 @@ def scrape_subreddit_data(subreddit, minimum_score=100, pull_by="day", limit=5):
"media_urls" : post.get("media_urls", []), "media_urls" : post.get("media_urls", []),
"body": post.get("body", None), "body": post.get("body", None),
} }
if post_data["subreddit"] in blocked_subs:
print(f'Ignoring post from {post_data["subreddit"]}')
continue
if post_data["author"] in blocked_users:
print(f'Ignoring post from {post_data["author"]}')
continue
ret.append(post_data) ret.append(post_data)
print(f"Finished {subreddit}") print(f"Finished {subreddit}")
return ret return ret
@ -56,8 +63,7 @@ def save_posts_to_db(data, cursor):
binds = [] binds = []
for post in data: for post in data:
binds.append(post["permalink"]) binds.append(post["permalink"])
m = re.search(r"\/r\/([a-zA-Z0-9_]+)\/.*", post["permalink"]) binds.append(post["subreddit"])
binds.append(m.group(1)) #subreddit
binds.append(post["created_utc"]) binds.append(post["created_utc"])
binds.append(post["score"]) binds.append(post["score"])
binds.append(False) binds.append(False)
@ -103,6 +109,23 @@ def download_comments_for_permalink(permalink, cursor):
binds = [permalink, post_details["comments"]] binds = [permalink, post_details["comments"]]
cursor.execute(upsert, binds) cursor.execute(upsert, binds)
def get_blocks(cursor):
    """Load the block list and split it into subreddit and user names.

    Rows of the ``block`` table are expected to look like ``/r/<subreddit>``
    or ``/u/<username>``. The three-character prefix is stripped before the
    name is returned.

    Args:
        cursor: An open DB-API cursor on the database holding ``block``.

    Returns:
        A ``(subs, users)`` tuple of lists containing the bare subreddit
        names and the bare user names, respectively.
    """
    select = """
        SELECT
            name
        FROM
            block
    """
    sub_prefix = "/r/"
    user_prefix = "/u/"
    subs = []
    users = []
    for (name,) in cursor.execute(select):
        # The admin form can store NULL when no name is submitted (SQLite
        # permits NULL in a non-INTEGER primary key); skip such rows rather
        # than crash on a string operation.
        if not isinstance(name, str):
            continue
        # Match on the prefix, not anywhere in the string: the slice below
        # assumes the marker occupies the first three characters, and the
        # cleanup job selects blocked subreddits with LIKE '/r/%'.
        if name.startswith(sub_prefix):
            subs.append(name[len(sub_prefix):])
        elif name.startswith(user_prefix):
            users.append(name[len(user_prefix):])
    return subs, users
# Main execution # Main execution
if __name__ == "__main__": if __name__ == "__main__":
os.makedirs(config.media_dir, exist_ok=True) os.makedirs(config.media_dir, exist_ok=True)
@ -118,8 +141,9 @@ if __name__ == "__main__":
subreddit subreddit
""" """
subreddits = cursor.execute(select).fetchall() subreddits = cursor.execute(select).fetchall()
blocked_subs, blocked_users = get_blocks(cursor)
for subreddit in subreddits: for subreddit in subreddits:
post_data = scrape_subreddit_data(subreddit[0], subreddit[1], subreddit[2], subreddit[3]) post_data = scrape_subreddit_data(subreddit[0], subreddit[1], subreddit[2], subreddit[3], blocked_subs, blocked_users)
save_posts_to_db(post_data, cursor) save_posts_to_db(post_data, cursor)
connection.commit() connection.commit()
download_media(cursor) download_media(cursor)

View File

@ -170,14 +170,14 @@
<body> <body>
<div class="container"> <div class="container">
<div class="sidebar"> <div class="sidebar">
{% for subreddit in post_subreddits %} {% for link in sidebar_links %}
<a href="{{ subreddit }}">{{ subreddit }}</a> <a href="{{ link }}">{{ link }}</a>
{% endfor %} {% endfor %}
</div> </div>
<div class="content"> <div class="content">
<h1>Admin Panel</h1> <h1>Admin Panel</h1>
<div class="post"> <div class="post">
<h3>Subreddits</h3> <h2>Subreddits</h2>
<table> <table>
<tr> <tr>
<th>Subreddit</th> <th>Subreddit</th>
@ -186,19 +186,20 @@
<th>Fetch max</th> <th>Fetch max</th>
<th>Update</th> <th>Update</th>
</tr> </tr>
{% for subreddit in sub_subreddits %} {% for subreddit in subreddits %}
<tr> <tr>
<td>/r/{{ subreddit[0] }}</td> <td>/r/{{ subreddit[0] }}</td>
<td>{{ subreddit[1] }}</td> <td>{{ subreddit[1] }}</td>
<td>{{ subreddit[2] }}</td> <td>{{ subreddit[2] }}</td>
<td>{{ subreddit[3] }}</td> <td>{{ subreddit[3] }}</td>
<td><button onclick='deleteSubreddit("{{ subreddit[0] }}")'>Delete</button></td> <td><button onclick='deleteEntry("sub","{{ subreddit[0] }}")'>Delete</button></td>
</tr> </tr>
{% endfor %} {% endfor %}
<tr></tr> <tr></tr>
<tr></tr> <tr></tr>
<tr> <tr>
<form method="post"> <form method="post">
<input name="type" type="text" value="sub" hidden>
<td>/r/<input name="name" type="text"></td> <td>/r/<input name="name" type="text"></td>
<td><input name="score" type="text" value="100"></td> <td><input name="score" type="text" value="100"></td>
<td> <td>
@ -213,11 +214,34 @@
</tr> </tr>
</table> </table>
</div> </div>
<div class="post">
<h2>Blocked</h2>
<table>
<tr>
<th>Name</th>
</tr>
{% for block in blocks %}
<tr>
<td>{{ block }}</td>
<td><button onclick='deleteEntry("block", "{{ block }}")'>Delete</button></td>
</tr>
{% endfor %}
<tr></tr>
<tr></tr>
<tr>
<form method="post">
<input name="type" type="text" value="block" hidden>
<td><input name="name" type="text"></td>
<td><button type="submit">Add</button></td>
</form>
</tr>
</table>
</div>
</div> </div>
</div> </div>
<script> <script>
function deleteSubreddit(name) { function deleteEntry(type, name) {
fetch('/admin?name='+name, { fetch('/admin?name='+ name + "&type=" + type, {
method: 'DELETE' method: 'DELETE'
}).then(() => { }).then(() => {
window.location.href = window.location.href; window.location.href = window.location.href;

View File

@ -107,6 +107,11 @@
background-color: var(--darker); background-color: var(--darker);
color: var(--light); color: var(--light);
} }
a.no-style-link {
color: inherit;
text-decoration: inherit;
cursor: pointer;
}
.invert { .invert {
filter: invert(1); filter: invert(1);
transition: filter 0.3s; transition: filter 0.3s;
@ -170,8 +175,8 @@
<body> <body>
<div class="container"> <div class="container">
<div class="sidebar"> <div class="sidebar">
{% for subreddit in subreddits %} {% for link in sidebar_links %}
<a href="{{ subreddit }}">{{ subreddit }}</a> <a href="{{ link }}">{{ link }}</a>
{% endfor %} {% endfor %}
</div> </div>
<div class="content"> <div class="content">
@ -179,9 +184,16 @@
{% for post in posts %} {% for post in posts %}
<div class="post"> <div class="post">
<h3>{{ post.title }}</h3> <h3>{{ post.title }}</h3>
<span>
{% if post.subreddit %} {% if post.subreddit %}
<h5>{{ post.subreddit }}</h5> <h5>
<a href="/r/{{ post.subreddit }}" class="no-style-link">/r/{{ post.subreddit }}</a>
{% if post.author %}
— {{ post.author }}
{% endif %} {% endif %}
</h5>
{% endif %}
</span>
{% if post.media_html|length > 0 %} {% if post.media_html|length > 0 %}
<div class="media-div"> <div class="media-div">
{% for media in post.media_html %} {% for media in post.media_html %}