Add ability to block subreddits and users, add subreddit link and username to every post
This commit is contained in:
parent
9fc267ec6a
commit
7ecabdf2b7
|
|
@ -260,6 +260,7 @@ class YARS:
|
|||
post_data = post["data"]
|
||||
post_info = {
|
||||
"title": post_data["title"],
|
||||
"subreddit": post_data["subreddit"],
|
||||
"author": post_data["author"],
|
||||
"permalink": post_data["permalink"],
|
||||
"score": post_data["score"],
|
||||
|
|
|
|||
120
app/app.py
120
app/app.py
|
|
@ -58,39 +58,64 @@ def admin():
|
|||
connection = sqlite3.connect(config.db_file)
|
||||
cursor = connection.cursor()
|
||||
if request.method == 'DELETE':
|
||||
delete = """
|
||||
DELETE FROM
|
||||
subreddit
|
||||
WHERE
|
||||
subreddit = ?
|
||||
"""
|
||||
type = request.args.get("type")
|
||||
if type == "sub":
|
||||
delete = """
|
||||
DELETE FROM
|
||||
subreddit
|
||||
WHERE
|
||||
subreddit = ?
|
||||
"""
|
||||
elif type == "block":
|
||||
delete = """
|
||||
DELETE FROM
|
||||
block
|
||||
WHERE
|
||||
name = ?
|
||||
"""
|
||||
else:
|
||||
connection.close()
|
||||
return ""
|
||||
binds = [request.args.get("name")]
|
||||
cursor.execute(delete, binds)
|
||||
connection.commit()
|
||||
connection.close()
|
||||
return ""
|
||||
elif request.method == 'POST':
|
||||
upsert = """
|
||||
INSERT INTO
|
||||
subreddit (subreddit, minimum_score, fetch_by, fetch_max)
|
||||
VALUES
|
||||
(?, ?, ?, ?)
|
||||
ON CONFLICT
|
||||
(subreddit)
|
||||
DO UPDATE SET
|
||||
minimum_score=excluded.minimum_score,
|
||||
fetch_by=excluded.fetch_by,
|
||||
fetch_max=excluded.fetch_max
|
||||
"""
|
||||
binds = [
|
||||
request.form.get("name"),
|
||||
int(request.form.get("score")),
|
||||
request.form.get("by"),
|
||||
int(request.form.get("max"))
|
||||
]
|
||||
type = request.form.get("type")
|
||||
if type == "sub":
|
||||
upsert = """
|
||||
INSERT INTO
|
||||
subreddit (subreddit, minimum_score, fetch_by, fetch_max)
|
||||
VALUES
|
||||
(?, ?, ?, ?)
|
||||
ON CONFLICT
|
||||
(subreddit)
|
||||
DO UPDATE SET
|
||||
minimum_score=excluded.minimum_score,
|
||||
fetch_by=excluded.fetch_by,
|
||||
fetch_max=excluded.fetch_max
|
||||
"""
|
||||
binds = [
|
||||
request.form.get("name"),
|
||||
int(request.form.get("score")),
|
||||
request.form.get("by"),
|
||||
int(request.form.get("max"))
|
||||
]
|
||||
elif type == "block":
|
||||
upsert = """
|
||||
INSERT OR IGNORE INTO
|
||||
block (name)
|
||||
VALUES
|
||||
(?)
|
||||
"""
|
||||
binds = [request.form.get("name")]
|
||||
else:
|
||||
connection.close()
|
||||
return ""
|
||||
cursor.execute(upsert, binds)
|
||||
connection.commit()
|
||||
post_subreddits = get_subreddits(cursor)
|
||||
sidebar_links = get_sidebar_links(cursor)
|
||||
select = """
|
||||
SELECT
|
||||
subreddit,
|
||||
|
|
@ -100,16 +125,24 @@ def admin():
|
|||
FROM
|
||||
subreddit
|
||||
"""
|
||||
sub_subreddits = cursor.execute(select).fetchall()
|
||||
subreddits = cursor.execute(select).fetchall()
|
||||
select = """
|
||||
SELECT
|
||||
name
|
||||
FROM
|
||||
block
|
||||
"""
|
||||
rows = cursor.execute(select).fetchall()
|
||||
blocks = [row[0] for row in rows]
|
||||
connection.close()
|
||||
return render_template('admin.html', post_subreddits=post_subreddits, sub_subreddits=sub_subreddits)
|
||||
return render_template('admin.html', sidebar_links=sidebar_links, subreddits=subreddits, blocks=blocks)
|
||||
|
||||
@app.route('/r/all')
|
||||
def front_page():
|
||||
title = "/r/all"
|
||||
connection = sqlite3.connect(config.db_file)
|
||||
cursor = connection.cursor()
|
||||
subreddits = get_subreddits(cursor)
|
||||
sidebar_links = get_sidebar_links(cursor)
|
||||
select = """
|
||||
SELECT
|
||||
post
|
||||
|
|
@ -123,16 +156,15 @@ def front_page():
|
|||
"""
|
||||
binds = [False, config.posts_per_page_load]
|
||||
posts = get_posts_from_select(cursor, select, binds)
|
||||
add_subreddits_to_posts(posts)
|
||||
connection.close()
|
||||
return render_template('index.html', title=title, posts=posts, subreddits=subreddits)
|
||||
return render_template('index.html', title=title, posts=posts, sidebar_links=sidebar_links)
|
||||
|
||||
@app.route('/r/other')
|
||||
def other_page():
|
||||
title = "/r/other"
|
||||
connection = sqlite3.connect(config.db_file)
|
||||
cursor = connection.cursor()
|
||||
subreddits = get_subreddits(cursor)
|
||||
sidebar_links = get_sidebar_links(cursor)
|
||||
select = """
|
||||
SELECT
|
||||
post
|
||||
|
|
@ -165,16 +197,15 @@ def other_page():
|
|||
"""
|
||||
binds = [False, False, config.other_posts_cutoff, config.posts_per_page_load]
|
||||
posts = get_posts_from_select(cursor, select, binds)
|
||||
add_subreddits_to_posts(posts)
|
||||
connection.close()
|
||||
return render_template('index.html', title=title, posts=posts, subreddits=subreddits)
|
||||
return render_template('index.html', title=title, posts=posts, sidebar_links=sidebar_links)
|
||||
|
||||
@app.route('/r/<path:subreddit>')
|
||||
def get_subreddit(subreddit):
|
||||
title = f"/r/{subreddit}"
|
||||
connection = sqlite3.connect(config.db_file)
|
||||
cursor = connection.cursor()
|
||||
subreddits = get_subreddits(cursor)
|
||||
sidebar_links = get_sidebar_links(cursor)
|
||||
select = """
|
||||
SELECT
|
||||
post
|
||||
|
|
@ -190,9 +221,9 @@ def get_subreddit(subreddit):
|
|||
binds = [subreddit, False, config.posts_per_page_load]
|
||||
posts = get_posts_from_select(cursor, select, binds)
|
||||
connection.close()
|
||||
return render_template('index.html', title=title, posts=posts, subreddits=subreddits)
|
||||
return render_template('index.html', title=title, posts=posts, sidebar_links=sidebar_links)
|
||||
|
||||
def get_subreddits(cursor):
|
||||
def get_sidebar_links(cursor):
|
||||
select = """
|
||||
SELECT
|
||||
subreddit
|
||||
|
|
@ -215,16 +246,17 @@ def get_subreddits(cursor):
|
|||
"""
|
||||
binds = [False, config.other_posts_cutoff]
|
||||
results = cursor.execute(select, binds).fetchall()
|
||||
subreddits = [f"/r/{sub[0]}" for sub in results]
|
||||
subreddits.insert(0, "/r/all")
|
||||
subreddits.append("/r/other")
|
||||
subreddits.append("/admin")
|
||||
return subreddits
|
||||
links = [f"/r/{sub[0]}" for sub in results]
|
||||
links.insert(0, "/r/all")
|
||||
links.append("/r/other")
|
||||
links.append("/admin")
|
||||
return links
|
||||
|
||||
def get_posts_from_select(cursor, select, binds):
    """Run a post query and return the decoded, display-ready posts.

    Args:
        cursor: Database cursor used to execute the query.
        select: SQL text whose first result column holds a JSON-encoded post.
        binds: Values bound to the placeholders in ``select``.

    Returns:
        A list with one decoded post per result row, after it has been passed
        through ``add_media_html_to_posts`` and ``add_subreddits_to_posts``.
    """
    rows = cursor.execute(select, binds).fetchall()
    posts = []
    for row in rows:
        # Only the first column is used; it carries the serialized post.
        posts.append(json.loads(row[0]))
    add_media_html_to_posts(posts)
    add_subreddits_to_posts(posts)
    return posts
|
||||
|
||||
def add_media_html_to_posts(posts):
|
||||
|
|
@ -239,9 +271,11 @@ def add_media_html_to_posts(posts):
|
|||
post["media_html"] = media_html
|
||||
|
||||
def add_subreddits_to_posts(posts):
    """Fill in a missing ``subreddit`` key on each post from its permalink.

    Posts that already carry a ``subreddit`` value are left untouched. For the
    rest, the name is taken from the ``/r/<name>/`` segment of the permalink
    and stored without the ``/r/`` prefix. Posts whose permalink is missing or
    does not contain such a segment are left without a ``subreddit`` key
    instead of raising.

    Args:
        posts: List of post dicts; modified in place.
    """
    # todo, remove after 30 days once subreddit is naturally a part of the post data
    for post in posts:
        if "subreddit" in post:
            continue
        permalink = post.get("permalink") or ""
        m = re.search(r"\/r\/([a-zA-Z0-9_]+)\/.*", permalink)
        if m is None:
            # No subreddit segment to extract; leave the post as it is.
            continue
        post["subreddit"] = m.group(1)
|
||||
|
||||
|
||||
def get_media_html(file, priority=False):
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
# Scheduler configuration
|
||||
max_age_days = 30
|
||||
max_age_seconds = max_age_days * 24 * 60 * 60
|
||||
other_posts_cutoff = 1 #subreddits with this many unread posts or fewer are merged to /r/other
|
||||
other_posts_cutoff = 4 #subreddits with this many unread posts or fewer are merged to /r/other
|
||||
|
||||
# Webpage configuration
|
||||
posts_per_page_load = 50
|
||||
posts_per_page_load = 25
|
||||
|
||||
db_dir = "/reddit/db"
|
||||
media_dir = "/reddit/media"
|
||||
|
|
|
|||
|
|
@ -4,42 +4,50 @@ import time
|
|||
import sqlite3
|
||||
import subprocess
|
||||
|
||||
if __name__ == "__main__":
|
||||
connection = sqlite3.connect(config.db_file)
|
||||
cursor = connection.cursor()
|
||||
now = int(time.time())
|
||||
max_created_utc = now - config.max_age_seconds
|
||||
print("Deleting old posts")
|
||||
delete = "DELETE FROM post WHERE created_utc < ?"
|
||||
binds = [max_created_utc]
|
||||
cursor.execute(delete, binds)
|
||||
print("Deleting old media db rows")
|
||||
delete = "DELETE FROM media WHERE permalink NOT IN (SELECT permalink FROM post)"
|
||||
cursor.execute(delete)
|
||||
print("Deleving media db for read posts")
|
||||
delete = "DELETE FROM media WHERE permalink IN (SELECT permalink FROM post WHERE hidden = ?)"
|
||||
binds = [True]
|
||||
cursor.execute(delete, binds)
|
||||
print("Updating media_fetched for read posts")
|
||||
update = "UPDATE post SET media_fetched = ? WHERE hidden = ?"
|
||||
binds = [False, True]
|
||||
cursor.execute(update, binds)
|
||||
all_files_local = subprocess.run(["find", "/reddit/media", "-type", "f"], capture_output=True, text=True)
|
||||
all_files_local = set(all_files_local.stdout.splitlines())
|
||||
select = "SELECT local from media"
|
||||
results = cursor.execute(select).fetchall()
|
||||
connection.commit()
|
||||
connection.close()
|
||||
all_files_db = set([row[0] for row in results])
|
||||
extra_files = all_files_local - all_files_db
|
||||
print("Deleting old files")
|
||||
for file in extra_files:
|
||||
print(f"Removing {file}")
|
||||
os.remove(file)
|
||||
empty_dirs = subprocess.run(["find", "/reddit/media", "-type", "d", "-empty"], capture_output=True, text=True)
|
||||
empty_dirs = set(empty_dirs.stdout.splitlines())
|
||||
print("Deleting empty directories")
|
||||
for dir in empty_dirs:
|
||||
print(f"Removind dir {dir}")
|
||||
os.rmdir(dir)
|
||||
print("Done")
|
||||
def run():
    """Prune expired and blocked posts, then remove media no longer referenced.

    Steps, in order:
      1. Delete posts older than ``config.max_age_seconds``.
      2. Delete posts whose subreddit appears in the ``block`` table
         (entries stored as ``/r/<name>``).
      3. Delete ``media`` rows whose post is gone or has been hidden (read),
         and reset ``media_fetched`` on hidden posts.
      4. Delete files under ``config.media_dir`` that no ``media`` row points
         to, then delete directories that are empty.

    The database connection is always closed, even if a statement fails; in
    that case nothing is committed.
    """
    connection = sqlite3.connect(config.db_file)
    try:
        cursor = connection.cursor()
        max_created_utc = int(time.time()) - config.max_age_seconds

        print("Deleting old posts")
        cursor.execute("DELETE FROM post WHERE created_utc < ?", [max_created_utc])

        print("Deleting posts from blocked subreddits")
        select = "SELECT name FROM block WHERE name like '/r/%'"
        # Strip the leading "/r/" so the names match the post.subreddit column.
        blocked_subs = [row[0][3:] for row in cursor.execute(select).fetchall()]
        if blocked_subs:
            placeholders = ",".join(["?"] * len(blocked_subs))
            delete = f"DELETE FROM post WHERE subreddit IN ({placeholders})"
            cursor.execute(delete, blocked_subs)

        print("Deleting old media db rows")
        cursor.execute("DELETE FROM media WHERE permalink NOT IN (SELECT permalink FROM post)")

        print("Deleting media db rows for read posts")
        delete = "DELETE FROM media WHERE permalink IN (SELECT permalink FROM post WHERE hidden = ?)"
        cursor.execute(delete, [True])

        print("Updating media_fetched for read posts")
        cursor.execute("UPDATE post SET media_fetched = ? WHERE hidden = ?", [False, True])

        # The files on disk are listed before the media table is read, as in
        # the original ordering of these two steps.
        found_files = subprocess.run(
            ["find", config.media_dir, "-type", "f"], capture_output=True, text=True
        )
        all_files_local = set(found_files.stdout.splitlines())
        results = cursor.execute("SELECT local from media").fetchall()
        connection.commit()
    finally:
        connection.close()

    all_files_db = {row[0] for row in results}
    extra_files = all_files_local - all_files_db

    print("Deleting old files")
    for path in extra_files:
        print(f"Removing {path}")
        os.remove(path)

    found_dirs = subprocess.run(
        ["find", config.media_dir, "-type", "d", "-empty"], capture_output=True, text=True
    )
    empty_dirs = set(found_dirs.stdout.splitlines())

    print("Deleting empty directories")
    for path in empty_dirs:
        print(f"Removing dir {path}")
        os.rmdir(path)

    print("Done")
|
||||
|
||||
run()
|
||||
|
|
@ -9,5 +9,6 @@ cursor = connection.cursor()
|
|||
cursor.execute("CREATE TABLE IF NOT EXISTS post(permalink primary key, subreddit, created_utc, score, media_fetched, post, hidden)")
|
||||
cursor.execute("CREATE TABLE IF NOT EXISTS media(permalink, url , local, PRIMARY KEY (permalink, url))")
|
||||
cursor.execute("CREATE TABLE IF NOT EXISTS subreddit(subreddit primary key, minimum_score, fetch_by, fetch_max)")
|
||||
cursor.execute("CREATE TABLE IF NOT EXISTS block(name primary key)")
|
||||
connection.commit()
|
||||
connection.close()
|
||||
|
|
@ -18,7 +18,7 @@ from yars.utils import download_image
|
|||
miner = YARS()
|
||||
|
||||
# Function to scrape subreddit post details and save to JSON
|
||||
def scrape_subreddit_data(subreddit, minimum_score=100, pull_by="day", limit=5):
|
||||
def scrape_subreddit_data(subreddit, minimum_score=100, pull_by="day", limit=5, blocked_subs=[], blocked_users=[]):
|
||||
ret = []
|
||||
print(f"Starting {subreddit} with min score {minimum_score}, by {pull_by}, limit {limit}")
|
||||
empty = dict()
|
||||
|
|
@ -32,6 +32,7 @@ def scrape_subreddit_data(subreddit, minimum_score=100, pull_by="day", limit=5):
|
|||
break
|
||||
post_data = {
|
||||
"permalink": post.get("permalink"),
|
||||
"subreddit": post.get("subreddit"),
|
||||
"title": post.get("title", ""),
|
||||
"author": post.get("author", ""),
|
||||
"created_utc": post.get("created_utc", ""),
|
||||
|
|
@ -40,6 +41,12 @@ def scrape_subreddit_data(subreddit, minimum_score=100, pull_by="day", limit=5):
|
|||
"media_urls" : post.get("media_urls", []),
|
||||
"body": post.get("body", None),
|
||||
}
|
||||
if post_data["subreddit"] in blocked_subs:
|
||||
print(f'Ignoring post from {post_data["subreddit"]}')
|
||||
continue
|
||||
if post_data["author"] in blocked_users:
|
||||
print(f'Ignoring post from {post_data["author"]}')
|
||||
continue
|
||||
ret.append(post_data)
|
||||
print(f"Finished {subreddit}")
|
||||
return ret
|
||||
|
|
@ -56,8 +63,7 @@ def save_posts_to_db(data, cursor):
|
|||
binds = []
|
||||
for post in data:
|
||||
binds.append(post["permalink"])
|
||||
m = re.search(r"\/r\/([a-zA-Z0-9_]+)\/.*", post["permalink"])
|
||||
binds.append(m.group(1)) #subreddit
|
||||
binds.append(post["subreddit"])
|
||||
binds.append(post["created_utc"])
|
||||
binds.append(post["score"])
|
||||
binds.append(False)
|
||||
|
|
@ -103,6 +109,23 @@ def download_comments_for_permalink(permalink, cursor):
|
|||
binds = [permalink, post_details["comments"]]
|
||||
cursor.execute(upsert, binds)
|
||||
|
||||
def get_blocks(cursor):
    """Load the block list and split it into subreddit and user names.

    Every row of the ``block`` table holds one ``name``. A name starting with
    ``/r/`` is a blocked subreddit and a name starting with ``/u/`` is a
    blocked user; the prefix is stripped from the returned value. Names with
    neither prefix, and non-string values, are ignored.

    Args:
        cursor: Database cursor used to read the ``block`` table.

    Returns:
        A ``(subs, users)`` tuple of two lists of names without their prefix.
    """
    select = """
        SELECT
            name
        FROM
            block
    """
    sub_prefix = "/r/"
    user_prefix = "/u/"
    subs = []
    users = []
    for row in cursor.execute(select):
        name = row[0]
        if not isinstance(name, str):
            continue
        # Match the marker only as a prefix: the slice below assumes the
        # marker occupies the first characters of the name.
        if name.startswith(sub_prefix):
            subs.append(name[len(sub_prefix):])
        elif name.startswith(user_prefix):
            users.append(name[len(user_prefix):])
    return subs, users
|
||||
|
||||
# Main execution
|
||||
if __name__ == "__main__":
|
||||
os.makedirs(config.media_dir, exist_ok=True)
|
||||
|
|
@ -118,8 +141,9 @@ if __name__ == "__main__":
|
|||
subreddit
|
||||
"""
|
||||
subreddits = cursor.execute(select).fetchall()
|
||||
blocked_subs, blocked_users = get_blocks(cursor)
|
||||
for subreddit in subreddits:
|
||||
post_data = scrape_subreddit_data(subreddit[0], subreddit[1], subreddit[2], subreddit[3])
|
||||
post_data = scrape_subreddit_data(subreddit[0], subreddit[1], subreddit[2], subreddit[3], blocked_subs, blocked_users)
|
||||
save_posts_to_db(post_data, cursor)
|
||||
connection.commit()
|
||||
download_media(cursor)
|
||||
|
|
|
|||
|
|
@ -170,14 +170,14 @@
|
|||
<body>
|
||||
<div class="container">
|
||||
<div class="sidebar">
|
||||
{% for subreddit in post_subreddits %}
|
||||
<a href="{{ subreddit }}">{{ subreddit }}</a>
|
||||
{% for link in sidebar_links %}
|
||||
<a href="{{ link }}">{{ link }}</a>
|
||||
{% endfor %}
|
||||
</div>
|
||||
<div class="content">
|
||||
<h1>Admin Panel</h1>
|
||||
<div class="post">
|
||||
<h3>Subreddits</h3>
|
||||
<h2>Subreddits</h2>
|
||||
<table>
|
||||
<tr>
|
||||
<th>Subreddit</th>
|
||||
|
|
@ -186,19 +186,20 @@
|
|||
<th>Fetch max</th>
|
||||
<th>Update</th>
|
||||
</tr>
|
||||
{% for subreddit in sub_subreddits %}
|
||||
{% for subreddit in subreddits %}
|
||||
<tr>
|
||||
<td>/r/{{ subreddit[0] }}</td>
|
||||
<td>{{ subreddit[1] }}</td>
|
||||
<td>{{ subreddit[2] }}</td>
|
||||
<td>{{ subreddit[3] }}</td>
|
||||
<td><button onclick='deleteSubreddit("{{ subreddit[0] }}")'>Delete</button></td>
|
||||
<td><button onclick='deleteEntry("sub","{{ subreddit[0] }}")'>Delete</button></td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
<tr></tr>
|
||||
<tr></tr>
|
||||
<tr>
|
||||
<form method="post">
|
||||
<input name="type" type="text" value="sub" hidden>
|
||||
<td>/r/<input name="name" type="text"></td>
|
||||
<td><input name="score" type="text" value="100"></td>
|
||||
<td>
|
||||
|
|
@ -213,11 +214,34 @@
|
|||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
<div class="post">
|
||||
<h2>Blocked</h2>
|
||||
<table>
|
||||
<tr>
|
||||
<th>Name</th>
|
||||
</tr>
|
||||
{% for block in blocks %}
|
||||
<tr>
|
||||
<td>{{ block }}</td>
|
||||
<td><button onclick='deleteEntry("block", "{{ block }}")'>Delete</button></td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
<tr></tr>
|
||||
<tr></tr>
|
||||
<tr>
|
||||
<form method="post">
|
||||
<input name="type" type="text" value="block" hidden>
|
||||
<td><input name="name" type="text"></td>
|
||||
<td><button type="submit">Add</button></td>
|
||||
</form>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
function deleteSubreddit(name) {
|
||||
fetch('/admin?name='+name, {
|
||||
function deleteEntry(type, name) {
|
||||
fetch('/admin?name='+ name + "&type=" + type, {
|
||||
method: 'DELETE'
|
||||
}).then(() => {
|
||||
window.location.href = window.location.href;
|
||||
|
|
|
|||
|
|
@ -107,6 +107,11 @@
|
|||
background-color: var(--darker);
|
||||
color: var(--light);
|
||||
}
|
||||
a.no-style-link {
|
||||
color: inherit;
|
||||
text-decoration: inherit;
|
||||
cursor: pointer;
|
||||
}
|
||||
.invert {
|
||||
filter: invert(1);
|
||||
transition: filter 0.3s;
|
||||
|
|
@ -170,8 +175,8 @@
|
|||
<body>
|
||||
<div class="container">
|
||||
<div class="sidebar">
|
||||
{% for subreddit in subreddits %}
|
||||
<a href="{{ subreddit }}">{{ subreddit }}</a>
|
||||
{% for link in sidebar_links %}
|
||||
<a href="{{ link }}">{{ link }}</a>
|
||||
{% endfor %}
|
||||
</div>
|
||||
<div class="content">
|
||||
|
|
@ -179,9 +184,16 @@
|
|||
{% for post in posts %}
|
||||
<div class="post">
|
||||
<h3>{{ post.title }}</h3>
|
||||
{% if post.subreddit %}
|
||||
<h5>{{ post.subreddit }}</h5>
|
||||
{% endif %}
|
||||
<span>
|
||||
{% if post.subreddit %}
|
||||
<h5>
|
||||
<a href="/r/{{ post.subreddit }}" class="no-style-link">/r/{{ post.subreddit }}</a>
|
||||
{% if post.author %}
|
||||
— {{ post.author }}
|
||||
{% endif %}
|
||||
</h5>
|
||||
{% endif %}
|
||||
</span>
|
||||
{% if post.media_html|length > 0 %}
|
||||
<div class="media-div">
|
||||
{% for media in post.media_html %}
|
||||
|
|
|
|||
Loading…
Reference in New Issue