Add ability to block posts containing certain words, e.g. Trump
This commit is contained in:
parent
e83f646dd8
commit
e6d6df6299
|
|
@ -196,6 +196,7 @@ def other_page():
|
||||||
post
|
post
|
||||||
WHERE
|
WHERE
|
||||||
hidden = ? AND
|
hidden = ? AND
|
||||||
|
saved = ? AND
|
||||||
subreddit IN
|
subreddit IN
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
|
|
@ -220,7 +221,7 @@ def other_page():
|
||||||
created_utc asc
|
created_utc asc
|
||||||
LIMIT ?
|
LIMIT ?
|
||||||
"""
|
"""
|
||||||
binds = [False, False, False, config.other_posts_cutoff, config.posts_per_page_load]
|
binds = [False, False, False, False, config.other_posts_cutoff, config.posts_per_page_load]
|
||||||
posts = get_posts_from_select(cursor, select, binds)
|
posts = get_posts_from_select(cursor, select, binds)
|
||||||
connection.close()
|
connection.close()
|
||||||
return render_template('index.html', title=title, posts=posts, sidebar_links=sidebar_links)
|
return render_template('index.html', title=title, posts=posts, sidebar_links=sidebar_links)
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,12 @@ def run():
|
||||||
binds.append(False)
|
binds.append(False)
|
||||||
delete = f"DELETE FROM post WHERE author IN ({bind_array}) AND saved = ?"
|
delete = f"DELETE FROM post WHERE author IN ({bind_array}) AND saved = ?"
|
||||||
cursor.execute(delete, binds)
|
cursor.execute(delete, binds)
|
||||||
|
print("Deleting posts with blocked words")
|
||||||
|
select = "SELECT name FROM block where name NOT LIKE '/u/%' AND name NOT LIKE '/r/%'"
|
||||||
|
binds = ["%"+row[0]+"%" for row in cursor.execute(select).fetchall()]
|
||||||
|
where_array = " OR ".join(["post LIKE ?"]*len(binds))
|
||||||
|
delete = f"DELETE FROM post WHERE {where_array}"
|
||||||
|
cursor.execute(delete, binds)
|
||||||
print("Deleting old media db rows")
|
print("Deleting old media db rows")
|
||||||
delete = "DELETE FROM media WHERE permalink NOT IN (SELECT permalink FROM post)"
|
delete = "DELETE FROM media WHERE permalink NOT IN (SELECT permalink FROM post)"
|
||||||
cursor.execute(delete)
|
cursor.execute(delete)
|
||||||
|
|
|
||||||
|
|
@ -18,9 +18,9 @@ from yars.utils import download_image
|
||||||
miner = YARS()
|
miner = YARS()
|
||||||
|
|
||||||
# Function to scrape subreddit post details and save to JSON
|
# Function to scrape subreddit post details and save to JSON
|
||||||
def scrape_subreddit_data(subreddit, minimum_score=100, pull_by="day", limit=5, blocked_subs=[], blocked_users=[]):
|
def scrape_subreddit_data(subreddit, minimum_score=100, pull_by="day", limit=5, blocked_subs=[], blocked_users=[], blocked_words=[]):
|
||||||
ret = []
|
ret = []
|
||||||
print(f"Starting {subreddit} with min score {minimum_score}, by {pull_by}, limit {limit}")
|
print(f"Starting {subreddit}")
|
||||||
empty = dict()
|
empty = dict()
|
||||||
try:
|
try:
|
||||||
subreddit_posts = miner.fetch_subreddit_posts(
|
subreddit_posts = miner.fetch_subreddit_posts(
|
||||||
|
|
@ -42,12 +42,16 @@ def scrape_subreddit_data(subreddit, minimum_score=100, pull_by="day", limit=5,
|
||||||
"body": post.get("body", None),
|
"body": post.get("body", None),
|
||||||
}
|
}
|
||||||
if post_data["subreddit"] in blocked_subs:
|
if post_data["subreddit"] in blocked_subs:
|
||||||
print(f'Ignoring post from {post_data["subreddit"]}')
|
|
||||||
continue
|
continue
|
||||||
if post_data["author"] in blocked_users:
|
if post_data["author"] in blocked_users:
|
||||||
print(f'Ignoring post from {post_data["author"]}')
|
|
||||||
continue
|
continue
|
||||||
ret.append(post_data)
|
for word in blocked_words:
|
||||||
|
if word in post_data["title"]:
|
||||||
|
break
|
||||||
|
if post_data["body"] is not None and word in post_data["body"]:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
ret.append(post_data)
|
||||||
print(f"Finished {subreddit}")
|
print(f"Finished {subreddit}")
|
||||||
return ret
|
return ret
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -121,12 +125,15 @@ def get_blocks(cursor):
|
||||||
blocks = [row[0] for row in cursor.execute(select)]
|
blocks = [row[0] for row in cursor.execute(select)]
|
||||||
subs = []
|
subs = []
|
||||||
users = []
|
users = []
|
||||||
|
words = []
|
||||||
for block in blocks:
|
for block in blocks:
|
||||||
if "/r/" in block:
|
if "/r/" in block:
|
||||||
subs.append(block[3:])
|
subs.append(block[3:])
|
||||||
elif "/u/" in block:
|
elif "/u/" in block:
|
||||||
users.append(block[3:])
|
users.append(block[3:])
|
||||||
return subs, users
|
else:
|
||||||
|
words.append(block)
|
||||||
|
return subs, users, words
|
||||||
|
|
||||||
# Main execution
|
# Main execution
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
@ -143,9 +150,9 @@ if __name__ == "__main__":
|
||||||
subreddit
|
subreddit
|
||||||
"""
|
"""
|
||||||
subreddits = cursor.execute(select).fetchall()
|
subreddits = cursor.execute(select).fetchall()
|
||||||
blocked_subs, blocked_users = get_blocks(cursor)
|
blocked_subs, blocked_users, blocked_words = get_blocks(cursor)
|
||||||
for subreddit in subreddits:
|
for subreddit in subreddits:
|
||||||
post_data = scrape_subreddit_data(subreddit[0], subreddit[1], subreddit[2], subreddit[3], blocked_subs, blocked_users)
|
post_data = scrape_subreddit_data(subreddit[0], subreddit[1], subreddit[2], subreddit[3], blocked_subs, blocked_users, blocked_words)
|
||||||
save_posts_to_db(post_data, cursor)
|
save_posts_to_db(post_data, cursor)
|
||||||
connection.commit()
|
connection.commit()
|
||||||
download_media(cursor)
|
download_media(cursor)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue