Initial Release

John Stephani 2025-12-23 13:44:14 -06:00
commit 6609e7253b
23 changed files with 9093 additions and 0 deletions

5
.gitignore vendored Normal file

@ -0,0 +1,5 @@
media/
db/
**/YARS.log
**/__pycache__/
**/.venv/

28
README.md Executable file

@ -0,0 +1,28 @@
### Places to configure
#### ./compose.yaml
You can change the host port, the host volume directories, how often Reddit is scanned for new data, and how often old data is removed.
#### ./app/config.py
You can change how much data is pulled, which subreddits it comes from, the minimum score a post needs before it is saved to your DB, and how long data is retained.
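For reference, the defaults in `./app/config.py` look like this (an excerpt of the values shipped in this commit; trim or extend the subreddit list to taste):
```
max_posts_per_pull = 100      # posts requested per subreddit on each scrape
pull_by = "day"               # time filter used when pulling "top" posts
subreddits = [
    # (name, minimum upvotes a post needs to be saved)
    ("selfhosted", 100),
    ("homelab", 100),
]
max_age_days = 30             # posts older than this get cleaned up
posts_per_page_load = 50      # posts rendered per subreddit page
```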
### Startup
```
docker compose build
docker compose up
```
The DB is created automatically. You will want to run
```
docker exec -it reddit-web-1 sh -c "python3 /app/scrape_posts.py"
```
to populate the DB with initial data; otherwise the web page will not be usable until the scheduled scrape task runs.
### Thanks
This wouldn't be possible without https://github.com/datavorous/yars, which has been yoinked and tweaked to handle some more complex media cases. I will port my knowledge back to the project when time permits.
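For reference, the main tweak is that `fetch_subreddit_posts` in `app/YARS/src/yars/yars.py` attaches a `media_urls` list to each post (gallery images resolved to direct i.redd.it links, and Reddit-hosted videos split into a video URL plus a companion audio URL), which is what `scrape_posts.py` downloads. A minimal sketch of using it directly; the path and subreddit name here are just examples:
```
import sys
sys.path.append("app/YARS/src")  # adjust if you are not running from the repo root

from yars.yars import YARS

miner = YARS()
posts = miner.fetch_subreddit_posts("selfhosted", limit=5, category="top", time_filter="day")
for post in posts:
    # media_urls is only present when the post actually has media attached
    print(post["permalink"], post.get("media_urls", []))
```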

27
app/Dockerfile Executable file

@ -0,0 +1,27 @@
# syntax=docker/dockerfile:1.4
FROM --platform=$BUILDPLATFORM python:3.10-alpine AS builder
WORKDIR /app
COPY requirements.txt /app
RUN --mount=type=cache,target=/root/.cache/pip \
pip3 install -r requirements.txt
COPY . /app
ENTRYPOINT ["python3"]
CMD ["app.py"]
FROM builder AS dev-envs
RUN <<EOF
apk update
apk add git
EOF
RUN <<EOF
addgroup -S docker
adduser -S --shell /bin/bash --ingroup docker vscode
EOF
# install Docker tools (cli, buildx, compose)
COPY --from=gloursdocker/docker / /

162
app/YARS/.gitignore vendored Normal file

@ -0,0 +1,162 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

41
app/YARS/.pre-commit-config.yaml Normal file

@ -0,0 +1,41 @@
ci:
  autofix_commit_msg: "[pre-commit.ci] auto fixes from pre-commit.com hooks"
  autofix_prs: true
  autoupdate_commit_msg: "[pre-commit.ci] pre-commit autoupdate"
  autoupdate_schedule: quarterly
  submodules: false
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-ast
      - id: check-yaml
      - id: check-toml
      - id: check-merge-conflict
      - id: mixed-line-ending
      - id: check-case-conflict
      - id: sort-simple-yaml
        files: .pre-commit-config.yaml
  - repo: https://github.com/hadialqattan/pycln
    rev: v2.4.0
    hooks:
      - id: pycln
        args: [--config=pyproject.toml, src]
        types: [file]
        types_or: [python, pyi]
  - repo: https://github.com/psf/black-pre-commit-mirror
    rev: 24.8.0
    hooks:
      - id: black
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.6.9
    hooks:
      - id: ruff
        types: [file]
        types_or: [python, pyi, toml]
  - repo: https://github.com/codespell-project/codespell
    rev: v2.3.0
    hooks:
      - id: codespell

21
app/YARS/LICENSE Normal file

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 Sagnik Bhattacharjee
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

172
app/YARS/README.md Normal file

@ -0,0 +1,172 @@
<div align="center">
<img src="logo.svg" width="10%">
# YARS (Yet Another Reddit Scraper)
[![GitHub stars](https://img.shields.io/github/stars/datavorous/yars.svg?style=social&label=Stars&style=plastic)](https://github.com/datavorous/yars/stargazers)<br>
</div>
YARS is a Python package designed to simplify the process of scraping Reddit for posts, comments, user data, and other media. The package also includes utility functions for displaying results, exporting them to JSON or CSV, and downloading media. It is built using **Python** and relies on the **requests** module for fetching data from Reddit's public API. The scraper uses simple `.json` requests, avoiding the need for official Reddit API keys, which keeps it lightweight and easy to use.
## Features
- **Reddit Search**: Search Reddit for posts using a keyword query.
- **Post Scraping**: Scrape post details, including title, body, and comments.
- **User Data Scraping**: Fetch recent activity (posts and comments) of a Reddit user.
- **Subreddit Posts Fetching**: Retrieve posts from specific subreddits with flexible options for category and time filters.
- **Image Downloading**: Download images from posts.
- **Results Display**: Utilize `Pygments` for colorful display of JSON-formatted results.
> [!WARNING]
> Use with rotating proxies, or Reddit might gift you an IP ban.
> I could extract at most 2552 posts at once from 'r/all' using this.
> [Here](https://files.catbox.moe/zdra2i.json) is a **7.1 MB JSON** file containing the top 100 posts from 'r/nosleep', including post titles, body text, all comments and their replies, post scores, time of upload, etc.
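Since the constructor already accepts an optional proxy (applied to both `http` and `https` traffic) and a request timeout, a rotating-proxy endpoint can be plugged in directly; the proxy URL below is a placeholder:
```python
from yars import YARS

# Placeholder endpoint -- substitute your own rotating proxy.
miner = YARS(proxy="http://user:pass@proxy.example.com:8080", timeout=10)
posts = miner.fetch_subreddit_posts("all", limit=50, category="top", time_filter="day")
```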
## Dependencies
- `requests`
- `Pygments`
## Installation
1. Clone the repository:
```
git clone https://github.com/datavorous/YARS.git
```
Navigate inside the ```src``` folder.
2. Install ```uv``` (if not already installed):
```
pip install uv
```
3. Run the application:
```
uv run example/example.py
```
It'll set up the virtual env, install the necessary packages, and run the ```example.py``` program.
## Usage
We will use the following Python script to demonstrate the functionality of the scraper. The script includes:
- Searching Reddit
- Scraping post details
- Fetching user data
- Retrieving subreddit posts
- Downloading images from posts
#### Code Overview
```python
from yars import YARS
from utils import display_results, download_image
miner = YARS()
```
#### Step 1: Searching Reddit
The `search_reddit` method allows you to search Reddit using a query string. Here, we search for posts containing "OpenAI" and limit the results to 3 posts. The `display_results` function is used to present the results in a formatted way.
```python
search_results = miner.search_reddit("OpenAI", limit=3)
display_results(search_results, "SEARCH")
```
#### Step 2: Scraping Post Details
Next, we scrape details of a specific Reddit post by passing its permalink. If the post details are successfully retrieved, they are displayed using `display_results`. Otherwise, an error message is printed.
```python
permalink = "https://www.reddit.com/r/getdisciplined/comments/1frb5ib/what_single_health_test_or_practice_has/".split('reddit.com')[1]
post_details = miner.scrape_post_details(permalink)
if post_details:
    display_results(post_details, "POST DATA")
else:
    print("Failed to scrape post details.")
```
#### Step 3: Fetching User Data
We can also retrieve a Reddit user's recent activity (posts and comments) using the `scrape_user_data` method. Here, we fetch data for the user `iamsecb` and limit the results to 2 items.
```python
user_data = miner.scrape_user_data("iamsecb", limit=2)
display_results(user_data, "USER DATA")
```
#### Step 4: Fetching Subreddit Posts
The `fetch_subreddit_posts` method retrieves posts from a specified subreddit. In this example, we fetch 11 top posts from the "generative" subreddit from the past week.
```python
subreddit_posts = miner.fetch_subreddit_posts("generative", limit=11, category="top", time_filter="week")
display_results(subreddit_posts, "EarthPorn SUBREDDIT New Posts")
```
#### Step 5: Downloading Images
For the posts retrieved from the subreddit, we try to download their associated images. The `download_image` function is used for this. If the post doesn't have an `image_url`, the thumbnail URL is used as a fallback.
```python
for z in range(3):
    try:
        image_url = subreddit_posts[z]["image_url"]
    except KeyError:
        image_url = subreddit_posts[z]["thumbnail_url"]
    download_image(image_url)
```
### Complete Code Example
```python
from yars import YARS
from utils import display_results, download_image
miner = YARS()
# Search for posts related to "OpenAI"
search_results = miner.search_reddit("OpenAI", limit=3)
display_results(search_results, "SEARCH")
# Scrape post details using its permalink
permalink = "https://www.reddit.com/r/getdisciplined/comments/1frb5ib/what_single_health_test_or_practice_has/".split('reddit.com')[1]
post_details = miner.scrape_post_details(permalink)
if post_details:
    display_results(post_details, "POST DATA")
else:
    print("Failed to scrape post details.")
# Fetch recent activity of user "iamsecb"
user_data = miner.scrape_user_data("iamsecb", limit=2)
display_results(user_data, "USER DATA")
# Fetch top posts from the subreddit "generative" from the past week
subreddit_posts = miner.fetch_subreddit_posts("generative", limit=11, category="top", time_filter="week")
display_results(subreddit_posts, "EarthPorn SUBREDDIT New Posts")
# Download images from the fetched posts
for z in range(3):
    try:
        image_url = subreddit_posts[z]["image_url"]
    except KeyError:
        image_url = subreddit_posts[z]["thumbnail_url"]
    download_image(image_url)
```
You can now use these techniques to explore and scrape data from Reddit programmatically.
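The bundled `utils` module also provides `export_to_json` and `export_to_csv` helpers if you want to keep the results around; a quick sketch (output filenames here are just examples):
```python
from yars import YARS
from utils import export_to_json, export_to_csv

miner = YARS()
results = miner.search_reddit("OpenAI", limit=10)

# Writes the list of result dicts to disk.
export_to_json(results, "openai_results.json")
export_to_csv(results, "openai_results.csv")
```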
## Contributing
Contributions are welcome! For feature requests, bug reports, or questions, please open an issue. If you would like to contribute code, please open a pull request with your changes.
### Our Notable Contributors
<a href="https://github.com/datavorous/yars/graphs/contributors">
<img src="https://contrib.rocks/image?repo=datavorous/yars" />

4
app/YARS/logo.svg Normal file

@ -0,0 +1,4 @@
<svg width="217" height="229" viewBox="0 0 217 229" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M206.56 195.464C189.638 211.477 168.078 222.382 144.606 226.8C121.134 231.218 96.8052 228.95 74.6953 220.284C52.5854 211.618 33.6878 196.942 20.3922 178.113C7.09653 159.283 0 137.146 0 114.5C0 91.854 7.09653 69.7166 20.3922 50.8872C33.6878 32.0578 52.5855 17.382 74.6953 8.71579C96.8052 0.0495596 121.134 -2.21792 144.606 2.2001C168.078 6.61811 189.638 17.5232 206.56 33.5363L179.384 59.2528C167.836 48.3259 153.124 40.8846 137.108 37.8699C121.092 34.8552 104.49 36.4025 89.4031 42.316C74.316 48.2296 61.4208 58.2439 52.3483 71.0925C43.2757 83.9412 38.4332 99.0471 38.4332 114.5C38.4332 129.953 43.2757 145.059 52.3483 157.907C61.4208 170.756 74.316 180.77 89.4031 186.684C104.49 192.598 121.092 194.145 137.108 191.13C153.124 188.115 167.836 180.674 179.384 169.747L206.56 195.464Z" fill="#FB471A"/>
<path d="M52.2642 51.0675C65.76 38.6206 82.9546 30.1442 101.674 26.7101C120.393 23.276 139.796 25.0385 157.429 31.7747C175.062 38.5109 190.133 49.9183 200.737 64.5543C211.34 79.1902 217 96.3975 217 114C217 131.603 211.34 148.81 200.737 163.446C190.133 178.082 175.062 189.489 157.429 196.225C139.796 202.961 120.393 204.724 101.674 201.29C82.9546 197.856 65.7599 189.379 52.2642 176.932L80.2341 151.136C88.1979 158.481 98.3445 163.483 109.391 165.51C120.437 167.536 131.886 166.496 142.292 162.521C152.697 158.546 161.591 151.815 167.848 143.178C174.105 134.541 177.445 124.387 177.445 114C177.445 103.613 174.105 93.4588 167.848 84.8221C161.591 76.1854 152.697 69.454 142.292 65.4789C131.886 61.5039 120.437 60.4639 109.391 62.4903C98.3445 64.5168 88.1979 69.5187 80.2341 76.8636L52.2642 51.0675Z" fill="#FFBA4A"/>
</svg>


0
app/YARS/src/README.md Normal file

12
app/YARS/src/pyproject.toml Normal file

@ -0,0 +1,12 @@
[project]
name = "sm"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"flask>=3.0.3",
"meta-ai-api>=1.2.1",
"pygments>=2.18.0",
"requests>=2.32.3",
]


7542
app/YARS/src/yars/agents.py Normal file

File diff suppressed because it is too large.

15
app/YARS/src/yars/sessions.py Normal file

@ -0,0 +1,15 @@
from requests import Session
from .agents import get_agent
class RandomUserAgentSession(Session):
    """
    Session class (inherited from requests.Session) which passes
    a random user agent with each request
    """

    def request(self, *args, **kwargs):
        self.headers.update({"User-Agent": get_agent()})
        return super().request(*args, **kwargs)

96
app/YARS/src/yars/utils.py Normal file

@ -0,0 +1,96 @@
import os
import csv
import json
import logging
import requests
from urllib.parse import urlparse
from pygments import formatters, highlight, lexers
logging.basicConfig(
level=logging.INFO, filename="YARS.log", format="%(asctime)s - %(message)s"
)
def display_results(results, title):
try:
print(f"\n{'-'*20} {title} {'-'*20}")
if isinstance(results, list):
for item in results:
if isinstance(item, dict):
formatted_json = json.dumps(item, sort_keys=True, indent=4)
colorful_json = highlight(
formatted_json,
lexers.JsonLexer(),
formatters.TerminalFormatter(),
)
print(colorful_json)
else:
print(item)
elif isinstance(results, dict):
formatted_json = json.dumps(results, sort_keys=True, indent=4)
colorful_json = highlight(
formatted_json, lexers.JsonLexer(), formatters.TerminalFormatter()
)
print(colorful_json)
else:
logging.warning(
"No results to display: expected a list or dictionary, got %S",
type(results),
)
print("No results to display.")
except Exception as e:
logging.error(f"Error displaying results: {e}")
print("Error displaying results.")
def download_image(image_url, output_folder="images", session=None):
os.makedirs(output_folder, exist_ok=True)
filename = urlparse(image_url).path.lstrip('/')
filepath = os.path.join(output_folder, filename)
os.makedirs(os.path.dirname(filepath), exist_ok=True)
if session is None:
session = requests.Session()
try:
response = session.get(image_url, stream=True)
response.raise_for_status()
with open(filepath, "wb") as f:
for chunk in response.iter_content(8192):
f.write(chunk)
logging.info("Downloaded: %s", filepath)
return filepath
except requests.RequestException as e:
logging.error("Failed to download %s: %s", image_url, e)
return None
except Exception as e:
logging.error("An error occurred while saving the image: %s", e)
return None
def export_to_json(data, filename="output.json"):
try:
with open(filename, "w", encoding="utf-8") as json_file:
json.dump(data, json_file, indent=4)
print(f"Data successfully exported to {filename}")
except Exception as e:
print(f"Error exporting to JSON: {e}")
def export_to_csv(data, filename="output.csv"):
try:
keys = data[0].keys()
with open(filename, "w", newline="", encoding="utf-8") as output_file:
dict_writer = csv.DictWriter(output_file, fieldnames=keys)
dict_writer.writeheader()
dict_writer.writerows(data)
print(f"Data successfully exported to {filename}")
except Exception as e:
print(f"Error exporting to CSV: {e}")

309
app/YARS/src/yars/yars.py Normal file

@ -0,0 +1,309 @@
from __future__ import annotations
from .sessions import RandomUserAgentSession
import time
import random
import logging
import re
import requests
from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
logging.basicConfig(
filename="YARS.log",
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
)
class YARS:
__slots__ = ("headers", "session", "proxy", "timeout")
def __init__(self, proxy=None, timeout=10, random_user_agent=True):
self.session = RandomUserAgentSession() if random_user_agent else requests.Session()
self.proxy = proxy
self.timeout = timeout
retries = Retry(
total=5,
backoff_factor=2, # Exponential backoff
status_forcelist=[429, 500, 502, 503, 504],
)
self.session.mount("https://", HTTPAdapter(max_retries=retries))
if proxy:
self.session.proxies.update({"http": proxy, "https": proxy})
def handle_search(self,url, params, after=None, before=None):
if after:
params["after"] = after
if before:
params["before"] = before
try:
response = self.session.get(url, params=params, timeout=self.timeout)
response.raise_for_status()
logging.info("Search request successful")
except Exception as e:
if response.status_code != 200:
logging.info("Search request unsuccessful due to: %s", e)
print(f"Failed to fetch search results: {response.status_code}")
return []
data = response.json()
results = []
for post in data["data"]["children"]:
post_data = post["data"]
results.append(
{
"title": post_data["title"],
"link": f"https://www.reddit.com{post_data['permalink']}",
"description": post_data.get("selftext", "")[:269],
}
)
logging.info("Search Results Retrned %d Results", len(results))
return results
def search_reddit(self, query, limit=10, after=None, before=None):
url = "https://www.reddit.com/search.json"
params = {"q": query, "limit": limit, "sort": "relevance", "type": "link"}
return self.handle_search(url, params, after, before)
def search_subreddit(self, subreddit, query, limit=10, after=None, before=None, sort="relevance"):
url = f"https://www.reddit.com/r/{subreddit}/search.json"
params = {"q": query, "limit": limit, "sort": "relevance", "type": "link","restrict_sr":"on"}
return self.handle_search(url, params, after, before)
def scrape_post_details(self, permalink):
url = f"https://www.reddit.com{permalink}.json"
try:
response = self.session.get(url, timeout=self.timeout)
response.raise_for_status()
logging.info("Post details request successful : %s", url)
except Exception as e:
logging.info("Post details request unsccessful: %e", e)
if response.status_code != 200:
print(f"Failed to fetch post data: {response.status_code}")
return None
post_data = response.json()
if not isinstance(post_data, list) or len(post_data) < 2:
logging.info("Unexpected post data structre")
print("Unexpected post data structure")
return None
main_post = post_data[0]["data"]["children"][0]["data"]
title = main_post["title"]
body = main_post.get("selftext", "")
comments = self._extract_comments(post_data[1]["data"]["children"])
logging.info("Successfully scraped post: %s", title)
return {"title": title, "body": body, "comments": comments}
def _extract_comments(self, comments):
logging.info("Extracting comments")
extracted_comments = []
for comment in comments:
if isinstance(comment, dict) and comment.get("kind") == "t1":
comment_data = comment.get("data", {})
extracted_comment = {
"author": comment_data.get("author", ""),
"body": comment_data.get("body", ""),
"score": comment_data.get("score",""),
"replies": [],
}
replies = comment_data.get("replies", "")
if isinstance(replies, dict):
extracted_comment["replies"] = self._extract_comments(
replies.get("data", {}).get("children", [])
)
extracted_comments.append(extracted_comment)
logging.info("Successfully extracted comments")
return extracted_comments
def scrape_user_data(self, username, limit=10):
logging.info("Scraping user data for %s, limit: %d", username, limit)
base_url = f"https://www.reddit.com/user/{username}/.json"
params = {"limit": limit, "after": None}
all_items = []
count = 0
while count < limit:
try:
response = self.session.get(
base_url, params=params, timeout=self.timeout
)
response.raise_for_status()
logging.info("User data request successful")
except Exception as e:
logging.info("User data request unsuccessful: %s", e)
if response.status_code != 200:
print(
f"Failed to fetch data for user {username}: {response.status_code}"
)
break
try:
data = response.json()
except ValueError:
print(f"Failed to parse JSON response for user {username}.")
break
if "data" not in data or "children" not in data["data"]:
print(
f"No 'data' or 'children' field found in response for user {username}."
)
logging.info("No 'data' or 'children' field found in response")
break
items = data["data"]["children"]
if not items:
print(f"No more items found for user {username}.")
logging.info("No more items found for user")
break
for item in items:
kind = item["kind"]
item_data = item["data"]
if kind == "t3":
post_url = f"https://www.reddit.com{item_data.get('permalink', '')}"
all_items.append(
{
"type": "post",
"title": item_data.get("title", ""),
"subreddit": item_data.get("subreddit", ""),
"url": post_url,
"created_utc": item_data.get("created_utc", ""),
}
)
elif kind == "t1":
comment_url = (
f"https://www.reddit.com{item_data.get('permalink', '')}"
)
all_items.append(
{
"type": "comment",
"subreddit": item_data.get("subreddit", ""),
"body": item_data.get("body", ""),
"created_utc": item_data.get("created_utc", ""),
"url": comment_url,
}
)
count += 1
if count >= limit:
break
params["after"] = data["data"].get("after")
if not params["after"]:
break
time.sleep(random.uniform(1, 2))
logging.info("Sleeping for random time")
logging.info("Successfully scraped user data for %s", username)
return all_items
def fetch_subreddit_posts(
self, subreddit, limit=10, category="hot", time_filter="all"
):
logging.info(
"Fetching subreddit/user posts for %s, limit: %d, category: %s, time_filter: %s",
subreddit,
limit,
category,
time_filter,
)
if category not in ["hot", "top", "new", "userhot", "usertop", "usernew"]:
raise ValueError("Category for Subredit must be either 'hot', 'top', or 'new' or for User must be 'userhot', 'usertop', or 'usernew'")
batch_size = min(100, limit)
total_fetched = 0
after = None
all_posts = []
while total_fetched < limit:
if category == "hot":
url = f"https://www.reddit.com/r/{subreddit}/hot.json"
elif category == "top":
url = f"https://www.reddit.com/r/{subreddit}/top.json"
elif category == "new":
url = f"https://www.reddit.com/r/{subreddit}/new.json"
elif category == "userhot":
url = f"https://www.reddit.com/user/{subreddit}/submitted/hot.json"
elif category == "usertop":
url = f"https://www.reddit.com/user/{subreddit}/submitted/top.json"
else:
url = f"https://www.reddit.com/user/{subreddit}/submitted/new.json"
params = {
"limit": batch_size,
"after": after,
"raw_json": 1,
"t": time_filter,
}
try:
response = self.session.get(url, params=params, timeout=self.timeout)
response.raise_for_status()
logging.info("Subreddit/user posts request successful")
except Exception as e:
logging.info("Subreddit/user posts request unsuccessful: %s", e)
if response.status_code != 200:
print(
f"Failed to fetch posts for subreddit/user {subreddit}: {response.status_code}"
)
break
data = response.json()
posts = data["data"]["children"]
if not posts:
break
for post in posts:
post_data = post["data"]
post_info = {
"title": post_data["title"],
"author": post_data["author"],
"permalink": post_data["permalink"],
"score": post_data["score"],
"num_comments": post_data["num_comments"],
"created_utc": post_data["created_utc"],
}
if "selftext" in post_data:
body = post_data["selftext"]
if body:
post_info["body"] = body
if "media_metadata" in post_data:
media_urls = []
for image in post_data["media_metadata"]:
if "m" not in post_data["media_metadata"][image]:
continue
content_type = post_data["media_metadata"][image]["m"]
extension = content_type[content_type.find('/')+1:]
media_urls.append("https://i.redd.it/{}.{}".format(image, extension))
post_info["media_urls"] = media_urls
elif "media" in post_data and post_data["media"] is not None and "reddit_video" in post_data["media"]:
media_url = post_data["media"]["reddit_video"]["fallback_url"]
video_url = media_url[:media_url.find('?')]
audio_url = video_url[:video_url.rfind('/')] + "/CMAF_AUDIO_128.mp4"
post_info["media_urls"] = [video_url, audio_url]
elif "url" in post_data:
url = post_data["url"]
if re.fullmatch(r"https:\/\/i\.redd\.it\/.{1,20}", url):
post_info["media_urls"] = [url]
elif "body" not in post_info:
post_info["body"] = url
if "thumbnail" in post_data and post_data["thumbnail"] != "self":
post_info["thumbnail_url"] = post_data["thumbnail"]
all_posts.append(post_info)
total_fetched += 1
if total_fetched >= limit:
break
after = data["data"].get("after")
if not after:
break
time.sleep(random.uniform(1, 2))
logging.info("Sleeping for random time")
logging.info("Successfully fetched subreddit posts for %s", subreddit)
return all_posts

87
app/app.py Executable file

@ -0,0 +1,87 @@
from flask import Flask, send_file, render_template, request
from urllib.parse import urlparse
import config
import json
import sqlite3
import subprocess
app = Flask(__name__)
@app.route('/file/<path:filename>')
def serve_file(filename):
try:
return send_file('{0}/{1}'.format(config.media_dir, filename), as_attachment=False)
except FileNotFoundError:
return "File not found", 404
@app.route('/hide/<path:permalink>')
def hide_post(permalink):
if permalink[0] != "/":
permalink = "/" + permalink
connection = sqlite3.connect(config.db_file)
cursor = connection.cursor()
update = "UPDATE post SET hidden = ? WHERE permalink = ?"
binds = [True, permalink]
print(update)
print(binds)
cursor.execute(update,binds)
connection.commit()
connection.close()
return ""
@app.route('/', methods=['GET', 'POST'])
def index():
connection = sqlite3.connect(config.db_file)
cursor = connection.cursor()
select = "SELECT subreddit, count(*) count FROM post WHERE hidden = ? GROUP BY subreddit ORDER BY count desc LIMIT 1"
binds = [False]
row = cursor.execute(select, binds).fetchone()
connection.close()
return get_subreddit(row[0])
@app.route('/r/<path:subreddit>')
def get_subreddit(subreddit):
title = f"/r/{subreddit}"
connection = sqlite3.connect(config.db_file)
cursor = connection.cursor()
select = "SELECT subreddit, count FROM (SELECT subreddit, count(*) count FROM post WHERE hidden = ? GROUP BY subreddit ORDER BY count desc) t WHERE count > 0"
binds = [False]
results = cursor.execute(select, binds).fetchall()
subreddits = [f"/r/{sub[0]}" for sub in results]
count = results[subreddits.index(title)][1]
select = "SELECT post FROM post WHERE subreddit = ? and hidden = ? ORDER BY score desc LIMIT ?"
binds = [subreddit, False, config.posts_per_page_load]
results = cursor.execute(select, binds).fetchall()
posts = [json.loads(post[0]) for post in results]
add_media_html_to_posts(posts)
connection.close()
return render_template('index.html', title=title, count=count, posts=posts, subreddits=subreddits)
def add_media_html_to_posts(posts):
for post_index, post in enumerate(posts):
media_html = []
for media_index, media in enumerate(post["media_urls"]):
filename = urlparse(media).path
if filename[0]=='/':
filename = filename[1:]
html = get_media_html(filename, True if (post_index < 3 and media_index == 0) else False)
media_html.append(html)
post["media_html"] = media_html
def get_media_html(file, priority=False):
if file.endswith(('.jpg', '.jpeg', '.png', '.gif')):
return '<img class="invertible" src="/file/{0}" {1}>'.format(file, 'fetchpriority="high" loading="eager"' if priority else '')
if file.find("_AUDIO_")>0:
return '<audio src="/file/{0}" hidden></audio>'.format(file)
if file.endswith('.mp4'):
return '<video src="/file/{0}" type="video/mp4" onplay="playAudio(this)" onpause="pauseAudio(this)" onseeked="seekAudio(this)" controls></video>'.format(file)
if file.endswith('.webm'):
return '<video src="/file/{0}" type="video/webm" controls></video>'.format(file)
return file
if __name__ == '__main__':
subprocess.run(["python3", "make_db.py"])
#subprocess.run(["python3", "scrape_posts.py"])
app.run(host='0.0.0.0', port=8000)

32
app/config.py Normal file

@ -0,0 +1,32 @@
# Scheduler configuration
max_posts_per_pull = 100
pull_by = "day"
subreddits = [
# name, minimum upvotes
("pcgaming", 50),
("gadgets", 10),
("Nightreign", 100),
("CuratedTumblr", 100),
("196", 100),
("PoliticalCompassMemes", 100),
("meirl", 100),
("me_irl", 100),
("AITAH", 100),
("Fauxmoi", 100),
("NoFilterNews", 100),
("linux", 100),
("linux4noobs", 100),
("selfhosted", 100),
("HomeServer", 100),
("homelab", 100)
]
max_age_days = 30
max_age_seconds = max_age_days * 24 * 60 * 60
# Webpage configuration
posts_per_page_load = 50
db_dir = "/reddit/db"
media_dir = "/reddit/media"
db_file = f"{db_dir}/data.db"
yars_dir = "/app/YARS"

39
app/delete_posts.py Normal file

@ -0,0 +1,39 @@
import config
import os
import time
import sqlite3
import subprocess
if __name__ == "__main__":
connection = sqlite3.connect(config.db_file)
cursor = connection.cursor()
now = int(time.time())
max_created_utc = now - config.max_age_seconds
select = "SELECT count(*) FROM post WHERE created_utc < ?"
binds = [max_created_utc]
results = cursor.execute(select, binds)
print("Deleting old posts")
delete = "DELETE FROM post WHERE created_utc < ?"
cursor.execute(delete, binds)
print("Deleting old media db rows")
delete = "DELETE FROM media WHERE permalink NOT IN (SELECT permalink FROM post)"
cursor.execute(delete)
all_files_local = subprocess.run(["find", "/reddit/media", "-type", "f"], capture_output=True, text=True)
all_files_local = set(all_files_local.stdout.splitlines())
select = "SELECT local from media"
results = cursor.execute(select).fetchall()
connection.commit()
connection.close()
all_files_db = set([row[0] for row in results])
extra_files = all_files_local - all_files_db
print("Deleting old files")
for file in extra_files:
print(f"Removing {file}")
os.remove(file)
empty_dirs = subprocess.run(["find", "/reddit/media", "-type", "d", "-empty"], capture_output=True, text=True)
empty_dirs = set(empty_dirs.stdout.splitlines())
print("Deleting empty directories")
for dir in empty_dirs:
print(f"Removind dir {dir}")
os.rmdir(dir)
print("Done")

12
app/make_db.py Executable file

@ -0,0 +1,12 @@
import config
import datetime
import os
import sqlite3
os.makedirs(config.db_dir, exist_ok=True)
connection = sqlite3.connect(config.db_file)
cursor = connection.cursor()
cursor.execute("CREATE TABLE IF NOT EXISTS post(permalink primary key, subreddit, created_utc, score, media_fetched, post, hidden)")
cursor.execute("CREATE TABLE IF NOT EXISTS media(permalink, url , local, PRIMARY KEY (permalink, url))")
connection.commit()
connection.close()

3
app/requirements.txt Executable file

@ -0,0 +1,3 @@
flask
requests
pygments

118
app/scrape_posts.py Executable file

@ -0,0 +1,118 @@
import config
import datetime
import json
import os
import re
import sqlite3
import sys
current_dir = os.path.dirname(os.path.abspath(__file__))
project_root = config.yars_dir
src_path = os.path.join(project_root, "src")
sys.path.append(src_path)
from yars.yars import YARS
from yars.utils import download_image
# Initialize the YARS Reddit miner
miner = YARS()
# Scrape posts for a subreddit and return them as a list of dicts
def scrape_subreddit_data(subreddit, limit=5):
ret = []
subreddit_name = subreddit[0]
minimum_score = subreddit[1]
print(f"Starting {subreddit_name}")
empty = dict()
try:
subreddit_posts = miner.fetch_subreddit_posts(
subreddit_name, limit=limit, category="top", time_filter=config.pull_by
)
for i, post in enumerate(subreddit_posts, 1):
score = post.get("score", 0)
if score < minimum_score:
continue
post_data = {
"permalink": post.get("permalink"),
"title": post.get("title", ""),
"author": post.get("author", ""),
"created_utc": post.get("created_utc", ""),
"num_comments": post.get("num_comments", 0),
"score": post.get("score", 0),
"media_urls" : post.get("media_urls", []),
"body": post.get("body", None),
}
ret.append(post_data)
print(f"Finished {subreddit_name}")
return ret
except Exception as e:
print(f"Error occurred while scraping subreddit: {e}")
return ret
def save_posts_to_db(data, cursor):
if len(data)==0:
return
upsert = "INSERT INTO post(permalink, subreddit, created_utc, score, media_fetched, post, hidden) VALUES "
upsert += ",".join(["(?,?,?,?,?,?,?)"] * len(data))
upsert += " ON CONFLICT(permalink) DO UPDATE SET score=excluded.score, post=excluded.post"
binds = []
for post in data:
binds.append(post["permalink"])
m = re.search(r"\/r\/([a-zA-Z0-9_]+)\/.*", post["permalink"])
binds.append(m.group(1)) #subreddit
binds.append(post["created_utc"])
binds.append(post["score"])
binds.append(False)
binds.append(json.dumps(post))
binds.append(False)
cursor.execute(upsert, binds)
def download_media(cursor):
select = "SELECT post FROM post WHERE media_fetched = ?"
binds = [False]
results = cursor.execute(select, binds)
post = results.fetchone()
binds = []
while post is not None:
post = json.loads(post[0])
if len(post["media_urls"])>0:
for url in post["media_urls"]:
binds.append(post["permalink"])
binds.append(url)
binds.append(download_image(url, config.media_dir))
print("image downloaded")
post = results.fetchone()
if len(binds)>0:
upsert = "INSERT INTO media(permalink, url, local) VALUES "
upsert += ",".join(["(?,?,?)"] * (len(binds)//3))
upsert += " ON CONFLICT(permalink, url) DO UPDATE SET local=excluded.local"
cursor.execute(upsert, binds)
update = "UPDATE post SET media_fetched = ? WHERE media_fetched = ?"
binds = [True, False]
cursor.execute(update, binds)
def download_comments_for_permalink(permalink, cursor):
# Currently unused
post_details = miner.scrape_post_details(permalink)
update = "UPDATE post SET body = ? WHERE permalink = ?"
binds = [post_details["body"], permalink]
cursor.execute(update, binds)
upsert += "INSERT INTO comments(permalink, comments) VALUES (?, ?) ON CONFLICT(permalink) DO UPDATE SET comments=excluded.comments"
binds = [permalink, post_details["comments"]]
cursor.execute(upsert, binds)
# Main execution
if __name__ == "__main__":
os.makedirs(config.media_dir, exist_ok=True)
connection = sqlite3.connect(config.db_file)
cursor = connection.cursor()
for subreddit in config.subreddits:
post_data = scrape_subreddit_data(subreddit, config.max_posts_per_pull)
save_posts_to_db(post_data, cursor)
connection.commit()
download_media(cursor)
connection.commit()
connection.close()

342
app/templates/index.html Executable file

@ -0,0 +1,342 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Reddit, but better</title>
<style>
:root {
--dark: #2c2c2c;
--darker: #171717;
--light: #bfbfbf;
}
html {
height: 100%;
}
body {
background-color: var(--darker);
color: var(--light);
min-height: 100%;
margin: 0; /* Removes default browser margin */
}
img, video {
max-width: 100%;
max-height: 100vh;
width: auto;
height: auto;
}
div.post {
background-color: var(--dark);
border: 2px solid var(--light);
border-radius: 15px;
padding: 10px;
margin-bottom: 20px;
}
.sidebar {
outline: 2px solid var(--light);
position: sticky;
top: 0;
left: 0;
background-color: var(--dark);
display: flex;
flex-wrap: nowrap;
z-index: 1000;
}
.content {
display: flex;
flex-grow: 1; /* Takes up remaining space */
flex-direction: column;
}
/* desktop */
@media (min-aspect-ratio: 1) {
img, video {
max-height: 80vh;
}
div.post {
width: 70vw;
margin-left: 20px;
}
.container {
display: flex;
flex-direction: row;
}
.sidebar {
width: fit-content;
height: 100vh;
flex-direction: column;
overflow-y: auto;
padding: 5px
}
}
/* phone */
@media (max-aspect-ratio: 1) {
img, video {
max-height: 100vh;
}
div.post {
width: calc(100vw - 50px);
margin-top: 10px;
}
.container {
display: flex;
flex-direction: column;
}
.sidebar {
width: 100vw;
height: 50px;
flex-direction: row;
overflow-x: auto;
align-items: center;
padding-top: 5px;
padding-bottom: 5px;
}
.content {
align-items: center;
}
}
.sidebar a {
display: block;
color: var(--light);
text-decoration: none;
white-space: nowrap;
margin: 5px;
padding: 5px;
}
.sidebar a:hover {
background-color: var(--darker);
color: var(--light);
}
.content h1 {
margin-left: 20px;
}
.invert {
filter: invert(1);
transition: filter 0.3s;
}
.button-wrapper {
display: flex;
width: 100%;
gap: 10px;
margin-top: 10px;
}
.button-wrapper.gallery {
gap: 5px;
}
.button-wrapper button {
flex: 1;
padding: 10px;
cursor: pointer;
background-color: var(--darker);
color: var(--light);
border: 2px solid var(--light);
border-radius: 10px;
font-size: 1.25rem;
font-weight: bold;
}
.button-wrapper button.gallery {
padding: 5px;
background-color: var(--darker);
border-radius: 5px;
border: none;
cursor: none;
}
.button-wrapper button.gallery.selected {
background-color: var(--light);
}
.text-content {
overflow: hidden;
transition: max-height 0.3s ease-out; /* Smooth transition */
max-height: 20vh;
position: relative;
}
.text-content::after {
content: "";
position: absolute;
bottom: 0;
left: 0;
width: 100%;
height: 30px;
background: linear-gradient(to bottom, rgba(255,255,255,0), var(--dark));
}
.text-content.expanded {
max-height: 1000vh;
}
.text-content.expanded::after {
display: none;
}
</style>
</head>
<body>
<div class="container">
<div class="sidebar">
{% for subreddit in subreddits %}
<a href="{{ subreddit }}">{{ subreddit }}</a>
{% endfor %}
</div>
<div class="content">
<h1>{{ title }} ({{ count }})</h1>
{% for post in posts %}
<div class="post">
<h2>{{ post.title }}</h2>
<h4>Score: {{ post.score }}</h4>
<div class="media-div">
{% for media in post.media_html %}
{{ media|safe }}
{% endfor %}
{% if post.media_html|length > 1 %}
<span class="button-wrapper gallery">
</span>
{% endif %}
</div>
{% if post.body %}
<div class="text-content" onclick="expand(this)">
{{ post.body }}
</div>
{% endif %}
<span class="button-wrapper">
<button type="button" onclick='comments("{{ post.permalink }}")'>Comments</button>
<button type="button" onclick='hide(this, "{{ post.permalink }}")'>Hide</button>
</span>
</div>
{% endfor %}
</div>
</div>
<script>
// setup galleries
mediaDivs = document.querySelectorAll('.media-div');
mediaDivs.forEach(div => {
images = Array.from(div.querySelectorAll('img'));
if (images.length > 1) {
buttonSpan = div.querySelector('.button-wrapper:first-of-type');
images.forEach(image => {
image.addEventListener('click', (e) => {
if (e.offsetX > image.offsetWidth * 2 / 3) {
// scroll right
div = e.target.closest('.media-div');
images = Array.from(div.querySelectorAll('img'));
currentIndex = images.indexOf(e.target)
if (currentIndex < (images.length -1)) {
buttons = Array.from(div.querySelectorAll('button'));
images[currentIndex].style.display = "none";
images[currentIndex+1].style.display = "block";
buttons[currentIndex].classList.remove('selected');
buttons[currentIndex+1].classList.add('selected');
} else {
e.target.classList.toggle('invert');
}
} else if (e.offsetX < image.offsetWidth / 3) {
// scroll left
div = e.target.closest('.media-div');
images = Array.from(div.querySelectorAll('img'));
currentIndex = images.indexOf(e.target)
if (currentIndex > 0) {
buttons = Array.from(div.querySelectorAll('button'));
images[currentIndex].style.display = "none";
images[currentIndex-1].style.display = "block";
buttons[currentIndex].classList.remove('selected');
buttons[currentIndex-1].classList.add('selected');
} else {
e.target.classList.toggle('invert');
}
} else {
image.classList.toggle('invert');
}
});
});
firstImage = images.shift();
firstButton = document.createElement('button');
firstButton.classList.add('gallery');
firstButton.classList.add('selected');
buttonSpan.appendChild(firstButton);
images.forEach(image => {
image.style.display = "none";
button = document.createElement('button');
button.classList.add('gallery');
buttonSpan.appendChild(button);
});
} else {
images.forEach(image => {
image.addEventListener('click', () => {
image.classList.toggle('invert');
});
});
}
});
// main button code
function hide(button, permalink){
const div = button.closest('.post');
div.scrollTo({
top: 0,
behavior: 'smooth'
});
div.remove();
try {
fetch('/hide' + permalink);
} catch (error) {
console.error('Could not hide post', error);
}
}
function comments(permalink){
window.open("https://reddit.com" + permalink, '_blank');
}
// text expand code
function checkHeight(){
const divs = document.querySelectorAll('.text-content');
divs.forEach(div => {
height = div.offsetHeight;
style = window.getComputedStyle(div);
maxHeight = parseInt(style.maxHeight);
if (height < maxHeight) {
div.classList.add('expanded');
}
});
}
function expand(div) {
div.classList.add('expanded');
}
window.addEventListener('load', (event) => {
checkHeight()
});
window.addEventListener('resize', (event) => {
checkHeight()
});
// audio/video sync code
function findAudio(video){
const div = video.closest('.post');
return div.querySelector('audio:first-of-type');
}
function playAudio(video){
audio = findAudio(video);
if (audio) {
audio.play();
audio.currentTime = video.currentTime;
}
}
function pauseAudio(video){
audio = findAudio(video);
if (audio) {
audio.pause();
audio.currentTime = video.currentTime;
}
}
function seekAudio(video){
audio = findAudio(video);
if (audio) {
audio.currentTime = video.currentTime;
}
}
</script>
</body>
</html>

26
compose.yaml Executable file

@ -0,0 +1,26 @@
services:
  ofelia:
    image: mcuadros/ofelia:latest
    restart: unless-stopped
    depends_on:
      - web
    command: daemon --docker
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
  web:
    build:
      context: app
      target: builder
    restart: unless-stopped
    stop_signal: SIGINT
    ports:
      - '8001:8000'
    volumes:
      - ./db:/reddit/db
      - ./media:/reddit/media
    labels:
      ofelia.enabled: "true"
      ofelia.job-exec.scrape.schedule: "@every 60m"
      ofelia.job-exec.scrape.command: "python3 /app/scrape_posts.py"
      ofelia.job-exec.clean.schedule: "@every 24h"
      ofelia.job-exec.clean.command: "python3 /app/delete_posts.py"