import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))

import base64
import hashlib
from main.models.users.news_article import NewsArticle
from main.models.users.news_article_image import NewsArticleImage
from setup import DB_SESSION
import os
from urllib import parse
from pyquery import PyQuery

# Directory holding the frontend image assets; main() reads both the banner
# images and the in-content images from here, looked up by file name.
IMAGES_PATH = "/home/sky/frontend/app/assets/images/cms"
# Passed as the third positional argument to every NewsArticleImage that main() creates.
USER_ID = 1 # created_by_id for images

def main():
    """Copy news-article images from the frontend assets directory into the database.

    For every ``NewsArticle`` returned by the "users" session this:

    * reads the file named by ``article.banner_image`` from ``IMAGES_PATH`` and
      stores its raw bytes on ``article.image_data`` (skipped when the article
      has no banner file name);
    * URL-unquotes ``article.slug`` repeatedly until it stops changing;
    * for each ``<img>`` in ``article.content`` whose ``src`` contains
      "static" or "imgur", stores the same-named file from ``IMAGES_PATH`` as
      a base64-encoded ``NewsArticleImage`` and points the tag (and any other
      occurrence of the old URL in the content) at ``/articles/images/<key>``.

    Raises:
        OSError: if a referenced image file cannot be opened (for example
            ``FileNotFoundError`` when it is missing from ``IMAGES_PATH``).
    """
    # this needs to be run with a VPN active
    with DB_SESSION["users"].begin() as db_session:
        articles = db_session.query(NewsArticle).all()

        # Image keys are deterministic, so the same image referenced more than
        # once would otherwise be inserted as several rows sharing one key.
        stored_image_keys = set()

        for article in articles:
            # get banner image data from frontend; an article without a banner
            # file name has nothing to load (os.path.join would fail on None)
            if article.banner_image:
                with open(os.path.join(IMAGES_PATH, article.banner_image), "rb") as image_file:
                    article.image_data = image_file.read()

            # sort out needlessly escaped special characters in slug; loop
            # because a slug may have been percent-encoded more than once
            while article.slug != parse.unquote(article.slug):
                article.slug = parse.unquote(article.slug)

            # replace images in content with database images
            to_replace = []
            pq = PyQuery(article.content)
            for elt in pq("img"):
                tag = pq(elt)
                src = str(tag.attr.src)
                if "static" not in src and "imgur" not in src:
                    continue

                # last path segment of the URL, without any query string
                image_name = src.split("/")[-1].split("?")[0].strip()
                # don't use timestamp keys for these ones in case we have to run the script again
                image_key = f"{hashlib.md5((article.slug + image_name).encode()).hexdigest()}-{image_name}"
                new_src = f"/articles/images/{image_key}"

                if image_key not in stored_image_keys:
                    with open(os.path.join(IMAGES_PATH, image_name), "rb") as image_file:
                        encoded_image = base64.b64encode(image_file.read())
                    db_session.add(NewsArticleImage(image_key, encoded_image, USER_ID))
                    stored_image_keys.add(image_key)

                tag.attr.src = new_src

                # if the image is an actual trix attachment, need to replace URL elsewhere too
                to_replace.append((src, new_src))

            article.content = pq.html()
            for old, new in to_replace:
                article.content = article.content.replace(old, new)

            db_session.add(article)

        # NOTE(review): if DB_SESSION["users"] is a SQLAlchemy sessionmaker,
        # begin() already commits and closes on exit, which would make these
        # two calls redundant -- confirm before removing them.
        db_session.commit()
        db_session.close()

# Run the migration only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()