Initial commit: doujinshi-dl generic plugin framework

History reset as part of DMCA compliance. The project has been refactored into a generic, site-agnostic download framework. Site-specific logic lives in separate plugin packages. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-08 18:50:21 +02:00 · 2026-03-25 10:37:21 +08:00
commit 81d008036a
53 changed files with 4063 additions and 0 deletions
--- a/doujinshi_dl/core/utils/init.py
+++ b/doujinshi_dl/core/utils/init.py
@@ -0,0 +1,5 @@
+# coding: utf-8
+from doujinshi_dl.core.utils.db import Singleton, DB
+from doujinshi_dl.core.utils.fs import format_filename, generate_cbz, move_to_folder, parse_doujinshi_obj, EXTENSIONS
+from doujinshi_dl.core.utils.html import generate_html, generate_main_html
+from doujinshi_dl.core.utils.http import async_request
--- a/doujinshi_dl/core/utils/db.py
+++ b/doujinshi_dl/core/utils/db.py
@@ -0,0 +1,50 @@
+# coding: utf-8
+"""DB and Singleton utilities."""
+import os
+import sqlite3
+
+
+class _Singleton(type):
+    """Metaclass that hands out a single shared instance per class.
+
+    The first call to a class constructs the instance and records it in
+    ``_instances``; every later call returns the recorded object, and the
+    arguments of those later calls are ignored.
+    """
+    _instances = {}
+
+    def __call__(cls, *args, **kwargs):
+        registry = cls._instances
+        if cls in registry:
+            return registry[cls]
+        registry[cls] = super().__call__(*args, **kwargs)
+        return registry[cls]
+
+
+class Singleton(_Singleton('SingletonMeta', (object,), {})):
+    """Base class to inherit from; subclasses get one instance each."""
+
+
+class DB(object):
+    """Context manager around the SQLite download-history database.
+
+    Entering the context opens the database file configured under
+    ``history_path`` (default ``~/.doujinshi-dl/history.sqlite3``), makes
+    sure the ``download_history`` table exists and returns the instance.
+    Leaving the context closes the connection.
+    """
+    # sqlite3 connection, assigned by __enter__.
+    conn = None
+    # Cursor created from ``conn``, assigned by __enter__.
+    cur = None
+
+    def __enter__(self):
+        """Open the history database, create the table if needed, return self."""
+        from doujinshi_dl.core import config
+        history_path = config.get(
+            'history_path',
+            os.path.expanduser('~/.doujinshi-dl/history.sqlite3'),
+        )
+        # sqlite3 creates a missing database file but not a missing parent
+        # directory, so make sure the directory is there first.
+        parent_dir = os.path.dirname(history_path)
+        if parent_dir:
+            os.makedirs(parent_dir, exist_ok=True)
+
+        self.conn = sqlite3.connect(history_path)
+        try:
+            self.cur = self.conn.cursor()
+            self.cur.execute('CREATE TABLE IF NOT EXISTS download_history (id text)')
+            self.conn.commit()
+        except Exception:
+            # __exit__ is not invoked when __enter__ raises; close the
+            # connection here so it does not leak, then re-raise.
+            self.conn.close()
+            raise
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Close the connection; exceptions from the with-body propagate."""
+        self.conn.close()
+
+    def clean_all(self):
+        """Delete every row from ``download_history`` and commit."""
+        self.cur.execute('DELETE FROM download_history WHERE 1')
+        self.conn.commit()
+
+    def add_one(self, data):
+        """Insert ``data`` as a new ``id`` row and commit."""
+        self.cur.execute('INSERT INTO download_history VALUES (?)', [data])
+        self.conn.commit()
+
+    def get_all(self):
+        """Return the ``id`` value of every row as a list."""
+        rows = self.cur.execute('SELECT id FROM download_history')
+        return [row[0] for row in rows]
--- a/doujinshi_dl/core/utils/fs.py
+++ b/doujinshi_dl/core/utils/fs.py
@@ -0,0 +1,98 @@
+# coding: utf-8
+"""Filesystem utilities: filename formatting, CBZ generation, folder management."""
+import os
+import zipfile
+import shutil
+from typing import Tuple
+
+from doujinshi_dl.core.logger import logger
+from doujinshi_dl.constant import PATH_SEPARATOR
+
+# Default value of the `length` parameter of format_filename().
+MAX_FIELD_LENGTH = 100
+# File suffixes that are treated as images (compared case-sensitively).
+EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
+
+
+def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
+    """
+    Turn a title into a string that can be used as a file name.
+
+    Sanitising uses a blacklist rather than a whitelist: most titles contain
+    Japanese multi-byte characters, which a whitelist of ASCII letters and
+    symbols would reject. Every forbidden character (backslash, both quote
+    characters, / : , ; * ? < > | and the control characters 0x00-0x0b) is
+    replaced by a space, runs of whitespace are collapsed into one space and
+    trailing dots are dropped. With `_truncate_only` set, this sanitising
+    step is skipped and `s` is used as it is.
+
+    A result of `length` characters or more is cut to `length - 1`
+    characters followed by an ellipsis. Finally every "[]" is removed and
+    surrounding whitespace is stripped.
+    """
+    if _truncate_only:
+        filename = s
+    else:
+        ban_chars = '\\\'/:,;*?"<>|\t\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b'
+        replacements = str.maketrans(ban_chars, ' ' * len(ban_chars))
+        # split() without arguments ignores leading/trailing whitespace and
+        # treats any run of whitespace as one separator.
+        words = s.translate(replacements).split()
+        filename = ' '.join(words).rstrip('.')
+
+    # limit to `length` chars
+    if len(filename) >= length:
+        filename = filename[:length - 1] + u'…'
+
+    # Remove [] from filename
+    return filename.replace('[]', '').strip()
+
+
+def parse_doujinshi_obj(
+        output_dir: str,
+        doujinshi_obj=None,
+        file_type: str = ''
+) -> Tuple[str, str]:
+    """Work out the working folder and the target file path.
+
+    With a `doujinshi_obj`, the folder is `output_dir/<obj.filename>` and
+    the file is `output_dir/<obj.filename>.<file_type>` (for 'pdf' every
+    '/' in the file name is replaced by '-').
+
+    Without one, 'html' returns `(output_dir, 'index.html')` straight away
+    and nothing is created; any other type uses the current directory as
+    folder and `doujinshi.<file_type>` inside it as file.
+
+    Except for that early return, the folder is created when it does not
+    exist yet. Returns the tuple `(folder, file path)`.
+    """
+    if doujinshi_obj is None:
+        if file_type == 'html':
+            return output_dir, 'index.html'
+
+        doujinshi_dir = f'.{PATH_SEPARATOR}'
+        filename = f'.{PATH_SEPARATOR}doujinshi.{file_type}'
+    else:
+        base_name = doujinshi_obj.filename
+        doujinshi_dir = os.path.join(output_dir, base_name)
+
+        target_name = f'{base_name}.{file_type}'
+        if file_type == 'pdf':
+            target_name = target_name.replace('/', '-')
+        filename = os.path.join(output_dir, target_name)
+
+    if not os.path.exists(doujinshi_dir):
+        os.makedirs(doujinshi_dir)
+
+    return doujinshi_dir, filename
+
+
+def generate_cbz(doujinshi_dir, filename):
+    """Pack the entries of `doujinshi_dir` into the CBZ archive `filename`.
+
+    Entries are added in sorted order and stored under their bare names.
+    The archive itself is skipped when it lies inside `doujinshi_dir`;
+    otherwise a leftover archive from an earlier run would be read while it
+    is being rewritten.
+    """
+    # List first so a missing directory fails before an archive is created.
+    file_list = sorted(os.listdir(doujinshi_dir))
+    archive_path = os.path.normcase(os.path.abspath(filename))
+
+    logger.info(f'Writing CBZ file to path: {filename}')
+    with zipfile.ZipFile(filename, 'w') as cbz_pf:
+        for image in file_list:
+            image_path = os.path.join(doujinshi_dir, image)
+            if os.path.normcase(os.path.abspath(image_path)) == archive_path:
+                continue
+            cbz_pf.write(image_path, image)
+
+    logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
+
+
+def move_to_folder(output_dir='.', doujinshi_obj=None, file_type=None):
+    """Replace the loose files of a download folder by its packed file.
+
+    The folder and the packed file path come from parse_doujinshi_obj().
+    Every regular file in the folder is deleted, except files ending in
+    '.pdf' or '.cbz' and the packed file itself; then the packed file is
+    moved into the folder. A file that cannot be deleted is logged and
+    skipped.
+
+    Raises RuntimeError when `file_type` is empty or None.
+    """
+    if not file_type:
+        raise RuntimeError('no file_type specified')
+
+    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
+    packed_path = os.path.normcase(os.path.abspath(filename))
+
+    for fn in os.listdir(doujinshi_dir):
+        file_path = os.path.join(doujinshi_dir, fn)
+        if os.path.splitext(file_path)[1] in ('.pdf', '.cbz'):
+            continue
+        # Never delete the file that is about to be moved.
+        if os.path.normcase(os.path.abspath(file_path)) == packed_path:
+            continue
+        if not os.path.isfile(file_path):
+            continue
+
+        try:
+            os.remove(file_path)
+        except OSError as e:
+            logger.warning(f'Error deleting file: {e}')
+
+    shutil.move(filename, os.path.join(doujinshi_dir, os.path.basename(filename)))
--- a/doujinshi_dl/core/utils/html.py
+++ b/doujinshi_dl/core/utils/html.py
@@ -0,0 +1,118 @@
+# coding: utf-8
+"""HTML viewer generation utilities (generic, no site-specific references)."""
+import json
+import os
+import urllib.parse
+
+from doujinshi_dl.core.logger import logger
+from doujinshi_dl.core.utils.fs import EXTENSIONS, parse_doujinshi_obj
+from doujinshi_dl.constant import PATH_SEPARATOR
+
+
+def _readfile(path):
+    """Return the text content of a file shipped inside the package.
+
+    `path` is resolved against the directory three levels above this
+    module file.
+    """
+    loc = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))  # doujinshi_dl/
+
+    # Decode explicitly as UTF-8 instead of the platform default encoding,
+    # matching the UTF-8 encoding used when the generated pages are written.
+    with open(os.path.join(loc, path), 'r', encoding='utf-8') as file:
+        return file.read()
+
+
+def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
+    """Write an `index.html` viewer into a download folder.
+
+    The folder comes from parse_doujinshi_obj(). Every file in it whose
+    suffix is in EXTENSIONS becomes an <img> tag, in sorted order. The page
+    is built from the `viewer/<template>/` files index.html, styles.css and
+    scripts.js.
+
+    The title is `doujinshi_obj.name`; without an object it is the "title"
+    entry of `metadata.json` in the folder, or a generic title when that
+    file or entry is missing. Title and image names are escaped before they
+    are put into the markup. A failure while writing the page is logged as
+    a warning and not raised.
+    """
+    # Local import under an alias so the name cannot be confused with this
+    # module's own name.
+    from html import escape as html_escape
+
+    default_title = 'Doujinshi HTML Viewer'
+    doujinshi_dir, _ = parse_doujinshi_obj(output_dir, doujinshi_obj, 'html')
+
+    if not os.path.exists(doujinshi_dir):
+        logger.warning(f'Path "{doujinshi_dir}" does not exist, creating.')
+        try:
+            os.makedirs(doujinshi_dir)
+        except EnvironmentError as e:
+            logger.critical(e)
+
+    # Percent-encode the names so quotes or '&' in a file name cannot break
+    # out of the src attribute.
+    image_html = ''.join(
+        f'<img src="{urllib.parse.quote(image)}" class="image-item"/>\n'
+        for image in sorted(os.listdir(doujinshi_dir))
+        if os.path.splitext(image)[1] in EXTENSIONS
+    )
+
+    page_template = _readfile(f'viewer/{template}/index.html')
+    css = _readfile(f'viewer/{template}/styles.css')
+    js = _readfile(f'viewer/{template}/scripts.js')
+
+    if doujinshi_obj is not None:
+        name = doujinshi_obj.name
+    else:
+        name = default_title
+        metadata_path = os.path.join(doujinshi_dir, "metadata.json")
+        if os.path.exists(metadata_path):
+            with open(metadata_path, 'r') as file:
+                doujinshi_info = json.loads(file.read())
+            # A missing or empty title falls back to the generic one
+            # instead of being rendered as "None".
+            name = doujinshi_info.get("title") or default_title
+
+    data = page_template.format(
+        TITLE=html_escape(str(name)),
+        IMAGES=image_html,
+        SCRIPTS=js,
+        STYLES=css,
+    )
+    try:
+        with open(os.path.join(doujinshi_dir, 'index.html'), 'wb') as f:
+            f.write(data.encode('utf-8'))
+
+        logger.log(16, f'HTML Viewer has been written to "{os.path.join(doujinshi_dir, "index.html")}"')
+    except Exception as e:
+        logger.warning(f'Writing HTML Viewer failed ({e})')
+
+
+def generate_main_html(output_dir=f'.{PATH_SEPARATOR}'):
+    """
+    Generate a main html to show all the contained doujinshi.
+    With a link to their `index.html`.
+    Default output folder will be the CLI path.
+
+    Every direct sub-folder of `output_dir` that contains an `index.html`
+    gets one gallery entry, in sorted folder order. The cover is the first
+    file (sorted) with a suffix from EXTENSIONS, or the first file when the
+    folder has no such file. The caption is the folder name with
+    underscores replaced by spaces.
+
+    Side effects: changes the process working directory to `output_dir`
+    and does not restore it; writes `main.html` and copies `logo.png`
+    there. If no folder qualifies, a warning is logged and nothing is
+    written. A failure while writing is logged as a warning and not raised.
+    """
+    import shutil
+    from html import escape as html_escape
+
+    image_html = ''
+
+    main = _readfile('viewer/main.html')
+    css = _readfile('viewer/main.css')
+    js = _readfile('viewer/main.js')
+
+    element = '\n\
+            <div class="gallery-favorite">\n\
+                <div class="gallery">\n\
+                    <a href="./{FOLDER}/index.html" class="cover" style="padding:0 0 141.6% 0"><img\n\
+                            src="./{FOLDER}/{IMAGE}" />\n\
+                        <div class="caption">{TITLE}</div>\n\
+                    </a>\n\
+                </div>\n\
+            </div>\n'
+
+    os.chdir(output_dir)
+    # Sorted so the gallery order does not depend on directory listing order.
+    doujinshi_dirs = sorted(next(os.walk('.'))[1])
+
+    for folder in doujinshi_dirs:
+        files = sorted(os.listdir(folder))
+
+        if 'index.html' not in files:
+            continue
+        logger.info(f'Add doujinshi "{folder}"')
+
+        # Prefer a real image as cover (001.jpg or 001.png); a non-image
+        # file may sort first, so only fall back to files[0] when the
+        # folder holds no image at all.
+        image = next(
+            (name for name in files if os.path.splitext(name)[1] in EXTENSIONS),
+            files[0],
+        )
+        title = folder.replace('_', ' ')
+
+        image_html += element.format(
+            FOLDER=urllib.parse.quote(folder),
+            IMAGE=urllib.parse.quote(image),
+            TITLE=html_escape(title),
+        )
+    if image_html == '':
+        logger.warning('No index.html found, --gen-main paused.')
+        return
+    try:
+        data = main.format(STYLES=css, SCRIPTS=js, PICTURE=image_html)
+        with open('./main.html', 'wb') as f:
+            f.write(data.encode('utf-8'))
+        pkg_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+        shutil.copy(os.path.join(pkg_dir, 'viewer/logo.png'), './')
+        output_dir = output_dir[:-1] if output_dir.endswith('/') else output_dir
+        logger.log(16, f'Main Viewer has been written to "{output_dir}/main.html"')
+    except Exception as e:
+        logger.warning(f'Writing Main Viewer failed ({e})')
--- a/doujinshi_dl/core/utils/http.py
+++ b/doujinshi_dl/core/utils/http.py
@@ -0,0 +1,34 @@
+# coding: utf-8
+"""Generic async HTTP request helper (no site-specific headers injected here)."""
+import httpx
+import urllib3.exceptions
+
+# Runs at import time: suppress urllib3's InsecureRequestWarning process-wide.
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+
+async def async_request(method, url, proxy=None, **kwargs):
+    """
+    Send one HTTP request through a short-lived ``httpx.AsyncClient``.
+
+    This helper adds no site-specific headers (Cookie, User-Agent,
+    Referer); callers that know the site configuration pass them in via
+    ``headers``.
+
+    ``proxy=None`` means "use the ``proxy`` entry of ``constant.CONFIG``";
+    an empty string, given or configured, means no proxy. ``headers`` and
+    ``timeout`` (default 30) configure the client, every other keyword
+    argument is forwarded to ``client.request``. The client is created with
+    ``verify=False``, i.e. TLS certificates are not verified.
+
+    Returns the response object produced by ``client.request``.
+    """
+    from doujinshi_dl import constant
+
+    # Both values are applied at client level, so they are taken out of the
+    # keyword arguments that get forwarded to the request call; passing
+    # `timeout` twice would otherwise be possible.
+    client_headers = kwargs.pop('headers', {})
+    client_timeout = kwargs.pop('timeout', 30)
+
+    if proxy is None:
+        proxy = constant.CONFIG.get('proxy', '')
+    if isinstance(proxy, str) and not proxy:
+        proxy = None
+
+    client = httpx.AsyncClient(
+        headers=client_headers,
+        verify=False,
+        proxy=proxy,
+        timeout=client_timeout,
+    )
+    async with client as session:
+        return await session.request(method, url, **kwargs)