mirror of
https://github.com/RicterZ/nhentai.git
synced 2026-04-08 18:50:21 +02:00
Initial commit: doujinshi-dl generic plugin framework
History reset as part of DMCA compliance. The project has been refactored into a generic, site-agnostic download framework. Site-specific logic lives in separate plugin packages. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
5
doujinshi_dl/core/utils/__init__.py
Normal file
5
doujinshi_dl/core/utils/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
# coding: utf-8
|
||||
from doujinshi_dl.core.utils.db import Singleton, DB
|
||||
from doujinshi_dl.core.utils.fs import format_filename, generate_cbz, move_to_folder, parse_doujinshi_obj, EXTENSIONS
|
||||
from doujinshi_dl.core.utils.html import generate_html, generate_main_html
|
||||
from doujinshi_dl.core.utils.http import async_request
|
||||
50
doujinshi_dl/core/utils/db.py
Normal file
50
doujinshi_dl/core/utils/db.py
Normal file
@@ -0,0 +1,50 @@
|
||||
# coding: utf-8
|
||||
"""DB and Singleton utilities."""
|
||||
import os
|
||||
import sqlite3
|
||||
|
||||
|
||||
class _Singleton(type):
    """Metaclass that hands out a single shared instance per class.

    The first call to a class built with this metaclass constructs the
    instance; every later call returns that same object and its arguments
    are ignored.
    """

    # Maps each class to the one instance created for it.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Return the cached instance of *cls*, creating it on first use."""
        registry = cls._instances
        if cls in registry:
            return registry[cls]
        registry[cls] = super().__call__(*args, **kwargs)
        return registry[cls]
|
||||
|
||||
|
||||
class Singleton(_Singleton('SingletonMeta', (object,), {})):
    """Base class whose subclasses are each instantiated at most once.

    The intermediate ``SingletonMeta`` base is created by calling the
    ``_Singleton`` metaclass directly, so every subclass inherits it.
    """
|
||||
|
||||
|
||||
class DB:
    """Context manager around the SQLite download-history database.

    Entering opens the database and makes sure the ``download_history``
    table (a single text column ``id``) exists; leaving closes the
    connection.
    """

    # Both stay None until __enter__ has opened the database.
    conn = None
    cur = None

    def __enter__(self):
        """Open the history database, create the table if needed, return self."""
        # Imported here rather than at module level, as in the original.
        from doujinshi_dl.core import config

        fallback_path = os.path.expanduser('~/.doujinshi-dl/history.sqlite3')
        self.conn = sqlite3.connect(config.get('history_path', fallback_path))
        self.cur = self.conn.cursor()
        self.cur.execute('CREATE TABLE IF NOT EXISTS download_history (id text)')
        self.conn.commit()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the connection; exceptions from the ``with`` body propagate."""
        self.conn.close()

    def clean_all(self):
        """Delete every row of the download history."""
        self.cur.execute('DELETE FROM download_history WHERE 1')
        self.conn.commit()

    def add_one(self, data):
        """Insert *data* as a new ``id`` row and commit."""
        self.cur.execute('INSERT INTO download_history VALUES (?)', (data,))
        self.conn.commit()

    def get_all(self):
        """Return the ``id`` value of every stored row as a list."""
        rows = self.cur.execute('SELECT id FROM download_history')
        return [ident for (ident,) in rows]
|
||||
98
doujinshi_dl/core/utils/fs.py
Normal file
98
doujinshi_dl/core/utils/fs.py
Normal file
@@ -0,0 +1,98 @@
|
||||
# coding: utf-8
|
||||
"""Filesystem utilities: filename formatting, CBZ generation, folder management."""
|
||||
import os
|
||||
import zipfile
|
||||
import shutil
|
||||
from typing import Tuple
|
||||
|
||||
from doujinshi_dl.core.logger import logger
|
||||
from doujinshi_dl.constant import PATH_SEPARATOR
|
||||
|
||||
# Default maximum length, in characters, of a formatted file name.
MAX_FIELD_LENGTH = 100
# File extensions treated as images.
EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')


def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
    """Turn *s* into a string usable as a file or directory name.

    Unless ``_truncate_only`` is set, every banned character (backslash,
    single and double quote, ``/ : , ; * ? < > |``, tab and the control
    characters 0x00-0x0b) is replaced by a space, runs of whitespace are
    collapsed to one space, and trailing dots and spaces are removed.
    A blacklist is used instead of a whitelist because titles commonly
    contain non-ASCII (for example Japanese) characters that a whitelist
    would reject.

    The name is then limited in size: a name of ``length`` characters or
    more is cut to ``length - 1`` characters followed by ``…``. Finally
    every ``[]`` is removed and surrounding whitespace is stripped.

    Args:
        s: The raw name.
        length: Size limit applied to the name.
        _truncate_only: Skip the character replacement and trailing-dot
            removal; only truncate, drop ``[]`` and strip whitespace.

    Returns:
        The formatted name.
    """
    if _truncate_only:
        filename = s
    else:
        ban_chars = '\\\'/:,;*?"<>|\t\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b'
        filename = s.translate(str.maketrans(ban_chars, ' ' * len(ban_chars)))
        filename = ' '.join(filename.split())
        # Strip dots and spaces together so that 'name. .' cannot keep a
        # trailing dot behind an interleaved space.
        filename = filename.rstrip('. ')

    # limit `length` chars
    if len(filename) >= length:
        filename = filename[:length - 1] + '…'

    # Remove [] from filename
    filename = filename.replace('[]', '').strip()
    if not _truncate_only:
        # Removing '[]' can expose a new trailing dot ('name.[]').
        filename = filename.rstrip('. ')
    return filename
|
||||
|
||||
|
||||
def parse_doujinshi_obj(
    output_dir: str,
    doujinshi_obj=None,
    file_type: str = ''
) -> Tuple[str, str]:
    """Work out the working directory and output file for a doujinshi.

    With a ``doujinshi_obj`` the directory is ``output_dir/<obj.filename>``
    and the file is ``output_dir/<obj.filename>.<file_type>``. For the
    ``pdf`` type every ``/`` in the file name is replaced by ``-``.

    Without one, the ``html`` type returns ``(output_dir, 'index.html')``
    immediately and creates nothing. Any other type uses the current
    directory and a file named ``doujinshi.<file_type>`` inside it.

    Except for that early ``html`` return, the directory is created when it
    does not exist yet.

    Args:
        output_dir: Base output directory.
        doujinshi_obj: Object exposing a ``filename`` attribute, or ``None``.
        file_type: Extension of the generated file, without the dot.

    Returns:
        A ``(doujinshi_dir, filename)`` tuple.

    Raises:
        OSError: If the directory cannot be created, including when the
            path already exists but is not a directory.
    """
    if doujinshi_obj is None:
        if file_type == 'html':
            return output_dir, 'index.html'

        doujinshi_dir = f'.{PATH_SEPARATOR}'
        filename = f'.{PATH_SEPARATOR}doujinshi.{file_type}'
    else:
        doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
        _filename = f'{doujinshi_obj.filename}.{file_type}'

        if file_type == 'pdf':
            _filename = _filename.replace('/', '-')

        filename = os.path.join(output_dir, _filename)

    # exist_ok avoids the check-then-create race when another worker
    # creates the same directory between the test and the mkdir.
    os.makedirs(doujinshi_dir, exist_ok=True)

    return doujinshi_dir, filename
|
||||
|
||||
|
||||
def generate_cbz(doujinshi_dir, filename):
    """Pack every entry of *doujinshi_dir* into a zip archive at *filename*.

    Entries are added in sorted name order and stored under their bare
    name, without the directory prefix.
    """
    entries = sorted(os.listdir(doujinshi_dir))

    logger.info(f'Writing CBZ file to path: {filename}')
    with zipfile.ZipFile(filename, 'w') as archive:
        for entry in entries:
            archive.write(os.path.join(doujinshi_dir, entry), entry)

    logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
|
||||
|
||||
|
||||
def move_to_folder(output_dir='.', doujinshi_obj=None, file_type=None):
    """Replace the loose files of a doujinshi folder by its generated file.

    Every regular file in the doujinshi directory is deleted unless its
    extension is ``.pdf`` or ``.cbz``. The generated file is then moved
    into that directory. Deletion is best effort: a file that cannot be
    removed is logged and skipped.

    NOTE(review): when ``doujinshi_obj`` is None the directory resolved by
    ``parse_doujinshi_obj`` is the current working directory, so files
    there are deleted. Confirm callers always pass an object.

    Args:
        output_dir: Base output directory.
        doujinshi_obj: Object describing the doujinshi, or ``None``.
        file_type: Extension of the generated file; required.

    Raises:
        RuntimeError: If ``file_type`` is empty or None.
    """
    if not file_type:
        raise RuntimeError('no file_type specified')

    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)

    for fn in os.listdir(doujinshi_dir):
        file_path = os.path.join(doujinshi_dir, fn)
        if os.path.splitext(file_path)[1] in ('.pdf', '.cbz'):
            continue
        if not os.path.isfile(file_path):
            continue

        try:
            os.remove(file_path)
        except OSError as e:
            # Report through the module logger, as the rest of this file does.
            logger.warning(f'Error deleting file: {e}')

    shutil.move(filename, os.path.join(doujinshi_dir, os.path.basename(filename)))
|
||||
118
doujinshi_dl/core/utils/html.py
Normal file
118
doujinshi_dl/core/utils/html.py
Normal file
@@ -0,0 +1,118 @@
|
||||
# coding: utf-8
|
||||
"""HTML viewer generation utilities (generic, no site-specific references)."""
|
||||
import json
|
||||
import os
|
||||
import urllib.parse
|
||||
|
||||
from doujinshi_dl.core.logger import logger
|
||||
from doujinshi_dl.core.utils.fs import EXTENSIONS, parse_doujinshi_obj
|
||||
from doujinshi_dl.constant import PATH_SEPARATOR
|
||||
|
||||
|
||||
def _readfile(path):
    """Return the text of *path*, resolved relative to the package root.

    The package root is three directory levels above this module. The file
    is decoded as UTF-8 rather than with the locale's default encoding,
    which matches the UTF-8 encoding used when the generated pages are
    written.
    """
    loc = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))  # doujinshi_dl/

    with open(os.path.join(loc, path), 'r', encoding='utf-8') as file:
        return file.read()
|
||||
|
||||
|
||||
def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
    """Write an ``index.html`` image viewer into a doujinshi directory.

    The page embeds one ``<img>`` tag per image file found in the
    directory (sorted by name), plus the template's stylesheet and script.
    The title is ``doujinshi_obj.name`` when an object is given. Otherwise
    it is the ``title`` entry of ``metadata.json`` in the directory, or a
    generic title when that file or entry is missing.

    Failures are logged, not raised: if the directory cannot be created
    the function logs and returns; if the page cannot be written a warning
    is logged.

    Args:
        output_dir: Base output directory.
        doujinshi_obj: Object exposing ``filename`` and ``name``, or ``None``.
        template: Name of the viewer template folder under ``viewer/``.
    """
    doujinshi_dir, _ = parse_doujinshi_obj(output_dir, doujinshi_obj, 'html')

    if not os.path.exists(doujinshi_dir):
        logger.warning(f'Path "{doujinshi_dir}" does not exist, creating.')
        try:
            os.makedirs(doujinshi_dir, exist_ok=True)
        except OSError as e:
            logger.critical(e)
            # Nothing can be listed or written without the directory.
            return

    image_html = ''.join(
        f'<img src="{image}" class="image-item"/>\n'
        for image in sorted(os.listdir(doujinshi_dir))
        if os.path.splitext(image)[1] in EXTENSIONS
    )

    page_template = _readfile(f'viewer/{template}/index.html')
    css = _readfile(f'viewer/{template}/styles.css')
    js = _readfile(f'viewer/{template}/scripts.js')

    if doujinshi_obj is not None:
        name = doujinshi_obj.name
    else:
        name = None
        metadata_path = os.path.join(doujinshi_dir, "metadata.json")
        if os.path.exists(metadata_path):
            # JSON text is UTF-8 by specification; do not rely on the locale.
            with open(metadata_path, 'r', encoding='utf-8') as file:
                name = json.load(file).get("title")
        if not name:
            # Also covers metadata without a title, which would otherwise
            # render as the literal text "None".
            name = 'Doujinshi HTML Viewer'

    data = page_template.format(TITLE=name, IMAGES=image_html, SCRIPTS=js, STYLES=css)
    index_path = os.path.join(doujinshi_dir, 'index.html')
    try:
        with open(index_path, 'wb') as f:
            f.write(data.encode('utf-8'))

        logger.log(16, f'HTML Viewer has been written to "{index_path}"')
    except Exception as e:
        # Best effort: a failed viewer must not abort the caller.
        logger.warning(f'Writing HTML Viewer failed ({e})')
|
||||
|
||||
|
||||
def generate_main_html(output_dir=f'.{PATH_SEPARATOR}'):
    """Write a ``main.html`` gallery linking every doujinshi in *output_dir*.

    Each direct sub-folder that contains an ``index.html`` gets one gallery
    entry linking to it. The cover is the first image file of the folder in
    name order, or the first file when the folder holds no image. The title
    is the folder name with underscores turned into spaces. Folders are
    processed in sorted order so the page is reproducible.

    The viewer logo is copied next to ``main.html``. Write failures are
    logged as warnings, not raised.

    NOTE: this function changes the process working directory to
    *output_dir* and does not restore it.

    Args:
        output_dir: Folder to scan and to write ``main.html`` into;
            defaults to the current directory.
    """
    import shutil

    main = _readfile('viewer/main.html')
    css = _readfile('viewer/main.css')
    js = _readfile('viewer/main.js')

    element = (
        '\n'
        '<div class="gallery-favorite">\n'
        '<div class="gallery">\n'
        '<a href="./{FOLDER}/index.html" class="cover" style="padding:0 0 141.6% 0"><img\n'
        'src="./{FOLDER}/{IMAGE}" />\n'
        '<div class="caption">{TITLE}</div>\n'
        '</a>\n'
        '</div>\n'
        '</div>\n'
    )

    os.chdir(output_dir)
    doujinshi_dirs = sorted(next(os.walk('.'))[1])

    entries = []
    for folder in doujinshi_dirs:
        files = sorted(os.listdir(folder))
        if 'index.html' not in files:
            continue
        logger.info(f'Add doujinshi "{folder}"')

        # Prefer a real image (001.jpg, 001.png, ...) as cover; fall back to
        # the first file so folders without images still get an entry.
        image = next(
            (fn for fn in files if os.path.splitext(fn)[1] in EXTENSIONS),
            files[0],
        )
        title = folder.replace('_', ' ')

        entries.append(element.format(
            FOLDER=urllib.parse.quote(folder),
            IMAGE=urllib.parse.quote(image),
            TITLE=title,
        ))

    if not entries:
        logger.warning('No index.html found, --gen-main paused.')
        return

    try:
        data = main.format(STYLES=css, SCRIPTS=js, PICTURE=''.join(entries))
        with open('./main.html', 'wb') as f:
            f.write(data.encode('utf-8'))
        pkg_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
        shutil.copy(os.path.join(pkg_dir, 'viewer/logo.png'), './')
        # Drop one trailing slash so the logged path has no double slash.
        output_dir = output_dir[:-1] if output_dir.endswith('/') else output_dir
        logger.log(16, f'Main Viewer has been written to "{output_dir}/main.html"')
    except Exception as e:
        # Best effort: a failed gallery must not abort the caller.
        logger.warning(f'Writing Main Viewer failed ({e})')
|
||||
34
doujinshi_dl/core/utils/http.py
Normal file
34
doujinshi_dl/core/utils/http.py
Normal file
@@ -0,0 +1,34 @@
|
||||
# coding: utf-8
|
||||
"""Generic async HTTP request helper (no site-specific headers injected here)."""
|
||||
import httpx
|
||||
import urllib3.exceptions
|
||||
|
||||
# Suppress urllib3's InsecureRequestWarning as soon as this module is imported.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
|
||||
async def async_request(method, url, proxy=None, **kwargs):
    """Send one HTTP request through a short-lived ``httpx.AsyncClient``.

    This helper stays generic: site-specific headers (Cookie, User-Agent,
    Referer) are expected to be supplied by the caller via ``headers``.

    Args:
        method: HTTP method name.
        url: Target URL.
        proxy: Proxy to use. ``None`` falls back to the ``proxy`` entry of
            ``constant.CONFIG``; an empty string means no proxy.
        **kwargs: Passed to ``client.request``. ``headers`` and ``timeout``
            (default 30) are taken out and given to the client instead.

    Returns:
        The response object returned by ``client.request``.

    Note:
        The client is created with ``verify=False``, so TLS certificates
        are not verified.
    """
    from doujinshi_dl import constant

    request_headers = kwargs.pop('headers', {})
    # 'timeout' is handed to the client constructor, so it must not also
    # travel to client.request() inside kwargs.
    request_timeout = kwargs.pop('timeout', 30)

    chosen_proxy = constant.CONFIG.get('proxy', '') if proxy is None else proxy
    if isinstance(chosen_proxy, str) and not chosen_proxy:
        # An empty string means "no proxy configured".
        chosen_proxy = None

    client_options = {
        'headers': request_headers,
        'verify': False,
        'proxy': chosen_proxy,
        'timeout': request_timeout,
    }
    async with httpx.AsyncClient(**client_options) as client:
        return await client.request(method, url, **kwargs)
|
||||
Reference in New Issue
Block a user