mirror of
https://github.com/RicterZ/nhentai.git
synced 2025-04-19 18:31:24 +02:00
383 lines
12 KiB
Python
383 lines
12 KiB
Python
# coding: utf-8
|
|
|
|
import sys
|
|
import re
|
|
import os
|
|
import zipfile
|
|
import shutil
|
|
|
|
import httpx
|
|
import requests
|
|
import sqlite3
|
|
import urllib.parse
|
|
from typing import Tuple
|
|
from requests.structures import CaseInsensitiveDict
|
|
|
|
from nhentai import constant
|
|
from nhentai.constant import PATH_SEPARATOR
|
|
from nhentai.logger import logger
|
|
from nhentai.serializer import serialize_comic_xml, serialize_json, set_js_database
|
|
|
|
# Default maximum length, in characters, used by format_filename() when truncating names.
MAX_FIELD_LENGTH = 100
|
|
# File suffixes treated as images when building the HTML viewer and the PDF output.
EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
|
|
|
|
|
|
def request(method, url, **kwargs):
    """
    Send a synchronous HTTP request through a fresh `requests` session.

    The session carries the Referer, User-Agent and Cookie headers taken from
    `constant`. When the caller passes no (or an empty) `proxies` argument,
    the configured proxy is used for both http and https. Certificate
    verification is switched off for the call.

    :param method: name of the session method to invoke, e.g. 'get' or 'post'
    :param url: target URL
    :param kwargs: extra keyword arguments forwarded to the session method
    :return: the value returned by the session method
    """
    session = requests.Session()

    default_headers = {
        'Referer': constant.LOGIN_URL,
        'User-Agent': constant.CONFIG['useragent'],
        'Cookie': constant.CONFIG['cookie']
    }
    session.headers.update(default_headers)

    # Fall back to the configured proxy only when the caller gave none.
    if not kwargs.get('proxies'):
        configured_proxy = constant.CONFIG['proxy']
        kwargs['proxies'] = {
            'https': configured_proxy,
            'http': configured_proxy,
        }

    send = getattr(session, method)
    return send(url, verify=False, **kwargs)
|
|
|
|
|
|
async def async_request(method, url, proxy=None, **kwargs):
    """
    Send an asynchronous HTTP request with a short-lived `httpx.AsyncClient`.

    :param method: HTTP method name handed to `client.request`
    :param url: target URL
    :param proxy: proxy to use; `None` means "use the configured proxy", and
        an empty string ends up as no proxy at all
    :param kwargs: extra keyword arguments
    :return: the response object produced by `client.request`
    """
    # NOTE(review): kwargs is forwarded to BOTH the client constructor and the
    # request call below, so every key must be accepted by both.
    if proxy is None:
        proxy = constant.CONFIG['proxy']

    # An empty string is not a usable proxy setting; hand the client None instead.
    if isinstance(proxy, str) and not proxy:
        proxy = None

    request_headers = {
        'Referer': constant.LOGIN_URL,
        'User-Agent': constant.CONFIG['useragent'],
        'Cookie': constant.CONFIG['cookie'],
    }

    async with httpx.AsyncClient(headers=request_headers, verify=False, proxy=proxy, **kwargs) as client:
        result = await client.request(method, url, **kwargs)

    return result
|
|
|
|
|
|
def check_cookie():
    """
    Fetch the base URL and report whether the configured cookie is logged in.

    Exits the process with status 1 when the reply is a 403 containing the
    'Just a moment...' marker. Otherwise logs either the username found in the
    page or a warning that no username could be extracted.
    """
    page = request('get', constant.BASE_URL)

    blocked = page.status_code == 403 and 'Just a moment...' in page.text
    if blocked:
        logger.error('Blocked by Cloudflare captcha, please set your cookie and useragent')
        sys.exit(1)

    # The username is taken from the first "/users/<id>/<name>" link in the page.
    usernames = re.findall('"/users/[0-9]+/(.*?)"', page.text)
    if usernames:
        logger.log(16, f'Login successfully! Your username: {usernames[0]}')
        return

    logger.warning(
        'Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie')
|
|
|
|
|
|
class _Singleton(type):
|
|
""" A metaclass that creates a Singleton base class when called. """
|
|
_instances = {}
|
|
|
|
def __call__(cls, *args, **kwargs):
|
|
if cls not in cls._instances:
|
|
cls._instances[cls] = super(_Singleton, cls).__call__(*args, **kwargs)
|
|
return cls._instances[cls]
|
|
|
|
|
|
class Singleton(_Singleton('SingletonMeta', (object,), {})):
    """Base class whose metaclass is `_Singleton`, so each subclass yields one shared instance."""
|
|
|
|
|
|
def readfile(path):
    """
    Return the text content of a file located relative to this module.

    :param path: path joined onto the directory that contains this module
    :return: the decoded file content
    """
    loc = os.path.dirname(__file__)

    # Decode explicitly as UTF-8 rather than with the platform default
    # encoding: the viewers assembled from these files are written out as
    # UTF-8, and a locale-dependent default can fail on non-ASCII content.
    with open(os.path.join(loc, path), 'r', encoding='utf-8') as file:
        return file.read()
|
|
|
|
|
|
def parse_doujinshi_obj(
        output_dir: str,
        doujinshi_obj=None,
        file_type: str = ''
) -> Tuple[str, str]:
    """
    Work out the doujinshi directory and the output file path for a file type.

    Without a doujinshi object both values point at the current directory.
    With one, the directory is `output_dir/<obj.filename>` and the file is
    `output_dir/<obj.filename>.<file_type>`. For 'cbz' the comic XML is
    serialized into the directory as a side effect; for 'pdf' any '/' in the
    file name is replaced with '-'.

    :param output_dir: base output directory
    :param doujinshi_obj: object exposing a `filename` attribute, or None
    :param file_type: extension appended to the file name
    :return: (doujinshi_dir, filename)
    """
    if doujinshi_obj is None:
        return f'.{PATH_SEPARATOR}', f'.{PATH_SEPARATOR}doujinshi.{file_type}'

    base_name = doujinshi_obj.filename
    doujinshi_dir = os.path.join(output_dir, base_name)
    target_name = f'{base_name}.{file_type}'

    if file_type == 'cbz':
        serialize_comic_xml(doujinshi_obj, doujinshi_dir)
    elif file_type == 'pdf':
        target_name = target_name.replace('/', '-')

    return doujinshi_dir, os.path.join(output_dir, target_name)
|
|
|
|
|
|
def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
    """
    Write an `index.html` viewer into the doujinshi directory.

    The page is assembled from the chosen template's index.html, styles.css
    and scripts.js, plus one <img> tag per image file in the directory
    (sorted by name). A missing directory is created first. Failures while
    writing the page are logged as warnings rather than raised.

    :param output_dir: base output directory
    :param doujinshi_obj: object exposing `filename` and `name`, or None
    :param template: name of the viewer template folder
    """
    doujinshi_dir, _ = parse_doujinshi_obj(output_dir, doujinshi_obj, '.html')

    if not os.path.exists(doujinshi_dir):
        logger.warning(f'Path "{doujinshi_dir}" does not exist, creating.')
        try:
            os.makedirs(doujinshi_dir)
        except EnvironmentError as e:
            logger.critical(e)

    # Only files whose suffix is a known image extension become <img> tags.
    image_html = ''.join(
        f'<img src="{entry}" class="image-item"/>\n'
        for entry in sorted(os.listdir(doujinshi_dir))
        if os.path.splitext(entry)[1] in EXTENSIONS
    )

    html = readfile(f'viewer/{template}/index.html')
    css = readfile(f'viewer/{template}/styles.css')
    js = readfile(f'viewer/{template}/scripts.js')

    # NOTE(review): without a doujinshi object TITLE receives a dict, not a
    # plain string - confirm the template expects that.
    name = doujinshi_obj.name if doujinshi_obj is not None else {'title': 'nHentai HTML Viewer'}

    data = html.format(TITLE=name, IMAGES=image_html, SCRIPTS=js, STYLES=css)
    try:
        index_path = os.path.join(doujinshi_dir, 'index.html')
        with open(index_path, 'wb') as f:
            f.write(data.encode('utf-8'))

        logger.log(16, f'HTML Viewer has been written to "{index_path}"')
    except Exception as e:
        logger.warning(f'Writing HTML Viewer failed ({e})')
|
|
|
|
|
|
def move_to_folder(output_dir='.', doujinshi_obj=None, file_type=None):
    """
    Replace the loose files of a doujinshi directory with its generated document.

    Every regular file in the doujinshi directory whose suffix is not '.pdf'
    or '.cbz' is deleted, then the generated document is moved into that
    directory. Note that with `doujinshi_obj=None` the directory resolved by
    `parse_doujinshi_obj` is the current directory.

    :param output_dir: base output directory
    :param doujinshi_obj: object describing the doujinshi, or None
    :param file_type: extension of the generated document; required
    :raises RuntimeError: when `file_type` is empty or None
    """
    if not file_type:
        raise RuntimeError('no file_type specified')

    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)

    for fn in os.listdir(doujinshi_dir):
        file_path = os.path.join(doujinshi_dir, fn)
        _, ext = os.path.splitext(file_path)
        if ext in ('.pdf', '.cbz'):
            continue

        if not os.path.isfile(file_path):
            continue

        try:
            os.remove(file_path)
        except OSError as e:
            # Best effort: report through the module logger (like the rest of
            # this file) and keep going with the remaining files.
            logger.warning(f'Error deleting file: {e}')

    shutil.move(filename, os.path.join(doujinshi_dir, os.path.basename(filename)))
|
|
|
|
|
|
def generate_main_html(output_dir=f'.{PATH_SEPARATOR}'):
    """
    Generate a main HTML page listing every doujinshi found in `output_dir`.

    Each sub-folder that contains an `index.html` gets a gallery entry linking
    to that file, using the folder's first file (by sorted name) as the cover.
    The default output folder is the current directory.

    Side effects: changes the process working directory to `output_dir`,
    writes `main.html`, copies the viewer logo next to it and calls
    `set_js_database()`. Failures while writing are logged as warnings.

    :param output_dir: directory holding the doujinshi folders
    """
    # Function-scope import so this block does not depend on the file's import list.
    import html

    image_html = ''

    main = readfile('viewer/main.html')
    css = readfile('viewer/main.css')
    js = readfile('viewer/main.js')

    element = (
        '\n'
        '<div class="gallery-favorite">\n'
        '<div class="gallery">\n'
        '<a href="./{FOLDER}/index.html" class="cover" style="padding:0 0 141.6% 0"><img\n'
        'src="./{FOLDER}/{IMAGE}" />\n'
        '<div class="caption">{TITLE}</div>\n'
        '</a>\n'
        '</div>\n'
        '</div>\n'
    )

    os.chdir(output_dir)
    doujinshi_dirs = next(os.walk('.'))[1]

    for folder in doujinshi_dirs:
        files = sorted(os.listdir(folder))

        if 'index.html' not in files:
            continue
        logger.info(f'Add doujinshi "{folder}"')

        image = files[0]  # 001.jpg or 001.png
        # Folder names come from os.walk and are always strings, so the title
        # is derived directly (the former None fallback could never run).
        title = folder.replace('_', ' ')

        # Folder and image names end up in URL attributes and the title in
        # element text, so quote/escape them to keep the markup well-formed.
        image_html += element.format(
            FOLDER=urllib.parse.quote(folder),
            IMAGE=urllib.parse.quote(image),
            TITLE=html.escape(title),
        )

    if image_html == '':
        logger.warning('No index.html found, --gen-main paused.')
        return

    try:
        data = main.format(STYLES=css, SCRIPTS=js, PICTURE=image_html)
        with open('./main.html', 'wb') as f:
            f.write(data.encode('utf-8'))
        shutil.copy(os.path.join(os.path.dirname(__file__), 'viewer', 'logo.png'), './')
        set_js_database()
        output_dir = output_dir[:-1] if output_dir.endswith('/') else output_dir
        logger.log(16, f'Main Viewer has been written to "{output_dir}/main.html"')
    except Exception as e:
        logger.warning(f'Writing Main Viewer failed ({e})')
|
|
|
|
|
|
def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
    """
    Produce a CBZ, PDF or JSON document for a doujinshi.

    Nothing is generated when `<doujinshi_dir>.<file_type>` already exists and
    `regenerate` is false. Other file types are silently ignored.

    :param file_type: 'cbz', 'pdf' or 'json'
    :param output_dir: base output directory
    :param doujinshi_obj: object describing the doujinshi, or None
    :param regenerate: rebuild the document even if it already exists
    """
    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)

    existing = f'{doujinshi_dir}.{file_type}'
    if not regenerate and os.path.exists(existing):
        logger.info(f'Skipped {file_type} file generation: {existing} already exists')
        return

    if file_type == 'json':
        serialize_json(doujinshi_obj, doujinshi_dir)
        return

    if file_type == 'cbz':
        # Every entry of the directory goes into the archive, in name order.
        entries = sorted(os.listdir(doujinshi_dir))

        logger.info(f'Writing CBZ file to path: {filename}')
        with zipfile.ZipFile(filename, 'w') as archive:
            for entry in entries:
                archive.write(os.path.join(doujinshi_dir, entry), entry)

        logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
        return

    if file_type != 'pdf':
        return

    try:
        import img2pdf

        # Write the images to a PDF file using img2pdf.
        pages = sorted(f for f in os.listdir(doujinshi_dir) if f.lower().endswith(EXTENSIONS))

        logger.info(f'Writing PDF file to path: {filename}')
        with open(filename, 'wb') as pdf_f:
            page_paths = [os.path.join(doujinshi_dir, page) for page in pages]
            pdf_f.write(img2pdf.convert(page_paths, rotation=img2pdf.Rotation.ifvalid))

        logger.log(16, f'PDF file has been written to "{filename}"')

    except ImportError:
        logger.error("Please install img2pdf package by using pip.")
|
|
|
|
|
|
def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
    """
    Turn an arbitrary title into a string usable as a file or folder name.

    This uses a blacklist rather than a whitelist, because most doujinshi
    names contain Japanese characters that a whitelist would reject. Unless
    `_truncate_only` is set, each forbidden character (backslash, quotes,
    / : , ; * ? < > |, tab and the control characters \\x00-\\x0b) is replaced
    by a space, runs of whitespace are collapsed, and trailing dots and spaces
    are removed. Names of `length` or more characters are cut to `length - 1`
    characters plus an ellipsis, and empty '[]' pairs are dropped.

    :param s: the raw name
    :param length: maximum length of the result, in characters
    :param _truncate_only: skip character replacement and only truncate
    :return: the sanitized name
    """
    # maybe you can use `--format` to select a suitable filename

    if not _truncate_only:
        ban_chars = '\\\'/:,;*?"<>|\t\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b'
        filename = s.translate(str.maketrans(ban_chars, ' ' * len(ban_chars))).strip()
        filename = ' '.join(filename.split())

        # Strip dots and spaces together: removing dots alone leaves names
        # such as 'abc. .' ending in a dot once the final strip() has run.
        filename = filename.rstrip('. ')
    else:
        filename = s

    # limit `length` chars
    if len(filename) >= length:
        filename = filename[:length - 1] + u'…'

    # Remove [] from filename
    filename = filename.replace('[]', '').strip()

    if not _truncate_only:
        # Dropping '[]' can expose a trailing dot again (e.g. 'abc.[]').
        filename = filename.rstrip('. ')

    return filename
|
|
|
|
|
|
def signal_handler(_signal, _frame):
    """Log that an interrupt was received and terminate with exit status 1."""
    logger.error('Ctrl-C signal received. Stopping...')
    raise SystemExit(1)
|
|
|
|
|
|
def paging(page_string):
    """
    Expand a page specification into a list of page numbers.

    Entries are separated by commas; each entry is a single page or an
    inclusive `start-end` range, e.g. '1,3-5,14' -> [1, 3, 4, 5, 14].
    Whitespace around numbers is ignored. A range whose end is smaller than
    its start contributes no pages.

    :param page_string: the specification; empty or None selects the first page
    :return: list of page numbers, in the order given
    :raises ValueError: ('Invalid page number') for any malformed entry
    """
    if not page_string:
        # default, the first page
        return [1]

    page_list = []
    for entry in page_string.split(','):
        bounds = [part.strip() for part in entry.split('-')]

        # Reject entries like '1-2-3', '3-' or 'abc' with one clear error.
        # isdecimal() matches exactly the digits that int() can parse.
        if len(bounds) > 2 or not all(part.isdecimal() for part in bounds):
            raise ValueError('Invalid page number')

        # A single page is simply the range from itself to itself.
        first, last = int(bounds[0]), int(bounds[-1])
        page_list.extend(range(first, last + 1))

    return page_list
|
|
|
|
|
|
def generate_metadata_file(output_dir, doujinshi_obj):
    """
    Write an `info.txt` metadata file into the doujinshi's directory.

    One `FIELD: value` line is written per known field. Each value is looked
    up, in order, in the object's `table` (case-insensitively, singular then
    plural key) and then in its `info` mapping (lower-case singular, then
    plural), falling back to "Unknown".

    :param output_dir: base output directory
    :param doujinshi_obj: object exposing `filename`, `table` and `info`
    """
    info_txt_path = os.path.join(output_dir, doujinshi_obj.filename, 'info.txt')

    fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
              'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
              'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
              'SERIES', 'PARODY', 'URL']

    temp_dict = CaseInsensitiveDict(dict(doujinshi_obj.table))

    # The context manager closes the file even if a lookup or write raises.
    with open(info_txt_path, 'w', encoding='utf-8') as f:
        for i in fields:
            v = temp_dict.get(i)
            if v is None:
                v = temp_dict.get(f'{i}s')
            if v is None:
                v = doujinshi_obj.info.get(i.lower(), None)
            if v is None:
                v = doujinshi_obj.info.get(f'{i.lower()}s', "Unknown")
            f.write(f'{i}: {v}\n')

    logger.log(16, f'Metadata Info has been written to "{info_txt_path}"')
|
|
|
|
|
|
class DB:
    """
    Context manager around the SQLite download-history database.

    Entering opens the database at `constant.NHENTAI_HISTORY` and makes sure
    the `download_history` table (a single text column `id`) exists; leaving
    closes the connection.
    """
    # Set by __enter__; None until the context is entered.
    conn = None
    cur = None

    def __enter__(self):
        """Open the connection, create the table if needed and return self."""
        self.conn = sqlite3.connect(constant.NHENTAI_HISTORY)
        self.cur = self.conn.cursor()
        self.cur.execute('CREATE TABLE IF NOT EXISTS download_history (id text)')
        self.conn.commit()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the connection; exceptions from the with-body are not suppressed."""
        self.conn.close()

    def clean_all(self):
        """Delete every row of the history table."""
        self.cur.execute('DELETE FROM download_history WHERE 1')
        self.conn.commit()

    def add_one(self, data):
        """Insert `data` as a new id row."""
        self.cur.execute('INSERT INTO download_history VALUES (?)', (data,))
        self.conn.commit()

    def get_all(self):
        """Return all stored ids as a list."""
        rows = self.cur.execute('SELECT id FROM download_history')
        return [row[0] for row in rows]
|