This commit is contained in:
ricterz 2025-02-28 00:17:05 +08:00
parent ca71a72747
commit 0c9b92ce10
7 changed files with 71 additions and 63 deletions

View File

@ -1,3 +1,3 @@
__version__ = '0.5.25' __version__ = '0.6.0-beta'
__author__ = 'RicterZ' __author__ = 'RicterZ'
__email__ = 'ricterzheng@gmail.com' __email__ = 'ricterzheng@gmail.com'

View File

@ -65,6 +65,8 @@ def cmd_parser():
# operation options # operation options
parser.add_option('--download', '-D', dest='is_download', action='store_true', parser.add_option('--download', '-D', dest='is_download', action='store_true',
help='download doujinshi (for search results)') help='download doujinshi (for search results)')
parser.add_option('--no-download', dest='no_download', action='store_true', default=False,
help='download doujinshi (for search results)')
parser.add_option('--show', '-S', dest='is_show', action='store_true', parser.add_option('--show', '-S', dest='is_show', action='store_true',
help='just show the doujinshi information') help='just show the doujinshi information')
@ -107,7 +109,6 @@ def cmd_parser():
help='read gallery IDs from file.') help='read gallery IDs from file.')
parser.add_option('--format', type='string', dest='name_format', action='store', parser.add_option('--format', type='string', dest='name_format', action='store',
help='format the saved folder name', default='[%i][%a][%t]') help='format the saved folder name', default='[%i][%a][%t]')
parser.add_option('--dry-run', action='store_true', dest='dryrun', help='Dry run, skip file download')
parser.add_option('--no-filename-padding', action='store_true', dest='no_filename_padding', parser.add_option('--no-filename-padding', action='store_true', dest='no_filename_padding',
default=False, help='no padding in the images filename, such as \'001.jpg\'') default=False, help='no padding in the images filename, such as \'001.jpg\'')
@ -123,16 +124,19 @@ def cmd_parser():
help='generate Comic Book CBZ File') help='generate Comic Book CBZ File')
parser.add_option('--pdf', '-P', dest='is_pdf', action='store_true', parser.add_option('--pdf', '-P', dest='is_pdf', action='store_true',
help='generate PDF file') help='generate PDF file')
parser.add_option('--meta', dest='generate_metadata', action='store_true', default=False,
help='generate a metadata file in doujinshi format')
parser.add_option('--update-meta', dest='update_metadata', action='store_true', default=False,
help='update the metadata file of a doujinshi, update CBZ metadata if exists')
parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False, parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
help='remove downloaded doujinshi dir when generated CBZ or PDF file') help='remove downloaded doujinshi dir when generated CBZ or PDF file')
parser.add_option('--move-to-folder', dest='move_to_folder', action='store_true', default=False, parser.add_option('--move-to-folder', dest='move_to_folder', action='store_true', default=False,
help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file') help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
parser.add_option('--meta', dest='generate_metadata', action='store_true',
help='generate a metadata file in doujinshi format')
parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False, parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False,
help='regenerate the cbz or pdf file if exists') help='regenerate the cbz or pdf file if exists')
parser.add_option('--no-metadata', dest='no_metadata', action='store_true', default=False,
help='don\'t generate metadata json file in doujinshi output path')
# nhentai options # nhentai options
parser.add_option('--cookie', type='str', dest='cookie', action='store', parser.add_option('--cookie', type='str', dest='cookie', action='store',
@ -241,8 +245,4 @@ def cmd_parser():
logger.critical('Maximum number of used threads is 15') logger.critical('Maximum number of used threads is 15')
sys.exit(1) sys.exit(1)
if args.dryrun and (args.is_cbz or args.is_pdf):
logger.critical('Cannot generate PDF or CBZ during dry-run')
sys.exit(1)
return args return args

View File

@ -13,7 +13,7 @@ from nhentai.doujinshi import Doujinshi
from nhentai.downloader import Downloader from nhentai.downloader import Downloader
from nhentai.logger import logger from nhentai.logger import logger
from nhentai.constant import BASE_URL from nhentai.constant import BASE_URL
from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \ from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata, \
paging, check_cookie, signal_handler, DB, move_to_folder paging, check_cookie, signal_handler, DB, move_to_folder
@ -97,17 +97,15 @@ def main():
else: else:
continue continue
if not options.dryrun: doujinshi.downloader = downloader
doujinshi.downloader = downloader
if doujinshi.check_if_need_download(options): if doujinshi.check_if_need_download(options):
doujinshi.download() doujinshi.download()
else: else:
logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}') logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
continue
if options.generate_metadata: if options.generate_metadata:
generate_metadata_file(options.output_dir, doujinshi) generate_metadata(options.output_dir, doujinshi)
if options.is_save_download_history: if options.is_save_download_history:
with DB() as db: with DB() as db:
@ -116,9 +114,6 @@ def main():
if not options.is_nohtml: if not options.is_nohtml:
generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template']) generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template'])
if not options.no_metadata:
generate_doc('json', options.output_dir, doujinshi, options.regenerate)
if options.is_cbz: if options.is_cbz:
generate_doc('cbz', options.output_dir, doujinshi, options.regenerate) generate_doc('cbz', options.output_dir, doujinshi, options.regenerate)

View File

@ -77,6 +77,9 @@ class Doujinshi(object):
logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}') logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}')
def check_if_need_download(self, options): def check_if_need_download(self, options):
if options.no_download:
return False
base_path = os.path.join(self.downloader.path, self.filename) base_path = os.path.join(self.downloader.path, self.filename)
# regenerate, re-download # regenerate, re-download

View File

@ -4,6 +4,7 @@ import os
from nhentai.constant import PATH_SEPARATOR, LANGUAGE_ISO from nhentai.constant import PATH_SEPARATOR, LANGUAGE_ISO
from xml.sax.saxutils import escape from xml.sax.saxutils import escape
from requests.structures import CaseInsensitiveDict
def serialize_json(doujinshi, output_dir: str): def serialize_json(doujinshi, output_dir: str):
@ -77,6 +78,26 @@ def serialize_comic_xml(doujinshi, output_dir):
f.write('</ComicInfo>') f.write('</ComicInfo>')
def serialize_info_txt(doujinshi, output_dir: str):
info_txt_path = os.path.join(output_dir, 'info.txt')
f = open(info_txt_path, 'w', encoding='utf-8')
fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
'SERIES', 'PARODY', 'URL']
temp_dict = CaseInsensitiveDict(dict(doujinshi.table))
for i in fields:
v = temp_dict.get(i)
v = temp_dict.get(f'{i}s') if v is None else v
v = doujinshi.info.get(i.lower(), None) if v is None else v
v = doujinshi.info.get(f'{i.lower()}s', "Unknown") if v is None else v
f.write(f'{i}: {v}\n')
f.close()
def xml_write_simple_tag(f, name, val, indent=1): def xml_write_simple_tag(f, name, val, indent=1):
f.write(f'{" "*indent}<{name}>{escape(str(val))}</{name}>\n') f.write(f'{" "*indent}<{name}>{escape(str(val))}</{name}>\n')
@ -131,3 +152,4 @@ def set_js_database():
indexed_json = json.dumps(indexed_json, separators=(',', ':')) indexed_json = json.dumps(indexed_json, separators=(',', ':'))
f.write('var data = ' + indexed_json) f.write('var data = ' + indexed_json)
f.write(';\nvar tags = ' + unique_json) f.write(';\nvar tags = ' + unique_json)

View File

@ -11,12 +11,11 @@ import requests
import sqlite3 import sqlite3
import urllib.parse import urllib.parse
from typing import Tuple from typing import Tuple
from requests.structures import CaseInsensitiveDict
from nhentai import constant from nhentai import constant
from nhentai.constant import PATH_SEPARATOR from nhentai.constant import PATH_SEPARATOR
from nhentai.logger import logger from nhentai.logger import logger
from nhentai.serializer import serialize_comic_xml, serialize_json, set_js_database from nhentai.serializer import serialize_comic_xml, serialize_json, serialize_info_txt, set_js_database
MAX_FIELD_LENGTH = 100 MAX_FIELD_LENGTH = 100
EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp') EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
@ -105,9 +104,6 @@ def parse_doujinshi_obj(
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename) doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
_filename = f'{doujinshi_obj.filename}.{file_type}' _filename = f'{doujinshi_obj.filename}.{file_type}'
if file_type == 'cbz':
serialize_comic_xml(doujinshi_obj, doujinshi_dir)
if file_type == 'pdf': if file_type == 'pdf':
_filename = _filename.replace('/', '-') _filename = _filename.replace('/', '-')
@ -115,6 +111,9 @@ def parse_doujinshi_obj(
else: else:
doujinshi_dir = f'.{PATH_SEPARATOR}' doujinshi_dir = f'.{PATH_SEPARATOR}'
if not os.path.exists(doujinshi_dir):
os.makedirs(doujinshi_dir)
return doujinshi_dir, filename return doujinshi_dir, filename
@ -235,8 +234,20 @@ def generate_main_html(output_dir=f'.{PATH_SEPARATOR}'):
logger.warning(f'Writing Main Viewer failed ({e})') logger.warning(f'Writing Main Viewer failed ({e})')
def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False): def generate_cbz(doujinshi_dir, filename):
file_list = os.listdir(doujinshi_dir)
file_list.sort()
logger.info(f'Writing CBZ file to path: {filename}')
with zipfile.ZipFile(filename, 'w') as cbz_pf:
for image in file_list:
image_path = os.path.join(doujinshi_dir, image)
cbz_pf.write(image_path, image)
logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type) doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
if os.path.exists(f'{doujinshi_dir}.{file_type}') and not regenerate: if os.path.exists(f'{doujinshi_dir}.{file_type}') and not regenerate:
@ -244,16 +255,9 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa
return return
if file_type == 'cbz': if file_type == 'cbz':
file_list = os.listdir(doujinshi_dir) serialize_comic_xml(doujinshi_obj, doujinshi_dir)
file_list.sort() generate_cbz(doujinshi_dir, filename)
logger.info(f'Writing CBZ file to path: {filename}')
with zipfile.ZipFile(filename, 'w') as cbz_pf:
for image in file_list:
image_path = os.path.join(doujinshi_dir, image)
cbz_pf.write(image_path, image)
logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
elif file_type == 'pdf': elif file_type == 'pdf':
try: try:
import img2pdf import img2pdf
@ -273,9 +277,16 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa
except ImportError: except ImportError:
logger.error("Please install img2pdf package by using pip.") logger.error("Please install img2pdf package by using pip.")
else:
raise ValueError('invalid file type')
elif file_type == 'json':
serialize_json(doujinshi_obj, doujinshi_dir) def generate_metadata(output_dir, doujinshi_obj=None):
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '')
serialize_json(doujinshi_obj, doujinshi_dir)
serialize_comic_xml(doujinshi_obj, doujinshi_dir)
serialize_info_txt(doujinshi_obj, doujinshi_dir)
logger.log(16, f'Metadata files have been written to "{doujinshi_dir}"')
def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False): def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
@ -332,29 +343,6 @@ def paging(page_string):
return page_list return page_list
def generate_metadata_file(output_dir, doujinshi_obj):
info_txt_path = os.path.join(output_dir, doujinshi_obj.filename, 'info.txt')
f = open(info_txt_path, 'w', encoding='utf-8')
fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
'SERIES', 'PARODY', 'URL']
temp_dict = CaseInsensitiveDict(dict(doujinshi_obj.table))
for i in fields:
v = temp_dict.get(i)
v = temp_dict.get(f'{i}s') if v is None else v
v = doujinshi_obj.info.get(i.lower(), None) if v is None else v
v = doujinshi_obj.info.get(f'{i.lower()}s', "Unknown") if v is None else v
f.write(f'{i}: {v}\n')
f.close()
logger.log(16, f'Metadata Info has been written to "{info_txt_path}"')
class DB(object): class DB(object):
conn = None conn = None
cur = None cur = None

View File

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "nhentai" name = "nhentai"
version = "0.5.25" version = "0.6.0-beta"
description = "nhentai doujinshi downloader" description = "nhentai doujinshi downloader"
authors = ["Ricter Z <ricterzheng@gmail.com>"] authors = ["Ricter Z <ricterzheng@gmail.com>"]
license = "MIT" license = "MIT"