From 0c9b92ce108ce2b110ab599ecfa77b70bf3e588c Mon Sep 17 00:00:00 2001 From: ricterz Date: Fri, 28 Feb 2025 00:17:05 +0800 Subject: [PATCH] 0.6.0-beta #394 --- nhentai/__init__.py | 2 +- nhentai/cmdline.py | 18 ++++++------ nhentai/command.py | 19 +++++------- nhentai/doujinshi.py | 3 ++ nhentai/serializer.py | 22 ++++++++++++++ nhentai/utils.py | 68 ++++++++++++++++++------------------------- pyproject.toml | 2 +- 7 files changed, 71 insertions(+), 63 deletions(-) diff --git a/nhentai/__init__.py b/nhentai/__init__.py index 5f799c5..3eb9cdd 100644 --- a/nhentai/__init__.py +++ b/nhentai/__init__.py @@ -1,3 +1,3 @@ -__version__ = '0.5.25' +__version__ = '0.6.0-beta' __author__ = 'RicterZ' __email__ = 'ricterzheng@gmail.com' diff --git a/nhentai/cmdline.py b/nhentai/cmdline.py index 78344b9..481d768 100644 --- a/nhentai/cmdline.py +++ b/nhentai/cmdline.py @@ -65,6 +65,8 @@ def cmd_parser(): # operation options parser.add_option('--download', '-D', dest='is_download', action='store_true', help='download doujinshi (for search results)') + parser.add_option('--no-download', dest='no_download', action='store_true', default=False, + help='download doujinshi (for search results)') parser.add_option('--show', '-S', dest='is_show', action='store_true', help='just show the doujinshi information') @@ -107,7 +109,6 @@ def cmd_parser(): help='read gallery IDs from file.') parser.add_option('--format', type='string', dest='name_format', action='store', help='format the saved folder name', default='[%i][%a][%t]') - parser.add_option('--dry-run', action='store_true', dest='dryrun', help='Dry run, skip file download') parser.add_option('--no-filename-padding', action='store_true', dest='no_filename_padding', default=False, help='no padding in the images filename, such as \'001.jpg\'') @@ -123,16 +124,19 @@ def cmd_parser(): help='generate Comic Book CBZ File') parser.add_option('--pdf', '-P', dest='is_pdf', action='store_true', help='generate PDF file') + + parser.add_option('--meta', dest='generate_metadata', action='store_true', default=False, + help='generate a metadata file in doujinshi format') + parser.add_option('--update-meta', dest='update_metadata', action='store_true', default=False, + help='update the metadata file of a doujinshi, update CBZ metadata if exists') + parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False, help='remove downloaded doujinshi dir when generated CBZ or PDF file') parser.add_option('--move-to-folder', dest='move_to_folder', action='store_true', default=False, help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file') - parser.add_option('--meta', dest='generate_metadata', action='store_true', - help='generate a metadata file in doujinshi format') + parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False, help='regenerate the cbz or pdf file if exists') - parser.add_option('--no-metadata', dest='no_metadata', action='store_true', default=False, - help='don\'t generate metadata json file in doujinshi output path') # nhentai options parser.add_option('--cookie', type='str', dest='cookie', action='store', @@ -241,8 +245,4 @@ def cmd_parser(): logger.critical('Maximum number of used threads is 15') sys.exit(1) - if args.dryrun and (args.is_cbz or args.is_pdf): - logger.critical('Cannot generate PDF or CBZ during dry-run') - sys.exit(1) - return args diff --git a/nhentai/command.py b/nhentai/command.py index 42c7614..8dc2cf9 100644 --- a/nhentai/command.py +++ b/nhentai/command.py @@ -13,7 +13,7 @@ from nhentai.doujinshi import Doujinshi from nhentai.downloader import Downloader from nhentai.logger import logger from nhentai.constant import BASE_URL -from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \ +from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata, \ paging, check_cookie, signal_handler, DB, move_to_folder @@ -97,17 +97,15 @@ def main(): else: continue - if not options.dryrun: - doujinshi.downloader = downloader + doujinshi.downloader = downloader - if doujinshi.check_if_need_download(options): - doujinshi.download() - else: - logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}') - continue + if doujinshi.check_if_need_download(options): + doujinshi.download() + else: + logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}') if options.generate_metadata: - generate_metadata_file(options.output_dir, doujinshi) + generate_metadata(options.output_dir, doujinshi) if options.is_save_download_history: with DB() as db: @@ -116,9 +114,6 @@ def main(): if not options.is_nohtml: generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template']) - if not options.no_metadata: - generate_doc('json', options.output_dir, doujinshi, options.regenerate) - if options.is_cbz: generate_doc('cbz', options.output_dir, doujinshi, options.regenerate) diff --git a/nhentai/doujinshi.py b/nhentai/doujinshi.py index b947102..de10fa8 100644 --- a/nhentai/doujinshi.py +++ b/nhentai/doujinshi.py @@ -77,6 +77,9 @@ class Doujinshi(object): logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}') def check_if_need_download(self, options): + if options.no_download: + return False + base_path = os.path.join(self.downloader.path, self.filename) # regenerate, re-download diff --git a/nhentai/serializer.py b/nhentai/serializer.py index c831dd1..57c339e 100644 --- a/nhentai/serializer.py +++ b/nhentai/serializer.py @@ -4,6 +4,7 @@ import os from nhentai.constant import PATH_SEPARATOR, LANGUAGE_ISO from xml.sax.saxutils import escape +from requests.structures import CaseInsensitiveDict def serialize_json(doujinshi, output_dir: str): @@ -77,6 +78,26 @@ def serialize_comic_xml(doujinshi, output_dir): f.write('') +def serialize_info_txt(doujinshi, output_dir: str): + info_txt_path = os.path.join(output_dir, 'info.txt') + f = open(info_txt_path, 'w', encoding='utf-8') + + fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR', + 'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES', + 'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS', + 'SERIES', 'PARODY', 'URL'] + + temp_dict = CaseInsensitiveDict(dict(doujinshi.table)) + for i in fields: + v = temp_dict.get(i) + v = temp_dict.get(f'{i}s') if v is None else v + v = doujinshi.info.get(i.lower(), None) if v is None else v + v = doujinshi.info.get(f'{i.lower()}s', "Unknown") if v is None else v + f.write(f'{i}: {v}\n') + + f.close() + + def xml_write_simple_tag(f, name, val, indent=1): f.write(f'{" "*indent}<{name}>{escape(str(val))}\n') @@ -131,3 +152,4 @@ def set_js_database(): indexed_json = json.dumps(indexed_json, separators=(',', ':')) f.write('var data = ' + indexed_json) f.write(';\nvar tags = ' + unique_json) + diff --git a/nhentai/utils.py b/nhentai/utils.py index 2b9631c..83bd728 100644 --- a/nhentai/utils.py +++ b/nhentai/utils.py @@ -11,12 +11,11 @@ import requests import sqlite3 import urllib.parse from typing import Tuple -from requests.structures import CaseInsensitiveDict from nhentai import constant from nhentai.constant import PATH_SEPARATOR from nhentai.logger import logger -from nhentai.serializer import serialize_comic_xml, serialize_json, set_js_database +from nhentai.serializer import serialize_comic_xml, serialize_json, serialize_info_txt, set_js_database MAX_FIELD_LENGTH = 100 EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp') @@ -105,9 +104,6 @@ def parse_doujinshi_obj( doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename) _filename = f'{doujinshi_obj.filename}.{file_type}' - if file_type == 'cbz': - serialize_comic_xml(doujinshi_obj, doujinshi_dir) - if file_type == 'pdf': _filename = _filename.replace('/', '-') @@ -115,6 +111,9 @@ def parse_doujinshi_obj( else: doujinshi_dir = f'.{PATH_SEPARATOR}' + if not os.path.exists(doujinshi_dir): + os.makedirs(doujinshi_dir) + return doujinshi_dir, filename @@ -235,8 +234,20 @@ def generate_main_html(output_dir=f'.{PATH_SEPARATOR}'): logger.warning(f'Writing Main Viewer failed ({e})') -def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False): +def generate_cbz(doujinshi_dir, filename): + file_list = os.listdir(doujinshi_dir) + file_list.sort() + logger.info(f'Writing CBZ file to path: {filename}') + with zipfile.ZipFile(filename, 'w') as cbz_pf: + for image in file_list: + image_path = os.path.join(doujinshi_dir, image) + cbz_pf.write(image_path, image) + + logger.log(16, f'Comic Book CBZ file has been written to "{filename}"') + + +def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False): doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type) if os.path.exists(f'{doujinshi_dir}.{file_type}') and not regenerate: @@ -244,16 +255,9 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa return if file_type == 'cbz': - file_list = os.listdir(doujinshi_dir) - file_list.sort() + serialize_comic_xml(doujinshi_obj, doujinshi_dir) + generate_cbz(doujinshi_dir, filename) - logger.info(f'Writing CBZ file to path: {filename}') - with zipfile.ZipFile(filename, 'w') as cbz_pf: - for image in file_list: - image_path = os.path.join(doujinshi_dir, image) - cbz_pf.write(image_path, image) - - logger.log(16, f'Comic Book CBZ file has been written to "{filename}"') elif file_type == 'pdf': try: import img2pdf @@ -273,9 +277,16 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa except ImportError: logger.error("Please install img2pdf package by using pip.") + else: + raise ValueError('invalid file type') - elif file_type == 'json': - serialize_json(doujinshi_obj, doujinshi_dir) + +def generate_metadata(output_dir, doujinshi_obj=None): + doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '') + serialize_json(doujinshi_obj, doujinshi_dir) + serialize_comic_xml(doujinshi_obj, doujinshi_dir) + serialize_info_txt(doujinshi_obj, doujinshi_dir) + logger.log(16, f'Metadata files have been written to "{doujinshi_dir}"') def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False): @@ -332,29 +343,6 @@ def paging(page_string): return page_list -def generate_metadata_file(output_dir, doujinshi_obj): - - info_txt_path = os.path.join(output_dir, doujinshi_obj.filename, 'info.txt') - - f = open(info_txt_path, 'w', encoding='utf-8') - - fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR', - 'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES', - 'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS', - 'SERIES', 'PARODY', 'URL'] - - temp_dict = CaseInsensitiveDict(dict(doujinshi_obj.table)) - for i in fields: - v = temp_dict.get(i) - v = temp_dict.get(f'{i}s') if v is None else v - v = doujinshi_obj.info.get(i.lower(), None) if v is None else v - v = doujinshi_obj.info.get(f'{i.lower()}s', "Unknown") if v is None else v - f.write(f'{i}: {v}\n') - - f.close() - logger.log(16, f'Metadata Info has been written to "{info_txt_path}"') - - class DB(object): conn = None cur = None diff --git a/pyproject.toml b/pyproject.toml index b4a96d4..db3a885 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "nhentai" -version = "0.5.25" +version = "0.6.0-beta" description = "nhentai doujinshi downloader" authors = ["Ricter Z "] license = "MIT"