0.5.8 #343

improve download logic #343
generate_metadata_file no need to use parse_doujinshi_obj
2026-01-09 14:52:45 +01:00 · 2024-09-22 14:42:02 +08:00 · 2024-09-22 14:39:32 +08:00 · 2024-09-22 14:11:55 +08:00 · 2024-09-22 13:35:07 +08:00 · 2024-09-22 12:30:55 +08:00
8 changed files with 140 additions and 120 deletions
--- a/nhentai/init.py
+++ b/nhentai/init.py
@@ -1,3 +1,3 @@
-__version__ = '0.5.7'
+__version__ = '0.5.8'
 __author__ = 'RicterZ'
 __email__ = 'ricterzheng@gmail.com'
--- a/nhentai/cmdline.py
+++ b/nhentai/cmdline.py
@@ -118,8 +118,8 @@ def cmd_parser():
                      help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
    parser.add_option('--meta', dest='generate_metadata', action='store_true',
                      help='generate a metadata file in doujinshi format')
-    parser.add_option('--regenerate-cbz', dest='regenerate_cbz', action='store_true', default=False,
+    parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False,
-                      help='regenerate the cbz file if exists')
+                      help='regenerate the cbz or pdf file if exists')
    # nhentai options
    parser.add_option('--cookie', type='str', dest='cookie', action='store',
--- a/nhentai/command.py
+++ b/nhentai/command.py
@@ -11,7 +11,7 @@ from nhentai.doujinshi import Doujinshi
 from nhentai.downloader import Downloader
 from nhentai.logger import logger
 from nhentai.constant import BASE_URL
-from nhentai.utils import generate_html, generate_cbz, generate_main_html, generate_pdf, generate_metadata_file, \
+from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \
    paging, check_cookie, signal_handler, DB
@@ -87,22 +87,29 @@ def main():
            if not options.dryrun:
                doujinshi.downloader = downloader
-                doujinshi.download(regenerate_cbz=options.regenerate_cbz)
+
                if doujinshi.check_if_need_download(options):
                    doujinshi.download()
                else:
                    logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
            if options.generate_metadata:
-                table = doujinshi.table
+                generate_metadata_file(options.output_dir, doujinshi)
                generate_metadata_file(options.output_dir, table, doujinshi)
            if options.is_save_download_history:
                with DB() as db:
                    db.add_one(doujinshi.id)
-            if not options.is_nohtml and not options.is_cbz and not options.is_pdf:
+            if not options.is_nohtml:
                generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template'])
-            elif options.is_cbz:
+
-                generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir, True, options.move_to_folder)
+            if options.is_cbz:
-            elif options.is_pdf:
+                generate_doc('cbz', options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder,
-                generate_pdf(options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder)
+                             options.regenerate)
            if options.is_pdf:
                generate_doc('pdf', options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder,
                             options.regenerate)
        if options.main_viewer:
            generate_main_html(options.output_dir)
--- a/nhentai/doujinshi.py
+++ b/nhentai/doujinshi.py
@@ -1,4 +1,5 @@
 # coding: utf-8
 import os
 from tabulate import tabulate
@@ -55,6 +56,7 @@ class Doujinshi(object):
            ['Parodies', self.info.parodies],
            ['Doujinshi', self.name],
            ['Subtitle', self.info.subtitle],
            ['Date', self.info.date],
            ['Characters', self.info.characters],
            ['Authors', self.info.artists],
            ['Groups', self.info.groups],
@@ -70,7 +72,33 @@ class Doujinshi(object):
    def show(self):
        logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}')
-    def download(self, regenerate_cbz=False):
+    def check_if_need_download(self, options):
        """Decide whether this doujinshi has to be downloaded (again).

        Reads ``options.regenerate``, ``options.is_pdf`` and ``options.is_cbz``
        and looks at the file system under ``self.downloader.path``.

        Returns ``False`` only when the doujinshi directory already exists,
        regeneration was not requested, and a PDF/CBZ file for this doujinshi
        is found either next to that directory or inside it. Returns ``True``
        in every other case.
        """
        base_path = os.path.join(self.downloader.path, self.filename)
        # The doujinshi directory does not exist (or is not a directory),
        # so a download is definitely needed.
        if not (os.path.exists(base_path) and os.path.isdir(base_path)):
            return True
        # Regeneration was requested, so report that a download is needed.
        if options.regenerate:
            return True
        # PDF is checked first, so it wins when both PDF and CBZ are requested.
        if options.is_pdf:
            file_ext = 'pdf'
        elif options.is_cbz:
            file_ext = 'cbz'
        else:
            # Neither PDF nor CBZ output was requested: download again.
            return True
        # The PDF or CBZ file already exists (beside the directory or inside it),
        # so there is no need to download again.
        if os.path.exists(f'{base_path}.{file_ext}') or os.path.exists(f'{base_path}/{self.filename}.{file_ext}'):
            return False
        # Fallback: no generated file was found.
        return True
    def download(self):
        logger.info(f'Starting to download doujinshi: {self.name}')
        if self.downloader:
            download_queue = []
@@ -80,9 +108,10 @@ class Doujinshi(object):
            for i in range(1, min(self.pages, len(self.ext)) + 1):
                download_queue.append(f'{IMAGE_URL}/{self.img_id}/{i}.{self.ext[i-1]}')
-            self.downloader.start_download(download_queue, self.filename, regenerate_cbz=regenerate_cbz)
+            return self.downloader.start_download(download_queue, self.filename)
        else:
            logger.critical('Downloader has not been loaded')
            return False
 if __name__ == '__main__':
--- a/nhentai/downloader.py
+++ b/nhentai/downloader.py
@@ -57,7 +57,7 @@ class Downloader(Singleton):
        save_file_path = os.path.join(folder, base_filename.zfill(3) + extension)
        try:
            if os.path.exists(save_file_path):
-                logger.warning(f'Ignored exists file: {save_file_path}')
+                logger.warning(f'Skipped download: {save_file_path} already exists')
                return 1, url
            response = None
@@ -115,18 +115,13 @@ class Downloader(Singleton):
        return 1, url
-    def start_download(self, queue, folder='', regenerate_cbz=False):
+    def start_download(self, queue, folder='') -> bool:
        if not isinstance(folder, (str, )):
            folder = str(folder)
        if self.path:
            folder = os.path.join(self.path, folder)
        if os.path.exists(folder + '.cbz'):
            if not regenerate_cbz:
                logger.warning(f'CBZ file "{folder}.cbz" exists, ignored download request')
                return
        logger.info(f'Doujinshi will be saved at "{folder}"')
        if not os.path.exists(folder):
            try:
@@ -134,9 +129,9 @@ class Downloader(Singleton):
            except EnvironmentError as e:
                logger.critical(str(e))
-        else:
+        if os.getenv('DEBUG', None) == 'NODOWNLOAD':
-            logger.warning(f'Path "{folder}" already exist.')
+            # Assuming we want to continue with rest of process.
-
+            return True
        queue = [(self, url, folder, constant.CONFIG['proxy']) for url in queue]
        pool = multiprocessing.Pool(self.size, init_worker)
@@ -145,6 +140,8 @@ class Downloader(Singleton):
        pool.close()
        pool.join()
        return True
 def download_wrapper(obj, url, folder='', proxy=None):
    if sys.platform == 'darwin' or semaphore.get_value():
--- a/nhentai/serializer.py
+++ b/nhentai/serializer.py
@@ -22,7 +22,7 @@ def serialize_json(doujinshi, output_dir):
        metadata['group'] = [i.strip() for i in doujinshi.info.groups.split(',')]
    if doujinshi.info.languages:
        metadata['language'] = [i.strip() for i in doujinshi.info.languages.split(',')]
-    metadata['category'] = doujinshi.info.categories
+    metadata['category'] = [i.strip() for i in doujinshi.info.categories.split(',')]
    metadata['URL'] = doujinshi.url
    metadata['Pages'] = doujinshi.pages
--- a/nhentai/utils.py
+++ b/nhentai/utils.py
@@ -5,14 +5,16 @@ import re
 import os
 import zipfile
 import shutil
 import requests
 import sqlite3
 import urllib.parse
 from typing import Optional, Tuple
 from nhentai import constant
 from nhentai.logger import logger
 from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database
 MAX_FIELD_LENGTH = 100
@@ -38,7 +40,8 @@ def check_cookie():
    username = re.findall('"/users/[0-9]+/(.*?)"', response.text)
    if not username:
-        logger.warning('Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie')
+        logger.warning(
            'Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie')
    else:
        logger.log(16, f'Login successfully! Your username: {username[0]}')
@@ -64,13 +67,31 @@ def readfile(path):
        return file.read()
-def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
+def parse_doujinshi_obj(
-    image_html = ''
+        output_dir: str,
        doujinshi_obj=None,
        file_type: str = ''
 ) -> Tuple[str, str]:
    filename = './doujinshi' + file_type
    doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
    if doujinshi_obj is not None:
-        doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
+        _filename = f'{doujinshi_obj.filename}.{file_type}'
-    else:
+
-        doujinshi_dir = '.'
+        if file_type == 'cbz':
            serialize_comic_xml(doujinshi_obj, doujinshi_dir)
        if file_type == 'pdf':
            _filename = _filename.replace('/', '-')
        filename = os.path.join(output_dir, _filename)
    return doujinshi_dir, filename
 def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '.html')
    image_html = ''
    if not os.path.exists(doujinshi_dir):
        logger.warning(f'Path "{doujinshi_dir}" does not exist, creating.')
@@ -148,7 +169,7 @@ def generate_main_html(output_dir='./'):
        else:
            title = 'nHentai HTML Viewer'
-        image_html += element.format(FOLDER=folder, IMAGE=image, TITLE=title)
+        image_html += element.format(FOLDER=urllib.parse.quote(folder), IMAGE=image, TITLE=title)
    if image_html == '':
        logger.warning('No index.html found, --gen-main paused.')
        return
@@ -158,94 +179,65 @@ def generate_main_html(output_dir='./'):
            f.write(data.encode('utf-8'))
        shutil.copy(os.path.dirname(__file__) + '/viewer/logo.png', './')
        set_js_database()
-        logger.log(16, f'Main Viewer has been written to "{output_dir}main.html"')
+        output_dir = output_dir[:-1] if output_dir.endswith('/') else output_dir
        logger.log(16, f'Main Viewer has been written to "{output_dir}/main.html"')
    except Exception as e:
        logger.warning(f'Writing Main Viewer failed ({e})')
-def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_comic_info=True, move_to_folder=False):
+def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, rm_origin_dir=False,
-    if doujinshi_obj is not None:
+                 move_to_folder=False, regenerate=False):
        doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
        if os.path.exists(doujinshi_dir+".cbz"):
            logger.warning(f'Comic Book CBZ file exists, skip "{doujinshi_dir}"')
            return
        if write_comic_info:
            serialize_comic_xml(doujinshi_obj, doujinshi_dir)
        cbz_filename = os.path.join(os.path.join(doujinshi_dir, '..'), f'{doujinshi_obj.filename}.cbz')
    else:
        cbz_filename = './doujinshi.cbz'
        doujinshi_dir = '.'
-    file_list = os.listdir(doujinshi_dir)
+    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
    file_list.sort()
-    logger.info(f'Writing CBZ file to path: {cbz_filename}')
+    if os.path.exists(f'{doujinshi_dir}.{file_type}') and not regenerate:
-    with zipfile.ZipFile(cbz_filename, 'w') as cbz_pf:
+        logger.info(f'Skipped {file_type} file generation: {doujinshi_dir}.{file_type} already exists')
-        for image in file_list:
+        return
-            image_path = os.path.join(doujinshi_dir, image)
+
-            cbz_pf.write(image_path, image)
+    if file_type == 'cbz':
        file_list = os.listdir(doujinshi_dir)
        file_list.sort()
        logger.info(f'Writing CBZ file to path: {filename}')
        with zipfile.ZipFile(filename, 'w') as cbz_pf:
            for image in file_list:
                image_path = os.path.join(doujinshi_dir, image)
                cbz_pf.write(image_path, image)
        logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
    elif file_type == 'pdf':
        try:
            import img2pdf
            """Write images to a PDF file using img2pdf."""
            file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif'))]
            file_list.sort()
            logger.info(f'Writing PDF file to path: {filename}')
            with open(filename, 'wb') as pdf_f:
                full_path_list = (
                    [os.path.join(doujinshi_dir, image) for image in file_list]
                )
                pdf_f.write(img2pdf.convert(full_path_list, rotation=img2pdf.Rotation.ifvalid))
            logger.log(16, f'PDF file has been written to "{filename}"')
        except ImportError:
            logger.error("Please install img2pdf package by using pip.")
    if rm_origin_dir:
        shutil.rmtree(doujinshi_dir, ignore_errors=True)
    if move_to_folder:
-            for filename in os.listdir(doujinshi_dir):
+        for filename in os.listdir(doujinshi_dir):
-                file_path = os.path.join(doujinshi_dir, filename)
+            file_path = os.path.join(doujinshi_dir, filename)
-                if os.path.isfile(file_path):
+            if os.path.isfile(file_path):
-                    try:
+                try:
-                        os.remove(file_path)
+                    os.remove(file_path)
-                    except Exception as e:
+                except Exception as e:
-                        print(f"Error deleting file: {e}")
+                    print(f"Error deleting file: {e}")
-            shutil.move(cbz_filename, doujinshi_dir)
+        shutil.move(filename, doujinshi_dir)
    logger.log(16, f'Comic Book CBZ file has been written to "{doujinshi_dir}"')
 def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, move_to_folder=False):
    """Bundle the files of a doujinshi directory into one PDF using img2pdf.

    Args:
        output_dir: Directory that is joined with ``doujinshi_obj.filename``
            to locate the downloaded files.
        doujinshi_obj: Object providing ``filename``. When ``None``, the
            current directory is read and the PDF is written to
            ``./doujinshi.pdf``.
        rm_origin_dir: When true, the source directory is removed after the
            PDF has been written.
        move_to_folder: When true, regular files in the source directory are
            deleted and the PDF is moved into that directory.

    A missing ``img2pdf`` package is logged as an error instead of raised.
    """
    try:
        # Imported here so that a missing package reaches the ImportError
        # handler at the bottom of this function.
        import img2pdf
        """Write images to a PDF file using img2pdf."""
        if doujinshi_obj is not None:
            doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
            # '/' is replaced with '-' in the PDF name — presumably so the
            # name stays a single path component; confirm.
            filename = doujinshi_obj.filename.replace('/', '-')
            # The PDF is written to the parent of the source directory.
            pdf_filename = os.path.join(
                os.path.join(doujinshi_dir, '..'),
                f'{filename}.pdf'
            )
        else:
            pdf_filename = './doujinshi.pdf'
            doujinshi_dir = '.'
        # Every directory entry is handed to img2pdf, sorted by name;
        # no filtering by file extension happens here.
        file_list = os.listdir(doujinshi_dir)
        file_list.sort()
        logger.info(f'Writing PDF file to path: {pdf_filename}')
        with open(pdf_filename, 'wb') as pdf_f:
            full_path_list = (
                [os.path.join(doujinshi_dir, image) for image in file_list]
            )
            pdf_f.write(img2pdf.convert(full_path_list, rotation=img2pdf.Rotation.ifvalid))
        if rm_origin_dir:
            shutil.rmtree(doujinshi_dir, ignore_errors=True)
        # NOTE(review): if rm_origin_dir is also set, the directory listed
        # below was just removed, so os.listdir looks like it would raise —
        # confirm whether both flags are ever combined.
        if move_to_folder:
            # Clear out the regular files, then move the PDF into the directory.
            for filename in os.listdir(doujinshi_dir):
                file_path = os.path.join(doujinshi_dir, filename)
                if os.path.isfile(file_path):
                    try:
                        os.remove(file_path)
                    except Exception as e:
                        print(f"Error deleting file: {e}")
            shutil.move(pdf_filename, doujinshi_dir)
        logger.log(16, f'PDF file has been written to "{doujinshi_dir}"')
    except ImportError:
        logger.error("Please install img2pdf package by using pip.")
 def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
@@ -301,17 +293,11 @@ def paging(page_string):
    return page_list
-def generate_metadata_file(output_dir, table, doujinshi_obj=None):
+def generate_metadata_file(output_dir, doujinshi_obj):
    logger.info('Writing Metadata Info')
-    if doujinshi_obj is not None:
+    info_txt_path = os.path.join(output_dir, doujinshi_obj.filename, 'info.txt')
        doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
    else:
        doujinshi_dir = '.'
-    logger.info(doujinshi_dir)
+    f = open(info_txt_path, 'w', encoding='utf-8')
    f = open(os.path.join(doujinshi_dir, 'info.txt'), 'w', encoding='utf-8')
    fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
              'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
@@ -323,10 +309,11 @@ def generate_metadata_file(output_dir, table, doujinshi_obj=None):
    for i in range(len(fields)):
        f.write(f'{fields[i]}: ')
        if fields[i] in special_fields:
-            f.write(str(table[special_fields.index(fields[i])][1]))
+            f.write(str(doujinshi_obj.table[special_fields.index(fields[i])][1]))
        f.write('\n')
    f.close()
    logger.log(16, f'Metadata Info has been written to "{info_txt_path}"')
 class DB(object):
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "nhentai"
-version = "0.5.7"
+version = "0.5.8"
 description = "nhentai doujinshi downloader"
 authors = ["Ricter Z <ricterzheng@gmail.com>"]
 license = "MIT"
Author	SHA1	Message	Date
ricterz	f6e9d08fc7	0.5.8 #343	2024-09-22 14:42:02 +08:00
ricterz	9c1c2ea069	improve download logic #343	2024-09-22 14:39:32 +08:00
ricterz	984ae4262c	generate_metadata_file no need to use parse_doujinshi_obj	2024-09-22 14:11:55 +08:00
ricterz	cbf9448ed9	improve #342	2024-09-22 13:35:07 +08:00
ricterz	16bac45f02	generate html viewer automatically after download #342	2024-09-22 12:30:55 +08:00
normalizedwater546	7fa9193112	fix: non-image files in pdf conversion causing crash	2024-09-22 02:05:32 +00:00
normalizedwater546	a05a308e71	fix: check if metadata file is downloaded before skipping	2024-09-22 01:39:40 +00:00
normalizedwater546	5a29eaf775	fix: add file_type check to downloader If you wanted to generate both .cbz and .pdf, the .pdf will be skipped if .cbz was generated first.	2024-09-22 01:38:54 +00:00
normalizedwater546	497eb6fe50	fix: remove warning for folder already exists in downloader Nothing is wrong with the folder already existing -- silently ignore and move on. Might still have other files inside that haven't been downloaded yet.	2024-09-22 01:00:06 +00:00
normalizedwater546	4bfe104714	refactor: de-dupe doujinshi_obj parsers	2024-09-22 00:44:06 +00:00
normalizedwater546	12364e980c	fix process continuing despite cbz download request skipped	2024-09-22 00:43:10 +00:00
ricterz	b51e812449	fix #330	2024-09-21 11:49:22 +08:00
ricterz	0ed5fa1931	fix #320	2024-09-21 00:43:14 +08:00
ricterz	7f655b0f10	fix #295	2024-09-21 00:32:10 +08:00
ricterz	dec3f44542	add some debug hack	2024-09-21 00:21:01 +08:00