0.5.8 #343

improve download logic #343
generate_metadata_file no need to use parse_doujinshi_obj
2026-01-07 22:11:37 +01:00 · 2024-09-22 14:42:02 +08:00 · 2024-09-22 14:39:32 +08:00 · 2024-09-22 14:11:55 +08:00 · 2024-09-22 13:35:07 +08:00 · 2024-09-22 12:30:55 +08:00
8 changed files with 140 additions and 120 deletions
--- a/nhentai/__init__.py
+++ b/nhentai/__init__.py
@@ -1,3 +1,3 @@
-__version__ = '0.5.7'
+__version__ = '0.5.8'
 __author__ = 'RicterZ'
 __email__ = 'ricterzheng@gmail.com'
--- a/nhentai/cmdline.py
+++ b/nhentai/cmdline.py
@@ -118,8 +118,8 @@ def cmd_parser():
                      help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
    parser.add_option('--meta', dest='generate_metadata', action='store_true',
                      help='generate a metadata file in doujinshi format')
-    parser.add_option('--regenerate-cbz', dest='regenerate_cbz', action='store_true', default=False,
-                      help='regenerate the cbz file if exists')
+    parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False,
+                      help='regenerate the cbz or pdf file if exists')

    # nhentai options
    parser.add_option('--cookie', type='str', dest='cookie', action='store',
--- a/nhentai/command.py
+++ b/nhentai/command.py
@@ -11,7 +11,7 @@ from nhentai.doujinshi import Doujinshi
 from nhentai.downloader import Downloader
 from nhentai.logger import logger
 from nhentai.constant import BASE_URL
-from nhentai.utils import generate_html, generate_cbz, generate_main_html, generate_pdf, generate_metadata_file, \
+from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \
    paging, check_cookie, signal_handler, DB


@@ -87,22 +87,29 @@ def main():

            if not options.dryrun:
                doujinshi.downloader = downloader
-                doujinshi.download(regenerate_cbz=options.regenerate_cbz)
+
+                if doujinshi.check_if_need_download(options):
+                    doujinshi.download()
+                else:
+                    logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')

            if options.generate_metadata:
-                table = doujinshi.table
-                generate_metadata_file(options.output_dir, table, doujinshi)
+                generate_metadata_file(options.output_dir, doujinshi)

            if options.is_save_download_history:
                with DB() as db:
                    db.add_one(doujinshi.id)

-            if not options.is_nohtml and not options.is_cbz and not options.is_pdf:
+            if not options.is_nohtml:
                generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template'])
-            elif options.is_cbz:
-                generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir, True, options.move_to_folder)
-            elif options.is_pdf:
-                generate_pdf(options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder)
+
+            if options.is_cbz:
+                generate_doc('cbz', options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder,
+                             options.regenerate)
+
+            if options.is_pdf:
+                generate_doc('pdf', options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder,
+                             options.regenerate)

        if options.main_viewer:
            generate_main_html(options.output_dir)
--- a/nhentai/doujinshi.py
+++ b/nhentai/doujinshi.py
@@ -1,4 +1,5 @@
 # coding: utf-8
+import os

 from tabulate import tabulate

@@ -55,6 +56,7 @@ class Doujinshi(object):
            ['Parodies', self.info.parodies],
            ['Doujinshi', self.name],
            ['Subtitle', self.info.subtitle],
+            ['Date', self.info.date],
            ['Characters', self.info.characters],
            ['Authors', self.info.artists],
            ['Groups', self.info.groups],
@@ -70,7 +72,33 @@ class Doujinshi(object):
    def show(self):
        logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}')

-    def download(self, regenerate_cbz=False):
+    def check_if_need_download(self, options):
+        base_path = os.path.join(self.downloader.path, self.filename)
+
+        # the doujinshi directory does not exist, so we definitely need to download
+        if not (os.path.exists(base_path) and os.path.isdir(base_path)):
+            return True
+
+        # regenerate, we need to re-download from nhentai
+        if options.regenerate:
+            return True
+
+        if options.is_pdf:
+            file_ext = 'pdf'
+        elif options.is_cbz:
+            file_ext = 'cbz'
+        else:
+            # re-download
+            return True
+
+        # the pdf or cbz file already exists, so we don't need to re-download it
+        if os.path.exists(f'{base_path}.{file_ext}') or os.path.exists(f'{base_path}/{self.filename}.{file_ext}'):
+            return False
+
+        # fallback
+        return True
+
+    def download(self):
        logger.info(f'Starting to download doujinshi: {self.name}')
        if self.downloader:
            download_queue = []
@@ -80,9 +108,10 @@ class Doujinshi(object):
            for i in range(1, min(self.pages, len(self.ext)) + 1):
                download_queue.append(f'{IMAGE_URL}/{self.img_id}/{i}.{self.ext[i-1]}')

-            self.downloader.start_download(download_queue, self.filename, regenerate_cbz=regenerate_cbz)
+            return self.downloader.start_download(download_queue, self.filename)
        else:
            logger.critical('Downloader has not been loaded')
+            return False


 if __name__ == '__main__':
--- a/nhentai/downloader.py
+++ b/nhentai/downloader.py
@@ -57,7 +57,7 @@ class Downloader(Singleton):
        save_file_path = os.path.join(folder, base_filename.zfill(3) + extension)
        try:
            if os.path.exists(save_file_path):
-                logger.warning(f'Ignored exists file: {save_file_path}')
+                logger.warning(f'Skipped download: {save_file_path} already exists')
                return 1, url

            response = None
@@ -115,18 +115,13 @@ class Downloader(Singleton):

        return 1, url

-    def start_download(self, queue, folder='', regenerate_cbz=False):
+    def start_download(self, queue, folder='') -> bool:
        if not isinstance(folder, (str, )):
            folder = str(folder)

        if self.path:
            folder = os.path.join(self.path, folder)

-        if os.path.exists(folder + '.cbz'):
-            if not regenerate_cbz:
-                logger.warning(f'CBZ file "{folder}.cbz" exists, ignored download request')
-                return
-
        logger.info(f'Doujinshi will be saved at "{folder}"')
        if not os.path.exists(folder):
            try:
@@ -134,9 +129,9 @@ class Downloader(Singleton):
            except EnvironmentError as e:
                logger.critical(str(e))

-        else:
-            logger.warning(f'Path "{folder}" already exist.')
-
+        if os.getenv('DEBUG', None) == 'NODOWNLOAD':
+            # Assuming we want to continue with rest of process.
+            return True
        queue = [(self, url, folder, constant.CONFIG['proxy']) for url in queue]

        pool = multiprocessing.Pool(self.size, init_worker)
@@ -145,6 +140,8 @@ class Downloader(Singleton):
        pool.close()
        pool.join()

+        return True
+

 def download_wrapper(obj, url, folder='', proxy=None):
    if sys.platform == 'darwin' or semaphore.get_value():
--- a/nhentai/serializer.py
+++ b/nhentai/serializer.py
@@ -22,7 +22,7 @@ def serialize_json(doujinshi, output_dir):
        metadata['group'] = [i.strip() for i in doujinshi.info.groups.split(',')]
    if doujinshi.info.languages:
        metadata['language'] = [i.strip() for i in doujinshi.info.languages.split(',')]
-    metadata['category'] = doujinshi.info.categories
+    metadata['category'] = [i.strip() for i in doujinshi.info.categories.split(',')]
    metadata['URL'] = doujinshi.url
    metadata['Pages'] = doujinshi.pages

--- a/nhentai/utils.py
+++ b/nhentai/utils.py
@@ -5,14 +5,16 @@ import re
 import os
 import zipfile
 import shutil
+
 import requests
 import sqlite3
+import urllib.parse
+from typing import Optional, Tuple

 from nhentai import constant
 from nhentai.logger import logger
 from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database

-
 MAX_FIELD_LENGTH = 100


@@ -38,7 +40,8 @@ def check_cookie():

    username = re.findall('"/users/[0-9]+/(.*?)"', response.text)
    if not username:
-        logger.warning('Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie')
+        logger.warning(
+            'Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie')
    else:
        logger.log(16, f'Login successfully! Your username: {username[0]}')

@@ -64,13 +67,31 @@ def readfile(path):
        return file.read()


-def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
-    image_html = ''
+def parse_doujinshi_obj(
+        output_dir: str,
+        doujinshi_obj=None,
+        file_type: str = ''
+) -> Tuple[str, str]:
+    filename = './doujinshi' + file_type

+    doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
    if doujinshi_obj is not None:
-        doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
-    else:
-        doujinshi_dir = '.'
+        _filename = f'{doujinshi_obj.filename}.{file_type}'
+
+        if file_type == 'cbz':
+            serialize_comic_xml(doujinshi_obj, doujinshi_dir)
+
+        if file_type == 'pdf':
+            _filename = _filename.replace('/', '-')
+
+        filename = os.path.join(output_dir, _filename)
+
+    return doujinshi_dir, filename
+
+
+def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
+    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '.html')
+    image_html = ''

    if not os.path.exists(doujinshi_dir):
        logger.warning(f'Path "{doujinshi_dir}" does not exist, creating.')
@@ -148,7 +169,7 @@ def generate_main_html(output_dir='./'):
        else:
            title = 'nHentai HTML Viewer'

-        image_html += element.format(FOLDER=folder, IMAGE=image, TITLE=title)
+        image_html += element.format(FOLDER=urllib.parse.quote(folder), IMAGE=image, TITLE=title)
    if image_html == '':
        logger.warning('No index.html found, --gen-main paused.')
        return
@@ -158,94 +179,65 @@ def generate_main_html(output_dir='./'):
            f.write(data.encode('utf-8'))
        shutil.copy(os.path.dirname(__file__) + '/viewer/logo.png', './')
        set_js_database()
-        logger.log(16, f'Main Viewer has been written to "{output_dir}main.html"')
+        output_dir = output_dir[:-1] if output_dir.endswith('/') else output_dir
+        logger.log(16, f'Main Viewer has been written to "{output_dir}/main.html"')
    except Exception as e:
        logger.warning(f'Writing Main Viewer failed ({e})')


-def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_comic_info=True, move_to_folder=False):
-    if doujinshi_obj is not None:
-        doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
-        if os.path.exists(doujinshi_dir+".cbz"):
-            logger.warning(f'Comic Book CBZ file exists, skip "{doujinshi_dir}"')
-            return
-        if write_comic_info:
-            serialize_comic_xml(doujinshi_obj, doujinshi_dir)
-        cbz_filename = os.path.join(os.path.join(doujinshi_dir, '..'), f'{doujinshi_obj.filename}.cbz')
-    else:
-        cbz_filename = './doujinshi.cbz'
-        doujinshi_dir = '.'
+def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, rm_origin_dir=False,
+                 move_to_folder=False, regenerate=False):

-    file_list = os.listdir(doujinshi_dir)
-    file_list.sort()
+    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)

-    logger.info(f'Writing CBZ file to path: {cbz_filename}')
-    with zipfile.ZipFile(cbz_filename, 'w') as cbz_pf:
-        for image in file_list:
-            image_path = os.path.join(doujinshi_dir, image)
-            cbz_pf.write(image_path, image)
+    if os.path.exists(f'{doujinshi_dir}.{file_type}') and not regenerate:
+        logger.info(f'Skipped {file_type} file generation: {doujinshi_dir}.{file_type} already exists')
+        return
+
+    if file_type == 'cbz':
+        file_list = os.listdir(doujinshi_dir)
+        file_list.sort()
+
+        logger.info(f'Writing CBZ file to path: {filename}')
+        with zipfile.ZipFile(filename, 'w') as cbz_pf:
+            for image in file_list:
+                image_path = os.path.join(doujinshi_dir, image)
+                cbz_pf.write(image_path, image)
+
+        logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
+    elif file_type == 'pdf':
+        try:
+            import img2pdf
+
+            """Write images to a PDF file using img2pdf."""
+            file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif'))]
+            file_list.sort()
+
+            logger.info(f'Writing PDF file to path: {filename}')
+            with open(filename, 'wb') as pdf_f:
+                full_path_list = (
+                    [os.path.join(doujinshi_dir, image) for image in file_list]
+                )
+                pdf_f.write(img2pdf.convert(full_path_list, rotation=img2pdf.Rotation.ifvalid))
+
+            logger.log(16, f'PDF file has been written to "{filename}"')
+
+        except ImportError:
+            logger.error("Please install img2pdf package by using pip.")

    if rm_origin_dir:
        shutil.rmtree(doujinshi_dir, ignore_errors=True)

    if move_to_folder:
-            for filename in os.listdir(doujinshi_dir):
-                file_path = os.path.join(doujinshi_dir, filename)
-                if os.path.isfile(file_path):
-                    try:
-                        os.remove(file_path)
-                    except Exception as e:
-                        print(f"Error deleting file: {e}")
+        for filename in os.listdir(doujinshi_dir):
+            file_path = os.path.join(doujinshi_dir, filename)
+            if os.path.isfile(file_path):
+                try:
+                    os.remove(file_path)
+                except Exception as e:
+                    print(f"Error deleting file: {e}")

-            shutil.move(cbz_filename, doujinshi_dir)
-
-    logger.log(16, f'Comic Book CBZ file has been written to "{doujinshi_dir}"')
-
-
-def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, move_to_folder=False):
-    try:
-        import img2pdf
-
-        """Write images to a PDF file using img2pdf."""
-        if doujinshi_obj is not None:
-            doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
-            filename = doujinshi_obj.filename.replace('/', '-')
-            pdf_filename = os.path.join(
-                os.path.join(doujinshi_dir, '..'),
-                f'{filename}.pdf'
-            )
-        else:
-            pdf_filename = './doujinshi.pdf'
-            doujinshi_dir = '.'
-
-        file_list = os.listdir(doujinshi_dir)
-        file_list.sort()
-
-        logger.info(f'Writing PDF file to path: {pdf_filename}')
-        with open(pdf_filename, 'wb') as pdf_f:
-            full_path_list = (
-                [os.path.join(doujinshi_dir, image) for image in file_list]
-            )
-            pdf_f.write(img2pdf.convert(full_path_list, rotation=img2pdf.Rotation.ifvalid))
-
-        if rm_origin_dir:
-            shutil.rmtree(doujinshi_dir, ignore_errors=True)
-
-        if move_to_folder:
-            for filename in os.listdir(doujinshi_dir):
-                file_path = os.path.join(doujinshi_dir, filename)
-                if os.path.isfile(file_path):
-                    try:
-                        os.remove(file_path)
-                    except Exception as e:
-                        print(f"Error deleting file: {e}")
-
-            shutil.move(pdf_filename, doujinshi_dir)
-
-        logger.log(16, f'PDF file has been written to "{doujinshi_dir}"')
-
-    except ImportError:
-        logger.error("Please install img2pdf package by using pip.")
+        shutil.move(filename, doujinshi_dir)


 def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
@@ -301,17 +293,11 @@ def paging(page_string):
    return page_list


-def generate_metadata_file(output_dir, table, doujinshi_obj=None):
-    logger.info('Writing Metadata Info')
+def generate_metadata_file(output_dir, doujinshi_obj):

-    if doujinshi_obj is not None:
-        doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
-    else:
-        doujinshi_dir = '.'
+    info_txt_path = os.path.join(output_dir, doujinshi_obj.filename, 'info.txt')

-    logger.info(doujinshi_dir)
-
-    f = open(os.path.join(doujinshi_dir, 'info.txt'), 'w', encoding='utf-8')
+    f = open(info_txt_path, 'w', encoding='utf-8')

    fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
              'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
@@ -323,10 +309,11 @@ def generate_metadata_file(output_dir, table, doujinshi_obj=None):
    for i in range(len(fields)):
        f.write(f'{fields[i]}: ')
        if fields[i] in special_fields:
-            f.write(str(table[special_fields.index(fields[i])][1]))
+            f.write(str(doujinshi_obj.table[special_fields.index(fields[i])][1]))
        f.write('\n')

    f.close()
+    logger.log(16, f'Metadata Info has been written to "{info_txt_path}"')


 class DB(object):
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "nhentai"
-version = "0.5.7"
+version = "0.5.8"
 description = "nhentai doujinshi downloader"
 authors = ["Ricter Z <ricterzheng@gmail.com>"]
 license = "MIT"
Author	SHA1	Message	Date
ricterz	f6e9d08fc7	0.5.8 #343	2024-09-22 14:42:02 +08:00
ricterz	9c1c2ea069	improve download logic #343	2024-09-22 14:39:32 +08:00
ricterz	984ae4262c	generate_metadata_file no need to use parse_doujinshi_obj	2024-09-22 14:11:55 +08:00
ricterz	cbf9448ed9	improve #342	2024-09-22 13:35:07 +08:00
ricterz	16bac45f02	generate html viewer automatically after download #342	2024-09-22 12:30:55 +08:00
normalizedwater546	7fa9193112	fix: non-image files in pdf conversion causing crash	2024-09-22 02:05:32 +00:00
normalizedwater546	a05a308e71	fix: check if metadata file is downloaded before skipping	2024-09-22 01:39:40 +00:00
normalizedwater546	5a29eaf775	fix: add file_type check to downloader — If you wanted to generate both .cbz and .pdf, the .pdf will be skipped if .cbz was generated first.	2024-09-22 01:38:54 +00:00
normalizedwater546	497eb6fe50	fix: remove warning for folder already exists in downloader — Nothing is wrong with the folder already existing -- silently ignore and move on. Might still have other files inside that haven't been downloaded yet.	2024-09-22 01:00:06 +00:00
normalizedwater546	4bfe104714	refactor: de-dupe doujinshi_obj parsers	2024-09-22 00:44:06 +00:00
normalizedwater546	12364e980c	fix process continuing despite cbz download request skipped	2024-09-22 00:43:10 +00:00
ricterz	b51e812449	fix #330	2024-09-21 11:49:22 +08:00
ricterz	0ed5fa1931	fix #320	2024-09-21 00:43:14 +08:00
ricterz	7f655b0f10	fix #295	2024-09-21 00:32:10 +08:00
ricterz	dec3f44542	add some debug hack	2024-09-21 00:21:01 +08:00