Merge pull request #405 from luolili233/master

Fix parsing bug in ComicInfo.xml, fixes #404
Update serializer.py
2025-09-18 11:38:41 +02:00 · 2025-05-29 21:36:52 +08:00 · 2025-05-28 14:58:37 +08:00 · 2025-03-26 22:57:29 +08:00 · 2025-03-26 22:56:11 +08:00 · 2025-03-26 15:14:15 +08:00
13 changed files with 338 additions and 212 deletions
--- a/README.rst
+++ b/README.rst
@@ -22,7 +22,7 @@ From Github:

    git clone https://github.com/RicterZ/nhentai
    cd nhentai
-    python setup.py install
+    pip install --no-cache-dir .

 Build Docker container:

@@ -136,6 +136,8 @@ Format output doujinshi folder name:
 .. code-block:: bash

    nhentai --id 261100 --format '[%i]%s'
+    # for Windows
+    nhentai --id 261100 --format "[%%i]%%s"

 Supported doujinshi folder formatter:

@@ -148,6 +150,7 @@ Supported doujinshi folder formatter:
 - %p: Doujinshi pretty name
 - %ag: Doujinshi authors name or groups name

+Note: on the Windows operating system, please use a doubled "%" in the format string, such as "%%i".

 Other options:

--- a/nhentai/init.py
+++ b/nhentai/init.py
@@ -1,3 +1,3 @@
-__version__ = '0.5.20'
+__version__ = '0.6.0-beta'
 __author__ = 'RicterZ'
 __email__ = 'ricterzheng@gmail.com'
--- a/nhentai/cmdline.py
+++ b/nhentai/cmdline.py
@@ -6,10 +6,10 @@ import json
 import nhentai.constant as constant

 from urllib.parse import urlparse
-from optparse import OptionParser
+from argparse import ArgumentParser

 from nhentai import __version__
-from nhentai.utils import generate_html, generate_main_html, DB
+from nhentai.utils import generate_html, generate_main_html, DB, EXTENSIONS
 from nhentai.logger import logger
 from nhentai.constant import PATH_SEPARATOR

@@ -57,103 +57,133 @@ def callback(option, _opt_str, _value, parser):
 def cmd_parser():
    load_config()

-    parser = OptionParser('\n  nhentai --search [keyword] --download'
-                          '\n  NHENTAI=https://nhentai-mirror-url/ nhentai --id [ID ...]'
-                          '\n  nhentai --file [filename]'
-                          '\n\nEnvironment Variable:\n'
-                          '  NHENTAI                 nhentai mirror url')
+    parser = ArgumentParser(
+        description='\n  nhentai --search [keyword] --download'
+                    '\n  NHENTAI=https://nhentai-mirror-url/ nhentai --id [ID ...]'
+                    '\n  nhentai --file [filename]'
+                    '\n\nEnvironment Variable:\n'
+                    '  NHENTAI                 nhentai mirror url'
+    )
+
    # operation options
-    parser.add_option('--download', '-D', dest='is_download', action='store_true',
-                      help='download doujinshi (for search results)')
-    parser.add_option('--show', '-S', dest='is_show', action='store_true',
-                      help='just show the doujinshi information')
+    parser.add_argument('--download', '-D', dest='is_download', action='store_true',
+                        help='download doujinshi (for search results)')
+    parser.add_argument('--no-download', dest='no_download', action='store_true', default=False,
+                        help='download doujinshi (for search results)')
+    parser.add_argument('--show', '-S', dest='is_show', action='store_true',
+                        help='just show the doujinshi information')

    # doujinshi options
-    parser.add_option('--id', dest='id', action='callback', callback=callback,
-                      help='doujinshi ids set, e.g. 167680 167681 167682')
-    parser.add_option('--search', '-s', type='string', dest='keyword', action='store',
-                      help='search doujinshi by keyword')
-    parser.add_option('--favorites', '-F', action='store_true', dest='favorites',
-                      help='list or download your favorites')
-    parser.add_option('--artist', '-a', action='store', dest='artist',
-                      help='list doujinshi by artist name')
+    parser.add_argument('--id', dest='id', nargs='+', type=int,
+                        help='doujinshi ids set, e.g. 167680 167681 167682')
+    parser.add_argument('--search', '-s', type=str, dest='keyword',
+                        help='search doujinshi by keyword')
+    parser.add_argument('--favorites', '-F', action='store_true', dest='favorites',
+                        help='list or download your favorites')
+    parser.add_argument('--artist', '-a', type=str, dest='artist',
+                        help='list doujinshi by artist name')

    # page options
-    parser.add_option('--page-all', dest='page_all', action='store_true', default=False,
-                      help='all search results')
-    parser.add_option('--page', '--page-range', type='string', dest='page', action='store',
-                      help='page number of search results. e.g. 1,2-5,14')
-    parser.add_option('--sorting', '--sort', dest='sorting', action='store', default='popular',
-                      help='sorting of doujinshi (recent / popular / popular-[today|week])',
-                      choices=['recent', 'popular', 'popular-today', 'popular-week', 'date'])
+    parser.add_argument('--page-all', dest='page_all', action='store_true', default=False,
+                        help='all search results')
+    parser.add_argument('--page', '--page-range', type=str, dest='page',
+                        help='page number of search results. e.g. 1,2-5,14')
+    parser.add_argument('--sorting', '--sort', dest='sorting', type=str, default='popular',
+                        help='sorting of doujinshi (recent / popular / popular-[today|week])',
+                        choices=['recent', 'popular', 'popular-today', 'popular-week', 'date'])

    # download options
-    parser.add_option('--output', '-o', type='string', dest='output_dir', action='store',
-                      default=f'.{PATH_SEPARATOR}',
-                      help='output dir')
-    parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
-                      help='thread count for downloading doujinshi')
-    parser.add_option('--timeout', '-T', type='int', dest='timeout', action='store', default=30,
-                      help='timeout for downloading doujinshi')
-    parser.add_option('--delay', '-d', type='int', dest='delay', action='store', default=0,
-                      help='slow down between downloading every doujinshi')
-    parser.add_option('--retry', type='int', dest='retry', action='store', default=3,
-                      help='retry times when downloading failed')
-    parser.add_option('--exit-on-fail', dest='exit_on_fail', action='store_true', default=False,
-                      help='exit on fail to prevent generating incomplete files')
-    parser.add_option('--proxy', type='string', dest='proxy', action='store',
-                      help='store a proxy, for example: -p "http://127.0.0.1:1080"')
-    parser.add_option('--file', '-f', type='string', dest='file', action='store',
-                      help='read gallery IDs from file.')
-    parser.add_option('--format', type='string', dest='name_format', action='store',
-                      help='format the saved folder name', default='[%i][%a][%t]')
-    parser.add_option('--dry-run', action='store_true', dest='dryrun', help='Dry run, skip file download')
+    parser.add_argument('--output', '-o', type=str, dest='output_dir', default='.',
+                        help='output dir')
+    parser.add_argument('--threads', '-t', type=int, dest='threads', default=5,
+                        help='thread count for downloading doujinshi')
+    parser.add_argument('--timeout', '-T', type=int, dest='timeout', default=30,
+                        help='timeout for downloading doujinshi')
+    parser.add_argument('--delay', '-d', type=int, dest='delay', default=0,
+                        help='slow down between downloading every doujinshi')
+    parser.add_argument('--retry', type=int, dest='retry', default=3,
+                        help='retry times when downloading failed')
+    parser.add_argument('--exit-on-fail', dest='exit_on_fail', action='store_true', default=False,
+                        help='exit on fail to prevent generating incomplete files')
+    parser.add_argument('--proxy', type=str, dest='proxy',
+                        help='store a proxy, for example: -p "http://127.0.0.1:1080"')
+    parser.add_argument('--file', '-f', type=str, dest='file',
+                        help='read gallery IDs from file.')
+    parser.add_argument('--format', type=str, dest='name_format', default='[%i][%a][%t]',
+                        help='format the saved folder name')

-    parser.add_option('--no-filename-padding', action='store_true', dest='no_filename_padding',
-                      default=False, help='no padding in the images filename, such as \'001.jpg\'')
+    parser.add_argument('--no-filename-padding', action='store_true', dest='no_filename_padding',
+                        default=False, help='no padding in the images filename, such as \'001.jpg\'')

    # generate options
-    parser.add_option('--html', dest='html_viewer', action='store_true',
-                      help='generate a html viewer at current directory')
-    parser.add_option('--no-html', dest='is_nohtml', action='store_true',
-                      help='don\'t generate HTML after downloading')
-    parser.add_option('--gen-main', dest='main_viewer', action='store_true',
-                      help='generate a main viewer contain all the doujin in the folder')
-    parser.add_option('--cbz', '-C', dest='is_cbz', action='store_true',
-                      help='generate Comic Book CBZ File')
-    parser.add_option('--pdf', '-P', dest='is_pdf', action='store_true',
-                      help='generate PDF file')
-    parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
-                      help='remove downloaded doujinshi dir when generated CBZ or PDF file')
-    parser.add_option('--move-to-folder', dest='move_to_folder', action='store_true', default=False,
-                      help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
-    parser.add_option('--meta', dest='generate_metadata', action='store_true',
-                      help='generate a metadata file in doujinshi format')
-    parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False,
-                      help='regenerate the cbz or pdf file if exists')
+    parser.add_argument('--html', dest='html_viewer', type=str, nargs='?', const='.',
+                        help='generate an HTML viewer in the specified directory, or scan all subfolders '
+                             'within the entire directory to generate the HTML viewer. By default, current '
+                             'working directory is used.')
+    parser.add_argument('--no-html', dest='is_nohtml', action='store_true',
+                        help='don\'t generate HTML after downloading')
+    parser.add_argument('--gen-main', dest='main_viewer', action='store_true',
+                        help='generate a main viewer contain all the doujin in the folder')
+    parser.add_argument('--cbz', '-C', dest='is_cbz', action='store_true',
+                        help='generate Comic Book CBZ File')
+    parser.add_argument('--pdf', '-P', dest='is_pdf', action='store_true',
+                        help='generate PDF file')
+
+    parser.add_argument('--meta', dest='generate_metadata', action='store_true', default=False,
+                        help='generate a metadata file in doujinshi format')
+    parser.add_argument('--update-meta', dest='update_metadata', action='store_true', default=False,
+                        help='update the metadata file of a doujinshi, update CBZ metadata if exists')
+
+    parser.add_argument('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
+                        help='remove downloaded doujinshi dir when generated CBZ or PDF file')
+    parser.add_argument('--move-to-folder', dest='move_to_folder', action='store_true', default=False,
+                        help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
+
+    parser.add_argument('--regenerate', dest='regenerate', action='store_true', default=False,
+                        help='regenerate the cbz or pdf file if exists')
+    parser.add_argument('--zip', action='store_true', help='Package into a single zip file')

    # nhentai options
-    parser.add_option('--cookie', type='str', dest='cookie', action='store',
-                      help='set cookie of nhentai to bypass Cloudflare captcha')
-    parser.add_option('--useragent', '--user-agent', type='str', dest='useragent', action='store',
-                      help='set useragent to bypass Cloudflare captcha')
-    parser.add_option('--language', type='str', dest='language', action='store',
-                      help='set default language to parse doujinshis')
-    parser.add_option('--clean-language', dest='clean_language', action='store_true', default=False,
-                      help='set DEFAULT as language to parse doujinshis')
-    parser.add_option('--save-download-history', dest='is_save_download_history', action='store_true',
-                      default=False, help='save downloaded doujinshis, whose will be skipped if you re-download them')
-    parser.add_option('--clean-download-history', action='store_true', default=False, dest='clean_download_history',
-                      help='clean download history')
-    parser.add_option('--template', dest='viewer_template', action='store',
-                      help='set viewer template', default='')
-    parser.add_option('--legacy', dest='legacy', action='store_true', default=False,
-                      help='use legacy searching method')
+    parser.add_argument('--cookie', type=str, dest='cookie',
+                        help='set cookie of nhentai to bypass Cloudflare captcha')
+    parser.add_argument('--useragent', '--user-agent', type=str, dest='useragent',
+                        help='set useragent to bypass Cloudflare captcha')
+    parser.add_argument('--language', type=str, dest='language',
+                        help='set default language to parse doujinshis')
+    parser.add_argument('--clean-language', dest='clean_language', action='store_true', default=False,
+                        help='set DEFAULT as language to parse doujinshis')
+    parser.add_argument('--save-download-history', dest='is_save_download_history', action='store_true',
+                        default=False, help='save downloaded doujinshis, whose will be skipped if you re-download them')
+    parser.add_argument('--clean-download-history', action='store_true', default=False, dest='clean_download_history',
+                        help='clean download history')
+    parser.add_argument('--template', dest='viewer_template', type=str, default='',
+                        help='set viewer template')
+    parser.add_argument('--legacy', dest='legacy', action='store_true', default=False,
+                        help='use legacy searching method')

-    args, _ = parser.parse_args(sys.argv[1:])
+    args = parser.parse_args()

    if args.html_viewer:
-        generate_html(template=constant.CONFIG['template'])
+        if not os.path.exists(args.html_viewer):
+            logger.error(f'Path \'{args.html_viewer}\' not exists')
+            sys.exit(1)
+
+        for root, dirs, files in os.walk(args.html_viewer):
+            if not dirs:
+                generate_html(output_dir=args.html_viewer, template=constant.CONFIG['template'])
+                sys.exit(0)
+
+            for dir_name in dirs:
+                # it will scan the entire subdirectories
+                doujinshi_dir = os.path.join(root, dir_name)
+                items = set(map(lambda s: os.path.splitext(s)[1], os.listdir(doujinshi_dir)))
+
+                # skip directory without any images
+                if items & set(EXTENSIONS):
+                    generate_html(output_dir=doujinshi_dir, template=constant.CONFIG['template'])
+
+            sys.exit(0)
+
        sys.exit(0)

    if args.main_viewer and not args.id and not args.keyword and not args.favorites:
@@ -169,22 +199,24 @@ def cmd_parser():

    # --- set config ---
    if args.cookie is not None:
-        constant.CONFIG['cookie'] = args.cookie
+        constant.CONFIG['cookie'] = args.cookie.strip()
        write_config()
        logger.info('Cookie saved.')
-        sys.exit(0)
-    elif args.useragent is not None:
-        constant.CONFIG['useragent'] = args.useragent
+
+    if args.useragent is not None:
+        constant.CONFIG['useragent'] = args.useragent.strip()
        write_config()
        logger.info('User-Agent saved.')
-        sys.exit(0)
-    elif args.language is not None:
+
+    if args.language is not None:
        constant.CONFIG['language'] = args.language
        write_config()
        logger.info(f'Default language now set to "{args.language}"')
-        sys.exit(0)
        # TODO: search without language

+    if any([args.cookie, args.useragent, args.language]):
+        sys.exit(0)
+
    if args.proxy is not None:
        proxy_url = urlparse(args.proxy)
        if not args.proxy == '' and proxy_url.scheme not in ('http', 'https', 'socks5', 'socks5h',
@@ -237,8 +269,4 @@ def cmd_parser():
        logger.critical('Maximum number of used threads is 15')
        sys.exit(1)

-    if args.dryrun and (args.is_cbz or args.is_pdf):
-        logger.critical('Cannot generate PDF or CBZ during dry-run')
-        sys.exit(1)
-
    return args
--- a/nhentai/command.py
+++ b/nhentai/command.py
@@ -4,18 +4,16 @@ import shutil
 import sys
 import signal
 import platform
-import urllib
-
 import urllib3.exceptions

 from nhentai import constant
 from nhentai.cmdline import cmd_parser, banner, write_config
 from nhentai.parser import doujinshi_parser, search_parser, legacy_search_parser, print_doujinshi, favorites_parser
 from nhentai.doujinshi import Doujinshi
-from nhentai.downloader import Downloader
+from nhentai.downloader import Downloader, CompressedDownloader
 from nhentai.logger import logger
 from nhentai.constant import BASE_URL
-from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \
+from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata, \
    paging, check_cookie, signal_handler, DB, move_to_folder


@@ -51,6 +49,9 @@ def main():

    page_list = paging(options.page)

+    if options.retry:
+        constant.RETRY_TIMES = int(options.retry)
+
    if options.favorites:
        if not options.is_download:
            logger.warning('You do not specify --download option')
@@ -79,14 +80,18 @@ def main():

    if options.is_save_download_history:
        with DB() as db:
-            data = map(int, db.get_all())
+            data = set(map(int, db.get_all()))

        doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))
+        logger.info(f'New doujinshis account: {len(doujinshi_ids)}')
+
+    if options.zip:
+        options.is_nohtml = True

    if not options.is_show:
-        downloader = Downloader(path=options.output_dir, threads=options.threads,
+        downloader = (CompressedDownloader if options.zip else Downloader)(path=options.output_dir, threads=options.threads,
                                timeout=options.timeout, delay=options.delay,
-                                retry=options.retry, exit_on_fail=options.exit_on_fail,
+                                exit_on_fail=options.exit_on_fail,
                                no_filename_padding=options.no_filename_padding)

        for doujinshi_id in doujinshi_ids:
@@ -96,17 +101,15 @@ def main():
            else:
                continue

-            if not options.dryrun:
-                doujinshi.downloader = downloader
+            doujinshi.downloader = downloader

-                if doujinshi.check_if_need_download(options):
-                    doujinshi.download()
-                else:
-                    logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
-                    continue
+            if doujinshi.check_if_need_download(options):
+                doujinshi.download()
+            else:
+                logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')

            if options.generate_metadata:
-                generate_metadata_file(options.output_dir, doujinshi)
+                generate_metadata(options.output_dir, doujinshi)

            if options.is_save_download_history:
                with DB() as db:
--- a/nhentai/constant.py
+++ b/nhentai/constant.py
@@ -37,6 +37,8 @@ FAV_URL = f'{BASE_URL}/favorites/'

 PATH_SEPARATOR = os.path.sep

+RETRY_TIMES = 3
+

 IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i1.{urlparse(BASE_URL).hostname}/galleries'
 IMAGE_URL_MIRRORS = [
--- a/nhentai/doujinshi.py
+++ b/nhentai/doujinshi.py
@@ -77,6 +77,9 @@ class Doujinshi(object):
        logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}')

    def check_if_need_download(self, options):
+        if options.no_download:
+            return False
+
        base_path = os.path.join(self.downloader.path, self.filename)

        # regenerate, re-download
--- a/nhentai/downloader.py
+++ b/nhentai/downloader.py
@@ -4,6 +4,8 @@ import os
 import asyncio
 import httpx
 import urllib3.exceptions
+import zipfile
+import io

 from urllib.parse import urlparse
 from nhentai import constant
@@ -13,11 +15,6 @@ from nhentai.utils import Singleton, async_request

 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

-
-class NHentaiImageNotExistException(Exception):
-    pass
-
-
 def download_callback(result):
    result, data = result
    if result == 0:
@@ -34,13 +31,12 @@ def download_callback(result):


 class Downloader(Singleton):
-    def __init__(self, path='', threads=5, timeout=30, delay=0, retry=3, exit_on_fail=False,
+    def __init__(self, path='', threads=5, timeout=30, delay=0, exit_on_fail=False,
                 no_filename_padding=False):
        self.threads = threads
        self.path = str(path)
        self.timeout = timeout
        self.delay = delay
-        self.retry = retry
        self.exit_on_fail = exit_on_fail
        self.folder = None
        self.semaphore = None
@@ -78,13 +74,7 @@ class Downloader(Singleton):
        else:
            filename = base_filename + extension

-        save_file_path = os.path.join(self.folder, filename)
-
        try:
-            if os.path.exists(save_file_path):
-                logger.warning(f'Skipped download: {save_file_path} already exists')
-                return 1, url
-
            response = await async_request('GET', url, timeout=self.timeout, proxy=proxy)

            if response.status_code != 200:
@@ -101,7 +91,7 @@ class Downloader(Singleton):
                return -1, url

        except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e:
-            if retried < self.retry:
+            if retried < constant.RETRY_TIMES:
                logger.warning(f'Download {filename} failed, retrying({retried + 1}) times...')
                return await self.download(
                    url=url,
@@ -111,13 +101,9 @@ class Downloader(Singleton):
                    proxy=proxy,
                )
            else:
-                logger.warning(f'Download {filename} failed with {self.retry} times retried, skipped')
+                logger.warning(f'Download {filename} failed with {constant.RETRY_TIMES} times retried, skipped')
                return -2, url

-        except NHentaiImageNotExistException as e:
-            os.remove(save_file_path)
-            return -3, url
-
        except Exception as e:
            import traceback

@@ -131,11 +117,11 @@ class Downloader(Singleton):

        return 1, url

-    async def save(self, save_file_path, response) -> bool:
+    async def save(self, filename, response) -> bool:
        if response is None:
            logger.error('Error: Response is None')
            return False
-        save_file_path = os.path.join(self.folder, save_file_path)
+        save_file_path = os.path.join(self.folder, filename)
        with open(save_file_path, 'wb') as f:
            if response is not None:
                length = response.headers.get('content-length')
@@ -146,6 +132,15 @@ class Downloader(Singleton):
                        f.write(chunk)
        return True

+    def create_storage_object(self, folder:str):
+        if not os.path.exists(folder):
+            try:
+                os.makedirs(folder)
+            except EnvironmentError as e:
+                logger.critical(str(e))
+        self.folder:str = folder
+        self.close = lambda: None # Only available in class CompressedDownloader
+
    def start_download(self, queue, folder='') -> bool:
        if not isinstance(folder, (str,)):
            folder = str(folder)
@@ -154,12 +149,7 @@ class Downloader(Singleton):
            folder = os.path.join(self.path, folder)

        logger.info(f'Doujinshi will be saved at "{folder}"')
-        if not os.path.exists(folder):
-            try:
-                os.makedirs(folder)
-            except EnvironmentError as e:
-                logger.critical(str(e))
-        self.folder = folder
+        self.create_storage_object(folder)

        if os.getenv('DEBUG', None) == 'NODOWNLOAD':
            # Assuming we want to continue with rest of process.
@@ -175,4 +165,31 @@ class Downloader(Singleton):
        # Prevent coroutines infection
        asyncio.run(self.fiber(coroutines))

+        self.close()
+
+        return True
+
+class CompressedDownloader(Downloader):
+    def create_storage_object(self, folder):
+        filename = f'{folder}.zip'
+        print(filename)
+        self.zipfile = zipfile.ZipFile(filename,'w')
+        self.close = lambda: self.zipfile.close()
+
+    async def save(self, filename, response) -> bool:
+        if response is None:
+            logger.error('Error: Response is None')
+            return False
+
+        image_data = io.BytesIO()
+        length = response.headers.get('content-length')
+        if length is None:
+            content = await response.read()
+            image_data.write(content)
+        else:
+            async for chunk in response.aiter_bytes(2048):
+                image_data.write(chunk)
+
+        image_data.seek(0)
+        self.zipfile.writestr(filename, image_data.read())
        return True
--- a/nhentai/parser.py
+++ b/nhentai/parser.py
@@ -92,13 +92,27 @@ def favorites_parser(page=None):
        page_range_list = range(1, pages + 1)

    for page in page_range_list:
-        try:
-            logger.info(f'Getting doujinshi ids of page {page}')
-            resp = request('get', f'{constant.FAV_URL}?page={page}').content
+        logger.info(f'Getting doujinshi ids of page {page}')

-            result.extend(_get_title_and_id(resp))
-        except Exception as e:
-            logger.error(f'Error: {e}, continue')
+        i = 0
+        while i <= constant.RETRY_TIMES + 1:
+            i += 1
+            if i > 3:
+                logger.error(f'Failed to get favorites at page {page} after 3 times retried, skipped')
+                break
+
+            try:
+                resp = request('get', f'{constant.FAV_URL}?page={page}').content
+                temp_result = _get_title_and_id(resp)
+                if not temp_result:
+                    logger.warning(f'Failed to get favorites at page {page}, retrying ({i} times) ...')
+                    continue
+                else:
+                    result.extend(temp_result)
+                    break
+
+            except Exception as e:
+                logger.warning(f'Error: {e}, retrying ({i} times) ...')

    return result

@@ -141,17 +155,19 @@ def doujinshi_parser(id_, counter=0):
    title = doujinshi_info.find('h1').text
    pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text
    subtitle = doujinshi_info.find('h2')
-    favorite_counts = doujinshi_info.find('span', class_='nobold').find('span', class_='count')
+    favorite_counts = doujinshi_info.find('span', class_='nobold').text.strip('(').strip(')')

    doujinshi['name'] = title
    doujinshi['pretty_name'] = pretty_name
    doujinshi['subtitle'] = subtitle.text if subtitle else ''
-    doujinshi['favorite_counts'] = int(favorite_counts.text.strip()) if favorite_counts else 0
+    doujinshi['favorite_counts'] = int(favorite_counts) if favorite_counts and favorite_counts.isdigit() else 0

    doujinshi_cover = html.find('div', attrs={'id': 'cover'})
    # img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
    #                   doujinshi_cover.a.img.attrs['data-src'])
-    img_id = re.search(r'/galleries/(\d+)/cover\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
+
+    # fix cover.webp.webp
+    img_id = re.search(r'/galleries/(\d+)/cover(\.webp|\.jpg|\.png)?\.\w+$', doujinshi_cover.a.img.attrs['data-src'])

    ext = []
    for i in html.find_all('div', attrs={'class': 'thumb-container'}):
@@ -261,7 +277,7 @@ def search_parser(keyword, sorting, page, is_page_all=False):
        i = 0

        logger.info(f'Searching doujinshis using keywords "{keyword}" on page {p}{total}')
-        while i < 3:
+        while i < constant.RETRY_TIMES:
            try:
                url = request('get', url=constant.SEARCH_URL, params={'query': keyword,
                                                                      'page': p, 'sort': sorting}).url
--- a/nhentai/serializer.py
+++ b/nhentai/serializer.py
@@ -2,12 +2,12 @@
 import json
 import os

-from nhentai.constant import PATH_SEPARATOR
+from nhentai.constant import PATH_SEPARATOR, LANGUAGE_ISO
 from xml.sax.saxutils import escape
-from nhentai.constant import LANGUAGE_ISO
+from requests.structures import CaseInsensitiveDict


-def serialize_json(doujinshi, output_dir):
+def serialize_json(doujinshi, output_dir: str):
    metadata = {'title': doujinshi.name,
                'subtitle': doujinshi.info.subtitle}
    if doujinshi.info.favorite_counts:
@@ -61,6 +61,8 @@ def serialize_comic_xml(doujinshi, output_dir):
            xml_write_simple_tag(f, 'Day', dt.day)
        if doujinshi.info.parodies:
            xml_write_simple_tag(f, 'Series', doujinshi.info.parodies)
+        if doujinshi.info.groups:
+            xml_write_simple_tag(f, 'Groups', doujinshi.info.groups)
        if doujinshi.info.characters:
            xml_write_simple_tag(f, 'Characters', doujinshi.info.characters)
        if doujinshi.info.tags:
@@ -78,6 +80,26 @@ def serialize_comic_xml(doujinshi, output_dir):
        f.write('</ComicInfo>')


+def serialize_info_txt(doujinshi, output_dir: str):
+    info_txt_path = os.path.join(output_dir, 'info.txt')
+    f = open(info_txt_path, 'w', encoding='utf-8')
+
+    fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
+              'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
+              'TAGS',  'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
+              'SERIES', 'PARODY', 'URL']
+
+    temp_dict = CaseInsensitiveDict(dict(doujinshi.table))
+    for i in fields:
+        v = temp_dict.get(i)
+        v = temp_dict.get(f'{i}s') if v is None else v
+        v = doujinshi.info.get(i.lower(), None) if v is None else v
+        v = doujinshi.info.get(f'{i.lower()}s', "Unknown") if v is None else v
+        f.write(f'{i}: {v}\n')
+
+    f.close()
+
+
 def xml_write_simple_tag(f, name, val, indent=1):
    f.write(f'{" "*indent}<{name}>{escape(str(val))}</{name}>\n')

@@ -132,3 +154,4 @@ def set_js_database():
        indexed_json = json.dumps(indexed_json, separators=(',', ':'))
        f.write('var data = ' + indexed_json)
        f.write(';\nvar tags = ' + unique_json)
+
--- a/nhentai/utils.py
+++ b/nhentai/utils.py
@@ -1,5 +1,5 @@
 # coding: utf-8
-
+import json
 import sys
 import re
 import os
@@ -11,24 +11,33 @@ import requests
 import sqlite3
 import urllib.parse
 from typing import Tuple
-from requests.structures import CaseInsensitiveDict

 from nhentai import constant
 from nhentai.constant import PATH_SEPARATOR
 from nhentai.logger import logger
-from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database
+from nhentai.serializer import serialize_comic_xml, serialize_json, serialize_info_txt, set_js_database

 MAX_FIELD_LENGTH = 100
 EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')

+def get_headers():
+    headers = {
+        'Referer': constant.LOGIN_URL
+    }
+
+    user_agent = constant.CONFIG.get('useragent')
+    if user_agent and user_agent.strip():
+        headers['User-Agent'] = user_agent
+
+    cookie = constant.CONFIG.get('cookie')
+    if cookie and cookie.strip():
+        headers['Cookie'] = cookie
+
+    return headers

 def request(method, url, **kwargs):
    session = requests.Session()
-    session.headers.update({
-        'Referer': constant.LOGIN_URL,
-        'User-Agent': constant.CONFIG['useragent'],
-        'Cookie': constant.CONFIG['cookie']
-    })
+    session.headers.update(get_headers())

    if not kwargs.get('proxies', None):
        kwargs['proxies'] = {
@@ -40,11 +49,7 @@ def request(method, url, **kwargs):


 async def async_request(method, url, proxy = None, **kwargs):
-    headers = {
-        'Referer': constant.LOGIN_URL,
-        'User-Agent': constant.CONFIG['useragent'],
-        'Cookie': constant.CONFIG['cookie'],
-    }
+    headers=get_headers()

    if proxy is None:
        proxy = constant.CONFIG['proxy']
@@ -105,21 +110,24 @@ def parse_doujinshi_obj(
        doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
        _filename = f'{doujinshi_obj.filename}.{file_type}'

-        if file_type == 'cbz':
-            serialize_comic_xml(doujinshi_obj, doujinshi_dir)
-
        if file_type == 'pdf':
            _filename = _filename.replace('/', '-')

        filename = os.path.join(output_dir, _filename)
    else:
+        if file_type == 'html':
+            return output_dir, 'index.html'
+
        doujinshi_dir = f'.{PATH_SEPARATOR}'

+    if not os.path.exists(doujinshi_dir):
+        os.makedirs(doujinshi_dir)
+
    return doujinshi_dir, filename


 def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
-    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '.html')
+    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, 'html')
    image_html = ''

    if not os.path.exists(doujinshi_dir):
@@ -142,10 +150,16 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
    js = readfile(f'viewer/{template}/scripts.js')

    if doujinshi_obj is not None:
-        serialize_json(doujinshi_obj, doujinshi_dir)
+        # serialize_json(doujinshi_obj, doujinshi_dir)
        name = doujinshi_obj.name
    else:
-        name = {'title': 'nHentai HTML Viewer'}
+        metadata_path = os.path.join(doujinshi_dir, "metadata.json")
+        if os.path.exists(metadata_path):
+            with open(metadata_path, 'r') as file:
+                doujinshi_info = json.loads(file.read())
+            name = doujinshi_info.get("title")
+        else:
+            name = 'nHentai HTML Viewer'

    data = html.format(TITLE=name, IMAGES=image_html, SCRIPTS=js, STYLES=css)
    try:
@@ -235,8 +249,20 @@ def generate_main_html(output_dir=f'.{PATH_SEPARATOR}'):
        logger.warning(f'Writing Main Viewer failed ({e})')


-def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
+def generate_cbz(doujinshi_dir, filename):
+    file_list = os.listdir(doujinshi_dir)
+    file_list.sort()

+    logger.info(f'Writing CBZ file to path: {filename}')
+    with zipfile.ZipFile(filename, 'w') as cbz_pf:
+        for image in file_list:
+            image_path = os.path.join(doujinshi_dir, image)
+            cbz_pf.write(image_path, image)
+
+    logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
+
+
+def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)

    if os.path.exists(f'{doujinshi_dir}.{file_type}') and not regenerate:
@@ -244,16 +270,9 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa
        return

    if file_type == 'cbz':
-        file_list = os.listdir(doujinshi_dir)
-        file_list.sort()
+        serialize_comic_xml(doujinshi_obj, doujinshi_dir)
+        generate_cbz(doujinshi_dir, filename)

-        logger.info(f'Writing CBZ file to path: {filename}')
-        with zipfile.ZipFile(filename, 'w') as cbz_pf:
-            for image in file_list:
-                image_path = os.path.join(doujinshi_dir, image)
-                cbz_pf.write(image_path, image)
-
-        logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
    elif file_type == 'pdf':
        try:
            import img2pdf
@@ -273,6 +292,16 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa

        except ImportError:
            logger.error("Please install img2pdf package by using pip.")
+    else:
+        raise ValueError('invalid file type')
+
+
+def generate_metadata(output_dir, doujinshi_obj=None):
+    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '')
+    serialize_json(doujinshi_obj, doujinshi_dir)
+    serialize_comic_xml(doujinshi_obj, doujinshi_dir)
+    serialize_info_txt(doujinshi_obj, doujinshi_dir)
+    logger.log(16, f'Metadata files have been written to "{doujinshi_dir}"')


 def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
@@ -329,29 +358,6 @@ def paging(page_string):
    return page_list


-def generate_metadata_file(output_dir, doujinshi_obj):
-
-    info_txt_path = os.path.join(output_dir, doujinshi_obj.filename, 'info.txt')
-
-    f = open(info_txt_path, 'w', encoding='utf-8')
-
-    fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
-              'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
-              'TAGS',  'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
-              'SERIES', 'PARODY', 'URL']
-
-    temp_dict = CaseInsensitiveDict(dict(doujinshi_obj.table))
-    for i in fields:
-        v = temp_dict.get(i)
-        v = temp_dict.get(f'{i}s') if v is None else v
-        v = doujinshi_obj.info.get(i.lower(), None) if v is None else v
-        v = doujinshi_obj.info.get(f'{i.lower()}s', "Unknown") if v is None else v
-        f.write(f'{i}: {v}\n')
-
-    f.close()
-    logger.log(16, f'Metadata Info has been written to "{info_txt_path}"')
-
-
 class DB(object):
    conn = None
    cur = None
--- a/nhentai/viewer/default/scripts.js
+++ b/nhentai/viewer/default/scripts.js
@@ -49,8 +49,8 @@ document.onkeypress = event => {
    switch (event.key.toLowerCase()) {
        // Previous Image
        case 'w':
-	   scrollBy(0, -40);
-	   break;
+            scrollBy(0, -40);
+            break;
        case 'a':
            changePage(currentPage - 1);
            break;
@@ -61,7 +61,7 @@ document.onkeypress = event => {
        // Next Image
        case ' ':
        case 's':
-	    scrollBy(0, 40);
+            scrollBy(0, 40);
            break;
        case 'd':
            changePage(currentPage + 1);
@@ -75,11 +75,13 @@ document.onkeydown = event =>{
            changePage(currentPage - 1);
            break;
        case 38: //up
+            changePage(currentPage - 1);
            break;
        case 39: //right
            changePage(currentPage + 1);
            break;
        case 40: //down
+            changePage(currentPage + 1);
            break;
    }
 };
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "nhentai"
-version = "0.5.20"
+version = "0.6.0-beta"
 description = "nhentai doujinshi downloader"
 authors = ["Ricter Z <ricterzheng@gmail.com>"]
 license = "MIT"
--- a/tests/test_download.py
+++ b/tests/test_download.py
@@ -1,14 +1,27 @@
 import unittest
 import os
+import zipfile
 import urllib3.exceptions

 from nhentai import constant
 from nhentai.cmdline import load_config
-from nhentai.downloader import Downloader
+from nhentai.downloader import Downloader, CompressedDownloader
 from nhentai.parser import doujinshi_parser
 from nhentai.doujinshi import Doujinshi
 from nhentai.utils import generate_html

+did = 440546
+
+def has_jepg_file(path):
+    with zipfile.ZipFile(path, 'r') as zf:
+        return '01.jpg' in zf.namelist()
+
+def is_zip_file(path):
+    try:
+        with zipfile.ZipFile(path, 'r') as _:
+            return True
+    except (zipfile.BadZipFile, FileNotFoundError):
+        return False

 class TestDownload(unittest.TestCase):
    def setUp(self) -> None:
@@ -17,17 +30,27 @@ class TestDownload(unittest.TestCase):
        constant.CONFIG['cookie'] = os.getenv('NHENTAI_COOKIE')
        constant.CONFIG['useragent'] = os.getenv('NHENTAI_UA')

+        self.info = Doujinshi(**doujinshi_parser(did), name_format='%i')
+
    def test_download(self):
-        did = 440546
-        info = Doujinshi(**doujinshi_parser(did), name_format='%i')
+        info = self.info
        info.downloader = Downloader(path='/tmp', threads=5)
        info.download()

-        self.assertTrue(os.path.exists(f'/tmp/{did}/001.jpg'))
+        self.assertTrue(os.path.exists(f'/tmp/{did}/01.jpg'))

        generate_html('/tmp', info)
        self.assertTrue(os.path.exists(f'/tmp/{did}/index.html'))

+    def test_zipfile_download(self):
+        info = self.info
+        info.downloader = CompressedDownloader(path='/tmp', threads=5)
+        info.download()
+
+        zipfile_path = f'/tmp/{did}.zip'
+        self.assertTrue(os.path.exists(zipfile_path))
+        self.assertTrue(is_zip_file(zipfile_path))
+        self.assertTrue(has_jepg_file(zipfile_path))

 if __name__ == '__main__':
    unittest.main()
Author	SHA1	Message	Date
Ricter Zheng	3be7c02458	Merge pull request #405 from luolili233/master Fix parsing bug in ComicInfo.xml, fixes #404	2025-05-29 21:36:52 +08:00
萝莉璃	fd1a40867e	Update serializer.py	2025-05-28 14:58:37 +08:00
Ricter Zheng	6752edfc9d	Merge pull request #402 from hzxjy1/zipTest Close zipfile hander manually and add a test	2025-03-26 22:57:29 +08:00
Ricter Zheng	9a5fcd7d23	Merge pull request #401 from hzxjy1/NoneType Fix none attributes in session headers	2025-03-26 22:56:11 +08:00
Hellagur4225	b4cc498a5f	add a test for zipfile download	2025-03-26 15:14:15 +08:00
Hellagur4225	a4eb7f3b5f	fix the uncontrollable zipfile closing function	2025-03-26 15:11:26 +08:00
Hellagur4225	36aa321ade	Fix none attributes in session headers	2025-03-24 10:13:42 +08:00
Ricter Zheng	aa84b57a43	use argparse, fix #396	2025-03-12 02:50:22 +08:00
ricterz	a3c70a0c30	fix #396	2025-03-11 22:23:17 +08:00
Ricter Zheng	86060ae0a6	Merge pull request #398 from hzxjy1/zipfile feat: add compress option	2025-03-11 22:04:09 +08:00
ricterz	9648c21b32	tiny fix of #397	2025-03-11 22:02:37 +08:00
Hellagur4225	625feb5d21	Remove unused option	2025-03-08 17:37:42 +08:00
Hellagur4225	6efbc73c10	feat: add compress option	2025-03-08 17:31:56 +08:00
ricterz	34c1ea8952	new feature #396	2025-02-28 18:59:32 +08:00
ricterz	2e895d8d0f	fix title #396	2025-02-28 18:24:56 +08:00
ricterz	0c9b92ce10	0.6.0-beta #394	2025-02-28 00:17:05 +08:00
ricterz	ca71a72747	fix #395	2025-02-27 22:07:40 +08:00
ricterz	1b7f19ee18	0.5.25, fix #393	2025-02-26 00:13:41 +08:00
ricterz	132f4c83da	Merge branch 'master' of github.com:RicterZ/nhentai	2025-02-26 00:12:49 +08:00
ricterz	6789b2b363	fix bug of cover.webp.webp	2025-02-25 23:51:13 +08:00
Ricter Zheng	a6ac725ca7	Merge pull request #392 from akakishi/master Update installation instructions in README.rst	2025-02-23 20:29:15 +08:00
akakishi	b32962bca4	Update README.rst File `setup.py` was removed in a previous commit; updated README to reflect the new installation process.	2025-02-23 01:18:54 -03:00
ricterz	8a7be0e33d	0.5.24	2025-02-09 20:16:44 +08:00
ricterz	0a47527461	optimize logger output #390	2025-02-09 20:15:17 +08:00
ricterz	023c8969eb	add global retry for search, download, fetch favorites	2025-02-09 20:02:52 +08:00
ricterz	29c3abbe5c	Merge branch 'master' of github.com:RicterZ/nhentai	2025-02-08 16:21:08 +08:00
ricterzheng	057fae8a83	0.5.23	2025-02-03 15:47:51 +08:00
ricterzheng	248d31edf0	get favorite count #386 even if not login	2025-02-03 15:45:39 +08:00
ricterzheng	4bfe0de078	0.5.22	2025-02-03 15:29:34 +08:00
ricterzheng	780a6c82b2	split metadata.json out from html generate function #386	2025-02-03 15:26:14 +08:00
ricterzheng	8791e7af55	update README to fix #367	2025-02-03 14:53:09 +08:00
ricterzheng	b434c4d58d	0.5.21	2025-02-03 14:34:14 +08:00
ricterz	ba59dcf4db	add up/down arrow	2025-01-16 22:40:53 +08:00