remove debug print

Merge pull request #348 from JustAHumanBean/webp
add webp support
2025-07-01 16:09:28 +02:00 · 2024-11-09 11:51:03 +08:00 · 2024-11-08 16:33:21 +08:00 · 2024-11-08 07:49:20 +00:00 · 2024-11-08 07:47:50 +00:00 · 2024-11-08 07:46:53 +00:00
11 changed files with 79 additions and 61 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,4 @@ dist/
 output/
 venv/
 .vscode/
+test-output
--- a/README.rst
+++ b/README.rst
@@ -161,25 +161,21 @@ Other options:
      NHENTAI                 nhentai mirror url

    Options:
-      # Operation options, control the program behaviors
      -h, --help            show this help message and exit
      -D, --download        download doujinshi (for search results)
      -S, --show            just show the doujinshi information
-
-      # Doujinshi options, specify id, keyword, etc.
      --id                  doujinshi ids set, e.g. 167680 167681 167682
      -s KEYWORD, --search=KEYWORD
                            search doujinshi by keyword
      -F, --favorites       list or download your favorites
-
-      # Page options, control the page to fetch / download
+      -a ARTIST, --artist=ARTIST
+                            list doujinshi by artist name
      --page-all            all search results
      --page=PAGE, --page-range=PAGE
                            page number of search results. e.g. 1,2-5,14
-      --sorting=SORTING     sorting of doujinshi (recent / popular /
+      --sorting=SORTING, --sort=SORTING
+                            sorting of doujinshi (recent / popular /
                            popular-[today|week])
-
-      # Download options, the output directory, threads, timeout, delay, etc.
      -o OUTPUT_DIR, --output=OUTPUT_DIR
                            output dir
      -t THREADS, --threads=THREADS
@@ -192,8 +188,6 @@ Other options:
      -f FILE, --file=FILE  read gallery IDs from file.
      --format=NAME_FORMAT  format the saved folder name
      --dry-run             Dry run, skip file download
-
-      # Generate options, for generate html viewer, cbz file, pdf file, etc
      --html                generate a html viewer at current directory
      --no-html             don't generate HTML after downloading
      --gen-main            generate a main viewer contain all the doujin in the
@@ -202,12 +196,10 @@ Other options:
      -P, --pdf             generate PDF file
      --rm-origin-dir       remove downloaded doujinshi dir when generated CBZ or
                            PDF file
-      --move-to-folder      remove files in doujinshi dir then move new file to folder 
-                            when generated CBZ or PDF file  
+      --move-to-folder      remove files in doujinshi dir then move new file to
+                            folder when generated CBZ or PDF file
      --meta                generate a metadata file in doujinshi format
-      --regenerate-cbz      regenerate the cbz file if exists
-
-      # nhentai options, set cookie, user-agent, language, remove caches, histories, etc
+      --regenerate          regenerate the cbz or pdf file if exists
      --cookie=COOKIE       set cookie of nhentai to bypass Cloudflare captcha
      --useragent=USERAGENT, --user-agent=USERAGENT
                            set useragent to bypass Cloudflare captcha
@@ -231,6 +223,9 @@ For example:
 .. code-block::

    i.h.loli.club -> i.nhentai.net
+    i3.h.loli.club -> i3.nhentai.net
+    i5.h.loli.club -> i5.nhentai.net
+    i7.h.loli.club -> i7.nhentai.net
    h.loli.club -> nhentai.net

 Set `NHENTAI` env var to your nhentai mirror.
--- a/nhentai/__init__.py
+++ b/nhentai/__init__.py
@@ -1,3 +1,3 @@
-__version__ = '0.5.8'
+__version__ = '0.5.12'
 __author__ = 'RicterZ'
 __email__ = 'ricterzheng@gmail.com'
--- a/nhentai/command.py
+++ b/nhentai/command.py
@@ -1,4 +1,6 @@
 # coding: utf-8
+import os
+import shutil
 import sys
 import signal
 import platform
@@ -12,7 +14,7 @@ from nhentai.downloader import Downloader
 from nhentai.logger import logger
 from nhentai.constant import BASE_URL
 from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \
-    paging, check_cookie, signal_handler, DB
+    paging, check_cookie, signal_handler, DB, move_to_folder


 def main():
@@ -92,6 +94,7 @@ def main():
                    doujinshi.download()
                else:
                    logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
+                    continue

            if options.generate_metadata:
                generate_metadata_file(options.output_dir, doujinshi)
@@ -104,12 +107,22 @@ def main():
                generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template'])

            if options.is_cbz:
-                generate_doc('cbz', options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder,
-                             options.regenerate)
+                generate_doc('cbz', options.output_dir, doujinshi, options.regenerate)

            if options.is_pdf:
-                generate_doc('pdf', options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder,
-                             options.regenerate)
+                generate_doc('pdf', options.output_dir, doujinshi, options.regenerate)
+
+            if options.move_to_folder:
+                if options.is_cbz:
+                    move_to_folder(options.output_dir, doujinshi, 'cbz')
+                if options.is_pdf:
+                    move_to_folder(options.output_dir, doujinshi, 'pdf')
+
+            if options.rm_origin_dir:
+                if options.move_to_folder:
+                    logger.critical('You specified both --move-to-folder and --rm-origin-dir options, '
+                                    'you will not get anything :(')
+                shutil.rmtree(os.path.join(options.output_dir, doujinshi.filename), ignore_errors=True)

        if options.main_viewer:
            generate_main_html(options.output_dir)
--- a/nhentai/constant.py
+++ b/nhentai/constant.py
@@ -38,9 +38,9 @@ FAV_URL = f'{BASE_URL}/favorites/'

 IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i.{urlparse(BASE_URL).hostname}/galleries'
 IMAGE_URL_MIRRORS = [
-    f'{urlparse(BASE_URL).scheme}://i3.{urlparse(BASE_URL).hostname}'
-    f'{urlparse(BASE_URL).scheme}://i5.{urlparse(BASE_URL).hostname}'
-    f'{urlparse(BASE_URL).scheme}://i7.{urlparse(BASE_URL).hostname}'
+    f'{urlparse(BASE_URL).scheme}://i3.{urlparse(BASE_URL).hostname}',
+    f'{urlparse(BASE_URL).scheme}://i5.{urlparse(BASE_URL).hostname}',
+    f'{urlparse(BASE_URL).scheme}://i7.{urlparse(BASE_URL).hostname}',
 ]

 NHENTAI_HOME = get_nhentai_home()
--- a/nhentai/doujinshi.py
+++ b/nhentai/doujinshi.py
@@ -12,6 +12,7 @@ EXT_MAP = {
    'j': 'jpg',
    'p': 'png',
    'g': 'gif',
+    'w': 'webp',
 }


@@ -75,26 +76,28 @@ class Doujinshi(object):
    def check_if_need_download(self, options):
        base_path = os.path.join(self.downloader.path, self.filename)

-        # doujinshi directory is not exist, we need to download definitely
-        if not (os.path.exists(base_path) and os.path.isdir(base_path)):
-            return True
-
-        # regenerate, we need to re-download from nhentai
+        # regenerate, re-download
        if options.regenerate:
            return True

+        # pdf or cbz file exists, skip re-download
+        # doujinshi directory may not exist b/c of --rm-origin-dir option set.
+        # user should pass --regenerate option to get back origin dir.
+        ret_pdf = ret_cbz = None
        if options.is_pdf:
-            file_ext = 'pdf'
-        elif options.is_cbz:
-            file_ext = 'cbz'
-        else:
-            # re-download
-            return True
+            ret_pdf = os.path.exists(f'{base_path}.pdf') or os.path.exists(f'{base_path}/{self.filename}.pdf')

-        # pdf or cbz file exists, we needn't to re-download it
-        if os.path.exists(f'{base_path}.{file_ext}') or os.path.exists(f'{base_path}/{self.filename}.{file_ext}'):
+        if options.is_cbz:
+            ret_cbz = os.path.exists(f'{base_path}.cbz') or os.path.exists(f'{base_path}/{self.filename}.cbz')
+
+        ret = list(filter(lambda s: s is not None, [ret_cbz, ret_pdf]))
+        if ret and all(ret):
            return False

+        # doujinshi directory doesn't exist, re-download
+        if not (os.path.exists(base_path) and os.path.isdir(base_path)):
+            return True
+
        # fallback
        return True

--- a/nhentai/downloader.py
+++ b/nhentai/downloader.py
@@ -69,7 +69,7 @@ class Downloader(Singleton):
                        if response.status_code != 200:
                            path = urlparse(url).path
                            for mirror in constant.IMAGE_URL_MIRRORS:
-                                print(f'{mirror}{path}')
+                                # print(f'{mirror}{path}')
                                mirror_url = f'{mirror}{path}'
                                response = request('get', mirror_url, stream=True,
                                                   timeout=self.timeout, proxies=proxy)
--- a/nhentai/parser.py
+++ b/nhentai/parser.py
@@ -148,7 +148,7 @@ def doujinshi_parser(id_, counter=0):
    doujinshi['subtitle'] = subtitle.text if subtitle else ''

    doujinshi_cover = html.find('div', attrs={'id': 'cover'})
-    img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif)$',
+    img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
                       doujinshi_cover.a.img.attrs['data-src'])

    ext = []
--- a/nhentai/utils.py
+++ b/nhentai/utils.py
@@ -72,8 +72,8 @@ def parse_doujinshi_obj(
        doujinshi_obj=None,
        file_type: str = ''
 ) -> Tuple[str, str]:
-    filename = './doujinshi' + file_type

+    filename = f'./doujinshi.{file_type}'
    doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
    if doujinshi_obj is not None:
        _filename = f'{doujinshi_obj.filename}.{file_type}'
@@ -104,7 +104,7 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
    file_list.sort()

    for image in file_list:
-        if not os.path.splitext(image)[1] in ('.jpg', '.png'):
+        if not os.path.splitext(image)[1] in ('.jpg', '.png', '.webp'):
            continue
        image_html += f'<img src="{image}" class="image-item"/>\n'

@@ -128,6 +128,27 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
        logger.warning(f'Writing HTML Viewer failed ({e})')


+def move_to_folder(output_dir='.', doujinshi_obj=None, file_type=None):
+    if not file_type:
+        raise RuntimeError('no file_type specified')
+
+    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
+
+    for fn in os.listdir(doujinshi_dir):
+        file_path = os.path.join(doujinshi_dir, fn)
+        _, ext = os.path.splitext(file_path)
+        if ext in ['.pdf', '.cbz']:
+            continue
+
+        if os.path.isfile(file_path):
+            try:
+                os.remove(file_path)
+            except Exception as e:
+                print(f"Error deleting file: {e}")
+
+    shutil.move(filename, os.path.join(doujinshi_dir, os.path.basename(filename)))
+
+
 def generate_main_html(output_dir='./'):
    """
    Generate a main html to show all the contains doujinshi.
@@ -185,8 +206,7 @@ def generate_main_html(output_dir='./'):
        logger.warning(f'Writing Main Viewer failed ({e})')


-def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, rm_origin_dir=False,
-                 move_to_folder=False, regenerate=False):
+def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):

    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)

@@ -210,7 +230,7 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, rm_origin_dir
            import img2pdf

            """Write images to a PDF file using img2pdf."""
-            file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif'))]
+            file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp'))]
            file_list.sort()

            logger.info(f'Writing PDF file to path: {filename}')
@@ -225,20 +245,6 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, rm_origin_dir
        except ImportError:
            logger.error("Please install img2pdf package by using pip.")

-    if rm_origin_dir:
-        shutil.rmtree(doujinshi_dir, ignore_errors=True)
-
-    if move_to_folder:
-        for filename in os.listdir(doujinshi_dir):
-            file_path = os.path.join(doujinshi_dir, filename)
-            if os.path.isfile(file_path):
-                try:
-                    os.remove(file_path)
-                except Exception as e:
-                    print(f"Error deleting file: {e}")
-
-        shutil.move(filename, doujinshi_dir)
-

 def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
    """
@@ -303,7 +309,7 @@ def generate_metadata_file(output_dir, doujinshi_obj):
              'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
              'TAGS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
              'SERIES', 'PARODY', 'URL']
-    special_fields = ['PARODY', 'TITLE', 'ORIGINAL TITLE', 'CHARACTERS', 'AUTHOR', 'GROUPS',
+    special_fields = ['PARODY', 'TITLE', 'ORIGINAL TITLE', 'DATE', 'CHARACTERS', 'AUTHOR', 'GROUPS',
                      'LANGUAGE', 'TAGS', 'URL', 'PAGES']

    for i in range(len(fields)):
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "nhentai"
-version = "0.5.8"
+version = "0.5.12"
 description = "nhentai doujinshi downloader"
 authors = ["Ricter Z <ricterzheng@gmail.com>"]
 license = "MIT"
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,3 +1,3 @@
 [metadata]
-description-file = README.rst
+description_file = README.rst
Author	SHA1	Message	Date
ricterz	cac07a517e	remove debug print	2024-11-09 11:51:03 +08:00
Ricter Zheng	f30ff59b2b	Merge pull request #348 from JustAHumanBean/webp add webp support	2024-11-08 16:33:21 +08:00
JustAHumanBean	1504ee779f	Update utils.py	2024-11-08 07:49:20 +00:00
JustAHumanBean	98d9eecf6d	Update parser.py	2024-11-08 07:47:50 +00:00
JustAHumanBean	e16e623b9d	Update doujinshi.py	2024-11-08 07:46:53 +00:00
ricterzheng	c3f3182df3	0.5.12	2024-10-01 22:55:01 +09:00
ricterzheng	12aad842f8	fix #347	2024-10-01 22:42:26 +09:00
ricterzheng	f9f76ab0f5	0.5.11	2024-10-01 12:48:28 +09:00
ricterzheng	744a9e4418	Merge branch 'master' of github.com:RicterZ/nhentai	2024-10-01 12:47:48 +09:00
ricterzheng	c3e9fff491	fix bug #345	2024-10-01 12:47:13 +09:00
ricterzheng	a84e2c5714	fix bug #341	2024-10-01 12:47:10 +09:00
ricterzheng	c814c35c50	fix bug #341	2024-10-01 12:39:28 +09:00
ricterz	e2f71437e2	fix setuptools warning	2024-09-22 16:37:49 +08:00
ricterz	2fa45ae4df	0.5.10	2024-09-22 16:36:50 +08:00
ricterz	17bc33c6cb	fix arguments pass issue #344	2024-09-22 16:34:53 +08:00
ricterz	09bb8460f6	fix overwrite issue #344	2024-09-22 16:32:01 +08:00
normalizedwater546	eb5b93d654	fix: pdf/cbz file already exists, but download process continues	2024-09-22 07:33:52 +00:00
normalizedwater546	cb6cf6df1a	regression: pdf/cbz file already exists, but origin files are downloaded anyways. - call download with `--cbz --rm-origin-dir`, and run command twice. - user should pass `--regenerate` option to get back origin dir.	2024-09-22 07:24:16 +00:00
ricterz	98a66a3cb0	0.5.9	2024-09-22 15:09:36 +08:00
ricterz	02d47632cf	fix bug of move-to-dir	2024-09-22 15:07:53 +08:00
ricterz	f932b1fbbe	update README: mirror setup	2024-09-22 14:45:07 +08:00
ricterz	fd9e92f9d4	update README	2024-09-22 14:44:42 +08:00
Ricter Zheng	a8a48c6ce7	Merge pull request #343 from RicterZ/pull-342 improve #342	2024-09-22 14:42:32 +08:00