Mirror of https://github.com/RicterZ/nhentai.git
Synced 2025-07-02 00:19:29 +02:00

Compare commits (2 commits): 405d879db6, 41342a6da0
Dockerfile
@@ -5,7 +5,7 @@ COPY requirements.txt ./
 RUN pip install --no-cache-dir -r requirements.txt
 
 COPY . .
-RUN pip install .
+RUN python setup.py install
 
 WORKDIR /output
 ENTRYPOINT ["nhentai"]
README.rst
@@ -59,7 +59,7 @@ On Gentoo Linux:
 
 .. code-block::
 
-   layman -fa glibOne
+   layman -fa glicOne
    sudo emerge net-misc/nhentai
 
 On NixOS:
@@ -129,7 +129,7 @@ Download your favorites with delay:
 
 .. code-block:: bash
 
-   nhentai --favorites --download --delay 1 --page 3-5,7
+   nhentai --favorites --download --delay 1
 
 Format output doujinshi folder name:
 
nhentai/__init__.py
@@ -1,3 +1,3 @@
-__version__ = '0.5.19'
+__version__ = '0.5.16'
 __author__ = 'RicterZ'
 __email__ = 'ricterzheng@gmail.com'
nhentai/cmdline.py
@@ -11,7 +11,6 @@ from optparse import OptionParser
 from nhentai import __version__
 from nhentai.utils import generate_html, generate_main_html, DB
 from nhentai.logger import logger
-from nhentai.constant import PATH_SEPARATOR
 
 
 def banner():
@@ -38,7 +37,7 @@ def write_config():
     f.write(json.dumps(constant.CONFIG))
 
 
-def callback(option, _opt_str, _value, parser):
+def callback(option, opt_str, value, parser):
     if option == '--id':
         pass
     value = []
@@ -65,8 +64,7 @@ def cmd_parser():
     # operation options
     parser.add_option('--download', '-D', dest='is_download', action='store_true',
                       help='download doujinshi (for search results)')
-    parser.add_option('--show', '-S', dest='is_show', action='store_true',
-                      help='just show the doujinshi information')
+    parser.add_option('--show', '-S', dest='is_show', action='store_true', help='just show the doujinshi information')
 
     # doujinshi options
     parser.add_option('--id', dest='id', action='callback', callback=callback,
@@ -81,15 +79,14 @@ def cmd_parser():
     # page options
     parser.add_option('--page-all', dest='page_all', action='store_true', default=False,
                       help='all search results')
-    parser.add_option('--page', '--page-range', type='string', dest='page', action='store',
+    parser.add_option('--page', '--page-range', type='string', dest='page', action='store', default='1',
                       help='page number of search results. e.g. 1,2-5,14')
     parser.add_option('--sorting', '--sort', dest='sorting', action='store', default='popular',
                       help='sorting of doujinshi (recent / popular / popular-[today|week])',
                       choices=['recent', 'popular', 'popular-today', 'popular-week', 'date'])
 
     # download options
-    parser.add_option('--output', '-o', type='string', dest='output_dir', action='store',
-                      default=f'.{PATH_SEPARATOR}',
+    parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', default='./',
                       help='output dir')
     parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
                       help='thread count for downloading doujinshi')
@@ -99,8 +96,7 @@ def cmd_parser():
                       help='slow down between downloading every doujinshi')
     parser.add_option('--proxy', type='string', dest='proxy', action='store',
                       help='store a proxy, for example: -p "http://127.0.0.1:1080"')
-    parser.add_option('--file', '-f', type='string', dest='file', action='store',
-                      help='read gallery IDs from file.')
+    parser.add_option('--file', '-f', type='string', dest='file', action='store', help='read gallery IDs from file.')
     parser.add_option('--format', type='string', dest='name_format', action='store',
                       help='format the saved folder name', default='[%i][%a][%t]')
     parser.add_option('--dry-run', action='store_true', dest='dryrun', help='Dry run, skip file download')
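For reference, a minimal runnable sketch of the optparse callback pattern that --id relies on above; the digit-only argument handling here is illustrative, not the project's exact logic:

    from optparse import OptionParser

    def callback(option, opt_str, value, parser):
        # with action='callback' and no type/nargs, optparse passes value=None;
        # the callback consumes trailing bare arguments from parser.rargs itself
        value = []
        for arg in parser.rargs:
            if not arg.isdigit():
                break
            value.append(int(arg))
        del parser.rargs[:len(value)]  # drop what we consumed
        setattr(parser.values, option.dest, value)

    parser = OptionParser()
    parser.add_option('--id', dest='id', action='callback', callback=callback)
    options, args = parser.parse_args(['--id', '123456', '177013'])
    print(options.id)  # [123456, 177013]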
nhentai/command.py
@@ -48,7 +48,7 @@ def main():
         if not options.is_download:
             logger.warning('You do not specify --download option')
 
-        doujinshis = favorites_parser(page=page_list) if options.page else favorites_parser()
+        doujinshis = favorites_parser() if options.page_all else favorites_parser(page=page_list)
 
     elif options.keyword:
         if constant.CONFIG['language']:
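The swapped condition above changes which flag drives favorites pagination. A runnable paraphrase with stand-in helpers (the helper and option names come from the diff; the bodies are illustrative):

    def favorites_parser(page=None):
        # stand-in for nhentai.parser.favorites_parser
        return f'favorites, pages={page}' if page else 'favorites, all/default pages'

    class options:
        page_all = False  # stand-in for the parsed --page-all flag

    page_list = [3, 4, 5, 7]  # what paging('3-5,7') would produce

    # new side: --page-all wins, otherwise an explicit page list is used
    doujinshis = favorites_parser() if options.page_all else favorites_parser(page=page_list)
    print(doujinshis)  # favorites, pages=[3, 4, 5, 7]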
nhentai/constant.py
@@ -35,16 +35,11 @@ LOGIN_URL = f'{BASE_URL}/login/'
 CHALLENGE_URL = f'{BASE_URL}/challenge'
 FAV_URL = f'{BASE_URL}/favorites/'
 
-PATH_SEPARATOR = os.path.sep
 
-IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i1.{urlparse(BASE_URL).hostname}/galleries'
+IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i.{urlparse(BASE_URL).hostname}/galleries'
 IMAGE_URL_MIRRORS = [
-    f'{urlparse(BASE_URL).scheme}://i2.{urlparse(BASE_URL).hostname}',
     f'{urlparse(BASE_URL).scheme}://i3.{urlparse(BASE_URL).hostname}',
-    f'{urlparse(BASE_URL).scheme}://i4.{urlparse(BASE_URL).hostname}',
     f'{urlparse(BASE_URL).scheme}://i5.{urlparse(BASE_URL).hostname}',
-    f'{urlparse(BASE_URL).scheme}://i6.{urlparse(BASE_URL).hostname}',
     f'{urlparse(BASE_URL).scheme}://i7.{urlparse(BASE_URL).hostname}',
 ]
 
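What the rewritten IMAGE_URL evaluates to, assuming a BASE_URL of 'https://nhentai.net' (the real value is defined elsewhere in nhentai/constant.py):

    from urllib.parse import urlparse

    BASE_URL = 'https://nhentai.net'  # assumed value for illustration

    IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i.{urlparse(BASE_URL).hostname}/galleries'
    IMAGE_URL_MIRRORS = [
        f'{urlparse(BASE_URL).scheme}://i{n}.{urlparse(BASE_URL).hostname}' for n in (3, 5, 7)
    ]

    print(IMAGE_URL)          # https://i.nhentai.net/galleries
    print(IMAGE_URL_MIRRORS)  # ['https://i3.nhentai.net', 'https://i5.nhentai.net', 'https://i7.nhentai.net']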
nhentai/doujinshi.py
@@ -57,7 +57,7 @@ class Doujinshi(object):
 
         self.table = [
             ['Parodies', self.info.parodies],
-            ['Title', self.name],
+            ['Doujinshi', self.name],
             ['Subtitle', self.info.subtitle],
             ['Date', self.info.date],
             ['Characters', self.info.characters],
@@ -65,7 +65,7 @@ class Doujinshi(object):
             ['Groups', self.info.groups],
             ['Languages', self.info.languages],
             ['Tags', self.info.tags],
-            ['Favorite Counts', self.favorite_counts],
+            ['Favorite Counts', self.info.favorite_counts],
             ['URL', self.url],
             ['Pages', self.pages],
         ]
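This table is rendered with tabulate elsewhere in doujinshi.py, so the first column is the user-visible row label (hence 'Title' vs 'Doujinshi' matters). A sketch with made-up values:

    from tabulate import tabulate  # already a dependency of nhentai

    table = [
        ['Doujinshi', '[Example Circle] Example Title'],  # labelled 'Title' on the old side
        ['Favorite Counts', 1234],
        ['URL', 'https://nhentai.net/g/123456/'],
        ['Pages', 24],
    ]
    print(tabulate(table))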
nhentai/downloader.py
@@ -13,7 +13,6 @@ from nhentai.utils import Singleton, async_request
 
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
 
 
 class NHentaiImageNotExistException(Exception):
     pass
 
@@ -33,40 +32,35 @@ def download_callback(result):
     logger.log(16, f'{data} downloaded successfully')
 
 
+
 class Downloader(Singleton):
     def __init__(self, path='', threads=5, timeout=30, delay=0):
         self.threads = threads
         self.path = str(path)
         self.timeout = timeout
         self.delay = delay
-        self.folder = None
-        self.semaphore = None
 
     async def fiber(self, tasks):
         self.semaphore = asyncio.Semaphore(self.threads)
         for completed_task in asyncio.as_completed(tasks):
             try:
                 result = await completed_task
-                if result[1]:
-                    logger.info(f'{result[1]} download completed')
-                else:
-                    logger.warning(f'{result[1]} download failed, return value {result[0]}')
+                logger.info(f'{result[1]} download completed')
             except Exception as e:
                 logger.error(f'An error occurred: {e}')
 
+
     async def _semaphore_download(self, *args, **kwargs):
         async with self.semaphore:
             return await self.download(*args, **kwargs)
 
-    async def download(self, url, folder='', filename='', retried=0, proxy=None, length=0):
+    async def download(self, url, folder='', filename='', retried=0, proxy=None):
         logger.info(f'Starting to download {url} ...')
 
         if self.delay:
             await asyncio.sleep(self.delay)
 
         filename = filename if filename else os.path.basename(urlparse(url).path)
-        base_filename, extension = os.path.splitext(filename)
-        filename = base_filename.zfill(length) + extension
 
         save_file_path = os.path.join(self.folder, filename)
 
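The fiber/_semaphore_download pair above caps concurrency with an asyncio.Semaphore. A self-contained sketch of the same pattern, with a sleep standing in for the real HTTP fetch:

    import asyncio

    async def download(url):
        await asyncio.sleep(0.1)  # stand-in for the real HTTP fetch
        return 1, url

    async def semaphore_download(semaphore, url):
        async with semaphore:  # at most `threads` coroutines run the body at once
            return await download(url)

    async def fiber(urls, threads=5):
        semaphore = asyncio.Semaphore(threads)
        tasks = [semaphore_download(semaphore, url) for url in urls]
        for completed_task in asyncio.as_completed(tasks):
            status, url = await completed_task
            print(status, url)

    asyncio.run(fiber([f'{i}.jpg' for i in range(1, 11)]))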
@@ -88,11 +82,11 @@ class Downloader(Singleton):
 
             if not await self.save(filename, response):
                 logger.error(f'Can not download image {url}')
-                return 1, url
+                return 1, None
 
         except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e:
             if retried < 3:
-                logger.warning(f'Download {filename} failed, retrying({retried + 1}) times...')
+                logger.info(f'Download {filename} failed, retrying({retried + 1}) times...')
                 return await self.download(
                     url=url,
                     folder=folder,
@@ -101,8 +95,7 @@ class Downloader(Singleton):
                     proxy=proxy,
                 )
             else:
-                logger.warning(f'Download {filename} failed with 3 times retried, skipped')
-                return 0, url
+                return 0, None
 
         except NHentaiImageNotExistException as e:
             os.remove(save_file_path)
@@ -114,10 +107,10 @@ class Downloader(Singleton):
             logger.error(f"Exception type: {type(e)}")
             traceback.print_stack()
             logger.critical(str(e))
-            return 0, url
+            return 0, None
 
         except KeyboardInterrupt:
-            return -3, url
+            return -3, None
 
         return 1, url
 
@@ -136,6 +129,7 @@ class Downloader(Singleton):
                     f.write(chunk)
         return True
 
+
     def start_download(self, queue, folder='') -> bool:
         if not isinstance(folder, (str,)):
             folder = str(folder)
@@ -155,10 +149,9 @@ class Downloader(Singleton):
             # Assuming we want to continue with rest of process.
             return True
 
-        digit_length = len(str(len(queue)))
-        logger.info(f'Total download pages: {len(queue)}')
+
         coroutines = [
-            self._semaphore_download(url, filename=os.path.basename(urlparse(url).path), length=digit_length)
+            self._semaphore_download(url, filename=os.path.basename(urlparse(url).path))
            for url in queue
        ]
 
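The dropped digit_length/zfill logic on the old side pads numeric page filenames so they sort lexicographically. In isolation:

    import os

    def pad(filename, length):
        base_filename, extension = os.path.splitext(filename)
        return base_filename.zfill(length) + extension

    queue = [f'{n}.jpg' for n in range(1, 121)]  # a 120-page gallery
    digit_length = len(str(len(queue)))          # 3
    print(pad('3.jpg', digit_length))            # 003.jpg, which sorts before 012.jpg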
nhentai/parser.py
@@ -135,36 +135,37 @@ def doujinshi_parser(id_, counter=0):
         logger.warning(f'Error: {e}, ignored')
         return None
 
+    # print(response)
     html = BeautifulSoup(response, 'html.parser')
     doujinshi_info = html.find('div', attrs={'id': 'info'})
 
     title = doujinshi_info.find('h1').text
     pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text
     subtitle = doujinshi_info.find('h2')
     favorite_counts = doujinshi_info.find('span', class_='nobold').find('span', class_='count')
+    if favorite_counts:
+        favorite_counts = favorite_counts.text.strip()
+    else:
+        favorite_counts = 0
 
     doujinshi['name'] = title
     doujinshi['pretty_name'] = pretty_name
     doujinshi['subtitle'] = subtitle.text if subtitle else ''
-    doujinshi['favorite_counts'] = int(favorite_counts.text.strip()) if favorite_counts else 0
+    doujinshi['favorite_counts'] = favorite_counts
 
     doujinshi_cover = html.find('div', attrs={'id': 'cover'})
-    # img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
-    #                    doujinshi_cover.a.img.attrs['data-src'])
-    img_id = re.search(r'/galleries/(\d+)/cover\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
+    img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
+                       doujinshi_cover.a.img.attrs['data-src'])
 
     ext = []
     for i in html.find_all('div', attrs={'class': 'thumb-container'}):
-        base_name = os.path.basename(i.img.attrs['data-src'])
-        ext_name = base_name.split('.')
-        if len(ext_name) == 3:
-            ext.append(ext_name[1])
-        else:
-            ext.append(ext_name[-1])
+        _, ext_name = os.path.basename(i.img.attrs['data-src']).rsplit('.', 1)
+        ext.append(ext_name)
 
     if not img_id:
-        logger.critical(f'Tried yo get image id failed of id: {id_}')
-        return None
+        logger.critical('Tried yo get image id failed')
+        sys.exit(1)
 
     doujinshi['img_id'] = img_id.group(1)
     doujinshi['ext'] = ext
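The two sides of the thumbnail-extension loop diverge on double-suffix names. For example:

    import os

    name = os.path.basename('/galleries/123456/1.jpg.webp')  # hypothetical data-src value

    # old side: split on every dot and special-case the three-part name
    parts = name.split('.')
    print(parts[1] if len(parts) == 3 else parts[-1])  # jpg

    # new side: keep whatever follows the last dot
    _, ext_name = name.rsplit('.', 1)
    print(ext_name)  # webp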
@@ -191,6 +192,53 @@ def doujinshi_parser(id_, counter=0):
     return doujinshi
 
 
+def legacy_doujinshi_parser(id_):
+    if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()):
+        raise Exception(f'Doujinshi id({id_}) is not valid')
+
+    id_ = int(id_)
+    logger.info(f'Fetching information of doujinshi id {id_}')
+    doujinshi = dict()
+    doujinshi['id'] = id_
+    url = f'{constant.DETAIL_URL}/{id_}'
+    i = 0
+    while 5 > i:
+        try:
+            response = request('get', url).json()
+        except Exception as e:
+            i += 1
+            if not i < 5:
+                logger.critical(str(e))
+                sys.exit(1)
+            continue
+        break
+
+    doujinshi['name'] = response['title']['english']
+    doujinshi['subtitle'] = response['title']['japanese']
+    doujinshi['img_id'] = response['media_id']
+    doujinshi['ext'] = ''.join([i['t'] for i in response['images']['pages']])
+    doujinshi['pages'] = len(response['images']['pages'])
+
+    # gain information of the doujinshi
+    needed_fields = ['character', 'artist', 'language', 'tag', 'parody', 'group', 'category']
+    for tag in response['tags']:
+        tag_type = tag['type']
+        if tag_type in needed_fields:
+            if tag_type == 'tag':
+                if tag_type not in doujinshi:
+                    doujinshi[tag_type] = {}
+
+                tag['name'] = tag['name'].replace(' ', '-')
+                tag['name'] = tag['name'].lower()
+                doujinshi[tag_type][tag['name']] = tag['id']
+            elif tag_type not in doujinshi:
+                doujinshi[tag_type] = tag['name']
+            else:
+                doujinshi[tag_type] += ', ' + tag['name']
+
+    return doujinshi
+
+
 def print_doujinshi(doujinshi_list):
     if not doujinshi_list:
         return
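legacy_doujinshi_parser reads a JSON detail endpoint. A hypothetical response fragment in the shape its field accesses imply (not captured from the live API):

    response = {
        'title': {'english': 'Example Title', 'japanese': '例のタイトル'},
        'media_id': '123456',
        'images': {'pages': [{'t': 'j'}, {'t': 'p'}, {'t': 'g'}]},
        'tags': [
            {'type': 'language', 'name': 'English', 'id': 12227},
            {'type': 'tag', 'name': 'Full Color', 'id': 17249},
        ],
    }

    # ext becomes the per-page type codes joined together (presumably j=jpg, p=png, g=gif)
    print(''.join([i['t'] for i in response['images']['pages']]))  # jpg
    print(len(response['images']['pages']))                        # 3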
nhentai/serializer.py
@@ -1,8 +1,6 @@
 # coding: utf-8
 import json
 import os
-
-from nhentai.constant import PATH_SEPARATOR
 from xml.sax.saxutils import escape
 from nhentai.constant import LANGUAGE_ISO
 
@@ -84,7 +82,7 @@ def xml_write_simple_tag(f, name, val, indent=1):
 
 def merge_json():
     lst = []
-    output_dir = f".{PATH_SEPARATOR}"
+    output_dir = "./"
     os.chdir(output_dir)
     doujinshi_dirs = next(os.walk('.'))[1]
     for folder in doujinshi_dirs:
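The PATH_SEPARATOR constant removed throughout this compare resolved to os.path.sep, so the two spellings only differ on Windows:

    import os

    PATH_SEPARATOR = os.path.sep  # what the removed import provided

    print(f'.{PATH_SEPARATOR}')  # '.\' on Windows, './' everywhere else
    print('./')                  # the new side's hard-coded POSIX form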
nhentai/utils.py
@@ -5,21 +5,19 @@ import re
 import os
 import zipfile
 import shutil
+import copy
 
 import httpx
 import requests
 import sqlite3
 import urllib.parse
-from typing import Tuple
-from requests.structures import CaseInsensitiveDict
+from typing import Optional, Tuple
 
 from nhentai import constant
-from nhentai.constant import PATH_SEPARATOR
 from nhentai.logger import logger
 from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database
 
 MAX_FIELD_LENGTH = 100
-EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
 
 
 def request(method, url, **kwargs):
@@ -101,9 +99,9 @@ def parse_doujinshi_obj(
         file_type: str = ''
 ) -> Tuple[str, str]:
 
-    filename = f'.{PATH_SEPARATOR}doujinshi.{file_type}'
-    if doujinshi_obj is not None:
+    filename = f'./doujinshi.{file_type}'
     doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
+    if doujinshi_obj is not None:
         _filename = f'{doujinshi_obj.filename}.{file_type}'
 
         if file_type == 'cbz':
@@ -113,8 +111,6 @@ def parse_doujinshi_obj(
             _filename = _filename.replace('/', '-')
 
         filename = os.path.join(output_dir, _filename)
-    else:
-        doujinshi_dir = f'.{PATH_SEPARATOR}'
 
     return doujinshi_dir, filename
 
@@ -134,7 +130,7 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
     file_list.sort()
 
     for image in file_list:
-        if not os.path.splitext(image)[1] in EXTENSIONS:
+        if not os.path.splitext(image)[1] in ('.jpg', '.png', '.webp'):
             continue
         image_html += f'<img src="{image}" class="image-item"/>\n'
 
@@ -179,7 +175,7 @@ def move_to_folder(output_dir='.', doujinshi_obj=None, file_type=None):
     shutil.move(filename, os.path.join(doujinshi_dir, os.path.basename(filename)))
 
 
-def generate_main_html(output_dir=f'.{PATH_SEPARATOR}'):
+def generate_main_html(output_dir='./'):
     """
     Generate a main html to show all the contains doujinshi.
     With a link to their `index.html`.
@@ -260,7 +256,7 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa
         import img2pdf
 
         """Write images to a PDF file using img2pdf."""
-        file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(EXTENSIONS)]
+        file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp'))]
         file_list.sort()
 
         logger.info(f'Writing PDF file to path: {filename}')
@@ -281,7 +277,7 @@ def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
     It used to be a whitelist approach allowed only alphabet and a part of symbols.
     but most doujinshi's names include Japanese 2-byte characters and these was rejected.
     so it is using blacklist approach now.
-    if filename include forbidden characters (\'/:,;*?"<>|) ,it replaces space character(" ").
+    if filename include forbidden characters (\'/:,;*?"<>|) ,it replace space character(' ').
     """
     # maybe you can use `--format` to select a suitable filename
 
@@ -304,7 +300,7 @@ def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
     return filename
 
 
-def signal_handler(_signal, _frame):
+def signal_handler(signal, frame):
     logger.error('Ctrl-C signal received. Stopping...')
     sys.exit(1)
 
@@ -312,8 +308,7 @@ def signal_handler(_signal, _frame):
 def paging(page_string):
     # 1,3-5,14 -> [1, 3, 4, 5, 14]
     if not page_string:
-        # default, the first page
-        return [1]
+        return []
 
     page_list = []
     for i in page_string.split(','):
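A runnable sketch of paging() with the range handling filled in from its own comment; the loop body past the split is not shown in the hunk, so it is reconstructed here, and only the empty-input return differs between the two sides:

    def paging(page_string):
        # 1,3-5,14 -> [1, 3, 4, 5, 14]
        if not page_string:
            return [1]  # old side; the new side returns []

        page_list = []
        for i in page_string.split(','):
            if '-' in i:
                start, end = i.split('-')
                page_list.extend(range(int(start), int(end) + 1))
            else:
                page_list.append(int(i))
        return page_list

    print(paging('1,3-5,14'))  # [1, 3, 4, 5, 14]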
@@ -338,16 +333,16 @@ def generate_metadata_file(output_dir, doujinshi_obj):
 
     fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
               'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
-              'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
+              'TAGS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
               'SERIES', 'PARODY', 'URL']
+    special_fields = ['PARODY', 'TITLE', 'ORIGINAL TITLE', 'DATE', 'CHARACTERS', 'AUTHOR', 'GROUPS',
+                      'LANGUAGE', 'TAGS', 'URL', 'PAGES']
 
-    temp_dict = CaseInsensitiveDict(dict(doujinshi_obj.table))
-    for i in fields:
-        v = temp_dict.get(i)
-        v = temp_dict.get(f'{i}s') if v is None else v
-        v = doujinshi_obj.info.get(i.lower(), None) if v is None else v
-        v = doujinshi_obj.info.get(f'{i.lower()}s', "Unknown") if v is None else v
-        f.write(f'{i}: {v}\n')
+    for i in range(len(fields)):
+        f.write(f'{fields[i]}: ')
+        if fields[i] in special_fields:
+            f.write(str(doujinshi_obj.table[special_fields.index(fields[i])][1]))
+        f.write('\n')
 
     f.close()
     logger.log(16, f'Metadata Info has been written to "{info_txt_path}"')
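The old side replaces positional table indexing with case-insensitive lookups, which is why CaseInsensitiveDict appears in the utils.py imports above. In isolation:

    from requests.structures import CaseInsensitiveDict  # the import dropped on the new side

    table = [['Doujinshi', 'Example Title'], ['Pages', 24]]  # illustrative table rows
    temp_dict = CaseInsensitiveDict(dict(table))

    print(temp_dict.get('DOUJINSHI'))  # Example Title, despite the case mismatch
    print(temp_dict.get('PAGES'))      # 24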
pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "nhentai"
-version = "0.5.19"
+version = "0.5.15"
 description = "nhentai doujinshi downloader"
 authors = ["Ricter Z <ricterzheng@gmail.com>"]
 license = "MIT"
@@ -20,6 +20,3 @@ httpx = "0.27.2"
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
-
-[tool.poetry.scripts]
-nhentai = 'nhentai.command:main'
qodana.yaml (29 lines deleted)
@@ -1,29 +0,0 @@
-#-------------------------------------------------------------------------------#
-#               Qodana analysis is configured by qodana.yaml file               #
-#       https://www.jetbrains.com/help/qodana/qodana-yaml.html                  #
-#-------------------------------------------------------------------------------#
-version: "1.0"
-
-#Specify inspection profile for code analysis
-profile:
-  name: qodana.starter
-
-#Enable inspections
-#include:
-#  - name: <SomeEnabledInspectionId>
-
-#Disable inspections
-#exclude:
-#  - name: <SomeDisabledInspectionId>
-#    paths:
-#      - <path/where/not/run/inspection>
-
-#Execute shell command before Qodana execution (Applied in CI/CD pipeline)
-#bootstrap: sh ./prepare-qodana.sh
-
-#Install IDE plugins before Qodana execution (Applied in CI/CD pipeline)
-#plugins:
-#  - id: <plugin.id> #(plugin id can be found at https://plugins.jetbrains.com)
-
-#Specify Qodana linter for analysis (Applied in CI/CD pipeline)
-linter: jetbrains/qodana-python:2024.3
setup.py (new file, 38 lines)
@@ -0,0 +1,38 @@
+# coding: utf-8
+import codecs
+from setuptools import setup, find_packages
+from nhentai import __version__, __author__, __email__
+
+
+with open('requirements.txt') as f:
+    requirements = [l for l in f.read().splitlines() if l]
+
+
+def long_description():
+    with codecs.open('README.rst', 'rb') as readme:
+        return readme.read().decode('utf-8')
+
+
+setup(
+    name='nhentai',
+    version=__version__,
+    packages=find_packages(),
+
+    author=__author__,
+    author_email=__email__,
+    keywords=['nhentai', 'doujinshi', 'downloader'],
+    description='nhentai.net doujinshis downloader',
+    long_description=long_description(),
+    url='https://github.com/RicterZ/nhentai',
+    download_url='https://github.com/RicterZ/nhentai/tarball/master',
+    include_package_data=True,
+    zip_safe=False,
+
+    install_requires=requirements,
+    entry_points={
+        'console_scripts': [
+            'nhentai = nhentai.command:main',
+        ]
+    },
+    license='MIT',
+)
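Both the removed [tool.poetry.scripts] block and this entry_points stanza wire the nhentai console command to the same function, so once installed, running nhentai on the command line is equivalent to:

    from nhentai.command import main

    main()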