0.5.19

Merge pull request #373 from nicojust/fix-favorite-metadata-output
fix favorite_counts output in metadata
2025-07-01 16:09:28 +02:00 · 2025-01-15 19:47:24 +08:00 · 2025-01-15 12:26:24 +08:00 · 2025-01-15 12:26:01 +08:00 · 2025-01-13 19:51:40 +01:00 · 2025-01-13 19:51:04 +01:00
11 changed files with 57 additions and 33 deletions
--- a/README.rst
+++ b/README.rst
@@ -129,7 +129,7 @@ Download your favorites with delay:

 .. code-block:: bash

-    nhentai --favorites --download --delay 1
+    nhentai --favorites --download --delay 1 --page 3-5,7

 Format output doujinshi folder name:

--- a/nhentai/__init__.py
+++ b/nhentai/__init__.py
@@ -1,3 +1,3 @@
-__version__ = '0.5.17.2'
+__version__ = '0.5.19'
 __author__ = 'RicterZ'
 __email__ = 'ricterzheng@gmail.com'
--- a/nhentai/cmdline.py
+++ b/nhentai/cmdline.py
@@ -11,6 +11,7 @@ from optparse import OptionParser
 from nhentai import __version__
 from nhentai.utils import generate_html, generate_main_html, DB
 from nhentai.logger import logger
+from nhentai.constant import PATH_SEPARATOR


 def banner():
@@ -64,7 +65,8 @@ def cmd_parser():
    # operation options
    parser.add_option('--download', '-D', dest='is_download', action='store_true',
                      help='download doujinshi (for search results)')
-    parser.add_option('--show', '-S', dest='is_show', action='store_true', help='just show the doujinshi information')
+    parser.add_option('--show', '-S', dest='is_show', action='store_true',
+                      help='just show the doujinshi information')

    # doujinshi options
    parser.add_option('--id', dest='id', action='callback', callback=callback,
@@ -79,14 +81,15 @@ def cmd_parser():
    # page options
    parser.add_option('--page-all', dest='page_all', action='store_true', default=False,
                      help='all search results')
-    parser.add_option('--page', '--page-range', type='string', dest='page', action='store', default='1',
+    parser.add_option('--page', '--page-range', type='string', dest='page', action='store',
                      help='page number of search results. e.g. 1,2-5,14')
    parser.add_option('--sorting', '--sort', dest='sorting', action='store', default='popular',
                      help='sorting of doujinshi (recent / popular / popular-[today|week])',
                      choices=['recent', 'popular', 'popular-today', 'popular-week', 'date'])

    # download options
-    parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', default='./',
+    parser.add_option('--output', '-o', type='string', dest='output_dir', action='store',
+                      default=f'.{PATH_SEPARATOR}',
                      help='output dir')
    parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
                      help='thread count for downloading doujinshi')
@@ -96,7 +99,8 @@ def cmd_parser():
                      help='slow down between downloading every doujinshi')
    parser.add_option('--proxy', type='string', dest='proxy', action='store',
                      help='store a proxy, for example: -p "http://127.0.0.1:1080"')
-    parser.add_option('--file', '-f', type='string', dest='file', action='store', help='read gallery IDs from file.')
+    parser.add_option('--file', '-f', type='string', dest='file', action='store',
+                      help='read gallery IDs from file.')
    parser.add_option('--format', type='string', dest='name_format', action='store',
                      help='format the saved folder name', default='[%i][%a][%t]')
    parser.add_option('--dry-run', action='store_true', dest='dryrun', help='Dry run, skip file download')
--- a/nhentai/command.py
+++ b/nhentai/command.py
@@ -48,7 +48,7 @@ def main():
        if not options.is_download:
            logger.warning('You do not specify --download option')

-        doujinshis = favorites_parser() if options.page_all else favorites_parser(page=page_list)
+        doujinshis = favorites_parser(page=page_list) if options.page else favorites_parser()

    elif options.keyword:
        if constant.CONFIG['language']:
--- a/nhentai/constant.py
+++ b/nhentai/constant.py
@@ -35,11 +35,16 @@ LOGIN_URL = f'{BASE_URL}/login/'
 CHALLENGE_URL = f'{BASE_URL}/challenge'
 FAV_URL = f'{BASE_URL}/favorites/'

+PATH_SEPARATOR = os.path.sep

-IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i.{urlparse(BASE_URL).hostname}/galleries'
+
+IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i1.{urlparse(BASE_URL).hostname}/galleries'
 IMAGE_URL_MIRRORS = [
+    f'{urlparse(BASE_URL).scheme}://i2.{urlparse(BASE_URL).hostname}',
    f'{urlparse(BASE_URL).scheme}://i3.{urlparse(BASE_URL).hostname}',
+    f'{urlparse(BASE_URL).scheme}://i4.{urlparse(BASE_URL).hostname}',
    f'{urlparse(BASE_URL).scheme}://i5.{urlparse(BASE_URL).hostname}',
+    f'{urlparse(BASE_URL).scheme}://i6.{urlparse(BASE_URL).hostname}',
    f'{urlparse(BASE_URL).scheme}://i7.{urlparse(BASE_URL).hostname}',
 ]

--- a/nhentai/doujinshi.py
+++ b/nhentai/doujinshi.py
@@ -65,7 +65,7 @@ class Doujinshi(object):
            ['Groups', self.info.groups],
            ['Languages', self.info.languages],
            ['Tags', self.info.tags],
-            ['Favorite Counts', self.info.favorite_counts],
+            ['Favorite Counts', self.favorite_counts],
            ['URL', self.url],
            ['Pages', self.pages],
        ]
--- a/nhentai/downloader.py
+++ b/nhentai/downloader.py
@@ -47,7 +47,10 @@ class Downloader(Singleton):
        for completed_task in asyncio.as_completed(tasks):
            try:
                result = await completed_task
-                logger.info(f'{result[1]} download completed')
+                if result[1]:
+                    logger.info(f'{result[1]} download completed')
+                else:
+                    logger.warning(f'{result[1]} download failed, return value {result[0]}')
            except Exception as e:
                logger.error(f'An error occurred: {e}')

@@ -85,11 +88,11 @@ class Downloader(Singleton):

            if not await self.save(filename, response):
                logger.error(f'Can not download image {url}')
-                return 1, None
+                return 1, url

        except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e:
            if retried < 3:
-                logger.info(f'Download {filename} failed, retrying({retried + 1}) times...')
+                logger.warning(f'Download {filename} failed, retrying({retried + 1}) times...')
                return await self.download(
                    url=url,
                    folder=folder,
@@ -98,7 +101,8 @@ class Downloader(Singleton):
                    proxy=proxy,
                )
            else:
-                return 0, None
+                logger.warning(f'Download {filename} failed with 3 times retried, skipped')
+                return 0, url

        except NHentaiImageNotExistException as e:
            os.remove(save_file_path)
@@ -110,10 +114,10 @@ class Downloader(Singleton):
            logger.error(f"Exception type: {type(e)}")
            traceback.print_stack()
            logger.critical(str(e))
-            return 0, None
+            return 0, url

        except KeyboardInterrupt:
-            return -3, None
+            return -3, url

        return 1, url

@@ -152,6 +156,7 @@ class Downloader(Singleton):
            return True

        digit_length = len(str(len(queue)))
+        logger.info(f'Total download pages: {len(queue)}')
        coroutines = [
            self._semaphore_download(url, filename=os.path.basename(urlparse(url).path), length=digit_length)
            for url in queue
--- a/nhentai/parser.py
+++ b/nhentai/parser.py
@@ -141,23 +141,26 @@ def doujinshi_parser(id_, counter=0):
    title = doujinshi_info.find('h1').text
    pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text
    subtitle = doujinshi_info.find('h2')
-    favorite_counts = str(doujinshi_info.find('span', class_='nobold').find('span', class_='count'))
-    if favorite_counts is None:
-        favorite_counts = '0'
+    favorite_counts = doujinshi_info.find('span', class_='nobold').find('span', class_='count')

    doujinshi['name'] = title
    doujinshi['pretty_name'] = pretty_name
    doujinshi['subtitle'] = subtitle.text if subtitle else ''
-    doujinshi['favorite_counts'] = favorite_counts.strip()
+    doujinshi['favorite_counts'] = int(favorite_counts.text.strip()) if favorite_counts else 0

    doujinshi_cover = html.find('div', attrs={'id': 'cover'})
-    img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
-                       doujinshi_cover.a.img.attrs['data-src'])
+    # img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
+    #                   doujinshi_cover.a.img.attrs['data-src'])
+    img_id = re.search(r'/galleries/(\d+)/cover\.\w+$', doujinshi_cover.a.img.attrs['data-src'])

    ext = []
    for i in html.find_all('div', attrs={'class': 'thumb-container'}):
-        _, ext_name = os.path.basename(i.img.attrs['data-src']).rsplit('.', 1)
-        ext.append(ext_name)
+        base_name = os.path.basename(i.img.attrs['data-src'])
+        ext_name = base_name.split('.')
+        if len(ext_name) == 3:
+            ext.append(ext_name[1])
+        else:
+            ext.append(ext_name[-1])

    if not img_id:
        logger.critical(f'Tried yo get image id failed of id: {id_}')
--- a/nhentai/serializer.py
+++ b/nhentai/serializer.py
@@ -1,6 +1,8 @@
 # coding: utf-8
 import json
 import os
+
+from nhentai.constant import PATH_SEPARATOR
 from xml.sax.saxutils import escape
 from nhentai.constant import LANGUAGE_ISO

@@ -82,7 +84,7 @@ def xml_write_simple_tag(f, name, val, indent=1):

 def merge_json():
    lst = []
-    output_dir = "./"
+    output_dir = f".{PATH_SEPARATOR}"
    os.chdir(output_dir)
    doujinshi_dirs = next(os.walk('.'))[1]
    for folder in doujinshi_dirs:
--- a/nhentai/utils.py
+++ b/nhentai/utils.py
@@ -14,10 +14,12 @@ from typing import Tuple
 from requests.structures import CaseInsensitiveDict

 from nhentai import constant
+from nhentai.constant import PATH_SEPARATOR
 from nhentai.logger import logger
 from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database

 MAX_FIELD_LENGTH = 100
+EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')


 def request(method, url, **kwargs):
@@ -99,9 +101,9 @@ def parse_doujinshi_obj(
        file_type: str = ''
 ) -> Tuple[str, str]:

-    filename = f'./doujinshi.{file_type}'
-    doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
+    filename = f'.{PATH_SEPARATOR}doujinshi.{file_type}'
    if doujinshi_obj is not None:
+        doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
        _filename = f'{doujinshi_obj.filename}.{file_type}'

        if file_type == 'cbz':
@@ -111,6 +113,8 @@ def parse_doujinshi_obj(
            _filename = _filename.replace('/', '-')

        filename = os.path.join(output_dir, _filename)
+    else:
+        doujinshi_dir = f'.{PATH_SEPARATOR}'

    return doujinshi_dir, filename

@@ -130,7 +134,7 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
    file_list.sort()

    for image in file_list:
-        if not os.path.splitext(image)[1] in ('.jpg', '.png', '.webp'):
+        if not os.path.splitext(image)[1] in EXTENSIONS:
            continue
        image_html += f'<img src="{image}" class="image-item"/>\n'

@@ -175,7 +179,7 @@ def move_to_folder(output_dir='.', doujinshi_obj=None, file_type=None):
    shutil.move(filename, os.path.join(doujinshi_dir, os.path.basename(filename)))


-def generate_main_html(output_dir='./'):
+def generate_main_html(output_dir=f'.{PATH_SEPARATOR}'):
    """
    Generate a main html to show all the contains doujinshi.
    With a link to their `index.html`.
@@ -256,7 +260,7 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa
            import img2pdf

            """Write images to a PDF file using img2pdf."""
-            file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp'))]
+            file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(EXTENSIONS)]
            file_list.sort()

            logger.info(f'Writing PDF file to path: {filename}')
@@ -308,7 +312,8 @@ def signal_handler(_signal, _frame):
 def paging(page_string):
    # 1,3-5,14 -> [1, 3, 4, 5, 14]
    if not page_string:
-        return []
+        # default, the first page
+        return [1]

    page_list = []
    for i in page_string.split(','):
@@ -333,7 +338,7 @@ def generate_metadata_file(output_dir, doujinshi_obj):

    fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
              'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
-              'TAGS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
+              'TAGS',  'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
              'SERIES', 'PARODY', 'URL']

    temp_dict = CaseInsensitiveDict(dict(doujinshi_obj.table))
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "nhentai"
-version = "0.5.17.2"
+version = "0.5.19"
 description = "nhentai doujinshi downloader"
 authors = ["Ricter Z <ricterzheng@gmail.com>"]
 license = "MIT"
Author	SHA1	Message	Date
ricterz	a83c571ec4	0.5.19	2025-01-15 19:47:24 +08:00
Ricter Zheng	e7ff5dab3d	Merge pull request #373 from nicojust/fix-favorite-metadata-output fix favorite_counts output in metadata	2025-01-15 12:26:24 +08:00
Ricter Zheng	a166898b60	fix #374	2025-01-15 12:26:01 +08:00
Nekwo	ce25051fa3	fix: output favorite_counts as an int	2025-01-13 19:51:40 +01:00
Nekwo	41fba6b5ac	fix: add missing favorite_counts in metadata file	2025-01-13 19:51:04 +01:00
ricterz	8944ece4a8	use os.path.sep as path separator	2025-01-11 08:48:43 +08:00
ricterz	6b4c4bdc70	0.5.18	2025-01-11 08:35:40 +08:00
ricterz	d1d0c22af8	fix #349	2025-01-11 08:34:30 +08:00
ricterz	803957ba88	fix #349	2025-01-11 08:33:59 +08:00
ricterz	13b584a820	fix #371 and #324	2025-01-11 08:02:36 +08:00
ricterz	be08fcf4cb	fix #368	2025-01-11 07:54:28 +08:00
ricterz	b585225308	fix #370	2025-01-11 07:52:51 +08:00
ricterz	54af682848	fix #369	2025-01-11 07:50:41 +08:00