Mirror of https://github.com/RicterZ/nhentai.git (synced 2025-07-01 07:59:29 +02:00)

Compare commits: 0.5.23...0.6.0-beta (13 commits)
SHA1:
0c9b92ce10
ca71a72747
1b7f19ee18
132f4c83da
6789b2b363
a6ac725ca7
b32962bca4
8a7be0e33d
0a47527461
023c8969eb
29c3abbe5c
057fae8a83
ba59dcf4db
README.rst
@@ -22,7 +22,7 @@ From Github:
 
     git clone https://github.com/RicterZ/nhentai
     cd nhentai
-    python setup.py install
+    pip install --no-cache-dir .
 
 Build Docker container:
 
nhentai/__init__.py
@@ -1,3 +1,3 @@
-__version__ = '0.5.22'
+__version__ = '0.6.0-beta'
 __author__ = 'RicterZ'
 __email__ = 'ricterzheng@gmail.com'
nhentai/cmdline.py
@@ -65,6 +65,8 @@ def cmd_parser():
 
     # operation options
     parser.add_option('--download', '-D', dest='is_download', action='store_true',
                       help='download doujinshi (for search results)')
+    parser.add_option('--no-download', dest='no_download', action='store_true', default=False,
+                      help='download doujinshi (for search results)')
     parser.add_option('--show', '-S', dest='is_show', action='store_true',
                       help='just show the doujinshi information')
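The new flag pair can be exercised in isolation; a minimal optparse sketch using only the two options from the hunk (everything else omitted):

from optparse import OptionParser

parser = OptionParser()
parser.add_option('--download', '-D', dest='is_download', action='store_true')
parser.add_option('--no-download', dest='no_download', action='store_true', default=False)

opts, _ = parser.parse_args(['--no-download'])
print(opts.is_download, opts.no_download)  # None True -> list results without downloading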
@@ -107,7 +109,6 @@ def cmd_parser():
                       help='read gallery IDs from file.')
     parser.add_option('--format', type='string', dest='name_format', action='store',
                       help='format the saved folder name', default='[%i][%a][%t]')
-    parser.add_option('--dry-run', action='store_true', dest='dryrun', help='Dry run, skip file download')
 
     parser.add_option('--no-filename-padding', action='store_true', dest='no_filename_padding',
                       default=False, help='no padding in the images filename, such as \'001.jpg\'')
@@ -123,16 +124,19 @@ def cmd_parser():
                       help='generate Comic Book CBZ File')
     parser.add_option('--pdf', '-P', dest='is_pdf', action='store_true',
                       help='generate PDF file')
 
+    parser.add_option('--meta', dest='generate_metadata', action='store_true', default=False,
+                      help='generate a metadata file in doujinshi format')
+    parser.add_option('--update-meta', dest='update_metadata', action='store_true', default=False,
+                      help='update the metadata file of a doujinshi, update CBZ metadata if exists')
+
     parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
                       help='remove downloaded doujinshi dir when generated CBZ or PDF file')
     parser.add_option('--move-to-folder', dest='move_to_folder', action='store_true', default=False,
                       help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
-    parser.add_option('--meta', dest='generate_metadata', action='store_true',
-                      help='generate a metadata file in doujinshi format')
-
     parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False,
                       help='regenerate the cbz or pdf file if exists')
+    parser.add_option('--no-metadata', dest='no_metadata', action='store_true', default=False,
+                      help='don\'t generate metadata json file in doujinshi output path')
 
     # nhentai options
     parser.add_option('--cookie', type='str', dest='cookie', action='store',
@@ -171,22 +175,24 @@ def cmd_parser():
 
     # --- set config ---
     if args.cookie is not None:
-        constant.CONFIG['cookie'] = args.cookie
+        constant.CONFIG['cookie'] = args.cookie.strip()
         write_config()
         logger.info('Cookie saved.')
-        sys.exit(0)
-    elif args.useragent is not None:
-        constant.CONFIG['useragent'] = args.useragent
+
+    if args.useragent is not None:
+        constant.CONFIG['useragent'] = args.useragent.strip()
         write_config()
         logger.info('User-Agent saved.')
-        sys.exit(0)
-    elif args.language is not None:
+
+    if args.language is not None:
         constant.CONFIG['language'] = args.language
         write_config()
         logger.info(f'Default language now set to "{args.language}"')
-        sys.exit(0)
         # TODO: search without language
 
+    if any([args.cookie, args.useragent, args.language]):
+        sys.exit(0)
+
     if args.proxy is not None:
         proxy_url = urlparse(args.proxy)
         if not args.proxy == '' and proxy_url.scheme not in ('http', 'https', 'socks5', 'socks5h',
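The rewrite replaces an if/elif chain, which exited after the first matching flag, with independent ifs and a single exit, so several config values can be saved in one run. A standalone sketch of the pattern, with stand-ins for the optparse result and nhentai.constant.CONFIG:

import sys
from types import SimpleNamespace

CONFIG = {}  # stand-in for nhentai.constant.CONFIG

def set_config(args, write_config=lambda: print('config written')):
    if args.cookie is not None:
        CONFIG['cookie'] = args.cookie.strip()        # .strip() drops pasted whitespace
        write_config()
    if args.useragent is not None:
        CONFIG['useragent'] = args.useragent.strip()
        write_config()
    if args.language is not None:
        CONFIG['language'] = args.language
        write_config()
    if any([args.cookie, args.useragent, args.language]):
        sys.exit(0)                                   # one exit point instead of one per branch

# Both values are saved in a single invocation before exiting:
set_config(SimpleNamespace(cookie=' csrftoken=abc123 ', useragent='Mozilla/5.0', language=None))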
@@ -239,8 +245,4 @@ def cmd_parser():
         logger.critical('Maximum number of used threads is 15')
         sys.exit(1)
 
-    if args.dryrun and (args.is_cbz or args.is_pdf):
-        logger.critical('Cannot generate PDF or CBZ during dry-run')
-        sys.exit(1)
-
     return args
nhentai/command.py
@@ -4,8 +4,6 @@ import shutil
 import sys
 import signal
 import platform
 import urllib
 
-import urllib3.exceptions
-
 from nhentai import constant
@@ -15,8 +13,7 @@ from nhentai.doujinshi import Doujinshi
 from nhentai.downloader import Downloader
 from nhentai.logger import logger
 from nhentai.constant import BASE_URL
-from nhentai.serializer import serialize_json
-from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \
+from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata, \
     paging, check_cookie, signal_handler, DB, move_to_folder
 
@@ -52,6 +49,9 @@ def main():
 
     page_list = paging(options.page)
 
+    if options.retry:
+        constant.RETRY_TIMES = int(options.retry)
+
     if options.favorites:
         if not options.is_download:
             logger.warning('You do not specify --download option')
@@ -87,7 +87,7 @@ def main():
     if not options.is_show:
         downloader = Downloader(path=options.output_dir, threads=options.threads,
                                 timeout=options.timeout, delay=options.delay,
-                                retry=options.retry, exit_on_fail=options.exit_on_fail,
+                                exit_on_fail=options.exit_on_fail,
                                 no_filename_padding=options.no_filename_padding)
 
         for doujinshi_id in doujinshi_ids:
@@ -97,17 +97,15 @@ def main():
             else:
                 continue
 
-            if not options.dryrun:
-                doujinshi.downloader = downloader
+            doujinshi.downloader = downloader
 
-                if doujinshi.check_if_need_download(options):
-                    doujinshi.download()
-                else:
-                    logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
-                    continue
+            if doujinshi.check_if_need_download(options):
+                doujinshi.download()
+            else:
+                logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
 
             if options.generate_metadata:
-                generate_metadata_file(options.output_dir, doujinshi)
+                generate_metadata(options.output_dir, doujinshi)
 
             if options.is_save_download_history:
                 with DB() as db:
@@ -116,9 +114,6 @@ def main():
             if not options.is_nohtml:
                 generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template'])
 
-            if not options.no_metadata:
-                generate_doc('json', options.output_dir, doujinshi, options.regenerate)
-
             if options.is_cbz:
                 generate_doc('cbz', options.output_dir, doujinshi, options.regenerate)
 
nhentai/constant.py
@@ -37,6 +37,8 @@ FAV_URL = f'{BASE_URL}/favorites/'
 
 PATH_SEPARATOR = os.path.sep
 
+RETRY_TIMES = 3
+
 
 IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i1.{urlparse(BASE_URL).hostname}/galleries'
 IMAGE_URL_MIRRORS = [
nhentai/doujinshi.py
@@ -77,6 +77,9 @@ class Doujinshi(object):
         logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}')
 
     def check_if_need_download(self, options):
+        if options.no_download:
+            return False
+
         base_path = os.path.join(self.downloader.path, self.filename)
 
         # regenerate, re-download
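Effect of the new guard, in a self-contained sketch (stub classes; the real method goes on to inspect existing files and the regenerate flags):

class Options:
    no_download = True   # set by the new --no-download flag

class Doujinshi:
    name = 'example'

    def check_if_need_download(self, options):
        if options.no_download:   # new early return from the hunk
            return False
        return True               # real code checks for existing PDF/CBZ here

d = Doujinshi()
if d.check_if_need_download(Options()):
    print('download')
else:
    # note: with --no-download set, the caller in command.py prints the
    # PDF/CBZ-exists skip message even though nothing was checked on disk
    print(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {d.name}')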
nhentai/downloader.py
@@ -34,13 +34,12 @@ def download_callback(result):
 
 
 class Downloader(Singleton):
-    def __init__(self, path='', threads=5, timeout=30, delay=0, retry=3, exit_on_fail=False,
+    def __init__(self, path='', threads=5, timeout=30, delay=0, exit_on_fail=False,
                  no_filename_padding=False):
         self.threads = threads
         self.path = str(path)
         self.timeout = timeout
         self.delay = delay
-        self.retry = retry
         self.exit_on_fail = exit_on_fail
         self.folder = None
         self.semaphore = None
@@ -101,7 +100,7 @@ class Downloader(Singleton):
             return -1, url
 
         except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e:
-            if retried < self.retry:
+            if retried < constant.RETRY_TIMES:
                 logger.warning(f'Download {filename} failed, retrying({retried + 1}) times...')
                 return await self.download(
                     url=url,
@@ -111,7 +110,7 @@ class Downloader(Singleton):
                 proxy=proxy,
             )
         else:
-            logger.warning(f'Download {filename} failed with {self.retry} times retried, skipped')
+            logger.warning(f'Download {filename} failed with {constant.RETRY_TIMES} times retried, skipped')
             return -2, url
 
     except NHentaiImageNotExistException as e:
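Together with the new constant.RETRY_TIMES and the options.retry handling in command.py, these hunks replace the per-Downloader retry attribute with one module-level value. A runnable toy version of the wiring (names are illustrative, not the real nhentai API):

RETRY_TIMES = 3  # stand-in for nhentai.constant.RETRY_TIMES

def set_retry(value):
    global RETRY_TIMES           # command.py does: constant.RETRY_TIMES = int(options.retry)
    RETRY_TIMES = int(value)

def download(url, retried=0):
    try:
        raise TimeoutError('simulated network failure')
    except TimeoutError:
        if retried < RETRY_TIMES:
            return download(url, retried + 1)
        return -2, url           # same sentinel the downloader returns after giving up

set_retry('5')
print(download('http://example.org/img.jpg'))  # (-2, url) after 5 retries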
nhentai/parser.py
@@ -92,13 +92,27 @@ def favorites_parser(page=None):
         page_range_list = range(1, pages + 1)
 
     for page in page_range_list:
-        try:
-            logger.info(f'Getting doujinshi ids of page {page}')
-            resp = request('get', f'{constant.FAV_URL}?page={page}').content
-            result.extend(_get_title_and_id(resp))
-        except Exception as e:
-            logger.error(f'Error: {e}, continue')
+        logger.info(f'Getting doujinshi ids of page {page}')
+
+        i = 0
+        while i <= constant.RETRY_TIMES + 1:
+            i += 1
+            if i > 3:
+                logger.error(f'Failed to get favorites at page {page} after 3 times retried, skipped')
+                break
+
+            try:
+                resp = request('get', f'{constant.FAV_URL}?page={page}').content
+                temp_result = _get_title_and_id(resp)
+                if not temp_result:
+                    logger.warning(f'Failed to get favorites at page {page}, retrying ({i} times) ...')
+                    continue
+                else:
+                    result.extend(temp_result)
+                    break
+
+            except Exception as e:
+                logger.warning(f'Error: {e}, retrying ({i} times) ...')
 
     return result
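Tracing the new loop shows the effective cap: the inner `if i > 3` breaks after three attempts even when constant.RETRY_TIMES is raised above 3. A standalone mock where every request fails:

RETRY_TIMES = 3

def favorites_page(page):
    i = 0
    while i <= RETRY_TIMES + 1:
        i += 1
        if i > 3:
            print(f'Failed to get favorites at page {page} after 3 times retried, skipped')
            break
        try:
            raise IOError('simulated network error')   # request(...) stand-in
        except IOError as e:
            print(f'Error: {e}, retrying ({i} times) ...')

favorites_page(1)  # three retry warnings, then the skip message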
@@ -151,7 +165,9 @@ def doujinshi_parser(id_, counter=0):
     doujinshi_cover = html.find('div', attrs={'id': 'cover'})
     # img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
     #                    doujinshi_cover.a.img.attrs['data-src'])
-    img_id = re.search(r'/galleries/(\d+)/cover\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
+
+    # fix cover.webp.webp
+    img_id = re.search(r'/galleries/(\d+)/cover(\.webp|\.jpg|\.png)?\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
 
     ext = []
     for i in html.find_all('div', attrs={'class': 'thumb-container'}):
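The `fix cover.webp.webp` comment can be verified standalone: some covers carry a doubled extension, which the old pattern rejects because `\w+` cannot cross the extra dot:

import re

src = '/galleries/123456/cover.webp.webp'
old = re.search(r'/galleries/(\d+)/cover\.\w+$', src)
new = re.search(r'/galleries/(\d+)/cover(\.webp|\.jpg|\.png)?\.\w+$', src)

print(old)           # None -- the old pattern fails on the doubled extension
print(new.group(1))  # '123456'
print(re.search(r'/galleries/(\d+)/cover(\.webp|\.jpg|\.png)?\.\w+$',
                '/galleries/987/cover.jpg').group(1))  # normal covers still match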
@@ -261,7 +277,7 @@ def search_parser(keyword, sorting, page, is_page_all=False):
         i = 0
 
         logger.info(f'Searching doujinshis using keywords "{keyword}" on page {p}{total}')
-        while i < 3:
+        while i < constant.RETRY_TIMES:
             try:
                 url = request('get', url=constant.SEARCH_URL, params={'query': keyword,
                                                                       'page': p, 'sort': sorting}).url
nhentai/serializer.py
@@ -4,6 +4,7 @@ import os
 
 from nhentai.constant import PATH_SEPARATOR, LANGUAGE_ISO
 from xml.sax.saxutils import escape
+from requests.structures import CaseInsensitiveDict
 
 
 def serialize_json(doujinshi, output_dir: str):
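The new import supports the field lookups in serialize_info_txt below (requests is already a dependency): CaseInsensitiveDict lets the upper-case field names match mixed-case table keys.

from requests.structures import CaseInsensitiveDict

table = CaseInsensitiveDict({'Title': 'Example', 'Pages': 24})
print(table.get('TITLE'))   # 'Example'
print(table.get('pages'))   # 24
print(table.get('AUTHOR'))  # None -- missing keys still fall through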
@@ -77,6 +78,26 @@ def serialize_comic_xml(doujinshi, output_dir):
     f.write('</ComicInfo>')
 
 
+def serialize_info_txt(doujinshi, output_dir: str):
+    info_txt_path = os.path.join(output_dir, 'info.txt')
+    f = open(info_txt_path, 'w', encoding='utf-8')
+
+    fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
+              'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
+              'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
+              'SERIES', 'PARODY', 'URL']
+
+    temp_dict = CaseInsensitiveDict(dict(doujinshi.table))
+    for i in fields:
+        v = temp_dict.get(i)
+        v = temp_dict.get(f'{i}s') if v is None else v
+        v = doujinshi.info.get(i.lower(), None) if v is None else v
+        v = doujinshi.info.get(f'{i.lower()}s', "Unknown") if v is None else v
+        f.write(f'{i}: {v}\n')
+
+    f.close()
+
+
 def xml_write_simple_tag(f, name, val, indent=1):
     f.write(f'{" "*indent}<{name}>{escape(str(val))}</{name}>\n')
 
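The four chained assignments implement a fallback ladder: exact field, pluralized field, lower-case doujinshi.info key, pluralized info key, then 'Unknown'. Isolated with illustrative data:

from requests.structures import CaseInsensitiveDict

table = CaseInsensitiveDict({'Title': 'Example', 'Characters': 'Alice, Bob'})
info = {'artists': 'someone'}   # doujinshi.info stand-in

def resolve(field):
    v = table.get(field)
    v = table.get(f'{field}s') if v is None else v
    v = info.get(field.lower(), None) if v is None else v
    return info.get(f'{field.lower()}s', 'Unknown') if v is None else v

for f in ('TITLE', 'CHARACTER', 'ARTIST', 'PUBLISHER'):
    print(f'{f}: {resolve(f)}')
# TITLE: Example
# CHARACTER: Alice, Bob   (matched via the pluralized table key)
# ARTIST: someone         (matched via the pluralized info key)
# PUBLISHER: Unknown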
@@ -131,3 +152,4 @@ def set_js_database():
     indexed_json = json.dumps(indexed_json, separators=(',', ':'))
     f.write('var data = ' + indexed_json)
     f.write(';\nvar tags = ' + unique_json)
+
nhentai/utils.py
@@ -11,12 +11,11 @@ import requests
 import sqlite3
 import urllib.parse
 from typing import Tuple
-from requests.structures import CaseInsensitiveDict
 
 from nhentai import constant
 from nhentai.constant import PATH_SEPARATOR
 from nhentai.logger import logger
-from nhentai.serializer import serialize_comic_xml, serialize_json, set_js_database
+from nhentai.serializer import serialize_comic_xml, serialize_json, serialize_info_txt, set_js_database
 
 MAX_FIELD_LENGTH = 100
 EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
@@ -105,9 +104,6 @@ def parse_doujinshi_obj(
         doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
         _filename = f'{doujinshi_obj.filename}.{file_type}'
 
-        if file_type == 'cbz':
-            serialize_comic_xml(doujinshi_obj, doujinshi_dir)
-
         if file_type == 'pdf':
             _filename = _filename.replace('/', '-')
 
@@ -115,6 +111,9 @@ def parse_doujinshi_obj(
     else:
         doujinshi_dir = f'.{PATH_SEPARATOR}'
 
+    if not os.path.exists(doujinshi_dir):
+        os.makedirs(doujinshi_dir)
+
     return doujinshi_dir, filename
 
 
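Since Python 3.2 the same guard can be a single call; `exist_ok=True` also avoids the race between the existence check and the creation:

import os

os.makedirs('./output/example', exist_ok=True)  # equivalent to the two-line guard above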
@@ -235,8 +234,20 @@ def generate_main_html(output_dir=f'.{PATH_SEPARATOR}'):
         logger.warning(f'Writing Main Viewer failed ({e})')
 
 
+def generate_cbz(doujinshi_dir, filename):
+    file_list = os.listdir(doujinshi_dir)
+    file_list.sort()
+
+    logger.info(f'Writing CBZ file to path: {filename}')
+    with zipfile.ZipFile(filename, 'w') as cbz_pf:
+        for image in file_list:
+            image_path = os.path.join(doujinshi_dir, image)
+            cbz_pf.write(image_path, image)
+
+    logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
+
+
 def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
     doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
 
     if os.path.exists(f'{doujinshi_dir}.{file_type}') and not regenerate:
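With the packing logic extracted, CBZ creation is callable on any existing image directory. A trimmed, self-contained version of the helper (logging dropped; paths are made up):

import os
import zipfile

def generate_cbz(doujinshi_dir, filename):
    file_list = sorted(os.listdir(doujinshi_dir))        # page order = filename order
    with zipfile.ZipFile(filename, 'w') as cbz_pf:
        for image in file_list:
            cbz_pf.write(os.path.join(doujinshi_dir, image), image)

# assuming the image folder already exists on disk:
generate_cbz('[12345][artist][title]', '[12345][artist][title].cbz')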
@@ -244,16 +255,9 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
         return
 
     if file_type == 'cbz':
-        file_list = os.listdir(doujinshi_dir)
-        file_list.sort()
-
-        logger.info(f'Writing CBZ file to path: {filename}')
-        with zipfile.ZipFile(filename, 'w') as cbz_pf:
-            for image in file_list:
-                image_path = os.path.join(doujinshi_dir, image)
-                cbz_pf.write(image_path, image)
-
-        logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
+        serialize_comic_xml(doujinshi_obj, doujinshi_dir)
+        generate_cbz(doujinshi_dir, filename)
+
     elif file_type == 'pdf':
         try:
             import img2pdf
@@ -273,9 +277,16 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
 
         except ImportError:
             logger.error("Please install img2pdf package by using pip.")
-    else:
-        raise ValueError('invalid file type')
 
+    elif file_type == 'json':
+        serialize_json(doujinshi_obj, doujinshi_dir)
+
+
+def generate_metadata(output_dir, doujinshi_obj=None):
+    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '')
+    serialize_json(doujinshi_obj, doujinshi_dir)
+    serialize_comic_xml(doujinshi_obj, doujinshi_dir)
+    serialize_info_txt(doujinshi_obj, doujinshi_dir)
+    logger.log(16, f'Metadata files have been written to "{doujinshi_dir}"')
 
 
 def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
@@ -332,29 +343,6 @@ def paging(page_string):
     return page_list
 
 
-def generate_metadata_file(output_dir, doujinshi_obj):
-
-    info_txt_path = os.path.join(output_dir, doujinshi_obj.filename, 'info.txt')
-
-    f = open(info_txt_path, 'w', encoding='utf-8')
-
-    fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
-              'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
-              'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
-              'SERIES', 'PARODY', 'URL']
-
-    temp_dict = CaseInsensitiveDict(dict(doujinshi_obj.table))
-    for i in fields:
-        v = temp_dict.get(i)
-        v = temp_dict.get(f'{i}s') if v is None else v
-        v = doujinshi_obj.info.get(i.lower(), None) if v is None else v
-        v = doujinshi_obj.info.get(f'{i.lower()}s', "Unknown") if v is None else v
-        f.write(f'{i}: {v}\n')
-
-    f.close()
-    logger.log(16, f'Metadata Info has been written to "{info_txt_path}"')
-
-
 class DB(object):
     conn = None
     cur = None
nhentai/viewer/main.js
@@ -49,8 +49,8 @@ document.onkeypress = event => {
     switch (event.key.toLowerCase()) {
         // Previous Image
         case 'w':
-                scrollBy(0, -40);
-                break;
+            scrollBy(0, -40);
+            break;
         case 'a':
             changePage(currentPage - 1);
             break;
@@ -61,7 +61,7 @@ document.onkeypress = event => {
         // Next Image
         case ' ':
         case 's':
-                scrollBy(0, 40);
+            scrollBy(0, 40);
             break;
         case 'd':
             changePage(currentPage + 1);
@@ -75,11 +75,13 @@ document.onkeydown = event =>{
             changePage(currentPage - 1);
             break;
         case 38: //up
+            changePage(currentPage - 1);
+            break;
         case 39: //right
             changePage(currentPage + 1);
             break;
         case 40: //down
             changePage(currentPage + 1);
             break;
     }
 };
pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "nhentai"
-version = "0.5.22"
+version = "0.6.0-beta"
 description = "nhentai doujinshi downloader"
 authors = ["Ricter Z <ricterzheng@gmail.com>"]
 license = "MIT"