0.6.0-beta #394

fix #395
0.5.25, fix #393
2025-07-01 16:09:28 +02:00 · 2025-02-28 00:17:05 +08:00 · 2025-02-27 22:07:40 +08:00 · 2025-02-26 00:13:41 +08:00 · 2025-02-26 00:12:49 +08:00 · 2025-02-25 23:51:13 +08:00
9 changed files with 75 additions and 66 deletions
--- a/README.rst
+++ b/README.rst
@@ -22,7 +22,7 @@ From Github:
    git clone https://github.com/RicterZ/nhentai
    cd nhentai
-    python setup.py install
+    pip install --no-cache-dir .
 Build Docker container:
--- a/nhentai/__init__.py
+++ b/nhentai/__init__.py
@@ -1,3 +1,3 @@
-__version__ = '0.5.23'
+__version__ = '0.6.0-beta'
 __author__ = 'RicterZ'
 __email__ = 'ricterzheng@gmail.com'
--- a/nhentai/cmdline.py
+++ b/nhentai/cmdline.py
@@ -65,6 +65,8 @@ def cmd_parser():
    # operation options
    parser.add_option('--download', '-D', dest='is_download', action='store_true',
                      help='download doujinshi (for search results)')
    parser.add_option('--no-download', dest='no_download', action='store_true', default=False,
                      help='download doujinshi (for search results)')
    parser.add_option('--show', '-S', dest='is_show', action='store_true',
                      help='just show the doujinshi information')
@@ -107,7 +109,6 @@ def cmd_parser():
                      help='read gallery IDs from file.')
    parser.add_option('--format', type='string', dest='name_format', action='store',
                      help='format the saved folder name', default='[%i][%a][%t]')
    parser.add_option('--dry-run', action='store_true', dest='dryrun', help='Dry run, skip file download')
    parser.add_option('--no-filename-padding', action='store_true', dest='no_filename_padding',
                      default=False, help='no padding in the images filename, such as \'001.jpg\'')
@@ -123,16 +124,19 @@ def cmd_parser():
                      help='generate Comic Book CBZ File')
    parser.add_option('--pdf', '-P', dest='is_pdf', action='store_true',
                      help='generate PDF file')
    parser.add_option('--meta', dest='generate_metadata', action='store_true', default=False,
                      help='generate a metadata file in doujinshi format')
    parser.add_option('--update-meta', dest='update_metadata', action='store_true', default=False,
                      help='update the metadata file of a doujinshi, update CBZ metadata if exists')
    parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
                      help='remove downloaded doujinshi dir when generated CBZ or PDF file')
    parser.add_option('--move-to-folder', dest='move_to_folder', action='store_true', default=False,
                      help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
-    parser.add_option('--meta', dest='generate_metadata', action='store_true',
+
                      help='generate a metadata file in doujinshi format')
    parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False,
                      help='regenerate the cbz or pdf file if exists')
    parser.add_option('--no-metadata', dest='no_metadata', action='store_true', default=False,
                      help='don\'t generate metadata json file in doujinshi output path')
    # nhentai options
    parser.add_option('--cookie', type='str', dest='cookie', action='store',
@@ -188,7 +192,6 @@ def cmd_parser():
    if any([args.cookie, args.useragent, args.language]):
        sys.exit(0)
    # -- end set config
    if args.proxy is not None:
        proxy_url = urlparse(args.proxy)
@@ -242,8 +245,4 @@ def cmd_parser():
        logger.critical('Maximum number of used threads is 15')
        sys.exit(1)
    if args.dryrun and (args.is_cbz or args.is_pdf):
        logger.critical('Cannot generate PDF or CBZ during dry-run')
        sys.exit(1)
    return args
--- a/nhentai/command.py
+++ b/nhentai/command.py
@@ -13,7 +13,7 @@ from nhentai.doujinshi import Doujinshi
 from nhentai.downloader import Downloader
 from nhentai.logger import logger
 from nhentai.constant import BASE_URL
-from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \
+from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata, \
    paging, check_cookie, signal_handler, DB, move_to_folder
@@ -97,17 +97,15 @@ def main():
            else:
                continue
-            if not options.dryrun:
+            doujinshi.downloader = downloader
                doujinshi.downloader = downloader
-                if doujinshi.check_if_need_download(options):
+            if doujinshi.check_if_need_download(options):
-                    doujinshi.download()
+                doujinshi.download()
-                else:
+            else:
-                    logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
+                logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
                    continue
            if options.generate_metadata:
-                generate_metadata_file(options.output_dir, doujinshi)
+                generate_metadata(options.output_dir, doujinshi)
            if options.is_save_download_history:
                with DB() as db:
@@ -116,9 +114,6 @@ def main():
            if not options.is_nohtml:
                generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template'])
            if not options.no_metadata:
                generate_doc('json', options.output_dir, doujinshi, options.regenerate)
            if options.is_cbz:
                generate_doc('cbz', options.output_dir, doujinshi, options.regenerate)
--- a/nhentai/doujinshi.py
+++ b/nhentai/doujinshi.py
@@ -77,6 +77,9 @@ class Doujinshi(object):
        logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}')
    def check_if_need_download(self, options):
        if options.no_download:
            return False
        base_path = os.path.join(self.downloader.path, self.filename)
        # regenerate, re-download
--- a/nhentai/parser.py
+++ b/nhentai/parser.py
@@ -165,7 +165,9 @@ def doujinshi_parser(id_, counter=0):
    doujinshi_cover = html.find('div', attrs={'id': 'cover'})
    # img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
    #                   doujinshi_cover.a.img.attrs['data-src'])
-    img_id = re.search(r'/galleries/(\d+)/cover\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
+
    # fix cover.webp.webp
    img_id = re.search(r'/galleries/(\d+)/cover(\.webp|\.jpg|\.png)?\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
    ext = []
    for i in html.find_all('div', attrs={'class': 'thumb-container'}):
--- a/nhentai/serializer.py
+++ b/nhentai/serializer.py
@@ -4,6 +4,7 @@ import os
 from nhentai.constant import PATH_SEPARATOR, LANGUAGE_ISO
 from xml.sax.saxutils import escape
 from requests.structures import CaseInsensitiveDict
 def serialize_json(doujinshi, output_dir: str):
@@ -77,6 +78,26 @@ def serialize_comic_xml(doujinshi, output_dir):
        f.write('</ComicInfo>')
 def serialize_info_txt(doujinshi, output_dir: str):
    """Write a plain-text ``info.txt`` summary of *doujinshi* into *output_dir*.

    One ``FIELD: value`` line is written for each entry of a fixed field
    list. Each value is looked up, in order, in ``doujinshi.table``
    (case-insensitively) under the field name and then under the field name
    with an ``s`` appended, then in ``doujinshi.info`` under the lower-cased
    name and its ``s``-suffixed form. If every lookup yields ``None`` the
    value written is ``Unknown``.
    """
    info_txt_path = os.path.join(output_dir, 'info.txt')

    fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
              'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
              'TAGS',  'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
              'SERIES', 'PARODY', 'URL']

    temp_dict = CaseInsensitiveDict(dict(doujinshi.table))

    # The context manager guarantees the handle is closed even when a lookup
    # or a write raises part-way through the loop.
    with open(info_txt_path, 'w', encoding='utf-8') as f:
        for i in fields:
            v = temp_dict.get(i)
            v = temp_dict.get(f'{i}s') if v is None else v
            v = doujinshi.info.get(i.lower(), None) if v is None else v
            v = doujinshi.info.get(f'{i.lower()}s', "Unknown") if v is None else v
            f.write(f'{i}: {v}\n')
 def xml_write_simple_tag(f, name, val, indent=1):
    """Emit one ``<name>value</name>`` line to *f*.

    The line is prefixed with *indent* spaces; the value is converted to
    ``str`` and XML-escaped before being written.
    """
    padding = ' ' * indent
    text = escape(str(val))
    f.write(f'{padding}<{name}>{text}</{name}>\n')
@@ -131,3 +152,4 @@ def set_js_database():
        indexed_json = json.dumps(indexed_json, separators=(',', ':'))
        f.write('var data = ' + indexed_json)
        f.write(';\nvar tags = ' + unique_json)
--- a/nhentai/utils.py
+++ b/nhentai/utils.py
@@ -11,12 +11,11 @@ import requests
 import sqlite3
 import urllib.parse
 from typing import Tuple
 from requests.structures import CaseInsensitiveDict
 from nhentai import constant
 from nhentai.constant import PATH_SEPARATOR
 from nhentai.logger import logger
-from nhentai.serializer import serialize_comic_xml, serialize_json, set_js_database
+from nhentai.serializer import serialize_comic_xml, serialize_json, serialize_info_txt, set_js_database
 MAX_FIELD_LENGTH = 100
 EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
@@ -105,9 +104,6 @@ def parse_doujinshi_obj(
        doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
        _filename = f'{doujinshi_obj.filename}.{file_type}'
        if file_type == 'cbz':
            serialize_comic_xml(doujinshi_obj, doujinshi_dir)
        if file_type == 'pdf':
            _filename = _filename.replace('/', '-')
@@ -115,6 +111,9 @@ def parse_doujinshi_obj(
    else:
        doujinshi_dir = f'.{PATH_SEPARATOR}'
    if not os.path.exists(doujinshi_dir):
        os.makedirs(doujinshi_dir)
    return doujinshi_dir, filename
@@ -235,8 +234,20 @@ def generate_main_html(output_dir=f'.{PATH_SEPARATOR}'):
        logger.warning(f'Writing Main Viewer failed ({e})')
-def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
+def generate_cbz(doujinshi_dir, filename):
    file_list = os.listdir(doujinshi_dir)
    file_list.sort()
    logger.info(f'Writing CBZ file to path: {filename}')
    with zipfile.ZipFile(filename, 'w') as cbz_pf:
        for image in file_list:
            image_path = os.path.join(doujinshi_dir, image)
            cbz_pf.write(image_path, image)
    logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
 def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
    if os.path.exists(f'{doujinshi_dir}.{file_type}') and not regenerate:
@@ -244,16 +255,9 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa
        return
    if file_type == 'cbz':
-        file_list = os.listdir(doujinshi_dir)
+        serialize_comic_xml(doujinshi_obj, doujinshi_dir)
-        file_list.sort()
+        generate_cbz(doujinshi_dir, filename)
        logger.info(f'Writing CBZ file to path: {filename}')
        with zipfile.ZipFile(filename, 'w') as cbz_pf:
            for image in file_list:
                image_path = os.path.join(doujinshi_dir, image)
                cbz_pf.write(image_path, image)
        logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
    elif file_type == 'pdf':
        try:
            import img2pdf
@@ -273,9 +277,16 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa
        except ImportError:
            logger.error("Please install img2pdf package by using pip.")
    else:
        raise ValueError('invalid file type')
-    elif file_type == 'json':
+
-        serialize_json(doujinshi_obj, doujinshi_dir)
+def generate_metadata(output_dir, doujinshi_obj=None):
    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '')
    serialize_json(doujinshi_obj, doujinshi_dir)
    serialize_comic_xml(doujinshi_obj, doujinshi_dir)
    serialize_info_txt(doujinshi_obj, doujinshi_dir)
    logger.log(16, f'Metadata files have been written to "{doujinshi_dir}"')
 def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
@@ -332,29 +343,6 @@ def paging(page_string):
    return page_list
 def generate_metadata_file(output_dir, doujinshi_obj):
    """Write ``info.txt`` for *doujinshi_obj* inside its folder under *output_dir*.

    The file is created at ``<output_dir>/<doujinshi_obj.filename>/info.txt``
    and holds one ``FIELD: value`` line per entry of a fixed field list.
    Each value is looked up, in order, in ``doujinshi_obj.table``
    (case-insensitively) under the field name and then under the field name
    with an ``s`` appended, then in ``doujinshi_obj.info`` under the
    lower-cased name and its ``s``-suffixed form. If every lookup yields
    ``None`` the value written is ``Unknown``.
    """
    info_txt_path = os.path.join(output_dir, doujinshi_obj.filename, 'info.txt')

    fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
              'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
              'TAGS',  'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
              'SERIES', 'PARODY', 'URL']

    temp_dict = CaseInsensitiveDict(dict(doujinshi_obj.table))

    # The context manager guarantees the handle is closed even when a lookup
    # or a write raises part-way through the loop.
    with open(info_txt_path, 'w', encoding='utf-8') as f:
        for i in fields:
            v = temp_dict.get(i)
            v = temp_dict.get(f'{i}s') if v is None else v
            v = doujinshi_obj.info.get(i.lower(), None) if v is None else v
            v = doujinshi_obj.info.get(f'{i.lower()}s', "Unknown") if v is None else v
            f.write(f'{i}: {v}\n')

    logger.log(16, f'Metadata Info has been written to "{info_txt_path}"')
 class DB(object):
    conn = None
    cur = None
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "nhentai"
-version = "0.5.23"
+version = "0.6.0-beta"
 description = "nhentai doujinshi downloader"
 authors = ["Ricter Z <ricterzheng@gmail.com>"]
 license = "MIT"
Author	SHA1	Message	Date
ricterz	0c9b92ce10	0.6.0-beta #394	2025-02-28 00:17:05 +08:00
ricterz	ca71a72747	fix #395	2025-02-27 22:07:40 +08:00
ricterz	1b7f19ee18	0.5.25, fix #393	2025-02-26 00:13:41 +08:00
ricterz	132f4c83da	Merge branch 'master' of github.com:RicterZ/nhentai	2025-02-26 00:12:49 +08:00
ricterz	6789b2b363	fix bug of cover.webp.webp	2025-02-25 23:51:13 +08:00
Ricter Zheng	a6ac725ca7	Merge pull request #392 from akakishi/master Update installation instructions in README.rst	2025-02-23 20:29:15 +08:00
akakishi	b32962bca4	Update README.rst File `setup.py` was removed in a previous commit; updated README to reflect the new installation process.	2025-02-23 01:18:54 -03:00
ricterz	8a7be0e33d	0.5.24	2025-02-09 20:16:44 +08:00