Compare commits

..

8 Commits

Author SHA1 Message Date
0c9b92ce10 0.6.0-beta #394 2025-02-28 00:17:05 +08:00
ca71a72747 fix #395 2025-02-27 22:07:40 +08:00
1b7f19ee18 0.5.25, fix #393 2025-02-26 00:13:41 +08:00
132f4c83da Merge branch 'master' of github.com:RicterZ/nhentai 2025-02-26 00:12:49 +08:00
6789b2b363 fix bug of cover.webp.webp 2025-02-25 23:51:13 +08:00
a6ac725ca7 Merge pull request #392 from akakishi/master
Update installation instructions in README.rst
2025-02-23 20:29:15 +08:00
b32962bca4 Update README.rst
File `setup.py` was removed in a previous commit; updated README to reflect the new installation process.
2025-02-23 01:18:54 -03:00
8a7be0e33d 0.5.24 2025-02-09 20:16:44 +08:00
9 changed files with 75 additions and 66 deletions

View File

@ -22,7 +22,7 @@ From Github:
git clone https://github.com/RicterZ/nhentai git clone https://github.com/RicterZ/nhentai
cd nhentai cd nhentai
python setup.py install pip install --no-cache-dir .
Build Docker container: Build Docker container:

View File

@ -1,3 +1,3 @@
__version__ = '0.5.23' __version__ = '0.6.0-beta'
__author__ = 'RicterZ' __author__ = 'RicterZ'
__email__ = 'ricterzheng@gmail.com' __email__ = 'ricterzheng@gmail.com'

View File

@ -65,6 +65,8 @@ def cmd_parser():
# operation options # operation options
parser.add_option('--download', '-D', dest='is_download', action='store_true', parser.add_option('--download', '-D', dest='is_download', action='store_true',
help='download doujinshi (for search results)') help='download doujinshi (for search results)')
parser.add_option('--no-download', dest='no_download', action='store_true', default=False,
help='download doujinshi (for search results)')
parser.add_option('--show', '-S', dest='is_show', action='store_true', parser.add_option('--show', '-S', dest='is_show', action='store_true',
help='just show the doujinshi information') help='just show the doujinshi information')
@ -107,7 +109,6 @@ def cmd_parser():
help='read gallery IDs from file.') help='read gallery IDs from file.')
parser.add_option('--format', type='string', dest='name_format', action='store', parser.add_option('--format', type='string', dest='name_format', action='store',
help='format the saved folder name', default='[%i][%a][%t]') help='format the saved folder name', default='[%i][%a][%t]')
parser.add_option('--dry-run', action='store_true', dest='dryrun', help='Dry run, skip file download')
parser.add_option('--no-filename-padding', action='store_true', dest='no_filename_padding', parser.add_option('--no-filename-padding', action='store_true', dest='no_filename_padding',
default=False, help='no padding in the images filename, such as \'001.jpg\'') default=False, help='no padding in the images filename, such as \'001.jpg\'')
@ -123,16 +124,19 @@ def cmd_parser():
help='generate Comic Book CBZ File') help='generate Comic Book CBZ File')
parser.add_option('--pdf', '-P', dest='is_pdf', action='store_true', parser.add_option('--pdf', '-P', dest='is_pdf', action='store_true',
help='generate PDF file') help='generate PDF file')
parser.add_option('--meta', dest='generate_metadata', action='store_true', default=False,
help='generate a metadata file in doujinshi format')
parser.add_option('--update-meta', dest='update_metadata', action='store_true', default=False,
help='update the metadata file of a doujinshi, update CBZ metadata if exists')
parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False, parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
help='remove downloaded doujinshi dir when generated CBZ or PDF file') help='remove downloaded doujinshi dir when generated CBZ or PDF file')
parser.add_option('--move-to-folder', dest='move_to_folder', action='store_true', default=False, parser.add_option('--move-to-folder', dest='move_to_folder', action='store_true', default=False,
help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file') help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
parser.add_option('--meta', dest='generate_metadata', action='store_true',
help='generate a metadata file in doujinshi format')
parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False, parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False,
help='regenerate the cbz or pdf file if exists') help='regenerate the cbz or pdf file if exists')
parser.add_option('--no-metadata', dest='no_metadata', action='store_true', default=False,
help='don\'t generate metadata json file in doujinshi output path')
# nhentai options # nhentai options
parser.add_option('--cookie', type='str', dest='cookie', action='store', parser.add_option('--cookie', type='str', dest='cookie', action='store',
@ -188,7 +192,6 @@ def cmd_parser():
if any([args.cookie, args.useragent, args.language]): if any([args.cookie, args.useragent, args.language]):
sys.exit(0) sys.exit(0)
# -- end set config
if args.proxy is not None: if args.proxy is not None:
proxy_url = urlparse(args.proxy) proxy_url = urlparse(args.proxy)
@ -242,8 +245,4 @@ def cmd_parser():
logger.critical('Maximum number of used threads is 15') logger.critical('Maximum number of used threads is 15')
sys.exit(1) sys.exit(1)
if args.dryrun and (args.is_cbz or args.is_pdf):
logger.critical('Cannot generate PDF or CBZ during dry-run')
sys.exit(1)
return args return args

View File

@ -13,7 +13,7 @@ from nhentai.doujinshi import Doujinshi
from nhentai.downloader import Downloader from nhentai.downloader import Downloader
from nhentai.logger import logger from nhentai.logger import logger
from nhentai.constant import BASE_URL from nhentai.constant import BASE_URL
from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \ from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata, \
paging, check_cookie, signal_handler, DB, move_to_folder paging, check_cookie, signal_handler, DB, move_to_folder
@ -97,17 +97,15 @@ def main():
else: else:
continue continue
if not options.dryrun: doujinshi.downloader = downloader
doujinshi.downloader = downloader
if doujinshi.check_if_need_download(options): if doujinshi.check_if_need_download(options):
doujinshi.download() doujinshi.download()
else: else:
logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}') logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
continue
if options.generate_metadata: if options.generate_metadata:
generate_metadata_file(options.output_dir, doujinshi) generate_metadata(options.output_dir, doujinshi)
if options.is_save_download_history: if options.is_save_download_history:
with DB() as db: with DB() as db:
@ -116,9 +114,6 @@ def main():
if not options.is_nohtml: if not options.is_nohtml:
generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template']) generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template'])
if not options.no_metadata:
generate_doc('json', options.output_dir, doujinshi, options.regenerate)
if options.is_cbz: if options.is_cbz:
generate_doc('cbz', options.output_dir, doujinshi, options.regenerate) generate_doc('cbz', options.output_dir, doujinshi, options.regenerate)

View File

@ -77,6 +77,9 @@ class Doujinshi(object):
logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}') logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}')
def check_if_need_download(self, options): def check_if_need_download(self, options):
if options.no_download:
return False
base_path = os.path.join(self.downloader.path, self.filename) base_path = os.path.join(self.downloader.path, self.filename)
# regenerate, re-download # regenerate, re-download

View File

@ -165,7 +165,9 @@ def doujinshi_parser(id_, counter=0):
doujinshi_cover = html.find('div', attrs={'id': 'cover'}) doujinshi_cover = html.find('div', attrs={'id': 'cover'})
# img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$', # img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
# doujinshi_cover.a.img.attrs['data-src']) # doujinshi_cover.a.img.attrs['data-src'])
img_id = re.search(r'/galleries/(\d+)/cover\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
# fix cover.webp.webp
img_id = re.search(r'/galleries/(\d+)/cover(\.webp|\.jpg|\.png)?\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
ext = [] ext = []
for i in html.find_all('div', attrs={'class': 'thumb-container'}): for i in html.find_all('div', attrs={'class': 'thumb-container'}):

View File

@ -4,6 +4,7 @@ import os
from nhentai.constant import PATH_SEPARATOR, LANGUAGE_ISO from nhentai.constant import PATH_SEPARATOR, LANGUAGE_ISO
from xml.sax.saxutils import escape from xml.sax.saxutils import escape
from requests.structures import CaseInsensitiveDict
def serialize_json(doujinshi, output_dir: str): def serialize_json(doujinshi, output_dir: str):
@ -77,6 +78,26 @@ def serialize_comic_xml(doujinshi, output_dir):
f.write('</ComicInfo>') f.write('</ComicInfo>')
def serialize_info_txt(doujinshi, output_dir: str):
    """Write a plain-text ``info.txt`` metadata summary into *output_dir*.

    One ``FIELD: value`` line is written for every entry of the fixed field
    list below.  Each value is resolved in this order, stopping at the first
    lookup that does not yield ``None``:

    1. ``doujinshi.table`` under the field name (case-insensitive),
    2. ``doujinshi.table`` under the field name plus ``s``,
    3. ``doujinshi.info`` under the lower-cased field name,
    4. ``doujinshi.info`` under the lower-cased name plus ``s``, falling
       back to ``"Unknown"`` when that key is absent.

    :param doujinshi: object exposing ``table`` (an iterable of key/value
        pairs with string keys) and ``info`` (a mapping providing ``.get``).
    :param output_dir: directory that receives ``info.txt``.
    """
    info_txt_path = os.path.join(output_dir, 'info.txt')

    fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
              'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
              'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
              'SERIES', 'PARODY', 'URL']

    # A lower-cased copy of the table gives the case-insensitive lookups the
    # field list needs (table headings are capitalised, fields are upper-case).
    table = {key.lower(): value for key, value in dict(doujinshi.table).items()}

    lines = []
    for field in fields:
        name = field.lower()
        value = table.get(name)
        if value is None:
            value = table.get(f'{name}s')
        if value is None:
            value = doujinshi.info.get(name)
        if value is None:
            value = doujinshi.info.get(f'{name}s', 'Unknown')
        lines.append(f'{field}: {value}\n')

    # Every value is resolved before the file is opened, so a failing lookup
    # cannot leave a truncated info.txt behind; `with` closes the handle even
    # if the write itself raises.
    with open(info_txt_path, 'w', encoding='utf-8') as f:
        f.writelines(lines)
def xml_write_simple_tag(f, name, val, indent=1): def xml_write_simple_tag(f, name, val, indent=1):
f.write(f'{" "*indent}<{name}>{escape(str(val))}</{name}>\n') f.write(f'{" "*indent}<{name}>{escape(str(val))}</{name}>\n')
@ -131,3 +152,4 @@ def set_js_database():
indexed_json = json.dumps(indexed_json, separators=(',', ':')) indexed_json = json.dumps(indexed_json, separators=(',', ':'))
f.write('var data = ' + indexed_json) f.write('var data = ' + indexed_json)
f.write(';\nvar tags = ' + unique_json) f.write(';\nvar tags = ' + unique_json)

View File

@ -11,12 +11,11 @@ import requests
import sqlite3 import sqlite3
import urllib.parse import urllib.parse
from typing import Tuple from typing import Tuple
from requests.structures import CaseInsensitiveDict
from nhentai import constant from nhentai import constant
from nhentai.constant import PATH_SEPARATOR from nhentai.constant import PATH_SEPARATOR
from nhentai.logger import logger from nhentai.logger import logger
from nhentai.serializer import serialize_comic_xml, serialize_json, set_js_database from nhentai.serializer import serialize_comic_xml, serialize_json, serialize_info_txt, set_js_database
MAX_FIELD_LENGTH = 100 MAX_FIELD_LENGTH = 100
EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp') EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
@ -105,9 +104,6 @@ def parse_doujinshi_obj(
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename) doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
_filename = f'{doujinshi_obj.filename}.{file_type}' _filename = f'{doujinshi_obj.filename}.{file_type}'
if file_type == 'cbz':
serialize_comic_xml(doujinshi_obj, doujinshi_dir)
if file_type == 'pdf': if file_type == 'pdf':
_filename = _filename.replace('/', '-') _filename = _filename.replace('/', '-')
@ -115,6 +111,9 @@ def parse_doujinshi_obj(
else: else:
doujinshi_dir = f'.{PATH_SEPARATOR}' doujinshi_dir = f'.{PATH_SEPARATOR}'
if not os.path.exists(doujinshi_dir):
os.makedirs(doujinshi_dir)
return doujinshi_dir, filename return doujinshi_dir, filename
@ -235,8 +234,20 @@ def generate_main_html(output_dir=f'.{PATH_SEPARATOR}'):
logger.warning(f'Writing Main Viewer failed ({e})') logger.warning(f'Writing Main Viewer failed ({e})')
def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False): def generate_cbz(doujinshi_dir, filename):
file_list = os.listdir(doujinshi_dir)
file_list.sort()
logger.info(f'Writing CBZ file to path: {filename}')
with zipfile.ZipFile(filename, 'w') as cbz_pf:
for image in file_list:
image_path = os.path.join(doujinshi_dir, image)
cbz_pf.write(image_path, image)
logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type) doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
if os.path.exists(f'{doujinshi_dir}.{file_type}') and not regenerate: if os.path.exists(f'{doujinshi_dir}.{file_type}') and not regenerate:
@ -244,16 +255,9 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa
return return
if file_type == 'cbz': if file_type == 'cbz':
file_list = os.listdir(doujinshi_dir) serialize_comic_xml(doujinshi_obj, doujinshi_dir)
file_list.sort() generate_cbz(doujinshi_dir, filename)
logger.info(f'Writing CBZ file to path: {filename}')
with zipfile.ZipFile(filename, 'w') as cbz_pf:
for image in file_list:
image_path = os.path.join(doujinshi_dir, image)
cbz_pf.write(image_path, image)
logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
elif file_type == 'pdf': elif file_type == 'pdf':
try: try:
import img2pdf import img2pdf
@ -273,9 +277,16 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa
except ImportError: except ImportError:
logger.error("Please install img2pdf package by using pip.") logger.error("Please install img2pdf package by using pip.")
else:
raise ValueError('invalid file type')
elif file_type == 'json':
serialize_json(doujinshi_obj, doujinshi_dir) def generate_metadata(output_dir, doujinshi_obj=None):
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '')
serialize_json(doujinshi_obj, doujinshi_dir)
serialize_comic_xml(doujinshi_obj, doujinshi_dir)
serialize_info_txt(doujinshi_obj, doujinshi_dir)
logger.log(16, f'Metadata files have been written to "{doujinshi_dir}"')
def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False): def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
@ -332,29 +343,6 @@ def paging(page_string):
return page_list return page_list
def generate_metadata_file(output_dir, doujinshi_obj):
    """Write ``info.txt`` into the doujinshi's folder under *output_dir*.

    The target path is ``<output_dir>/<doujinshi_obj.filename>/info.txt``.
    One ``FIELD: value`` line is written per entry of the fixed field list
    below.  Each value is resolved in this order, stopping at the first
    lookup that does not yield ``None``:

    1. ``doujinshi_obj.table`` under the field name (case-insensitive),
    2. ``doujinshi_obj.table`` under the field name plus ``s``,
    3. ``doujinshi_obj.info`` under the lower-cased field name,
    4. ``doujinshi_obj.info`` under the lower-cased name plus ``s``,
       falling back to ``"Unknown"`` when that key is absent.

    :param output_dir: base output directory.
    :param doujinshi_obj: object exposing ``filename``, ``table`` (an
        iterable of key/value pairs with string keys) and ``info`` (a
        mapping providing ``.get``).
    """
    info_txt_path = os.path.join(output_dir, doujinshi_obj.filename, 'info.txt')

    fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
              'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
              'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
              'SERIES', 'PARODY', 'URL']

    # A lower-cased copy of the table gives case-insensitive lookups.
    table = {key.lower(): value for key, value in dict(doujinshi_obj.table).items()}

    lines = []
    for field in fields:
        name = field.lower()
        value = table.get(name)
        if value is None:
            value = table.get(f'{name}s')
        if value is None:
            value = doujinshi_obj.info.get(name)
        if value is None:
            value = doujinshi_obj.info.get(f'{name}s', 'Unknown')
        lines.append(f'{field}: {value}\n')

    # Resolve all values before opening so a failing lookup cannot leave a
    # truncated file; `with` closes the handle even if the write raises.
    with open(info_txt_path, 'w', encoding='utf-8') as f:
        f.writelines(lines)

    logger.log(16, f'Metadata Info has been written to "{info_txt_path}"')
class DB(object): class DB(object):
conn = None conn = None
cur = None cur = None

View File

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "nhentai" name = "nhentai"
version = "0.5.23" version = "0.6.0-beta"
description = "nhentai doujinshi downloader" description = "nhentai doujinshi downloader"
authors = ["Ricter Z <ricterzheng@gmail.com>"] authors = ["Ricter Z <ricterzheng@gmail.com>"]
license = "MIT" license = "MIT"