Mirror of https://github.com/RicterZ/nhentai.git (synced 2025-07-01 07:59:29 +02:00)

Compare commits: 0.5.23...0.6.0-beta (13 commits)
SHA1:
0c9b92ce10
ca71a72747
1b7f19ee18
132f4c83da
6789b2b363
a6ac725ca7
b32962bca4
8a7be0e33d
0a47527461
023c8969eb
29c3abbe5c
057fae8a83
ba59dcf4db
README.rst
@@ -22,7 +22,7 @@ From Github:
 
     git clone https://github.com/RicterZ/nhentai
     cd nhentai
-    python setup.py install
+    pip install --no-cache-dir .
 
 Build Docker container:
 
nhentai/__init__.py
@@ -1,3 +1,3 @@
-__version__ = '0.5.22'
+__version__ = '0.6.0-beta'
 __author__ = 'RicterZ'
 __email__ = 'ricterzheng@gmail.com'
nhentai/cmdline.py
@@ -65,6 +65,8 @@ def cmd_parser():
 
     # operation options
     parser.add_option('--download', '-D', dest='is_download', action='store_true',
                       help='download doujinshi (for search results)')
+    parser.add_option('--no-download', dest='no_download', action='store_true', default=False,
+                      help='download doujinshi (for search results)')
     parser.add_option('--show', '-S', dest='is_show', action='store_true',
                       help='just show the doujinshi information')
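The new flag pair can be exercised in isolation; a minimal optparse sketch using only the two options from the hunk (everything else omitted):

from optparse import OptionParser

parser = OptionParser()
parser.add_option('--download', '-D', dest='is_download', action='store_true')
parser.add_option('--no-download', dest='no_download', action='store_true', default=False)

opts, _ = parser.parse_args(['--no-download'])
print(opts.is_download, opts.no_download)  # None True -> list results without downloading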
@@ -107,7 +109,6 @@ def cmd_parser():
                       help='read gallery IDs from file.')
     parser.add_option('--format', type='string', dest='name_format', action='store',
                       help='format the saved folder name', default='[%i][%a][%t]')
-    parser.add_option('--dry-run', action='store_true', dest='dryrun', help='Dry run, skip file download')
 
     parser.add_option('--no-filename-padding', action='store_true', dest='no_filename_padding',
                       default=False, help='no padding in the images filename, such as \'001.jpg\'')
@@ -123,16 +124,19 @@ def cmd_parser():
                       help='generate Comic Book CBZ File')
     parser.add_option('--pdf', '-P', dest='is_pdf', action='store_true',
                       help='generate PDF file')
 
+    parser.add_option('--meta', dest='generate_metadata', action='store_true', default=False,
+                      help='generate a metadata file in doujinshi format')
+    parser.add_option('--update-meta', dest='update_metadata', action='store_true', default=False,
+                      help='update the metadata file of a doujinshi, update CBZ metadata if exists')
+
     parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
                       help='remove downloaded doujinshi dir when generated CBZ or PDF file')
     parser.add_option('--move-to-folder', dest='move_to_folder', action='store_true', default=False,
                       help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
-    parser.add_option('--meta', dest='generate_metadata', action='store_true',
-                      help='generate a metadata file in doujinshi format')
-
     parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False,
                       help='regenerate the cbz or pdf file if exists')
+    parser.add_option('--no-metadata', dest='no_metadata', action='store_true', default=False,
+                      help='don\'t generate metadata json file in doujinshi output path')
 
     # nhentai options
     parser.add_option('--cookie', type='str', dest='cookie', action='store',
@@ -171,22 +175,24 @@ def cmd_parser():
 
     # --- set config ---
     if args.cookie is not None:
-        constant.CONFIG['cookie'] = args.cookie
+        constant.CONFIG['cookie'] = args.cookie.strip()
         write_config()
         logger.info('Cookie saved.')
-        sys.exit(0)
-    elif args.useragent is not None:
-        constant.CONFIG['useragent'] = args.useragent
+
+    if args.useragent is not None:
+        constant.CONFIG['useragent'] = args.useragent.strip()
         write_config()
         logger.info('User-Agent saved.')
-        sys.exit(0)
-    elif args.language is not None:
+
+    if args.language is not None:
         constant.CONFIG['language'] = args.language
         write_config()
         logger.info(f'Default language now set to "{args.language}"')
-        sys.exit(0)
         # TODO: search without language
 
+    if any([args.cookie, args.useragent, args.language]):
+        sys.exit(0)
+
     if args.proxy is not None:
         proxy_url = urlparse(args.proxy)
         if not args.proxy == '' and proxy_url.scheme not in ('http', 'https', 'socks5', 'socks5h',
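The rewrite replaces an if/elif chain, which exited after the first matching flag, with independent ifs and a single exit, so several config values can be saved in one run. A standalone sketch of the pattern, with stand-ins for the optparse result and nhentai.constant.CONFIG:

import sys
from types import SimpleNamespace

CONFIG = {}  # stand-in for nhentai.constant.CONFIG

def set_config(args, write_config=lambda: print('config written')):
    if args.cookie is not None:
        CONFIG['cookie'] = args.cookie.strip()        # .strip() drops pasted whitespace
        write_config()
    if args.useragent is not None:
        CONFIG['useragent'] = args.useragent.strip()
        write_config()
    if args.language is not None:
        CONFIG['language'] = args.language
        write_config()
    if any([args.cookie, args.useragent, args.language]):
        sys.exit(0)                                   # one exit point instead of one per branch

# Both values are saved in a single invocation before exiting:
set_config(SimpleNamespace(cookie=' csrftoken=abc123 ', useragent='Mozilla/5.0', language=None))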
@@ -239,8 +245,4 @@ def cmd_parser():
         logger.critical('Maximum number of used threads is 15')
         sys.exit(1)
 
-    if args.dryrun and (args.is_cbz or args.is_pdf):
-        logger.critical('Cannot generate PDF or CBZ during dry-run')
-        sys.exit(1)
-
     return args
nhentai/command.py
@@ -4,8 +4,6 @@ import shutil
 import sys
 import signal
 import platform
 import urllib
 
-import urllib3.exceptions
-
 from nhentai import constant
@@ -15,8 +13,7 @@ from nhentai.doujinshi import Doujinshi
 from nhentai.downloader import Downloader
 from nhentai.logger import logger
 from nhentai.constant import BASE_URL
-from nhentai.serializer import serialize_json
-from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \
+from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata, \
     paging, check_cookie, signal_handler, DB, move_to_folder
 
@@ -52,6 +49,9 @@ def main():
 
     page_list = paging(options.page)
 
+    if options.retry:
+        constant.RETRY_TIMES = int(options.retry)
+
     if options.favorites:
         if not options.is_download:
             logger.warning('You do not specify --download option')
@@ -87,7 +87,7 @@ def main():
     if not options.is_show:
         downloader = Downloader(path=options.output_dir, threads=options.threads,
                                 timeout=options.timeout, delay=options.delay,
-                                retry=options.retry, exit_on_fail=options.exit_on_fail,
+                                exit_on_fail=options.exit_on_fail,
                                 no_filename_padding=options.no_filename_padding)
 
         for doujinshi_id in doujinshi_ids:
@@ -97,17 +97,15 @@ def main():
             else:
                 continue
 
-            if not options.dryrun:
-                doujinshi.downloader = downloader
+            doujinshi.downloader = downloader
 
-                if doujinshi.check_if_need_download(options):
-                    doujinshi.download()
-                else:
-                    logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
-                    continue
+            if doujinshi.check_if_need_download(options):
+                doujinshi.download()
+            else:
+                logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
 
             if options.generate_metadata:
-                generate_metadata_file(options.output_dir, doujinshi)
+                generate_metadata(options.output_dir, doujinshi)
 
             if options.is_save_download_history:
                 with DB() as db:
@@ -116,9 +114,6 @@ def main():
             if not options.is_nohtml:
                 generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template'])
 
-            if not options.no_metadata:
-                generate_doc('json', options.output_dir, doujinshi, options.regenerate)
-
             if options.is_cbz:
                 generate_doc('cbz', options.output_dir, doujinshi, options.regenerate)
 
nhentai/constant.py
@@ -37,6 +37,8 @@ FAV_URL = f'{BASE_URL}/favorites/'
 
 PATH_SEPARATOR = os.path.sep
 
+RETRY_TIMES = 3
+
 
 IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i1.{urlparse(BASE_URL).hostname}/galleries'
 IMAGE_URL_MIRRORS = [
nhentai/doujinshi.py
@@ -77,6 +77,9 @@ class Doujinshi(object):
         logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}')
 
     def check_if_need_download(self, options):
+        if options.no_download:
+            return False
+
         base_path = os.path.join(self.downloader.path, self.filename)
 
         # regenerate, re-download
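Effect of the new guard, in a self-contained sketch (stub classes; the real method goes on to inspect existing files and the regenerate flags):

class Options:
    no_download = True   # set by the new --no-download flag

class Doujinshi:
    name = 'example'

    def check_if_need_download(self, options):
        if options.no_download:   # new early return from the hunk
            return False
        return True               # real code checks for existing PDF/CBZ here

d = Doujinshi()
if d.check_if_need_download(Options()):
    print('download')
else:
    # note: with --no-download set, the caller in command.py prints the
    # PDF/CBZ-exists skip message even though nothing was checked on disk
    print(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {d.name}')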
nhentai/downloader.py
@@ -34,13 +34,12 @@ def download_callback(result):
 
 
 class Downloader(Singleton):
-    def __init__(self, path='', threads=5, timeout=30, delay=0, retry=3, exit_on_fail=False,
+    def __init__(self, path='', threads=5, timeout=30, delay=0, exit_on_fail=False,
                  no_filename_padding=False):
         self.threads = threads
         self.path = str(path)
         self.timeout = timeout
         self.delay = delay
-        self.retry = retry
         self.exit_on_fail = exit_on_fail
         self.folder = None
         self.semaphore = None
@@ -101,7 +100,7 @@ class Downloader(Singleton):
             return -1, url
 
         except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e:
-            if retried < self.retry:
+            if retried < constant.RETRY_TIMES:
                 logger.warning(f'Download {filename} failed, retrying({retried + 1}) times...')
                 return await self.download(
                     url=url,
@@ -111,7 +110,7 @@ class Downloader(Singleton):
                 proxy=proxy,
             )
         else:
-            logger.warning(f'Download {filename} failed with {self.retry} times retried, skipped')
+            logger.warning(f'Download {filename} failed with {constant.RETRY_TIMES} times retried, skipped')
             return -2, url
 
     except NHentaiImageNotExistException as e:
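Together with the new constant.RETRY_TIMES and the options.retry handling in command.py, these hunks replace the per-Downloader retry attribute with one module-level value. A runnable toy version of the wiring (names are illustrative, not the real nhentai API):

RETRY_TIMES = 3  # stand-in for nhentai.constant.RETRY_TIMES

def set_retry(value):
    global RETRY_TIMES           # command.py does: constant.RETRY_TIMES = int(options.retry)
    RETRY_TIMES = int(value)

def download(url, retried=0):
    try:
        raise TimeoutError('simulated network failure')
    except TimeoutError:
        if retried < RETRY_TIMES:
            return download(url, retried + 1)
        return -2, url           # same sentinel the downloader returns after giving up

set_retry('5')
print(download('http://example.org/img.jpg'))  # (-2, url) after 5 retries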
nhentai/parser.py
@@ -92,13 +92,27 @@ def favorites_parser(page=None):
         page_range_list = range(1, pages + 1)
 
     for page in page_range_list:
-        try:
-            logger.info(f'Getting doujinshi ids of page {page}')
-            resp = request('get', f'{constant.FAV_URL}?page={page}').content
-            result.extend(_get_title_and_id(resp))
-        except Exception as e:
-            logger.error(f'Error: {e}, continue')
+        logger.info(f'Getting doujinshi ids of page {page}')
+
+        i = 0
+        while i <= constant.RETRY_TIMES + 1:
+            i += 1
+            if i > 3:
+                logger.error(f'Failed to get favorites at page {page} after 3 times retried, skipped')
+                break
+
+            try:
+                resp = request('get', f'{constant.FAV_URL}?page={page}').content
+                temp_result = _get_title_and_id(resp)
+                if not temp_result:
+                    logger.warning(f'Failed to get favorites at page {page}, retrying ({i} times) ...')
+                    continue
+                else:
+                    result.extend(temp_result)
+                    break
+
+            except Exception as e:
+                logger.warning(f'Error: {e}, retrying ({i} times) ...')
 
     return result
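Tracing the new loop shows the effective cap: the inner `if i > 3` breaks after three attempts even when constant.RETRY_TIMES is raised above 3. A standalone mock where every request fails:

RETRY_TIMES = 3

def favorites_page(page):
    i = 0
    while i <= RETRY_TIMES + 1:
        i += 1
        if i > 3:
            print(f'Failed to get favorites at page {page} after 3 times retried, skipped')
            break
        try:
            raise IOError('simulated network error')   # request(...) stand-in
        except IOError as e:
            print(f'Error: {e}, retrying ({i} times) ...')

favorites_page(1)  # three retry warnings, then the skip message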
@@ -151,7 +165,9 @@ def doujinshi_parser(id_, counter=0):
     doujinshi_cover = html.find('div', attrs={'id': 'cover'})
     # img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
     #                    doujinshi_cover.a.img.attrs['data-src'])
-    img_id = re.search(r'/galleries/(\d+)/cover\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
+
+    # fix cover.webp.webp
+    img_id = re.search(r'/galleries/(\d+)/cover(\.webp|\.jpg|\.png)?\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
 
     ext = []
     for i in html.find_all('div', attrs={'class': 'thumb-container'}):
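The `fix cover.webp.webp` comment can be verified standalone: some covers carry a doubled extension, which the old pattern rejects because `\w+` cannot cross the extra dot:

import re

src = '/galleries/123456/cover.webp.webp'
old = re.search(r'/galleries/(\d+)/cover\.\w+$', src)
new = re.search(r'/galleries/(\d+)/cover(\.webp|\.jpg|\.png)?\.\w+$', src)

print(old)           # None -- the old pattern fails on the doubled extension
print(new.group(1))  # '123456'
print(re.search(r'/galleries/(\d+)/cover(\.webp|\.jpg|\.png)?\.\w+$',
                '/galleries/987/cover.jpg').group(1))  # normal covers still match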
@@ -261,7 +277,7 @@ def search_parser(keyword, sorting, page, is_page_all=False):
         i = 0
 
         logger.info(f'Searching doujinshis using keywords "{keyword}" on page {p}{total}')
-        while i < 3:
+        while i < constant.RETRY_TIMES:
             try:
                 url = request('get', url=constant.SEARCH_URL, params={'query': keyword,
                                                                       'page': p, 'sort': sorting}).url
nhentai/serializer.py
@@ -4,6 +4,7 @@ import os
 
 from nhentai.constant import PATH_SEPARATOR, LANGUAGE_ISO
 from xml.sax.saxutils import escape
+from requests.structures import CaseInsensitiveDict
 
 
 def serialize_json(doujinshi, output_dir: str):
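The new import supports the field lookups in serialize_info_txt below (requests is already a dependency): CaseInsensitiveDict lets the upper-case field names match mixed-case table keys.

from requests.structures import CaseInsensitiveDict

table = CaseInsensitiveDict({'Title': 'Example', 'Pages': 24})
print(table.get('TITLE'))   # 'Example'
print(table.get('pages'))   # 24
print(table.get('AUTHOR'))  # None -- missing keys still fall through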
@@ -77,6 +78,26 @@ def serialize_comic_xml(doujinshi, output_dir):
     f.write('</ComicInfo>')
 
 
+def serialize_info_txt(doujinshi, output_dir: str):
+    info_txt_path = os.path.join(output_dir, 'info.txt')
+    f = open(info_txt_path, 'w', encoding='utf-8')
+
+    fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
+              'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
+              'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
+              'SERIES', 'PARODY', 'URL']
+
+    temp_dict = CaseInsensitiveDict(dict(doujinshi.table))
+    for i in fields:
+        v = temp_dict.get(i)
+        v = temp_dict.get(f'{i}s') if v is None else v
+        v = doujinshi.info.get(i.lower(), None) if v is None else v
+        v = doujinshi.info.get(f'{i.lower()}s', "Unknown") if v is None else v
+        f.write(f'{i}: {v}\n')
+
+    f.close()
+
+
 def xml_write_simple_tag(f, name, val, indent=1):
     f.write(f'{" "*indent}<{name}>{escape(str(val))}</{name}>\n')
 
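The four chained assignments implement a fallback ladder: exact field, pluralized field, lower-case doujinshi.info key, pluralized info key, then 'Unknown'. Isolated with illustrative data:

from requests.structures import CaseInsensitiveDict

table = CaseInsensitiveDict({'Title': 'Example', 'Characters': 'Alice, Bob'})
info = {'artists': 'someone'}   # doujinshi.info stand-in

def resolve(field):
    v = table.get(field)
    v = table.get(f'{field}s') if v is None else v
    v = info.get(field.lower(), None) if v is None else v
    return info.get(f'{field.lower()}s', 'Unknown') if v is None else v

for f in ('TITLE', 'CHARACTER', 'ARTIST', 'PUBLISHER'):
    print(f'{f}: {resolve(f)}')
# TITLE: Example
# CHARACTER: Alice, Bob   (matched via the pluralized table key)
# ARTIST: someone         (matched via the pluralized info key)
# PUBLISHER: Unknown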
@@ -131,3 +152,4 @@ def set_js_database():
     indexed_json = json.dumps(indexed_json, separators=(',', ':'))
     f.write('var data = ' + indexed_json)
     f.write(';\nvar tags = ' + unique_json)
+
nhentai/utils.py
@@ -11,12 +11,11 @@ import requests
 import sqlite3
 import urllib.parse
 from typing import Tuple
-from requests.structures import CaseInsensitiveDict
 
 from nhentai import constant
 from nhentai.constant import PATH_SEPARATOR
 from nhentai.logger import logger
-from nhentai.serializer import serialize_comic_xml, serialize_json, set_js_database
+from nhentai.serializer import serialize_comic_xml, serialize_json, serialize_info_txt, set_js_database
 
 MAX_FIELD_LENGTH = 100
 EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
@@ -105,9 +104,6 @@ def parse_doujinshi_obj(
         doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
         _filename = f'{doujinshi_obj.filename}.{file_type}'
 
-        if file_type == 'cbz':
-            serialize_comic_xml(doujinshi_obj, doujinshi_dir)
-
         if file_type == 'pdf':
             _filename = _filename.replace('/', '-')
 
@@ -115,6 +111,9 @@ def parse_doujinshi_obj(
     else:
         doujinshi_dir = f'.{PATH_SEPARATOR}'
 
+    if not os.path.exists(doujinshi_dir):
+        os.makedirs(doujinshi_dir)
+
     return doujinshi_dir, filename
 
 
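Since Python 3.2 the same guard can be a single call; `exist_ok=True` also avoids the race between the existence check and the creation:

import os

os.makedirs('./output/example', exist_ok=True)  # equivalent to the two-line guard above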
@@ -235,8 +234,20 @@ def generate_main_html(output_dir=f'.{PATH_SEPARATOR}'):
         logger.warning(f'Writing Main Viewer failed ({e})')
 
 
+def generate_cbz(doujinshi_dir, filename):
+    file_list = os.listdir(doujinshi_dir)
+    file_list.sort()
+
+    logger.info(f'Writing CBZ file to path: {filename}')
+    with zipfile.ZipFile(filename, 'w') as cbz_pf:
+        for image in file_list:
+            image_path = os.path.join(doujinshi_dir, image)
+            cbz_pf.write(image_path, image)
+
+    logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
+
+
 def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
     doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
 
     if os.path.exists(f'{doujinshi_dir}.{file_type}') and not regenerate:
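With the packing logic extracted, CBZ creation is callable on any existing image directory. A trimmed, self-contained version of the helper (logging dropped; paths are made up):

import os
import zipfile

def generate_cbz(doujinshi_dir, filename):
    file_list = sorted(os.listdir(doujinshi_dir))        # page order = filename order
    with zipfile.ZipFile(filename, 'w') as cbz_pf:
        for image in file_list:
            cbz_pf.write(os.path.join(doujinshi_dir, image), image)

# assuming the image folder already exists on disk:
generate_cbz('[12345][artist][title]', '[12345][artist][title].cbz')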
@@ -244,16 +255,9 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
         return
 
     if file_type == 'cbz':
-        file_list = os.listdir(doujinshi_dir)
-        file_list.sort()
-
-        logger.info(f'Writing CBZ file to path: {filename}')
-        with zipfile.ZipFile(filename, 'w') as cbz_pf:
-            for image in file_list:
-                image_path = os.path.join(doujinshi_dir, image)
-                cbz_pf.write(image_path, image)
-
-        logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
+        serialize_comic_xml(doujinshi_obj, doujinshi_dir)
+        generate_cbz(doujinshi_dir, filename)
+
     elif file_type == 'pdf':
         try:
             import img2pdf
@@ -273,9 +277,16 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
 
         except ImportError:
             logger.error("Please install img2pdf package by using pip.")
-    else:
-        raise ValueError('invalid file type')
 
+    elif file_type == 'json':
+        serialize_json(doujinshi_obj, doujinshi_dir)
+
+
+def generate_metadata(output_dir, doujinshi_obj=None):
+    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '')
+    serialize_json(doujinshi_obj, doujinshi_dir)
+    serialize_comic_xml(doujinshi_obj, doujinshi_dir)
+    serialize_info_txt(doujinshi_obj, doujinshi_dir)
+    logger.log(16, f'Metadata files have been written to "{doujinshi_dir}"')
 
 
 def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
@@ -332,29 +343,6 @@ def paging(page_string):
     return page_list
 
 
-def generate_metadata_file(output_dir, doujinshi_obj):
-
-    info_txt_path = os.path.join(output_dir, doujinshi_obj.filename, 'info.txt')
-
-    f = open(info_txt_path, 'w', encoding='utf-8')
-
-    fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
-              'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
-              'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
-              'SERIES', 'PARODY', 'URL']
-
-    temp_dict = CaseInsensitiveDict(dict(doujinshi_obj.table))
-    for i in fields:
-        v = temp_dict.get(i)
-        v = temp_dict.get(f'{i}s') if v is None else v
-        v = doujinshi_obj.info.get(i.lower(), None) if v is None else v
-        v = doujinshi_obj.info.get(f'{i.lower()}s', "Unknown") if v is None else v
-        f.write(f'{i}: {v}\n')
-
-    f.close()
-    logger.log(16, f'Metadata Info has been written to "{info_txt_path}"')
-
-
 class DB(object):
     conn = None
     cur = None
nhentai/viewer/main.js
@@ -49,8 +49,8 @@ document.onkeypress = event => {
     switch (event.key.toLowerCase()) {
         // Previous Image
         case 'w':
-                scrollBy(0, -40);
-                break;
+            scrollBy(0, -40);
+            break;
         case 'a':
             changePage(currentPage - 1);
             break;
@@ -61,7 +61,7 @@ document.onkeypress = event => {
         // Next Image
         case ' ':
         case 's':
-                scrollBy(0, 40);
+            scrollBy(0, 40);
             break;
         case 'd':
             changePage(currentPage + 1);
@@ -75,11 +75,13 @@ document.onkeydown = event =>{
             changePage(currentPage - 1);
             break;
         case 38: //up
+            changePage(currentPage - 1);
+            break;
         case 39: //right
             changePage(currentPage + 1);
             break;
         case 40: //down
             changePage(currentPage + 1);
             break;
     }
 };
pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "nhentai"
-version = "0.5.22"
+version = "0.6.0-beta"
 description = "nhentai doujinshi downloader"
 authors = ["Ricter Z <ricterzheng@gmail.com>"]
 license = "MIT"