mirror of
https://github.com/RicterZ/nhentai.git
synced 2025-04-18 18:01:24 +02:00
Compare commits
31 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
6752edfc9d | ||
|
9a5fcd7d23 | ||
|
b4cc498a5f | ||
|
a4eb7f3b5f | ||
|
36aa321ade | ||
|
aa84b57a43 | ||
|
a3c70a0c30 | ||
|
86060ae0a6 | ||
|
9648c21b32 | ||
|
625feb5d21 | ||
|
6efbc73c10 | ||
|
34c1ea8952 | ||
|
2e895d8d0f | ||
|
0c9b92ce10 | ||
|
ca71a72747 | ||
|
1b7f19ee18 | ||
|
132f4c83da | ||
|
6789b2b363 | ||
|
a6ac725ca7 | ||
|
b32962bca4 | ||
|
8a7be0e33d | ||
|
0a47527461 | ||
|
023c8969eb | ||
|
29c3abbe5c | ||
|
057fae8a83 | ||
|
248d31edf0 | ||
|
4bfe0de078 | ||
|
780a6c82b2 | ||
|
8791e7af55 | ||
|
b434c4d58d | ||
|
ba59dcf4db |
@ -22,7 +22,7 @@ From Github:
|
||||
|
||||
git clone https://github.com/RicterZ/nhentai
|
||||
cd nhentai
|
||||
python setup.py install
|
||||
pip install --no-cache-dir .
|
||||
|
||||
Build Docker container:
|
||||
|
||||
@ -136,6 +136,8 @@ Format output doujinshi folder name:
|
||||
.. code-block:: bash
|
||||
|
||||
nhentai --id 261100 --format '[%i]%s'
|
||||
# for Windows
|
||||
nhentai --id 261100 --format "[%%i]%%s"
|
||||
|
||||
Supported doujinshi folder formatter:
|
||||
|
||||
@ -148,6 +150,7 @@ Supported doujinshi folder formatter:
|
||||
- %p: Doujinshi pretty name
|
||||
- %ag: Doujinshi authors name or groups name
|
||||
|
||||
Note: on the Windows operating system, please use a double "%", such as "%%i".
|
||||
|
||||
Other options:
|
||||
|
||||
|
@ -1,3 +1,3 @@
|
||||
__version__ = '0.5.20'
|
||||
__version__ = '0.6.0-beta'
|
||||
__author__ = 'RicterZ'
|
||||
__email__ = 'ricterzheng@gmail.com'
|
||||
|
@ -6,10 +6,10 @@ import json
|
||||
import nhentai.constant as constant
|
||||
|
||||
from urllib.parse import urlparse
|
||||
from optparse import OptionParser
|
||||
from argparse import ArgumentParser
|
||||
|
||||
from nhentai import __version__
|
||||
from nhentai.utils import generate_html, generate_main_html, DB
|
||||
from nhentai.utils import generate_html, generate_main_html, DB, EXTENSIONS
|
||||
from nhentai.logger import logger
|
||||
from nhentai.constant import PATH_SEPARATOR
|
||||
|
||||
@ -57,103 +57,133 @@ def callback(option, _opt_str, _value, parser):
|
||||
def cmd_parser():
|
||||
load_config()
|
||||
|
||||
parser = OptionParser('\n nhentai --search [keyword] --download'
|
||||
'\n NHENTAI=https://nhentai-mirror-url/ nhentai --id [ID ...]'
|
||||
'\n nhentai --file [filename]'
|
||||
'\n\nEnvironment Variable:\n'
|
||||
' NHENTAI nhentai mirror url')
|
||||
parser = ArgumentParser(
|
||||
description='\n nhentai --search [keyword] --download'
|
||||
'\n NHENTAI=https://nhentai-mirror-url/ nhentai --id [ID ...]'
|
||||
'\n nhentai --file [filename]'
|
||||
'\n\nEnvironment Variable:\n'
|
||||
' NHENTAI nhentai mirror url'
|
||||
)
|
||||
|
||||
# operation options
|
||||
parser.add_option('--download', '-D', dest='is_download', action='store_true',
|
||||
help='download doujinshi (for search results)')
|
||||
parser.add_option('--show', '-S', dest='is_show', action='store_true',
|
||||
help='just show the doujinshi information')
|
||||
parser.add_argument('--download', '-D', dest='is_download', action='store_true',
|
||||
help='download doujinshi (for search results)')
|
||||
parser.add_argument('--no-download', dest='no_download', action='store_true', default=False,
|
||||
help='download doujinshi (for search results)')
|
||||
parser.add_argument('--show', '-S', dest='is_show', action='store_true',
|
||||
help='just show the doujinshi information')
|
||||
|
||||
# doujinshi options
|
||||
parser.add_option('--id', dest='id', action='callback', callback=callback,
|
||||
help='doujinshi ids set, e.g. 167680 167681 167682')
|
||||
parser.add_option('--search', '-s', type='string', dest='keyword', action='store',
|
||||
help='search doujinshi by keyword')
|
||||
parser.add_option('--favorites', '-F', action='store_true', dest='favorites',
|
||||
help='list or download your favorites')
|
||||
parser.add_option('--artist', '-a', action='store', dest='artist',
|
||||
help='list doujinshi by artist name')
|
||||
parser.add_argument('--id', dest='id', nargs='+', type=int,
|
||||
help='doujinshi ids set, e.g. 167680 167681 167682')
|
||||
parser.add_argument('--search', '-s', type=str, dest='keyword',
|
||||
help='search doujinshi by keyword')
|
||||
parser.add_argument('--favorites', '-F', action='store_true', dest='favorites',
|
||||
help='list or download your favorites')
|
||||
parser.add_argument('--artist', '-a', type=str, dest='artist',
|
||||
help='list doujinshi by artist name')
|
||||
|
||||
# page options
|
||||
parser.add_option('--page-all', dest='page_all', action='store_true', default=False,
|
||||
help='all search results')
|
||||
parser.add_option('--page', '--page-range', type='string', dest='page', action='store',
|
||||
help='page number of search results. e.g. 1,2-5,14')
|
||||
parser.add_option('--sorting', '--sort', dest='sorting', action='store', default='popular',
|
||||
help='sorting of doujinshi (recent / popular / popular-[today|week])',
|
||||
choices=['recent', 'popular', 'popular-today', 'popular-week', 'date'])
|
||||
parser.add_argument('--page-all', dest='page_all', action='store_true', default=False,
|
||||
help='all search results')
|
||||
parser.add_argument('--page', '--page-range', type=str, dest='page',
|
||||
help='page number of search results. e.g. 1,2-5,14')
|
||||
parser.add_argument('--sorting', '--sort', dest='sorting', type=str, default='popular',
|
||||
help='sorting of doujinshi (recent / popular / popular-[today|week])',
|
||||
choices=['recent', 'popular', 'popular-today', 'popular-week', 'date'])
|
||||
|
||||
# download options
|
||||
parser.add_option('--output', '-o', type='string', dest='output_dir', action='store',
|
||||
default=f'.{PATH_SEPARATOR}',
|
||||
help='output dir')
|
||||
parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
|
||||
help='thread count for downloading doujinshi')
|
||||
parser.add_option('--timeout', '-T', type='int', dest='timeout', action='store', default=30,
|
||||
help='timeout for downloading doujinshi')
|
||||
parser.add_option('--delay', '-d', type='int', dest='delay', action='store', default=0,
|
||||
help='slow down between downloading every doujinshi')
|
||||
parser.add_option('--retry', type='int', dest='retry', action='store', default=3,
|
||||
help='retry times when downloading failed')
|
||||
parser.add_option('--exit-on-fail', dest='exit_on_fail', action='store_true', default=False,
|
||||
help='exit on fail to prevent generating incomplete files')
|
||||
parser.add_option('--proxy', type='string', dest='proxy', action='store',
|
||||
help='store a proxy, for example: -p "http://127.0.0.1:1080"')
|
||||
parser.add_option('--file', '-f', type='string', dest='file', action='store',
|
||||
help='read gallery IDs from file.')
|
||||
parser.add_option('--format', type='string', dest='name_format', action='store',
|
||||
help='format the saved folder name', default='[%i][%a][%t]')
|
||||
parser.add_option('--dry-run', action='store_true', dest='dryrun', help='Dry run, skip file download')
|
||||
parser.add_argument('--output', '-o', type=str, dest='output_dir', default='.',
|
||||
help='output dir')
|
||||
parser.add_argument('--threads', '-t', type=int, dest='threads', default=5,
|
||||
help='thread count for downloading doujinshi')
|
||||
parser.add_argument('--timeout', '-T', type=int, dest='timeout', default=30,
|
||||
help='timeout for downloading doujinshi')
|
||||
parser.add_argument('--delay', '-d', type=int, dest='delay', default=0,
|
||||
help='slow down between downloading every doujinshi')
|
||||
parser.add_argument('--retry', type=int, dest='retry', default=3,
|
||||
help='retry times when downloading failed')
|
||||
parser.add_argument('--exit-on-fail', dest='exit_on_fail', action='store_true', default=False,
|
||||
help='exit on fail to prevent generating incomplete files')
|
||||
parser.add_argument('--proxy', type=str, dest='proxy',
|
||||
help='store a proxy, for example: -p "http://127.0.0.1:1080"')
|
||||
parser.add_argument('--file', '-f', type=str, dest='file',
|
||||
help='read gallery IDs from file.')
|
||||
parser.add_argument('--format', type=str, dest='name_format', default='[%i][%a][%t]',
|
||||
help='format the saved folder name')
|
||||
|
||||
parser.add_option('--no-filename-padding', action='store_true', dest='no_filename_padding',
|
||||
default=False, help='no padding in the images filename, such as \'001.jpg\'')
|
||||
parser.add_argument('--no-filename-padding', action='store_true', dest='no_filename_padding',
|
||||
default=False, help='no padding in the images filename, such as \'001.jpg\'')
|
||||
|
||||
# generate options
|
||||
parser.add_option('--html', dest='html_viewer', action='store_true',
|
||||
help='generate a html viewer at current directory')
|
||||
parser.add_option('--no-html', dest='is_nohtml', action='store_true',
|
||||
help='don\'t generate HTML after downloading')
|
||||
parser.add_option('--gen-main', dest='main_viewer', action='store_true',
|
||||
help='generate a main viewer contain all the doujin in the folder')
|
||||
parser.add_option('--cbz', '-C', dest='is_cbz', action='store_true',
|
||||
help='generate Comic Book CBZ File')
|
||||
parser.add_option('--pdf', '-P', dest='is_pdf', action='store_true',
|
||||
help='generate PDF file')
|
||||
parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
|
||||
help='remove downloaded doujinshi dir when generated CBZ or PDF file')
|
||||
parser.add_option('--move-to-folder', dest='move_to_folder', action='store_true', default=False,
|
||||
help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
|
||||
parser.add_option('--meta', dest='generate_metadata', action='store_true',
|
||||
help='generate a metadata file in doujinshi format')
|
||||
parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False,
|
||||
help='regenerate the cbz or pdf file if exists')
|
||||
parser.add_argument('--html', dest='html_viewer', type=str, nargs='?', const='.',
|
||||
help='generate an HTML viewer in the specified directory, or scan all subfolders '
|
||||
'within the entire directory to generate the HTML viewer. By default, current '
|
||||
'working directory is used.')
|
||||
parser.add_argument('--no-html', dest='is_nohtml', action='store_true',
|
||||
help='don\'t generate HTML after downloading')
|
||||
parser.add_argument('--gen-main', dest='main_viewer', action='store_true',
|
||||
help='generate a main viewer contain all the doujin in the folder')
|
||||
parser.add_argument('--cbz', '-C', dest='is_cbz', action='store_true',
|
||||
help='generate Comic Book CBZ File')
|
||||
parser.add_argument('--pdf', '-P', dest='is_pdf', action='store_true',
|
||||
help='generate PDF file')
|
||||
|
||||
parser.add_argument('--meta', dest='generate_metadata', action='store_true', default=False,
|
||||
help='generate a metadata file in doujinshi format')
|
||||
parser.add_argument('--update-meta', dest='update_metadata', action='store_true', default=False,
|
||||
help='update the metadata file of a doujinshi, update CBZ metadata if exists')
|
||||
|
||||
parser.add_argument('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
|
||||
help='remove downloaded doujinshi dir when generated CBZ or PDF file')
|
||||
parser.add_argument('--move-to-folder', dest='move_to_folder', action='store_true', default=False,
|
||||
help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
|
||||
|
||||
parser.add_argument('--regenerate', dest='regenerate', action='store_true', default=False,
|
||||
help='regenerate the cbz or pdf file if exists')
|
||||
parser.add_argument('--zip', action='store_true', help='Package into a single zip file')
|
||||
|
||||
# nhentai options
|
||||
parser.add_option('--cookie', type='str', dest='cookie', action='store',
|
||||
help='set cookie of nhentai to bypass Cloudflare captcha')
|
||||
parser.add_option('--useragent', '--user-agent', type='str', dest='useragent', action='store',
|
||||
help='set useragent to bypass Cloudflare captcha')
|
||||
parser.add_option('--language', type='str', dest='language', action='store',
|
||||
help='set default language to parse doujinshis')
|
||||
parser.add_option('--clean-language', dest='clean_language', action='store_true', default=False,
|
||||
help='set DEFAULT as language to parse doujinshis')
|
||||
parser.add_option('--save-download-history', dest='is_save_download_history', action='store_true',
|
||||
default=False, help='save downloaded doujinshis, whose will be skipped if you re-download them')
|
||||
parser.add_option('--clean-download-history', action='store_true', default=False, dest='clean_download_history',
|
||||
help='clean download history')
|
||||
parser.add_option('--template', dest='viewer_template', action='store',
|
||||
help='set viewer template', default='')
|
||||
parser.add_option('--legacy', dest='legacy', action='store_true', default=False,
|
||||
help='use legacy searching method')
|
||||
parser.add_argument('--cookie', type=str, dest='cookie',
|
||||
help='set cookie of nhentai to bypass Cloudflare captcha')
|
||||
parser.add_argument('--useragent', '--user-agent', type=str, dest='useragent',
|
||||
help='set useragent to bypass Cloudflare captcha')
|
||||
parser.add_argument('--language', type=str, dest='language',
|
||||
help='set default language to parse doujinshis')
|
||||
parser.add_argument('--clean-language', dest='clean_language', action='store_true', default=False,
|
||||
help='set DEFAULT as language to parse doujinshis')
|
||||
parser.add_argument('--save-download-history', dest='is_save_download_history', action='store_true',
|
||||
default=False, help='save downloaded doujinshis, whose will be skipped if you re-download them')
|
||||
parser.add_argument('--clean-download-history', action='store_true', default=False, dest='clean_download_history',
|
||||
help='clean download history')
|
||||
parser.add_argument('--template', dest='viewer_template', type=str, default='',
|
||||
help='set viewer template')
|
||||
parser.add_argument('--legacy', dest='legacy', action='store_true', default=False,
|
||||
help='use legacy searching method')
|
||||
|
||||
args, _ = parser.parse_args(sys.argv[1:])
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.html_viewer:
|
||||
generate_html(template=constant.CONFIG['template'])
|
||||
if not os.path.exists(args.html_viewer):
|
||||
logger.error(f'Path \'{args.html_viewer}\' not exists')
|
||||
sys.exit(1)
|
||||
|
||||
for root, dirs, files in os.walk(args.html_viewer):
|
||||
if not dirs:
|
||||
generate_html(output_dir=args.html_viewer, template=constant.CONFIG['template'])
|
||||
sys.exit(0)
|
||||
|
||||
for dir_name in dirs:
|
||||
# it will scan the entire subdirectories
|
||||
doujinshi_dir = os.path.join(root, dir_name)
|
||||
items = set(map(lambda s: os.path.splitext(s)[1], os.listdir(doujinshi_dir)))
|
||||
|
||||
# skip directory without any images
|
||||
if items & set(EXTENSIONS):
|
||||
generate_html(output_dir=doujinshi_dir, template=constant.CONFIG['template'])
|
||||
|
||||
sys.exit(0)
|
||||
|
||||
sys.exit(0)
|
||||
|
||||
if args.main_viewer and not args.id and not args.keyword and not args.favorites:
|
||||
@ -169,22 +199,24 @@ def cmd_parser():
|
||||
|
||||
# --- set config ---
|
||||
if args.cookie is not None:
|
||||
constant.CONFIG['cookie'] = args.cookie
|
||||
constant.CONFIG['cookie'] = args.cookie.strip()
|
||||
write_config()
|
||||
logger.info('Cookie saved.')
|
||||
sys.exit(0)
|
||||
elif args.useragent is not None:
|
||||
constant.CONFIG['useragent'] = args.useragent
|
||||
|
||||
if args.useragent is not None:
|
||||
constant.CONFIG['useragent'] = args.useragent.strip()
|
||||
write_config()
|
||||
logger.info('User-Agent saved.')
|
||||
sys.exit(0)
|
||||
elif args.language is not None:
|
||||
|
||||
if args.language is not None:
|
||||
constant.CONFIG['language'] = args.language
|
||||
write_config()
|
||||
logger.info(f'Default language now set to "{args.language}"')
|
||||
sys.exit(0)
|
||||
# TODO: search without language
|
||||
|
||||
if any([args.cookie, args.useragent, args.language]):
|
||||
sys.exit(0)
|
||||
|
||||
if args.proxy is not None:
|
||||
proxy_url = urlparse(args.proxy)
|
||||
if not args.proxy == '' and proxy_url.scheme not in ('http', 'https', 'socks5', 'socks5h',
|
||||
@ -237,8 +269,4 @@ def cmd_parser():
|
||||
logger.critical('Maximum number of used threads is 15')
|
||||
sys.exit(1)
|
||||
|
||||
if args.dryrun and (args.is_cbz or args.is_pdf):
|
||||
logger.critical('Cannot generate PDF or CBZ during dry-run')
|
||||
sys.exit(1)
|
||||
|
||||
return args
|
||||
|
@ -4,18 +4,16 @@ import shutil
|
||||
import sys
|
||||
import signal
|
||||
import platform
|
||||
import urllib
|
||||
|
||||
import urllib3.exceptions
|
||||
|
||||
from nhentai import constant
|
||||
from nhentai.cmdline import cmd_parser, banner, write_config
|
||||
from nhentai.parser import doujinshi_parser, search_parser, legacy_search_parser, print_doujinshi, favorites_parser
|
||||
from nhentai.doujinshi import Doujinshi
|
||||
from nhentai.downloader import Downloader
|
||||
from nhentai.downloader import Downloader, CompressedDownloader
|
||||
from nhentai.logger import logger
|
||||
from nhentai.constant import BASE_URL
|
||||
from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \
|
||||
from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata, \
|
||||
paging, check_cookie, signal_handler, DB, move_to_folder
|
||||
|
||||
|
||||
@ -51,6 +49,9 @@ def main():
|
||||
|
||||
page_list = paging(options.page)
|
||||
|
||||
if options.retry:
|
||||
constant.RETRY_TIMES = int(options.retry)
|
||||
|
||||
if options.favorites:
|
||||
if not options.is_download:
|
||||
logger.warning('You do not specify --download option')
|
||||
@ -79,14 +80,18 @@ def main():
|
||||
|
||||
if options.is_save_download_history:
|
||||
with DB() as db:
|
||||
data = map(int, db.get_all())
|
||||
data = set(map(int, db.get_all()))
|
||||
|
||||
doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))
|
||||
logger.info(f'New doujinshis account: {len(doujinshi_ids)}')
|
||||
|
||||
if options.zip:
|
||||
options.is_nohtml = True
|
||||
|
||||
if not options.is_show:
|
||||
downloader = Downloader(path=options.output_dir, threads=options.threads,
|
||||
downloader = (CompressedDownloader if options.zip else Downloader)(path=options.output_dir, threads=options.threads,
|
||||
timeout=options.timeout, delay=options.delay,
|
||||
retry=options.retry, exit_on_fail=options.exit_on_fail,
|
||||
exit_on_fail=options.exit_on_fail,
|
||||
no_filename_padding=options.no_filename_padding)
|
||||
|
||||
for doujinshi_id in doujinshi_ids:
|
||||
@ -96,17 +101,15 @@ def main():
|
||||
else:
|
||||
continue
|
||||
|
||||
if not options.dryrun:
|
||||
doujinshi.downloader = downloader
|
||||
doujinshi.downloader = downloader
|
||||
|
||||
if doujinshi.check_if_need_download(options):
|
||||
doujinshi.download()
|
||||
else:
|
||||
logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
|
||||
continue
|
||||
if doujinshi.check_if_need_download(options):
|
||||
doujinshi.download()
|
||||
else:
|
||||
logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
|
||||
|
||||
if options.generate_metadata:
|
||||
generate_metadata_file(options.output_dir, doujinshi)
|
||||
generate_metadata(options.output_dir, doujinshi)
|
||||
|
||||
if options.is_save_download_history:
|
||||
with DB() as db:
|
||||
|
@ -37,6 +37,8 @@ FAV_URL = f'{BASE_URL}/favorites/'
|
||||
|
||||
PATH_SEPARATOR = os.path.sep
|
||||
|
||||
RETRY_TIMES = 3
|
||||
|
||||
|
||||
IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i1.{urlparse(BASE_URL).hostname}/galleries'
|
||||
IMAGE_URL_MIRRORS = [
|
||||
|
@ -77,6 +77,9 @@ class Doujinshi(object):
|
||||
logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}')
|
||||
|
||||
def check_if_need_download(self, options):
|
||||
if options.no_download:
|
||||
return False
|
||||
|
||||
base_path = os.path.join(self.downloader.path, self.filename)
|
||||
|
||||
# regenerate, re-download
|
||||
|
@ -4,6 +4,8 @@ import os
|
||||
import asyncio
|
||||
import httpx
|
||||
import urllib3.exceptions
|
||||
import zipfile
|
||||
import io
|
||||
|
||||
from urllib.parse import urlparse
|
||||
from nhentai import constant
|
||||
@ -13,11 +15,6 @@ from nhentai.utils import Singleton, async_request
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
|
||||
class NHentaiImageNotExistException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
def download_callback(result):
|
||||
result, data = result
|
||||
if result == 0:
|
||||
@ -34,13 +31,12 @@ def download_callback(result):
|
||||
|
||||
|
||||
class Downloader(Singleton):
|
||||
def __init__(self, path='', threads=5, timeout=30, delay=0, retry=3, exit_on_fail=False,
|
||||
def __init__(self, path='', threads=5, timeout=30, delay=0, exit_on_fail=False,
|
||||
no_filename_padding=False):
|
||||
self.threads = threads
|
||||
self.path = str(path)
|
||||
self.timeout = timeout
|
||||
self.delay = delay
|
||||
self.retry = retry
|
||||
self.exit_on_fail = exit_on_fail
|
||||
self.folder = None
|
||||
self.semaphore = None
|
||||
@ -78,13 +74,7 @@ class Downloader(Singleton):
|
||||
else:
|
||||
filename = base_filename + extension
|
||||
|
||||
save_file_path = os.path.join(self.folder, filename)
|
||||
|
||||
try:
|
||||
if os.path.exists(save_file_path):
|
||||
logger.warning(f'Skipped download: {save_file_path} already exists')
|
||||
return 1, url
|
||||
|
||||
response = await async_request('GET', url, timeout=self.timeout, proxy=proxy)
|
||||
|
||||
if response.status_code != 200:
|
||||
@ -101,7 +91,7 @@ class Downloader(Singleton):
|
||||
return -1, url
|
||||
|
||||
except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e:
|
||||
if retried < self.retry:
|
||||
if retried < constant.RETRY_TIMES:
|
||||
logger.warning(f'Download {filename} failed, retrying({retried + 1}) times...')
|
||||
return await self.download(
|
||||
url=url,
|
||||
@ -111,13 +101,9 @@ class Downloader(Singleton):
|
||||
proxy=proxy,
|
||||
)
|
||||
else:
|
||||
logger.warning(f'Download {filename} failed with {self.retry} times retried, skipped')
|
||||
logger.warning(f'Download {filename} failed with {constant.RETRY_TIMES} times retried, skipped')
|
||||
return -2, url
|
||||
|
||||
except NHentaiImageNotExistException as e:
|
||||
os.remove(save_file_path)
|
||||
return -3, url
|
||||
|
||||
except Exception as e:
|
||||
import traceback
|
||||
|
||||
@ -131,11 +117,11 @@ class Downloader(Singleton):
|
||||
|
||||
return 1, url
|
||||
|
||||
async def save(self, save_file_path, response) -> bool:
|
||||
async def save(self, filename, response) -> bool:
|
||||
if response is None:
|
||||
logger.error('Error: Response is None')
|
||||
return False
|
||||
save_file_path = os.path.join(self.folder, save_file_path)
|
||||
save_file_path = os.path.join(self.folder, filename)
|
||||
with open(save_file_path, 'wb') as f:
|
||||
if response is not None:
|
||||
length = response.headers.get('content-length')
|
||||
@ -146,6 +132,15 @@ class Downloader(Singleton):
|
||||
f.write(chunk)
|
||||
return True
|
||||
|
||||
def create_storage_object(self, folder: str):
    """Prepare the directory that downloaded files are written into.

    Creates ``folder`` (including any missing parents), records it as
    ``self.folder`` and installs a no-op ``self.close``.  A failure to create
    the directory is logged as critical instead of being raised.

    :param folder: directory that will receive the downloaded files.
    """
    try:
        # exist_ok=True replaces the former exists()-then-makedirs() pair,
        # which could fail if the directory appeared between the two calls.
        os.makedirs(folder, exist_ok=True)
    except EnvironmentError as e:
        logger.critical(str(e))

    self.folder: str = folder
    # A plain directory needs no finalisation; CompressedDownloader installs
    # a real close() here.
    self.close = lambda: None
|
||||
|
||||
def start_download(self, queue, folder='') -> bool:
|
||||
if not isinstance(folder, (str,)):
|
||||
folder = str(folder)
|
||||
@ -154,12 +149,7 @@ class Downloader(Singleton):
|
||||
folder = os.path.join(self.path, folder)
|
||||
|
||||
logger.info(f'Doujinshi will be saved at "{folder}"')
|
||||
if not os.path.exists(folder):
|
||||
try:
|
||||
os.makedirs(folder)
|
||||
except EnvironmentError as e:
|
||||
logger.critical(str(e))
|
||||
self.folder = folder
|
||||
self.create_storage_object(folder)
|
||||
|
||||
if os.getenv('DEBUG', None) == 'NODOWNLOAD':
|
||||
# Assuming we want to continue with rest of process.
|
||||
@ -175,4 +165,31 @@ class Downloader(Singleton):
|
||||
# Prevent coroutines infection
|
||||
asyncio.run(self.fiber(coroutines))
|
||||
|
||||
self.close()
|
||||
|
||||
return True
|
||||
|
||||
class CompressedDownloader(Downloader):
    """Downloader variant that stores every image in a single ``<folder>.zip``.

    It overrides the two storage methods of ``Downloader``:
    ``create_storage_object`` opens the archive and ``save`` appends one
    downloaded image to it.
    """

    def create_storage_object(self, folder):
        """Open ``<folder>.zip`` for writing and install ``self.close``.

        :param folder: target path of the doujinshi, without the ``.zip`` suffix.
        """
        filename = f'{folder}.zip'

        # No per-doujinshi directory is created in this mode, so make sure the
        # directory that will contain the archive exists before opening it.
        parent = os.path.dirname(filename)
        if parent:
            try:
                os.makedirs(parent, exist_ok=True)
            except EnvironmentError as e:
                logger.critical(str(e))

        # Report through the project logger instead of a bare print().
        logger.info(f'Doujinshi will be packaged into "{filename}"')

        self.zipfile = zipfile.ZipFile(filename, 'w')
        self.close = lambda: self.zipfile.close()

    async def save(self, filename, response) -> bool:
        """Append the body of ``response`` to the archive as ``filename``.

        :param filename: member name to use inside the zip archive.
        :param response: HTTP response carrying the image data, or ``None``.
        :return: ``False`` when ``response`` is ``None``, otherwise ``True``.
        """
        if response is None:
            logger.error('Error: Response is None')
            return False

        image_data = io.BytesIO()
        if response.headers.get('content-length') is None:
            image_data.write(await response.read())
        else:
            async for chunk in response.aiter_bytes(2048):
                image_data.write(chunk)

        # getvalue() returns the whole buffer; no seek()/read() round trip needed.
        self.zipfile.writestr(filename, image_data.getvalue())
        return True
|
||||
|
@ -92,13 +92,27 @@ def favorites_parser(page=None):
|
||||
page_range_list = range(1, pages + 1)
|
||||
|
||||
for page in page_range_list:
|
||||
try:
|
||||
logger.info(f'Getting doujinshi ids of page {page}')
|
||||
resp = request('get', f'{constant.FAV_URL}?page={page}').content
|
||||
logger.info(f'Getting doujinshi ids of page {page}')
|
||||
|
||||
result.extend(_get_title_and_id(resp))
|
||||
except Exception as e:
|
||||
logger.error(f'Error: {e}, continue')
|
||||
i = 0
|
||||
while i <= constant.RETRY_TIMES + 1:
|
||||
i += 1
|
||||
if i > 3:
|
||||
logger.error(f'Failed to get favorites at page {page} after 3 times retried, skipped')
|
||||
break
|
||||
|
||||
try:
|
||||
resp = request('get', f'{constant.FAV_URL}?page={page}').content
|
||||
temp_result = _get_title_and_id(resp)
|
||||
if not temp_result:
|
||||
logger.warning(f'Failed to get favorites at page {page}, retrying ({i} times) ...')
|
||||
continue
|
||||
else:
|
||||
result.extend(temp_result)
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f'Error: {e}, retrying ({i} times) ...')
|
||||
|
||||
return result
|
||||
|
||||
@ -141,17 +155,19 @@ def doujinshi_parser(id_, counter=0):
|
||||
title = doujinshi_info.find('h1').text
|
||||
pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text
|
||||
subtitle = doujinshi_info.find('h2')
|
||||
favorite_counts = doujinshi_info.find('span', class_='nobold').find('span', class_='count')
|
||||
favorite_counts = doujinshi_info.find('span', class_='nobold').text.strip('(').strip(')')
|
||||
|
||||
doujinshi['name'] = title
|
||||
doujinshi['pretty_name'] = pretty_name
|
||||
doujinshi['subtitle'] = subtitle.text if subtitle else ''
|
||||
doujinshi['favorite_counts'] = int(favorite_counts.text.strip()) if favorite_counts else 0
|
||||
doujinshi['favorite_counts'] = int(favorite_counts) if favorite_counts and favorite_counts.isdigit() else 0
|
||||
|
||||
doujinshi_cover = html.find('div', attrs={'id': 'cover'})
|
||||
# img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
|
||||
# doujinshi_cover.a.img.attrs['data-src'])
|
||||
img_id = re.search(r'/galleries/(\d+)/cover\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
|
||||
|
||||
# fix cover.webp.webp
|
||||
img_id = re.search(r'/galleries/(\d+)/cover(\.webp|\.jpg|\.png)?\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
|
||||
|
||||
ext = []
|
||||
for i in html.find_all('div', attrs={'class': 'thumb-container'}):
|
||||
@ -261,7 +277,7 @@ def search_parser(keyword, sorting, page, is_page_all=False):
|
||||
i = 0
|
||||
|
||||
logger.info(f'Searching doujinshis using keywords "{keyword}" on page {p}{total}')
|
||||
while i < 3:
|
||||
while i < constant.RETRY_TIMES:
|
||||
try:
|
||||
url = request('get', url=constant.SEARCH_URL, params={'query': keyword,
|
||||
'page': p, 'sort': sorting}).url
|
||||
|
@ -2,12 +2,12 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
from nhentai.constant import PATH_SEPARATOR
|
||||
from nhentai.constant import PATH_SEPARATOR, LANGUAGE_ISO
|
||||
from xml.sax.saxutils import escape
|
||||
from nhentai.constant import LANGUAGE_ISO
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
|
||||
def serialize_json(doujinshi, output_dir):
|
||||
def serialize_json(doujinshi, output_dir: str):
|
||||
metadata = {'title': doujinshi.name,
|
||||
'subtitle': doujinshi.info.subtitle}
|
||||
if doujinshi.info.favorite_counts:
|
||||
@ -78,6 +78,26 @@ def serialize_comic_xml(doujinshi, output_dir):
|
||||
f.write('</ComicInfo>')
|
||||
|
||||
|
||||
def serialize_info_txt(doujinshi, output_dir: str):
    """Write a plain-text ``info.txt`` describing ``doujinshi`` into ``output_dir``.

    One ``FIELD: value`` line is written for every entry of a fixed field
    list.  Each value is looked up, in order, in ``doujinshi.table``
    (case-insensitively, first under the field name and then under the name
    with an ``s`` appended) and then in ``doujinshi.info`` (lower-cased name,
    then lower-cased name with an ``s`` appended).  ``Unknown`` is written
    when none of the lookups yields a value.

    :param doujinshi: object exposing ``table`` and ``info``.
    :param output_dir: directory in which ``info.txt`` is created.
    """
    fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
              'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
              'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
              'SERIES', 'PARODY', 'URL']

    info_txt_path = os.path.join(output_dir, 'info.txt')
    # Build the lookup table before touching the file so that a failure here
    # does not leave a truncated info.txt behind.
    temp_dict = CaseInsensitiveDict(dict(doujinshi.table))

    # The context manager closes the file even if a lookup or write raises.
    with open(info_txt_path, 'w', encoding='utf-8') as f:
        for i in fields:
            v = temp_dict.get(i)
            v = temp_dict.get(f'{i}s') if v is None else v
            v = doujinshi.info.get(i.lower(), None) if v is None else v
            v = doujinshi.info.get(f'{i.lower()}s', "Unknown") if v is None else v
            f.write(f'{i}: {v}\n')
|
||||
|
||||
|
||||
def xml_write_simple_tag(f, name, val, indent=1):
|
||||
f.write(f'{" "*indent}<{name}>{escape(str(val))}</{name}>\n')
|
||||
|
||||
@ -132,3 +152,4 @@ def set_js_database():
|
||||
indexed_json = json.dumps(indexed_json, separators=(',', ':'))
|
||||
f.write('var data = ' + indexed_json)
|
||||
f.write(';\nvar tags = ' + unique_json)
|
||||
|
||||
|
110
nhentai/utils.py
110
nhentai/utils.py
@ -1,5 +1,5 @@
|
||||
# coding: utf-8
|
||||
|
||||
import json
|
||||
import sys
|
||||
import re
|
||||
import os
|
||||
@ -11,24 +11,33 @@ import requests
|
||||
import sqlite3
|
||||
import urllib.parse
|
||||
from typing import Tuple
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
from nhentai import constant
|
||||
from nhentai.constant import PATH_SEPARATOR
|
||||
from nhentai.logger import logger
|
||||
from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database
|
||||
from nhentai.serializer import serialize_comic_xml, serialize_json, serialize_info_txt, set_js_database
|
||||
|
||||
MAX_FIELD_LENGTH = 100
|
||||
EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
|
||||
|
||||
def get_headers():
    """Build the HTTP headers sent with every request.

    ``Referer`` is always set to ``constant.LOGIN_URL``.  ``User-Agent`` and
    ``Cookie`` are added only when the corresponding ``constant.CONFIG``
    entry is set and is not blank.

    :return: dict mapping header names to values.
    """
    result = {'Referer': constant.LOGIN_URL}

    # (header name, CONFIG key) pairs, in the order they are added.
    optional_headers = (
        ('User-Agent', 'useragent'),
        ('Cookie', 'cookie'),
    )
    for header_name, config_key in optional_headers:
        configured = constant.CONFIG.get(config_key)
        if not configured or not configured.strip():
            continue
        result[header_name] = configured

    return result
|
||||
|
||||
def request(method, url, **kwargs):
|
||||
session = requests.Session()
|
||||
session.headers.update({
|
||||
'Referer': constant.LOGIN_URL,
|
||||
'User-Agent': constant.CONFIG['useragent'],
|
||||
'Cookie': constant.CONFIG['cookie']
|
||||
})
|
||||
session.headers.update(get_headers())
|
||||
|
||||
if not kwargs.get('proxies', None):
|
||||
kwargs['proxies'] = {
|
||||
@ -40,11 +49,7 @@ def request(method, url, **kwargs):
|
||||
|
||||
|
||||
async def async_request(method, url, proxy = None, **kwargs):
|
||||
headers = {
|
||||
'Referer': constant.LOGIN_URL,
|
||||
'User-Agent': constant.CONFIG['useragent'],
|
||||
'Cookie': constant.CONFIG['cookie'],
|
||||
}
|
||||
headers=get_headers()
|
||||
|
||||
if proxy is None:
|
||||
proxy = constant.CONFIG['proxy']
|
||||
@ -105,21 +110,24 @@ def parse_doujinshi_obj(
|
||||
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
|
||||
_filename = f'{doujinshi_obj.filename}.{file_type}'
|
||||
|
||||
if file_type == 'cbz':
|
||||
serialize_comic_xml(doujinshi_obj, doujinshi_dir)
|
||||
|
||||
if file_type == 'pdf':
|
||||
_filename = _filename.replace('/', '-')
|
||||
|
||||
filename = os.path.join(output_dir, _filename)
|
||||
else:
|
||||
if file_type == 'html':
|
||||
return output_dir, 'index.html'
|
||||
|
||||
doujinshi_dir = f'.{PATH_SEPARATOR}'
|
||||
|
||||
if not os.path.exists(doujinshi_dir):
|
||||
os.makedirs(doujinshi_dir)
|
||||
|
||||
return doujinshi_dir, filename
|
||||
|
||||
|
||||
def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
|
||||
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '.html')
|
||||
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, 'html')
|
||||
image_html = ''
|
||||
|
||||
if not os.path.exists(doujinshi_dir):
|
||||
@ -142,10 +150,16 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
|
||||
js = readfile(f'viewer/{template}/scripts.js')
|
||||
|
||||
if doujinshi_obj is not None:
|
||||
serialize_json(doujinshi_obj, doujinshi_dir)
|
||||
# serialize_json(doujinshi_obj, doujinshi_dir)
|
||||
name = doujinshi_obj.name
|
||||
else:
|
||||
name = {'title': 'nHentai HTML Viewer'}
|
||||
metadata_path = os.path.join(doujinshi_dir, "metadata.json")
|
||||
if os.path.exists(metadata_path):
|
||||
with open(metadata_path, 'r') as file:
|
||||
doujinshi_info = json.loads(file.read())
|
||||
name = doujinshi_info.get("title")
|
||||
else:
|
||||
name = 'nHentai HTML Viewer'
|
||||
|
||||
data = html.format(TITLE=name, IMAGES=image_html, SCRIPTS=js, STYLES=css)
|
||||
try:
|
||||
@ -235,8 +249,20 @@ def generate_main_html(output_dir=f'.{PATH_SEPARATOR}'):
|
||||
logger.warning(f'Writing Main Viewer failed ({e})')
|
||||
|
||||
|
||||
def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
|
||||
def generate_cbz(doujinshi_dir, filename):
    """Pack the contents of *doujinshi_dir* into a zip archive at *filename*.

    Directory entries are added in sorted name order, each stored in the
    archive under its bare entry name (no directory prefix).
    """
    entries = sorted(os.listdir(doujinshi_dir))

    logger.info(f'Writing CBZ file to path: {filename}')
    with zipfile.ZipFile(filename, 'w') as archive:
        for entry in entries:
            archive.write(os.path.join(doujinshi_dir, entry), entry)

    logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
|
||||
|
||||
|
||||
def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
|
||||
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
|
||||
|
||||
if os.path.exists(f'{doujinshi_dir}.{file_type}') and not regenerate:
|
||||
@ -244,16 +270,9 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa
|
||||
return
|
||||
|
||||
if file_type == 'cbz':
|
||||
file_list = os.listdir(doujinshi_dir)
|
||||
file_list.sort()
|
||||
serialize_comic_xml(doujinshi_obj, doujinshi_dir)
|
||||
generate_cbz(doujinshi_dir, filename)
|
||||
|
||||
logger.info(f'Writing CBZ file to path: {filename}')
|
||||
with zipfile.ZipFile(filename, 'w') as cbz_pf:
|
||||
for image in file_list:
|
||||
image_path = os.path.join(doujinshi_dir, image)
|
||||
cbz_pf.write(image_path, image)
|
||||
|
||||
logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
|
||||
elif file_type == 'pdf':
|
||||
try:
|
||||
import img2pdf
|
||||
@ -273,6 +292,16 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa
|
||||
|
||||
except ImportError:
|
||||
logger.error("Please install img2pdf package by using pip.")
|
||||
else:
|
||||
raise ValueError('invalid file type')
|
||||
|
||||
|
||||
def generate_metadata(output_dir, doujinshi_obj=None):
    """Write the metadata files for *doujinshi_obj* into its directory.

    The target directory is resolved by ``parse_doujinshi_obj``; the JSON,
    comic XML and info-text serializers are then invoked on it, in that order.
    """
    doujinshi_dir, _ = parse_doujinshi_obj(output_dir, doujinshi_obj, '')

    serializers = (serialize_json, serialize_comic_xml, serialize_info_txt)
    for serializer in serializers:
        serializer(doujinshi_obj, doujinshi_dir)

    logger.log(16, f'Metadata files have been written to "{doujinshi_dir}"')
|
||||
|
||||
|
||||
def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
|
||||
@ -329,29 +358,6 @@ def paging(page_string):
|
||||
return page_list
|
||||
|
||||
|
||||
def generate_metadata_file(output_dir, doujinshi_obj):
    """Write ``info.txt`` with one ``FIELD: value`` line per known field.

    The file is created at ``<output_dir>/<doujinshi_obj.filename>/info.txt``.
    For each field the value is looked up, in order, in
    ``doujinshi_obj.table`` (case-insensitively, then with an ``s`` suffix)
    and in ``doujinshi_obj.info`` (lower-cased, then with an ``s`` suffix);
    ``"Unknown"`` is written when none of them yields a value.
    """
    info_txt_path = os.path.join(output_dir, doujinshi_obj.filename, 'info.txt')

    fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
              'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
              'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
              'SERIES', 'PARODY', 'URL']

    # The context manager closes the file even if a lookup or write raises,
    # which the previous manual open()/close() pair did not guarantee.
    with open(info_txt_path, 'w', encoding='utf-8') as f:
        temp_dict = CaseInsensitiveDict(dict(doujinshi_obj.table))
        for i in fields:
            v = temp_dict.get(i)
            v = temp_dict.get(f'{i}s') if v is None else v
            v = doujinshi_obj.info.get(i.lower(), None) if v is None else v
            v = doujinshi_obj.info.get(f'{i.lower()}s', "Unknown") if v is None else v
            f.write(f'{i}: {v}\n')

    logger.log(16, f'Metadata Info has been written to "{info_txt_path}"')
|
||||
|
||||
|
||||
class DB(object):
|
||||
conn = None
|
||||
cur = None
|
||||
|
@ -49,8 +49,8 @@ document.onkeypress = event => {
|
||||
switch (event.key.toLowerCase()) {
|
||||
// Previous Image
|
||||
case 'w':
|
||||
scrollBy(0, -40);
|
||||
break;
|
||||
scrollBy(0, -40);
|
||||
break;
|
||||
case 'a':
|
||||
changePage(currentPage - 1);
|
||||
break;
|
||||
@ -61,7 +61,7 @@ document.onkeypress = event => {
|
||||
// Next Image
|
||||
case ' ':
|
||||
case 's':
|
||||
scrollBy(0, 40);
|
||||
scrollBy(0, 40);
|
||||
break;
|
||||
case 'd':
|
||||
changePage(currentPage + 1);
|
||||
@ -75,11 +75,13 @@ document.onkeydown = event =>{
|
||||
changePage(currentPage - 1);
|
||||
break;
|
||||
case 38: //up
|
||||
changePage(currentPage - 1);
|
||||
break;
|
||||
case 39: //right
|
||||
changePage(currentPage + 1);
|
||||
break;
|
||||
case 40: //down
|
||||
changePage(currentPage + 1);
|
||||
break;
|
||||
}
|
||||
};
|
@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "nhentai"
|
||||
version = "0.5.20"
|
||||
version = "0.6.0-beta"
|
||||
description = "nhentai doujinshi downloader"
|
||||
authors = ["Ricter Z <ricterzheng@gmail.com>"]
|
||||
license = "MIT"
|
||||
|
@ -1,14 +1,27 @@
|
||||
import unittest
|
||||
import os
|
||||
import zipfile
|
||||
import urllib3.exceptions
|
||||
|
||||
from nhentai import constant
|
||||
from nhentai.cmdline import load_config
|
||||
from nhentai.downloader import Downloader
|
||||
from nhentai.downloader import Downloader, CompressedDownloader
|
||||
from nhentai.parser import doujinshi_parser
|
||||
from nhentai.doujinshi import Doujinshi
|
||||
from nhentai.utils import generate_html
|
||||
|
||||
did = 440546
|
||||
|
||||
def has_jepg_file(path):
    """Return True when the zip archive at *path* has an entry named '01.jpg'."""
    with zipfile.ZipFile(path, 'r') as archive:
        entry_names = archive.namelist()
    return '01.jpg' in entry_names
|
||||
|
||||
def is_zip_file(path):
    """Return True if *path* can be opened as a zip archive, else False.

    A missing file or a file that is not a valid zip both yield False.
    """
    try:
        archive = zipfile.ZipFile(path, 'r')
    except (zipfile.BadZipFile, FileNotFoundError):
        return False
    else:
        archive.close()
        return True
|
||||
|
||||
class TestDownload(unittest.TestCase):
|
||||
def setUp(self) -> None:
|
||||
@ -17,17 +30,27 @@ class TestDownload(unittest.TestCase):
|
||||
constant.CONFIG['cookie'] = os.getenv('NHENTAI_COOKIE')
|
||||
constant.CONFIG['useragent'] = os.getenv('NHENTAI_UA')
|
||||
|
||||
self.info = Doujinshi(**doujinshi_parser(did), name_format='%i')
|
||||
|
||||
def test_download(self):
|
||||
did = 440546
|
||||
info = Doujinshi(**doujinshi_parser(did), name_format='%i')
|
||||
info = self.info
|
||||
info.downloader = Downloader(path='/tmp', threads=5)
|
||||
info.download()
|
||||
|
||||
self.assertTrue(os.path.exists(f'/tmp/{did}/001.jpg'))
|
||||
self.assertTrue(os.path.exists(f'/tmp/{did}/01.jpg'))
|
||||
|
||||
generate_html('/tmp', info)
|
||||
self.assertTrue(os.path.exists(f'/tmp/{did}/index.html'))
|
||||
|
||||
def test_zipfile_download(self):
|
||||
info = self.info
|
||||
info.downloader = CompressedDownloader(path='/tmp', threads=5)
|
||||
info.download()
|
||||
|
||||
zipfile_path = f'/tmp/{did}.zip'
|
||||
self.assertTrue(os.path.exists(zipfile_path))
|
||||
self.assertTrue(is_zip_file(zipfile_path))
|
||||
self.assertTrue(has_jepg_file(zipfile_path))
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
Loading…
x
Reference in New Issue
Block a user