Compare commits

...

34 Commits

Author SHA1 Message Date
Ricter Zheng
6752edfc9d
Merge pull request #402 from hzxjy1/zipTest
Close zipfile hander manually and add a test
2025-03-26 22:57:29 +08:00
Ricter Zheng
9a5fcd7d23
Merge pull request #401 from hzxjy1/NoneType
Fix none attributes in session headers
2025-03-26 22:56:11 +08:00
Hellagur4225
b4cc498a5f add a test for zipfile download 2025-03-26 15:14:15 +08:00
Hellagur4225
a4eb7f3b5f fix the uncontrollable zipfile closing function 2025-03-26 15:11:26 +08:00
Hellagur4225
36aa321ade Fix none attributes in session headers 2025-03-24 10:13:42 +08:00
Ricter Zheng
aa84b57a43 use argparse, fix #396 2025-03-12 02:50:22 +08:00
ricterz
a3c70a0c30 fix #396 2025-03-11 22:23:17 +08:00
Ricter Zheng
86060ae0a6
Merge pull request #398 from hzxjy1/zipfile
feat: add compress option
2025-03-11 22:04:09 +08:00
ricterz
9648c21b32 tiny fix of #397 2025-03-11 22:02:37 +08:00
Hellagur4225
625feb5d21 Remove unused option 2025-03-08 17:37:42 +08:00
Hellagur4225
6efbc73c10 feat: add compress option 2025-03-08 17:31:56 +08:00
ricterz
34c1ea8952 new feature #396 2025-02-28 18:59:32 +08:00
ricterz
2e895d8d0f fix title #396 2025-02-28 18:24:56 +08:00
ricterz
0c9b92ce10 0.6.0-beta #394 2025-02-28 00:17:05 +08:00
ricterz
ca71a72747 fix #395 2025-02-27 22:07:40 +08:00
ricterz
1b7f19ee18 0.5.25, fix #393 2025-02-26 00:13:41 +08:00
ricterz
132f4c83da Merge branch 'master' of github.com:RicterZ/nhentai 2025-02-26 00:12:49 +08:00
ricterz
6789b2b363 fix bug of cover.webp.webp 2025-02-25 23:51:13 +08:00
Ricter Zheng
a6ac725ca7
Merge pull request #392 from akakishi/master
Update installation instructions in README.rst
2025-02-23 20:29:15 +08:00
akakishi
b32962bca4
Update README.rst
File `setup.py` was removed in a previous commit; updated README to reflect the new installation process.
2025-02-23 01:18:54 -03:00
ricterz
8a7be0e33d 0.5.24 2025-02-09 20:16:44 +08:00
ricterz
0a47527461 optimize logger output #390 2025-02-09 20:15:17 +08:00
ricterz
023c8969eb add global retry for search, download, fetch favorites 2025-02-09 20:02:52 +08:00
ricterz
29c3abbe5c Merge branch 'master' of github.com:RicterZ/nhentai 2025-02-08 16:21:08 +08:00
ricterzheng
057fae8a83 0.5.23 2025-02-03 15:47:51 +08:00
ricterzheng
248d31edf0 get favorite count #386 even if not login 2025-02-03 15:45:39 +08:00
ricterzheng
4bfe0de078 0.5.22 2025-02-03 15:29:34 +08:00
ricterzheng
780a6c82b2 split metadata.json out from html generate function #386 2025-02-03 15:26:14 +08:00
ricterzheng
8791e7af55 update README to fix #367 2025-02-03 14:53:09 +08:00
ricterzheng
b434c4d58d 0.5.21 2025-02-03 14:34:14 +08:00
ricterzheng
fc69f94505 add --no-filename-padding options to fix #381 2025-01-29 22:59:28 +08:00
ricterzheng
571fba2259 fix RequestsDependencyWarning 2025-01-29 22:46:11 +08:00
ricterzheng
fa977fee04 0.5.20 2025-01-29 00:31:40 +08:00
ricterz
ba59dcf4db add up/down arrow 2025-01-16 22:40:53 +08:00
14 changed files with 702 additions and 570 deletions

View File

@ -22,7 +22,7 @@ From Github:
git clone https://github.com/RicterZ/nhentai git clone https://github.com/RicterZ/nhentai
cd nhentai cd nhentai
python setup.py install pip install --no-cache-dir .
Build Docker container: Build Docker container:
@ -136,6 +136,8 @@ Format output doujinshi folder name:
.. code-block:: bash .. code-block:: bash
nhentai --id 261100 --format '[%i]%s' nhentai --id 261100 --format '[%i]%s'
# for Windows
nhentai --id 261100 --format "[%%i]%%s"
Supported doujinshi folder formatter: Supported doujinshi folder formatter:
@ -148,6 +150,7 @@ Supported doujinshi folder formatter:
- %p: Doujinshi pretty name - %p: Doujinshi pretty name
- %ag: Doujinshi authors name or groups name - %ag: Doujinshi authors name or groups name
Note: for Windows operation system, please use double "%", such as "%%i".
Other options: Other options:

View File

@ -1,3 +1,3 @@
__version__ = '0.5.19' __version__ = '0.6.0-beta'
__author__ = 'RicterZ' __author__ = 'RicterZ'
__email__ = 'ricterzheng@gmail.com' __email__ = 'ricterzheng@gmail.com'

View File

@ -6,10 +6,10 @@ import json
import nhentai.constant as constant import nhentai.constant as constant
from urllib.parse import urlparse from urllib.parse import urlparse
from optparse import OptionParser from argparse import ArgumentParser
from nhentai import __version__ from nhentai import __version__
from nhentai.utils import generate_html, generate_main_html, DB from nhentai.utils import generate_html, generate_main_html, DB, EXTENSIONS
from nhentai.logger import logger from nhentai.logger import logger
from nhentai.constant import PATH_SEPARATOR from nhentai.constant import PATH_SEPARATOR
@ -57,100 +57,133 @@ def callback(option, _opt_str, _value, parser):
def cmd_parser(): def cmd_parser():
load_config() load_config()
parser = OptionParser('\n nhentai --search [keyword] --download' parser = ArgumentParser(
description='\n nhentai --search [keyword] --download'
'\n NHENTAI=https://nhentai-mirror-url/ nhentai --id [ID ...]' '\n NHENTAI=https://nhentai-mirror-url/ nhentai --id [ID ...]'
'\n nhentai --file [filename]' '\n nhentai --file [filename]'
'\n\nEnvironment Variable:\n' '\n\nEnvironment Variable:\n'
' NHENTAI nhentai mirror url') ' NHENTAI nhentai mirror url'
)
# operation options # operation options
parser.add_option('--download', '-D', dest='is_download', action='store_true', parser.add_argument('--download', '-D', dest='is_download', action='store_true',
help='download doujinshi (for search results)') help='download doujinshi (for search results)')
parser.add_option('--show', '-S', dest='is_show', action='store_true', parser.add_argument('--no-download', dest='no_download', action='store_true', default=False,
help='download doujinshi (for search results)')
parser.add_argument('--show', '-S', dest='is_show', action='store_true',
help='just show the doujinshi information') help='just show the doujinshi information')
# doujinshi options # doujinshi options
parser.add_option('--id', dest='id', action='callback', callback=callback, parser.add_argument('--id', dest='id', nargs='+', type=int,
help='doujinshi ids set, e.g. 167680 167681 167682') help='doujinshi ids set, e.g. 167680 167681 167682')
parser.add_option('--search', '-s', type='string', dest='keyword', action='store', parser.add_argument('--search', '-s', type=str, dest='keyword',
help='search doujinshi by keyword') help='search doujinshi by keyword')
parser.add_option('--favorites', '-F', action='store_true', dest='favorites', parser.add_argument('--favorites', '-F', action='store_true', dest='favorites',
help='list or download your favorites') help='list or download your favorites')
parser.add_option('--artist', '-a', action='store', dest='artist', parser.add_argument('--artist', '-a', type=str, dest='artist',
help='list doujinshi by artist name') help='list doujinshi by artist name')
# page options # page options
parser.add_option('--page-all', dest='page_all', action='store_true', default=False, parser.add_argument('--page-all', dest='page_all', action='store_true', default=False,
help='all search results') help='all search results')
parser.add_option('--page', '--page-range', type='string', dest='page', action='store', parser.add_argument('--page', '--page-range', type=str, dest='page',
help='page number of search results. e.g. 1,2-5,14') help='page number of search results. e.g. 1,2-5,14')
parser.add_option('--sorting', '--sort', dest='sorting', action='store', default='popular', parser.add_argument('--sorting', '--sort', dest='sorting', type=str, default='popular',
help='sorting of doujinshi (recent / popular / popular-[today|week])', help='sorting of doujinshi (recent / popular / popular-[today|week])',
choices=['recent', 'popular', 'popular-today', 'popular-week', 'date']) choices=['recent', 'popular', 'popular-today', 'popular-week', 'date'])
# download options # download options
parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', parser.add_argument('--output', '-o', type=str, dest='output_dir', default='.',
default=f'.{PATH_SEPARATOR}',
help='output dir') help='output dir')
parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5, parser.add_argument('--threads', '-t', type=int, dest='threads', default=5,
help='thread count for downloading doujinshi') help='thread count for downloading doujinshi')
parser.add_option('--timeout', '-T', type='int', dest='timeout', action='store', default=30, parser.add_argument('--timeout', '-T', type=int, dest='timeout', default=30,
help='timeout for downloading doujinshi') help='timeout for downloading doujinshi')
parser.add_option('--delay', '-d', type='int', dest='delay', action='store', default=0, parser.add_argument('--delay', '-d', type=int, dest='delay', default=0,
help='slow down between downloading every doujinshi') help='slow down between downloading every doujinshi')
parser.add_option('--retry', type='int', dest='retry', action='store', default=3, parser.add_argument('--retry', type=int, dest='retry', default=3,
help='retry times when downloading failed') help='retry times when downloading failed')
parser.add_option('--exit-on-fail', dest='exit_on_fail', action='store_true', default=False, parser.add_argument('--exit-on-fail', dest='exit_on_fail', action='store_true', default=False,
help='exit on fail to prevent generating incomplete files') help='exit on fail to prevent generating incomplete files')
parser.add_option('--proxy', type='string', dest='proxy', action='store', parser.add_argument('--proxy', type=str, dest='proxy',
help='store a proxy, for example: -p "http://127.0.0.1:1080"') help='store a proxy, for example: -p "http://127.0.0.1:1080"')
parser.add_option('--file', '-f', type='string', dest='file', action='store', parser.add_argument('--file', '-f', type=str, dest='file',
help='read gallery IDs from file.') help='read gallery IDs from file.')
parser.add_option('--format', type='string', dest='name_format', action='store', parser.add_argument('--format', type=str, dest='name_format', default='[%i][%a][%t]',
help='format the saved folder name', default='[%i][%a][%t]') help='format the saved folder name')
parser.add_option('--dry-run', action='store_true', dest='dryrun', help='Dry run, skip file download')
parser.add_argument('--no-filename-padding', action='store_true', dest='no_filename_padding',
default=False, help='no padding in the images filename, such as \'001.jpg\'')
# generate options # generate options
parser.add_option('--html', dest='html_viewer', action='store_true', parser.add_argument('--html', dest='html_viewer', type=str, nargs='?', const='.',
help='generate a html viewer at current directory') help='generate an HTML viewer in the specified directory, or scan all subfolders '
parser.add_option('--no-html', dest='is_nohtml', action='store_true', 'within the entire directory to generate the HTML viewer. By default, current '
'working directory is used.')
parser.add_argument('--no-html', dest='is_nohtml', action='store_true',
help='don\'t generate HTML after downloading') help='don\'t generate HTML after downloading')
parser.add_option('--gen-main', dest='main_viewer', action='store_true', parser.add_argument('--gen-main', dest='main_viewer', action='store_true',
help='generate a main viewer contain all the doujin in the folder') help='generate a main viewer contain all the doujin in the folder')
parser.add_option('--cbz', '-C', dest='is_cbz', action='store_true', parser.add_argument('--cbz', '-C', dest='is_cbz', action='store_true',
help='generate Comic Book CBZ File') help='generate Comic Book CBZ File')
parser.add_option('--pdf', '-P', dest='is_pdf', action='store_true', parser.add_argument('--pdf', '-P', dest='is_pdf', action='store_true',
help='generate PDF file') help='generate PDF file')
parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
help='remove downloaded doujinshi dir when generated CBZ or PDF file') parser.add_argument('--meta', dest='generate_metadata', action='store_true', default=False,
parser.add_option('--move-to-folder', dest='move_to_folder', action='store_true', default=False,
help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
parser.add_option('--meta', dest='generate_metadata', action='store_true',
help='generate a metadata file in doujinshi format') help='generate a metadata file in doujinshi format')
parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False, parser.add_argument('--update-meta', dest='update_metadata', action='store_true', default=False,
help='update the metadata file of a doujinshi, update CBZ metadata if exists')
parser.add_argument('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
help='remove downloaded doujinshi dir when generated CBZ or PDF file')
parser.add_argument('--move-to-folder', dest='move_to_folder', action='store_true', default=False,
help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
parser.add_argument('--regenerate', dest='regenerate', action='store_true', default=False,
help='regenerate the cbz or pdf file if exists') help='regenerate the cbz or pdf file if exists')
parser.add_argument('--zip', action='store_true', help='Package into a single zip file')
# nhentai options # nhentai options
parser.add_option('--cookie', type='str', dest='cookie', action='store', parser.add_argument('--cookie', type=str, dest='cookie',
help='set cookie of nhentai to bypass Cloudflare captcha') help='set cookie of nhentai to bypass Cloudflare captcha')
parser.add_option('--useragent', '--user-agent', type='str', dest='useragent', action='store', parser.add_argument('--useragent', '--user-agent', type=str, dest='useragent',
help='set useragent to bypass Cloudflare captcha') help='set useragent to bypass Cloudflare captcha')
parser.add_option('--language', type='str', dest='language', action='store', parser.add_argument('--language', type=str, dest='language',
help='set default language to parse doujinshis') help='set default language to parse doujinshis')
parser.add_option('--clean-language', dest='clean_language', action='store_true', default=False, parser.add_argument('--clean-language', dest='clean_language', action='store_true', default=False,
help='set DEFAULT as language to parse doujinshis') help='set DEFAULT as language to parse doujinshis')
parser.add_option('--save-download-history', dest='is_save_download_history', action='store_true', parser.add_argument('--save-download-history', dest='is_save_download_history', action='store_true',
default=False, help='save downloaded doujinshis, whose will be skipped if you re-download them') default=False, help='save downloaded doujinshis, whose will be skipped if you re-download them')
parser.add_option('--clean-download-history', action='store_true', default=False, dest='clean_download_history', parser.add_argument('--clean-download-history', action='store_true', default=False, dest='clean_download_history',
help='clean download history') help='clean download history')
parser.add_option('--template', dest='viewer_template', action='store', parser.add_argument('--template', dest='viewer_template', type=str, default='',
help='set viewer template', default='') help='set viewer template')
parser.add_option('--legacy', dest='legacy', action='store_true', default=False, parser.add_argument('--legacy', dest='legacy', action='store_true', default=False,
help='use legacy searching method') help='use legacy searching method')
args, _ = parser.parse_args(sys.argv[1:]) args = parser.parse_args()
if args.html_viewer: if args.html_viewer:
generate_html(template=constant.CONFIG['template']) if not os.path.exists(args.html_viewer):
logger.error(f'Path \'{args.html_viewer}\' not exists')
sys.exit(1)
for root, dirs, files in os.walk(args.html_viewer):
if not dirs:
generate_html(output_dir=args.html_viewer, template=constant.CONFIG['template'])
sys.exit(0)
for dir_name in dirs:
# it will scan the entire subdirectories
doujinshi_dir = os.path.join(root, dir_name)
items = set(map(lambda s: os.path.splitext(s)[1], os.listdir(doujinshi_dir)))
# skip directory without any images
if items & set(EXTENSIONS):
generate_html(output_dir=doujinshi_dir, template=constant.CONFIG['template'])
sys.exit(0)
sys.exit(0) sys.exit(0)
if args.main_viewer and not args.id and not args.keyword and not args.favorites: if args.main_viewer and not args.id and not args.keyword and not args.favorites:
@ -166,22 +199,24 @@ def cmd_parser():
# --- set config --- # --- set config ---
if args.cookie is not None: if args.cookie is not None:
constant.CONFIG['cookie'] = args.cookie constant.CONFIG['cookie'] = args.cookie.strip()
write_config() write_config()
logger.info('Cookie saved.') logger.info('Cookie saved.')
sys.exit(0)
elif args.useragent is not None: if args.useragent is not None:
constant.CONFIG['useragent'] = args.useragent constant.CONFIG['useragent'] = args.useragent.strip()
write_config() write_config()
logger.info('User-Agent saved.') logger.info('User-Agent saved.')
sys.exit(0)
elif args.language is not None: if args.language is not None:
constant.CONFIG['language'] = args.language constant.CONFIG['language'] = args.language
write_config() write_config()
logger.info(f'Default language now set to "{args.language}"') logger.info(f'Default language now set to "{args.language}"')
sys.exit(0)
# TODO: search without language # TODO: search without language
if any([args.cookie, args.useragent, args.language]):
sys.exit(0)
if args.proxy is not None: if args.proxy is not None:
proxy_url = urlparse(args.proxy) proxy_url = urlparse(args.proxy)
if not args.proxy == '' and proxy_url.scheme not in ('http', 'https', 'socks5', 'socks5h', if not args.proxy == '' and proxy_url.scheme not in ('http', 'https', 'socks5', 'socks5h',
@ -234,8 +269,4 @@ def cmd_parser():
logger.critical('Maximum number of used threads is 15') logger.critical('Maximum number of used threads is 15')
sys.exit(1) sys.exit(1)
if args.dryrun and (args.is_cbz or args.is_pdf):
logger.critical('Cannot generate PDF or CBZ during dry-run')
sys.exit(1)
return args return args

View File

@ -7,13 +7,13 @@ import platform
import urllib3.exceptions import urllib3.exceptions
from nhentai import constant from nhentai import constant
from nhentai.cmdline import cmd_parser, banner from nhentai.cmdline import cmd_parser, banner, write_config
from nhentai.parser import doujinshi_parser, search_parser, legacy_search_parser, print_doujinshi, favorites_parser from nhentai.parser import doujinshi_parser, search_parser, legacy_search_parser, print_doujinshi, favorites_parser
from nhentai.doujinshi import Doujinshi from nhentai.doujinshi import Doujinshi
from nhentai.downloader import Downloader from nhentai.downloader import Downloader, CompressedDownloader
from nhentai.logger import logger from nhentai.logger import logger
from nhentai.constant import BASE_URL from nhentai.constant import BASE_URL
from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \ from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata, \
paging, check_cookie, signal_handler, DB, move_to_folder paging, check_cookie, signal_handler, DB, move_to_folder
@ -29,6 +29,11 @@ def main():
# CONFIG['proxy'] will be changed after cmd_parser() # CONFIG['proxy'] will be changed after cmd_parser()
if constant.CONFIG['proxy']: if constant.CONFIG['proxy']:
if isinstance(constant.CONFIG['proxy'], dict):
constant.CONFIG['proxy'] = constant.CONFIG['proxy'].get('http', '')
logger.warning(f'Update proxy config to: {constant.CONFIG["proxy"]}')
write_config()
logger.info(f'Using proxy: {constant.CONFIG["proxy"]}') logger.info(f'Using proxy: {constant.CONFIG["proxy"]}')
if not constant.CONFIG['template']: if not constant.CONFIG['template']:
@ -44,6 +49,9 @@ def main():
page_list = paging(options.page) page_list = paging(options.page)
if options.retry:
constant.RETRY_TIMES = int(options.retry)
if options.favorites: if options.favorites:
if not options.is_download: if not options.is_download:
logger.warning('You do not specify --download option') logger.warning('You do not specify --download option')
@ -72,14 +80,19 @@ def main():
if options.is_save_download_history: if options.is_save_download_history:
with DB() as db: with DB() as db:
data = map(int, db.get_all()) data = set(map(int, db.get_all()))
doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data)) doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))
logger.info(f'New doujinshis account: {len(doujinshi_ids)}')
if options.zip:
options.is_nohtml = True
if not options.is_show: if not options.is_show:
downloader = Downloader(path=options.output_dir, threads=options.threads, downloader = (CompressedDownloader if options.zip else Downloader)(path=options.output_dir, threads=options.threads,
timeout=options.timeout, delay=options.delay, timeout=options.timeout, delay=options.delay,
retry=options.retry, exit_on_fail=options.exit_on_fail) exit_on_fail=options.exit_on_fail,
no_filename_padding=options.no_filename_padding)
for doujinshi_id in doujinshi_ids: for doujinshi_id in doujinshi_ids:
doujinshi_info = doujinshi_parser(doujinshi_id) doujinshi_info = doujinshi_parser(doujinshi_id)
@ -88,17 +101,15 @@ def main():
else: else:
continue continue
if not options.dryrun:
doujinshi.downloader = downloader doujinshi.downloader = downloader
if doujinshi.check_if_need_download(options): if doujinshi.check_if_need_download(options):
doujinshi.download() doujinshi.download()
else: else:
logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}') logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
continue
if options.generate_metadata: if options.generate_metadata:
generate_metadata_file(options.output_dir, doujinshi) generate_metadata(options.output_dir, doujinshi)
if options.is_save_download_history: if options.is_save_download_history:
with DB() as db: with DB() as db:

View File

@ -37,6 +37,8 @@ FAV_URL = f'{BASE_URL}/favorites/'
PATH_SEPARATOR = os.path.sep PATH_SEPARATOR = os.path.sep
RETRY_TIMES = 3
IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i1.{urlparse(BASE_URL).hostname}/galleries' IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i1.{urlparse(BASE_URL).hostname}/galleries'
IMAGE_URL_MIRRORS = [ IMAGE_URL_MIRRORS = [

View File

@ -77,6 +77,9 @@ class Doujinshi(object):
logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}') logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}')
def check_if_need_download(self, options): def check_if_need_download(self, options):
if options.no_download:
return False
base_path = os.path.join(self.downloader.path, self.filename) base_path = os.path.join(self.downloader.path, self.filename)
# regenerate, re-download # regenerate, re-download

View File

@ -4,6 +4,8 @@ import os
import asyncio import asyncio
import httpx import httpx
import urllib3.exceptions import urllib3.exceptions
import zipfile
import io
from urllib.parse import urlparse from urllib.parse import urlparse
from nhentai import constant from nhentai import constant
@ -13,11 +15,6 @@ from nhentai.utils import Singleton, async_request
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
class NHentaiImageNotExistException(Exception):
pass
def download_callback(result): def download_callback(result):
result, data = result result, data = result
if result == 0: if result == 0:
@ -34,15 +31,16 @@ def download_callback(result):
class Downloader(Singleton): class Downloader(Singleton):
def __init__(self, path='', threads=5, timeout=30, delay=0, retry=3, exit_on_fail=False): def __init__(self, path='', threads=5, timeout=30, delay=0, exit_on_fail=False,
no_filename_padding=False):
self.threads = threads self.threads = threads
self.path = str(path) self.path = str(path)
self.timeout = timeout self.timeout = timeout
self.delay = delay self.delay = delay
self.retry = retry
self.exit_on_fail = exit_on_fail self.exit_on_fail = exit_on_fail
self.folder = None self.folder = None
self.semaphore = None self.semaphore = None
self.no_filename_padding = no_filename_padding
async def fiber(self, tasks): async def fiber(self, tasks):
self.semaphore = asyncio.Semaphore(self.threads) self.semaphore = asyncio.Semaphore(self.threads)
@ -70,15 +68,13 @@ class Downloader(Singleton):
filename = filename if filename else os.path.basename(urlparse(url).path) filename = filename if filename else os.path.basename(urlparse(url).path)
base_filename, extension = os.path.splitext(filename) base_filename, extension = os.path.splitext(filename)
filename = base_filename.zfill(length) + extension
save_file_path = os.path.join(self.folder, filename) if not self.no_filename_padding:
filename = base_filename.zfill(length) + extension
else:
filename = base_filename + extension
try: try:
if os.path.exists(save_file_path):
logger.warning(f'Skipped download: {save_file_path} already exists')
return 1, url
response = await async_request('GET', url, timeout=self.timeout, proxy=proxy) response = await async_request('GET', url, timeout=self.timeout, proxy=proxy)
if response.status_code != 200: if response.status_code != 200:
@ -95,7 +91,7 @@ class Downloader(Singleton):
return -1, url return -1, url
except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e: except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e:
if retried < self.retry: if retried < constant.RETRY_TIMES:
logger.warning(f'Download {filename} failed, retrying({retried + 1}) times...') logger.warning(f'Download {filename} failed, retrying({retried + 1}) times...')
return await self.download( return await self.download(
url=url, url=url,
@ -105,13 +101,9 @@ class Downloader(Singleton):
proxy=proxy, proxy=proxy,
) )
else: else:
logger.warning(f'Download {filename} failed with {self.retry} times retried, skipped') logger.warning(f'Download {filename} failed with {constant.RETRY_TIMES} times retried, skipped')
return -2, url return -2, url
except NHentaiImageNotExistException as e:
os.remove(save_file_path)
return -3, url
except Exception as e: except Exception as e:
import traceback import traceback
@ -125,11 +117,11 @@ class Downloader(Singleton):
return 1, url return 1, url
async def save(self, save_file_path, response) -> bool: async def save(self, filename, response) -> bool:
if response is None: if response is None:
logger.error('Error: Response is None') logger.error('Error: Response is None')
return False return False
save_file_path = os.path.join(self.folder, save_file_path) save_file_path = os.path.join(self.folder, filename)
with open(save_file_path, 'wb') as f: with open(save_file_path, 'wb') as f:
if response is not None: if response is not None:
length = response.headers.get('content-length') length = response.headers.get('content-length')
@ -140,6 +132,15 @@ class Downloader(Singleton):
f.write(chunk) f.write(chunk)
return True return True
def create_storage_object(self, folder:str):
if not os.path.exists(folder):
try:
os.makedirs(folder)
except EnvironmentError as e:
logger.critical(str(e))
self.folder:str = folder
self.close = lambda: None # Only available in class CompressedDownloader
def start_download(self, queue, folder='') -> bool: def start_download(self, queue, folder='') -> bool:
if not isinstance(folder, (str,)): if not isinstance(folder, (str,)):
folder = str(folder) folder = str(folder)
@ -148,12 +149,7 @@ class Downloader(Singleton):
folder = os.path.join(self.path, folder) folder = os.path.join(self.path, folder)
logger.info(f'Doujinshi will be saved at "{folder}"') logger.info(f'Doujinshi will be saved at "{folder}"')
if not os.path.exists(folder): self.create_storage_object(folder)
try:
os.makedirs(folder)
except EnvironmentError as e:
logger.critical(str(e))
self.folder = folder
if os.getenv('DEBUG', None) == 'NODOWNLOAD': if os.getenv('DEBUG', None) == 'NODOWNLOAD':
# Assuming we want to continue with rest of process. # Assuming we want to continue with rest of process.
@ -169,4 +165,31 @@ class Downloader(Singleton):
# Prevent coroutines infection # Prevent coroutines infection
asyncio.run(self.fiber(coroutines)) asyncio.run(self.fiber(coroutines))
self.close()
return True
class CompressedDownloader(Downloader):
def create_storage_object(self, folder):
filename = f'{folder}.zip'
print(filename)
self.zipfile = zipfile.ZipFile(filename,'w')
self.close = lambda: self.zipfile.close()
async def save(self, filename, response) -> bool:
if response is None:
logger.error('Error: Response is None')
return False
image_data = io.BytesIO()
length = response.headers.get('content-length')
if length is None:
content = await response.read()
image_data.write(content)
else:
async for chunk in response.aiter_bytes(2048):
image_data.write(chunk)
image_data.seek(0)
self.zipfile.writestr(filename, image_data.read())
return True return True

View File

@ -92,13 +92,27 @@ def favorites_parser(page=None):
page_range_list = range(1, pages + 1) page_range_list = range(1, pages + 1)
for page in page_range_list: for page in page_range_list:
try:
logger.info(f'Getting doujinshi ids of page {page}') logger.info(f'Getting doujinshi ids of page {page}')
resp = request('get', f'{constant.FAV_URL}?page={page}').content
result.extend(_get_title_and_id(resp)) i = 0
while i <= constant.RETRY_TIMES + 1:
i += 1
if i > 3:
logger.error(f'Failed to get favorites at page {page} after 3 times retried, skipped')
break
try:
resp = request('get', f'{constant.FAV_URL}?page={page}').content
temp_result = _get_title_and_id(resp)
if not temp_result:
logger.warning(f'Failed to get favorites at page {page}, retrying ({i} times) ...')
continue
else:
result.extend(temp_result)
break
except Exception as e: except Exception as e:
logger.error(f'Error: {e}, continue') logger.warning(f'Error: {e}, retrying ({i} times) ...')
return result return result
@ -141,17 +155,19 @@ def doujinshi_parser(id_, counter=0):
title = doujinshi_info.find('h1').text title = doujinshi_info.find('h1').text
pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text
subtitle = doujinshi_info.find('h2') subtitle = doujinshi_info.find('h2')
favorite_counts = doujinshi_info.find('span', class_='nobold').find('span', class_='count') favorite_counts = doujinshi_info.find('span', class_='nobold').text.strip('(').strip(')')
doujinshi['name'] = title doujinshi['name'] = title
doujinshi['pretty_name'] = pretty_name doujinshi['pretty_name'] = pretty_name
doujinshi['subtitle'] = subtitle.text if subtitle else '' doujinshi['subtitle'] = subtitle.text if subtitle else ''
doujinshi['favorite_counts'] = int(favorite_counts.text.strip()) if favorite_counts else 0 doujinshi['favorite_counts'] = int(favorite_counts) if favorite_counts and favorite_counts.isdigit() else 0
doujinshi_cover = html.find('div', attrs={'id': 'cover'}) doujinshi_cover = html.find('div', attrs={'id': 'cover'})
# img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$', # img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
# doujinshi_cover.a.img.attrs['data-src']) # doujinshi_cover.a.img.attrs['data-src'])
img_id = re.search(r'/galleries/(\d+)/cover\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
# fix cover.webp.webp
img_id = re.search(r'/galleries/(\d+)/cover(\.webp|\.jpg|\.png)?\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
ext = [] ext = []
for i in html.find_all('div', attrs={'class': 'thumb-container'}): for i in html.find_all('div', attrs={'class': 'thumb-container'}):
@ -261,7 +277,7 @@ def search_parser(keyword, sorting, page, is_page_all=False):
i = 0 i = 0
logger.info(f'Searching doujinshis using keywords "{keyword}" on page {p}{total}') logger.info(f'Searching doujinshis using keywords "{keyword}" on page {p}{total}')
while i < 3: while i < constant.RETRY_TIMES:
try: try:
url = request('get', url=constant.SEARCH_URL, params={'query': keyword, url = request('get', url=constant.SEARCH_URL, params={'query': keyword,
'page': p, 'sort': sorting}).url 'page': p, 'sort': sorting}).url

View File

@ -2,12 +2,12 @@
import json import json
import os import os
from nhentai.constant import PATH_SEPARATOR from nhentai.constant import PATH_SEPARATOR, LANGUAGE_ISO
from xml.sax.saxutils import escape from xml.sax.saxutils import escape
from nhentai.constant import LANGUAGE_ISO from requests.structures import CaseInsensitiveDict
def serialize_json(doujinshi, output_dir): def serialize_json(doujinshi, output_dir: str):
metadata = {'title': doujinshi.name, metadata = {'title': doujinshi.name,
'subtitle': doujinshi.info.subtitle} 'subtitle': doujinshi.info.subtitle}
if doujinshi.info.favorite_counts: if doujinshi.info.favorite_counts:
@ -78,6 +78,26 @@ def serialize_comic_xml(doujinshi, output_dir):
f.write('</ComicInfo>') f.write('</ComicInfo>')
def serialize_info_txt(doujinshi, output_dir: str):
info_txt_path = os.path.join(output_dir, 'info.txt')
f = open(info_txt_path, 'w', encoding='utf-8')
fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
'SERIES', 'PARODY', 'URL']
temp_dict = CaseInsensitiveDict(dict(doujinshi.table))
for i in fields:
v = temp_dict.get(i)
v = temp_dict.get(f'{i}s') if v is None else v
v = doujinshi.info.get(i.lower(), None) if v is None else v
v = doujinshi.info.get(f'{i.lower()}s', "Unknown") if v is None else v
f.write(f'{i}: {v}\n')
f.close()
def xml_write_simple_tag(f, name, val, indent=1): def xml_write_simple_tag(f, name, val, indent=1):
f.write(f'{" "*indent}<{name}>{escape(str(val))}</{name}>\n') f.write(f'{" "*indent}<{name}>{escape(str(val))}</{name}>\n')
@ -132,3 +152,4 @@ def set_js_database():
indexed_json = json.dumps(indexed_json, separators=(',', ':')) indexed_json = json.dumps(indexed_json, separators=(',', ':'))
f.write('var data = ' + indexed_json) f.write('var data = ' + indexed_json)
f.write(';\nvar tags = ' + unique_json) f.write(';\nvar tags = ' + unique_json)

View File

@ -1,5 +1,5 @@
# coding: utf-8 # coding: utf-8
import json
import sys import sys
import re import re
import os import os
@ -11,24 +11,33 @@ import requests
import sqlite3 import sqlite3
import urllib.parse import urllib.parse
from typing import Tuple from typing import Tuple
from requests.structures import CaseInsensitiveDict
from nhentai import constant from nhentai import constant
from nhentai.constant import PATH_SEPARATOR from nhentai.constant import PATH_SEPARATOR
from nhentai.logger import logger from nhentai.logger import logger
from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database from nhentai.serializer import serialize_comic_xml, serialize_json, serialize_info_txt, set_js_database
MAX_FIELD_LENGTH = 100 MAX_FIELD_LENGTH = 100
EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp') EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
def get_headers():
headers = {
'Referer': constant.LOGIN_URL
}
user_agent = constant.CONFIG.get('useragent')
if user_agent and user_agent.strip():
headers['User-Agent'] = user_agent
cookie = constant.CONFIG.get('cookie')
if cookie and cookie.strip():
headers['Cookie'] = cookie
return headers
def request(method, url, **kwargs): def request(method, url, **kwargs):
session = requests.Session() session = requests.Session()
session.headers.update({ session.headers.update(get_headers())
'Referer': constant.LOGIN_URL,
'User-Agent': constant.CONFIG['useragent'],
'Cookie': constant.CONFIG['cookie']
})
if not kwargs.get('proxies', None): if not kwargs.get('proxies', None):
kwargs['proxies'] = { kwargs['proxies'] = {
@ -40,11 +49,7 @@ def request(method, url, **kwargs):
async def async_request(method, url, proxy = None, **kwargs): async def async_request(method, url, proxy = None, **kwargs):
headers = { headers=get_headers()
'Referer': constant.LOGIN_URL,
'User-Agent': constant.CONFIG['useragent'],
'Cookie': constant.CONFIG['cookie'],
}
if proxy is None: if proxy is None:
proxy = constant.CONFIG['proxy'] proxy = constant.CONFIG['proxy']
@ -105,21 +110,24 @@ def parse_doujinshi_obj(
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename) doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
_filename = f'{doujinshi_obj.filename}.{file_type}' _filename = f'{doujinshi_obj.filename}.{file_type}'
if file_type == 'cbz':
serialize_comic_xml(doujinshi_obj, doujinshi_dir)
if file_type == 'pdf': if file_type == 'pdf':
_filename = _filename.replace('/', '-') _filename = _filename.replace('/', '-')
filename = os.path.join(output_dir, _filename) filename = os.path.join(output_dir, _filename)
else: else:
if file_type == 'html':
return output_dir, 'index.html'
doujinshi_dir = f'.{PATH_SEPARATOR}' doujinshi_dir = f'.{PATH_SEPARATOR}'
if not os.path.exists(doujinshi_dir):
os.makedirs(doujinshi_dir)
return doujinshi_dir, filename return doujinshi_dir, filename
def generate_html(output_dir='.', doujinshi_obj=None, template='default'): def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '.html') doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, 'html')
image_html = '' image_html = ''
if not os.path.exists(doujinshi_dir): if not os.path.exists(doujinshi_dir):
@ -142,10 +150,16 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
js = readfile(f'viewer/{template}/scripts.js') js = readfile(f'viewer/{template}/scripts.js')
if doujinshi_obj is not None: if doujinshi_obj is not None:
serialize_json(doujinshi_obj, doujinshi_dir) # serialize_json(doujinshi_obj, doujinshi_dir)
name = doujinshi_obj.name name = doujinshi_obj.name
else: else:
name = {'title': 'nHentai HTML Viewer'} metadata_path = os.path.join(doujinshi_dir, "metadata.json")
if os.path.exists(metadata_path):
with open(metadata_path, 'r') as file:
doujinshi_info = json.loads(file.read())
name = doujinshi_info.get("title")
else:
name = 'nHentai HTML Viewer'
data = html.format(TITLE=name, IMAGES=image_html, SCRIPTS=js, STYLES=css) data = html.format(TITLE=name, IMAGES=image_html, SCRIPTS=js, STYLES=css)
try: try:
@ -235,15 +249,7 @@ def generate_main_html(output_dir=f'.{PATH_SEPARATOR}'):
logger.warning(f'Writing Main Viewer failed ({e})') logger.warning(f'Writing Main Viewer failed ({e})')
def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False): def generate_cbz(doujinshi_dir, filename):
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
if os.path.exists(f'{doujinshi_dir}.{file_type}') and not regenerate:
logger.info(f'Skipped {file_type} file generation: {doujinshi_dir}.{file_type} already exists')
return
if file_type == 'cbz':
file_list = os.listdir(doujinshi_dir) file_list = os.listdir(doujinshi_dir)
file_list.sort() file_list.sort()
@ -254,6 +260,19 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa
cbz_pf.write(image_path, image) cbz_pf.write(image_path, image)
logger.log(16, f'Comic Book CBZ file has been written to "{filename}"') logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
if os.path.exists(f'{doujinshi_dir}.{file_type}') and not regenerate:
logger.info(f'Skipped {file_type} file generation: {doujinshi_dir}.{file_type} already exists')
return
if file_type == 'cbz':
serialize_comic_xml(doujinshi_obj, doujinshi_dir)
generate_cbz(doujinshi_dir, filename)
elif file_type == 'pdf': elif file_type == 'pdf':
try: try:
import img2pdf import img2pdf
@ -273,6 +292,16 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa
except ImportError: except ImportError:
logger.error("Please install img2pdf package by using pip.") logger.error("Please install img2pdf package by using pip.")
else:
raise ValueError('invalid file type')
def generate_metadata(output_dir, doujinshi_obj=None):
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '')
serialize_json(doujinshi_obj, doujinshi_dir)
serialize_comic_xml(doujinshi_obj, doujinshi_dir)
serialize_info_txt(doujinshi_obj, doujinshi_dir)
logger.log(16, f'Metadata files have been written to "{doujinshi_dir}"')
def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False): def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
@ -329,29 +358,6 @@ def paging(page_string):
return page_list return page_list
def generate_metadata_file(output_dir, doujinshi_obj):
info_txt_path = os.path.join(output_dir, doujinshi_obj.filename, 'info.txt')
f = open(info_txt_path, 'w', encoding='utf-8')
fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
'SERIES', 'PARODY', 'URL']
temp_dict = CaseInsensitiveDict(dict(doujinshi_obj.table))
for i in fields:
v = temp_dict.get(i)
v = temp_dict.get(f'{i}s') if v is None else v
v = doujinshi_obj.info.get(i.lower(), None) if v is None else v
v = doujinshi_obj.info.get(f'{i.lower()}s', "Unknown") if v is None else v
f.write(f'{i}: {v}\n')
f.close()
logger.log(16, f'Metadata Info has been written to "{info_txt_path}"')
class DB(object): class DB(object):
conn = None conn = None
cur = None cur = None

View File

@ -75,11 +75,13 @@ document.onkeydown = event =>{
changePage(currentPage - 1); changePage(currentPage - 1);
break; break;
case 38: //up case 38: //up
changePage(currentPage - 1);
break; break;
case 39: //right case 39: //right
changePage(currentPage + 1); changePage(currentPage + 1);
break; break;
case 40: //down case 40: //down
changePage(currentPage + 1);
break; break;
} }
}; };

35
poetry.lock generated
View File

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand. # This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]] [[package]]
name = "anyio" name = "anyio"
@ -6,7 +6,6 @@ version = "4.5.2"
description = "High level compatibility layer for multiple asynchronous event loop implementations" description = "High level compatibility layer for multiple asynchronous event loop implementations"
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
groups = ["main"]
files = [ files = [
{file = "anyio-4.5.2-py3-none-any.whl", hash = "sha256:c011ee36bc1e8ba40e5a81cb9df91925c218fe9b778554e0b56a21e1b5d4716f"}, {file = "anyio-4.5.2-py3-none-any.whl", hash = "sha256:c011ee36bc1e8ba40e5a81cb9df91925c218fe9b778554e0b56a21e1b5d4716f"},
{file = "anyio-4.5.2.tar.gz", hash = "sha256:23009af4ed04ce05991845451e11ef02fc7c5ed29179ac9a420e5ad0ac7ddc5b"}, {file = "anyio-4.5.2.tar.gz", hash = "sha256:23009af4ed04ce05991845451e11ef02fc7c5ed29179ac9a420e5ad0ac7ddc5b"},
@ -29,7 +28,6 @@ version = "4.12.3"
description = "Screen-scraping library" description = "Screen-scraping library"
optional = false optional = false
python-versions = ">=3.6.0" python-versions = ">=3.6.0"
groups = ["main"]
files = [ files = [
{file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"},
{file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"}, {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"},
@ -51,19 +49,28 @@ version = "2024.12.14"
description = "Python package for providing Mozilla's CA Bundle." description = "Python package for providing Mozilla's CA Bundle."
optional = false optional = false
python-versions = ">=3.6" python-versions = ">=3.6"
groups = ["main"]
files = [ files = [
{file = "certifi-2024.12.14-py3-none-any.whl", hash = "sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56"}, {file = "certifi-2024.12.14-py3-none-any.whl", hash = "sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56"},
{file = "certifi-2024.12.14.tar.gz", hash = "sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db"}, {file = "certifi-2024.12.14.tar.gz", hash = "sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db"},
] ]
[[package]]
name = "chardet"
version = "5.2.0"
description = "Universal encoding detector for Python 3"
optional = false
python-versions = ">=3.7"
files = [
{file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"},
{file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"},
]
[[package]] [[package]]
name = "charset-normalizer" name = "charset-normalizer"
version = "3.4.1" version = "3.4.1"
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
optional = false optional = false
python-versions = ">=3.7" python-versions = ">=3.7"
groups = ["main"]
files = [ files = [
{file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"}, {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"},
{file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"}, {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"},
@ -165,8 +172,6 @@ version = "1.2.2"
description = "Backport of PEP 654 (exception groups)" description = "Backport of PEP 654 (exception groups)"
optional = false optional = false
python-versions = ">=3.7" python-versions = ">=3.7"
groups = ["main"]
markers = "python_version < \"3.11\""
files = [ files = [
{file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
{file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
@ -181,7 +186,6 @@ version = "0.14.0"
description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
optional = false optional = false
python-versions = ">=3.7" python-versions = ">=3.7"
groups = ["main"]
files = [ files = [
{file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
@ -193,7 +197,6 @@ version = "1.0.7"
description = "A minimal low-level HTTP client." description = "A minimal low-level HTTP client."
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
groups = ["main"]
files = [ files = [
{file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"}, {file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"},
{file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"}, {file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"},
@ -215,7 +218,6 @@ version = "0.28.1"
description = "The next generation HTTP client." description = "The next generation HTTP client."
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
groups = ["main"]
files = [ files = [
{file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"}, {file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"},
{file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"}, {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"},
@ -240,7 +242,6 @@ version = "3.10"
description = "Internationalized Domain Names in Applications (IDNA)" description = "Internationalized Domain Names in Applications (IDNA)"
optional = false optional = false
python-versions = ">=3.6" python-versions = ">=3.6"
groups = ["main"]
files = [ files = [
{file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"},
{file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"},
@ -255,7 +256,6 @@ version = "1.1.0"
description = "Simple module to parse ISO 8601 dates" description = "Simple module to parse ISO 8601 dates"
optional = false optional = false
python-versions = ">=3.6.2,<4.0" python-versions = ">=3.6.2,<4.0"
groups = ["main"]
files = [ files = [
{file = "iso8601-1.1.0-py3-none-any.whl", hash = "sha256:8400e90141bf792bce2634df533dc57e3bee19ea120a87bebcd3da89a58ad73f"}, {file = "iso8601-1.1.0-py3-none-any.whl", hash = "sha256:8400e90141bf792bce2634df533dc57e3bee19ea120a87bebcd3da89a58ad73f"},
{file = "iso8601-1.1.0.tar.gz", hash = "sha256:32811e7b81deee2063ea6d2e94f8819a86d1f3811e49d23623a41fa832bef03f"}, {file = "iso8601-1.1.0.tar.gz", hash = "sha256:32811e7b81deee2063ea6d2e94f8819a86d1f3811e49d23623a41fa832bef03f"},
@ -267,7 +267,6 @@ version = "2.32.3"
description = "Python HTTP for Humans." description = "Python HTTP for Humans."
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
groups = ["main"]
files = [ files = [
{file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"},
{file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"},
@ -289,7 +288,6 @@ version = "1.3.1"
description = "Sniff out which async library your code is running under" description = "Sniff out which async library your code is running under"
optional = false optional = false
python-versions = ">=3.7" python-versions = ">=3.7"
groups = ["main"]
files = [ files = [
{file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
@ -301,7 +299,6 @@ version = "2.6"
description = "A modern CSS selector implementation for Beautiful Soup." description = "A modern CSS selector implementation for Beautiful Soup."
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
groups = ["main"]
files = [ files = [
{file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"}, {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"},
{file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"}, {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"},
@ -313,7 +310,6 @@ version = "0.9.0"
description = "Pretty-print tabular data" description = "Pretty-print tabular data"
optional = false optional = false
python-versions = ">=3.7" python-versions = ">=3.7"
groups = ["main"]
files = [ files = [
{file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"},
{file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"},
@ -328,8 +324,6 @@ version = "4.12.2"
description = "Backported and Experimental Type Hints for Python 3.8+" description = "Backported and Experimental Type Hints for Python 3.8+"
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
groups = ["main"]
markers = "python_version < \"3.11\""
files = [ files = [
{file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
{file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
@ -341,7 +335,6 @@ version = "1.26.20"
description = "HTTP library with thread-safe connection pooling, file post, and more." description = "HTTP library with thread-safe connection pooling, file post, and more."
optional = false optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
groups = ["main"]
files = [ files = [
{file = "urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e"}, {file = "urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e"},
{file = "urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32"}, {file = "urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32"},
@ -353,6 +346,6 @@ secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "p
socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
[metadata] [metadata]
lock-version = "2.1" lock-version = "2.0"
python-versions = "^3.8" python-versions = "^3.8"
content-hash = "a5a2e54efa8c3a930e53127f07d657669e065a4b5ded293a09e8105bb8cc4bc2" content-hash = "b17c2cdd4b140f2ab8081bca7d94630e821fa2e882ac768b1bd8cf3ec58726ce"

View File

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "nhentai" name = "nhentai"
version = "0.5.19" version = "0.6.0-beta"
description = "nhentai doujinshi downloader" description = "nhentai doujinshi downloader"
authors = ["Ricter Z <ricterzheng@gmail.com>"] authors = ["Ricter Z <ricterzheng@gmail.com>"]
license = "MIT" license = "MIT"
@ -19,6 +19,7 @@ urllib3 = "^1.26.20"
httpx = "^0.28.1" httpx = "^0.28.1"
chardet = "^5.2.0"
[build-system] [build-system]
requires = ["poetry-core"] requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api" build-backend = "poetry.core.masonry.api"

View File

@ -1,14 +1,27 @@
import unittest import unittest
import os import os
import zipfile
import urllib3.exceptions import urllib3.exceptions
from nhentai import constant from nhentai import constant
from nhentai.cmdline import load_config from nhentai.cmdline import load_config
from nhentai.downloader import Downloader from nhentai.downloader import Downloader, CompressedDownloader
from nhentai.parser import doujinshi_parser from nhentai.parser import doujinshi_parser
from nhentai.doujinshi import Doujinshi from nhentai.doujinshi import Doujinshi
from nhentai.utils import generate_html, generate_cbz from nhentai.utils import generate_html
did = 440546
def has_jepg_file(path):
with zipfile.ZipFile(path, 'r') as zf:
return '01.jpg' in zf.namelist()
def is_zip_file(path):
try:
with zipfile.ZipFile(path, 'r') as _:
return True
except (zipfile.BadZipFile, FileNotFoundError):
return False
class TestDownload(unittest.TestCase): class TestDownload(unittest.TestCase):
def setUp(self) -> None: def setUp(self) -> None:
@ -17,20 +30,27 @@ class TestDownload(unittest.TestCase):
constant.CONFIG['cookie'] = os.getenv('NHENTAI_COOKIE') constant.CONFIG['cookie'] = os.getenv('NHENTAI_COOKIE')
constant.CONFIG['useragent'] = os.getenv('NHENTAI_UA') constant.CONFIG['useragent'] = os.getenv('NHENTAI_UA')
self.info = Doujinshi(**doujinshi_parser(did), name_format='%i')
def test_download(self): def test_download(self):
did = 440546 info = self.info
info = Doujinshi(**doujinshi_parser(did), name_format='%i')
info.downloader = Downloader(path='/tmp', threads=5) info.downloader = Downloader(path='/tmp', threads=5)
info.download() info.download()
self.assertTrue(os.path.exists(f'/tmp/{did}/001.jpg')) self.assertTrue(os.path.exists(f'/tmp/{did}/01.jpg'))
generate_html('/tmp', info) generate_html('/tmp', info)
self.assertTrue(os.path.exists(f'/tmp/{did}/index.html')) self.assertTrue(os.path.exists(f'/tmp/{did}/index.html'))
generate_cbz('/tmp', info) def test_zipfile_download(self):
self.assertTrue(os.path.exists(f'/tmp/{did}.cbz')) info = self.info
info.downloader = CompressedDownloader(path='/tmp', threads=5)
info.download()
zipfile_path = f'/tmp/{did}.zip'
self.assertTrue(os.path.exists(zipfile_path))
self.assertTrue(is_zip_file(zipfile_path))
self.assertTrue(has_jepg_file(zipfile_path))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()