Compare commits


No commits in common. "master" and "0.5.21" have entirely different histories.

13 changed files with 212 additions and 336 deletions

README.rst

@@ -22,7 +22,7 @@ From Github:
     git clone https://github.com/RicterZ/nhentai
     cd nhentai
-    pip install --no-cache-dir .
+    python setup.py install

 Build Docker container:
@@ -136,8 +136,6 @@ Format output doujinshi folder name:
 .. code-block:: bash

     nhentai --id 261100 --format '[%i]%s'

-    # for Windows
-    nhentai --id 261100 --format "[%%i]%%s"

 Supported doujinshi folder formatter:
@@ -150,7 +148,6 @@ Supported doujinshi folder formatter:
 - %p: Doujinshi pretty name
 - %ag: Doujinshi authors name or groups name

-Note: for Windows operation system, please use double "%", such as "%%i".

 Other options:

nhentai/__init__.py

@@ -1,3 +1,3 @@
-__version__ = '0.6.0-beta'
+__version__ = '0.5.20'
 __author__ = 'RicterZ'
 __email__ = 'ricterzheng@gmail.com'

nhentai/cmdline.py

@@ -6,10 +6,10 @@ import json
 import nhentai.constant as constant
 from urllib.parse import urlparse
-from argparse import ArgumentParser
+from optparse import OptionParser
 from nhentai import __version__
-from nhentai.utils import generate_html, generate_main_html, DB, EXTENSIONS
+from nhentai.utils import generate_html, generate_main_html, DB
 from nhentai.logger import logger
 from nhentai.constant import PATH_SEPARATOR
@@ -57,133 +57,103 @@ def callback(option, _opt_str, _value, parser):
 def cmd_parser():
     load_config()

-    parser = ArgumentParser(
-        description='\n nhentai --search [keyword] --download'
-                    '\n NHENTAI=https://nhentai-mirror-url/ nhentai --id [ID ...]'
-                    '\n nhentai --file [filename]'
-                    '\n\nEnvironment Variable:\n'
-                    ' NHENTAI nhentai mirror url'
-    )
+    parser = OptionParser('\n nhentai --search [keyword] --download'
+                          '\n NHENTAI=https://nhentai-mirror-url/ nhentai --id [ID ...]'
+                          '\n nhentai --file [filename]'
+                          '\n\nEnvironment Variable:\n'
+                          ' NHENTAI nhentai mirror url')

     # operation options
-    parser.add_argument('--download', '-D', dest='is_download', action='store_true',
-                        help='download doujinshi (for search results)')
+    parser.add_option('--download', '-D', dest='is_download', action='store_true',
+                      help='download doujinshi (for search results)')
-    parser.add_argument('--no-download', dest='no_download', action='store_true', default=False,
-                        help='download doujinshi (for search results)')
-    parser.add_argument('--show', '-S', dest='is_show', action='store_true',
-                        help='just show the doujinshi information')
+    parser.add_option('--show', '-S', dest='is_show', action='store_true',
+                      help='just show the doujinshi information')

     # doujinshi options
-    parser.add_argument('--id', dest='id', nargs='+', type=int,
-                        help='doujinshi ids set, e.g. 167680 167681 167682')
+    parser.add_option('--id', dest='id', action='callback', callback=callback,
+                      help='doujinshi ids set, e.g. 167680 167681 167682')
-    parser.add_argument('--search', '-s', type=str, dest='keyword',
-                        help='search doujinshi by keyword')
+    parser.add_option('--search', '-s', type='string', dest='keyword', action='store',
+                      help='search doujinshi by keyword')
-    parser.add_argument('--favorites', '-F', action='store_true', dest='favorites',
-                        help='list or download your favorites')
+    parser.add_option('--favorites', '-F', action='store_true', dest='favorites',
+                      help='list or download your favorites')
-    parser.add_argument('--artist', '-a', type=str, dest='artist',
-                        help='list doujinshi by artist name')
+    parser.add_option('--artist', '-a', action='store', dest='artist',
+                      help='list doujinshi by artist name')

     # page options
-    parser.add_argument('--page-all', dest='page_all', action='store_true', default=False,
-                        help='all search results')
+    parser.add_option('--page-all', dest='page_all', action='store_true', default=False,
+                      help='all search results')
-    parser.add_argument('--page', '--page-range', type=str, dest='page',
-                        help='page number of search results. e.g. 1,2-5,14')
+    parser.add_option('--page', '--page-range', type='string', dest='page', action='store',
+                      help='page number of search results. e.g. 1,2-5,14')
-    parser.add_argument('--sorting', '--sort', dest='sorting', type=str, default='popular',
-                        help='sorting of doujinshi (recent / popular / popular-[today|week])',
-                        choices=['recent', 'popular', 'popular-today', 'popular-week', 'date'])
+    parser.add_option('--sorting', '--sort', dest='sorting', action='store', default='popular',
+                      help='sorting of doujinshi (recent / popular / popular-[today|week])',
+                      choices=['recent', 'popular', 'popular-today', 'popular-week', 'date'])

     # download options
-    parser.add_argument('--output', '-o', type=str, dest='output_dir', default='.',
-                        help='output dir')
+    parser.add_option('--output', '-o', type='string', dest='output_dir', action='store',
+                      default=f'.{PATH_SEPARATOR}',
+                      help='output dir')
-    parser.add_argument('--threads', '-t', type=int, dest='threads', default=5,
-                        help='thread count for downloading doujinshi')
+    parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
+                      help='thread count for downloading doujinshi')
-    parser.add_argument('--timeout', '-T', type=int, dest='timeout', default=30,
-                        help='timeout for downloading doujinshi')
+    parser.add_option('--timeout', '-T', type='int', dest='timeout', action='store', default=30,
+                      help='timeout for downloading doujinshi')
-    parser.add_argument('--delay', '-d', type=int, dest='delay', default=0,
-                        help='slow down between downloading every doujinshi')
+    parser.add_option('--delay', '-d', type='int', dest='delay', action='store', default=0,
+                      help='slow down between downloading every doujinshi')
-    parser.add_argument('--retry', type=int, dest='retry', default=3,
-                        help='retry times when downloading failed')
+    parser.add_option('--retry', type='int', dest='retry', action='store', default=3,
+                      help='retry times when downloading failed')
-    parser.add_argument('--exit-on-fail', dest='exit_on_fail', action='store_true', default=False,
-                        help='exit on fail to prevent generating incomplete files')
+    parser.add_option('--exit-on-fail', dest='exit_on_fail', action='store_true', default=False,
+                      help='exit on fail to prevent generating incomplete files')
-    parser.add_argument('--proxy', type=str, dest='proxy',
-                        help='store a proxy, for example: -p "http://127.0.0.1:1080"')
+    parser.add_option('--proxy', type='string', dest='proxy', action='store',
+                      help='store a proxy, for example: -p "http://127.0.0.1:1080"')
-    parser.add_argument('--file', '-f', type=str, dest='file',
-                        help='read gallery IDs from file.')
+    parser.add_option('--file', '-f', type='string', dest='file', action='store',
+                      help='read gallery IDs from file.')
-    parser.add_argument('--format', type=str, dest='name_format', default='[%i][%a][%t]',
-                        help='format the saved folder name')
+    parser.add_option('--format', type='string', dest='name_format', action='store',
+                      help='format the saved folder name', default='[%i][%a][%t]')
+    parser.add_option('--dry-run', action='store_true', dest='dryrun', help='Dry run, skip file download')
-    parser.add_argument('--no-filename-padding', action='store_true', dest='no_filename_padding',
-                        default=False, help='no padding in the images filename, such as \'001.jpg\'')
+    parser.add_option('--no-filename-padding', action='store_true', dest='no_filename_padding',
+                      default=False, help='no padding in the images filename, such as \'001.jpg\'')

     # generate options
-    parser.add_argument('--html', dest='html_viewer', type=str, nargs='?', const='.',
-                        help='generate an HTML viewer in the specified directory, or scan all subfolders '
-                             'within the entire directory to generate the HTML viewer. By default, current '
-                             'working directory is used.')
+    parser.add_option('--html', dest='html_viewer', action='store_true',
+                      help='generate a html viewer at current directory')
-    parser.add_argument('--no-html', dest='is_nohtml', action='store_true',
-                        help='don\'t generate HTML after downloading')
+    parser.add_option('--no-html', dest='is_nohtml', action='store_true',
+                      help='don\'t generate HTML after downloading')
-    parser.add_argument('--gen-main', dest='main_viewer', action='store_true',
-                        help='generate a main viewer contain all the doujin in the folder')
+    parser.add_option('--gen-main', dest='main_viewer', action='store_true',
+                      help='generate a main viewer contain all the doujin in the folder')
-    parser.add_argument('--cbz', '-C', dest='is_cbz', action='store_true',
-                        help='generate Comic Book CBZ File')
+    parser.add_option('--cbz', '-C', dest='is_cbz', action='store_true',
+                      help='generate Comic Book CBZ File')
-    parser.add_argument('--pdf', '-P', dest='is_pdf', action='store_true',
-                        help='generate PDF file')
+    parser.add_option('--pdf', '-P', dest='is_pdf', action='store_true',
+                      help='generate PDF file')

-    parser.add_argument('--meta', dest='generate_metadata', action='store_true', default=False,
-                        help='generate a metadata file in doujinshi format')
-    parser.add_argument('--update-meta', dest='update_metadata', action='store_true', default=False,
-                        help='update the metadata file of a doujinshi, update CBZ metadata if exists')
-    parser.add_argument('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
-                        help='remove downloaded doujinshi dir when generated CBZ or PDF file')
-    parser.add_argument('--move-to-folder', dest='move_to_folder', action='store_true', default=False,
-                        help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
-    parser.add_argument('--regenerate', dest='regenerate', action='store_true', default=False,
-                        help='regenerate the cbz or pdf file if exists')
-    parser.add_argument('--zip', action='store_true', help='Package into a single zip file')
+    parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
+                      help='remove downloaded doujinshi dir when generated CBZ or PDF file')
+    parser.add_option('--move-to-folder', dest='move_to_folder', action='store_true', default=False,
+                      help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
+    parser.add_option('--meta', dest='generate_metadata', action='store_true',
+                      help='generate a metadata file in doujinshi format')
+    parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False,
+                      help='regenerate the cbz or pdf file if exists')

     # nhentai options
-    parser.add_argument('--cookie', type=str, dest='cookie',
-                        help='set cookie of nhentai to bypass Cloudflare captcha')
+    parser.add_option('--cookie', type='str', dest='cookie', action='store',
+                      help='set cookie of nhentai to bypass Cloudflare captcha')
-    parser.add_argument('--useragent', '--user-agent', type=str, dest='useragent',
-                        help='set useragent to bypass Cloudflare captcha')
+    parser.add_option('--useragent', '--user-agent', type='str', dest='useragent', action='store',
+                      help='set useragent to bypass Cloudflare captcha')
-    parser.add_argument('--language', type=str, dest='language',
-                        help='set default language to parse doujinshis')
+    parser.add_option('--language', type='str', dest='language', action='store',
+                      help='set default language to parse doujinshis')
-    parser.add_argument('--clean-language', dest='clean_language', action='store_true', default=False,
-                        help='set DEFAULT as language to parse doujinshis')
+    parser.add_option('--clean-language', dest='clean_language', action='store_true', default=False,
+                      help='set DEFAULT as language to parse doujinshis')
-    parser.add_argument('--save-download-history', dest='is_save_download_history', action='store_true',
-                        default=False, help='save downloaded doujinshis, whose will be skipped if you re-download them')
+    parser.add_option('--save-download-history', dest='is_save_download_history', action='store_true',
+                      default=False, help='save downloaded doujinshis, whose will be skipped if you re-download them')
-    parser.add_argument('--clean-download-history', action='store_true', default=False, dest='clean_download_history',
-                        help='clean download history')
+    parser.add_option('--clean-download-history', action='store_true', default=False, dest='clean_download_history',
+                      help='clean download history')
-    parser.add_argument('--template', dest='viewer_template', type=str, default='',
-                        help='set viewer template')
+    parser.add_option('--template', dest='viewer_template', action='store',
+                      help='set viewer template', default='')
-    parser.add_argument('--legacy', dest='legacy', action='store_true', default=False,
-                        help='use legacy searching method')
+    parser.add_option('--legacy', dest='legacy', action='store_true', default=False,
+                      help='use legacy searching method')

-    args = parser.parse_args()
+    args, _ = parser.parse_args(sys.argv[1:])

     if args.html_viewer:
-        if not os.path.exists(args.html_viewer):
-            logger.error(f'Path \'{args.html_viewer}\' not exists')
-            sys.exit(1)
-
-        for root, dirs, files in os.walk(args.html_viewer):
-            if not dirs:
-                generate_html(output_dir=args.html_viewer, template=constant.CONFIG['template'])
-                sys.exit(0)
-
-            for dir_name in dirs:
-                # it will scan the entire subdirectories
-                doujinshi_dir = os.path.join(root, dir_name)
-                items = set(map(lambda s: os.path.splitext(s)[1], os.listdir(doujinshi_dir)))
-
-                # skip directory without any images
-                if items & set(EXTENSIONS):
-                    generate_html(output_dir=doujinshi_dir, template=constant.CONFIG['template'])
-                    sys.exit(0)
-
+        generate_html(template=constant.CONFIG['template'])
         sys.exit(0)

     if args.main_viewer and not args.id and not args.keyword and not args.favorites:
@@ -199,23 +169,21 @@ def cmd_parser():
     # --- set config ---
     if args.cookie is not None:
-        constant.CONFIG['cookie'] = args.cookie.strip()
+        constant.CONFIG['cookie'] = args.cookie
         write_config()
         logger.info('Cookie saved.')
+        sys.exit(0)
-    if args.useragent is not None:
-        constant.CONFIG['useragent'] = args.useragent.strip()
+    elif args.useragent is not None:
+        constant.CONFIG['useragent'] = args.useragent
         write_config()
         logger.info('User-Agent saved.')
+        sys.exit(0)
-    if args.language is not None:
+    elif args.language is not None:
         constant.CONFIG['language'] = args.language
         write_config()
         logger.info(f'Default language now set to "{args.language}"')
-        # TODO: search without language
-    if any([args.cookie, args.useragent, args.language]):
         sys.exit(0)
+
+    # TODO: search without language

     if args.proxy is not None:
         proxy_url = urlparse(args.proxy)
@@ -269,4 +237,8 @@ def cmd_parser():
         logger.critical('Maximum number of used threads is 15')
         sys.exit(1)

+    if args.dryrun and (args.is_cbz or args.is_pdf):
+        logger.critical('Cannot generate PDF or CBZ during dry-run')
+        sys.exit(1)
+
     return args
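
Note: the bulk of the cmdline.py hunks above is a mechanical argparse-to-optparse translation. A minimal sketch of the pattern with a hypothetical option (not taken from either branch): argparse takes a callable type and returns a single namespace, while optparse takes the type as a string and returns an (options, args) tuple, which is why the 0.5.21 side unpacks `args, _`.

    from argparse import ArgumentParser
    from optparse import OptionParser

    # argparse style (master side): type is a callable, one namespace comes back
    ap = ArgumentParser()
    ap.add_argument('--threads', '-t', type=int, dest='threads', default=5)
    args = ap.parse_args(['--threads', '10'])
    assert args.threads == 10

    # optparse style (0.5.21 side): type is a string, a (options, args) tuple comes back
    op = OptionParser()
    op.add_option('--threads', '-t', type='int', dest='threads', default=5)
    opts, _ = op.parse_args(['--threads', '10'])
    assert opts.threads == 10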

nhentai/__main__.py

@@ -4,16 +4,18 @@ import shutil
 import sys
 import signal
 import platform
+import urllib
 import urllib3.exceptions

 from nhentai import constant
 from nhentai.cmdline import cmd_parser, banner, write_config
 from nhentai.parser import doujinshi_parser, search_parser, legacy_search_parser, print_doujinshi, favorites_parser
 from nhentai.doujinshi import Doujinshi
-from nhentai.downloader import Downloader, CompressedDownloader
+from nhentai.downloader import Downloader
 from nhentai.logger import logger
 from nhentai.constant import BASE_URL
-from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata, \
+from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \
     paging, check_cookie, signal_handler, DB, move_to_folder
@@ -49,9 +51,6 @@ def main():
     page_list = paging(options.page)

-    if options.retry:
-        constant.RETRY_TIMES = int(options.retry)
-
     if options.favorites:
         if not options.is_download:
             logger.warning('You do not specify --download option')
@@ -80,18 +79,14 @@ def main():
     if options.is_save_download_history:
         with DB() as db:
-            data = set(map(int, db.get_all()))
+            data = map(int, db.get_all())

         doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))
-        logger.info(f'New doujinshis account: {len(doujinshi_ids)}')
-
-    if options.zip:
-        options.is_nohtml = True

     if not options.is_show:
-        downloader = (CompressedDownloader if options.zip else Downloader)(path=options.output_dir, threads=options.threads,
-                                                                           timeout=options.timeout, delay=options.delay,
-                                                                           exit_on_fail=options.exit_on_fail, retry=options.retry,
-                                                                           no_filename_padding=options.no_filename_padding)
+        downloader = Downloader(path=options.output_dir, threads=options.threads,
+                                timeout=options.timeout, delay=options.delay,
+                                exit_on_fail=options.exit_on_fail,
+                                no_filename_padding=options.no_filename_padding)

         for doujinshi_id in doujinshi_ids:
@@ -101,15 +96,17 @@ def main():
             else:
                 continue

+        if not options.dryrun:
             doujinshi.downloader = downloader

             if doujinshi.check_if_need_download(options):
                 doujinshi.download()
             else:
                 logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
-                continue

         if options.generate_metadata:
-            generate_metadata(options.output_dir, doujinshi)
+            generate_metadata_file(options.output_dir, doujinshi)

         if options.is_save_download_history:
             with DB() as db:
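
Note: on both sides the download-history filter reduces to a set difference; master additionally materialises the DB rows into a set before the `with` block closes. A toy illustration with hypothetical IDs:

    # hypothetical stand-ins for db.get_all() and the requested ids
    already_downloaded = ['167680', '167681']
    doujinshi_ids = ['167680', '167682', '167683']

    data = set(map(int, already_downloaded))  # master: eagerly materialised
    remaining = list(set(map(int, doujinshi_ids)) - set(data))

    print(sorted(remaining))  # [167682, 167683] -- only unseen ids get downloaded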

nhentai/constant.py

@@ -37,8 +37,6 @@ FAV_URL = f'{BASE_URL}/favorites/'

 PATH_SEPARATOR = os.path.sep

-RETRY_TIMES = 3
-
 IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i1.{urlparse(BASE_URL).hostname}/galleries'
 IMAGE_URL_MIRRORS = [

nhentai/doujinshi.py

@@ -77,9 +77,6 @@ class Doujinshi(object):
         logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}')

     def check_if_need_download(self, options):
-        if options.no_download:
-            return False
-
         base_path = os.path.join(self.downloader.path, self.filename)

         # regenerate, re-download

nhentai/downloader.py

@@ -4,8 +4,6 @@ import os
 import asyncio
 import httpx
 import urllib3.exceptions
-import zipfile
-import io

 from urllib.parse import urlparse
 from nhentai import constant
@@ -15,6 +13,11 @@ from nhentai.utils import Singleton, async_request
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


+class NHentaiImageNotExistException(Exception):
+    pass
+
+
 def download_callback(result):
     result, data = result
     if result == 0:
@@ -31,12 +34,13 @@ def download_callback(result):
 class Downloader(Singleton):
-    def __init__(self, path='', threads=5, timeout=30, delay=0, exit_on_fail=False,
+    def __init__(self, path='', threads=5, timeout=30, delay=0, retry=3, exit_on_fail=False,
                  no_filename_padding=False):
         self.threads = threads
         self.path = str(path)
         self.timeout = timeout
         self.delay = delay
+        self.retry = retry
         self.exit_on_fail = exit_on_fail
         self.folder = None
         self.semaphore = None
@@ -74,7 +78,13 @@ class Downloader(Singleton):
         else:
             filename = base_filename + extension

+        save_file_path = os.path.join(self.folder, filename)
+
         try:
+            if os.path.exists(save_file_path):
+                logger.warning(f'Skipped download: {save_file_path} already exists')
+                return 1, url
+
             response = await async_request('GET', url, timeout=self.timeout, proxy=proxy)

             if response.status_code != 200:
@@ -91,7 +101,7 @@ class Downloader(Singleton):
                 return -1, url

         except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e:
-            if retried < constant.RETRY_TIMES:
+            if retried < self.retry:
                 logger.warning(f'Download {filename} failed, retrying({retried + 1}) times...')
                 return await self.download(
                     url=url,
@@ -101,9 +111,13 @@ class Downloader(Singleton):
                     proxy=proxy,
                 )
             else:
-                logger.warning(f'Download {filename} failed with {constant.RETRY_TIMES} times retried, skipped')
+                logger.warning(f'Download {filename} failed with {self.retry} times retried, skipped')
                 return -2, url

+        except NHentaiImageNotExistException as e:
+            os.remove(save_file_path)
+            return -3, url
+
         except Exception as e:
             import traceback
@@ -117,11 +131,11 @@ class Downloader(Singleton):
             return 1, url

-    async def save(self, filename, response) -> bool:
+    async def save(self, save_file_path, response) -> bool:
         if response is None:
             logger.error('Error: Response is None')
             return False
-        save_file_path = os.path.join(self.folder, filename)
+        save_file_path = os.path.join(self.folder, save_file_path)
         with open(save_file_path, 'wb') as f:
             if response is not None:
                 length = response.headers.get('content-length')
@@ -132,15 +146,6 @@ class Downloader(Singleton):
                     f.write(chunk)
         return True

-    def create_storage_object(self, folder:str):
-        if not os.path.exists(folder):
-            try:
-                os.makedirs(folder)
-            except EnvironmentError as e:
-                logger.critical(str(e))
-        self.folder:str = folder
-        self.close = lambda: None  # Only available in class CompressedDownloader
-
     def start_download(self, queue, folder='') -> bool:
         if not isinstance(folder, (str,)):
             folder = str(folder)
@@ -149,7 +154,12 @@ class Downloader(Singleton):
             folder = os.path.join(self.path, folder)

         logger.info(f'Doujinshi will be saved at "{folder}"')
-        self.create_storage_object(folder)
+        if not os.path.exists(folder):
+            try:
+                os.makedirs(folder)
+            except EnvironmentError as e:
+                logger.critical(str(e))
+        self.folder = folder

         if os.getenv('DEBUG', None) == 'NODOWNLOAD':
             # Assuming we want to continue with rest of process.
@@ -165,31 +175,4 @@ class Downloader(Singleton):
         # Prevent coroutines infection
         asyncio.run(self.fiber(coroutines))

-        self.close()
-
-        return True
-
-
-class CompressedDownloader(Downloader):
-    def create_storage_object(self, folder):
-        filename = f'{folder}.zip'
-        print(filename)
-        self.zipfile = zipfile.ZipFile(filename,'w')
-        self.close = lambda: self.zipfile.close()
-
-    async def save(self, filename, response) -> bool:
-        if response is None:
-            logger.error('Error: Response is None')
-            return False
-
-        image_data = io.BytesIO()
-        length = response.headers.get('content-length')
-        if length is None:
-            content = await response.read()
-            image_data.write(content)
-        else:
-            async for chunk in response.aiter_bytes(2048):
-                image_data.write(chunk)
-
-        image_data.seek(0)
-        self.zipfile.writestr(filename, image_data.read())
         return True
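
Note: the CompressedDownloader removed on the 0.5.21 side buffers each image in memory and streams it into a single archive instead of a folder. The core mechanism is just zipfile plus io.BytesIO; a standalone sketch with hypothetical filenames and payloads, no network involved:

    import io
    import zipfile

    # one archive per doujinshi, mirroring create_storage_object()
    archive = zipfile.ZipFile('/tmp/440546.zip', 'w')
    for name, payload in [('001.jpg', b'\xff\xd8...'), ('002.jpg', b'\xff\xd8...')]:
        buf = io.BytesIO()          # stands in for the chunked aiter_bytes() loop in save()
        buf.write(payload)
        buf.seek(0)
        archive.writestr(name, buf.read())
    archive.close()                 # deferred via self.close in the real class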

nhentai/parser.py

@@ -92,27 +92,13 @@ def favorites_parser(page=None):
         page_range_list = range(1, pages + 1)

     for page in page_range_list:
-        logger.info(f'Getting doujinshi ids of page {page}')
-
-        i = 0
-        while i <= constant.RETRY_TIMES + 1:
-            i += 1
-            if i > 3:
-                logger.error(f'Failed to get favorites at page {page} after 3 times retried, skipped')
-                break
-
-            try:
-                resp = request('get', f'{constant.FAV_URL}?page={page}').content
-                temp_result = _get_title_and_id(resp)
-                if not temp_result:
-                    logger.warning(f'Failed to get favorites at page {page}, retrying ({i} times) ...')
-                    continue
-                else:
-                    result.extend(temp_result)
-                    break
-            except Exception as e:
-                logger.warning(f'Error: {e}, retrying ({i} times) ...')
+        try:
+            logger.info(f'Getting doujinshi ids of page {page}')
+            resp = request('get', f'{constant.FAV_URL}?page={page}').content
+
+            result.extend(_get_title_and_id(resp))
+        except Exception as e:
+            logger.error(f'Error: {e}, continue')

     return result
@@ -155,19 +141,17 @@ def doujinshi_parser(id_, counter=0):
     title = doujinshi_info.find('h1').text
     pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text
     subtitle = doujinshi_info.find('h2')
-    favorite_counts = doujinshi_info.find('span', class_='nobold').text.strip('(').strip(')')
+    favorite_counts = doujinshi_info.find('span', class_='nobold').find('span', class_='count')

     doujinshi['name'] = title
     doujinshi['pretty_name'] = pretty_name
     doujinshi['subtitle'] = subtitle.text if subtitle else ''
-    doujinshi['favorite_counts'] = int(favorite_counts) if favorite_counts and favorite_counts.isdigit() else 0
+    doujinshi['favorite_counts'] = int(favorite_counts.text.strip()) if favorite_counts else 0

     doujinshi_cover = html.find('div', attrs={'id': 'cover'})
     # img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
     #                    doujinshi_cover.a.img.attrs['data-src'])
-    # fix cover.webp.webp
-    img_id = re.search(r'/galleries/(\d+)/cover(\.webp|\.jpg|\.png)?\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
+    img_id = re.search(r'/galleries/(\d+)/cover\.\w+$', doujinshi_cover.a.img.attrs['data-src'])

     ext = []
     for i in html.find_all('div', attrs={'class': 'thumb-container'}):
@@ -277,7 +261,7 @@ def search_parser(keyword, sorting, page, is_page_all=False):
     i = 0

     logger.info(f'Searching doujinshis using keywords "{keyword}" on page {p}{total}')
-    while i < constant.RETRY_TIMES:
+    while i < 3:
         try:
             url = request('get', url=constant.SEARCH_URL, params={'query': keyword,
                                                                   'page': p, 'sort': sorting}).url
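
Note: the two favourite-count strategies in the doujinshi_parser hunk read the number from different places. A sketch against a hypothetical HTML fragment (using BeautifulSoup, which parser.py already relies on; the real page markup may differ):

    from bs4 import BeautifulSoup

    html = BeautifulSoup('<span class="nobold">(<span class="count">1234</span>)</span>',
                         'html.parser')

    # master side: take the span text and strip the parentheses
    raw = html.find('span', class_='nobold').text.strip('(').strip(')')
    print(int(raw) if raw and raw.isdigit() else 0)   # 1234

    # 0.5.21 side: descend into the nested count span
    count = html.find('span', class_='nobold').find('span', class_='count')
    print(int(count.text.strip()) if count else 0)    # 1234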

nhentai/serializer.py

@@ -2,12 +2,12 @@
 import json
 import os

-from nhentai.constant import PATH_SEPARATOR, LANGUAGE_ISO
+from nhentai.constant import PATH_SEPARATOR
 from xml.sax.saxutils import escape
-from requests.structures import CaseInsensitiveDict
+from nhentai.constant import LANGUAGE_ISO


-def serialize_json(doujinshi, output_dir: str):
+def serialize_json(doujinshi, output_dir):
     metadata = {'title': doujinshi.name,
                 'subtitle': doujinshi.info.subtitle}
     if doujinshi.info.favorite_counts:
@@ -78,26 +78,6 @@ def serialize_comic_xml(doujinshi, output_dir):
     f.write('</ComicInfo>')


-def serialize_info_txt(doujinshi, output_dir: str):
-    info_txt_path = os.path.join(output_dir, 'info.txt')
-    f = open(info_txt_path, 'w', encoding='utf-8')
-
-    fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
-              'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
-              'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
-              'SERIES', 'PARODY', 'URL']
-
-    temp_dict = CaseInsensitiveDict(dict(doujinshi.table))
-    for i in fields:
-        v = temp_dict.get(i)
-        v = temp_dict.get(f'{i}s') if v is None else v
-        v = doujinshi.info.get(i.lower(), None) if v is None else v
-        v = doujinshi.info.get(f'{i.lower()}s', "Unknown") if v is None else v
-        f.write(f'{i}: {v}\n')
-
-    f.close()
-
-
 def xml_write_simple_tag(f, name, val, indent=1):
     f.write(f'{" "*indent}<{name}>{escape(str(val))}</{name}>\n')
@@ -152,4 +132,3 @@ def set_js_database():
     indexed_json = json.dumps(indexed_json, separators=(',', ':'))
     f.write('var data = ' + indexed_json)
     f.write(';\nvar tags = ' + unique_json)
-
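
Note: serialize_info_txt here and generate_metadata_file in utils.py (below) share the same four-step field lookup: exact table key, plural table key, lowercase info key, plural lowercase key, else "Unknown". A condensed sketch with hypothetical doujinshi data (the real code reads doujinshi.table and doujinshi.info):

    from requests.structures import CaseInsensitiveDict

    table = [('Title', 'Example Work'), ('Tags', 'tag1, tag2')]  # hypothetical
    info = {'artists': 'someone'}                                # hypothetical

    temp_dict = CaseInsensitiveDict(dict(table))
    for field in ['TITLE', 'ARTIST', 'TAGS', 'URL']:
        v = temp_dict.get(field)                               # 1. exact key, case-insensitive
        v = temp_dict.get(f'{field}s') if v is None else v     # 2. plural key
        v = info.get(field.lower(), None) if v is None else v              # 3. info key
        v = info.get(f'{field.lower()}s', 'Unknown') if v is None else v   # 4. plural, else Unknown
        print(f'{field}: {v}')
    # TITLE: Example Work / ARTIST: someone / TAGS: tag1, tag2 / URL: Unknown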

nhentai/utils.py

@@ -1,5 +1,5 @@
 # coding: utf-8
-import json
 import sys
 import re
 import os
@@ -11,33 +11,24 @@ import requests
 import sqlite3
 import urllib.parse
 from typing import Tuple
+from requests.structures import CaseInsensitiveDict

 from nhentai import constant
 from nhentai.constant import PATH_SEPARATOR
 from nhentai.logger import logger
-from nhentai.serializer import serialize_comic_xml, serialize_json, serialize_info_txt, set_js_database
+from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database

 MAX_FIELD_LENGTH = 100
 EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')


-def get_headers():
-    headers = {
-        'Referer': constant.LOGIN_URL
-    }
-
-    user_agent = constant.CONFIG.get('useragent')
-    if user_agent and user_agent.strip():
-        headers['User-Agent'] = user_agent
-
-    cookie = constant.CONFIG.get('cookie')
-    if cookie and cookie.strip():
-        headers['Cookie'] = cookie
-
-    return headers
-
-
 def request(method, url, **kwargs):
     session = requests.Session()
-    session.headers.update(get_headers())
+    session.headers.update({
+        'Referer': constant.LOGIN_URL,
+        'User-Agent': constant.CONFIG['useragent'],
+        'Cookie': constant.CONFIG['cookie']
+    })

     if not kwargs.get('proxies', None):
         kwargs['proxies'] = {
@@ -49,7 +40,11 @@ def request(method, url, **kwargs):
 async def async_request(method, url, proxy = None, **kwargs):
-    headers = get_headers()
+    headers = {
+        'Referer': constant.LOGIN_URL,
+        'User-Agent': constant.CONFIG['useragent'],
+        'Cookie': constant.CONFIG['cookie'],
+    }

     if proxy is None:
         proxy = constant.CONFIG['proxy']
@@ -110,24 +105,21 @@ def parse_doujinshi_obj(
         doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
         _filename = f'{doujinshi_obj.filename}.{file_type}'

+        if file_type == 'cbz':
+            serialize_comic_xml(doujinshi_obj, doujinshi_dir)
+
         if file_type == 'pdf':
             _filename = _filename.replace('/', '-')

         filename = os.path.join(output_dir, _filename)
     else:
-        if file_type == 'html':
-            return output_dir, 'index.html'
-
         doujinshi_dir = f'.{PATH_SEPARATOR}'

-    if not os.path.exists(doujinshi_dir):
-        os.makedirs(doujinshi_dir)
-
     return doujinshi_dir, filename


 def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
-    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, 'html')
+    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '.html')
     image_html = ''

     if not os.path.exists(doujinshi_dir):
@@ -150,16 +142,10 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
     js = readfile(f'viewer/{template}/scripts.js')

     if doujinshi_obj is not None:
-        # serialize_json(doujinshi_obj, doujinshi_dir)
+        serialize_json(doujinshi_obj, doujinshi_dir)
         name = doujinshi_obj.name
     else:
-        metadata_path = os.path.join(doujinshi_dir, "metadata.json")
-        if os.path.exists(metadata_path):
-            with open(metadata_path, 'r') as file:
-                doujinshi_info = json.loads(file.read())
-            name = doujinshi_info.get("title")
-        else:
-            name = 'nHentai HTML Viewer'
+        name = {'title': 'nHentai HTML Viewer'}

     data = html.format(TITLE=name, IMAGES=image_html, SCRIPTS=js, STYLES=css)
     try:
@@ -249,7 +235,15 @@ def generate_main_html(output_dir=f'.{PATH_SEPARATOR}'):
         logger.warning(f'Writing Main Viewer failed ({e})')


-def generate_cbz(doujinshi_dir, filename):
+def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
+    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
+
+    if os.path.exists(f'{doujinshi_dir}.{file_type}') and not regenerate:
+        logger.info(f'Skipped {file_type} file generation: {doujinshi_dir}.{file_type} already exists')
+        return
+
+    if file_type == 'cbz':
         file_list = os.listdir(doujinshi_dir)
         file_list.sort()
@@ -260,19 +254,6 @@ def generate_cbz(doujinshi_dir, filename):
             cbz_pf.write(image_path, image)

     logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')

-
-def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
-    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
-
-    if os.path.exists(f'{doujinshi_dir}.{file_type}') and not regenerate:
-        logger.info(f'Skipped {file_type} file generation: {doujinshi_dir}.{file_type} already exists')
-        return
-
-    if file_type == 'cbz':
-        serialize_comic_xml(doujinshi_obj, doujinshi_dir)
-        generate_cbz(doujinshi_dir, filename)
-
     elif file_type == 'pdf':
         try:
             import img2pdf
@@ -292,16 +273,6 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
     except ImportError:
         logger.error("Please install img2pdf package by using pip.")

-    else:
-        raise ValueError('invalid file type')
-
-
-def generate_metadata(output_dir, doujinshi_obj=None):
-    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '')
-    serialize_json(doujinshi_obj, doujinshi_dir)
-    serialize_comic_xml(doujinshi_obj, doujinshi_dir)
-    serialize_info_txt(doujinshi_obj, doujinshi_dir)
-    logger.log(16, f'Metadata files have been written to "{doujinshi_dir}"')
@@ -358,6 +329,29 @@ def paging(page_string):
     return page_list


+def generate_metadata_file(output_dir, doujinshi_obj):
+    info_txt_path = os.path.join(output_dir, doujinshi_obj.filename, 'info.txt')
+
+    f = open(info_txt_path, 'w', encoding='utf-8')
+
+    fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
+              'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
+              'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
+              'SERIES', 'PARODY', 'URL']
+
+    temp_dict = CaseInsensitiveDict(dict(doujinshi_obj.table))
+    for i in fields:
+        v = temp_dict.get(i)
+        v = temp_dict.get(f'{i}s') if v is None else v
+        v = doujinshi_obj.info.get(i.lower(), None) if v is None else v
+        v = doujinshi_obj.info.get(f'{i.lower()}s', "Unknown") if v is None else v
+        f.write(f'{i}: {v}\n')
+
+    f.close()
+
+    logger.log(16, f'Metadata Info has been written to "{info_txt_path}"')
+
+
 class DB(object):
     conn = None
     cur = None
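
Note: master's get_headers() only attaches User-Agent and Cookie when the configured value is a non-empty string, whereas the 0.5.21 side indexes CONFIG directly and sends whatever is there. A compact sketch of the difference with hypothetical CONFIG contents:

    LOGIN_URL = 'https://nhentai.net/login/'         # stand-in for constant.LOGIN_URL
    CONFIG = {'useragent': '', 'cookie': 'abc=1'}    # hypothetical: empty UA, cookie set

    def get_headers():
        headers = {'Referer': LOGIN_URL}
        user_agent = CONFIG.get('useragent')
        if user_agent and user_agent.strip():        # empty string is falsy: header omitted
            headers['User-Agent'] = user_agent
        cookie = CONFIG.get('cookie')
        if cookie and cookie.strip():
            headers['Cookie'] = cookie
        return headers

    print(get_headers())  # {'Referer': '...', 'Cookie': 'abc=1'} -- no blank User-Agent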

nhentai/viewer/default/scripts.js

@@ -75,13 +75,11 @@ document.onkeydown = event =>{
             changePage(currentPage - 1);
             break;
         case 38: //up
-            changePage(currentPage - 1);
             break;
         case 39: //right
             changePage(currentPage + 1);
             break;
         case 40: //down
-            changePage(currentPage + 1);
             break;
     }
 };

pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "nhentai"
-version = "0.6.0-beta"
+version = "0.5.20"
 description = "nhentai doujinshi downloader"
 authors = ["Ricter Z <ricterzheng@gmail.com>"]
 license = "MIT"

tests/test_download.py

@@ -1,27 +1,14 @@
 import unittest
 import os
-import zipfile
 import urllib3.exceptions

 from nhentai import constant
 from nhentai.cmdline import load_config
-from nhentai.downloader import Downloader, CompressedDownloader
+from nhentai.downloader import Downloader
 from nhentai.parser import doujinshi_parser
 from nhentai.doujinshi import Doujinshi
 from nhentai.utils import generate_html

-did = 440546
-
-def has_jepg_file(path):
-    with zipfile.ZipFile(path, 'r') as zf:
-        return '01.jpg' in zf.namelist()
-
-def is_zip_file(path):
-    try:
-        with zipfile.ZipFile(path, 'r') as _:
-            return True
-    except (zipfile.BadZipFile, FileNotFoundError):
-        return False
-

 class TestDownload(unittest.TestCase):
     def setUp(self) -> None:
@@ -30,27 +17,17 @@ class TestDownload(unittest.TestCase):
         constant.CONFIG['cookie'] = os.getenv('NHENTAI_COOKIE')
         constant.CONFIG['useragent'] = os.getenv('NHENTAI_UA')

-        self.info = Doujinshi(**doujinshi_parser(did), name_format='%i')
-
     def test_download(self):
-        info = self.info
+        did = 440546
+        info = Doujinshi(**doujinshi_parser(did), name_format='%i')
         info.downloader = Downloader(path='/tmp', threads=5)
         info.download()

-        self.assertTrue(os.path.exists(f'/tmp/{did}/01.jpg'))
+        self.assertTrue(os.path.exists(f'/tmp/{did}/001.jpg'))

         generate_html('/tmp', info)
         self.assertTrue(os.path.exists(f'/tmp/{did}/index.html'))

-    def test_zipfile_download(self):
-        info = self.info
-        info.downloader = CompressedDownloader(path='/tmp', threads=5)
-        info.download()
-
-        zipfile_path = f'/tmp/{did}.zip'
-        self.assertTrue(os.path.exists(zipfile_path))
-        self.assertTrue(is_zip_file(zipfile_path))
-        self.assertTrue(has_jepg_file(zipfile_path))
-

 if __name__ == '__main__':
     unittest.main()