Compare commits


13 Commits

Author | SHA1 | Message | Date
Ricter Zheng | 6752edfc9d | Merge pull request #402 from hzxjy1/zipTest (Close zipfile hander manually and add a test) | 2025-03-26 22:57:29 +08:00
Ricter Zheng | 9a5fcd7d23 | Merge pull request #401 from hzxjy1/NoneType (Fix none attributes in session headers) | 2025-03-26 22:56:11 +08:00
Hellagur4225 | b4cc498a5f | add a test for zipfile download | 2025-03-26 15:14:15 +08:00
Hellagur4225 | a4eb7f3b5f | fix the uncontrollable zipfile closing function | 2025-03-26 15:11:26 +08:00
Hellagur4225 | 36aa321ade | Fix none attributes in session headers | 2025-03-24 10:13:42 +08:00
Ricter Zheng | aa84b57a43 | use argparse, fix #396 | 2025-03-12 02:50:22 +08:00
ricterz | a3c70a0c30 | fix #396 | 2025-03-11 22:23:17 +08:00
Ricter Zheng | 86060ae0a6 | Merge pull request #398 from hzxjy1/zipfile (feat: add compress option) | 2025-03-11 22:04:09 +08:00
ricterz | 9648c21b32 | tiny fix of #397 | 2025-03-11 22:02:37 +08:00
Hellagur4225 | 625feb5d21 | Remove unused option | 2025-03-08 17:37:42 +08:00
Hellagur4225 | 6efbc73c10 | feat: add compress option | 2025-03-08 17:31:56 +08:00
ricterz | 34c1ea8952 | new feature #396 | 2025-02-28 18:59:32 +08:00
ricterz | 2e895d8d0f | fix title #396 | 2025-02-28 18:24:56 +08:00
5 changed files with 214 additions and 130 deletions

nhentai/cmdline.py

@@ -6,10 +6,10 @@ import json
import nhentai.constant as constant
from urllib.parse import urlparse
from optparse import OptionParser
from argparse import ArgumentParser
from nhentai import __version__
from nhentai.utils import generate_html, generate_main_html, DB
from nhentai.utils import generate_html, generate_main_html, DB, EXTENSIONS
from nhentai.logger import logger
from nhentai.constant import PATH_SEPARATOR
@@ -57,109 +57,133 @@ def callback(option, _opt_str, _value, parser):
def cmd_parser():
load_config()
parser = OptionParser('\n nhentai --search [keyword] --download'
'\n NHENTAI=https://nhentai-mirror-url/ nhentai --id [ID ...]'
'\n nhentai --file [filename]'
'\n\nEnvironment Variable:\n'
' NHENTAI nhentai mirror url')
parser = ArgumentParser(
description='\n nhentai --search [keyword] --download'
'\n NHENTAI=https://nhentai-mirror-url/ nhentai --id [ID ...]'
'\n nhentai --file [filename]'
'\n\nEnvironment Variable:\n'
' NHENTAI nhentai mirror url'
)
# operation options
parser.add_option('--download', '-D', dest='is_download', action='store_true',
help='download doujinshi (for search results)')
parser.add_option('--no-download', dest='no_download', action='store_true', default=False,
help='download doujinshi (for search results)')
parser.add_option('--show', '-S', dest='is_show', action='store_true',
help='just show the doujinshi information')
parser.add_argument('--download', '-D', dest='is_download', action='store_true',
help='download doujinshi (for search results)')
parser.add_argument('--no-download', dest='no_download', action='store_true', default=False,
help='download doujinshi (for search results)')
parser.add_argument('--show', '-S', dest='is_show', action='store_true',
help='just show the doujinshi information')
# doujinshi options
parser.add_option('--id', dest='id', action='callback', callback=callback,
help='doujinshi ids set, e.g. 167680 167681 167682')
parser.add_option('--search', '-s', type='string', dest='keyword', action='store',
help='search doujinshi by keyword')
parser.add_option('--favorites', '-F', action='store_true', dest='favorites',
help='list or download your favorites')
parser.add_option('--artist', '-a', action='store', dest='artist',
help='list doujinshi by artist name')
parser.add_argument('--id', dest='id', nargs='+', type=int,
help='doujinshi ids set, e.g. 167680 167681 167682')
parser.add_argument('--search', '-s', type=str, dest='keyword',
help='search doujinshi by keyword')
parser.add_argument('--favorites', '-F', action='store_true', dest='favorites',
help='list or download your favorites')
parser.add_argument('--artist', '-a', type=str, dest='artist',
help='list doujinshi by artist name')
# page options
parser.add_option('--page-all', dest='page_all', action='store_true', default=False,
help='all search results')
parser.add_option('--page', '--page-range', type='string', dest='page', action='store',
help='page number of search results. e.g. 1,2-5,14')
parser.add_option('--sorting', '--sort', dest='sorting', action='store', default='popular',
help='sorting of doujinshi (recent / popular / popular-[today|week])',
choices=['recent', 'popular', 'popular-today', 'popular-week', 'date'])
parser.add_argument('--page-all', dest='page_all', action='store_true', default=False,
help='all search results')
parser.add_argument('--page', '--page-range', type=str, dest='page',
help='page number of search results. e.g. 1,2-5,14')
parser.add_argument('--sorting', '--sort', dest='sorting', type=str, default='popular',
help='sorting of doujinshi (recent / popular / popular-[today|week])',
choices=['recent', 'popular', 'popular-today', 'popular-week', 'date'])
# download options
parser.add_option('--output', '-o', type='string', dest='output_dir', action='store',
default=f'.{PATH_SEPARATOR}',
help='output dir')
parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
help='thread count for downloading doujinshi')
parser.add_option('--timeout', '-T', type='int', dest='timeout', action='store', default=30,
help='timeout for downloading doujinshi')
parser.add_option('--delay', '-d', type='int', dest='delay', action='store', default=0,
help='slow down between downloading every doujinshi')
parser.add_option('--retry', type='int', dest='retry', action='store', default=3,
help='retry times when downloading failed')
parser.add_option('--exit-on-fail', dest='exit_on_fail', action='store_true', default=False,
help='exit on fail to prevent generating incomplete files')
parser.add_option('--proxy', type='string', dest='proxy', action='store',
help='store a proxy, for example: -p "http://127.0.0.1:1080"')
parser.add_option('--file', '-f', type='string', dest='file', action='store',
help='read gallery IDs from file.')
parser.add_option('--format', type='string', dest='name_format', action='store',
help='format the saved folder name', default='[%i][%a][%t]')
parser.add_argument('--output', '-o', type=str, dest='output_dir', default='.',
help='output dir')
parser.add_argument('--threads', '-t', type=int, dest='threads', default=5,
help='thread count for downloading doujinshi')
parser.add_argument('--timeout', '-T', type=int, dest='timeout', default=30,
help='timeout for downloading doujinshi')
parser.add_argument('--delay', '-d', type=int, dest='delay', default=0,
help='slow down between downloading every doujinshi')
parser.add_argument('--retry', type=int, dest='retry', default=3,
help='retry times when downloading failed')
parser.add_argument('--exit-on-fail', dest='exit_on_fail', action='store_true', default=False,
help='exit on fail to prevent generating incomplete files')
parser.add_argument('--proxy', type=str, dest='proxy',
help='store a proxy, for example: -p "http://127.0.0.1:1080"')
parser.add_argument('--file', '-f', type=str, dest='file',
help='read gallery IDs from file.')
parser.add_argument('--format', type=str, dest='name_format', default='[%i][%a][%t]',
help='format the saved folder name')
parser.add_option('--no-filename-padding', action='store_true', dest='no_filename_padding',
default=False, help='no padding in the images filename, such as \'001.jpg\'')
parser.add_argument('--no-filename-padding', action='store_true', dest='no_filename_padding',
default=False, help='no padding in the images filename, such as \'001.jpg\'')
# generate options
parser.add_option('--html', dest='html_viewer', action='store_true',
help='generate a html viewer at current directory')
parser.add_option('--no-html', dest='is_nohtml', action='store_true',
help='don\'t generate HTML after downloading')
parser.add_option('--gen-main', dest='main_viewer', action='store_true',
help='generate a main viewer contain all the doujin in the folder')
parser.add_option('--cbz', '-C', dest='is_cbz', action='store_true',
help='generate Comic Book CBZ File')
parser.add_option('--pdf', '-P', dest='is_pdf', action='store_true',
help='generate PDF file')
parser.add_argument('--html', dest='html_viewer', type=str, nargs='?', const='.',
help='generate an HTML viewer in the specified directory, or scan all subfolders '
'within the entire directory to generate the HTML viewer. By default, current '
'working directory is used.')
parser.add_argument('--no-html', dest='is_nohtml', action='store_true',
help='don\'t generate HTML after downloading')
parser.add_argument('--gen-main', dest='main_viewer', action='store_true',
help='generate a main viewer contain all the doujin in the folder')
parser.add_argument('--cbz', '-C', dest='is_cbz', action='store_true',
help='generate Comic Book CBZ File')
parser.add_argument('--pdf', '-P', dest='is_pdf', action='store_true',
help='generate PDF file')
parser.add_option('--meta', dest='generate_metadata', action='store_true', default=False,
help='generate a metadata file in doujinshi format')
parser.add_option('--update-meta', dest='update_metadata', action='store_true', default=False,
help='update the metadata file of a doujinshi, update CBZ metadata if exists')
parser.add_argument('--meta', dest='generate_metadata', action='store_true', default=False,
help='generate a metadata file in doujinshi format')
parser.add_argument('--update-meta', dest='update_metadata', action='store_true', default=False,
help='update the metadata file of a doujinshi, update CBZ metadata if exists')
parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
help='remove downloaded doujinshi dir when generated CBZ or PDF file')
parser.add_option('--move-to-folder', dest='move_to_folder', action='store_true', default=False,
help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
parser.add_argument('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
help='remove downloaded doujinshi dir when generated CBZ or PDF file')
parser.add_argument('--move-to-folder', dest='move_to_folder', action='store_true', default=False,
help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False,
help='regenerate the cbz or pdf file if exists')
parser.add_argument('--regenerate', dest='regenerate', action='store_true', default=False,
help='regenerate the cbz or pdf file if exists')
parser.add_argument('--zip', action='store_true', help='Package into a single zip file')
# nhentai options
parser.add_option('--cookie', type='str', dest='cookie', action='store',
help='set cookie of nhentai to bypass Cloudflare captcha')
parser.add_option('--useragent', '--user-agent', type='str', dest='useragent', action='store',
help='set useragent to bypass Cloudflare captcha')
parser.add_option('--language', type='str', dest='language', action='store',
help='set default language to parse doujinshis')
parser.add_option('--clean-language', dest='clean_language', action='store_true', default=False,
help='set DEFAULT as language to parse doujinshis')
parser.add_option('--save-download-history', dest='is_save_download_history', action='store_true',
default=False, help='save downloaded doujinshis, whose will be skipped if you re-download them')
parser.add_option('--clean-download-history', action='store_true', default=False, dest='clean_download_history',
help='clean download history')
parser.add_option('--template', dest='viewer_template', action='store',
help='set viewer template', default='')
parser.add_option('--legacy', dest='legacy', action='store_true', default=False,
help='use legacy searching method')
parser.add_argument('--cookie', type=str, dest='cookie',
help='set cookie of nhentai to bypass Cloudflare captcha')
parser.add_argument('--useragent', '--user-agent', type=str, dest='useragent',
help='set useragent to bypass Cloudflare captcha')
parser.add_argument('--language', type=str, dest='language',
help='set default language to parse doujinshis')
parser.add_argument('--clean-language', dest='clean_language', action='store_true', default=False,
help='set DEFAULT as language to parse doujinshis')
parser.add_argument('--save-download-history', dest='is_save_download_history', action='store_true',
default=False, help='save downloaded doujinshis, whose will be skipped if you re-download them')
parser.add_argument('--clean-download-history', action='store_true', default=False, dest='clean_download_history',
help='clean download history')
parser.add_argument('--template', dest='viewer_template', type=str, default='',
help='set viewer template')
parser.add_argument('--legacy', dest='legacy', action='store_true', default=False,
help='use legacy searching method')
args, _ = parser.parse_args(sys.argv[1:])
args = parser.parse_args()
if args.html_viewer:
generate_html(template=constant.CONFIG['template'])
if not os.path.exists(args.html_viewer):
logger.error(f'Path \'{args.html_viewer}\' not exists')
sys.exit(1)
for root, dirs, files in os.walk(args.html_viewer):
if not dirs:
generate_html(output_dir=args.html_viewer, template=constant.CONFIG['template'])
sys.exit(0)
for dir_name in dirs:
# it will scan the entire subdirectories
doujinshi_dir = os.path.join(root, dir_name)
items = set(map(lambda s: os.path.splitext(s)[1], os.listdir(doujinshi_dir)))
# skip directory without any images
if items & set(EXTENSIONS):
generate_html(output_dir=doujinshi_dir, template=constant.CONFIG['template'])
sys.exit(0)
sys.exit(0)
if args.main_viewer and not args.id and not args.keyword and not args.favorites:
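
The bulk of this file is a mechanical optparse-to-argparse migration: add_option becomes add_argument, string type names become real callables (type='string' -> type=str, type='int' -> type=int), the callback that collected --id values becomes nargs='+' with type=int, and --html gains an optional directory value via nargs='?' with const='.'. A minimal standalone sketch of those patterns (illustrative only, not the project's actual module):

from argparse import ArgumentParser

parser = ArgumentParser(description='nhentai --search [keyword] --download')

# optparse needed a custom callback to gather multiple ids; argparse
# collects and converts them directly with nargs='+' and type=int.
parser.add_argument('--id', dest='id', nargs='+', type=int,
                    help='doujinshi ids set, e.g. 167680 167681 167682')

# nargs='?' lets the flag appear with or without a value; const='.' is
# substituted when it appears bare, so --html alone means "current directory".
parser.add_argument('--html', dest='html_viewer', type=str, nargs='?', const='.',
                    help='generate an HTML viewer in the given directory')

parser.add_argument('--sorting', '--sort', dest='sorting', type=str, default='popular',
                    choices=['recent', 'popular', 'popular-today', 'popular-week', 'date'])

args = parser.parse_args(['--id', '167680', '167681', '--html'])
assert args.id == [167680, 167681]   # nargs='+' stops at the next option
assert args.html_viewer == '.'       # bare --html falls back to const
assert args.sorting == 'popular'     # default applies when the flag is absent

One behavioral difference worth noting: optparse's parse_args() returned an (options, leftovers) tuple and tolerated stray arguments, which is why the old call unpacked "args, _"; argparse's parse_args() exits with an error on anything unrecognized.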

nhentai/command.py

@@ -10,7 +10,7 @@ from nhentai import constant
from nhentai.cmdline import cmd_parser, banner, write_config
from nhentai.parser import doujinshi_parser, search_parser, legacy_search_parser, print_doujinshi, favorites_parser
from nhentai.doujinshi import Doujinshi
from nhentai.downloader import Downloader
from nhentai.downloader import Downloader, CompressedDownloader
from nhentai.logger import logger
from nhentai.constant import BASE_URL
from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata, \
@@ -80,12 +80,16 @@ def main():
if options.is_save_download_history:
with DB() as db:
data = map(int, db.get_all())
data = set(map(int, db.get_all()))
doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))
logger.info(f'New doujinshis account: {len(doujinshi_ids)}')
if options.zip:
options.is_nohtml = True
if not options.is_show:
downloader = Downloader(path=options.output_dir, threads=options.threads,
downloader = (CompressedDownloader if options.zip else Downloader)(path=options.output_dir, threads=options.threads,
timeout=options.timeout, delay=options.delay,
exit_on_fail=options.exit_on_fail,
no_filename_padding=options.no_filename_padding)
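
Because CompressedDownloader subclasses Downloader (see downloader.py below), main() can choose the class with a conditional expression and keep a single construction site; --zip also forces is_nohtml, presumably because the HTML viewer has no place inside a zip archive. A toy sketch of the dispatch pattern, with stand-in classes:

# Classes are first-class objects in Python, so the branch picks a
# constructor and the call site stays unchanged for either variant.
class Downloader:
    def __init__(self, path, threads=5):
        self.path = path
        self.threads = threads

class CompressedDownloader(Downloader):
    pass  # the real subclass overrides storage creation and save()

def make_downloader(want_zip: bool, path: str) -> Downloader:
    cls = CompressedDownloader if want_zip else Downloader
    return cls(path=path, threads=5)

assert type(make_downloader(True, '/tmp')) is CompressedDownloader
assert type(make_downloader(False, '/tmp')) is Downloader

The db.get_all() result is also materialized with set(...) up front, so the download history becomes a reusable concrete collection rather than a one-shot map iterator.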

nhentai/downloader.py

@@ -4,6 +4,8 @@ import os
import asyncio
import httpx
import urllib3.exceptions
import zipfile
import io
from urllib.parse import urlparse
from nhentai import constant
@@ -13,11 +15,6 @@ from nhentai.utils import Singleton, async_request
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
class NHentaiImageNotExistException(Exception):
pass
def download_callback(result):
result, data = result
if result == 0:
@@ -77,13 +74,7 @@ class Downloader(Singleton):
else:
filename = base_filename + extension
save_file_path = os.path.join(self.folder, filename)
try:
if os.path.exists(save_file_path):
logger.warning(f'Skipped download: {save_file_path} already exists')
return 1, url
response = await async_request('GET', url, timeout=self.timeout, proxy=proxy)
if response.status_code != 200:
@@ -113,10 +104,6 @@
logger.warning(f'Download {filename} failed with {constant.RETRY_TIMES} times retried, skipped')
return -2, url
except NHentaiImageNotExistException as e:
os.remove(save_file_path)
return -3, url
except Exception as e:
import traceback
@@ -130,11 +117,11 @@
return 1, url
async def save(self, save_file_path, response) -> bool:
async def save(self, filename, response) -> bool:
if response is None:
logger.error('Error: Response is None')
return False
save_file_path = os.path.join(self.folder, save_file_path)
save_file_path = os.path.join(self.folder, filename)
with open(save_file_path, 'wb') as f:
if response is not None:
length = response.headers.get('content-length')
@@ -145,6 +132,15 @@
f.write(chunk)
return True
def create_storage_object(self, folder:str):
if not os.path.exists(folder):
try:
os.makedirs(folder)
except EnvironmentError as e:
logger.critical(str(e))
self.folder:str = folder
self.close = lambda: None # Only available in class CompressedDownloader
def start_download(self, queue, folder='') -> bool:
if not isinstance(folder, (str,)):
folder = str(folder)
@@ -153,12 +149,7 @@
folder = os.path.join(self.path, folder)
logger.info(f'Doujinshi will be saved at "{folder}"')
if not os.path.exists(folder):
try:
os.makedirs(folder)
except EnvironmentError as e:
logger.critical(str(e))
self.folder = folder
self.create_storage_object(folder)
if os.getenv('DEBUG', None) == 'NODOWNLOAD':
# Assuming we want to continue with rest of process.
@@ -174,4 +165,31 @@
# Prevent coroutines infection
asyncio.run(self.fiber(coroutines))
self.close()
return True
class CompressedDownloader(Downloader):
def create_storage_object(self, folder):
filename = f'{folder}.zip'
print(filename)
self.zipfile = zipfile.ZipFile(filename,'w')
self.close = lambda: self.zipfile.close()
async def save(self, filename, response) -> bool:
if response is None:
logger.error('Error: Response is None')
return False
image_data = io.BytesIO()
length = response.headers.get('content-length')
if length is None:
content = await response.read()
image_data.write(content)
else:
async for chunk in response.aiter_bytes(2048):
image_data.write(chunk)
image_data.seek(0)
self.zipfile.writestr(filename, image_data.read())
return True
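
The refactor pulls directory creation out of start_download() into a create_storage_object() hook and installs a no-op close on the base class, so start_download() can call self.close() unconditionally; CompressedDownloader overrides the hook to open a ZipFile and rebinds close to flush the archive, which is what "fix the uncontrollable zipfile closing function" refers to. A condensed sketch of the hook pattern (stand-in class names and fake image bytes, not the real downloader):

import io
import os
import zipfile

class Storage:
    def create_storage_object(self, folder: str):
        os.makedirs(folder, exist_ok=True)
        self.folder = folder
        self.close = lambda: None   # no-op; only the zip variant needs a real close

    def save(self, filename: str, data: bytes):
        with open(os.path.join(self.folder, filename), 'wb') as f:
            f.write(data)

class ZipStorage(Storage):
    def create_storage_object(self, folder: str):
        self.zipfile = zipfile.ZipFile(f'{folder}.zip', 'w')
        self.close = self.zipfile.close   # rebinding close makes shutdown controllable

    def save(self, filename: str, data: bytes):
        buffer = io.BytesIO(data)   # stands in for chunks gathered from the response
        self.zipfile.writestr(filename, buffer.read())

storage = ZipStorage()
storage.create_storage_object('/tmp/demo')
storage.save('01.jpg', b'\xff\xd8\xff')   # fake JPEG bytes
storage.close()   # without this the zip's central directory is never written
assert zipfile.is_zipfile('/tmp/demo.zip')

In the real save(), the chunks from the async iterator are gathered into the BytesIO first and the member is then added in a single writestr() call, since a ZipFile member cannot be streamed incrementally from an async iterator.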

nhentai/utils.py

@@ -1,5 +1,5 @@
# coding: utf-8
import json
import sys
import re
import os
@@ -20,14 +20,24 @@ from nhentai.serializer import serialize_comic_xml, serialize_json, serialize_in
MAX_FIELD_LENGTH = 100
EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
def get_headers():
headers = {
'Referer': constant.LOGIN_URL
}
user_agent = constant.CONFIG.get('useragent')
if user_agent and user_agent.strip():
headers['User-Agent'] = user_agent
cookie = constant.CONFIG.get('cookie')
if cookie and cookie.strip():
headers['Cookie'] = cookie
return headers
def request(method, url, **kwargs):
session = requests.Session()
session.headers.update({
'Referer': constant.LOGIN_URL,
'User-Agent': constant.CONFIG['useragent'],
'Cookie': constant.CONFIG['cookie']
})
session.headers.update(get_headers())
if not kwargs.get('proxies', None):
kwargs['proxies'] = {
@@ -39,11 +49,7 @@ def request(method, url, **kwargs):
async def async_request(method, url, proxy = None, **kwargs):
headers = {
'Referer': constant.LOGIN_URL,
'User-Agent': constant.CONFIG['useragent'],
'Cookie': constant.CONFIG['cookie'],
}
headers=get_headers()
if proxy is None:
proxy = constant.CONFIG['proxy']
@@ -109,6 +115,9 @@ def parse_doujinshi_obj(
filename = os.path.join(output_dir, _filename)
else:
if file_type == 'html':
return output_dir, 'index.html'
doujinshi_dir = f'.{PATH_SEPARATOR}'
if not os.path.exists(doujinshi_dir):
@@ -118,7 +127,7 @@
def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '.html')
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, 'html')
image_html = ''
if not os.path.exists(doujinshi_dir):
@@ -144,7 +153,13 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
# serialize_json(doujinshi_obj, doujinshi_dir)
name = doujinshi_obj.name
else:
name = {'title': 'nHentai HTML Viewer'}
metadata_path = os.path.join(doujinshi_dir, "metadata.json")
if os.path.exists(metadata_path):
with open(metadata_path, 'r') as file:
doujinshi_info = json.loads(file.read())
name = doujinshi_info.get("title")
else:
name = 'nHentai HTML Viewer'
data = html.format(TITLE=name, IMAGES=image_html, SCRIPTS=js, STYLES=css)
try:
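
The fix from PR #401 replaces unconditional CONFIG['useragent'] / CONFIG['cookie'] lookups, which planted None values in the session headers whenever the config was unset, with a guarded builder shared by request() and async_request(). Distilled into a runnable sketch (CONFIG and LOGIN_URL are stand-ins for the values in nhentai.constant):

CONFIG = {'useragent': None, 'cookie': '  '}   # simulates an unset/blank config
LOGIN_URL = 'https://nhentai.net/login/'

def get_headers():
    headers = {'Referer': LOGIN_URL}

    # dict.get returns None for missing keys; the truthiness check plus
    # strip() also filters out empty or whitespace-only strings, so the
    # HTTP client never sees a None header value.
    user_agent = CONFIG.get('useragent')
    if user_agent and user_agent.strip():
        headers['User-Agent'] = user_agent

    cookie = CONFIG.get('cookie')
    if cookie and cookie.strip():
        headers['Cookie'] = cookie

    return headers

assert get_headers() == {'Referer': 'https://nhentai.net/login/'}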

tests/test_download.py

@@ -1,14 +1,27 @@
import unittest
import os
import zipfile
import urllib3.exceptions
from nhentai import constant
from nhentai.cmdline import load_config
from nhentai.downloader import Downloader
from nhentai.downloader import Downloader, CompressedDownloader
from nhentai.parser import doujinshi_parser
from nhentai.doujinshi import Doujinshi
from nhentai.utils import generate_html
did = 440546
def has_jepg_file(path):
with zipfile.ZipFile(path, 'r') as zf:
return '01.jpg' in zf.namelist()
def is_zip_file(path):
try:
with zipfile.ZipFile(path, 'r') as _:
return True
except (zipfile.BadZipFile, FileNotFoundError):
return False
class TestDownload(unittest.TestCase):
def setUp(self) -> None:
@@ -17,17 +30,27 @@ class TestDownload(unittest.TestCase):
constant.CONFIG['cookie'] = os.getenv('NHENTAI_COOKIE')
constant.CONFIG['useragent'] = os.getenv('NHENTAI_UA')
self.info = Doujinshi(**doujinshi_parser(did), name_format='%i')
def test_download(self):
did = 440546
info = Doujinshi(**doujinshi_parser(did), name_format='%i')
info = self.info
info.downloader = Downloader(path='/tmp', threads=5)
info.download()
self.assertTrue(os.path.exists(f'/tmp/{did}/001.jpg'))
self.assertTrue(os.path.exists(f'/tmp/{did}/01.jpg'))
generate_html('/tmp', info)
self.assertTrue(os.path.exists(f'/tmp/{did}/index.html'))
def test_zipfile_download(self):
info = self.info
info.downloader = CompressedDownloader(path='/tmp', threads=5)
info.download()
zipfile_path = f'/tmp/{did}.zip'
self.assertTrue(os.path.exists(zipfile_path))
self.assertTrue(is_zip_file(zipfile_path))
self.assertTrue(has_jepg_file(zipfile_path))
if __name__ == '__main__':
unittest.main()
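
The new helpers treat "opens as a ZipFile" as the validity test and check namelist() for the expected member. A quick standalone exercise of the same idiom outside the unittest harness, with illustrative paths, showing that a missing path and a non-zip file take the FileNotFoundError and BadZipFile branches respectively:

import zipfile

def is_zip_file(path):
    try:
        with zipfile.ZipFile(path, 'r') as _:
            return True
    except (zipfile.BadZipFile, FileNotFoundError):
        return False

with zipfile.ZipFile('/tmp/demo.zip', 'w') as zf:
    zf.writestr('01.jpg', b'\xff\xd8\xff')      # fake JPEG payload

with open('/tmp/not-a-zip.bin', 'wb') as f:
    f.write(b'plain bytes, no zip magic')

assert is_zip_file('/tmp/demo.zip')
assert not is_zip_file('/tmp/not-a-zip.bin')    # BadZipFile branch
assert not is_zip_file('/tmp/missing.zip')      # FileNotFoundError branch

with zipfile.ZipFile('/tmp/demo.zip') as zf:
    assert '01.jpg' in zf.namelist()            # the member check the test relies on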