Compare commits

..

No commits in common. "master" and "0.6.0-beta" have entirely different histories.

5 changed files with 130 additions and 214 deletions

View File

@ -6,10 +6,10 @@ import json
import nhentai.constant as constant import nhentai.constant as constant
from urllib.parse import urlparse from urllib.parse import urlparse
from argparse import ArgumentParser from optparse import OptionParser
from nhentai import __version__ from nhentai import __version__
from nhentai.utils import generate_html, generate_main_html, DB, EXTENSIONS from nhentai.utils import generate_html, generate_main_html, DB
from nhentai.logger import logger from nhentai.logger import logger
from nhentai.constant import PATH_SEPARATOR from nhentai.constant import PATH_SEPARATOR
@ -57,133 +57,109 @@ def callback(option, _opt_str, _value, parser):
def cmd_parser(): def cmd_parser():
load_config() load_config()
parser = ArgumentParser( parser = OptionParser('\n nhentai --search [keyword] --download'
description='\n nhentai --search [keyword] --download' '\n NHENTAI=https://nhentai-mirror-url/ nhentai --id [ID ...]'
'\n NHENTAI=https://nhentai-mirror-url/ nhentai --id [ID ...]' '\n nhentai --file [filename]'
'\n nhentai --file [filename]' '\n\nEnvironment Variable:\n'
'\n\nEnvironment Variable:\n' ' NHENTAI nhentai mirror url')
' NHENTAI nhentai mirror url'
)
# operation options # operation options
parser.add_argument('--download', '-D', dest='is_download', action='store_true', parser.add_option('--download', '-D', dest='is_download', action='store_true',
help='download doujinshi (for search results)') help='download doujinshi (for search results)')
parser.add_argument('--no-download', dest='no_download', action='store_true', default=False, parser.add_option('--no-download', dest='no_download', action='store_true', default=False,
help='download doujinshi (for search results)') help='download doujinshi (for search results)')
parser.add_argument('--show', '-S', dest='is_show', action='store_true', parser.add_option('--show', '-S', dest='is_show', action='store_true',
help='just show the doujinshi information') help='just show the doujinshi information')
# doujinshi options # doujinshi options
parser.add_argument('--id', dest='id', nargs='+', type=int, parser.add_option('--id', dest='id', action='callback', callback=callback,
help='doujinshi ids set, e.g. 167680 167681 167682') help='doujinshi ids set, e.g. 167680 167681 167682')
parser.add_argument('--search', '-s', type=str, dest='keyword', parser.add_option('--search', '-s', type='string', dest='keyword', action='store',
help='search doujinshi by keyword') help='search doujinshi by keyword')
parser.add_argument('--favorites', '-F', action='store_true', dest='favorites', parser.add_option('--favorites', '-F', action='store_true', dest='favorites',
help='list or download your favorites') help='list or download your favorites')
parser.add_argument('--artist', '-a', type=str, dest='artist', parser.add_option('--artist', '-a', action='store', dest='artist',
help='list doujinshi by artist name') help='list doujinshi by artist name')
# page options # page options
parser.add_argument('--page-all', dest='page_all', action='store_true', default=False, parser.add_option('--page-all', dest='page_all', action='store_true', default=False,
help='all search results') help='all search results')
parser.add_argument('--page', '--page-range', type=str, dest='page', parser.add_option('--page', '--page-range', type='string', dest='page', action='store',
help='page number of search results. e.g. 1,2-5,14') help='page number of search results. e.g. 1,2-5,14')
parser.add_argument('--sorting', '--sort', dest='sorting', type=str, default='popular', parser.add_option('--sorting', '--sort', dest='sorting', action='store', default='popular',
help='sorting of doujinshi (recent / popular / popular-[today|week])', help='sorting of doujinshi (recent / popular / popular-[today|week])',
choices=['recent', 'popular', 'popular-today', 'popular-week', 'date']) choices=['recent', 'popular', 'popular-today', 'popular-week', 'date'])
# download options # download options
parser.add_argument('--output', '-o', type=str, dest='output_dir', default='.', parser.add_option('--output', '-o', type='string', dest='output_dir', action='store',
help='output dir') default=f'.{PATH_SEPARATOR}',
parser.add_argument('--threads', '-t', type=int, dest='threads', default=5, help='output dir')
help='thread count for downloading doujinshi') parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
parser.add_argument('--timeout', '-T', type=int, dest='timeout', default=30, help='thread count for downloading doujinshi')
help='timeout for downloading doujinshi') parser.add_option('--timeout', '-T', type='int', dest='timeout', action='store', default=30,
parser.add_argument('--delay', '-d', type=int, dest='delay', default=0, help='timeout for downloading doujinshi')
help='slow down between downloading every doujinshi') parser.add_option('--delay', '-d', type='int', dest='delay', action='store', default=0,
parser.add_argument('--retry', type=int, dest='retry', default=3, help='slow down between downloading every doujinshi')
help='retry times when downloading failed') parser.add_option('--retry', type='int', dest='retry', action='store', default=3,
parser.add_argument('--exit-on-fail', dest='exit_on_fail', action='store_true', default=False, help='retry times when downloading failed')
help='exit on fail to prevent generating incomplete files') parser.add_option('--exit-on-fail', dest='exit_on_fail', action='store_true', default=False,
parser.add_argument('--proxy', type=str, dest='proxy', help='exit on fail to prevent generating incomplete files')
help='store a proxy, for example: -p "http://127.0.0.1:1080"') parser.add_option('--proxy', type='string', dest='proxy', action='store',
parser.add_argument('--file', '-f', type=str, dest='file', help='store a proxy, for example: -p "http://127.0.0.1:1080"')
help='read gallery IDs from file.') parser.add_option('--file', '-f', type='string', dest='file', action='store',
parser.add_argument('--format', type=str, dest='name_format', default='[%i][%a][%t]', help='read gallery IDs from file.')
help='format the saved folder name') parser.add_option('--format', type='string', dest='name_format', action='store',
help='format the saved folder name', default='[%i][%a][%t]')
parser.add_argument('--no-filename-padding', action='store_true', dest='no_filename_padding', parser.add_option('--no-filename-padding', action='store_true', dest='no_filename_padding',
default=False, help='no padding in the images filename, such as \'001.jpg\'') default=False, help='no padding in the images filename, such as \'001.jpg\'')
# generate options # generate options
parser.add_argument('--html', dest='html_viewer', type=str, nargs='?', const='.', parser.add_option('--html', dest='html_viewer', action='store_true',
help='generate an HTML viewer in the specified directory, or scan all subfolders ' help='generate a html viewer at current directory')
'within the entire directory to generate the HTML viewer. By default, current ' parser.add_option('--no-html', dest='is_nohtml', action='store_true',
'working directory is used.') help='don\'t generate HTML after downloading')
parser.add_argument('--no-html', dest='is_nohtml', action='store_true', parser.add_option('--gen-main', dest='main_viewer', action='store_true',
help='don\'t generate HTML after downloading') help='generate a main viewer contain all the doujin in the folder')
parser.add_argument('--gen-main', dest='main_viewer', action='store_true', parser.add_option('--cbz', '-C', dest='is_cbz', action='store_true',
help='generate a main viewer contain all the doujin in the folder') help='generate Comic Book CBZ File')
parser.add_argument('--cbz', '-C', dest='is_cbz', action='store_true', parser.add_option('--pdf', '-P', dest='is_pdf', action='store_true',
help='generate Comic Book CBZ File') help='generate PDF file')
parser.add_argument('--pdf', '-P', dest='is_pdf', action='store_true',
help='generate PDF file')
parser.add_argument('--meta', dest='generate_metadata', action='store_true', default=False, parser.add_option('--meta', dest='generate_metadata', action='store_true', default=False,
help='generate a metadata file in doujinshi format') help='generate a metadata file in doujinshi format')
parser.add_argument('--update-meta', dest='update_metadata', action='store_true', default=False, parser.add_option('--update-meta', dest='update_metadata', action='store_true', default=False,
help='update the metadata file of a doujinshi, update CBZ metadata if exists') help='update the metadata file of a doujinshi, update CBZ metadata if exists')
parser.add_argument('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False, parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
help='remove downloaded doujinshi dir when generated CBZ or PDF file') help='remove downloaded doujinshi dir when generated CBZ or PDF file')
parser.add_argument('--move-to-folder', dest='move_to_folder', action='store_true', default=False, parser.add_option('--move-to-folder', dest='move_to_folder', action='store_true', default=False,
help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file') help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
parser.add_argument('--regenerate', dest='regenerate', action='store_true', default=False, parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False,
help='regenerate the cbz or pdf file if exists') help='regenerate the cbz or pdf file if exists')
parser.add_argument('--zip', action='store_true', help='Package into a single zip file')
# nhentai options # nhentai options
parser.add_argument('--cookie', type=str, dest='cookie', parser.add_option('--cookie', type='str', dest='cookie', action='store',
help='set cookie of nhentai to bypass Cloudflare captcha') help='set cookie of nhentai to bypass Cloudflare captcha')
parser.add_argument('--useragent', '--user-agent', type=str, dest='useragent', parser.add_option('--useragent', '--user-agent', type='str', dest='useragent', action='store',
help='set useragent to bypass Cloudflare captcha') help='set useragent to bypass Cloudflare captcha')
parser.add_argument('--language', type=str, dest='language', parser.add_option('--language', type='str', dest='language', action='store',
help='set default language to parse doujinshis') help='set default language to parse doujinshis')
parser.add_argument('--clean-language', dest='clean_language', action='store_true', default=False, parser.add_option('--clean-language', dest='clean_language', action='store_true', default=False,
help='set DEFAULT as language to parse doujinshis') help='set DEFAULT as language to parse doujinshis')
parser.add_argument('--save-download-history', dest='is_save_download_history', action='store_true', parser.add_option('--save-download-history', dest='is_save_download_history', action='store_true',
default=False, help='save downloaded doujinshis, whose will be skipped if you re-download them') default=False, help='save downloaded doujinshis, whose will be skipped if you re-download them')
parser.add_argument('--clean-download-history', action='store_true', default=False, dest='clean_download_history', parser.add_option('--clean-download-history', action='store_true', default=False, dest='clean_download_history',
help='clean download history') help='clean download history')
parser.add_argument('--template', dest='viewer_template', type=str, default='', parser.add_option('--template', dest='viewer_template', action='store',
help='set viewer template') help='set viewer template', default='')
parser.add_argument('--legacy', dest='legacy', action='store_true', default=False, parser.add_option('--legacy', dest='legacy', action='store_true', default=False,
help='use legacy searching method') help='use legacy searching method')
args = parser.parse_args() args, _ = parser.parse_args(sys.argv[1:])
if args.html_viewer: if args.html_viewer:
if not os.path.exists(args.html_viewer): generate_html(template=constant.CONFIG['template'])
logger.error(f'Path \'{args.html_viewer}\' not exists')
sys.exit(1)
for root, dirs, files in os.walk(args.html_viewer):
if not dirs:
generate_html(output_dir=args.html_viewer, template=constant.CONFIG['template'])
sys.exit(0)
for dir_name in dirs:
# it will scan the entire subdirectories
doujinshi_dir = os.path.join(root, dir_name)
items = set(map(lambda s: os.path.splitext(s)[1], os.listdir(doujinshi_dir)))
# skip directory without any images
if items & set(EXTENSIONS):
generate_html(output_dir=doujinshi_dir, template=constant.CONFIG['template'])
sys.exit(0)
sys.exit(0) sys.exit(0)
if args.main_viewer and not args.id and not args.keyword and not args.favorites: if args.main_viewer and not args.id and not args.keyword and not args.favorites:

View File

@ -10,7 +10,7 @@ from nhentai import constant
from nhentai.cmdline import cmd_parser, banner, write_config from nhentai.cmdline import cmd_parser, banner, write_config
from nhentai.parser import doujinshi_parser, search_parser, legacy_search_parser, print_doujinshi, favorites_parser from nhentai.parser import doujinshi_parser, search_parser, legacy_search_parser, print_doujinshi, favorites_parser
from nhentai.doujinshi import Doujinshi from nhentai.doujinshi import Doujinshi
from nhentai.downloader import Downloader, CompressedDownloader from nhentai.downloader import Downloader
from nhentai.logger import logger from nhentai.logger import logger
from nhentai.constant import BASE_URL from nhentai.constant import BASE_URL
from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata, \ from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata, \
@ -80,16 +80,12 @@ def main():
if options.is_save_download_history: if options.is_save_download_history:
with DB() as db: with DB() as db:
data = set(map(int, db.get_all())) data = map(int, db.get_all())
doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data)) doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))
logger.info(f'New doujinshis account: {len(doujinshi_ids)}')
if options.zip:
options.is_nohtml = True
if not options.is_show: if not options.is_show:
downloader = (CompressedDownloader if options.zip else Downloader)(path=options.output_dir, threads=options.threads, downloader = Downloader(path=options.output_dir, threads=options.threads,
timeout=options.timeout, delay=options.delay, timeout=options.timeout, delay=options.delay,
exit_on_fail=options.exit_on_fail, exit_on_fail=options.exit_on_fail,
no_filename_padding=options.no_filename_padding) no_filename_padding=options.no_filename_padding)

View File

@ -4,8 +4,6 @@ import os
import asyncio import asyncio
import httpx import httpx
import urllib3.exceptions import urllib3.exceptions
import zipfile
import io
from urllib.parse import urlparse from urllib.parse import urlparse
from nhentai import constant from nhentai import constant
@ -15,6 +13,11 @@ from nhentai.utils import Singleton, async_request
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
class NHentaiImageNotExistException(Exception):
    """Error type for an image that does not exist (going by its name).

    The download coroutine in this file catches it, removes the file at the
    save path and reports status ``-3`` for the URL.
    """
def download_callback(result): def download_callback(result):
result, data = result result, data = result
if result == 0: if result == 0:
@ -74,7 +77,13 @@ class Downloader(Singleton):
else: else:
filename = base_filename + extension filename = base_filename + extension
save_file_path = os.path.join(self.folder, filename)
try: try:
if os.path.exists(save_file_path):
logger.warning(f'Skipped download: {save_file_path} already exists')
return 1, url
response = await async_request('GET', url, timeout=self.timeout, proxy=proxy) response = await async_request('GET', url, timeout=self.timeout, proxy=proxy)
if response.status_code != 200: if response.status_code != 200:
@ -104,6 +113,10 @@ class Downloader(Singleton):
logger.warning(f'Download {filename} failed with {constant.RETRY_TIMES} times retried, skipped') logger.warning(f'Download {filename} failed with {constant.RETRY_TIMES} times retried, skipped')
return -2, url return -2, url
except NHentaiImageNotExistException as e:
os.remove(save_file_path)
return -3, url
except Exception as e: except Exception as e:
import traceback import traceback
@ -117,11 +130,11 @@ class Downloader(Singleton):
return 1, url return 1, url
async def save(self, filename, response) -> bool: async def save(self, save_file_path, response) -> bool:
if response is None: if response is None:
logger.error('Error: Response is None') logger.error('Error: Response is None')
return False return False
save_file_path = os.path.join(self.folder, filename) save_file_path = os.path.join(self.folder, save_file_path)
with open(save_file_path, 'wb') as f: with open(save_file_path, 'wb') as f:
if response is not None: if response is not None:
length = response.headers.get('content-length') length = response.headers.get('content-length')
@ -132,15 +145,6 @@ class Downloader(Singleton):
f.write(chunk) f.write(chunk)
return True return True
def create_storage_object(self, folder:str):
    """Prepare ``folder`` as the destination directory for downloads.

    The directory (including missing parents) is created when needed and
    then recorded on ``self.folder``. A failure to create it is logged as
    critical and not raised, so the caller keeps running.

    :param folder: path of the directory the files will be written to
    """
    try:
        # exist_ok closes the gap between "does it exist?" and "create it",
        # which could fail when another process creates the folder first.
        os.makedirs(folder, exist_ok=True)
    except OSError as e:  # EnvironmentError is an alias of OSError
        logger.critical(str(e))

    self.folder: str = folder
    # A plain directory has nothing to release; CompressedDownloader
    # replaces this with a callable that closes its zip archive.
    self.close = lambda: None
def start_download(self, queue, folder='') -> bool: def start_download(self, queue, folder='') -> bool:
if not isinstance(folder, (str,)): if not isinstance(folder, (str,)):
folder = str(folder) folder = str(folder)
@ -149,7 +153,12 @@ class Downloader(Singleton):
folder = os.path.join(self.path, folder) folder = os.path.join(self.path, folder)
logger.info(f'Doujinshi will be saved at "{folder}"') logger.info(f'Doujinshi will be saved at "{folder}"')
self.create_storage_object(folder) if not os.path.exists(folder):
try:
os.makedirs(folder)
except EnvironmentError as e:
logger.critical(str(e))
self.folder = folder
if os.getenv('DEBUG', None) == 'NODOWNLOAD': if os.getenv('DEBUG', None) == 'NODOWNLOAD':
# Assuming we want to continue with rest of process. # Assuming we want to continue with rest of process.
@ -165,31 +174,4 @@ class Downloader(Singleton):
# Prevent coroutines infection # Prevent coroutines infection
asyncio.run(self.fiber(coroutines)) asyncio.run(self.fiber(coroutines))
self.close()
return True
class CompressedDownloader(Downloader):
def create_storage_object(self, folder):
    """Open ``<folder>.zip`` for writing and use it as the storage target.

    The archive handle is kept on ``self.zipfile``; ``self.close`` is bound
    to its ``close`` method so the caller can finalize the archive.

    :param folder: target path without the ``.zip`` suffix
    """
    filename = f'{folder}.zip'

    # Mirror the directory-based downloader: make sure the place the
    # archive lives in exists before opening it for writing.
    parent = os.path.dirname(filename)
    if parent:
        os.makedirs(parent, exist_ok=True)

    self.zipfile = zipfile.ZipFile(filename, 'w')
    self.close = self.zipfile.close
async def save(self, filename, response) -> bool:
if response is None:
logger.error('Error: Response is None')
return False
image_data = io.BytesIO()
length = response.headers.get('content-length')
if length is None:
content = await response.read()
image_data.write(content)
else:
async for chunk in response.aiter_bytes(2048):
image_data.write(chunk)
image_data.seek(0)
self.zipfile.writestr(filename, image_data.read())
return True return True

View File

@ -1,5 +1,5 @@
# coding: utf-8 # coding: utf-8
import json
import sys import sys
import re import re
import os import os
@ -20,24 +20,14 @@ from nhentai.serializer import serialize_comic_xml, serialize_json, serialize_in
MAX_FIELD_LENGTH = 100 MAX_FIELD_LENGTH = 100
EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp') EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
def get_headers():
    """Build the HTTP headers sent with requests.

    ``Referer`` is always set to ``constant.LOGIN_URL``. ``User-Agent`` and
    ``Cookie`` are added only when the matching ``constant.CONFIG`` entry
    is present and not blank.
    """
    headers = {'Referer': constant.LOGIN_URL}

    optional = (('User-Agent', 'useragent'), ('Cookie', 'cookie'))
    for header_name, config_key in optional:
        value = constant.CONFIG.get(config_key)
        # Skip missing, empty and whitespace-only settings.
        if value and value.strip():
            headers[header_name] = value

    return headers
def request(method, url, **kwargs): def request(method, url, **kwargs):
session = requests.Session() session = requests.Session()
session.headers.update(get_headers()) session.headers.update({
'Referer': constant.LOGIN_URL,
'User-Agent': constant.CONFIG['useragent'],
'Cookie': constant.CONFIG['cookie']
})
if not kwargs.get('proxies', None): if not kwargs.get('proxies', None):
kwargs['proxies'] = { kwargs['proxies'] = {
@ -49,7 +39,11 @@ def request(method, url, **kwargs):
async def async_request(method, url, proxy = None, **kwargs): async def async_request(method, url, proxy = None, **kwargs):
headers=get_headers() headers = {
'Referer': constant.LOGIN_URL,
'User-Agent': constant.CONFIG['useragent'],
'Cookie': constant.CONFIG['cookie'],
}
if proxy is None: if proxy is None:
proxy = constant.CONFIG['proxy'] proxy = constant.CONFIG['proxy']
@ -115,9 +109,6 @@ def parse_doujinshi_obj(
filename = os.path.join(output_dir, _filename) filename = os.path.join(output_dir, _filename)
else: else:
if file_type == 'html':
return output_dir, 'index.html'
doujinshi_dir = f'.{PATH_SEPARATOR}' doujinshi_dir = f'.{PATH_SEPARATOR}'
if not os.path.exists(doujinshi_dir): if not os.path.exists(doujinshi_dir):
@ -127,7 +118,7 @@ def parse_doujinshi_obj(
def generate_html(output_dir='.', doujinshi_obj=None, template='default'): def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, 'html') doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '.html')
image_html = '' image_html = ''
if not os.path.exists(doujinshi_dir): if not os.path.exists(doujinshi_dir):
@ -153,13 +144,7 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
# serialize_json(doujinshi_obj, doujinshi_dir) # serialize_json(doujinshi_obj, doujinshi_dir)
name = doujinshi_obj.name name = doujinshi_obj.name
else: else:
metadata_path = os.path.join(doujinshi_dir, "metadata.json") name = {'title': 'nHentai HTML Viewer'}
if os.path.exists(metadata_path):
with open(metadata_path, 'r') as file:
doujinshi_info = json.loads(file.read())
name = doujinshi_info.get("title")
else:
name = 'nHentai HTML Viewer'
data = html.format(TITLE=name, IMAGES=image_html, SCRIPTS=js, STYLES=css) data = html.format(TITLE=name, IMAGES=image_html, SCRIPTS=js, STYLES=css)
try: try:

View File

@ -1,27 +1,14 @@
import unittest import unittest
import os import os
import zipfile
import urllib3.exceptions import urllib3.exceptions
from nhentai import constant from nhentai import constant
from nhentai.cmdline import load_config from nhentai.cmdline import load_config
from nhentai.downloader import Downloader, CompressedDownloader from nhentai.downloader import Downloader
from nhentai.parser import doujinshi_parser from nhentai.parser import doujinshi_parser
from nhentai.doujinshi import Doujinshi from nhentai.doujinshi import Doujinshi
from nhentai.utils import generate_html from nhentai.utils import generate_html
did = 440546
def has_jepg_file(path, filename='01.jpg'):
    """Return True when the zip archive at ``path`` has a member ``filename``.

    :param path: location of the zip archive to inspect
    :param filename: exact member name to look for; defaults to ``'01.jpg'``
    """
    with zipfile.ZipFile(path, 'r') as zf:
        return filename in zf.namelist()
def is_zip_file(path):
    """Return True when ``path`` can be opened as a zip archive.

    A missing path, a directory, an unreadable file or a file that is not
    a valid archive all yield False instead of raising.
    """
    try:
        with zipfile.ZipFile(path, 'r'):
            return True
    except (zipfile.BadZipFile, OSError):
        # OSError covers FileNotFoundError as well as directories and
        # permission problems, so the helper always answers with a bool.
        return False
class TestDownload(unittest.TestCase): class TestDownload(unittest.TestCase):
def setUp(self) -> None: def setUp(self) -> None:
@ -30,27 +17,17 @@ class TestDownload(unittest.TestCase):
constant.CONFIG['cookie'] = os.getenv('NHENTAI_COOKIE') constant.CONFIG['cookie'] = os.getenv('NHENTAI_COOKIE')
constant.CONFIG['useragent'] = os.getenv('NHENTAI_UA') constant.CONFIG['useragent'] = os.getenv('NHENTAI_UA')
self.info = Doujinshi(**doujinshi_parser(did), name_format='%i')
def test_download(self): def test_download(self):
info = self.info did = 440546
info = Doujinshi(**doujinshi_parser(did), name_format='%i')
info.downloader = Downloader(path='/tmp', threads=5) info.downloader = Downloader(path='/tmp', threads=5)
info.download() info.download()
self.assertTrue(os.path.exists(f'/tmp/{did}/01.jpg')) self.assertTrue(os.path.exists(f'/tmp/{did}/001.jpg'))
generate_html('/tmp', info) generate_html('/tmp', info)
self.assertTrue(os.path.exists(f'/tmp/{did}/index.html')) self.assertTrue(os.path.exists(f'/tmp/{did}/index.html'))
def test_zipfile_download(self):
    """Download the sample gallery through CompressedDownloader and check the archive."""
    doujinshi = self.info
    doujinshi.downloader = CompressedDownloader(path='/tmp', threads=5)
    doujinshi.download()

    archive = f'/tmp/{did}.zip'
    # Same three checks in the same order: the file exists, it opens as
    # a zip archive, and it contains the first page.
    for check in (os.path.exists, is_zip_file, has_jepg_file):
        self.assertTrue(check(archive))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()