From 2cf4e6718ecf5012d613cbc226696d3d88f2cdb7 Mon Sep 17 00:00:00 2001 From: Lieuwe Leene Date: Sun, 16 May 2021 19:44:01 +0200 Subject: [PATCH 1/4] Add the option to perform a dry-run and only download meta-data / generate file structure --- nhentai/cmdline.py | 5 +++++ nhentai/command.py | 5 +++-- nhentai/utils.py | 13 ++++++++++--- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/nhentai/cmdline.py b/nhentai/cmdline.py index 67140cd..c7246d2 100644 --- a/nhentai/cmdline.py +++ b/nhentai/cmdline.py @@ -89,6 +89,7 @@ def cmd_parser(): parser.add_option('--file', '-f', type='string', dest='file', action='store', help='read gallery IDs from file.') parser.add_option('--format', type='string', dest='name_format', action='store', help='format the saved folder name', default='[%i][%a][%t]') + parser.add_option('--dry-run', '-r', action='store_true', dest='dryrun', help='Dry run, skip file download.') # generate options parser.add_option('--html', dest='html_viewer', action='store_true', @@ -214,4 +215,8 @@ def cmd_parser(): logger.critical('Maximum number of used threads is 15') exit(1) + if args.dryrun and (args.is_cbz or args.is_pdf): + logger.critical('Cannot generate PDF or CBZ during dry-run') + exit(1) + return args diff --git a/nhentai/command.py b/nhentai/command.py index bc987fb..92b6793 100644 --- a/nhentai/command.py +++ b/nhentai/command.py @@ -89,9 +89,10 @@ def main(): timeout=options.timeout, delay=options.delay) for doujinshi in doujinshi_list: + if not options.dryrun: + doujinshi.downloader = downloader + doujinshi.download() - doujinshi.downloader = downloader - doujinshi.download() if options.is_save_download_history: with DB() as db: db.add_one(doujinshi.id) diff --git a/nhentai/utils.py b/nhentai/utils.py index 6977297..c076cf8 100644 --- a/nhentai/utils.py +++ b/nhentai/utils.py @@ -70,6 +70,13 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'): else: doujinshi_dir = '.' + if not os.path.exists(doujinshi_dir): + logger.warning('Path \'{0}\' does not exist, creating.'.format(doujinshi_dir)) + try: + os.makedirs(doujinshi_dir) + except EnvironmentError as e: + logger.critical('{0}'.format(str(e))) + file_list = os.listdir(doujinshi_dir) file_list.sort() @@ -194,7 +201,7 @@ def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_ def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False): try: import img2pdf - + """Write images to a PDF file using img2pdf.""" if doujinshi_obj is not None: doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename) @@ -220,7 +227,7 @@ def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False): shutil.rmtree(doujinshi_dir, ignore_errors=True) logger.log(15, 'PDF file has been written to \'{0}\''.format(doujinshi_dir)) - + except ImportError: logger.error("Please install img2pdf package by using pip.") @@ -236,7 +243,7 @@ def format_filename(s): It used to be a whitelist approach allowed only alphabet and a part of symbols. but most doujinshi's names include Japanese 2-byte characters and these was rejected. so it is using blacklist approach now. - if filename include forbidden characters (\'/:,;*?"<>|) ,it replace space character(' '). + if filename include forbidden characters (\'/:,;*?"<>|) ,it replace space character(' '). """ # maybe you can use `--format` to select a suitable filename ban_chars = '\\\'/:,;*?"<>|\t' From bd38294bb7ca69967302486073e02aceaa762669 Mon Sep 17 00:00:00 2001 From: Lieuwe Leene Date: Sun, 16 May 2021 19:49:26 +0200 Subject: [PATCH 2/4] undo whitespace edits --- nhentai/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nhentai/utils.py b/nhentai/utils.py index c076cf8..dd74e66 100644 --- a/nhentai/utils.py +++ b/nhentai/utils.py @@ -243,7 +243,7 @@ def format_filename(s): It used to be a whitelist approach allowed only alphabet and a part of symbols. but most doujinshi's names include Japanese 2-byte characters and these was rejected. so it is using blacklist approach now. - if filename include forbidden characters (\'/:,;*?"<>|) ,it replace space character(' '). + if filename include forbidden characters (\'/:,;*?"<>|) ,it replace space character(' '). """ # maybe you can use `--format` to select a suitable filename ban_chars = '\\\'/:,;*?"<>|\t' From d9d2a6fb91378911a259c9a46b922856e11ec072 Mon Sep 17 00:00:00 2001 From: RicterZ Date: Wed, 2 Jun 2021 23:20:56 +0800 Subject: [PATCH 3/4] fix bug of proxy while downloading doujinshi --- nhentai/constant.py | 1 - nhentai/downloader.py | 14 ++++++++------ nhentai/utils.py | 6 +++++- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/nhentai/constant.py b/nhentai/constant.py index ef2edb9..10feab8 100644 --- a/nhentai/constant.py +++ b/nhentai/constant.py @@ -29,7 +29,6 @@ NHENTAI_HOME = os.path.join(os.getenv('HOME', tempfile.gettempdir()), '.nhentai' NHENTAI_HISTORY = os.path.join(NHENTAI_HOME, 'history.sqlite3') NHENTAI_CONFIG_FILE = os.path.join(NHENTAI_HOME, 'config.json') - CONFIG = { 'proxy': {'http': '', 'https': ''}, 'cookie': '', diff --git a/nhentai/downloader.py b/nhentai/downloader.py index dd1e431..f0aa9fd 100644 --- a/nhentai/downloader.py +++ b/nhentai/downloader.py @@ -14,6 +14,7 @@ try: except ImportError: from urlparse import urlparse +from nhentai import constant from nhentai.logger import logger from nhentai.parser import request from nhentai.utils import Singleton @@ -34,7 +35,7 @@ class Downloader(Singleton): self.timeout = timeout self.delay = delay - def download_(self, url, folder='', filename='', retried=0): + def download_(self, url, folder='', filename='', retried=0, proxy=None): if self.delay: time.sleep(self.delay) logger.info('Starting to download {0} ...'.format(url)) @@ -51,7 +52,7 @@ class Downloader(Singleton): i = 0 while i < 10: try: - response = request('get', url, stream=True, timeout=self.timeout) + response = request('get', url, stream=True, timeout=self.timeout, proxies=proxy) if response.status_code != 200: raise NHentaiImageNotExistException @@ -77,7 +78,8 @@ class Downloader(Singleton): except (requests.HTTPError, requests.Timeout) as e: if retried < 3: logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried)) - return 0, self.download_(url=url, folder=folder, filename=filename, retried=retried+1) + return 0, self.download_(url=url, folder=folder, filename=filename, + retried=retried+1, proxy=proxy) else: return 0, None @@ -128,7 +130,7 @@ class Downloader(Singleton): else: logger.warning('Path \'{0}\' already exist.'.format(folder)) - queue = [(self, url, folder) for url in queue] + queue = [(self, url, folder, constant.CONFIG['proxy']) for url in queue] pool = multiprocessing.Pool(self.size, init_worker) [pool.apply_async(download_wrapper, args=item) for item in queue] @@ -137,9 +139,9 @@ class Downloader(Singleton): pool.join() -def download_wrapper(obj, url, folder=''): +def download_wrapper(obj, url, folder='', proxy=None): if sys.platform == 'darwin' or semaphore.get_value(): - return Downloader.download_(obj, url=url, folder=folder) + return Downloader.download_(obj, url=url, folder=folder, proxy=proxy) else: return -3, None diff --git a/nhentai/utils.py b/nhentai/utils.py index 6977297..4519048 100644 --- a/nhentai/utils.py +++ b/nhentai/utils.py @@ -20,7 +20,11 @@ def request(method, url, **kwargs): 'User-Agent': 'nhentai command line client (https://github.com/RicterZ/nhentai)', 'Cookie': constant.CONFIG['cookie'] }) - return getattr(session, method)(url, proxies=constant.CONFIG['proxy'], verify=False, **kwargs) + + if not kwargs.get('proxies', None): + kwargs['proxies'] = constant.CONFIG['proxy'] + + return getattr(session, method)(url, verify=False, **kwargs) def check_cookie(): From 24be2d37d4a85cf24d550d4166632ca6595b9915 Mon Sep 17 00:00:00 2001 From: RicterZ Date: Wed, 2 Jun 2021 23:22:23 +0800 Subject: [PATCH 4/4] 0.4.16 --- nhentai/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nhentai/__init__.py b/nhentai/__init__.py index e0d4f0a..e0baeec 100644 --- a/nhentai/__init__.py +++ b/nhentai/__init__.py @@ -1,3 +1,3 @@ -__version__ = '0.4.15' +__version__ = '0.4.16' __author__ = 'RicterZ' __email__ = 'ricterzheng@gmail.com'