diff --git a/.travis.yml b/.travis.yml
index cdb9d4f..63a2663 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -16,7 +16,7 @@ script:
   - nhentai --cookie "csrftoken=3c4Mzn4f6NAI1awFqfIh495G3pv5Wade9n63Kx03mkSac8c2QR5vRR4jCwVzb3OR; sessionid=m034e2dyyxgbl9s07hbzgfhvadbap2tk"
   - nhentai --search umaru
   - nhentai --id=152503,146134 -t 10 --output=/tmp/ --cbz
-  - nhentai --tag lolicon
+  - nhentai --tag lolicon --sorting popular
   - nhentai -F
   - nhentai --file /tmp/test.txt
   - nhentai --id=152503,146134 --gen-main --output=/tmp/
diff --git a/nhentai/__init__.py b/nhentai/__init__.py
index 2bc558a..f0d7796 100644
--- a/nhentai/__init__.py
+++ b/nhentai/__init__.py
@@ -1,3 +1,3 @@
-__version__ = '0.3.6'
+__version__ = '0.3.7'
 __author__ = 'RicterZ'
 __email__ = 'ricterzheng@gmail.com'
diff --git a/nhentai/cmdline.py b/nhentai/cmdline.py
index 360ed9a..14b8730 100644
--- a/nhentai/cmdline.py
+++ b/nhentai/cmdline.py
@@ -48,7 +48,8 @@ def cmd_parser():
     # doujinshi options
     parser.add_option('--id', type='string', dest='id', action='store', help='doujinshi ids set, e.g. 1,2,3')
-    parser.add_option('--search', '-s', type='string', dest='keyword', action='store', help='search doujinshi by keyword')
+    parser.add_option('--search', '-s', type='string', dest='keyword', action='store',
+                      help='search doujinshi by keyword')
     parser.add_option('--tag', type='string', dest='tag', action='store', help='download doujinshi by tag')
     parser.add_option('--artist', type='string', dest='artist', action='store', help='download doujinshi by artist')
     parser.add_option('--character', type='string', dest='character', action='store', help='download doujinshi by character')
@@ -62,6 +63,8 @@
                       help='page number of search results')
     parser.add_option('--max-page', type='int', dest='max_page', action='store', default=1,
                       help='The max page when recursive download tagged doujinshi')
+    parser.add_option('--sorting', dest='sorting', action='store', default='date',
+                      help='sorting of doujinshi (date / popular)', choices=['date', 'popular'])
 
     # download options
     parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', default='',
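
A note on the cmdline.py hunk above: the new --sorting option leans on an optparse detail — when choices is supplied without an explicit type, optparse infers type='choice' and rejects invalid values during argument parsing, before main() ever runs. A minimal standalone sketch of that behavior, not part of the patch:

import optparse

parser = optparse.OptionParser()
parser.add_option('--sorting', dest='sorting', action='store', default='date',
                  help='sorting of doujinshi (date / popular)', choices=['date', 'popular'])

options, _ = parser.parse_args(['--sorting', 'popular'])
assert options.sorting == 'popular'
# parser.parse_args(['--sorting', 'newest']) would exit with:
#   error: option --sorting: invalid choice: 'newest' (choose from 'date', 'popular')

The .travis.yml change above exercises this path end to end via nhentai --tag lolicon --sorting popular.
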
diff --git a/nhentai/command.py b/nhentai/command.py
index aad850a..f7dc68d 100644
--- a/nhentai/command.py
+++ b/nhentai/command.py
@@ -4,14 +4,15 @@ from __future__ import unicode_literals, print_function
 import signal
 import platform
 import time
+import multiprocessing
 
 from nhentai.cmdline import cmd_parser, banner
 from nhentai.parser import doujinshi_parser, search_parser, print_doujinshi, favorites_parser, tag_parser, login
 from nhentai.doujinshi import Doujinshi
-from nhentai.downloader import Downloader
+from nhentai.downloader import Downloader, init_worker
 from nhentai.logger import logger
 from nhentai.constant import BASE_URL
-from nhentai.utils import generate_html, generate_cbz, generate_main_html, check_cookie
+from nhentai.utils import generate_html, generate_cbz, generate_main_html, check_cookie, signal_handler
 
 
 def main():
@@ -40,7 +41,7 @@ def main():
             doujinshi_ids = map(lambda d: d['id'], doujinshis)
 
     elif options.tag:
-        doujinshis = tag_parser(options.tag, max_page=options.max_page)
+        doujinshis = tag_parser(options.tag, sorting=options.sorting, max_page=options.max_page)
         print_doujinshi(doujinshis)
         if options.is_download and doujinshis:
             doujinshi_ids = map(lambda d: d['id'], doujinshis)
@@ -70,7 +71,7 @@ def main():
             doujinshi_ids = map(lambda d: d['id'], doujinshis)
 
     elif options.keyword:
-        doujinshis = search_parser(options.keyword, options.page)
+        doujinshis = search_parser(options.keyword, sorting=options.sorting, page=options.page)
         print_doujinshi(doujinshis)
         if options.is_download:
             doujinshi_ids = map(lambda d: d['id'], doujinshis)
@@ -79,25 +80,35 @@
         doujinshi_ids = options.id
 
     if doujinshi_ids:
-        for id_ in doujinshi_ids:
+        for i, id_ in enumerate(doujinshi_ids):
             if options.delay:
                 time.sleep(options.delay)
+
             doujinshi_info = doujinshi_parser(id_)
-            doujinshi_list.append(Doujinshi(name_format=options.name_format, **doujinshi_info))
+
+            if doujinshi_info:
+                doujinshi_list.append(Doujinshi(name_format=options.name_format, **doujinshi_info))
+
+            if (i + 1) % 10 == 0:
+                logger.info('Progress: %d / %d' % (i + 1, len(doujinshi_ids)))
 
     if not options.is_show:
-        downloader = Downloader(path=options.output_dir,
-                                thread=options.threads, timeout=options.timeout, delay=options.delay)
+        downloader = Downloader(path=options.output_dir, size=options.threads,
+                                timeout=options.timeout, delay=options.delay)
 
         for doujinshi in doujinshi_list:
+            doujinshi.downloader = downloader
             doujinshi.download()
+
             if not options.is_nohtml and not options.is_cbz:
                 generate_html(options.output_dir, doujinshi)
             elif options.is_cbz:
                 generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir)
+
         if options.main_viewer:
             generate_main_html(options.output_dir)
+
         if not platform.system() == 'Windows':
             logger.log(15, '🍻 All done.')
         else:
@@ -107,12 +118,8 @@
         [doujinshi.show() for doujinshi in doujinshi_list]
 
 
-def signal_handler(signal, frame):
-    logger.error('Ctrl-C signal received. Stopping...')
-    exit(1)
-
-
 signal.signal(signal.SIGINT, signal_handler)
+
 
 if __name__ == '__main__':
     main()
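
A caveat in the download loop above: in the --search/--tag/--favorites branches, doujinshi_ids is assigned from map(...), which under Python 3 is a lazy iterator, so the len(doujinshi_ids) in the new progress line raises TypeError once ten ids have been processed (and `if doujinshi_ids:` is always truthy even for an empty result). A minimal sketch of the list materialization that would avoid both pitfalls; the ids here are hypothetical stand-ins:

# hypothetical stand-ins for map(lambda d: d['id'], doujinshis)
doujinshi_ids = map(str, [152503, 146134])

# materialize once so truth-testing, enumerate() and len() all behave
doujinshi_ids = list(doujinshi_ids)

if doujinshi_ids:
    for i, id_ in enumerate(doujinshi_ids):
        if (i + 1) % 10 == 0:
            print('Progress: %d / %d' % (i + 1, len(doujinshi_ids)))
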
diff --git a/nhentai/downloader.py b/nhentai/downloader.py
index 81037f8..2d9a0f4 100644
--- a/nhentai/downloader.py
+++ b/nhentai/downloader.py
@@ -1,10 +1,15 @@
 # coding: utf-8
 from __future__ import unicode_literals, print_function
+
+import multiprocessing
+import signal
+
 from future.builtins import str as text
 import os
 import requests
 import threadpool
 import time
+import multiprocessing as mp
 
 try:
     from urllib.parse import urlparse
@@ -13,29 +18,25 @@ except ImportError:
 
 from nhentai.logger import logger
 from nhentai.parser import request
-from nhentai.utils import Singleton
-
+from nhentai.utils import Singleton, signal_handler
 
 requests.packages.urllib3.disable_warnings()
+semaphore = mp.Semaphore()
 
-class NhentaiImageNotExistException(Exception):
+class NHentaiImageNotExistException(Exception):
     pass
 
 
 class Downloader(Singleton):
-    def __init__(self, path='', thread=1, timeout=30, delay=0):
-        if not isinstance(thread, (int, )) or thread < 1 or thread > 15:
-            raise ValueError('Invalid threads count')
+    def __init__(self, path='', size=5, timeout=30, delay=0):
+        self.size = size
         self.path = str(path)
-        self.thread_count = thread
-        self.threads = []
-        self.thread_pool = None
         self.timeout = timeout
         self.delay = delay
 
-    def _download(self, url, folder='', filename='', retried=0):
+    def download_(self, url, folder='', filename='', retried=0):
         if self.delay:
             time.sleep(self.delay)
         logger.info('Starting to download {0} ...'.format(url))
@@ -54,9 +55,9 @@
         try:
             response = request('get', url, stream=True, timeout=self.timeout)
             if response.status_code != 200:
-                raise NhentaiImageNotExistException
+                raise NHentaiImageNotExistException
 
-        except NhentaiImageNotExistException as e:
+        except NHentaiImageNotExistException as e:
             raise e
 
         except Exception as e:
@@ -78,27 +79,37 @@
         except (requests.HTTPError,
                 requests.Timeout) as e:
             if retried < 3:
                 logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
-                return 0, self._download(url=url, folder=folder, filename=filename, retried=retried+1)
+                return 0, self.download_(url=url, folder=folder, filename=filename, retried=retried+1)
             else:
                 return 0, None
 
-        except NhentaiImageNotExistException as e:
+        except NHentaiImageNotExistException as e:
             os.remove(os.path.join(folder, base_filename.zfill(3) + extension))
             return -1, url
 
         except Exception as e:
+            import traceback
+            traceback.print_stack()
             logger.critical(str(e))
             return 0, None
 
+        except KeyboardInterrupt:
+            return -3, None
+
         return 1, url
 
-    def _download_callback(self, request, result):
+    def _download_callback(self, result):
        result, data = result
        if result == 0:
            logger.warning('fatal errors occurred, ignored')
            # exit(1)
        elif result == -1:
            logger.warning('url {} return status code 404'.format(data))
+       elif result == -2:
+           logger.warning('Ctrl-C pressed, exiting sub processes ...')
+       elif result == -3:
+           # workers wont be run, just pass
+           pass
        else:
            logger.log(15, '{0} downloaded successfully'.format(data))
@@ -115,14 +126,34 @@
                 os.makedirs(folder)
             except EnvironmentError as e:
                 logger.critical('{0}'.format(str(e)))
-                exit(1)
+
         else:
             logger.warn('Path \'{0}\' already exist.'.format(folder))
 
-        queue = [([url], {'folder': folder}) for url in queue]
+        queue = [(self, url, folder) for url in queue]
 
-        self.thread_pool = threadpool.ThreadPool(self.thread_count)
-        requests_ = threadpool.makeRequests(self._download, queue, self._download_callback)
-        [self.thread_pool.putRequest(req) for req in requests_]
+        pool = multiprocessing.Pool(self.size, init_worker)
 
-        self.thread_pool.wait()
+        for item in queue:
+            pool.apply_async(download_wrapper, args=item, callback=self._download_callback)
+
+        pool.close()
+        pool.join()
+
+
+def download_wrapper(obj, url, folder=''):
+    if semaphore.get_value():
+        return Downloader.download_(obj, url=url, folder=folder)
+    else:
+        return -3, None
+
+
+def init_worker():
+    signal.signal(signal.SIGINT, subprocess_signal)
+
+
+def subprocess_signal(signal, frame):
+    if semaphore.acquire(timeout=1):
+        logger.warning('Ctrl-C pressed, exiting sub processes ...')
+
+    raise KeyboardInterrupt
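
The Ctrl-C handling in the downloader above couples three pieces: a pool initializer (init_worker) that installs a SIGINT handler in every worker, a module-level multiprocessing semaphore that the first interrupted worker drains, and a wrapper (download_wrapper) that checks the semaphore before starting new work. A minimal standalone sketch of the same pattern; it assumes Linux with the fork start method, since Semaphore.get_value() raises NotImplementedError on macOS:

import multiprocessing
import signal

semaphore = multiprocessing.Semaphore()  # value 1 until a SIGINT drains it


def subprocess_signal(sig, frame):
    # only the first worker to win the semaphore logs; every worker raises
    if semaphore.acquire(timeout=1):
        print('Ctrl-C pressed, exiting sub processes ...')
    raise KeyboardInterrupt


def init_worker():
    signal.signal(signal.SIGINT, subprocess_signal)


def work(n):
    if not semaphore.get_value():
        return -3, None  # shutdown already requested: skip the task
    return 1, n * n


if __name__ == '__main__':
    pool = multiprocessing.Pool(2, init_worker)
    results = [pool.apply_async(work, (n,)) for n in range(5)]
    pool.close()
    pool.join()
    print([r.get() for r in results])
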
diff --git a/nhentai/parser.py b/nhentai/parser.py
index fef71ca..9b9232b 100644
--- a/nhentai/parser.py
+++ b/nhentai/parser.py
@@ -121,8 +121,8 @@ def doujinshi_parser(id_):
             return doujinshi_parser(str(id_))
 
     except Exception as e:
-        logger.critical(str(e))
-        raise SystemExit
+        logger.warn('Error: {}, ignored'.format(str(e)))
+        return None
 
     html = BeautifulSoup(response, 'html.parser')
     doujinshi_info = html.find('div', attrs={'id': 'info'})
@@ -169,14 +169,9 @@ def doujinshi_parser(id_):
     return doujinshi
 
 
-def search_parser(keyword, page):
+def search_parser(keyword, sorting='date', page=1):
     logger.debug('Searching doujinshis of keyword {0}'.format(keyword))
-    try:
-        response = request('get', url=constant.SEARCH_URL, params={'q': keyword, 'page': page}).content
-    except requests.ConnectionError as e:
-        logger.critical(e)
-        logger.warn('If you are in China, please configure the proxy to fu*k GFW.')
-        raise SystemExit
+    response = request('get', url=constant.SEARCH_URL, params={'q': keyword, 'page': page, 'sort': sorting}).content
 
     result = _get_title_and_id(response)
     if not result:
@@ -194,22 +189,19 @@ def print_doujinshi(doujinshi_list):
                 tabulate(tabular_data=doujinshi_list, headers=headers, tablefmt='rst'))
 
 
-def tag_parser(tag_name, max_page=1, index=0):
+def tag_parser(tag_name, sorting='date', max_page=1):
     result = []
     tag_name = tag_name.lower()
-    if ',' in tag_name:
-        tag_name = [i.strip().replace(' ', '-') for i in tag_name.split(',')]
-    else: tag_name = tag_name.replace(' ', '-')
-
+    tag_name = tag_name.replace(' ', '-')
+
+    if sorting == 'date':
+        sorting = ''
+
     for p in range(1, max_page + 1):
         logger.debug('Fetching page {0} for doujinshi with tag \'{1}\''.format(p, tag_name))
-        if isinstance(tag_name, str):
-            response = request('get', url='%s/%s/?page=%d' % (constant.TAG_URL[index], tag_name, p)).content
-            result += _get_title_and_id(response)
-        else:
-            for i in tag_name:
-                response = request('get', url='%s/%s/?page=%d' % (constant.TAG_URL[index], i, p)).content
-                result += _get_title_and_id(response)
+        response = request('get', url='%s/%s/%s?page=%d' % (constant.TAG_URL, tag_name, sorting, p)).content
+
+        result += _get_title_and_id(response)
     if not result:
         logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name))
         return
@@ -220,13 +212,13 @@ def tag_parser(tag_name, max_page=1, index=0):
     return result
 
 
-def __api_suspended_search_parser(keyword, page):
+def __api_suspended_search_parser(keyword, sorting, page):
     logger.debug('Searching doujinshis using keywords {0}'.format(keyword))
     result = []
     i = 0
     while i < 5:
         try:
-            response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page}).json()
+            response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page, 'sort': sorting}).json()
         except Exception as e:
             i += 1
             if not i < 5:
@@ -250,10 +242,10 @@
     return result
 
 
-def __api_suspended_tag_parser(tag_id, max_page=1):
+def __api_suspended_tag_parser(tag_id, sorting, max_page=1):
     logger.info('Searching for doujinshi with tag id {0}'.format(tag_id))
     result = []
-    response = request('get', url=constant.TAG_API_URL, params={'sort': 'popular', 'tag_id': tag_id}).json()
+    response = request('get', url=constant.TAG_API_URL, params={'sort': sorting, 'tag_id': tag_id}).json()
     page = max_page if max_page <= response['num_pages'] else int(response['num_pages'])
 
     for i in range(1, page + 1):
@@ -261,7 +253,7 @@
         if page != 1:
             response = request('get', url=constant.TAG_API_URL,
-                               params={'sort': 'popular', 'tag_id': tag_id}).json()
+                               params={'sort': sorting, 'tag_id': tag_id}).json()
 
         for row in response['result']:
             title = row['title']['english']
             title = title[:85] + '..' if len(title) > 85 else title
diff --git a/nhentai/utils.py b/nhentai/utils.py
index 6a0d00b..815c690 100644
--- a/nhentai/utils.py
+++ b/nhentai/utils.py
@@ -207,3 +207,10 @@ an invalid filename.
     # Remove [] from filename
     filename = filename.replace('[]', '')
     return filename
+
+
+def signal_handler(signal, frame):
+    logger.error('Ctrl-C signal received. Stopping...')
+    exit(1)
+
+
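
The sorting value reaches the site differently in the two parsers above: search_parser passes it as a sort query parameter, while tag_parser maps 'date' to an empty path segment (the site default listing) and 'popular' to an extra path component. A standalone sketch of the resulting URL shapes; the constant values are assumptions for illustration, not taken from constant.py:

SEARCH_URL = 'https://nhentai.net/search/'  # assumed value of constant.SEARCH_URL
TAG_URL = 'https://nhentai.net/tag'         # assumed value of constant.TAG_URL


def tag_url(tag_name, sorting='date', page=1):
    if sorting == 'date':
        sorting = ''  # default listing: no sorting path segment
    return '%s/%s/%s?page=%d' % (TAG_URL, tag_name, sorting, page)


print(tag_url('full-color', 'popular', 2))  # https://nhentai.net/tag/full-color/popular?page=2
print(tag_url('full-color'))                # https://nhentai.net/tag/full-color/?page=1
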