Merge branch 'dev' into master

This commit is contained in:
Alocks 2019-10-08 15:42:35 -03:00 committed by GitHub
commit 3017fff823
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 103 additions and 63 deletions

View File

@ -16,7 +16,7 @@ script:
- nhentai --cookie "csrftoken=3c4Mzn4f6NAI1awFqfIh495G3pv5Wade9n63Kx03mkSac8c2QR5vRR4jCwVzb3OR; sessionid=m034e2dyyxgbl9s07hbzgfhvadbap2tk" - nhentai --cookie "csrftoken=3c4Mzn4f6NAI1awFqfIh495G3pv5Wade9n63Kx03mkSac8c2QR5vRR4jCwVzb3OR; sessionid=m034e2dyyxgbl9s07hbzgfhvadbap2tk"
- nhentai --search umaru - nhentai --search umaru
- nhentai --id=152503,146134 -t 10 --output=/tmp/ --cbz - nhentai --id=152503,146134 -t 10 --output=/tmp/ --cbz
- nhentai --tag lolicon - nhentai --tag lolicon --sorting popular
- nhentai -F - nhentai -F
- nhentai --file /tmp/test.txt - nhentai --file /tmp/test.txt
- nhentai --id=152503,146134 --gen-main --output=/tmp/ - nhentai --id=152503,146134 --gen-main --output=/tmp/

View File

@ -1,3 +1,3 @@
__version__ = '0.3.6' __version__ = '0.3.7'
__author__ = 'RicterZ' __author__ = 'RicterZ'
__email__ = 'ricterzheng@gmail.com' __email__ = 'ricterzheng@gmail.com'

View File

@ -48,7 +48,8 @@ def cmd_parser():
# doujinshi options # doujinshi options
parser.add_option('--id', type='string', dest='id', action='store', help='doujinshi ids set, e.g. 1,2,3') parser.add_option('--id', type='string', dest='id', action='store', help='doujinshi ids set, e.g. 1,2,3')
parser.add_option('--search', '-s', type='string', dest='keyword', action='store', help='search doujinshi by keyword') parser.add_option('--search', '-s', type='string', dest='keyword', action='store',
help='search doujinshi by keyword')
parser.add_option('--tag', type='string', dest='tag', action='store', help='download doujinshi by tag') parser.add_option('--tag', type='string', dest='tag', action='store', help='download doujinshi by tag')
parser.add_option('--artist', type='string', dest='artist', action='store', help='download doujinshi by artist') parser.add_option('--artist', type='string', dest='artist', action='store', help='download doujinshi by artist')
parser.add_option('--character', type='string', dest='character', action='store', help='download doujinshi by character') parser.add_option('--character', type='string', dest='character', action='store', help='download doujinshi by character')
@ -62,6 +63,8 @@ def cmd_parser():
help='page number of search results') help='page number of search results')
parser.add_option('--max-page', type='int', dest='max_page', action='store', default=1, parser.add_option('--max-page', type='int', dest='max_page', action='store', default=1,
help='The max page when recursive download tagged doujinshi') help='The max page when recursive download tagged doujinshi')
parser.add_option('--sorting', dest='sorting', action='store', default='date',
help='sorting of doujinshi (date / popular)', choices=['date', 'popular'])
# download options # download options
parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', default='', parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', default='',

View File

@ -4,14 +4,15 @@ from __future__ import unicode_literals, print_function
import signal import signal
import platform import platform
import time import time
import multiprocessing
from nhentai.cmdline import cmd_parser, banner from nhentai.cmdline import cmd_parser, banner
from nhentai.parser import doujinshi_parser, search_parser, print_doujinshi, favorites_parser, tag_parser, login from nhentai.parser import doujinshi_parser, search_parser, print_doujinshi, favorites_parser, tag_parser, login
from nhentai.doujinshi import Doujinshi from nhentai.doujinshi import Doujinshi
from nhentai.downloader import Downloader from nhentai.downloader import Downloader, init_worker
from nhentai.logger import logger from nhentai.logger import logger
from nhentai.constant import BASE_URL from nhentai.constant import BASE_URL
from nhentai.utils import generate_html, generate_cbz, generate_main_html, check_cookie from nhentai.utils import generate_html, generate_cbz, generate_main_html, check_cookie, signal_handler
def main(): def main():
@ -40,7 +41,7 @@ def main():
doujinshi_ids = map(lambda d: d['id'], doujinshis) doujinshi_ids = map(lambda d: d['id'], doujinshis)
elif options.tag: elif options.tag:
doujinshis = tag_parser(options.tag, max_page=options.max_page) doujinshis = tag_parser(options.tag, sorting=options.sorting, max_page=options.max_page)
print_doujinshi(doujinshis) print_doujinshi(doujinshis)
if options.is_download and doujinshis: if options.is_download and doujinshis:
doujinshi_ids = map(lambda d: d['id'], doujinshis) doujinshi_ids = map(lambda d: d['id'], doujinshis)
@ -70,7 +71,7 @@ def main():
doujinshi_ids = map(lambda d: d['id'], doujinshis) doujinshi_ids = map(lambda d: d['id'], doujinshis)
elif options.keyword: elif options.keyword:
doujinshis = search_parser(options.keyword, options.page) doujinshis = search_parser(options.keyword, sorting=options.sorting, page=options.page)
print_doujinshi(doujinshis) print_doujinshi(doujinshis)
if options.is_download: if options.is_download:
doujinshi_ids = map(lambda d: d['id'], doujinshis) doujinshi_ids = map(lambda d: d['id'], doujinshis)
@ -79,25 +80,35 @@ def main():
doujinshi_ids = options.id doujinshi_ids = options.id
if doujinshi_ids: if doujinshi_ids:
for id_ in doujinshi_ids: for i, id_ in enumerate(doujinshi_ids):
if options.delay: if options.delay:
time.sleep(options.delay) time.sleep(options.delay)
doujinshi_info = doujinshi_parser(id_) doujinshi_info = doujinshi_parser(id_)
if doujinshi_info:
doujinshi_list.append(Doujinshi(name_format=options.name_format, **doujinshi_info)) doujinshi_list.append(Doujinshi(name_format=options.name_format, **doujinshi_info))
if (i + 1) % 10 == 0:
logger.info('Progress: %d / %d' % (i + 1, len(doujinshi_ids)))
if not options.is_show: if not options.is_show:
downloader = Downloader(path=options.output_dir, downloader = Downloader(path=options.output_dir, size=options.threads,
thread=options.threads, timeout=options.timeout, delay=options.delay) timeout=options.timeout, delay=options.delay)
for doujinshi in doujinshi_list: for doujinshi in doujinshi_list:
doujinshi.downloader = downloader doujinshi.downloader = downloader
doujinshi.download() doujinshi.download()
if not options.is_nohtml and not options.is_cbz: if not options.is_nohtml and not options.is_cbz:
generate_html(options.output_dir, doujinshi) generate_html(options.output_dir, doujinshi)
elif options.is_cbz: elif options.is_cbz:
generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir) generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir)
if options.main_viewer: if options.main_viewer:
generate_main_html(options.output_dir) generate_main_html(options.output_dir)
if not platform.system() == 'Windows': if not platform.system() == 'Windows':
logger.log(15, '🍻 All done.') logger.log(15, '🍻 All done.')
else: else:
@ -107,12 +118,8 @@ def main():
[doujinshi.show() for doujinshi in doujinshi_list] [doujinshi.show() for doujinshi in doujinshi_list]
def signal_handler(signal, frame):
logger.error('Ctrl-C signal received. Stopping...')
exit(1)
signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGINT, signal_handler)
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View File

@ -1,10 +1,15 @@
# coding: utf- # coding: utf-
from __future__ import unicode_literals, print_function from __future__ import unicode_literals, print_function
import multiprocessing
import signal
from future.builtins import str as text from future.builtins import str as text
import os import os
import requests import requests
import threadpool import threadpool
import time import time
import multiprocessing as mp
try: try:
from urllib.parse import urlparse from urllib.parse import urlparse
@ -13,29 +18,25 @@ except ImportError:
from nhentai.logger import logger from nhentai.logger import logger
from nhentai.parser import request from nhentai.parser import request
from nhentai.utils import Singleton from nhentai.utils import Singleton, signal_handler
requests.packages.urllib3.disable_warnings() requests.packages.urllib3.disable_warnings()
semaphore = mp.Semaphore()
class NhentaiImageNotExistException(Exception): class NHentaiImageNotExistException(Exception):
pass pass
class Downloader(Singleton): class Downloader(Singleton):
def __init__(self, path='', thread=1, timeout=30, delay=0): def __init__(self, path='', size=5, timeout=30, delay=0):
if not isinstance(thread, (int, )) or thread < 1 or thread > 15: self.size = size
raise ValueError('Invalid threads count')
self.path = str(path) self.path = str(path)
self.thread_count = thread
self.threads = []
self.thread_pool = None
self.timeout = timeout self.timeout = timeout
self.delay = delay self.delay = delay
def _download(self, url, folder='', filename='', retried=0): def download_(self, url, folder='', filename='', retried=0):
if self.delay: if self.delay:
time.sleep(self.delay) time.sleep(self.delay)
logger.info('Starting to download {0} ...'.format(url)) logger.info('Starting to download {0} ...'.format(url))
@ -54,9 +55,9 @@ class Downloader(Singleton):
try: try:
response = request('get', url, stream=True, timeout=self.timeout) response = request('get', url, stream=True, timeout=self.timeout)
if response.status_code != 200: if response.status_code != 200:
raise NhentaiImageNotExistException raise NHentaiImageNotExistException
except NhentaiImageNotExistException as e: except NHentaiImageNotExistException as e:
raise e raise e
except Exception as e: except Exception as e:
@ -78,27 +79,37 @@ class Downloader(Singleton):
except (requests.HTTPError, requests.Timeout) as e: except (requests.HTTPError, requests.Timeout) as e:
if retried < 3: if retried < 3:
logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried)) logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
return 0, self._download(url=url, folder=folder, filename=filename, retried=retried+1) return 0, self.download_(url=url, folder=folder, filename=filename, retried=retried+1)
else: else:
return 0, None return 0, None
except NhentaiImageNotExistException as e: except NHentaiImageNotExistException as e:
os.remove(os.path.join(folder, base_filename.zfill(3) + extension)) os.remove(os.path.join(folder, base_filename.zfill(3) + extension))
return -1, url return -1, url
except Exception as e: except Exception as e:
import traceback
traceback.print_stack()
logger.critical(str(e)) logger.critical(str(e))
return 0, None return 0, None
except KeyboardInterrupt:
return -3, None
return 1, url return 1, url
def _download_callback(self, request, result): def _download_callback(self, result):
result, data = result result, data = result
if result == 0: if result == 0:
logger.warning('fatal errors occurred, ignored') logger.warning('fatal errors occurred, ignored')
# exit(1) # exit(1)
elif result == -1: elif result == -1:
logger.warning('url {} return status code 404'.format(data)) logger.warning('url {} return status code 404'.format(data))
elif result == -2:
logger.warning('Ctrl-C pressed, exiting sub processes ...')
elif result == -3:
# workers wont be run, just pass
pass
else: else:
logger.log(15, '{0} downloaded successfully'.format(data)) logger.log(15, '{0} downloaded successfully'.format(data))
@ -115,14 +126,34 @@ class Downloader(Singleton):
os.makedirs(folder) os.makedirs(folder)
except EnvironmentError as e: except EnvironmentError as e:
logger.critical('{0}'.format(str(e))) logger.critical('{0}'.format(str(e)))
exit(1)
else: else:
logger.warn('Path \'{0}\' already exist.'.format(folder)) logger.warn('Path \'{0}\' already exist.'.format(folder))
queue = [([url], {'folder': folder}) for url in queue] queue = [(self, url, folder) for url in queue]
self.thread_pool = threadpool.ThreadPool(self.thread_count) pool = multiprocessing.Pool(self.size, init_worker)
requests_ = threadpool.makeRequests(self._download, queue, self._download_callback)
[self.thread_pool.putRequest(req) for req in requests_]
self.thread_pool.wait() for item in queue:
pool.apply_async(download_wrapper, args=item, callback=self._download_callback)
pool.close()
pool.join()
def download_wrapper(obj, url, folder=''):
    """Pool-worker entry point: download one ``url`` unless shutdown has begun.

    The module-level ``semaphore`` doubles as a "keep going" flag: the worker
    SIGINT handler acquires it, so a zero value means Ctrl-C was already
    handled and remaining queued tasks should be skipped.

    :param obj: the ``Downloader`` instance whose ``download_`` is invoked.
    :param url: URL of the image to fetch.
    :param folder: destination directory forwarded to ``download_``.
    :return: the ``(status, data)`` tuple from ``Downloader.download_``, or
        ``(-3, None)`` when the task is skipped because of a shutdown.
    """
    try:
        accepting_work = bool(semaphore.get_value())
    except NotImplementedError:
        # get_value() is unsupported on platforms lacking sem_getvalue()
        # (e.g. macOS). Without this guard every task would fail there, so
        # fall back to "no shutdown requested" and perform the download.
        accepting_work = True

    if not accepting_work:
        return -3, None
    return Downloader.download_(obj, url=url, folder=folder)
def init_worker():
    """Install ``subprocess_signal`` as the SIGINT handler of the current process.

    Passed to ``multiprocessing.Pool`` as its initializer, so it is intended
    to run once in each pool worker.
    """
    interrupt_signal = signal.SIGINT
    signal.signal(interrupt_signal, subprocess_signal)
# SIGINT handler that init_worker() registers for pool workers.
# It tries, for up to one second, to acquire the module-level semaphore (whose
# value download_wrapper() inspects before starting a download), logs a warning
# when the acquire succeeds, and raises KeyboardInterrupt.
# NOTE(review): indentation was lost in this rendering, so it cannot be told
# here whether the raise is nested under the `if` or runs unconditionally --
# confirm against the real downloader module before relying on either reading.
def subprocess_signal(signal, frame):
if semaphore.acquire(timeout=1):
logger.warning('Ctrl-C pressed, exiting sub processes ...')
raise KeyboardInterrupt

View File

@ -121,8 +121,8 @@ def doujinshi_parser(id_):
return doujinshi_parser(str(id_)) return doujinshi_parser(str(id_))
except Exception as e: except Exception as e:
logger.critical(str(e)) logger.warn('Error: {}, ignored'.format(str(e)))
raise SystemExit return None
html = BeautifulSoup(response, 'html.parser') html = BeautifulSoup(response, 'html.parser')
doujinshi_info = html.find('div', attrs={'id': 'info'}) doujinshi_info = html.find('div', attrs={'id': 'info'})
@ -169,14 +169,9 @@ def doujinshi_parser(id_):
return doujinshi return doujinshi
def search_parser(keyword, page): def search_parser(keyword, sorting='date', page=1):
logger.debug('Searching doujinshis of keyword {0}'.format(keyword)) logger.debug('Searching doujinshis of keyword {0}'.format(keyword))
try: response = request('get', url=constant.SEARCH_URL, params={'q': keyword, 'page': page, 'sort': sorting}).content
response = request('get', url=constant.SEARCH_URL, params={'q': keyword, 'page': page}).content
except requests.ConnectionError as e:
logger.critical(e)
logger.warn('If you are in China, please configure the proxy to fu*k GFW.')
raise SystemExit
result = _get_title_and_id(response) result = _get_title_and_id(response)
if not result: if not result:
@ -194,21 +189,18 @@ def print_doujinshi(doujinshi_list):
tabulate(tabular_data=doujinshi_list, headers=headers, tablefmt='rst')) tabulate(tabular_data=doujinshi_list, headers=headers, tablefmt='rst'))
def tag_parser(tag_name, max_page=1, index=0): def tag_parser(tag_name, sorting='date', max_page=1):
result = [] result = []
tag_name = tag_name.lower() tag_name = tag_name.lower()
if ',' in tag_name: tag_name = tag_name.replace(' ', '-')
tag_name = [i.strip().replace(' ', '-') for i in tag_name.split(',')]
else: tag_name = tag_name.replace(' ', '-') if sorting == 'date':
sorting = ''
for p in range(1, max_page + 1): for p in range(1, max_page + 1):
logger.debug('Fetching page {0} for doujinshi with tag \'{1}\''.format(p, tag_name)) logger.debug('Fetching page {0} for doujinshi with tag \'{1}\''.format(p, tag_name))
if isinstance(tag_name, str): response = request('get', url='%s/%s/%s?page=%d' % (constant.TAG_URL, tag_name, sorting, p)).content
response = request('get', url='%s/%s/?page=%d' % (constant.TAG_URL[index], tag_name, p)).content
result += _get_title_and_id(response)
else:
for i in tag_name:
response = request('get', url='%s/%s/?page=%d' % (constant.TAG_URL[index], i, p)).content
result += _get_title_and_id(response) result += _get_title_and_id(response)
if not result: if not result:
logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name)) logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name))
@ -220,13 +212,13 @@ def tag_parser(tag_name, max_page=1, index=0):
return result return result
def __api_suspended_search_parser(keyword, page): def __api_suspended_search_parser(keyword, sorting, page):
logger.debug('Searching doujinshis using keywords {0}'.format(keyword)) logger.debug('Searching doujinshis using keywords {0}'.format(keyword))
result = [] result = []
i = 0 i = 0
while i < 5: while i < 5:
try: try:
response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page}).json() response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page, 'sort': sorting}).json()
except Exception as e: except Exception as e:
i += 1 i += 1
if not i < 5: if not i < 5:
@ -250,10 +242,10 @@ def __api_suspended_search_parser(keyword, page):
return result return result
def __api_suspended_tag_parser(tag_id, max_page=1): def __api_suspended_tag_parser(tag_id, sorting, max_page=1):
logger.info('Searching for doujinshi with tag id {0}'.format(tag_id)) logger.info('Searching for doujinshi with tag id {0}'.format(tag_id))
result = [] result = []
response = request('get', url=constant.TAG_API_URL, params={'sort': 'popular', 'tag_id': tag_id}).json() response = request('get', url=constant.TAG_API_URL, params={'sort': sorting, 'tag_id': tag_id}).json()
page = max_page if max_page <= response['num_pages'] else int(response['num_pages']) page = max_page if max_page <= response['num_pages'] else int(response['num_pages'])
for i in range(1, page + 1): for i in range(1, page + 1):
@ -261,7 +253,7 @@ def __api_suspended_tag_parser(tag_id, max_page=1):
if page != 1: if page != 1:
response = request('get', url=constant.TAG_API_URL, response = request('get', url=constant.TAG_API_URL,
params={'sort': 'popular', 'tag_id': tag_id}).json() params={'sort': sorting, 'tag_id': tag_id}).json()
for row in response['result']: for row in response['result']:
title = row['title']['english'] title = row['title']['english']
title = title[:85] + '..' if len(title) > 85 else title title = title[:85] + '..' if len(title) > 85 else title

View File

@ -207,3 +207,10 @@ an invalid filename.
# Remove [] from filename # Remove [] from filename
filename = filename.replace('[]', '') filename = filename.replace('[]', '')
return filename return filename
def signal_handler(signal, frame):
    """Handle Ctrl-C: log the interruption and terminate with exit status 1.

    :param signal: number of the delivered signal (unused).
    :param frame: stack frame that was interrupted (unused).
    :raises SystemExit: always, with code 1.
    """
    # Imported locally so this block does not depend on the module's import
    # header, which is not visible here.
    import sys

    logger.error('Ctrl-C signal received. Stopping...')
    # sys.exit() rather than the bare exit() helper: the latter is injected by
    # the `site` module and is missing under `python -S` or in frozen builds.
    sys.exit(1)