Merge branch 'dev' into master

This commit is contained in:
Alocks 2019-10-08 15:42:35 -03:00 committed by GitHub
commit 3017fff823
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 103 additions and 63 deletions

View File

@ -16,7 +16,7 @@ script:
- nhentai --cookie "csrftoken=3c4Mzn4f6NAI1awFqfIh495G3pv5Wade9n63Kx03mkSac8c2QR5vRR4jCwVzb3OR; sessionid=m034e2dyyxgbl9s07hbzgfhvadbap2tk"
- nhentai --search umaru
- nhentai --id=152503,146134 -t 10 --output=/tmp/ --cbz
- nhentai --tag lolicon
- nhentai --tag lolicon --sorting popular
- nhentai -F
- nhentai --file /tmp/test.txt
- nhentai --id=152503,146134 --gen-main --output=/tmp/

View File

@ -1,3 +1,3 @@
__version__ = '0.3.6'
__version__ = '0.3.7'
__author__ = 'RicterZ'
__email__ = 'ricterzheng@gmail.com'

View File

@ -48,7 +48,8 @@ def cmd_parser():
# doujinshi options
parser.add_option('--id', type='string', dest='id', action='store', help='doujinshi ids set, e.g. 1,2,3')
parser.add_option('--search', '-s', type='string', dest='keyword', action='store', help='search doujinshi by keyword')
parser.add_option('--search', '-s', type='string', dest='keyword', action='store',
help='search doujinshi by keyword')
parser.add_option('--tag', type='string', dest='tag', action='store', help='download doujinshi by tag')
parser.add_option('--artist', type='string', dest='artist', action='store', help='download doujinshi by artist')
parser.add_option('--character', type='string', dest='character', action='store', help='download doujinshi by character')
@ -62,6 +63,8 @@ def cmd_parser():
help='page number of search results')
parser.add_option('--max-page', type='int', dest='max_page', action='store', default=1,
help='The max page when recursive download tagged doujinshi')
parser.add_option('--sorting', dest='sorting', action='store', default='date',
help='sorting of doujinshi (date / popular)', choices=['date', 'popular'])
# download options
parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', default='',

View File

@ -4,14 +4,15 @@ from __future__ import unicode_literals, print_function
import signal
import platform
import time
import multiprocessing
from nhentai.cmdline import cmd_parser, banner
from nhentai.parser import doujinshi_parser, search_parser, print_doujinshi, favorites_parser, tag_parser, login
from nhentai.doujinshi import Doujinshi
from nhentai.downloader import Downloader
from nhentai.downloader import Downloader, init_worker
from nhentai.logger import logger
from nhentai.constant import BASE_URL
from nhentai.utils import generate_html, generate_cbz, generate_main_html, check_cookie
from nhentai.utils import generate_html, generate_cbz, generate_main_html, check_cookie, signal_handler
def main():
@ -40,7 +41,7 @@ def main():
doujinshi_ids = map(lambda d: d['id'], doujinshis)
elif options.tag:
doujinshis = tag_parser(options.tag, max_page=options.max_page)
doujinshis = tag_parser(options.tag, sorting=options.sorting, max_page=options.max_page)
print_doujinshi(doujinshis)
if options.is_download and doujinshis:
doujinshi_ids = map(lambda d: d['id'], doujinshis)
@ -70,7 +71,7 @@ def main():
doujinshi_ids = map(lambda d: d['id'], doujinshis)
elif options.keyword:
doujinshis = search_parser(options.keyword, options.page)
doujinshis = search_parser(options.keyword, sorting=options.sorting, page=options.page)
print_doujinshi(doujinshis)
if options.is_download:
doujinshi_ids = map(lambda d: d['id'], doujinshis)
@ -79,25 +80,35 @@ def main():
doujinshi_ids = options.id
if doujinshi_ids:
for id_ in doujinshi_ids:
for i, id_ in enumerate(doujinshi_ids):
if options.delay:
time.sleep(options.delay)
doujinshi_info = doujinshi_parser(id_)
doujinshi_list.append(Doujinshi(name_format=options.name_format, **doujinshi_info))
if doujinshi_info:
doujinshi_list.append(Doujinshi(name_format=options.name_format, **doujinshi_info))
if (i + 1) % 10 == 0:
logger.info('Progress: %d / %d' % (i + 1, len(doujinshi_ids)))
if not options.is_show:
downloader = Downloader(path=options.output_dir,
thread=options.threads, timeout=options.timeout, delay=options.delay)
downloader = Downloader(path=options.output_dir, size=options.threads,
timeout=options.timeout, delay=options.delay)
for doujinshi in doujinshi_list:
doujinshi.downloader = downloader
doujinshi.download()
if not options.is_nohtml and not options.is_cbz:
generate_html(options.output_dir, doujinshi)
elif options.is_cbz:
generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir)
if options.main_viewer:
generate_main_html(options.output_dir)
if not platform.system() == 'Windows':
logger.log(15, '🍻 All done.')
else:
@ -107,12 +118,8 @@ def main():
[doujinshi.show() for doujinshi in doujinshi_list]
def signal_handler(signal, frame):
    """Handle Ctrl-C (SIGINT): log the interruption and exit with status 1.

    Uses the two-argument signature expected by ``signal.signal`` handlers.

    :param signal: number of the delivered signal (unused); note that this
        name shadows the ``signal`` module inside the function
    :param frame: stack frame that was interrupted (unused)
    :raises SystemExit: always, with exit status 1
    """
    # Imported locally so the block does not rely on a file-level import.
    import sys

    logger.error('Ctrl-C signal received. Stopping...')
    # sys.exit() is always available, whereas the bare exit() helper is
    # injected by the ``site`` module for interactive use and is missing
    # when the interpreter runs without it (e.g. ``python -S``).
    sys.exit(1)
# Register signal_handler as the handler for SIGINT (Ctrl-C).
signal.signal(signal.SIGINT, signal_handler)
# Run the command-line entry point only when this module is executed as a script.
if __name__ == '__main__':
main()

View File

@ -1,10 +1,15 @@
# coding: utf-8
from __future__ import unicode_literals, print_function
import multiprocessing
import signal
from future.builtins import str as text
import os
import requests
import threadpool
import time
import multiprocessing as mp
try:
from urllib.parse import urlparse
@ -13,29 +18,25 @@ except ImportError:
from nhentai.logger import logger
from nhentai.parser import request
from nhentai.utils import Singleton
from nhentai.utils import Singleton, signal_handler
# Turn off urllib3 warnings through the urllib3 exposed as requests.packages.
requests.packages.urllib3.disable_warnings()
# Module-level semaphore: download_wrapper() reads its value before starting a
# job, and subprocess_signal() tries to acquire it when SIGINT arrives.
semaphore = mp.Semaphore()
class NHentaiImageNotExistException(Exception):
    """Raised when an image request does not come back with HTTP status 200."""


# Backward-compatible alias: keeps the pre-rename spelling importable so that
# existing ``except NhentaiImageNotExistException`` clauses keep working.
NhentaiImageNotExistException = NHentaiImageNotExistException
class Downloader(Singleton):
def __init__(self, path='', thread=1, timeout=30, delay=0):
if not isinstance(thread, (int, )) or thread < 1 or thread > 15:
raise ValueError('Invalid threads count')
def __init__(self, path='', size=5, timeout=30, delay=0):
self.size = size
self.path = str(path)
self.thread_count = thread
self.threads = []
self.thread_pool = None
self.timeout = timeout
self.delay = delay
def _download(self, url, folder='', filename='', retried=0):
def download_(self, url, folder='', filename='', retried=0):
if self.delay:
time.sleep(self.delay)
logger.info('Starting to download {0} ...'.format(url))
@ -54,9 +55,9 @@ class Downloader(Singleton):
try:
response = request('get', url, stream=True, timeout=self.timeout)
if response.status_code != 200:
raise NhentaiImageNotExistException
raise NHentaiImageNotExistException
except NhentaiImageNotExistException as e:
except NHentaiImageNotExistException as e:
raise e
except Exception as e:
@ -78,27 +79,37 @@ class Downloader(Singleton):
except (requests.HTTPError, requests.Timeout) as e:
if retried < 3:
logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
return 0, self._download(url=url, folder=folder, filename=filename, retried=retried+1)
return 0, self.download_(url=url, folder=folder, filename=filename, retried=retried+1)
else:
return 0, None
except NhentaiImageNotExistException as e:
except NHentaiImageNotExistException as e:
os.remove(os.path.join(folder, base_filename.zfill(3) + extension))
return -1, url
except Exception as e:
import traceback
traceback.print_stack()
logger.critical(str(e))
return 0, None
except KeyboardInterrupt:
return -3, None
return 1, url
def _download_callback(self, request, result):
def _download_callback(self, result):
result, data = result
if result == 0:
logger.warning('fatal errors occurred, ignored')
# exit(1)
elif result == -1:
logger.warning('url {} return status code 404'.format(data))
elif result == -2:
logger.warning('Ctrl-C pressed, exiting sub processes ...')
elif result == -3:
# workers wont be run, just pass
pass
else:
logger.log(15, '{0} downloaded successfully'.format(data))
@ -115,14 +126,34 @@ class Downloader(Singleton):
os.makedirs(folder)
except EnvironmentError as e:
logger.critical('{0}'.format(str(e)))
exit(1)
else:
logger.warn('Path \'{0}\' already exist.'.format(folder))
queue = [([url], {'folder': folder}) for url in queue]
queue = [(self, url, folder) for url in queue]
self.thread_pool = threadpool.ThreadPool(self.thread_count)
requests_ = threadpool.makeRequests(self._download, queue, self._download_callback)
[self.thread_pool.putRequest(req) for req in requests_]
pool = multiprocessing.Pool(self.size, init_worker)
self.thread_pool.wait()
for item in queue:
pool.apply_async(download_wrapper, args=item, callback=self._download_callback)
pool.close()
pool.join()
def download_wrapper(obj, url, folder=''):
    """Run ``Downloader.download_`` in a pool worker unless cancelled.

    The module-level ``semaphore`` doubles as a cancellation flag: once
    ``subprocess_signal`` has acquired it, its value reads as zero and
    jobs that have not started yet are skipped.

    :param obj: ``Downloader`` instance, passed explicitly as ``self``
    :param url: URL handed on to ``download_``
    :param folder: destination folder handed on to ``download_``
    :return: the ``(status, data)`` tuple from ``download_``, or
        ``(-3, None)`` when the job was skipped
    """
    try:
        cancelled = not semaphore.get_value()
    except NotImplementedError:
        # sem_getvalue() is not implemented on some platforms (e.g. macOS).
        # Without a readable flag, carry on with the download rather than
        # failing every job.
        cancelled = False

    if cancelled:
        return -3, None
    return Downloader.download_(obj, url=url, folder=folder)
def init_worker():
    """Install ``subprocess_signal`` as the SIGINT handler of this process."""
    sigint = signal.SIGINT
    signal.signal(sigint, subprocess_signal)
# SIGINT handler installed by init_worker().
# Tries to take the module-level semaphore (waiting at most one second), logs
# a warning when the acquire succeeds, and raises KeyboardInterrupt.
# NOTE(review): indentation is lost in this view, so whether the raise is
# nested under the ``if`` cannot be confirmed here -- verify against the file.
# NOTE(review): the ``signal`` parameter shadows the imported signal module
# inside this function.
def subprocess_signal(signal, frame):
if semaphore.acquire(timeout=1):
logger.warning('Ctrl-C pressed, exiting sub processes ...')
raise KeyboardInterrupt

View File

@ -121,8 +121,8 @@ def doujinshi_parser(id_):
return doujinshi_parser(str(id_))
except Exception as e:
logger.critical(str(e))
raise SystemExit
logger.warn('Error: {}, ignored'.format(str(e)))
return None
html = BeautifulSoup(response, 'html.parser')
doujinshi_info = html.find('div', attrs={'id': 'info'})
@ -169,14 +169,9 @@ def doujinshi_parser(id_):
return doujinshi
def search_parser(keyword, page):
def search_parser(keyword, sorting='date', page=1):
logger.debug('Searching doujinshis of keyword {0}'.format(keyword))
try:
response = request('get', url=constant.SEARCH_URL, params={'q': keyword, 'page': page}).content
except requests.ConnectionError as e:
logger.critical(e)
logger.warn('If you are in China, please configure the proxy to fu*k GFW.')
raise SystemExit
response = request('get', url=constant.SEARCH_URL, params={'q': keyword, 'page': page, 'sort': sorting}).content
result = _get_title_and_id(response)
if not result:
@ -194,22 +189,19 @@ def print_doujinshi(doujinshi_list):
tabulate(tabular_data=doujinshi_list, headers=headers, tablefmt='rst'))
def tag_parser(tag_name, max_page=1, index=0):
def tag_parser(tag_name, sorting='date', max_page=1):
result = []
tag_name = tag_name.lower()
if ',' in tag_name:
tag_name = [i.strip().replace(' ', '-') for i in tag_name.split(',')]
else: tag_name = tag_name.replace(' ', '-')
tag_name = tag_name.replace(' ', '-')
if sorting == 'date':
sorting = ''
for p in range(1, max_page + 1):
logger.debug('Fetching page {0} for doujinshi with tag \'{1}\''.format(p, tag_name))
if isinstance(tag_name, str):
response = request('get', url='%s/%s/?page=%d' % (constant.TAG_URL[index], tag_name, p)).content
result += _get_title_and_id(response)
else:
for i in tag_name:
response = request('get', url='%s/%s/?page=%d' % (constant.TAG_URL[index], i, p)).content
result += _get_title_and_id(response)
response = request('get', url='%s/%s/%s?page=%d' % (constant.TAG_URL, tag_name, sorting, p)).content
result += _get_title_and_id(response)
if not result:
logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name))
return
@ -220,13 +212,13 @@ def tag_parser(tag_name, max_page=1, index=0):
return result
def __api_suspended_search_parser(keyword, page):
def __api_suspended_search_parser(keyword, sorting, page):
logger.debug('Searching doujinshis using keywords {0}'.format(keyword))
result = []
i = 0
while i < 5:
try:
response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page}).json()
response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page, 'sort': sorting}).json()
except Exception as e:
i += 1
if not i < 5:
@ -250,10 +242,10 @@ def __api_suspended_search_parser(keyword, page):
return result
def __api_suspended_tag_parser(tag_id, max_page=1):
def __api_suspended_tag_parser(tag_id, sorting, max_page=1):
logger.info('Searching for doujinshi with tag id {0}'.format(tag_id))
result = []
response = request('get', url=constant.TAG_API_URL, params={'sort': 'popular', 'tag_id': tag_id}).json()
response = request('get', url=constant.TAG_API_URL, params={'sort': sorting, 'tag_id': tag_id}).json()
page = max_page if max_page <= response['num_pages'] else int(response['num_pages'])
for i in range(1, page + 1):
@ -261,7 +253,7 @@ def __api_suspended_tag_parser(tag_id, max_page=1):
if page != 1:
response = request('get', url=constant.TAG_API_URL,
params={'sort': 'popular', 'tag_id': tag_id}).json()
params={'sort': sorting, 'tag_id': tag_id}).json()
for row in response['result']:
title = row['title']['english']
title = title[:85] + '..' if len(title) > 85 else title

View File

@ -207,3 +207,10 @@ an invalid filename.
# Remove [] from filename
filename = filename.replace('[]', '')
return filename
def signal_handler(signal, frame):
    """Handle Ctrl-C (SIGINT): log the interruption and exit with status 1.

    Uses the two-argument signature expected by ``signal.signal`` handlers.

    :param signal: number of the delivered signal (unused); note that this
        name shadows the ``signal`` module inside the function
    :param frame: stack frame that was interrupted (unused)
    :raises SystemExit: always, with exit status 1
    """
    # Imported locally so the block does not rely on a file-level import.
    import sys

    logger.error('Ctrl-C signal received. Stopping...')
    # sys.exit() is always available, whereas the bare exit() helper is
    # injected by the ``site`` module for interactive use and is missing
    # when the interpreter runs without it (e.g. ``python -S``).
    sys.exit(1)