Merge remote-tracking branch 'upstream/master' into dev

symant233 2019-12-11 11:02:43 +08:00
commit ab2dff4859
9 changed files with 193 additions and 70 deletions


@ -16,7 +16,7 @@ script:
- nhentai --cookie "csrftoken=3c4Mzn4f6NAI1awFqfIh495G3pv5Wade9n63Kx03mkSac8c2QR5vRR4jCwVzb3OR; sessionid=m034e2dyyxgbl9s07hbzgfhvadbap2tk"
- nhentai --search umaru
- nhentai --id=152503,146134 -t 10 --output=/tmp/ --cbz
- nhentai --tag lolicon
- nhentai --tag lolicon --sorting popular
- nhentai -F
- nhentai --file /tmp/test.txt
- nhentai --id=152503,146134 --gen-main --output=/tmp/


@ -74,6 +74,36 @@ Download by tag name:
    nhentai --tag lolicon --download --page=2

Download by artist name:

.. code-block:: bash

    nhentai --artist henreader --download

Download by character name:

.. code-block:: bash

    nhentai --character "kuro von einsbern" --download

Download by parody name:

.. code-block:: bash

    nhentai --parody "the idolmaster" --download

Download by group name:

.. code-block:: bash

    nhentai --group clesta --download

Download using multiple tags (--tag, --character, --parody, --artist and --group supported):

.. code-block:: bash

    nhentai --tag "lolicon, teasing" --artist "tamano kedama, atte nanakusa"

Download your favorites with delay:

.. code-block:: bash
@ -132,6 +162,7 @@ Other options:
# Generating options
--html               generate an HTML viewer in the current directory
--no-html            don't generate HTML after downloading
--gen-main           generate a main viewer that contains all the doujinshi in the folder
-C, --cbz            generate a Comic Book CBZ file
--rm-origin-dir      remove the downloaded doujinshi directory when the CBZ file is
                     generated


@ -1,3 +1,3 @@
__version__ = '0.3.6'
__version__ = '0.3.7'
__author__ = 'RicterZ'
__email__ = 'ricterzheng@gmail.com'


@ -48,8 +48,13 @@ def cmd_parser():
# doujinshi options
parser.add_option('--id', type='string', dest='id', action='store', help='doujinshi ids set, e.g. 1,2,3')
parser.add_option('--search', '-s', type='string', dest='keyword', action='store', help='search doujinshi by keyword')
parser.add_option('--search', '-s', type='string', dest='keyword', action='store',
help='search doujinshi by keyword')
parser.add_option('--tag', type='string', dest='tag', action='store', help='download doujinshi by tag')
parser.add_option('--artist', type='string', dest='artist', action='store', help='download doujinshi by artist')
parser.add_option('--character', type='string', dest='character', action='store', help='download doujinshi by character')
parser.add_option('--parody', type='string', dest='parody', action='store', help='download doujinshi by parody')
parser.add_option('--group', type='string', dest='group', action='store', help='download doujinshi by group')
parser.add_option('--favorites', '-F', action='store_true', dest='favorites',
help='list or download your favorites.')
@ -58,6 +63,8 @@ def cmd_parser():
help='page number of search results')
parser.add_option('--max-page', type='int', dest='max_page', action='store', default=1,
help='The max page when recursive download tagged doujinshi')
parser.add_option('--sorting', dest='sorting', action='store', default='date',
help='sorting of doujinshi (date / popular)', choices=['date', 'popular'])
# download options
parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', default='',
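A note on the new --sorting flag above: when choices is supplied and no explicit type, optparse infers type='choice' and rejects any value outside the list. A minimal standalone sketch (illustrative, not part of the commit):

# Illustrative optparse behaviour for the new --sorting flag.
from optparse import OptionParser

p = OptionParser()
p.add_option('--sorting', dest='sorting', action='store', default='date',
             help='sorting of doujinshi (date / popular)',
             choices=['date', 'popular'])

opts, _ = p.parse_args(['--sorting', 'popular'])
print(opts.sorting)                      # -> popular
# p.parse_args(['--sorting', 'newest'])  # exits with "invalid choice: 'newest'"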
@ -91,7 +98,8 @@ def cmd_parser():
help='set cookie of nhentai to bypass Google recaptcha')
try:
sys.argv = list(map(lambda x: unicode(x.decode(sys.stdin.encoding)), sys.argv))
sys.argv = [unicode(i.decode(sys.stdin.encoding)) for i in sys.argv]
print()
except (NameError, TypeError):
pass
except UnicodeDecodeError:
@ -104,7 +112,8 @@ def cmd_parser():
exit(0)
if args.main_viewer and not args.id and not args.keyword and \
not args.tag and not args.favorites:
not args.tag and not args.artist and not args.character and \
not args.parody and not args.group and not args.favorites:
generate_main_html()
exit(0)
@ -155,21 +164,23 @@ def cmd_parser():
exit(1)
if args.id:
_ = map(lambda id_: id_.strip(), args.id.split(','))
args.id = set(map(int, filter(lambda id_: id_.isdigit(), _)))
_ = [i.strip() for i in args.id.split(',')]
args.id = set(int(i) for i in _ if i.isdigit())
if args.file:
with open(args.file, 'r') as f:
_ = map(lambda id: id.strip(), f.readlines())
args.id = set(map(int, filter(lambda id_: id_.isdigit(), _)))
_ = [i.strip() for i in f.readlines()]
args.id = set(int(i) for i in _ if i.isdigit())
if (args.is_download or args.is_show) and not args.id and not args.keyword and \
not args.tag and not args.favorites:
not args.tag and not args.artist and not args.character and \
not args.parody and not args.group and not args.favorites:
logger.critical('Doujinshi id(s) are required for downloading')
parser.print_help()
exit(1)
if not args.keyword and not args.id and not args.tag and not args.favorites:
if not args.keyword and not args.id and not args.tag and not args.artist and \
not args.character and not args.parody and not args.group and not args.favorites:
parser.print_help()
exit(1)
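The map/filter chains above are replaced with comprehensions; besides reading better, this sidesteps the Python 3 pitfall that map() returns a one-shot iterator rather than a list. A sketch of the equivalent parsing, with a hypothetical input value:

# Equivalent of the rewritten --id parsing above.
raw = '152503, 146134, not-an-id'    # e.g. the string passed via --id
parts = [i.strip() for i in raw.split(',')]
ids = set(int(i) for i in parts if i.isdigit())
print(ids)                           # -> {152503, 146134}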


@ -4,14 +4,15 @@ from __future__ import unicode_literals, print_function
import signal
import platform
import time
import multiprocessing
from nhentai.cmdline import cmd_parser, banner
from nhentai.parser import doujinshi_parser, search_parser, print_doujinshi, favorites_parser, tag_parser, login
from nhentai.doujinshi import Doujinshi
from nhentai.downloader import Downloader
from nhentai.downloader import Downloader, init_worker
from nhentai.logger import logger
from nhentai.constant import BASE_URL
from nhentai.utils import generate_html, generate_cbz, generate_main_html, check_cookie
from nhentai.utils import generate_html, generate_cbz, generate_main_html, check_cookie, signal_handler
def main():
@ -19,7 +20,7 @@ def main():
options = cmd_parser()
logger.info('Using mirror: {0}'.format(BASE_URL))
from nhentai.constant import PROXY
from nhentai.constant import PROXY
# constant.PROXY will be changed after cmd_parser()
if PROXY != {}:
logger.info('Using proxy: {0}'.format(PROXY))
@ -37,43 +38,77 @@ def main():
doujinshis = favorites_parser()
print_doujinshi(doujinshis)
if options.is_download and doujinshis:
doujinshi_ids = map(lambda d: d['id'], doujinshis)
doujinshi_ids = [i['id'] for i in doujinshis]
elif options.tag:
doujinshis = tag_parser(options.tag, max_page=options.max_page)
doujinshis = tag_parser(options.tag, sorting=options.sorting, max_page=options.max_page)
print_doujinshi(doujinshis)
if options.is_download and doujinshis:
doujinshi_ids = map(lambda d: d['id'], doujinshis)
doujinshi_ids = [i['id'] for i in doujinshis]
elif options.artist:
doujinshis = tag_parser(options.artist, max_page=options.max_page, index=1)
print_doujinshi(doujinshis)
if options.is_download and doujinshis:
doujinshi_ids = [i['id'] for i in doujinshis]
elif options.character:
doujinshis = tag_parser(options.character, max_page=options.max_page, index=2)
print_doujinshi(doujinshis)
if options.is_download and doujinshis:
doujinshi_ids = [i['id'] for i in doujinshis]
elif options.parody:
doujinshis = tag_parser(options.parody, max_page=options.max_page, index=3)
print_doujinshi(doujinshis)
if options.is_download and doujinshis:
doujinshi_ids = [i['id'] for i in doujinshis]
elif options.group:
doujinshis = tag_parser(options.group, max_page=options.max_page, index=4)
print_doujinshi(doujinshis)
if options.is_download and doujinshis:
doujinshi_ids = [i['id'] for i in doujinshis]
elif options.keyword:
doujinshis = search_parser(options.keyword, options.page)
doujinshis = search_parser(options.keyword, sorting=options.sorting, page=options.page)
print_doujinshi(doujinshis)
if options.is_download:
doujinshi_ids = map(lambda d: d['id'], doujinshis)
doujinshi_ids = [i['id'] for i in doujinshis]
elif not doujinshi_ids:
doujinshi_ids = options.id
if doujinshi_ids:
for id_ in doujinshi_ids:
for i, id_ in enumerate(doujinshi_ids):
if options.delay:
time.sleep(options.delay)
doujinshi_info = doujinshi_parser(id_)
doujinshi_list.append(Doujinshi(name_format=options.name_format, **doujinshi_info))
if doujinshi_info:
doujinshi_list.append(Doujinshi(name_format=options.name_format, **doujinshi_info))
if (i + 1) % 10 == 0:
logger.info('Progress: %d / %d' % (i + 1, len(doujinshi_ids)))
if not options.is_show:
downloader = Downloader(path=options.output_dir,
thread=options.threads, timeout=options.timeout, delay=options.delay)
downloader = Downloader(path=options.output_dir, size=options.threads,
timeout=options.timeout, delay=options.delay)
for doujinshi in doujinshi_list:
doujinshi.downloader = downloader
doujinshi.download()
if not options.is_nohtml and not options.is_cbz:
generate_html(options.output_dir, doujinshi)
elif options.is_cbz:
generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir)
if options.main_viewer:
generate_main_html(options.output_dir)
if not platform.system() == 'Windows':
logger.log(15, '🍻 All done.')
else:
@ -83,12 +118,8 @@ def main():
[doujinshi.show() for doujinshi in doujinshi_list]
def signal_handler(signal, frame):
logger.error('Ctrl-C signal received. Stopping...')
exit(1)
signal.signal(signal.SIGINT, signal_handler)
if __name__ == '__main__':
main()
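The five tag/artist/character/parody/group branches above differ only in the option read and the index handed to tag_parser. A hypothetical consolidation (not part of this commit; it reuses the names from the diff and passes sorting everywhere, whereas the commit passes it only for --tag):

# Hypothetical refactor: one loop over (option name, TAG_URL index) pairs.
for name, index in [('tag', 0), ('artist', 1), ('character', 2),
                    ('parody', 3), ('group', 4)]:
    value = getattr(options, name)
    if value:
        doujinshis = tag_parser(value, sorting=options.sorting,
                                max_page=options.max_page, index=index)
        print_doujinshi(doujinshis)
        if options.is_download and doujinshis:
            doujinshi_ids = [i['id'] for i in doujinshis]
        break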


@ -17,7 +17,12 @@ __api_suspended_SEARCH_URL = '%s/api/galleries/search' % BASE_URL
DETAIL_URL = '%s/g' % BASE_URL
SEARCH_URL = '%s/search/' % BASE_URL
TAG_URL = '%s/tag' % BASE_URL
TAG_URL = ['%s/tag' % BASE_URL,
'%s/artist' % BASE_URL,
'%s/character' % BASE_URL,
'%s/parody' % BASE_URL,
'%s/group' % BASE_URL]
TAG_API_URL = '%s/api/galleries/tagged' % BASE_URL
LOGIN_URL = '%s/login/' % BASE_URL
CHALLENGE_URL = '%s/challenge' % BASE_URL
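Turning TAG_URL into a list is what the new index argument to tag_parser keys into: 0 is the plain tag listing, 1 artist, 2 character, 3 parody, 4 group. For illustration, with BASE_URL assumed to be the default mirror:

# Illustration of index-based endpoint selection (BASE_URL assumed).
BASE_URL = 'https://nhentai.net'
TAG_URL = ['%s/tag' % BASE_URL, '%s/artist' % BASE_URL,
           '%s/character' % BASE_URL, '%s/parody' % BASE_URL,
           '%s/group' % BASE_URL]
print(TAG_URL[1])    # -> https://nhentai.net/artist (used by --artist)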


@ -1,10 +1,15 @@
# coding: utf-8
from __future__ import unicode_literals, print_function
import multiprocessing
import signal
from future.builtins import str as text
import os
import requests
import threadpool
import time
import multiprocessing as mp
try:
from urllib.parse import urlparse
@ -13,29 +18,25 @@ except ImportError:
from nhentai.logger import logger
from nhentai.parser import request
from nhentai.utils import Singleton
from nhentai.utils import Singleton, signal_handler
requests.packages.urllib3.disable_warnings()
semaphore = mp.Semaphore()
class NhentaiImageNotExistException(Exception):
class NHentaiImageNotExistException(Exception):
pass
class Downloader(Singleton):
def __init__(self, path='', thread=1, timeout=30, delay=0):
if not isinstance(thread, (int, )) or thread < 1 or thread > 15:
raise ValueError('Invalid threads count')
def __init__(self, path='', size=5, timeout=30, delay=0):
self.size = size
self.path = str(path)
self.thread_count = thread
self.threads = []
self.thread_pool = None
self.timeout = timeout
self.delay = delay
def _download(self, url, folder='', filename='', retried=0):
def download_(self, url, folder='', filename='', retried=0):
if self.delay:
time.sleep(self.delay)
logger.info('Starting to download {0} ...'.format(url))
@ -54,9 +55,9 @@ class Downloader(Singleton):
try:
response = request('get', url, stream=True, timeout=self.timeout)
if response.status_code != 200:
raise NhentaiImageNotExistException
raise NHentaiImageNotExistException
except NhentaiImageNotExistException as e:
except NHentaiImageNotExistException as e:
raise e
except Exception as e:
@ -78,27 +79,37 @@ class Downloader(Singleton):
except (requests.HTTPError, requests.Timeout) as e:
if retried < 3:
logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
return 0, self._download(url=url, folder=folder, filename=filename, retried=retried+1)
return 0, self.download_(url=url, folder=folder, filename=filename, retried=retried+1)
else:
return 0, None
except NhentaiImageNotExistException as e:
except NHentaiImageNotExistException as e:
os.remove(os.path.join(folder, base_filename.zfill(3) + extension))
return -1, url
except Exception as e:
import traceback
traceback.print_stack()
logger.critical(str(e))
return 0, None
except KeyboardInterrupt:
return -3, None
return 1, url
def _download_callback(self, request, result):
def _download_callback(self, result):
result, data = result
if result == 0:
logger.warning('fatal errors occurred, ignored')
# exit(1)
elif result == -1:
logger.warning('url {} return status code 404'.format(data))
elif result == -2:
logger.warning('Ctrl-C pressed, exiting sub processes ...')
elif result == -3:
# workers won't be run, just pass
pass
else:
logger.log(15, '{0} downloaded successfully'.format(data))
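The callback decodes the (status, data) tuples returned by download_ and download_wrapper: 1 means success, 0 a fatal but ignored error, -1 a missing image (404), and -3 a worker skipped because Ctrl-C was already caught (-2 is reserved for the interrupt message). An illustrative restatement of that protocol:

# Illustrative restatement of the downloader's (status, data) protocol.
def describe(status, data):
    messages = {
        1: '{0} downloaded successfully'.format(data),
        0: 'fatal errors occurred, ignored',
        -1: 'url {0} return status code 404'.format(data),
        -2: 'Ctrl-C pressed, exiting sub processes ...',
        -3: 'worker skipped, shutdown already in progress',
    }
    return messages[status]

print(describe(-1, 'https://example.com/missing.jpg'))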
@ -115,14 +126,34 @@ class Downloader(Singleton):
os.makedirs(folder)
except EnvironmentError as e:
logger.critical('{0}'.format(str(e)))
exit(1)
else:
logger.warn('Path \'{0}\' already exists.'.format(folder))
queue = [([url], {'folder': folder}) for url in queue]
queue = [(self, url, folder) for url in queue]
self.thread_pool = threadpool.ThreadPool(self.thread_count)
requests_ = threadpool.makeRequests(self._download, queue, self._download_callback)
[self.thread_pool.putRequest(req) for req in requests_]
pool = multiprocessing.Pool(self.size, init_worker)
self.thread_pool.wait()
for item in queue:
pool.apply_async(download_wrapper, args=item, callback=self._download_callback)
pool.close()
pool.join()
def download_wrapper(obj, url, folder=''):
if semaphore.get_value():
return Downloader.download_(obj, url=url, folder=folder)
else:
return -3, None
def init_worker():
signal.signal(signal.SIGINT, subprocess_signal)
def subprocess_signal(signal, frame):
if semaphore.acquire(timeout=1):
logger.warning('Ctrl-C pressed, exiting sub processes ...')
raise KeyboardInterrupt
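This swaps the threadpool dependency for multiprocessing.Pool. Worker processes inherit SIGINT, so each installs its own handler via the pool initializer, and a shared semaphore ensures the warning is printed only once while remaining workers bail out early. A self-contained sketch of the same pattern, with illustrative names and no nhentai code (note that Semaphore.get_value() is not implemented on macOS):

# Standalone sketch of the worker-pool interrupt pattern used above.
import multiprocessing
import signal
import time

semaphore = multiprocessing.Semaphore()

def subprocess_signal(sig, frame):
    # The first worker to catch Ctrl-C grabs the semaphore and logs once.
    if semaphore.acquire(timeout=1):
        print('Ctrl-C pressed, exiting sub processes ...')
    raise KeyboardInterrupt

def init_worker():
    signal.signal(signal.SIGINT, subprocess_signal)

def work(n):
    if not semaphore.get_value():    # shutdown already started, skip work
        return -3, None
    time.sleep(0.2)
    return 1, n

if __name__ == '__main__':
    pool = multiprocessing.Pool(4, init_worker)
    for i in range(10):
        pool.apply_async(work, args=(i,))
    pool.close()
    pool.join()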


@ -121,8 +121,8 @@ def doujinshi_parser(id_):
return doujinshi_parser(str(id_))
except Exception as e:
logger.critical(str(e))
raise SystemExit
logger.warn('Error: {}, ignored'.format(str(e)))
return None
html = BeautifulSoup(response, 'html.parser')
doujinshi_info = html.find('div', attrs={'id': 'info'})
@ -169,14 +169,9 @@ def doujinshi_parser(id_):
return doujinshi
def search_parser(keyword, page):
def search_parser(keyword, sorting='date', page=1):
logger.debug('Searching doujinshis of keyword {0}'.format(keyword))
try:
response = request('get', url=constant.SEARCH_URL, params={'q': keyword, 'page': page}).content
except requests.ConnectionError as e:
logger.critical(e)
logger.warn('If you are in China, please configure the proxy to fu*k GFW.')
raise SystemExit
response = request('get', url=constant.SEARCH_URL, params={'q': keyword, 'page': page, 'sort': sorting}).content
result = _get_title_and_id(response)
if not result:
@ -194,16 +189,28 @@ def print_doujinshi(doujinshi_list):
tabulate(tabular_data=doujinshi_list, headers=headers, tablefmt='rst'))
def tag_parser(tag_name, max_page=1):
def tag_parser(tag_name, sorting='date', max_page=1, index=0):
result = []
tag_name = tag_name.lower()
tag_name = tag_name.replace(' ', '-')
if ',' in tag_name:
tag_name = [i.strip().replace(' ', '-') for i in tag_name.split(',')]
else:
tag_name = tag_name.replace(' ', '-')
if sorting == 'date':
sorting = ''
for p in range(1, max_page + 1):
logger.debug('Fetching page {0} for doujinshi with tag \'{1}\''.format(p, tag_name))
response = request('get', url='%s/%s/?page=%d' % (constant.TAG_URL, tag_name, p)).content
if isinstance(tag_name, str):
logger.debug('Fetching page {0} for doujinshi with tag \'{1}\''.format(p, tag_name))
response = request('get', url='%s/%s/%s?page=%d' % (constant.TAG_URL[index], tag_name, sorting, p)).content
result += _get_title_and_id(response)
else:
for i in tag_name:
logger.debug('Fetching page {0} for doujinshi with tag \'{1}\''.format(p, i))
response = request('get',
url='%s/%s/%s?page=%d' % (constant.TAG_URL[index], i, sorting, p)).content
result += _get_title_and_id(response)
result += _get_title_and_id(response)
if not result:
logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name))
return
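Folding the sort order into the URL path means 'date' is rewritten to an empty segment while 'popular' becomes a /popular suffix. An illustrative construction using the names from the diff (BASE_URL assumed):

# Sketch of the URLs tag_parser builds (index 0 = the plain tag listing).
TAG_URL = ['https://nhentai.net/tag']    # truncated to the entry used here
for sorting in ('', 'popular'):          # '' is what 'date' is rewritten to
    print('%s/%s/%s?page=%d' % (TAG_URL[0], 'lolicon', sorting, 1))
# -> https://nhentai.net/tag/lolicon/?page=1
# -> https://nhentai.net/tag/lolicon/popular?page=1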
@ -214,13 +221,13 @@ def tag_parser(tag_name, max_page=1):
return result
def __api_suspended_search_parser(keyword, page):
def __api_suspended_search_parser(keyword, sorting, page):
logger.debug('Searching doujinshis using keywords {0}'.format(keyword))
result = []
i = 0
while i < 5:
try:
response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page}).json()
response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page, 'sort': sorting}).json()
except Exception as e:
i += 1
if not i < 5:
@ -244,10 +251,10 @@ def __api_suspended_search_parser(keyword, page):
return result
def __api_suspended_tag_parser(tag_id, max_page=1):
def __api_suspended_tag_parser(tag_id, sorting, max_page=1):
logger.info('Searching for doujinshi with tag id {0}'.format(tag_id))
result = []
response = request('get', url=constant.TAG_API_URL, params={'sort': 'popular', 'tag_id': tag_id}).json()
response = request('get', url=constant.TAG_API_URL, params={'sort': sorting, 'tag_id': tag_id}).json()
page = max_page if max_page <= response['num_pages'] else int(response['num_pages'])
for i in range(1, page + 1):
@ -255,7 +262,7 @@ def __api_suspended_tag_parser(tag_id, max_page=1):
if page != 1:
response = request('get', url=constant.TAG_API_URL,
params={'sort': 'popular', 'tag_id': tag_id}).json()
params={'sort': sorting, 'tag_id': tag_id}).json()
for row in response['result']:
title = row['title']['english']
title = title[:85] + '..' if len(title) > 85 else title
@ -291,7 +298,7 @@ def __api_suspended_doujinshi_parser(id_):
doujinshi['name'] = response['title']['english']
doujinshi['subtitle'] = response['title']['japanese']
doujinshi['img_id'] = response['media_id']
doujinshi['ext'] = ''.join(map(lambda s: s['t'], response['images']['pages']))
doujinshi['ext'] = ''.join([i['t'] for i in response['images']['pages']])
doujinshi['pages'] = len(response['images']['pages'])
# gain information of the doujinshi


@ -207,3 +207,10 @@ an invalid filename.
# Remove [] from filename
filename = filename.replace('[]', '')
return filename
def signal_handler(signal, frame):
logger.error('Ctrl-C signal received. Stopping...')
exit(1)
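Moving signal_handler into utils lets both command.py and downloader.py import it; the caller then installs it with the standard signal module, as command.py does after this change:

# Wiring the shared handler up to SIGINT.
import signal
from nhentai.utils import signal_handler

signal.signal(signal.SIGINT, signal_handler)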