Mirror of https://github.com/RicterZ/nhentai.git (synced 2025-11-04 02:50:55 +01:00)

Merge branch 'dev' into master
.travis.yml
@@ -16,7 +16,7 @@ script:
     - nhentai --cookie "csrftoken=3c4Mzn4f6NAI1awFqfIh495G3pv5Wade9n63Kx03mkSac8c2QR5vRR4jCwVzb3OR; sessionid=m034e2dyyxgbl9s07hbzgfhvadbap2tk"
     - nhentai --search umaru
     - nhentai --id=152503,146134 -t 10 --output=/tmp/ --cbz
-    - nhentai --tag lolicon
+    - nhentai --tag lolicon --sorting popular
     - nhentai -F
     - nhentai --file /tmp/test.txt
     - nhentai --id=152503,146134 --gen-main --output=/tmp/
nhentai/__init__.py
@@ -1,3 +1,3 @@
-__version__ = '0.3.6'
+__version__ = '0.3.7'
 __author__ = 'RicterZ'
 __email__ = 'ricterzheng@gmail.com'
nhentai/cmdline.py
@@ -48,7 +48,8 @@ def cmd_parser():

     # doujinshi options
     parser.add_option('--id', type='string', dest='id', action='store', help='doujinshi ids set, e.g. 1,2,3')
-    parser.add_option('--search', '-s', type='string', dest='keyword', action='store', help='search doujinshi by keyword')
+    parser.add_option('--search', '-s', type='string', dest='keyword', action='store',
+                      help='search doujinshi by keyword')
     parser.add_option('--tag', type='string', dest='tag', action='store', help='download doujinshi by tag')
     parser.add_option('--artist', type='string', dest='artist', action='store', help='download doujinshi by artist')
     parser.add_option('--character', type='string', dest='character', action='store', help='download doujinshi by character')
@@ -62,6 +63,8 @@ def cmd_parser():
                       help='page number of search results')
     parser.add_option('--max-page', type='int', dest='max_page', action='store', default=1,
                       help='The max page when recursive download tagged doujinshi')
+    parser.add_option('--sorting', dest='sorting', action='store', default='date',
+                      help='sorting of doujinshi (date / popular)', choices=['date', 'popular'])

     # download options
     parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', default='',
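A note on the new --sorting option: optparse infers type='choice' when choices is supplied to a store action with no explicit type, so invalid values are rejected at parse time. A minimal standalone sketch (not part of this commit) of that behavior:

# Minimal sketch: how optparse validates the new --sorting option.
from optparse import OptionParser

parser = OptionParser()
parser.add_option('--sorting', dest='sorting', action='store', default='date',
                  help='sorting of doujinshi (date / popular)', choices=['date', 'popular'])

options, _ = parser.parse_args(['--sorting', 'popular'])
assert options.sorting == 'popular'
# parser.parse_args(['--sorting', 'newest']) would exit with an error like:
# "error: option --sorting: invalid choice: 'newest' (choose from 'date', 'popular')"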
nhentai/command.py
@@ -4,14 +4,15 @@ from __future__ import unicode_literals, print_function
 import signal
 import platform
 import time
+import multiprocessing

 from nhentai.cmdline import cmd_parser, banner
 from nhentai.parser import doujinshi_parser, search_parser, print_doujinshi, favorites_parser, tag_parser, login
 from nhentai.doujinshi import Doujinshi
-from nhentai.downloader import Downloader
+from nhentai.downloader import Downloader, init_worker
 from nhentai.logger import logger
 from nhentai.constant import BASE_URL
-from nhentai.utils import generate_html, generate_cbz, generate_main_html, check_cookie
+from nhentai.utils import generate_html, generate_cbz, generate_main_html, check_cookie, signal_handler


 def main():
@@ -40,7 +41,7 @@ def main():
             doujinshi_ids = map(lambda d: d['id'], doujinshis)

     elif options.tag:
-        doujinshis = tag_parser(options.tag, max_page=options.max_page)
+        doujinshis = tag_parser(options.tag, sorting=options.sorting, max_page=options.max_page)
         print_doujinshi(doujinshis)
         if options.is_download and doujinshis:
             doujinshi_ids = map(lambda d: d['id'], doujinshis)
@@ -70,7 +71,7 @@ def main():
             doujinshi_ids = map(lambda d: d['id'], doujinshis)

     elif options.keyword:
-        doujinshis = search_parser(options.keyword, options.page)
+        doujinshis = search_parser(options.keyword, sorting=options.sorting, page=options.page)
         print_doujinshi(doujinshis)
         if options.is_download:
             doujinshi_ids = map(lambda d: d['id'], doujinshis)
@@ -79,25 +80,35 @@ def main():
         doujinshi_ids = options.id

     if doujinshi_ids:
-        for id_ in doujinshi_ids:
+        for i, id_ in enumerate(doujinshi_ids):
+            if options.delay:
+                time.sleep(options.delay)
+
             doujinshi_info = doujinshi_parser(id_)
-            doujinshi_list.append(Doujinshi(name_format=options.name_format, **doujinshi_info))
+
+            if doujinshi_info:
+                doujinshi_list.append(Doujinshi(name_format=options.name_format, **doujinshi_info))
+
+            if (i + 1) % 10 == 0:
+                logger.info('Progress: %d / %d' % (i + 1, len(doujinshi_ids)))

     if not options.is_show:
-        downloader = Downloader(path=options.output_dir,
-                                thread=options.threads, timeout=options.timeout, delay=options.delay)
+        downloader = Downloader(path=options.output_dir, size=options.threads,
+                                timeout=options.timeout, delay=options.delay)

         for doujinshi in doujinshi_list:
-
             doujinshi.downloader = downloader
             doujinshi.download()
+
             if not options.is_nohtml and not options.is_cbz:
                 generate_html(options.output_dir, doujinshi)
             elif options.is_cbz:
                 generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir)

         if options.main_viewer:
             generate_main_html(options.output_dir)

         if not platform.system() == 'Windows':
             logger.log(15, '🍻 All done.')
         else:
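This hunk changes the failure semantics of the ID loop: doujinshi_parser now returns None on error (see the parser.py hunk below) instead of aborting the whole run, and main() skips those entries while logging progress every ten IDs. A self-contained sketch of the pattern, where fetch() is a hypothetical stand-in for doujinshi_parser:

# Sketch of the skip-on-failure pattern; fetch() is hypothetical.
def fetch(id_):
    return {'id': id_} if id_ % 2 else None  # pretend even ids fail

ids = list(range(1, 25))
items = []
for i, id_ in enumerate(ids):
    info = fetch(id_)
    if info:                      # failed lookups are skipped, not fatal
        items.append(info)
    if (i + 1) % 10 == 0:         # coarse progress logging
        print('Progress: %d / %d' % (i + 1, len(ids)))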
@@ -107,12 +118,8 @@ def main():
         [doujinshi.show() for doujinshi in doujinshi_list]


-def signal_handler(signal, frame):
-    logger.error('Ctrl-C signal received. Stopping...')
-    exit(1)
-
-
 signal.signal(signal.SIGINT, signal_handler)


 if __name__ == '__main__':
     main()
nhentai/downloader.py
@@ -1,10 +1,15 @@
 # coding: utf-8
 from __future__ import unicode_literals, print_function

+import multiprocessing
+import signal
+
 from future.builtins import str as text
 import os
 import requests
 import threadpool
 import time
+import multiprocessing as mp

 try:
     from urllib.parse import urlparse
@@ -13,29 +18,25 @@ except ImportError:

 from nhentai.logger import logger
 from nhentai.parser import request
-from nhentai.utils import Singleton
+from nhentai.utils import Singleton, signal_handler

 requests.packages.urllib3.disable_warnings()
+semaphore = mp.Semaphore()


-class NhentaiImageNotExistException(Exception):
+class NHentaiImageNotExistException(Exception):
     pass


 class Downloader(Singleton):

-    def __init__(self, path='', thread=1, timeout=30, delay=0):
-        if not isinstance(thread, (int, )) or thread < 1 or thread > 15:
-            raise ValueError('Invalid threads count')
+    def __init__(self, path='', size=5, timeout=30, delay=0):
+        self.size = size
         self.path = str(path)
-        self.thread_count = thread
-        self.threads = []
-        self.thread_pool = None
         self.timeout = timeout
         self.delay = delay

-    def _download(self, url, folder='', filename='', retried=0):
+    def download_(self, url, folder='', filename='', retried=0):
         if self.delay:
             time.sleep(self.delay)
         logger.info('Starting to download {0} ...'.format(url))
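Call sites change accordingly: the constructor now takes size (the worker-process count, default 5) instead of thread, and the old 1-15 range check is gone. Usage as implied by the command.py hunk above; the argument values are illustrative:

# Constructing the reworked Downloader (signature per this diff).
from nhentai.downloader import Downloader

downloader = Downloader(path='/tmp/', size=5, timeout=30, delay=0)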
@@ -54,9 +55,9 @@ class Downloader(Singleton):
                     try:
                         response = request('get', url, stream=True, timeout=self.timeout)
                         if response.status_code != 200:
-                            raise NhentaiImageNotExistException
+                            raise NHentaiImageNotExistException

-                    except NhentaiImageNotExistException as e:
+                    except NHentaiImageNotExistException as e:
                         raise e

                     except Exception as e:
@@ -78,27 +79,37 @@ class Downloader(Singleton):
         except (requests.HTTPError, requests.Timeout) as e:
             if retried < 3:
                 logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
-                return 0, self._download(url=url, folder=folder, filename=filename, retried=retried+1)
+                return 0, self.download_(url=url, folder=folder, filename=filename, retried=retried+1)
             else:
                 return 0, None

-        except NhentaiImageNotExistException as e:
+        except NHentaiImageNotExistException as e:
             os.remove(os.path.join(folder, base_filename.zfill(3) + extension))
             return -1, url

         except Exception as e:
             import traceback
             traceback.print_stack()
             logger.critical(str(e))
             return 0, None

+        except KeyboardInterrupt:
+            return -3, None
+
         return 1, url

-    def _download_callback(self, request, result):
+    def _download_callback(self, result):
         result, data = result
         if result == 0:
             logger.warning('fatal errors occurred, ignored')
             # exit(1)
         elif result == -1:
             logger.warning('url {} return status code 404'.format(data))
+        elif result == -2:
+            logger.warning('Ctrl-C pressed, exiting sub processes ...')
+        elif result == -3:
+            # workers wont be run, just pass
+            pass
         else:
             logger.log(15, '{0} downloaded successfully'.format(data))
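For reference, the (status, data) tuples that download_ returns and _download_callback dispatches on, collected from the hunks above:

# Status sentinels used by download_ / _download_callback in this diff.
STATUS = {
     1: 'downloaded successfully',
     0: 'fatal errors occurred, ignored (request failed after retries)',
    -1: 'url returned status code 404 (partial file removed)',
    -2: 'Ctrl-C pressed, exiting sub processes',
    -3: 'worker skipped, shutdown already requested',
}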
@@ -115,14 +126,34 @@ class Downloader(Singleton):
                 os.makedirs(folder)
             except EnvironmentError as e:
                 logger.critical('{0}'.format(str(e)))
                 exit(1)

         else:
             logger.warn('Path \'{0}\' already exist.'.format(folder))

-        queue = [([url], {'folder': folder}) for url in queue]
+        queue = [(self, url, folder) for url in queue]

-        self.thread_pool = threadpool.ThreadPool(self.thread_count)
-        requests_ = threadpool.makeRequests(self._download, queue, self._download_callback)
-        [self.thread_pool.putRequest(req) for req in requests_]
+        pool = multiprocessing.Pool(self.size, init_worker)

-        self.thread_pool.wait()
+        for item in queue:
+            pool.apply_async(download_wrapper, args=item, callback=self._download_callback)
+
+        pool.close()
+        pool.join()
+
+
+def download_wrapper(obj, url, folder=''):
+    if semaphore.get_value():
+        return Downloader.download_(obj, url=url, folder=folder)
+    else:
+        return -3, None
+
+
+def init_worker():
+    signal.signal(signal.SIGINT, subprocess_signal)
+
+
+def subprocess_signal(signal, frame):
+    if semaphore.acquire(timeout=1):
+        logger.warning('Ctrl-C pressed, exiting sub processes ...')
+
+    raise KeyboardInterrupt
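This is the core of the commit: threadpool is replaced by a multiprocessing.Pool whose workers install a custom SIGINT handler via the init_worker initializer. On Ctrl-C the first interrupted worker trips the shared semaphore and logs once; queued jobs that start after that see the semaphore is spent and bail out with the -3 sentinel. A runnable standalone sketch of this pattern (not the commit's code; job() stands in for the real download; like the original it assumes fork start semantics so the semaphore is inherited, and note Semaphore.get_value() is not implemented on macOS):

import multiprocessing
import signal
import time

semaphore = multiprocessing.Semaphore()


def subprocess_signal(sig, frame):
    # only the first worker to grab the semaphore logs the interrupt
    if semaphore.acquire(timeout=1):
        print('Ctrl-C pressed, exiting sub processes ...')
    raise KeyboardInterrupt


def init_worker():
    signal.signal(signal.SIGINT, subprocess_signal)


def job(n):
    if not semaphore.get_value():   # shutdown already requested
        return -3, None
    time.sleep(0.5)                 # stand-in for the real download
    return 1, n


def callback(result):
    status, data = result
    if status == 1:
        print('{0} downloaded successfully'.format(data))


if __name__ == '__main__':
    pool = multiprocessing.Pool(4, init_worker)
    for n in range(16):
        pool.apply_async(job, args=(n,), callback=callback)
    pool.close()
    pool.join()

download_wrapper is module-level (rather than a bound method) because arguments to apply_async must be picklable; the Downloader instance is therefore passed explicitly as the first tuple element.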
nhentai/parser.py
@@ -121,8 +121,8 @@ def doujinshi_parser(id_):
             return doujinshi_parser(str(id_))

     except Exception as e:
-        logger.critical(str(e))
-        raise SystemExit
+        logger.warn('Error: {}, ignored'.format(str(e)))
+        return None

     html = BeautifulSoup(response, 'html.parser')
     doujinshi_info = html.find('div', attrs={'id': 'info'})
@@ -169,14 +169,9 @@ def doujinshi_parser(id_):
     return doujinshi


-def search_parser(keyword, page):
+def search_parser(keyword, sorting='date', page=1):
     logger.debug('Searching doujinshis of keyword {0}'.format(keyword))
-    try:
-        response = request('get', url=constant.SEARCH_URL, params={'q': keyword, 'page': page}).content
-    except requests.ConnectionError as e:
-        logger.critical(e)
-        logger.warn('If you are in China, please configure the proxy to fu*k GFW.')
-        raise SystemExit
+    response = request('get', url=constant.SEARCH_URL, params={'q': keyword, 'page': page, 'sort': sorting}).content

     result = _get_title_and_id(response)
     if not result:
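search_parser now forwards sorting verbatim as the sort query parameter and drops the try/except around the request. A hedged sketch of the resulting request; the SEARCH_URL value is an assumption, since the diff only shows constant.SEARCH_URL:

# Sketch only: SEARCH_URL value is assumed.
import requests

SEARCH_URL = 'https://nhentai.net/search/'

def search(keyword, sorting='date', page=1):
    return requests.get(SEARCH_URL, params={'q': keyword, 'page': page, 'sort': sorting})

r = search('umaru', sorting='popular')
print(r.url)  # e.g. https://nhentai.net/search/?q=umaru&page=1&sort=popular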
@@ -194,22 +189,19 @@ def print_doujinshi(doujinshi_list):
                 tabulate(tabular_data=doujinshi_list, headers=headers, tablefmt='rst'))


-def tag_parser(tag_name, max_page=1, index=0):
+def tag_parser(tag_name, sorting='date', max_page=1):
     result = []
     tag_name = tag_name.lower()
-    if ',' in tag_name:
-        tag_name = [i.strip().replace(' ', '-') for i in tag_name.split(',')]
-    else: tag_name = tag_name.replace(' ', '-')
+    tag_name = tag_name.replace(' ', '-')
+
+    if sorting == 'date':
+        sorting = ''

     for p in range(1, max_page + 1):
         logger.debug('Fetching page {0} for doujinshi with tag \'{1}\''.format(p, tag_name))
-        if isinstance(tag_name, str):
-            response = request('get', url='%s/%s/?page=%d' % (constant.TAG_URL[index], tag_name, p)).content
-            result += _get_title_and_id(response)
-        else:
-            for i in tag_name:
-                response = request('get', url='%s/%s/?page=%d' % (constant.TAG_URL[index], i, p)).content
-                result += _get_title_and_id(response)
+        response = request('get', url='%s/%s/%s?page=%d' % (constant.TAG_URL, tag_name, sorting, p)).content
+
+        result += _get_title_and_id(response)
         if not result:
             logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name))
             return
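The rewritten tag_parser drops both the comma-separated multi-tag handling and the TAG_URL[index] list lookup; sorting is spliced into the URL path, with 'date' mapping to the plain tag page. A sketch of the URL scheme this implies; the TAG_URL value is an assumption, since the diff only shows constant.TAG_URL:

# Sketch only: TAG_URL value is assumed.
TAG_URL = 'https://nhentai.net/tag'


def tag_page_url(tag_name, sorting='date', page=1):
    tag_name = tag_name.lower().replace(' ', '-')
    if sorting == 'date':
        sorting = ''
    return '%s/%s/%s?page=%d' % (TAG_URL, tag_name, sorting, page)


print(tag_page_url('full color'))                     # .../tag/full-color/?page=1
print(tag_page_url('full color', sorting='popular'))  # .../tag/full-color/popular?page=1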
@@ -220,13 +212,13 @@ def tag_parser(tag_name, max_page=1, index=0):
     return result


-def __api_suspended_search_parser(keyword, page):
+def __api_suspended_search_parser(keyword, sorting, page):
     logger.debug('Searching doujinshis using keywords {0}'.format(keyword))
     result = []
     i = 0
     while i < 5:
         try:
-            response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page}).json()
+            response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page, 'sort': sorting}).json()
         except Exception as e:
             i += 1
             if not i < 5:
@@ -250,10 +242,10 @@ def __api_suspended_search_parser(keyword, page):
     return result


-def __api_suspended_tag_parser(tag_id, max_page=1):
+def __api_suspended_tag_parser(tag_id, sorting, max_page=1):
     logger.info('Searching for doujinshi with tag id {0}'.format(tag_id))
     result = []
-    response = request('get', url=constant.TAG_API_URL, params={'sort': 'popular', 'tag_id': tag_id}).json()
+    response = request('get', url=constant.TAG_API_URL, params={'sort': sorting, 'tag_id': tag_id}).json()
     page = max_page if max_page <= response['num_pages'] else int(response['num_pages'])

     for i in range(1, page + 1):
@@ -261,7 +253,7 @@ def __api_suspended_tag_parser(tag_id, max_page=1):

         if page != 1:
             response = request('get', url=constant.TAG_API_URL,
-                               params={'sort': 'popular', 'tag_id': tag_id}).json()
+                               params={'sort': sorting, 'tag_id': tag_id}).json()

     for row in response['result']:
         title = row['title']['english']
         title = title[:85] + '..' if len(title) > 85 else title
nhentai/utils.py
@@ -207,3 +207,10 @@ an invalid filename.
     # Remove [] from filename
     filename = filename.replace('[]', '')
     return filename
+
+
+def signal_handler(signal, frame):
+    logger.error('Ctrl-C signal received. Stopping...')
+    exit(1)