diff --git a/nhentai/doujinshi.py b/nhentai/doujinshi.py
index 8d0b9a8..d7de7d3 100644
--- a/nhentai/doujinshi.py
+++ b/nhentai/doujinshi.py
@@ -5,6 +5,7 @@ from future.builtins import range
 
 from nhentai.constant import DETAIL_URL, IMAGE_URL
 from nhentai.logger import logger
+from nhentai.utils import format_filename
 
 
 class DoujinshiInfo(dict):
@@ -51,7 +52,7 @@ class Doujinshi(object):
             download_queue = []
             for i in range(1, self.pages + 1):
                 download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i, self.ext))
-            self.downloader.download(download_queue, self.id)
+            self.downloader.download(download_queue, format_filename('%s-%s' % (self.id, self.name[:200])))
         else:
             logger.critical('Downloader has not be loaded')
 
diff --git a/nhentai/downloader.py b/nhentai/downloader.py
index 2587d52..1a6e61c 100644
--- a/nhentai/downloader.py
+++ b/nhentai/downloader.py
@@ -1,67 +1,122 @@
 # coding: utf-8
-from __future__ import print_function, unicode_literals
-from tabulate import tabulate
-from future.builtins import range
+from __future__ import unicode_literals, print_function
+from future.builtins import str as text
+import os
+import requests
+import threadpool
+try:
+    from urllib.parse import urlparse
+except ImportError:
+    from urlparse import urlparse
 
-from nhentai.constant import DETAIL_URL, IMAGE_URL
 from nhentai.logger import logger
-from nhentai.utils import format_filename
+from nhentai.parser import request
+from nhentai.utils import Singleton
 
 
-class DoujinshiInfo(dict):
-    def __init__(self, **kwargs):
-        super(DoujinshiInfo, self).__init__(**kwargs)
+requests.packages.urllib3.disable_warnings()
 
-    def __getattr__(self, item):
+
+class NhentaiImageNotExistException(Exception):
+    """Raised when the image server does not answer with HTTP 200."""
+
+
+class Downloader(Singleton):
+    """Fetch queued image URLs into a folder through a thread pool."""
+
+    def __init__(self, path='', thread=1, timeout=30):
+        """Store the output root, worker count and request timeout.
+
+        Raises ValueError unless thread is an int between 1 and 15.
+        """
+        if not isinstance(thread, int) or thread < 1 or thread > 15:
+            raise ValueError('Invalid threads count')
+        self.path = str(path)
+        self.thread_count = thread
+        self.threads = []
+        self.timeout = timeout
+
+    def _download(self, url, folder='', filename='', retried=0):
+        """Fetch one URL into folder and return a (status, url) pair.
+
+        status is 1 on success, -1 when the response is not HTTP 200 and
+        0 (paired with None) after an unrecoverable error.
+        """
+        logger.info('Start downloading: {0} ...'.format(url))
+        filename = filename if filename else os.path.basename(urlparse(url).path)
+        base_filename, extension = os.path.splitext(filename)
+        target = os.path.join(folder, base_filename.zfill(3) + extension)
         try:
-            return dict.__getitem__(self, item)
-        except KeyError:
-            return ''
+            # Ask the server before opening the file, so that a failed
+            # request never leaves an empty file behind.
+            response = request('get', url, stream=True, timeout=self.timeout)
+            if response.status_code != 200:
+                raise NhentaiImageNotExistException
+            with open(target, "wb") as f:
+                length = response.headers.get('content-length')
+                if length is None:
+                    f.write(response.content)
+                else:
+                    for chunk in response.iter_content(2048):
+                        f.write(chunk)
 
+        except (requests.HTTPError, requests.Timeout) as e:
+            if retried < 3:
+                logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
+                # Return the retry's own (status, url) pair unchanged; wrapping
+                # it in (0, ...) reported every successful retry as a failure.
+                return self._download(url=url, folder=folder, filename=filename, retried=retried+1)
+            else:
+                return 0, None
 
-class Doujinshi(object):
-    def __init__(self, name=None, id=None, img_id=None, ext='jpg', pages=0, **kwargs):
-        self.name = name
-        self.id = id
-        self.img_id = img_id
-        self.ext = ext
-        self.pages = pages
-        self.downloader = None
-        self.url = '%s/%d' % (DETAIL_URL, self.id)
-        self.info = DoujinshiInfo(**kwargs)
+        except NhentaiImageNotExistException:
+            return -1, url
 
-    def __repr__(self):
-        return ''.format(self.name)
+        except Exception as e:
+            logger.critical(str(e))
+            return 0, None
 
-    def show(self):
-        table = [
-            ["Doujinshi", self.name],
-            ["Subtitle", self.info.subtitle],
-            ["Characters", self.info.characters],
-            ["Authors", self.info.artists],
-            ["Language", self.info.language],
-            ["Tags", self.info.tags],
-            ["URL", self.url],
-            ["Pages", self.pages],
-        ]
-        logger.info(u'Print doujinshi information of {0}\n{1}'.format(self.id, tabulate(table)))
+        return 1, url
 
-    def download(self):
-        logger.info('Start download doujinshi: %s' % self.name)
-        if self.downloader:
-            download_queue = []
-            for i in range(1, self.pages + 1):
-                download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i, self.ext))
-            self.downloader.download(download_queue, format_filename(self.name))
+    def _download_callback(self, request, result):
+        """Log the outcome of one finished download task."""
+        result, data = result
+        if result == 0:
+            logger.warning('fatal errors occurred, ignored')
+            # exit(1)
+        elif result == -1:
+            logger.warning('url {} return status code 404'.format(data))
         else:
-            logger.critical('Downloader has not be loaded')
+            logger.log(15, '{0} download successfully'.format(data))
 
+    def download(self, queue, folder=''):
+        """Fetch every URL in queue into folder and wait for the pool.
+
+        folder is joined onto self.path when a path is set and is created
+        if missing; the process exits with status 1 if creation fails.
+        """
+        if not isinstance(folder, (text)):
+            folder = str(folder)
 
-if __name__ == '__main__':
-    test = Doujinshi(name='test nhentai doujinshi', id=1)
-    print(test)
-    test.show()
-    try:
-        test.download()
-    except Exception as e:
-        print('Exception: %s' % str(e))
+        if self.path:
+            folder = os.path.join(self.path, folder)
+
+        if not os.path.exists(folder):
+            logger.warning('Path \'{0}\' not exist.'.format(folder))
+            try:
+                os.makedirs(folder)
+            except EnvironmentError as e:
+                logger.critical('{0}'.format(str(e)))
+                exit(1)
+        else:
+            logger.warning('Path \'{0}\' already exist.'.format(folder))
+
+        queue = [([url], {'folder': folder}) for url in queue]
+
+        self.thread_pool = threadpool.ThreadPool(self.thread_count)
+        requests_ = threadpool.makeRequests(self._download, queue, self._download_callback)
+        # Queue with a plain loop; a list built only for side effects is wasteful.
+        for req in requests_:
+            self.thread_pool.putRequest(req)
+
+        self.thread_pool.wait()
diff --git a/nhentai/utils.py b/nhentai/utils.py
index 255cbfb..c7a3437 100644
--- a/nhentai/utils.py
+++ b/nhentai/utils.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals, print_function
 
 import os
+import string
 from nhentai.logger import logger
 
 