diff --git a/nhentai/downloader.py b/nhentai/downloader.py index 1a6e61c..2587d52 100644 --- a/nhentai/downloader.py +++ b/nhentai/downloader.py @@ -1,100 +1,67 @@ -# coding: utf- -from __future__ import unicode_literals, print_function -from future.builtins import str as text -import os -import requests -import threadpool -try: - from urllib.parse import urlparse -except ImportError: - from urlparse import urlparse +# coding: utf-8 +from __future__ import print_function, unicode_literals +from tabulate import tabulate +from future.builtins import range +from nhentai.constant import DETAIL_URL, IMAGE_URL from nhentai.logger import logger -from nhentai.parser import request -from nhentai.utils import Singleton +from nhentai.utils import format_filename -requests.packages.urllib3.disable_warnings() +class DoujinshiInfo(dict): + def __init__(self, **kwargs): + super(DoujinshiInfo, self).__init__(**kwargs) - -class NhentaiImageNotExistException(Exception): - pass - - -class Downloader(Singleton): - - def __init__(self, path='', thread=1, timeout=30): - if not isinstance(thread, (int, )) or thread < 1 or thread > 15: - raise ValueError('Invalid threads count') - self.path = str(path) - self.thread_count = thread - self.threads = [] - self.timeout = timeout - - def _download(self, url, folder='', filename='', retried=0): - logger.info('Start downloading: {0} ...'.format(url)) - filename = filename if filename else os.path.basename(urlparse(url).path) - base_filename, extension = os.path.splitext(filename) + def __getattr__(self, item): try: - with open(os.path.join(folder, base_filename.zfill(3) + extension), "wb") as f: - response = request('get', url, stream=True, timeout=self.timeout) - if response.status_code != 200: - raise NhentaiImageNotExistException - length = response.headers.get('content-length') - if length is None: - f.write(response.content) - else: - for chunk in response.iter_content(2048): - f.write(chunk) + return dict.__getitem__(self, item) + except KeyError: + return '' - except (requests.HTTPError, requests.Timeout) as e: - if retried < 3: - logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried)) - return 0, self._download(url=url, folder=folder, filename=filename, retried=retried+1) - else: - return 0, None - except NhentaiImageNotExistException as e: - os.remove(os.path.join(folder, base_filename.zfill(3) + extension)) - return -1, url +class Doujinshi(object): + def __init__(self, name=None, id=None, img_id=None, ext='jpg', pages=0, **kwargs): + self.name = name + self.id = id + self.img_id = img_id + self.ext = ext + self.pages = pages + self.downloader = None + self.url = '%s/%d' % (DETAIL_URL, self.id) + self.info = DoujinshiInfo(**kwargs) - except Exception as e: - logger.critical(str(e)) - return 0, None + def __repr__(self): + return ''.format(self.name) - return 1, url + def show(self): + table = [ + ["Doujinshi", self.name], + ["Subtitle", self.info.subtitle], + ["Characters", self.info.characters], + ["Authors", self.info.artists], + ["Language", self.info.language], + ["Tags", self.info.tags], + ["URL", self.url], + ["Pages", self.pages], + ] + logger.info(u'Print doujinshi information of {0}\n{1}'.format(self.id, tabulate(table))) - def _download_callback(self, request, result): - result, data = result - if result == 0: - logger.warning('fatal errors occurred, ignored') - # exit(1) - elif result == -1: - logger.warning('url {} return status code 404'.format(data)) + def download(self): + logger.info('Start download doujinshi: %s' % self.name) + if self.downloader: + download_queue = [] + for i in range(1, self.pages + 1): + download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i, self.ext)) + self.downloader.download(download_queue, format_filename(self.name)) else: - logger.log(15, '{0} download successfully'.format(data)) + logger.critical('Downloader has not be loaded') - def download(self, queue, folder=''): - if not isinstance(folder, (text)): - folder = str(folder) - if self.path: - folder = os.path.join(self.path, folder) - - if not os.path.exists(folder): - logger.warn('Path \'{0}\' not exist.'.format(folder)) - try: - os.makedirs(folder) - except EnvironmentError as e: - logger.critical('{0}'.format(str(e))) - exit(1) - else: - logger.warn('Path \'{0}\' already exist.'.format(folder)) - - queue = [([url], {'folder': folder}) for url in queue] - - self.thread_pool = threadpool.ThreadPool(self.thread_count) - requests_ = threadpool.makeRequests(self._download, queue, self._download_callback) - [self.thread_pool.putRequest(req) for req in requests_] - - self.thread_pool.wait() +if __name__ == '__main__': + test = Doujinshi(name='test nhentai doujinshi', id=1) + print(test) + test.show() + try: + test.download() + except Exception as e: + print('Exception: %s' % str(e)) diff --git a/nhentai/utils.py b/nhentai/utils.py index 4f48036..255cbfb 100644 --- a/nhentai/utils.py +++ b/nhentai/utils.py @@ -66,3 +66,20 @@ def generate_html(output_dir='.', doujinshi_obj=None): f.write(data) logger.log(15, 'HTML Viewer has been write to \'{0}\''.format(os.path.join(doujinshi_dir, 'index.html'))) + + +def format_filename(s): + """Take a string and return a valid filename constructed from the string. +Uses a whitelist approach: any characters not present in valid_chars are +removed. Also spaces are replaced with underscores. + +Note: this method may produce invalid filenames such as ``, `.` or `..` +When I use this method I prepend a date string like '2009_01_15_19_46_32_' +and append a file extension like '.txt', so I avoid the potential of using +an invalid filename. + +""" + valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits) + filename = ''.join(c for c in s if c in valid_chars) + filename = filename.replace(' ', '_') # I don't like spaces in filenames. + return filename