Ricter Z 2018-03-16 23:23:25 +08:00
parent c1975897d2
commit a56d3ca18c
3 changed files with 89 additions and 54 deletions

nhentai/doujinshi.py

@@ -5,6 +5,7 @@ from future.builtins import range
 from nhentai.constant import DETAIL_URL, IMAGE_URL
 from nhentai.logger import logger
+from nhentai.utils import format_filename


 class DoujinshiInfo(dict):
@@ -51,7 +52,7 @@ class Doujinshi(object):
             download_queue = []
             for i in range(1, self.pages + 1):
                 download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i, self.ext))
-            self.downloader.download(download_queue, self.id)
+            self.downloader.download(download_queue, format_filename('%s-%s' % (self.id, self.name[:200])))
         else:
             logger.critical('Downloader has not be loaded')
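
The second hunk is the point of the change in this file: instead of naming the download folder after the bare numeric `self.id`, the queue is now handed a folder name of the form `<id>-<title>`, with the title sliced to 200 characters (presumably to keep the directory name under common filesystem limits) and passed through `format_filename`, whose implementation lives in nhentai/utils.py outside this hunk. A minimal sketch of how that folder name is built, assuming only that the nhentai package is importable; the id and title are invented for illustration:

    # Hypothetical gallery data; mirrors the new call in Doujinshi.download().
    from nhentai.utils import format_filename

    doujinshi_id = 1234
    name = 'An Example Title: with / characters that are unsafe in paths?'

    # The [:200] slice bounds the folder-name length before sanitising.
    folder = format_filename('%s-%s' % (doujinshi_id, name[:200]))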

nhentai/downloader.py

@@ -1,67 +1,100 @@
(post-commit content of the file; the hunk rewrites most of it)

# coding: utf-8
from __future__ import unicode_literals, print_function
from future.builtins import str as text
import os
import requests
import threadpool

try:
    from urllib.parse import urlparse
except ImportError:
    from urlparse import urlparse

from nhentai.logger import logger
from nhentai.parser import request
from nhentai.utils import Singleton

requests.packages.urllib3.disable_warnings()


class NhentaiImageNotExistException(Exception):
    pass


class Downloader(Singleton):

    def __init__(self, path='', thread=1, timeout=30):
        if not isinstance(thread, (int, )) or thread < 1 or thread > 15:
            raise ValueError('Invalid threads count')
        self.path = str(path)
        self.thread_count = thread
        self.threads = []
        self.timeout = timeout

    def _download(self, url, folder='', filename='', retried=0):
        logger.info('Start downloading: {0} ...'.format(url))
        filename = filename if filename else os.path.basename(urlparse(url).path)
        base_filename, extension = os.path.splitext(filename)
        try:
            with open(os.path.join(folder, base_filename.zfill(3) + extension), "wb") as f:
                response = request('get', url, stream=True, timeout=self.timeout)
                if response.status_code != 200:
                    raise NhentaiImageNotExistException
                length = response.headers.get('content-length')
                if length is None:
                    f.write(response.content)
                else:
                    for chunk in response.iter_content(2048):
                        f.write(chunk)
        except (requests.HTTPError, requests.Timeout) as e:
            if retried < 3:
                logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
                return 0, self._download(url=url, folder=folder, filename=filename, retried=retried+1)
            else:
                return 0, None

        except NhentaiImageNotExistException as e:
            os.remove(os.path.join(folder, base_filename.zfill(3) + extension))
            return -1, url

        except Exception as e:
            logger.critical(str(e))
            return 0, None

        return 1, url

    def _download_callback(self, request, result):
        result, data = result
        if result == 0:
            logger.warning('fatal errors occurred, ignored')
            # exit(1)
        elif result == -1:
            logger.warning('url {} return status code 404'.format(data))
        else:
            logger.log(15, '{0} download successfully'.format(data))

    def download(self, queue, folder=''):
        if not isinstance(folder, (text)):
            folder = str(folder)

        if self.path:
            folder = os.path.join(self.path, folder)

        if not os.path.exists(folder):
            logger.warn('Path \'{0}\' not exist.'.format(folder))
            try:
                os.makedirs(folder)
            except EnvironmentError as e:
                logger.critical('{0}'.format(str(e)))
                exit(1)
        else:
            logger.warn('Path \'{0}\' already exist.'.format(folder))

        queue = [([url], {'folder': folder}) for url in queue]

        self.thread_pool = threadpool.ThreadPool(self.thread_count)
        requests_ = threadpool.makeRequests(self._download, queue, self._download_callback)
        [self.thread_pool.putRequest(req) for req in requests_]

        self.thread_pool.wait()
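
Read together with the doujinshi.py hunk above, `Downloader.download()` now takes that folder name instead of a bare id, joins it onto `self.path`, creates the directory if it does not exist, and fans the URL queue out over a `threadpool.ThreadPool`; `_download` reports back through `_download_callback`, which treats a result of 1 as success, -1 as a missing image (404) and 0 as a fatal error. A rough usage sketch, assuming the surrounding package is unchanged; the gallery id, image id and title below are invented:

    # Illustration only: hypothetical gallery data; real network access is
    # needed for anything to actually download.
    from nhentai.doujinshi import Doujinshi
    from nhentai.downloader import Downloader

    doujinshi = Doujinshi(name='example title', id=1234, img_id=5678,
                          ext='jpg', pages=3)
    doujinshi.downloader = Downloader(path='./downloads', thread=5, timeout=30)

    # Builds the per-page image URLs, then calls
    # downloader.download(queue, format_filename('%s-%s' % (id, name[:200]))),
    # so the images end up under ./downloads/ in an '<id>-<title>' folder.
    doujinshi.download()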

nhentai/utils.py

@@ -2,6 +2,7 @@
 from __future__ import unicode_literals, print_function
 import os
+import string

 from nhentai.logger import logger
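
utils.py only gains the `import string` that `format_filename` needs; the helper's body sits outside the visible hunk. A plausible sketch, assuming it follows the common "whitelist filesystem-safe characters" recipe; the real helper in nhentai/utils.py may keep a different character set or handle whitespace differently:

    import string


    def format_filename(s):
        # Sketch only: keep letters, digits and a few punctuation characters
        # that are safe in file and directory names on common filesystems.
        valid_chars = '-_.() %s%s' % (string.ascii_letters, string.digits)
        return ''.join(c for c in s if c in valid_chars).strip()


    # e.g. format_filename('1234-An Example: Title?') -> '1234-An Example Title'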