mirror of
https://github.com/RicterZ/nhentai.git
synced 2025-04-20 02:41:19 +02:00
save downloaded doujinshi as doujinshi name #13
This commit is contained in:
parent
4ed596ff98
commit
c1975897d2
@ -1,100 +1,67 @@
|
|||||||
# coding: utf-
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals, print_function
|
from __future__ import print_function, unicode_literals
|
||||||
from future.builtins import str as text
|
from tabulate import tabulate
|
||||||
import os
|
from future.builtins import range
|
||||||
import requests
|
|
||||||
import threadpool
|
|
||||||
try:
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
except ImportError:
|
|
||||||
from urlparse import urlparse
|
|
||||||
|
|
||||||
|
from nhentai.constant import DETAIL_URL, IMAGE_URL
|
||||||
from nhentai.logger import logger
|
from nhentai.logger import logger
|
||||||
from nhentai.parser import request
|
from nhentai.utils import format_filename
|
||||||
from nhentai.utils import Singleton
|
|
||||||
|
|
||||||
|
|
||||||
requests.packages.urllib3.disable_warnings()
|
class DoujinshiInfo(dict):
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
super(DoujinshiInfo, self).__init__(**kwargs)
|
||||||
|
|
||||||
|
def __getattr__(self, item):
|
||||||
class NhentaiImageNotExistException(Exception):
    """Signal that an image request did not come back with HTTP status 200."""
|
|
||||||
|
|
||||||
|
|
||||||
class Downloader(Singleton):
|
|
||||||
|
|
||||||
def __init__(self, path='', thread=1, timeout=30):
    """Configure the downloader.

    :param path: base directory for downloads; stored as ``str(path)``.
    :param thread: worker count, must be an ``int`` between 1 and 15
        inclusive.
    :param timeout: value stored on the instance and later handed to the
        HTTP request as its ``timeout``.
    :raises ValueError: if ``thread`` is not an int in the accepted range.
    """
    thread_ok = isinstance(thread, int) and 1 <= thread <= 15
    if not thread_ok:
        raise ValueError('Invalid threads count')

    self.path = str(path)
    self.thread_count = thread
    self.threads = []
    self.timeout = timeout
|
|
||||||
|
|
||||||
def _download(self, url, folder='', filename='', retried=0):
|
|
||||||
logger.info('Start downloading: {0} ...'.format(url))
|
|
||||||
filename = filename if filename else os.path.basename(urlparse(url).path)
|
|
||||||
base_filename, extension = os.path.splitext(filename)
|
|
||||||
try:
|
try:
|
||||||
with open(os.path.join(folder, base_filename.zfill(3) + extension), "wb") as f:
|
return dict.__getitem__(self, item)
|
||||||
response = request('get', url, stream=True, timeout=self.timeout)
|
except KeyError:
|
||||||
if response.status_code != 200:
|
return ''
|
||||||
raise NhentaiImageNotExistException
|
|
||||||
length = response.headers.get('content-length')
|
|
||||||
if length is None:
|
class Doujinshi(object):
|
||||||
f.write(response.content)
|
def __init__(self, name=None, id=None, img_id=None, ext='jpg', pages=0, **kwargs):
|
||||||
|
self.name = name
|
||||||
|
self.id = id
|
||||||
|
self.img_id = img_id
|
||||||
|
self.ext = ext
|
||||||
|
self.pages = pages
|
||||||
|
self.downloader = None
|
||||||
|
self.url = '%s/%d' % (DETAIL_URL, self.id)
|
||||||
|
self.info = DoujinshiInfo(**kwargs)
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return '<Doujinshi: {0}>'.format(self.name)
|
||||||
|
|
||||||
|
def show(self):
|
||||||
|
table = [
|
||||||
|
["Doujinshi", self.name],
|
||||||
|
["Subtitle", self.info.subtitle],
|
||||||
|
["Characters", self.info.characters],
|
||||||
|
["Authors", self.info.artists],
|
||||||
|
["Language", self.info.language],
|
||||||
|
["Tags", self.info.tags],
|
||||||
|
["URL", self.url],
|
||||||
|
["Pages", self.pages],
|
||||||
|
]
|
||||||
|
logger.info(u'Print doujinshi information of {0}\n{1}'.format(self.id, tabulate(table)))
|
||||||
|
|
||||||
|
def download(self):
|
||||||
|
logger.info('Start download doujinshi: %s' % self.name)
|
||||||
|
if self.downloader:
|
||||||
|
download_queue = []
|
||||||
|
for i in range(1, self.pages + 1):
|
||||||
|
download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i, self.ext))
|
||||||
|
self.downloader.download(download_queue, format_filename(self.name))
|
||||||
else:
|
else:
|
||||||
for chunk in response.iter_content(2048):
|
logger.critical('Downloader has not be loaded')
|
||||||
f.write(chunk)
|
|
||||||
|
|
||||||
except (requests.HTTPError, requests.Timeout) as e:
|
|
||||||
if retried < 3:
|
|
||||||
logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
|
|
||||||
return 0, self._download(url=url, folder=folder, filename=filename, retried=retried+1)
|
|
||||||
else:
|
|
||||||
return 0, None
|
|
||||||
|
|
||||||
except NhentaiImageNotExistException as e:
|
|
||||||
os.remove(os.path.join(folder, base_filename.zfill(3) + extension))
|
|
||||||
return -1, url
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
test = Doujinshi(name='test nhentai doujinshi', id=1)
|
||||||
|
print(test)
|
||||||
|
test.show()
|
||||||
|
try:
|
||||||
|
test.download()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.critical(str(e))
|
print('Exception: %s' % str(e))
|
||||||
return 0, None
|
|
||||||
|
|
||||||
return 1, url
|
|
||||||
|
|
||||||
def _download_callback(self, request, result):
    """Log the outcome of one finished download request.

    :param request: the work request that completed (not inspected here).
    :param result: a ``(status, data)`` pair; ``status`` 0 is logged as a
        fatal-but-ignored error, -1 as a 404 for the URL in ``data``, and
        anything else as a successful download of ``data``.
    """
    status, payload = result

    if status == -1:
        logger.warning('url {} return status code 404'.format(payload))
        return

    if status == 0:
        # Deliberately non-fatal: the failure is only logged so the
        # remaining downloads keep going.
        logger.warning('fatal errors occurred, ignored')
        return

    logger.log(15, '{0} download successfully'.format(payload))
|
|
||||||
|
|
||||||
def download(self, queue, folder=''):
    """Download every URL in ``queue`` into ``folder`` through a thread pool.

    :param queue: iterable of URLs to fetch.
    :param folder: destination directory; joined onto ``self.path`` when
        that is set. Non-text values are converted with ``str``.

    The destination directory is created when it does not exist; if it
    cannot be created the error is logged and the process exits with
    status 1. The call blocks until the pool has processed every request.
    """
    if not isinstance(folder, (text)):
        folder = str(folder)

    if self.path:
        folder = os.path.join(self.path, folder)

    if not os.path.exists(folder):
        # ``logger.warning`` instead of the deprecated ``warn`` alias, to
        # match the other logging calls in this class.
        logger.warning('Path \'{0}\' not exist.'.format(folder))
        try:
            os.makedirs(folder)
        except EnvironmentError as e:
            logger.critical('{0}'.format(str(e)))
            exit(1)
    else:
        logger.warning('Path \'{0}\' already exist.'.format(folder))

    # One work item per URL: positional args ``[url]`` plus the shared
    # ``folder`` keyword for ``self._download``.
    queue = [([url], {'folder': folder}) for url in queue]

    self.thread_pool = threadpool.ThreadPool(self.thread_count)
    requests_ = threadpool.makeRequests(self._download, queue, self._download_callback)
    # Plain loop: the calls are made for their side effect only.
    for req in requests_:
        self.thread_pool.putRequest(req)

    self.thread_pool.wait()
|
|
||||||
|
@ -66,3 +66,20 @@ def generate_html(output_dir='.', doujinshi_obj=None):
|
|||||||
f.write(data)
|
f.write(data)
|
||||||
|
|
||||||
logger.log(15, 'HTML Viewer has been write to \'{0}\''.format(os.path.join(doujinshi_dir, 'index.html')))
|
logger.log(15, 'HTML Viewer has been write to \'{0}\''.format(os.path.join(doujinshi_dir, 'index.html')))
|
||||||
|
|
||||||
|
|
||||||
|
def format_filename(s):
    """Build a filesystem-friendly name out of ``s``.

    Works as a whitelist: only ASCII letters, digits, spaces and the
    characters ``-_.()`` survive; everything else is dropped. Surviving
    spaces are then turned into underscores.

    Note: the result can still be an unusable name such as an empty
    string, ``.`` or ``..`` when nothing else is left of the input, so
    callers that need a guaranteed-valid name should add their own prefix
    or suffix.
    """
    allowed = "-_.() " + string.ascii_letters + string.digits
    kept = [ch for ch in s if ch in allowed]
    return ''.join(kept).replace(' ', '_')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user