Add coloured logging, multi-threaded downloading and --path/--threads options

What this patch does, file by file:

  * hentai/cmdline.py    adds banner(), the --path and --threads options and
                         validation of the parsed arguments.
  * hentai/dojinshi.py   carries the gallery image id and file extension so
                         page URLs are built from them instead of the
                         dojinshi id and a hard-coded ".jpg".
  * hentai/downloader.py replaces the print-only stub with a real downloader:
                         worker threads drain a queue of URLs and stream each
                         response to disk with requests.
  * hentai/logger.py     new module: Vinay Sajip's ColorizingStreamHandler
                         plus a module-level "nhentai" logger.
  * hentai/parser.py     extracts the image id and extension from the cover
                         image URL.
  * nhentai.py           wires the new options into Downloader and only shows
                         the dojinshi details when not downloading.
  * requirements.txt     drops the unused wget dependency.

Review fixes folded in:

  * downloader: queue.task_done() was called when the queue was found empty
    rather than after a get(); with more workers than pages it raises
    ValueError("task_done() called too many times"). It is now called once
    per fetched URL and the redundant empty() pre-check is gone (get(False)
    plus Queue.Empty already ends the loop).
  * downloader: the target folder is created once in download() before the
    workers start. Previously every worker raced through
    os.path.exists()/os.mkdir() and the losers exited on EEXIST.
  * downloader: the HTTP request is issued before the output file is opened,
    so a failed request no longer leaves an empty file; the
    "except Exception, e: raise e" clause only discarded the traceback and
    is removed.
  * parser: the id check used "or", so a digit string was always rejected.
  * dojinshi: __repr__ formatted an empty string, which raises TypeError.
  * cmdline/parser: error paths exit with status 1 instead of 0; typo in the
    "image id" error message fixed; cmdline imports the logger through the
    package like the other modules.
  * logger: duplicated "import logging" dropped.

diff --git a/hentai/cmdline.py b/hentai/cmdline.py
index 14ba288..4941680 100644
--- a/hentai/cmdline.py
+++ b/hentai/cmdline.py
@@ -1,4 +1,15 @@
+#coding: utf-8
+import sys
 from optparse import OptionParser
+from hentai.logger import logger
+
+def banner():
+    print '''       _   _            _        _
+ _ __ | | | | ___ _ __ | |_ __ _(_)
+| '_ \| |_| |/ _ \ '_ \| __/ _` | |
+| | | |  _  |  __/ | | | || (_| | |
+|_| |_|_| |_|\___|_| |_|\__\__,_|_|
+'''
 
 
 def cmd_parser():
@@ -6,6 +17,21 @@ def cmd_parser():
     parser.add_option('--search', type='string', dest='keyword', action='store')
     parser.add_option('--download', dest='is_download', action='store_true')
     parser.add_option('--id', type='int', dest='id', action='store')
-
+    parser.add_option('--path', type='string', dest='saved_path', action='store', default='')
+    parser.add_option('--threads', type='int', dest='threads', action='store', default=1)
     args, _ = parser.parse_args()
+
+    if args.threads <= 0:
+        args.threads = 1
+    elif args.threads > 10:
+        logger.critical('Maximum number of used threads is 10')
+        sys.exit(1)  # invalid invocation: report failure to the shell
+
+    if args.is_download and not args.id:
+        logger.critical('Dojinshi id is required for downloading')
+        sys.exit(1)  # invalid invocation: report failure to the shell
+
+    if args.keyword:
+        logger.critical(u'并没有做这个功能_(:3」∠)_')
+
     return args
diff --git a/hentai/dojinshi.py b/hentai/dojinshi.py
index 0cc214c..36aa359 100644
--- a/hentai/dojinshi.py
+++ b/hentai/dojinshi.py
@@ -1,12 +1,15 @@
 import Queue
 from constant import DETAIL_URL, IMAGE_URL
+from hentai.logger import logger
 
 
 class Dojinshi(object):
-    def __init__(self, name=None, subtitle=None, id=None, pages=0):
+    def __init__(self, name=None, subtitle=None, id=None, img_id=None, ext='jpg', pages=0):
         self.name = name
         self.subtitle = subtitle
         self.id = id
+        self.img_id = img_id
+        self.ext = ext
         self.pages = pages
         self.downloader = None
         self.url = '%s/%d' % (DETAIL_URL, self.id)
@@ -15,17 +18,19 @@ class Dojinshi(object):
-        return '' % self.name
+        return '<Dojinshi: %s>' % self.name
 
     def show(self):
+        logger.info('Print dojinshi information')
         print 'Dojinshi: %s' % self.name
         print 'Subtitle: %s' % self.subtitle
         print 'URL: %s' % self.url
         print 'Pages: %d' % self.pages
 
     def download(self):
+        logger.info('Start download dojinshi: %s' % self.name)
         if self.downloader:
             download_queue = Queue.Queue()
             for i in xrange(1, self.pages + 1):
-                download_queue.put('%s/%d/%d.jpg' % (IMAGE_URL, self.id, i))
-            self.downloader.download(download_queue)
+                download_queue.put('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i, self.ext))
+            self.downloader.download(download_queue, self.id)
         else:
             raise Exception('Downloader has not be loaded')
 
diff --git a/hentai/downloader.py b/hentai/downloader.py
index 35767e8..5b80471 100644
--- a/hentai/downloader.py
+++ b/hentai/downloader.py
@@ -1,37 +1,91 @@
+#coding: utf-8
+import os
+import sys
 import threading
 import Queue
+import requests
+from urlparse import urlparse
+from hentai.logger import logger
 
+shutdown = threading.Event()
 
 class Downloader(object):
-    def __init__(self):
+    def __init__(self, path='', thread=1):
+        if not isinstance(thread, (int, )) or thread < 1 or thread > 10:
+            raise ValueError('Invalid threads count')
+        self.path = str(path)
+        self.thread_count = thread
         self.threads = []
 
-    def _download(self, queue):
+    def _download(self, url, folder='', filename=''):
+        """Fetch `url` and save it as `folder`/`filename` (default: the URL's basename)."""
+        if shutdown.is_set():
+            return
+
+        if not os.path.exists(folder):
+            try:
+                os.mkdir(folder)
+            except os.error, e:
+                logger.error('Error %s' % str(e))
+                sys.exit()
+
+        filename = filename if filename else os.path.basename(urlparse(url).path)
+        try:
+            # Request first, so a failed fetch does not leave an empty file behind.
+            response = requests.get(url, stream=True)
+            with open(os.path.join(folder, filename), "wb") as f:
+                length = response.headers.get('content-length')
+                if length is None:
+                    f.write(response.content)
+                else:
+                    for chunk in response.iter_content(2048):
+                        f.write(chunk)
+        except (os.error, IOError), e:
+            logger.error('Error %s' % e)
+            sys.exit()
+
+        logger.info('%s downloaded: %s done!' % (threading.currentThread().getName(), url))
+
+    def _download_thread(self, queue, folder=''):
+        """Worker loop: take URLs from `queue` until it is empty, saving each into `folder`."""
         while True:
-            if not queue.qsize():
-                queue.task_done()
-                break
             try:
                 url = queue.get(False)
-                print 'Downloading: %s' % url
+                logger.info('%s downloading: %s ...' % (threading.currentThread().getName(), url))
+                self._download(url, folder)
+                queue.task_done()  # exactly one task_done() per item taken with get()
             except Queue.Empty:
                 break
 
-    def download(self, queue):
-        for i in range(10):
-            _ = threading.Thread(target=self._download, args=(queue, ))
+    def download(self, queue, folder=''):
+        """Download every URL in `queue` into `folder` (or `self.path` when set) with `thread_count` workers."""
+        if not isinstance(folder, (str, unicode)):
+            folder = str(folder)
+
+        if self.path:
+            folder = self.path
+
+        if os.path.exists(path=folder):
+            logger.warn('Path \'%s\' already exist' % folder)
+        else:
+            logger.warn('Path \'%s\' not exist' % folder)
+            # Create the folder once, before any worker starts: letting every
+            # worker race through exists()/mkdir() makes the losers fail with EEXIST.
+            try:
+                os.makedirs(folder)
+            except os.error, e:
+                logger.error('Error %s' % str(e))
+                sys.exit(1)
+
+        for i in range(self.thread_count):
+            _ = threading.Thread(target=self._download_thread, args=(queue, folder, ))
+            _.setDaemon(True)
             self.threads.append(_)
 
-        for i in self.threads:
-            i.start()
+        for thread in self.threads:
+            thread.start()
 
-        for i in self.threads:
-            i.join()
+        for thread in self.threads:
+            thread.join()
 
-
-if __name__ == '__main__':
-    d = Downloader()
-    q = Queue.Queue()
-    for i in range(0, 50):
-        q.put(i)
-    d.download(q)
+        logger.log(15, u'🍺 All done, saved to \'%s\'!' % folder)
\ No newline at end of file
diff --git a/hentai/logger.py b/hentai/logger.py
new file mode 100644
index 0000000..b7684e0
--- /dev/null
+++ b/hentai/logger.py
@@ -0,0 +1,153 @@
+#
+# Copyright (C) 2010-2012 Vinay Sajip. All rights reserved. Licensed under the new BSD license.
+#
+import logging
+import os
+import re
+import sys
+
+
+class ColorizingStreamHandler(logging.StreamHandler):
+    # color names to indices
+    color_map = {
+        'black': 0,
+        'red': 1,
+        'green': 2,
+        'yellow': 3,
+        'blue': 4,
+        'magenta': 5,
+        'cyan': 6,
+        'white': 7,
+    }
+
+    # levels to (background, foreground, bold/intense)
+    if os.name == 'nt':
+        level_map = {
+            logging.DEBUG: (None, 'white', False),
+            logging.INFO: (None, 'green', False),
+            logging.WARNING: (None, 'yellow', False),
+            logging.ERROR: (None, 'red', False),
+            logging.CRITICAL: ('red', 'white', False)
+        }
+    else:
+        level_map = {
+            logging.DEBUG: (None, 'white', False),
+            logging.INFO: (None, 'green', False),
+            logging.WARNING: (None, 'yellow', False),
+            logging.ERROR: (None, 'red', False),
+            logging.CRITICAL: ('red', 'white', False)
+        }
+    csi = '\x1b['
+    reset = '\x1b[0m'
+    disable_coloring = False
+
+    @property
+    def is_tty(self):
+        isatty = getattr(self.stream, 'isatty', None)
+        return isatty and isatty() and not self.disable_coloring
+
+    if os.name != 'nt':
+        def output_colorized(self, message):
+            self.stream.write(message)
+    else:
+        ansi_esc = re.compile(r'\x1b\[((?:\d+)(?:;(?:\d+))*)m')
+
+        nt_color_map = {
+            0: 0x00,    # black
+            1: 0x04,    # red
+            2: 0x02,    # green
+            3: 0x06,    # yellow
+            4: 0x01,    # blue
+            5: 0x05,    # magenta
+            6: 0x03,    # cyan
+            7: 0x07,    # white
+        }
+
+        def output_colorized(self, message):
+            import ctypes
+
+            parts = self.ansi_esc.split(message)
+            write = self.stream.write
+            h = None
+            fd = getattr(self.stream, 'fileno', None)
+
+            if fd is not None:
+                fd = fd()
+
+                if fd in (1, 2):  # stdout or stderr
+                    h = ctypes.windll.kernel32.GetStdHandle(-10 - fd)
+
+            while parts:
+                text = parts.pop(0)
+
+                if text:
+                    write(text)
+
+                if parts:
+                    params = parts.pop(0)
+
+                    if h is not None:
+                        params = [int(p) for p in params.split(';')]
+                        color = 0
+
+                        for p in params:
+                            if 40 <= p <= 47:
+                                color |= self.nt_color_map[p - 40] << 4
+                            elif 30 <= p <= 37:
+                                color |= self.nt_color_map[p - 30]
+                            elif p == 1:
+                                color |= 0x08  # foreground intensity on
+                            elif p == 0:  # reset to default color
+                                color = 0x07
+                            else:
+                                pass  # error condition ignored
+
+                        ctypes.windll.kernel32.SetConsoleTextAttribute(h, color)
+
+    def colorize(self, message, record):
+        if record.levelno in self.level_map and self.is_tty:
+            bg, fg, bold = self.level_map[record.levelno]
+            params = []
+
+            if bg in self.color_map:
+                params.append(str(self.color_map[bg] + 40))
+
+            if fg in self.color_map:
+                params.append(str(self.color_map[fg] + 30))
+
+            if bold:
+                params.append('1')
+
+            if params and message:
+                if message.lstrip() != message:
+                    prefix = re.search(r"\s+", message).group(0)
+                    message = message[len(prefix):]
+                else:
+                    prefix = ""
+
+                message = "%s%s" % (prefix, ''.join((self.csi, ';'.join(params),
+                                                     'm', message, self.reset)))
+
+        return message
+
+    def format(self, record):
+        message = logging.StreamHandler.format(self, record)
+        return self.colorize(message, record)
+
+logging.addLevelName(15, "INFO")
+logger = logging.getLogger('nhentai')
+LOGGER_HANDLER = ColorizingStreamHandler(sys.stdout)
+FORMATTER = logging.Formatter("\r[%(asctime)s] [%(levelname)s] %(message)s", "%H:%M:%S")
+LOGGER_HANDLER.setFormatter(FORMATTER)
+LOGGER_HANDLER.level_map[logging.getLevelName("INFO")] = (None, "cyan", False)
+logger.addHandler(LOGGER_HANDLER)
+logger.setLevel(logging.DEBUG)
+
+
+if __name__ == '__main__':
+    logger.log(15, 'hentai')
+    logger.info('info')
+    logger.warn('warn')
+    logger.debug('debug')
+    logger.error('error')
+    logger.critical('critical')
diff --git a/hentai/parser.py b/hentai/parser.py
index ee029a6..4ba9565 100644
--- a/hentai/parser.py
+++ b/hentai/parser.py
@@ -1,13 +1,12 @@
+import sys
 import re
 import requests
 from bs4 import BeautifulSoup
 from constant import DETAIL_URL
-
-
-dojinshi_fields = ['Artists:']
-
+from hentai.logger import logger
 
 def dojinshi_parser(id):
+    logger.debug('Fetching dojinshi information')
-    if not isinstance(id, (int, )) or (isinstance(id, (str, )) and not id.isdigit()):
+    if not (isinstance(id, (int, )) or (isinstance(id, (str, )) and id.isdigit())):
         raise Exception('Dojinshi id(%s) is not valid' % str(id))
     id = int(id)
@@ -25,6 +24,14 @@ def dojinshi_parser(id):
     dojinshi['name'] = title
     dojinshi['subtitle'] = subtitle.text if subtitle else ''
 
+    dojinshi_cover = html.find('div', attrs={'id': 'cover'})
+    img_id = re.search('/galleries/([\d]+)/cover\.(jpg|png)$', dojinshi_cover.a.img['src'])
+    if not img_id:
+        logger.critical('Tried to get image id failed')
+        sys.exit(1)  # cannot build page URLs without the gallery image id
+    dojinshi['img_id'] = img_id.group(1)
+    dojinshi['ext'] = img_id.group(2)
+
     pages = 0
     for _ in dojinshi_info.find_all('div', class_=''):
         pages = re.search('([\d]+) pages', _.text)
diff --git a/nhentai.py b/nhentai.py
index 9f65464..00d15e8 100644
--- a/nhentai.py
+++ b/nhentai.py
@@ -1,13 +1,20 @@
-from hentai.cmdline import cmd_parser
+#coding: utf-8
+from hentai.cmdline import cmd_parser, banner
 from hentai.parser import dojinshi_parser
 from hentai.dojinshi import Dojinshi
 from hentai.downloader import Downloader
+from hentai.logger import logger
+
+
+__version__ = '0.1'
 
 
 def main():
+    banner()
     options = cmd_parser()
     dojinshi = None
 
+    logger.log(15, 'nHentai: あなたも変態。 いいね?')
     if options.id:
         dojinshi_info = dojinshi_parser(options.id)
         dojinshi = Dojinshi(**dojinshi_info)
@@ -16,10 +23,12 @@ def main():
     else:
         raise SystemExit
 
-    dojinshi.show()
     if options.is_download:
-        dojinshi.downloader = Downloader()
+        dojinshi.downloader = Downloader(path=options.saved_path,
+                                         thread=options.threads)
         dojinshi.download()
+    else:
+        dojinshi.show()
 
 
 if __name__ == '__main__':
diff --git a/requirements.txt b/requirements.txt
index 713b1e6..eb03d15 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,2 @@
 requests>=2.5.0
-wget>=2.2
 BeautifulSoup4>=4.0.0
\ No newline at end of file