setup.py and add command line support

This commit is contained in:
ricterz
2015-05-10 01:11:57 +08:00
parent 0b9a70d59b
commit 530316cb91
10 changed files with 17 additions and 13 deletions

0
nhentai/__init__.py Normal file
View File

52
nhentai/cmdline.py Normal file
View File

@ -0,0 +1,52 @@
#coding: utf-8
from optparse import OptionParser
from itertools import ifilter
from logger import logger
def banner():
    """Print the ASCII-art program banner to stdout."""
    # Raw literal: the backslashes belong to the drawing, they are not escapes.
    logo = r''' _ _ _ _
_ __ | | | | ___ _ __ | |_ __ _(_)
| '_ \| |_| |/ _ \ '_ \| __/ _` | |
| | | | _ | __/ | | | || (_| | |
|_| |_|_| |_|\___|_| |_|\__\__,_|_|
'''
    print(logo)
def cmd_parser():
    """Parse and validate the command line options.

    Reads ``sys.argv`` through ``optparse`` and normalises the result:

    * ``--ids`` is turned into a set of ints; non-numeric tokens are dropped.
    * ``--id`` is merged into ``ids`` (it becomes a one-element tuple when
      ``--ids`` was not given or contained no valid id).
    * ``--threads`` below 1 is raised to 1.

    :return: the ``optparse`` values object with the normalised attributes
    :raises SystemExit: when nothing to do was requested (help is printed),
        when ``--download`` is used without an id or keyword, or when more
        than 10 threads are requested
    """
    parser = OptionParser()
    parser.add_option('--download', dest='is_download', action='store_true', help='download dojinshi or not')
    parser.add_option('--id', type='int', dest='id', action='store', help='dojinshi id of nhentai')
    parser.add_option('--ids', type='str', dest='ids', action='store', help='dojinshi id set, e.g. 1,2,3')
    parser.add_option('--search', type='string', dest='keyword', action='store', help='keyword searched')
    parser.add_option('--page', type='int', dest='page', action='store', default=1,
                      help='page number of search result')
    parser.add_option('--path', type='string', dest='saved_path', action='store', default='',
                      help='path which save the dojinshi')
    parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=1,
                      help='thread count of download dojinshi')

    args, _ = parser.parse_args()

    if args.ids:
        # Keep only the purely numeric, comma separated tokens.
        tokens = (token.strip() for token in args.ids.split(','))
        args.ids = set(int(token) for token in tokens if token.isdigit())

    if args.is_download and not args.id and not args.ids and not args.keyword:
        logger.critical('Dojinshi id/ids is required for downloading')
        parser.print_help()
        raise SystemExit

    if args.id:
        if args.ids:
            # Both --id and --ids were given: honour both instead of
            # silently discarding the single id.
            args.ids = args.ids | set([args.id])
        else:
            args.ids = (args.id, )

    if not args.keyword and not args.ids:
        parser.print_help()
        raise SystemExit

    if args.threads <= 0:
        args.threads = 1
    elif args.threads > 10:
        logger.critical('Maximum number of used threads is 10')
        raise SystemExit

    return args

52
nhentai/command.py Normal file
View File

@ -0,0 +1,52 @@
#!/usr/bin/env python2.7
#coding: utf-8
from nhentai.cmdline import cmd_parser, banner
from nhentai.parser import dojinshi_parser, search_parser, print_dojinshi
from nhentai.dojinshi import Dojinshi
from nhentai.downloader import Downloader
from nhentai.logger import logger
__version__ = '0.1'


def main():
    """Command line entry point.

    Prints the banner, parses the options, resolves the requested dojinshi
    ids (directly from the options, or from a keyword search when
    downloading) and then either downloads or displays each dojinshi.

    :raises SystemExit: when there is no dojinshi id to work on
    """
    banner()
    options = cmd_parser()

    logger.log(15, 'nHentai: あなたも変態。 いいね?')

    dojinshi_ids = []
    dojinshi_list = []

    if not options.keyword:
        dojinshi_ids = options.ids
    else:
        dojinshis = search_parser(options.keyword, options.page)
        if not options.is_download:
            # Search without --download only lists the results.
            print_dojinshi(dojinshis)
        else:
            dojinshi_ids = [found['id'] for found in dojinshis]

    if not dojinshi_ids:
        logger.log(15, 'Nothing has been done.')
        raise SystemExit

    for dojinshi_id in dojinshi_ids:
        dojinshi_info = dojinshi_parser(dojinshi_id)
        dojinshi_list.append(Dojinshi(**dojinshi_info))

    if options.is_download:
        downloader = Downloader(path=options.saved_path, thread=options.threads)
        for dojinshi in dojinshi_list:
            dojinshi.downloader = downloader
            dojinshi.download()
    else:
        for dojinshi in dojinshi_list:
            dojinshi.show()

    logger.log(15, u'🍺 All done.')


if __name__ == '__main__':
    main()

5
nhentai/constant.py Normal file
View File

@ -0,0 +1,5 @@
# Site endpoints, all derived from the URL scheme prefix and the site host.
SCHEMA = 'http://'
URL = SCHEMA + 'nhentai.net'
DETAIL_URL = URL + '/g'
IMAGE_URL = SCHEMA + 'i.nhentai.net/galleries'
SEARCH_URL = URL + '/search/'

45
nhentai/dojinshi.py Normal file
View File

@ -0,0 +1,45 @@
import Queue
from constant import DETAIL_URL, IMAGE_URL
from nhentai.logger import logger
class Dojinshi(object):
    """Metadata of one dojinshi plus the logic to show or download it.

    :param name: title
    :param subtitle: secondary title
    :param id: numeric gallery id, or None when unknown
    :param img_id: id used in the image URLs (converted with ``int()``
        when downloading)
    :param ext: image file extension
    :param pages: number of pages

    The ``downloader`` attribute must be assigned before ``download()`` is
    called; it needs a ``download(queue, folder)`` method.
    """

    def __init__(self, name=None, subtitle=None, id=None, img_id=None, ext='jpg', pages=0):
        self.name = name
        self.subtitle = subtitle
        self.id = id
        self.img_id = img_id
        self.ext = ext
        self.pages = pages
        self.downloader = None
        # ``id`` defaults to None and '%d' rejects None (and digit strings),
        # so only build the URL when an id is known and coerce it to int.
        if self.id is None:
            self.url = None
        else:
            self.url = '%s/%d' % (DETAIL_URL, int(self.id))

    def __repr__(self):
        return '<Dojinshi: %s>' % self.name

    def show(self):
        """Print name, subtitle, URL and page count to stdout."""
        logger.info('Print dojinshi information')
        print('Dojinshi: %s' % self.name)
        print('Subtitle: %s' % self.subtitle)
        print('URL: %s' % self.url)
        print('Pages: %d' % self.pages)

    def download(self):
        """Queue the URL of every page and hand the queue to the downloader.

        Logs a critical message and does nothing else when no downloader
        has been assigned.
        """
        logger.info('Start download dojinshi: %s' % self.name)
        if not self.downloader:
            logger.critical('Downloader has not be loaded')
            return

        download_queue = Queue.Queue()
        # Pages are numbered from 1 on the image host.
        for i in xrange(1, self.pages + 1):
            download_queue.put('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i, self.ext))
        # The gallery id doubles as the destination folder name.
        self.downloader.download(download_queue, self.id)
if __name__ == '__main__':
    # Manual smoke test: build a dummy dojinshi, display it and attempt a
    # download, reporting any exception instead of crashing.
    test = Dojinshi(name='test nhentai dojinshi', id=1)
    print(test)
    test.show()
    try:
        test.download()
    except Exception as error:
        print('Exception: %s' % str(error))

95
nhentai/downloader.py Normal file
View File

@ -0,0 +1,95 @@
#coding: utf-8
import os
import sys
import socket
import threading
import Queue
import requests
from urlparse import urlparse
from nhentai.logger import logger
# Global timeout, in seconds: installed at import time as the default
# timeout for sockets created afterwards.
timeout = 30
socket.setdefaulttimeout(timeout)
class Downloader(object):
    """Singleton that downloads queued URLs into a folder using threads.

    Every call to ``Downloader(...)`` returns the same object; ``__init__``
    still runs each time and overwrites ``path`` and ``thread_count``.

    :param path: base directory prepended to the per-download folder
    :param thread: number of worker threads, 1 to 10
    :raises ValueError: if ``thread`` is not an int in the range 1..10
    """
    _instance = None

    def __new__(cls, *args, **kwargs):
        if not cls._instance:
            # object.__new__ accepts no extra arguments; the constructor
            # arguments are consumed by __init__ only.
            cls._instance = super(Downloader, cls).__new__(cls)
        return cls._instance

    def __init__(self, path='', thread=1):
        if not isinstance(thread, (int, )) or thread < 1 or thread > 10:
            raise ValueError('Invalid threads count')
        self.path = str(path)
        self.thread_count = thread
        self.threads = []

    def _download(self, url, folder='', filename=''):
        """Fetch ``url`` and write it to ``folder``/``filename``.

        ``filename`` defaults to the last path component of the URL.
        Filesystem and I/O errors are logged and end the calling thread
        through ``sys.exit()``; any other exception propagates.
        """
        if not os.path.exists(folder):
            try:
                # makedirs also creates a missing base path.
                os.makedirs(folder)
            except os.error as e:
                # Several workers reach this point at once; losing the race
                # to create the folder is not an error.
                if not os.path.isdir(folder):
                    logger.error('Error %s' % str(e))
                    sys.exit()

        filename = filename if filename else os.path.basename(urlparse(url).path)
        try:
            with open(os.path.join(folder, filename), "wb") as f:
                response = requests.get(url, stream=True, timeout=10)
                length = response.headers.get('content-length')
                if length is None:
                    f.write(response.content)
                else:
                    for chunk in response.iter_content(2048):
                        f.write(chunk)
        except (os.error, IOError) as e:
            logger.error('Error %s' % e)
            sys.exit()

        logger.info('%s %s downloaded.' % (threading.currentThread().getName(), url))

    def _download_thread(self, queue, folder=''):
        """Worker loop: download URLs from ``queue`` until it is empty."""
        while True:
            try:
                url = queue.get(False)
            except Queue.Empty:
                break
            try:
                logger.info('%s downloading: %s ...' % (threading.currentThread().getName(), url))
                self._download(url, folder)
            finally:
                # Exactly one task_done() per successful get(); calling it
                # on an empty queue raises ValueError once the counter
                # drops below zero.
                queue.task_done()

    def download(self, queue, folder=''):
        """Download every URL in ``queue`` into ``folder`` and wait for completion.

        :param queue: queue of URL strings
        :param folder: destination folder; converted with ``str()`` when it
            is not a string and prefixed with ``self.path`` when that is set
        """
        if not isinstance(folder, (str, unicode)):
            folder = str(folder)

        if self.path:
            folder = '%s/%s' % (self.path, folder)

        if os.path.exists(path=folder):
            logger.warn('Path \'%s\' already exist' % folder)
        else:
            logger.warn('Path \'%s\' not exist' % folder)

        for i in range(self.thread_count):
            _ = threading.Thread(target=self._download_thread, args=(queue, folder, ))
            _.setDaemon(True)
            self.threads.append(_)

        for thread in self.threads:
            thread.start()

        for thread in self.threads:
            thread.join()

        # clean threads list
        self.threads = []

154
nhentai/logger.py Normal file
View File

@ -0,0 +1,154 @@
import logging
#
# Copyright (C) 2010-2012 Vinay Sajip. All rights reserved. Licensed under the new BSD license.
#
import logging
import os
import re
import sys
class ColorizingStreamHandler(logging.StreamHandler):
    """StreamHandler whose formatted output is wrapped in ANSI colour codes.

    The colour is looked up per log level in ``level_map`` and is applied
    only when the stream reports itself as a TTY and ``disable_coloring``
    is false.
    """
    # color names to indices
    color_map = {
        'black': 0,
        'red': 1,
        'green': 2,
        'yellow': 3,
        'blue': 4,
        'magenta': 5,
        'cyan': 6,
        'white': 7,
    }

    # levels to (background, foreground, bold/intense)
    # NOTE(review): both branches below define identical mappings.
    if os.name == 'nt':
        level_map = {
            logging.DEBUG: (None, 'white', False),
            logging.INFO: (None, 'green', False),
            logging.WARNING: (None, 'yellow', False),
            logging.ERROR: (None, 'red', False),
            logging.CRITICAL: ('red', 'white', False)
        }
    else:
        level_map = {
            logging.DEBUG: (None, 'white', False),
            logging.INFO: (None, 'green', False),
            logging.WARNING: (None, 'yellow', False),
            logging.ERROR: (None, 'red', False),
            logging.CRITICAL: ('red', 'white', False)
        }
    # Escape prefix that opens a colour sequence, and the reset sequence
    # appended after a coloured message.
    csi = '\x1b['
    reset = '\x1b[0m'
    # When true, colorize() returns messages unchanged.
    disable_coloring = False

    @property
    def is_tty(self):
        """Truthy when the stream's isatty() is truthy and colouring is enabled."""
        isatty = getattr(self.stream, 'isatty', None)
        return isatty and isatty() and not self.disable_coloring

    # NOTE(review): nothing in this class calls output_colorized; colouring
    # is applied through format() -> colorize().
    if os.name != 'nt':
        def output_colorized(self, message):
            """Write ``message`` to the stream unchanged."""
            self.stream.write(message)
    else:
        # Matches an escape sequence and captures its ';'-separated numbers.
        ansi_esc = re.compile(r'\x1b\[((?:\d+)(?:;(?:\d+))*)m')

        # ANSI colour index -> Windows console attribute bits
        nt_color_map = {
            0: 0x00,    # black
            1: 0x04,    # red
            2: 0x02,    # green
            3: 0x06,    # yellow
            4: 0x01,    # blue
            5: 0x05,    # magenta
            6: 0x03,    # cyan
            7: 0x07,    # white
        }

        def output_colorized(self, message):
            """Write ``message``, turning escape sequences into console calls.

            The text between escape sequences is written to the stream; each
            sequence is converted to a console text attribute, but only when
            the stream's file descriptor is 1 or 2.
            """
            import ctypes
            # split() with one capture group alternates: text, params, text...
            parts = self.ansi_esc.split(message)
            write = self.stream.write
            h = None
            fd = getattr(self.stream, 'fileno', None)
            if fd is not None:
                fd = fd()
                if fd in (1, 2):  # stdout or stderr
                    # -10 - fd gives -11 for fd 1 and -12 for fd 2.
                    h = ctypes.windll.kernel32.GetStdHandle(-10 - fd)
            while parts:
                text = parts.pop(0)
                if text:
                    write(text)
                if parts:
                    params = parts.pop(0)
                    if h is not None:
                        params = [int(p) for p in params.split(';')]
                        color = 0
                        for p in params:
                            if 40 <= p <= 47:
                                # background colours go in the high nibble
                                color |= self.nt_color_map[p - 40] << 4
                            elif 30 <= p <= 37:
                                color |= self.nt_color_map[p - 30]
                            elif p == 1:
                                color |= 0x08  # foreground intensity on
                            elif p == 0:  # reset to default color
                                color = 0x07
                            else:
                                pass  # error condition ignored
                        ctypes.windll.kernel32.SetConsoleTextAttribute(h, color)

    def colorize(self, message, record):
        """Return ``message`` wrapped in the colour codes for the record's level.

        The message is returned unchanged when the level has no entry in
        ``level_map``, when ``is_tty`` is falsy, or when the message is empty.
        """
        if record.levelno in self.level_map and self.is_tty:
            bg, fg, bold = self.level_map[record.levelno]
            params = []
            # 40+index selects a background colour, 30+index a foreground one.
            if bg in self.color_map:
                params.append(str(self.color_map[bg] + 40))
            if fg in self.color_map:
                params.append(str(self.color_map[fg] + 30))
            if bold:
                params.append('1')
            if params and message:
                # Keep leading whitespace in front of the colour sequence.
                if message.lstrip() != message:
                    prefix = re.search(r"\s+", message).group(0)
                    message = message[len(prefix):]
                else:
                    prefix = ""
                message = "%s%s" % (prefix, ''.join((self.csi, ';'.join(params),
                                    'm', message, self.reset)))
        return message

    def format(self, record):
        """Format the record with the base class, then colourize the result."""
        message = logging.StreamHandler.format(self, record)
        return self.colorize(message, record)
# Give numeric level 15 the display name "INFO". This must run before the
# getLevelName("INFO") lookup below.
logging.addLevelName(15, "INFO")
logger = logging.getLogger('nhentai')
LOGGER_HANDLER = ColorizingStreamHandler(sys.stdout)
# Each line starts with a carriage return, then [HH:MM:SS] [LEVEL] message.
FORMATTER = logging.Formatter("\r[%(asctime)s] [%(levelname)s] %(message)s", "%H:%M:%S")
LOGGER_HANDLER.setFormatter(FORMATTER)
# NOTE(review): getLevelName("INFO") presumably resolves to 15 after the
# addLevelName call above, making level-15 records cyan; confirm.
# level_map is a class attribute, so this changes it for every handler.
LOGGER_HANDLER.level_map[logging.getLevelName("INFO")] = (None, "cyan", False)
logger.addHandler(LOGGER_HANDLER)
logger.setLevel(logging.DEBUG)

if __name__ == '__main__':
    # Manual demo: emit one record per level.
    logger.log(15, 'nhentai')
    logger.info('info')
    logger.warn('warn')
    logger.debug('debug')
    logger.error('error')
    logger.critical('critical')

76
nhentai/parser.py Normal file
View File

@ -0,0 +1,76 @@
import sys
import re
import requests
from bs4 import BeautifulSoup
from constant import DETAIL_URL, SEARCH_URL
from nhentai.logger import logger
def dojinshi_parser(id):
    """Fetch the detail page of a dojinshi and extract its metadata.

    :param id: gallery id, an int or a string of digits
    :return: dict with the keys ``id``, ``name``, ``subtitle``, ``img_id``,
        ``ext`` and ``pages`` (``pages`` is 0 when no page count is found)
    :raises Exception: if ``id`` is a string that is not all digits
    :raises SystemExit: if the request fails, the page lacks the info or
        cover section, or the image id cannot be extracted
    """
    # An int is never a str, so only non-numeric strings are rejected here.
    if isinstance(id, (str, )) and not id.isdigit():
        raise Exception('Dojinshi id(%s) is not valid' % str(id))
    id = int(id)
    logger.debug('Fetching dojinshi information of id %d' % id)
    dojinshi = dict()
    dojinshi['id'] = id
    url = '%s/%d/' % (DETAIL_URL, id)

    try:
        response = requests.get(url).content
    except Exception as e:
        # Format the exception itself: e.message is not a 2-tuple, so
        # '%s%s' % tuple(e.message) would raise while reporting the error.
        logger.critical(str(e))
        sys.exit()

    html = BeautifulSoup(response)
    dojinshi_info = html.find('div', attrs={'id': 'info'})
    dojinshi_cover = html.find('div', attrs={'id': 'cover'})
    if dojinshi_info is None or dojinshi_cover is None:
        # Without these sections the attribute lookups below would fail
        # with AttributeError on None.
        logger.critical('Dojinshi id(%d) page has no info or cover section' % id)
        sys.exit()

    title = dojinshi_info.find('h1').text
    subtitle = dojinshi_info.find('h2')

    dojinshi['name'] = title
    dojinshi['subtitle'] = subtitle.text if subtitle else ''

    img_id = re.search(r'/galleries/([\d]+)/cover\.(jpg|png)$', dojinshi_cover.a.img['src'])
    if not img_id:
        logger.critical('Tried yo get image id failed')
        sys.exit()
    dojinshi['img_id'] = img_id.group(1)
    dojinshi['ext'] = img_id.group(2)

    pages = 0
    for div in dojinshi_info.find_all('div', class_=''):
        # Use a separate name for the match so a failed search cannot
        # overwrite the numeric default with None.
        found = re.search(r'([\d]+) pages', div.text)
        if found:
            pages = int(found.group(1))
            break
    dojinshi['pages'] = pages
    return dojinshi
def search_parser(keyword, page):
    """Search dojinshis by keyword and return the entries of one result page.

    :param keyword: search query, sent as the ``q`` parameter
    :param page: result page number, sent as the ``page`` parameter
    :return: list of ``{'id': <digit string>, 'title': <caption text>}``
        dicts in page order; preview containers without a caption or a
        ``/g/<id>/`` link are skipped
    """
    logger.debug('Searching dojinshis of keyword %s' % keyword)
    result = []
    response = requests.get(SEARCH_URL, params={'q': keyword, 'page': page}).content
    html = BeautifulSoup(response)
    for preview in html.find_all('div', attrs={'class': 'preview-container'}):
        caption = preview.find('div', attrs={'class': 'caption'})
        link = preview.a
        matched = re.search(r'/g/(\d+)/', link.get('href', '')) if link is not None else None
        if caption is None or matched is None:
            # Skip containers that lack the expected caption or link rather
            # than failing on None.
            continue
        result.append({'id': matched.group(1), 'title': caption.text.strip()})
    return result
def print_dojinshi(dojinshi_list):
if not dojinshi_list:
return
logger.log(15, 'Print dojinshi list')
print '-' * 60
for dojinshi in dojinshi_list:
print dojinshi['id'], '-', dojinshi['title']
print '-' * 60
if __name__ == '__main__':
    # Manual smoke test: fetch one gallery and print its metadata dict.
    print(dojinshi_parser(32271))