mirror of
https://github.com/RicterZ/nhentai.git
synced 2025-04-19 10:21:19 +02:00
multithreading download
This commit is contained in:
parent
0a6285a868
commit
47c88050e4
@ -1 +1,11 @@
|
||||
__author__ = 'ricter'
|
||||
from optparse import OptionParser
|
||||
|
||||
|
||||
def cmd_parser(args=None):
    """Parse the command-line options of the tool.

    Recognised options:
      --search KEYWORD  stored as ``keyword`` (string)
      --download        stored as ``is_download`` (flag, None when absent)
      --id ID           stored as ``id`` (int)

    :param args: optional list of argument strings to parse instead of
        ``sys.argv[1:]``; the default ``None`` reads the real command line.
    :return: the ``optparse.Values`` object holding the parsed options;
        positional arguments are discarded.
    """
    parser = OptionParser()
    parser.add_option('--search', type='string', dest='keyword',
                      action='store', help='keyword to search for')
    parser.add_option('--download', dest='is_download', action='store_true',
                      help='download the dojinshi after showing its details')
    parser.add_option('--id', type='int', dest='id', action='store',
                      help='numeric id of the dojinshi')

    # parse_args(None) falls back to sys.argv[1:].
    options, _ = parser.parse_args(args)
    return options
|
||||
|
@ -1 +1,4 @@
|
||||
__author__ = 'ricter'
|
||||
# URL building blocks; every constant below is derived from SCHEMA.
SCHEMA = 'http://'
# Site root.
URL = '%snhentai.net' % (SCHEMA,)
# Prefix of a gallery detail page; callers append '/<id>'.
DETAIL_URL = '%s/g' % (URL,)
# Prefix of the image host; callers append '/<id>/<page>.jpg'.
IMAGE_URL = '%si.nhentai.net/galleries' % (SCHEMA,)
|
@ -1 +1,40 @@
|
||||
__author__ = 'ricter'
|
||||
import Queue
|
||||
from constant import DETAIL_URL, IMAGE_URL
|
||||
|
||||
|
||||
class Dojinshi(object):
    """A single dojinshi together with the data needed to download it.

    Attributes:
        name: title of the dojinshi.
        subtitle: secondary title.
        id: numeric id, or None when not known.
        pages: number of pages.
        downloader: object exposing ``download(queue)``; must be assigned
            before :meth:`download` is called.
        url: detail-page URL built from DETAIL_URL and ``id``, or None when
            ``id`` is None.
    """

    def __init__(self, name=None, subtitle=None, id=None, pages=0):
        self.name = name
        self.subtitle = subtitle
        self.id = id
        self.pages = pages
        self.downloader = None
        # '%d' cannot format None, so only build the URL when an id is given.
        self.url = None if id is None else '%s/%d' % (DETAIL_URL, id)

    def __repr__(self):
        return '<Dojinshi: %s>' % self.name

    def show(self):
        """Print name, subtitle, URL and page count to stdout."""
        print('Dojinshi: %s' % self.name)
        print('Subtitle: %s' % self.subtitle)
        print('URL: %s' % self.url)
        print('Pages: %d' % self.pages)

    def download(self):
        """Queue every page image URL and hand the queue to the downloader.

        :raises Exception: if no downloader has been assigned, or if the
            dojinshi has no id to build image URLs from.
        """
        if not self.downloader:
            raise Exception('Downloader has not been loaded')
        if self.id is None:
            raise Exception('Dojinshi id is not set')

        download_queue = Queue.Queue()
        # Image files are requested as 1.jpg .. <pages>.jpg.
        for page in range(1, self.pages + 1):
            download_queue.put('%s/%d/%d.jpg' % (IMAGE_URL, self.id, page))
        self.downloader.download(download_queue)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: build a sample object, display it, then attempt a
    # download and report whatever exception it raises.
    sample = Dojinshi(name='test hentai dojinshi', id=1)
    print(sample)
    sample.show()
    try:
        sample.download()
    except Exception as err:
        print('Exception: %s' % str(err))
|
@ -1 +1,37 @@
|
||||
__author__ = 'ricter'
|
||||
import threading
|
||||
import Queue
|
||||
|
||||
|
||||
class Downloader(object):
    """Drains a queue of URLs using a pool of worker threads.

    NOTE: workers currently only print each URL; nothing is fetched.
    """

    def __init__(self, thread_count=10):
        """
        :param thread_count: number of worker threads started by each
            :meth:`download` call.
        """
        self.thread_count = thread_count
        # Workers created by the most recent download() call.
        self.threads = []

    def _download(self, queue):
        """Worker loop: take URLs from ``queue`` until it is empty."""
        while True:
            try:
                # Non-blocking get: another worker may have taken the last
                # item, in which case Empty simply ends this worker.
                url = queue.get(False)
            except Queue.Empty:
                break
            try:
                print('Downloading: %s' % url)
            finally:
                # Exactly one task_done() per successful get(); calling it
                # without a matching get() raises ValueError.
                queue.task_done()

    def download(self, queue):
        """Process every item in ``queue`` and block until workers finish.

        :param queue: a ``Queue.Queue`` holding the URLs to process.
        """
        # A Thread object can be started only once, so build a fresh pool on
        # every call rather than re-starting workers from an earlier call.
        self.threads = [
            threading.Thread(target=self._download, args=(queue, ))
            for _ in range(self.thread_count)
        ]

        for worker in self.threads:
            worker.start()

        for worker in self.threads:
            worker.join()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: push the integers 0..49 through a Downloader.
    demo_queue = Queue.Queue()
    for number in range(0, 50):
        demo_queue.put(number)
    Downloader().download(demo_queue)
|
||||
|
@ -1 +1,39 @@
|
||||
__author__ = 'ricter'
|
||||
import re
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from constant import DETAIL_URL
|
||||
|
||||
|
||||
# Presumably the labels of info fields to pick out of the detail page.
# NOTE(review): not referenced by any code visible here — confirm the
# intended use before relying on it.
dojinshi_fields = ['Artists:']
|
||||
|
||||
|
||||
def dojinshi_parser(id):
    """Fetch a dojinshi detail page and extract its basic metadata.

    :param id: the dojinshi id, as an int or a string of digits.
    :return: dict with keys ``id`` (int), ``name``, ``subtitle`` ('' when
        the page has no <h2>) and ``pages`` (int, 0 when no page count is
        found).
    :raises Exception: if ``id`` is neither an int nor a digit string, or
        if the fetched page has no ``<div id="info">`` element.
    """
    # Accept ints and digit-only strings; everything else is rejected.
    is_digit_string = isinstance(id, str) and id.isdigit()
    if not (isinstance(id, int) or is_digit_string):
        raise Exception('Dojinshi id(%s) is not valid' % str(id))
    id = int(id)
    dojinshi = dict()
    dojinshi['id'] = id
    url = '%s/%d/' % (DETAIL_URL, id)

    response = requests.get(url).content
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    html = BeautifulSoup(response, 'html.parser')
    dojinshi_info = html.find('div', attrs={'id': 'info'})
    if dojinshi_info is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise Exception('Dojinshi info not found at %s' % url)

    title = dojinshi_info.find('h1').text
    subtitle = dojinshi_info.find('h2')

    dojinshi['name'] = title
    dojinshi['subtitle'] = subtitle.text if subtitle else ''

    # Keep the match in its own variable so that ``pages`` stays 0 when no
    # div carries a "<n> pages" text.
    pages = 0
    for block in dojinshi_info.find_all('div', class_=''):
        match = re.search(r'([\d]+) pages', block.text)
        if match:
            pages = int(match.group(1))
            break
    dojinshi['pages'] = pages
    return dojinshi
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: parse dojinshi 32271 and print the resulting dict.
    parsed = dojinshi_parser(32271)
    print(parsed)
|
26
nhentai.py
26
nhentai.py
@ -0,0 +1,26 @@
|
||||
from hentai.cmdline import cmd_parser
|
||||
from hentai.parser import dojinshi_parser
|
||||
from hentai.dojinshi import Dojinshi
|
||||
from hentai.downloader import Downloader
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: show a dojinshi and optionally download it.

    Exits via SystemExit when neither --id nor --search is given, and also
    when only --search is given, because searching is not implemented.
    """
    options = cmd_parser()

    if options.id:
        dojinshi_info = dojinshi_parser(options.id)
        dojinshi = Dojinshi(**dojinshi_info)
    elif options.keyword:
        # There is no search implementation yet; exit with a message rather
        # than falling through with no dojinshi and crashing on show().
        raise SystemExit('Searching by keyword is not implemented yet')
    else:
        raise SystemExit

    dojinshi.show()
    if options.is_download:
        dojinshi.downloader = Downloader()
        dojinshi.download()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Run the command-line entry point only when executed as a script.
    main()
|
@ -0,0 +1,3 @@
|
||||
requests>=2.5.0
|
||||
wget>=2.2
|
||||
BeautifulSoup4>=4.0.0
|
Loading…
x
Reference in New Issue
Block a user