mirror of
				https://github.com/RicterZ/nhentai.git
				synced 2025-11-04 02:50:55 +01:00 
			
		
		
		
	multithreading download
This commit is contained in:
		@@ -1 +1,11 @@
 | 
			
		||||
__author__ = 'ricter'
 | 
			
		||||
from optparse import OptionParser
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cmd_parser():
    """Parse the process command line and return the option values.

    Recognised options:
      --search KEYWORD  stored as a string in ``keyword``
      --download        stores True in ``is_download`` when present
      --id N            stored as an int in ``id``

    Positional arguments are parsed but discarded.
    """
    option_specs = (
        ('--search', dict(type='string', dest='keyword', action='store')),
        ('--download', dict(dest='is_download', action='store_true')),
        ('--id', dict(type='int', dest='id', action='store')),
    )

    option_parser = OptionParser()
    for flag, settings in option_specs:
        option_parser.add_option(flag, **settings)

    # parse_args() returns (options, positional_args); only options are used.
    options = option_parser.parse_args()[0]
    return options
 | 
			
		||||
 
 | 
			
		||||
@@ -1 +1,4 @@
 | 
			
		||||
__author__ = 'ricter'
 | 
			
		||||
# URL building blocks; each constant is derived from the ones above it.
SCHEMA = 'http://'
# Site root.
URL = SCHEMA + 'nhentai.net'
# Prefix of a gallery detail page; the numeric id is appended as '/<id>'.
DETAIL_URL = URL + '/g'
# Prefix of page images; '/<id>/<page>.jpg' is appended.
IMAGE_URL = SCHEMA + 'i.nhentai.net/galleries'
 | 
			
		||||
@@ -1 +1,40 @@
 | 
			
		||||
__author__ = 'ricter'
 | 
			
		||||
import Queue
 | 
			
		||||
from constant import DETAIL_URL, IMAGE_URL
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Dojinshi(object):
    """A single dojinshi (gallery) and the operations available on it.

    Attributes:
        name: main title.
        subtitle: secondary title.
        id: numeric gallery id, used to build the detail and image URLs.
        pages: number of pages; page images are numbered 1..pages.
        downloader: object exposing ``download(queue)``; starts as None and
            must be assigned by the caller before ``download()`` is used.
        url: detail page URL, or None when no id was given.
    """

    def __init__(self, name=None, subtitle=None, id=None, pages=0):
        self.name = name
        self.subtitle = subtitle
        self.id = id
        self.pages = pages
        self.downloader = None
        # '%d' cannot format None, so building the URL unconditionally made
        # the declared default ``id=None`` raise TypeError in the constructor.
        if id is None:
            self.url = None
        else:
            self.url = '%s/%d' % (DETAIL_URL, id)

    def __repr__(self):
        return '<Dojinshi: %s>' % self.name

    def show(self):
        """Print name, subtitle, URL and page count to stdout."""
        print('Dojinshi: %s' % self.name)
        print('Subtitle: %s' % self.subtitle)
        print('URL: %s' % self.url)
        print('Pages: %d' % self.pages)

    def download(self):
        """Queue every page image URL and hand the queue to the downloader.

        Raises:
            Exception: if no downloader has been assigned.
        """
        if not self.downloader:
            raise Exception('Downloader has not be loaded')

        download_queue = Queue.Queue()
        for page in xrange(1, self.pages + 1):
            download_queue.put('%s/%d/%d.jpg' % (IMAGE_URL, self.id, page))
        self.downloader.download(download_queue)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
    # Smoke test: no downloader is attached, so download() raises and the
    # message is printed instead of propagating.
    sample = Dojinshi(name='test hentai dojinshi', id=1)
    print(sample)
    sample.show()
    try:
        sample.download()
    except Exception as err:
        print('Exception: %s' % str(err))
 | 
			
		||||
@@ -1 +1,37 @@
 | 
			
		||||
__author__ = 'ricter'
 | 
			
		||||
import threading
 | 
			
		||||
import Queue
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Downloader(object):
    """Drain a queue of URLs using a pool of worker threads.

    NOTE: workers currently only print each URL; nothing is fetched here.

    Args:
        thread_count: number of worker threads started per ``download()``
            call (defaults to 10).
    """

    def __init__(self, thread_count=10):
        self.thread_count = thread_count
        # Workers of the most recent download() call.
        self.threads = []

    def _download(self, queue):
        """Worker loop: take URLs off ``queue`` until it is empty."""
        while True:
            # A non-blocking get is the only race-free emptiness test; a
            # separate qsize() check can go stale before the get runs.
            try:
                url = queue.get(False)
            except Queue.Empty:
                return
            try:
                print('Downloading: %s' % url)
            finally:
                # Exactly one task_done() per item taken. Calling it on an
                # empty queue raised ValueError when there were fewer items
                # than workers and left the queue's accounting wrong.
                queue.task_done()

    def download(self, queue):
        """Process every item in ``queue`` and block until all workers exit."""
        # Threads cannot be restarted, so build a fresh pool on every call
        # instead of appending to (and re-starting) the previous one.
        self.threads = [
            threading.Thread(target=self._download, args=(queue, ))
            for _ in range(self.thread_count)
        ]

        for worker in self.threads:
            worker.start()

        for worker in self.threads:
            worker.join()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
    # Smoke test: push 50 dummy items through the worker pool.
    demo_queue = Queue.Queue()
    for item in range(50):
        demo_queue.put(item)
    Downloader().download(demo_queue)
 | 
			
		||||
 
 | 
			
		||||
@@ -1 +1,39 @@
 | 
			
		||||
__author__ = 'ricter'
 | 
			
		||||
import re
 | 
			
		||||
import requests
 | 
			
		||||
from bs4 import BeautifulSoup
 | 
			
		||||
from constant import DETAIL_URL
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
dojinshi_fields = ['Artists:']
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def dojinshi_parser(id):
    """Fetch a dojinshi detail page and extract its basic metadata.

    Args:
        id: gallery id, either an int or a string of digits.

    Returns:
        dict with keys 'id' (int), 'name', 'subtitle' ('' when the page has
        no <h2>) and 'pages' (int, 0 when no page count is found).

    Raises:
        Exception: if ``id`` is not a valid id, or the fetched page has no
            ``<div id="info">`` block.
    """
    # The previous check was `not isinstance(id, int) or (...)`, which
    # rejected every string, including the digit strings the second clause
    # was meant to allow.
    is_digit_string = isinstance(id, str) and id.isdigit()
    if not (isinstance(id, int) or is_digit_string):
        raise Exception('Dojinshi id(%s) is not valid' % str(id))
    id = int(id)

    dojinshi = dict()
    dojinshi['id'] = id
    url = '%s/%d/' % (DETAIL_URL, id)

    response = requests.get(url).content
    # NOTE(review): no parser is named, so bs4 picks whichever is installed.
    html = BeautifulSoup(response)
    dojinshi_info = html.find('div', attrs={'id': 'info'})
    if dojinshi_info is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise Exception('Dojinshi id(%d) info block not found at %s' % (id, url))

    title = dojinshi_info.find('h1').text
    subtitle = dojinshi_info.find('h2')

    dojinshi['name'] = title
    dojinshi['subtitle'] = subtitle.text if subtitle else ''

    # Keep the match in its own variable: reusing `pages` for it left None
    # behind when no div matched, and int(None) then raised TypeError.
    pages = 0
    for block in dojinshi_info.find_all('div', class_=''):
        match = re.search(r'([\d]+) pages', block.text)
        if match:
            pages = int(match.group(1))
            break
    dojinshi['pages'] = pages
    return dojinshi
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
    # Manual check against one gallery id; dojinshi_parser issues an HTTP
    # request, so this needs network access.
    parsed = dojinshi_parser(32271)
    print(parsed)
 | 
			
		||||
		Reference in New Issue
	
	Block a user