multithreading download

2026-01-06 05:41:36 +01:00 · 2015-04-19 13:13:55 +08:00
parent 0a6285a868
commit 47c88050e4
7 changed files with 160 additions and 5 deletions
--- a/hentai/cmdline.py
+++ b/hentai/cmdline.py
@@ -1 +1,11 @@
-__author__ = 'ricter'
+from optparse import OptionParser
 def cmd_parser():
    """Build the option parser and parse the process command line.

    Recognised options: ``--search`` (string, stored as ``keyword``),
    ``--download`` (flag, stored as ``is_download``) and ``--id``
    (integer, stored as ``id``).

    Returns the parsed option values; positional arguments are discarded.
    """
    option_specs = (
        ('--search', dict(type='string', dest='keyword', action='store')),
        ('--download', dict(dest='is_download', action='store_true')),
        ('--id', dict(type='int', dest='id', action='store')),
    )
    option_parser = OptionParser()
    for flag, settings in option_specs:
        option_parser.add_option(flag, **settings)
    options = option_parser.parse_args()[0]
    return options
--- a/hentai/constant.py
+++ b/hentai/constant.py
@@ -1 +1,4 @@
-__author__ = 'ricter'
+SCHEMA = 'http://'  # URL scheme prefix shared by the addresses built below
 URL = '%snhentai.net' % SCHEMA  # site root: SCHEMA followed by the host name
 DETAIL_URL = '%s/g' % URL  # base of detail pages; dojinshi.py and parser.py append the numeric id
 IMAGE_URL = '%si.nhentai.net/galleries' % SCHEMA  # base of image addresses; dojinshi.py appends '/<id>/<page>.jpg'
--- a/hentai/dojinshi.py
+++ b/hentai/dojinshi.py
@@ -1 +1,40 @@
-__author__ = 'ricter'
+import Queue
 from constant import DETAIL_URL, IMAGE_URL
 class Dojinshi(object):
    """A single gallery: its metadata plus an optional downloader.

    The constructor stores ``name``, ``subtitle``, ``id`` and ``pages`` as
    attributes of the same names.  ``downloader`` starts as ``None`` and
    must be assigned by the caller before ``download()`` is used.  ``url``
    is the detail-page address built from ``DETAIL_URL`` and ``id``; it is
    ``None`` when no id was given.
    """

    def __init__(self, name=None, subtitle=None, id=None, pages=0):
        self.name = name
        self.subtitle = subtitle
        self.id = id
        self.pages = pages
        self.downloader = None
        # '%d' cannot format None, so building the URL unconditionally made
        # the default ``id=None`` raise TypeError inside the constructor.
        if id is None:
            self.url = None
        else:
            self.url = '%s/%d' % (DETAIL_URL, id)

    def __repr__(self):
        return '<Dojinshi: %s>' % self.name

    def show(self):
        """Print name, subtitle, URL and page count to standard output."""
        # Parenthesised single-argument print behaves the same on Python 2
        # and is also valid Python 3 syntax.
        print('Dojinshi: %s' % self.name)
        print('Subtitle: %s' % self.subtitle)
        print('URL: %s' % self.url)
        print('Pages: %d' % self.pages)

    def download(self):
        """Queue one image URL per page and hand the queue to the downloader.

        Pages are numbered from 1 to ``pages`` inclusive and each URL has
        the form ``IMAGE_URL/<id>/<page>.jpg``.

        Raises:
            Exception: if no downloader has been assigned.
        """
        if not self.downloader:
            raise Exception('Downloader has not be loaded')

        download_queue = Queue.Queue()
        for page in xrange(1, self.pages + 1):
            download_queue.put('%s/%d/%d.jpg' % (IMAGE_URL, self.id, page))
        self.downloader.download(download_queue)
 if __name__ == '__main__':
    # Manual smoke test: build a sample object, print it, and show that
    # download() fails while no downloader is attached.
    sample = Dojinshi(name='test hentai dojinshi', id=1)
    print(sample)
    sample.show()
    try:
        sample.download()
    except Exception as error:
        print('Exception: %s' % str(error))
--- a/hentai/downloader.py
+++ b/hentai/downloader.py
@@ -1 +1,37 @@
-__author__ = 'ricter'
+import threading
 import Queue
 class Downloader(object):
    """Drain a queue of URLs with a pool of worker threads.

    The transfer itself is not implemented here: each worker only prints
    the URL it took from the queue.

    Args:
        thread_count: number of worker threads started by each
            ``download()`` call (default 10, the previously fixed value).
    """

    def __init__(self, thread_count=10):
        self.thread_count = thread_count
        # Workers created by the most recent download() call.
        self.threads = []

    def _download(self, queue):
        """Worker loop: take URLs without blocking until the queue is empty."""
        while True:
            try:
                url = queue.get(False)
            except Queue.Empty:
                break
            try:
                print('Downloading: %s' % url)
            finally:
                # Exactly one task_done() per successful get(); calling it
                # on an empty queue raises ValueError once the counter of
                # unfinished tasks would drop below zero.
                queue.task_done()

    def download(self, queue):
        """Start the workers on ``queue`` and block until all have exited."""
        # Build a fresh pool per call: thread objects cannot be started
        # twice, so reusing an accumulated list broke repeated calls.
        self.threads = [
            threading.Thread(target=self._download, args=(queue, ))
            for _ in range(self.thread_count)
        ]
        for worker in self.threads:
            worker.start()
        for worker in self.threads:
            worker.join()
 if __name__ == '__main__':
    # Manual demo: push the integers 0..49 through the worker pool.
    demo_queue = Queue.Queue()
    for item in range(50):
        demo_queue.put(item)
    Downloader().download(demo_queue)
--- a/hentai/parser.py
+++ b/hentai/parser.py
@@ -1 +1,39 @@
-__author__ = 'ricter'
+import re
 import requests
 from bs4 import BeautifulSoup
 from constant import DETAIL_URL
 dojinshi_fields = ['Artists:']  # NOTE(review): not referenced in the visible code; presumably labels of info fields to parse - confirm
 def dojinshi_parser(id):
    """Fetch a detail page and extract its basic metadata.

    Args:
        id: the numeric id, given as an int or as a digit-only string.

    Returns:
        A dict with the keys ``id`` (int), ``name`` (text of the ``h1``
        inside the ``div`` whose id is ``info``), ``subtitle`` (text of
        the ``h2`` there, or ``''`` when absent) and ``pages`` (int taken
        from the first "<number> pages" text found, or 0 when none is
        found).

    Raises:
        Exception: if ``id`` is neither an int nor a digit-only string, or
            if the fetched page has no info block.
    """
    # The previous condition tested ``not isinstance(id, int)`` first, so
    # every string was rejected, including digit-only ones.
    is_integer = isinstance(id, int)
    is_digit_string = isinstance(id, str) and id.isdigit()
    if not (is_integer or is_digit_string):
        raise Exception('Dojinshi id(%s) is not valid' % str(id))
    id = int(id)

    dojinshi = dict()
    dojinshi['id'] = id
    url = '%s/%d/' % (DETAIL_URL, id)

    response = requests.get(url).content
    html = BeautifulSoup(response)

    dojinshi_info = html.find('div', attrs={'id': 'info'})
    if dojinshi_info is None:
        # find() returns None when nothing matches; fail with a clear
        # message instead of an AttributeError on the next line.
        raise Exception('Dojinshi id(%d) info block was not found' % id)

    title = dojinshi_info.find('h1').text
    subtitle = dojinshi_info.find('h2')
    dojinshi['name'] = title
    dojinshi['subtitle'] = subtitle.text if subtitle else ''

    # Keep the match in its own variable: reusing ``pages`` for it left
    # None behind when no block matched, and int(None) then raised.
    pages = 0
    for block in dojinshi_info.find_all('div', class_=''):
        match = re.search(r'([\d]+) pages', block.text)
        if match:
            pages = int(match.group(1))
            break
    dojinshi['pages'] = pages
    return dojinshi
 if __name__ == '__main__':
    # Manual check: fetch one fixed id and print the parsed dict.
    parsed = dojinshi_parser(32271)
    print(parsed)
--- a/nhentai.py
+++ b/nhentai.py
@@ -0,0 +1,26 @@
 from hentai.cmdline import cmd_parser
 from hentai.parser import dojinshi_parser
 from hentai.dojinshi import Dojinshi
 from hentai.downloader import Downloader
 def main():
    """Command-line entry point.

    With ``--id`` the detail page is parsed, its summary is printed, and
    the images are handed to a ``Downloader`` when ``--download`` is also
    given.  With ``--search`` the program exits with an error message,
    because searching is not implemented.  With neither option it exits
    silently.

    Raises:
        SystemExit: when ``--id`` is not supplied.
    """
    options = cmd_parser()

    if options.id:
        dojinshi_info = dojinshi_parser(options.id)
        dojinshi = Dojinshi(**dojinshi_info)
    elif options.keyword:
        # This branch used to fall through with ``dojinshi`` still None and
        # crash on ``dojinshi.show()``; stop with an explicit message.
        raise SystemExit('Searching by keyword is not supported yet')
    else:
        raise SystemExit

    dojinshi.show()
    if options.is_download:
        dojinshi.downloader = Downloader()
        dojinshi.download()
 # Run the command-line entry point only when this file is executed as a script.
 if __name__ == '__main__':
    main()
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,3 @@
 requests>=2.5.0
 wget>=2.2
 BeautifulSoup4>=4.0.0