update search_parser and use tabulate

This commit is contained in:
Ricter Z 2016-02-02 13:25:37 +08:00
parent b85af08ca0
commit 3da4418f22
5 changed files with 14 additions and 15 deletions

View File

@@ -1,4 +1,4 @@
#coding: utf-8
# coding: utf-8
from __future__ import print_function
from optparse import OptionParser
from logger import logger
@@ -27,7 +27,7 @@ def cmd_parser():
help='page number of search result')
parser.add_option('--path', type='string', dest='saved_path', action='store', default='',
help='path which save the dojinshi')
parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=1,
parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
help='thread count of download dojinshi')
parser.add_option('--timeout', type='int', dest='timeout', action='store', default=30,
help='timeout of download dojinshi')

View File

@@ -19,10 +19,9 @@ def main():
if options.keyword:
dojinshis = search_parser(options.keyword, options.page)
print_dojinshi(dojinshis)
if options.is_download:
dojinshi_ids = map(lambda d: d['id'], dojinshis)
else:
print_dojinshi(dojinshis)
else:
dojinshi_ids = options.ids
@@ -31,7 +30,6 @@ def main():
dojinshi_info = dojinshi_parser(id)
dojinshi_list.append(Dojinshi(**dojinshi_info))
else:
logger.log(15, 'Nothing has been done.')
raise SystemExit
if options.is_download:

View File

@@ -61,7 +61,7 @@ class Downloader(object):
if not os.path.exists(folder):
logger.warn('Path \'%s\' not exist.' % folder)
try:
os.mkdir(folder)
os.makedirs(folder)
except EnvironmentError as e:
logger.critical('Error: %s' % str(e))
raise SystemExit

View File

@@ -5,6 +5,7 @@ import requests
from bs4 import BeautifulSoup
from constant import DETAIL_URL, SEARCH_URL
from logger import logger
from tabulate import tabulate
def dojinshi_parser(id):
@@ -54,10 +55,11 @@ def search_parser(keyword, page):
result = []
response = requests.get(SEARCH_URL, params={'q': keyword, 'page': page}).content
html = BeautifulSoup(response)
dojinshi_search_result = html.find_all('div', attrs={'class': 'preview-container'})
dojinshi_search_result = html.find_all('div', attrs={'class': 'gallery'})
for dojinshi in dojinshi_search_result:
dojinshi_container = dojinshi.find('div', attrs={'class': 'caption'})
title = dojinshi_container.text.strip()
title = (title[:85] + '..') if len(title) > 85 else title
id_ = re.search('/g/(\d+)/', dojinshi.a['href']).group(1)
result.append({'id': id_, 'title': title})
return result
@@ -66,12 +68,10 @@ def search_parser(keyword, page):
def print_dojinshi(dojinshi_list):
if not dojinshi_list:
return
logger.log(15, 'Print dojinshi list')
print('-' * 60)
for dojinshi in dojinshi_list:
print(dojinshi['id'], '-', dojinshi['title'])
print('-' * 60)
dojinshi_list = [i.values() for i in dojinshi_list]
headers = ['id', 'dojinshi']
logger.info('Search Result\n' +
tabulate(tabular_data=dojinshi_list, headers=headers, tablefmt='rst'))
if __name__ == '__main__':
print(dojinshi_parser(32271))
print(dojinshi_parser("32271"))

View File

@@ -1,3 +1,4 @@
requests>=2.5.0
BeautifulSoup4>=4.0.0
threadpool>=1.2.7
threadpool>=1.2.7
tabulate>=0.7.5