Merge pull request #28 from tbinavsl/master

Max retries + misc. language fixes
This commit is contained in:
Ricter Zheng 2018-09-28 13:28:44 +08:00 committed by GitHub
commit 86c31f9b5e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 103 additions and 49 deletions

View File

@ -40,23 +40,23 @@ def cmd_parser():
'\n\nEnvironment Variable:\n' '\n\nEnvironment Variable:\n'
' NHENTAI nhentai mirror url') ' NHENTAI nhentai mirror url')
parser.add_option('--download', dest='is_download', action='store_true', parser.add_option('--download', dest='is_download', action='store_true',
help='download doujinshi (for search result)') help='download doujinshi (for search results)')
parser.add_option('--show-info', dest='is_show', action='store_true', help='just show the doujinshi information') parser.add_option('--show-info', dest='is_show', action='store_true', help='just show the doujinshi information')
parser.add_option('--id', type='string', dest='id', action='store', help='doujinshi ids set, e.g. 1,2,3') parser.add_option('--id', type='string', dest='id', action='store', help='doujinshi ids set, e.g. 1,2,3')
parser.add_option('--search', type='string', dest='keyword', action='store', help='search doujinshi by keyword') parser.add_option('--search', type='string', dest='keyword', action='store', help='search doujinshi by keyword')
parser.add_option('--page', type='int', dest='page', action='store', default=1, parser.add_option('--page', type='int', dest='page', action='store', default=1,
help='page number of search result') help='page number of search results')
parser.add_option('--tag', type='string', dest='tag', action='store', help='download doujinshi by tag') parser.add_option('--tag', type='string', dest='tag', action='store', help='download doujinshi by tag')
parser.add_option('--max-page', type='int', dest='max_page', action='store', default=1, parser.add_option('--max-page', type='int', dest='max_page', action='store', default=1,
help='The max page when recursively downloading tagged doujinshi') help='The max page when recursively downloading tagged doujinshi')
parser.add_option('--output', type='string', dest='output_dir', action='store', default='', parser.add_option('--output', type='string', dest='output_dir', action='store', default='',
help='output dir') help='output dir')
parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5, parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
help='thread count of download doujinshi') help='thread count for downloading doujinshi')
parser.add_option('--timeout', type='int', dest='timeout', action='store', default=30, parser.add_option('--timeout', type='int', dest='timeout', action='store', default=30,
help='timeout of download doujinshi') help='timeout for downloading doujinshi')
parser.add_option('--proxy', type='string', dest='proxy', action='store', default='', parser.add_option('--proxy', type='string', dest='proxy', action='store', default='',
help='use proxy, example: http://127.0.0.1:1080') help='uses a proxy, for example: http://127.0.0.1:1080')
parser.add_option('--html', dest='html_viewer', action='store_true', parser.add_option('--html', dest='html_viewer', action='store_true',
help='generate a html viewer at current directory') help='generate a html viewer at current directory')

View File

@ -23,7 +23,7 @@ def main():
if options.login: if options.login:
username, password = options.login.split(':', 1) username, password = options.login.split(':', 1)
logger.info('Login to nhentai use credential \'%s:%s\'' % (username, '*' * len(password))) logger.info('Logging in to nhentai using credential pair \'%s:%s\'' % (username, '*' * len(password)))
for doujinshi_info in login_parser(username=username, password=password): for doujinshi_info in login_parser(username=username, password=password):
doujinshi_list.append(Doujinshi(**doujinshi_info)) doujinshi_list.append(Doujinshi(**doujinshi_info))
@ -68,7 +68,7 @@ def main():
def signal_handler(signal, frame): def signal_handler(signal, frame):
logger.error('Ctrl-C signal received. Quit.') logger.error('Ctrl-C signal received. Stopping...')
exit(1) exit(1)

View File

@ -11,6 +11,7 @@ from nhentai.utils import format_filename
EXT_MAP = { EXT_MAP = {
'j': 'jpg', 'j': 'jpg',
'p': 'png', 'p': 'png',
'g': 'gif',
} }
@ -53,7 +54,7 @@ class Doujinshi(object):
logger.info(u'Print doujinshi information of {0}\n{1}'.format(self.id, tabulate(table))) logger.info(u'Print doujinshi information of {0}\n{1}'.format(self.id, tabulate(table)))
def download(self): def download(self):
logger.info('Start download doujinshi: %s' % self.name) logger.info('Starting to download doujinshi: %s' % self.name)
if self.downloader: if self.downloader:
download_queue = [] download_queue = []
for i in range(len(self.ext)): for i in range(len(self.ext)):
@ -61,7 +62,7 @@ class Doujinshi(object):
self.downloader.download(download_queue, format_filename('%s-%s' % (self.id, self.name[:200]))) self.downloader.download(download_queue, format_filename('%s-%s' % (self.id, self.name[:200])))
else: else:
logger.critical('Downloader has not be loaded') logger.critical('Downloader has not been loaded')
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -32,17 +32,27 @@ class Downloader(Singleton):
self.timeout = timeout self.timeout = timeout
def _download(self, url, folder='', filename='', retried=0): def _download(self, url, folder='', filename='', retried=0):
logger.info('Start downloading: {0} ...'.format(url)) logger.info('Starting to download {0} ...'.format(url))
filename = filename if filename else os.path.basename(urlparse(url).path) filename = filename if filename else os.path.basename(urlparse(url).path)
base_filename, extension = os.path.splitext(filename) base_filename, extension = os.path.splitext(filename)
try: try:
if os.path.exists(os.path.join(folder, base_filename.zfill(3) + extension)): if os.path.exists(os.path.join(folder, base_filename.zfill(3) + extension)):
logger.warning('File: {0} existed, ignore.'.format(os.path.join(folder, base_filename.zfill(3) + logger.warning('File: {0} exists, ignoring'.format(os.path.join(folder, base_filename.zfill(3) +
extension))) extension)))
return 1, url return 1, url
with open(os.path.join(folder, base_filename.zfill(3) + extension), "wb") as f: with open(os.path.join(folder, base_filename.zfill(3) + extension), "wb") as f:
i=0
while i<10:
try:
response = request('get', url, stream=True, timeout=self.timeout) response = request('get', url, stream=True, timeout=self.timeout)
except Exception as e:
i+=1
if not i<10:
logger.critical(str(e))
return 0, None
continue
break
if response.status_code != 200: if response.status_code != 200:
raise NhentaiImageNotExistException raise NhentaiImageNotExistException
length = response.headers.get('content-length') length = response.headers.get('content-length')
@ -77,7 +87,7 @@ class Downloader(Singleton):
elif result == -1: elif result == -1:
logger.warning('url {} returns status code 404'.format(data)) logger.warning('url {} returns status code 404'.format(data))
else: else:
logger.log(15, '{0} download successfully'.format(data)) logger.log(15, '{0} downloaded successfully'.format(data))
def download(self, queue, folder=''): def download(self, queue, folder=''):
if not isinstance(folder, text): if not isinstance(folder, text):
@ -87,7 +97,7 @@ class Downloader(Singleton):
folder = os.path.join(self.path, folder) folder = os.path.join(self.path, folder)
if not os.path.exists(folder): if not os.path.exists(folder):
logger.warn('Path \'{0}\' not exist.'.format(folder)) logger.warn('Path \'{0}\' does not exist, creating.'.format(folder))
try: try:
os.makedirs(folder) os.makedirs(folder)
except EnvironmentError as e: except EnvironmentError as e:

View File

@ -5,6 +5,7 @@ import os
import re import re
import threadpool import threadpool
import requests import requests
import time
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from tabulate import tabulate from tabulate import tabulate
@ -40,14 +41,14 @@ def login_parser(username, password):
'password': password, 'password': password,
} }
resp = s.post(constant.LOGIN_URL, data=login_dict) resp = s.post(constant.LOGIN_URL, data=login_dict)
if 'Invalid username (or email) or password' in resp.text: if 'Invalid username/email or password' in resp.text:
logger.error('Login failed, please check your username and password') logger.error('Login failed, please check your username and password')
exit(1) exit(1)
html = BeautifulSoup(s.get(constant.FAV_URL).content, 'html.parser') html = BeautifulSoup(s.get(constant.FAV_URL).content, 'html.parser')
count = html.find('span', attrs={'class': 'count'}) count = html.find('span', attrs={'class': 'count'})
if not count: if not count:
logger.error('Cannot get count of your favorites, maybe login failed.') logger.error("Can't get your number of favorited doujins. Did the login fail?")
count = int(count.text.strip('(').strip(')')) count = int(count.text.strip('(').strip(')'))
if count == 0: if count == 0:
@ -60,7 +61,7 @@ def login_parser(username, password):
else: else:
pages = 1 pages = 1
logger.info('Your have %d favorites in %d pages.' % (count, pages)) logger.info('You have %d favorites in %d pages.' % (count, pages))
if os.getenv('DEBUG'): if os.getenv('DEBUG'):
pages = 1 pages = 1
@ -75,7 +76,7 @@ def login_parser(username, password):
for page in range(1, pages+1): for page in range(1, pages+1):
try: try:
logger.info('Getting doujinshi id of page %d' % page) logger.info('Getting doujinshi ids of page %d' % page)
resp = s.get(constant.FAV_URL + '?page=%d' % page).text resp = s.get(constant.FAV_URL + '?page=%d' % page).text
ids = doujinshi_id.findall(resp) ids = doujinshi_id.findall(resp)
requests_ = threadpool.makeRequests(doujinshi_parser, ids, _callback) requests_ = threadpool.makeRequests(doujinshi_parser, ids, _callback)
@ -92,16 +93,21 @@ def doujinshi_parser(id_):
raise Exception('Doujinshi id({0}) is not valid'.format(id_)) raise Exception('Doujinshi id({0}) is not valid'.format(id_))
id_ = int(id_) id_ = int(id_)
logger.log(15, 'Fetching doujinshi information of id {0}'.format(id_)) logger.log(15, 'Fetching information of doujinshi id {0}'.format(id_))
doujinshi = dict() doujinshi = dict()
doujinshi['id'] = id_ doujinshi['id'] = id_
url = '{0}/{1}'.format(constant.DETAIL_URL, id_) url = '{0}/{1}'.format(constant.DETAIL_URL, id_)
i=0
while i<5:
try: try:
response = request('get', url).json() response = request('get', url).json()
except Exception as e: except Exception as e:
i+=1
if not i<5:
logger.critical(str(e)) logger.critical(str(e))
exit(1) exit(1)
continue
break
doujinshi['name'] = response['title']['english'] doujinshi['name'] = response['title']['english']
doujinshi['subtitle'] = response['title']['japanese'] doujinshi['subtitle'] = response['title']['japanese']
@ -130,16 +136,23 @@ def doujinshi_parser(id_):
def search_parser(keyword, page): def search_parser(keyword, page):
logger.debug('Searching doujinshis of keyword {0}'.format(keyword)) logger.debug('Searching doujinshis using keywords {0}'.format(keyword))
result = [] result = []
i=0
while i<5:
try: try:
response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page}).json() response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page}).json()
if 'result' not in response: except Exception as e:
raise Exception('No result in response') i+=1
except requests.ConnectionError as e: if not i<5:
logger.critical(e) logger.critical(str(e))
logger.warn('If you are in China, please configure the proxy to fu*k GFW.') logger.warn('If you are in China, please configure the proxy to fu*k GFW.')
exit(1) exit(1)
continue
break
if 'result' not in response:
raise Exception('No result in response')
for row in response['result']: for row in response['result']:
title = row['title']['english'] title = row['title']['english']
@ -147,7 +160,7 @@ def search_parser(keyword, page):
result.append({'id': row['id'], 'title': title}) result.append({'id': row['id'], 'title': title})
if not result: if not result:
logger.warn('Not found anything of keyword {}'.format(keyword)) logger.warn('No results for keywords {}'.format(keyword))
return result return result
@ -157,29 +170,48 @@ def print_doujinshi(doujinshi_list):
return return
doujinshi_list = [(i['id'], i['title']) for i in doujinshi_list] doujinshi_list = [(i['id'], i['title']) for i in doujinshi_list]
headers = ['id', 'doujinshi'] headers = ['id', 'doujinshi']
data = tabulate(tabular_data=doujinshi_list, headers=headers, tablefmt='rst') logger.info('Search Result\n' +
logger.info('Search Result\n{}'.format(data)) tabulate(tabular_data=doujinshi_list, headers=headers, tablefmt='rst'))
def tag_parser(tag_id, max_page=1): def tag_parser(tag_id, max_page=1):
logger.info('Get doujinshi of tag id: {0}'.format(tag_id)) logger.info('Searching for doujinshi with tag id {0}'.format(tag_id))
result = [] result = []
i=0
while i<5:
try:
response = request('get', url=constant.TAG_API_URL, params={'sort': 'popular', 'tag_id': tag_id}).json() response = request('get', url=constant.TAG_API_URL, params={'sort': 'popular', 'tag_id': tag_id}).json()
except Exception as e:
i+=1
if not i<5:
logger.critical(str(e))
exit(1)
continue
break
page = max_page if max_page <= response['num_pages'] else int(response['num_pages']) page = max_page if max_page <= response['num_pages'] else int(response['num_pages'])
for i in range(1, page+1): for i in range(1, page+1):
logger.info('Get page {} ...'.format(i)) logger.info('Getting page {} ...'.format(i))
if page != 1: if page != 1:
i=0
while i<5:
try:
response = request('get', url=constant.TAG_API_URL, params={'sort': 'popular', 'tag_id': tag_id}).json() response = request('get', url=constant.TAG_API_URL, params={'sort': 'popular', 'tag_id': tag_id}).json()
except Exception as e:
i+=1
if not i<5:
logger.critical(str(e))
exit(1)
continue
break
for row in response['result']: for row in response['result']:
title = row['title']['english'] title = row['title']['english']
title = title[:85] + '..' if len(title) > 85 else title title = title[:85] + '..' if len(title) > 85 else title
result.append({'id': row['id'], 'title': title}) result.append({'id': row['id'], 'title': title})
if not result: if not result:
logger.warn('Not found anything of tag id {}'.format(tag_id)) logger.warn('No results for tag id {}'.format(tag_id))
return result return result
@ -188,7 +220,18 @@ def tag_guessing(tag_name):
tag_name = tag_name.lower() tag_name = tag_name.lower()
tag_name = tag_name.replace(' ', '-') tag_name = tag_name.replace(' ', '-')
logger.info('Trying to get tag_id of tag \'{0}\''.format(tag_name)) logger.info('Trying to get tag_id of tag \'{0}\''.format(tag_name))
i=0
while i<5:
try:
response = request('get', url='%s/%s' % (constant.TAG_URL, tag_name)).content response = request('get', url='%s/%s' % (constant.TAG_URL, tag_name)).content
except Exception as e:
i+=1
if not i<5:
logger.critical(str(e))
exit(1)
continue
break
html = BeautifulSoup(response, 'html.parser') html = BeautifulSoup(response, 'html.parser')
first_item = html.find('div', attrs={'class': 'gallery'}) first_item = html.find('div', attrs={'class': 'gallery'})
if not first_item: if not first_item: