From 5c3dace937c2679aaecaa93449b8c1d924981ec7 Mon Sep 17 00:00:00 2001 From: RicterZ Date: Tue, 15 Jan 2019 21:12:20 +0800 Subject: [PATCH] tag page download #40 --- nhentai/parser.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/nhentai/parser.py b/nhentai/parser.py index e68885a..e5b8cd3 100644 --- a/nhentai/parser.py +++ b/nhentai/parser.py @@ -281,20 +281,21 @@ def tag_parser(tag_name, max_page=1): tag_name = tag_name.lower() tag_name = tag_name.replace(' ', '-') - logger.info('Searching for doujinshi with tag \'{0}\''.format(tag_name)) - response = request('get', url='%s/%s' % (constant.TAG_URL, tag_name)).content + for p in range(1, max_page + 1): + logger.debug('Fetching page {0} for doujinshi with tag \'{1}\''.format(p, tag_name)) + response = request('get', url='%s/%s?page=%d' % (constant.TAG_URL, tag_name, p)).content - html = BeautifulSoup(response, 'html.parser') - doujinshi_items = html.find_all('div', attrs={'class': 'gallery'}) - if not doujinshi_items: - logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name)) - return + html = BeautifulSoup(response, 'html.parser') + doujinshi_items = html.find_all('div', attrs={'class': 'gallery'}) + if not doujinshi_items: + logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name)) + return - for i in doujinshi_items[:2]: - doujinshi_id = i.a.attrs['href'].strip('/g') - doujinshi_title = i.a.text.strip() - doujinshi_title = doujinshi_title if len(doujinshi_title) < 85 else doujinshi_title[:82] + '...' - result.append({'title': doujinshi_title, 'id': doujinshi_id}) + for i in doujinshi_items: + doujinshi_id = i.a.attrs['href'].strip('/g') + doujinshi_title = i.a.text.strip() + doujinshi_title = doujinshi_title if len(doujinshi_title) < 85 else doujinshi_title[:82] + '...' + result.append({'title': doujinshi_title, 'id': doujinshi_id}) if not result: logger.warn('No results for tag \'{}\''.format(tag_name))