legacy search by @gayspacegems of issue #265

This commit is contained in:
Ricter Z 2023-02-07 19:40:52 +08:00
parent 8972026456
commit f66653c55e

View File

@ -241,37 +241,32 @@ def print_doujinshi(doujinshi_list):
def legacy_search_parser(keyword, sorting, page, is_page_all=False): def legacy_search_parser(keyword, sorting, page, is_page_all=False):
logger.debug(f'Searching doujinshis of keyword {keyword}') logger.info(f'Searching doujinshis of keyword {keyword}')
response = None
result = [] result = []
if is_page_all and len(page) != 1: if is_page_all:
# `--page-all` option will override the `--page` option response = request('get', url=constant.LEGACY_SEARCH_URL,
page = [1] params={'q': keyword, 'page': 1, 'sort': sorting}).content
html = BeautifulSoup(response, 'lxml')
pagination = html.find(attrs={'class': 'pagination'})
last_page = pagination.find(attrs={'class': 'last'})
last_page = re.findall('page=([0-9]+)', last_page.attrs['href'])[0]
logger.info(f'Getting doujinshi ids of {last_page} pages')
pages = range(1, int(last_page))
else:
pages = page
for p in page: for p in pages:
logger.debug(f'Fetching page {p} ...') logger.info(f'Fetching page {p} ...')
response = request('get', url=constant.LEGACY_SEARCH_URL, response = request('get', url=constant.LEGACY_SEARCH_URL,
params={'q': keyword, 'page': p, 'sort': sorting}).content params={'q': keyword, 'page': p, 'sort': sorting}).content
if response is None:
logger.warning(f'No result in response in page {p}')
continue
result.extend(_get_title_and_id(response)) result.extend(_get_title_and_id(response))
if not result: if not result:
logger.warning(f'Not found anything of keyword {keyword} on page {page[0]}') logger.warning(f'No results for keywords {keyword}')
return result
if is_page_all:
html = BeautifulSoup(response, 'lxml')
pagination = html.find(attrs={'class': 'pagination'})
next_page = pagination.find(attrs={'class': 'next'})
if next_page is None:
logger.warning('Reached the last page')
return result
else:
next_page = re.findall('page=([0-9]+)', next_page.attrs['href'])[0]
result.extend(legacy_search_parser(keyword, sorting, [next_page], is_page_all))
return result
return result return result