Mirror of https://github.com/RicterZ/nhentai.git, synced 2025-04-19 10:21:19 +02:00
Merge pull request #268 from RicterZ/dev
enhancement of legacy search parser
commit b56e5b63a9

.github/workflows/test.yml (vendored, 4 changed lines)
@@ -5,7 +5,7 @@ name: Python application

 on:
   push:
-    branches: [ "master" ]
+    branches: [ "master", "dev" ]
   pull_request:
     branches: [ "master" ]

@@ -20,7 +20,7 @@ jobs:
     steps:
     - name: Set env
       run: |
-        echo "NHENTAI_COOKIE=csrftoken=zV4qotZJrHKTeEc9uEg5uvXV5qwTcZIHRjF3TgbkvP6OoxZNj8I6RFQeGSSiZT04; sessionid=t2x0ke3u5i1azg2kvepe7w0ej89btc7t; cf_chl_2=eff13178a8d7814; cf_clearance=Gti8UaeKBbXj2A7V0XFSGydeWbLj2VnqP83FnGx_wIU-1675675191-0-160" >> $GITHUB_ENV
+        echo "NHENTAI_COOKIE=csrftoken=zV4qotZJrHKTeEc9uEg5uvXV5qwTcZIHRjF3TgbkvP6OoxZNj8I6RFQeGSSiZT04; sessionid=t2x0ke3u5i1azg2kvepe7w0ej89btc7t; cf_clearance=Gti8UaeKBbXj2A7V0XFSGydeWbLj2VnqP83FnGx_wIU-1675675191-0-160" >> $GITHUB_ENV
         echo "NHENTAI_UA=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36" >> $GITHUB_ENV

     - uses: actions/checkout@v3
@@ -241,37 +241,32 @@ def print_doujinshi(doujinshi_list):


 def legacy_search_parser(keyword, sorting, page, is_page_all=False):
-    logger.debug(f'Searching doujinshis of keyword {keyword}')
-
-    response = None
+    logger.info(f'Searching doujinshis of keyword {keyword}')
     result = []

-    if is_page_all and len(page) != 1:
-        # `--page-all` option will override the `--page` option
-        page = [1]
+    if is_page_all:
+        response = request('get', url=constant.LEGACY_SEARCH_URL,
+                           params={'q': keyword, 'page': 1, 'sort': sorting}).content
+        html = BeautifulSoup(response, 'lxml')
+        pagination = html.find(attrs={'class': 'pagination'})
+        last_page = pagination.find(attrs={'class': 'last'})
+        last_page = re.findall('page=([0-9]+)', last_page.attrs['href'])[0]
+        logger.info(f'Getting doujinshi ids of {last_page} pages')
+        pages = range(1, int(last_page))
+    else:
+        pages = page

-    for p in page:
-        logger.debug(f'Fetching page {p} ...')
+    for p in pages:
+        logger.info(f'Fetching page {p} ...')
         response = request('get', url=constant.LEGACY_SEARCH_URL,
                            params={'q': keyword, 'page': p, 'sort': sorting}).content
+        if response is None:
+            logger.warning(f'No result in response in page {p}')
+            continue
         result.extend(_get_title_and_id(response))

     if not result:
-        logger.warning(f'Not found anything of keyword {keyword} on page {page[0]}')
-        return result
-
-    if is_page_all:
-        html = BeautifulSoup(response, 'lxml')
-        pagination = html.find(attrs={'class': 'pagination'})
-        next_page = pagination.find(attrs={'class': 'next'})
-
-        if next_page is None:
-            logger.warning('Reached the last page')
-            return result
-        else:
-            next_page = re.findall('page=([0-9]+)', next_page.attrs['href'])[0]
-            result.extend(legacy_search_parser(keyword, sorting, [next_page], is_page_all))
-            return result
+        logger.warning(f'No results for keywords {keyword}')

     return result

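In short, the reworked legacy_search_parser no longer recurses page by page when --page-all is set: it reads the "last" link of the pagination block on the first results page, iterates every page up to it, and skips pages whose response is empty instead of aborting. A minimal sketch of just that page-range step, distilled from the hunk above (resolve_pages and parsed_html are illustrative names, not identifiers from the repository):

    import re

    def resolve_pages(page, is_page_all, parsed_html):
        # parsed_html: BeautifulSoup document of the first results page.
        # Mirrors the new branch in legacy_search_parser: with --page-all the
        # page range is derived from the "last" pagination link up front.
        if not is_page_all:
            return list(page)
        last_link = parsed_html.find(attrs={'class': 'pagination'}).find(attrs={'class': 'last'})
        last_page = int(re.findall('page=([0-9]+)', last_link.attrs['href'])[0])
        return list(range(1, last_page))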
@@ -15,6 +15,7 @@ class TestLogin(unittest.TestCase):
         constant.CONFIG['useragent'] = os.getenv('NHENTAI_UA')

     def test_cookie(self):
+        raise Exception(constant.CONFIG['cookie'])
         try:
             check_cookie()
             self.assertTrue(True)