From f66653c55e37d68f13049e39dcc7d8740ab3ab98 Mon Sep 17 00:00:00 2001
From: Ricter Z
Date: Tue, 7 Feb 2023 19:40:52 +0800
Subject: [PATCH 1/2] legacy search by @gayspacegems of issue #265

---
 nhentai/parser.py | 41 ++++++++++++++++++-----------------------
 1 file changed, 18 insertions(+), 23 deletions(-)

diff --git a/nhentai/parser.py b/nhentai/parser.py
index 6bf007a..988ba80 100644
--- a/nhentai/parser.py
+++ b/nhentai/parser.py
@@ -241,37 +241,32 @@ def print_doujinshi(doujinshi_list):
 
 
 def legacy_search_parser(keyword, sorting, page, is_page_all=False):
-    logger.debug(f'Searching doujinshis of keyword {keyword}')
-
-    response = None
+    logger.info(f'Searching doujinshis of keyword {keyword}')
     result = []
 
-    if is_page_all and len(page) != 1:
-        # `--page-all` option will override the `--page` option
-        page = [1]
+    if is_page_all:
+        response = request('get', url=constant.LEGACY_SEARCH_URL,
+                           params={'q': keyword, 'page': 1, 'sort': sorting}).content
+        html = BeautifulSoup(response, 'lxml')
+        pagination = html.find(attrs={'class': 'pagination'})
+        last_page = pagination.find(attrs={'class': 'last'})
+        last_page = re.findall('page=([0-9]+)', last_page.attrs['href'])[0]
+        logger.info(f'Getting doujinshi ids of {last_page} pages')
+        pages = range(1, int(last_page))
+    else:
+        pages = page
 
-    for p in page:
-        logger.debug(f'Fetching page {p} ...')
+    for p in pages:
+        logger.info(f'Fetching page {p} ...')
         response = request('get', url=constant.LEGACY_SEARCH_URL,
                            params={'q': keyword, 'page': p, 'sort': sorting}).content
+        if response is None:
+            logger.warning(f'No result in response in page {p}')
+            continue
         result.extend(_get_title_and_id(response))
 
     if not result:
-        logger.warning(f'Not found anything of keyword {keyword} on page {page[0]}')
-        return result
-
-    if is_page_all:
-        html = BeautifulSoup(response, 'lxml')
-        pagination = html.find(attrs={'class': 'pagination'})
-        next_page = pagination.find(attrs={'class': 'next'})
-
-        if next_page is None:
-            logger.warning('Reached the last page')
-            return result
-        else:
-            next_page = re.findall('page=([0-9]+)', next_page.attrs['href'])[0]
-            result.extend(legacy_search_parser(keyword, sorting, [next_page], is_page_all))
-            return result
+        logger.warning(f'No results for keywords {keyword}')
 
     return result
 
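Note on the technique in PATCH 1/2: instead of recursively following the `next` pagination link page by page, the `--page-all` path now fetches the first results page once, scrapes the `last` link from its pagination block, and iterates a plain `range`. One caveat worth flagging: `range(1, int(last_page))` excludes its upper bound, so the final page itself is never fetched; if it should be included, the bound would need to be `int(last_page) + 1`. Below is a minimal, self-contained sketch of the scrape, runnable offline; the HTML snippet is a hypothetical stand-in for a real nhentai results page (the patch itself parses a live response with `lxml`):

```python
import re

from bs4 import BeautifulSoup

# Hypothetical pagination markup; only the 'pagination'/'last' classes
# and the 'page=' query parameter matter for this technique.
SAMPLE_HTML = """
<section class="pagination">
  <a href="/search/?q=test&page=2" class="next">next</a>
  <a href="/search/?q=test&page=17" class="last">last</a>
</section>
"""

# The patch uses BeautifulSoup(response, 'lxml'); 'html.parser' is the
# stdlib-backed equivalent and avoids the extra dependency here.
html = BeautifulSoup(SAMPLE_HTML, 'html.parser')
pagination = html.find(attrs={'class': 'pagination'})
last_page_link = pagination.find(attrs={'class': 'last'})
last_page = re.findall('page=([0-9]+)', last_page_link.attrs['href'])[0]

# range() excludes its upper bound: this yields pages 1..16, not 17.
pages = range(1, int(last_page))
print(last_page, list(pages)[0], list(pages)[-1])  # -> 17 1 16
```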
From fefdd3858a8b27cd41706cd888e5ae9426e37cac Mon Sep 17 00:00:00 2001
From: Ricter Z
Date: Tue, 7 Feb 2023 19:42:27 +0800
Subject: [PATCH 2/2] update test

---
 .github/workflows/test.yml | 4 ++--
 tests/test_login.py        | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 4c2e9f3..e3c6bf1 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -5,7 +5,7 @@ name: Python application
 
 on:
   push:
-    branches: [ "master" ]
+    branches: [ "master", "dev" ]
   pull_request:
     branches: [ "master" ]
 
@@ -20,7 +20,7 @@ jobs:
 
     steps:
     - name: Set env
       run: |
-        echo "NHENTAI_COOKIE=csrftoken=zV4qotZJrHKTeEc9uEg5uvXV5qwTcZIHRjF3TgbkvP6OoxZNj8I6RFQeGSSiZT04; sessionid=t2x0ke3u5i1azg2kvepe7w0ej89btc7t; cf_chl_2=eff13178a8d7814; cf_clearance=Gti8UaeKBbXj2A7V0XFSGydeWbLj2VnqP83FnGx_wIU-1675675191-0-160" >> $GITHUB_ENV
+        echo "NHENTAI_COOKIE=csrftoken=zV4qotZJrHKTeEc9uEg5uvXV5qwTcZIHRjF3TgbkvP6OoxZNj8I6RFQeGSSiZT04; sessionid=t2x0ke3u5i1azg2kvepe7w0ej89btc7t; cf_clearance=Gti8UaeKBbXj2A7V0XFSGydeWbLj2VnqP83FnGx_wIU-1675675191-0-160" >> $GITHUB_ENV
         echo "NHENTAI_UA=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36" >> $GITHUB_ENV
     - uses: actions/checkout@v3

diff --git a/tests/test_login.py b/tests/test_login.py
index 2303417..df73be2 100644
--- a/tests/test_login.py
+++ b/tests/test_login.py
@@ -15,6 +15,7 @@ class TestLogin(unittest.TestCase):
         constant.CONFIG['useragent'] = os.getenv('NHENTAI_UA')
 
     def test_cookie(self):
+        raise Exception(constant.CONFIG['cookie'])
         try:
             check_cookie()
             self.assertTrue(True)
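Note on the technique in PATCH 2/2: `echo "NAME=value" >> $GITHUB_ENV` exports a variable to all subsequent steps of the job, which is how the cookie and user agent reach the tests. The `raise Exception(constant.CONFIG['cookie'])` added at the top of `test_cookie` makes the test fail unconditionally and prints the configured cookie, a live session secret, into the public CI log; it reads as a temporary debugging aid to confirm that env plumbing, not a permanent test. A sketch of a less leaky way to get the same confirmation, assuming the same `NHENTAI_COOKIE` variable as above (the test class and method names here are illustrative, not part of the repository):

```python
import os
import unittest


class TestCookiePlumbing(unittest.TestCase):
    def test_cookie_is_configured(self):
        # Assert on the cookie's structure and, on failure, expose only a
        # short masked prefix instead of raising with the raw secret.
        cookie = os.getenv('NHENTAI_COOKIE', '')
        self.assertIn('sessionid=', cookie,
                      msg=f'unexpected cookie, prefix: {cookie[:12]}...')


if __name__ == '__main__':
    unittest.main()
```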