Merge pull request #268 from RicterZ/dev

enhancement of legacy search parser
2025-07-14 21:39:02 +02:00 · 2023-02-07 19:46:09 +08:00
parent 179852a343 fefdd3858a
commit b56e5b63a9
3 changed files with 21 additions and 25 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@ -5,7 +5,7 @@ name: Python application

 on:
  push:
-    branches: [ "master" ]
+    branches: [ "master", "dev" ]
  pull_request:
    branches: [ "master" ]

@ -20,7 +20,7 @@ jobs:
    steps:
    - name: Set env
      run: |
-        echo "NHENTAI_COOKIE=csrftoken=zV4qotZJrHKTeEc9uEg5uvXV5qwTcZIHRjF3TgbkvP6OoxZNj8I6RFQeGSSiZT04; sessionid=t2x0ke3u5i1azg2kvepe7w0ej89btc7t; cf_chl_2=eff13178a8d7814; cf_clearance=Gti8UaeKBbXj2A7V0XFSGydeWbLj2VnqP83FnGx_wIU-1675675191-0-160" >> $GITHUB_ENV
+        echo "NHENTAI_COOKIE=csrftoken=zV4qotZJrHKTeEc9uEg5uvXV5qwTcZIHRjF3TgbkvP6OoxZNj8I6RFQeGSSiZT04; sessionid=t2x0ke3u5i1azg2kvepe7w0ej89btc7t; cf_clearance=Gti8UaeKBbXj2A7V0XFSGydeWbLj2VnqP83FnGx_wIU-1675675191-0-160" >> $GITHUB_ENV
        echo "NHENTAI_UA=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36" >> $GITHUB_ENV

    - uses: actions/checkout@v3
--- a/nhentai/parser.py
+++ b/nhentai/parser.py
@ -241,37 +241,32 @@ def print_doujinshi(doujinshi_list):


 def legacy_search_parser(keyword, sorting, page, is_page_all=False):
-    logger.debug(f'Searching doujinshis of keyword {keyword}')
-
-    response = None
+    """Query the legacy search endpoint and collect the parsed results.
+
+    :param keyword: search query, sent as the ``q`` request parameter
+    :param sorting: sort order, sent as the ``sort`` request parameter
+    :param page: iterable of page numbers to fetch; ignored when
+        ``is_page_all`` is true
+    :param is_page_all: when true, fetch every page from 1 through the last
+        page number advertised by the pagination bar of the first page
+    :return: list built by extending with ``_get_title_and_id`` for each
+        fetched page (empty when nothing was found)
+    """
+    logger.info(f'Searching doujinshis of keyword {keyword}')
     result = []

-    if is_page_all and len(page) != 1:
-        # `--page-all` option will override the `--page` option
-        page = [1]
+    if is_page_all:
+        # `--page-all` overrides `--page`: discover the last page number from
+        # the pagination bar of the first result page.
+        response = request('get', url=constant.LEGACY_SEARCH_URL,
+                           params={'q': keyword, 'page': 1, 'sort': sorting}).content
+        html = BeautifulSoup(response, 'lxml')
+        pagination = html.find(attrs={'class': 'pagination'})
+        last_link = None
+        if pagination is not None:
+            last_link = pagination.find(attrs={'class': 'last'})
+
+        page_numbers = []
+        if last_link is not None:
+            page_numbers = re.findall('page=([0-9]+)', last_link.attrs.get('href', ''))
+
+        # A missing pagination bar or "last" link presumably means there is
+        # at most one page of results (TODO confirm against the site markup);
+        # fall back to page 1 instead of raising on a None lookup.
+        last_page = int(page_numbers[0]) if page_numbers else 1
+        logger.info(f'Getting doujinshi ids of {last_page} pages')
+        # range() excludes its stop value, so add 1 to include the last page.
+        pages = range(1, last_page + 1)
+    else:
+        pages = page

-    for p in page:
-        logger.debug(f'Fetching page {p} ...')
+    for p in pages:
+        logger.info(f'Fetching page {p} ...')
         response = request('get', url=constant.LEGACY_SEARCH_URL,
                            params={'q': keyword, 'page': p, 'sort': sorting}).content
+        if response is None:
+            logger.warning(f'No result in response in page {p}')
+            continue
         result.extend(_get_title_and_id(response))

     if not result:
-        logger.warning(f'Not found anything of keyword {keyword} on page {page[0]}')
-        return result
-
-    if is_page_all:
-        html = BeautifulSoup(response, 'lxml')
-        pagination = html.find(attrs={'class': 'pagination'})
-        next_page = pagination.find(attrs={'class': 'next'})
-
-        if next_page is None:
-            logger.warning('Reached the last page')
-            return result
-        else:
-            next_page = re.findall('page=([0-9]+)', next_page.attrs['href'])[0]
-            result.extend(legacy_search_parser(keyword, sorting, [next_page], is_page_all))
-            return result
+        logger.warning(f'No results for keywords {keyword}')

     return result

--- a/tests/test_login.py
+++ b/tests/test_login.py
@ -15,6 +15,7 @@ class TestLogin(unittest.TestCase):
        constant.CONFIG['useragent'] = os.getenv('NHENTAI_UA')

    def test_cookie(self):
+        raise Exception(constant.CONFIG['cookie'])
        try:
            check_cookie()
            self.assertTrue(True)