mirror of
				https://github.com/RicterZ/nhentai.git
				synced 2025-11-04 02:50:55 +01:00 
			
		
		
		
	Merge pull request #268 from RicterZ/dev
enhancement of legacy search parser
This commit is contained in:
		
							
								
								
									
										4
									
								
								.github/workflows/test.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/workflows/test.yml
									
									
									
									
										vendored
									
									
								
							@@ -5,7 +5,7 @@ name: Python application
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
on:
 | 
					on:
 | 
				
			||||||
  push:
 | 
					  push:
 | 
				
			||||||
    branches: [ "master" ]
 | 
					    branches: [ "master", "dev" ]
 | 
				
			||||||
  pull_request:
 | 
					  pull_request:
 | 
				
			||||||
    branches: [ "master" ]
 | 
					    branches: [ "master" ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -20,7 +20,7 @@ jobs:
 | 
				
			|||||||
    steps:
 | 
					    steps:
 | 
				
			||||||
    - name: Set env
 | 
					    - name: Set env
 | 
				
			||||||
      run: |
 | 
					      run: |
 | 
				
			||||||
        echo "NHENTAI_COOKIE=csrftoken=zV4qotZJrHKTeEc9uEg5uvXV5qwTcZIHRjF3TgbkvP6OoxZNj8I6RFQeGSSiZT04; sessionid=t2x0ke3u5i1azg2kvepe7w0ej89btc7t; cf_chl_2=eff13178a8d7814; cf_clearance=Gti8UaeKBbXj2A7V0XFSGydeWbLj2VnqP83FnGx_wIU-1675675191-0-160" >> $GITHUB_ENV
 | 
					        echo "NHENTAI_COOKIE=csrftoken=zV4qotZJrHKTeEc9uEg5uvXV5qwTcZIHRjF3TgbkvP6OoxZNj8I6RFQeGSSiZT04; sessionid=t2x0ke3u5i1azg2kvepe7w0ej89btc7t; cf_clearance=Gti8UaeKBbXj2A7V0XFSGydeWbLj2VnqP83FnGx_wIU-1675675191-0-160" >> $GITHUB_ENV
 | 
				
			||||||
        echo "NHENTAI_UA=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36" >> $GITHUB_ENV
 | 
					        echo "NHENTAI_UA=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36" >> $GITHUB_ENV
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    - uses: actions/checkout@v3
 | 
					    - uses: actions/checkout@v3
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -241,37 +241,32 @@ def print_doujinshi(doujinshi_list):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def legacy_search_parser(keyword, sorting, page, is_page_all=False):
 | 
					def legacy_search_parser(keyword, sorting, page, is_page_all=False):
 | 
				
			||||||
    logger.debug(f'Searching doujinshis of keyword {keyword}')
 | 
					    logger.info(f'Searching doujinshis of keyword {keyword}')
 | 
				
			||||||
 | 
					 | 
				
			||||||
    response = None
 | 
					 | 
				
			||||||
    result = []
 | 
					    result = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if is_page_all and len(page) != 1:
 | 
					    if is_page_all:
 | 
				
			||||||
        # `--page-all` option will override the `--page` option
 | 
					        response = request('get', url=constant.LEGACY_SEARCH_URL,
 | 
				
			||||||
        page = [1]
 | 
					                           params={'q': keyword, 'page': 1, 'sort': sorting}).content
 | 
				
			||||||
 | 
					        html = BeautifulSoup(response, 'lxml')
 | 
				
			||||||
 | 
					        pagination = html.find(attrs={'class': 'pagination'})
 | 
				
			||||||
 | 
					        last_page = pagination.find(attrs={'class': 'last'})
 | 
				
			||||||
 | 
					        last_page = re.findall('page=([0-9]+)', last_page.attrs['href'])[0]
 | 
				
			||||||
 | 
					        logger.info(f'Getting doujinshi ids of {last_page} pages')
 | 
				
			||||||
 | 
					        pages = range(1, int(last_page))
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        pages = page
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for p in page:
 | 
					    for p in pages:
 | 
				
			||||||
        logger.debug(f'Fetching page {p} ...')
 | 
					        logger.info(f'Fetching page {p} ...')
 | 
				
			||||||
        response = request('get', url=constant.LEGACY_SEARCH_URL,
 | 
					        response = request('get', url=constant.LEGACY_SEARCH_URL,
 | 
				
			||||||
                           params={'q': keyword, 'page': p, 'sort': sorting}).content
 | 
					                           params={'q': keyword, 'page': p, 'sort': sorting}).content
 | 
				
			||||||
 | 
					        if response is None:
 | 
				
			||||||
 | 
					            logger.warning(f'No result in response in page {p}')
 | 
				
			||||||
 | 
					            continue
 | 
				
			||||||
        result.extend(_get_title_and_id(response))
 | 
					        result.extend(_get_title_and_id(response))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if not result:
 | 
					    if not result:
 | 
				
			||||||
        logger.warning(f'Not found anything of keyword {keyword} on page {page[0]}')
 | 
					        logger.warning(f'No results for keywords {keyword}')
 | 
				
			||||||
        return result
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if is_page_all:
 | 
					 | 
				
			||||||
        html = BeautifulSoup(response, 'lxml')
 | 
					 | 
				
			||||||
        pagination = html.find(attrs={'class': 'pagination'})
 | 
					 | 
				
			||||||
        next_page = pagination.find(attrs={'class': 'next'})
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if next_page is None:
 | 
					 | 
				
			||||||
            logger.warning('Reached the last page')
 | 
					 | 
				
			||||||
            return result
 | 
					 | 
				
			||||||
        else:
 | 
					 | 
				
			||||||
            next_page = re.findall('page=([0-9]+)', next_page.attrs['href'])[0]
 | 
					 | 
				
			||||||
            result.extend(legacy_search_parser(keyword, sorting, [next_page], is_page_all))
 | 
					 | 
				
			||||||
            return result
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return result
 | 
					    return result
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -15,6 +15,7 @@ class TestLogin(unittest.TestCase):
 | 
				
			|||||||
        constant.CONFIG['useragent'] = os.getenv('NHENTAI_UA')
 | 
					        constant.CONFIG['useragent'] = os.getenv('NHENTAI_UA')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_cookie(self):
 | 
					    def test_cookie(self):
 | 
				
			||||||
 | 
					        raise Exception(constant.CONFIG['cookie'])
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            check_cookie()
 | 
					            check_cookie()
 | 
				
			||||||
            self.assertTrue(True)
 | 
					            self.assertTrue(True)
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user