bypass the challenge

2025-07-11 03:51:40 +02:00 · 2019-04-11 22:39:20 +08:00
parent f74be0c665
commit 9160b38c3f
2 changed files with 28 additions and 8 deletions
--- a/nhentai/constant.py
+++ b/nhentai/constant.py
@@ -14,6 +14,7 @@ SEARCH_URL = '%s/search/' % BASE_URL
 TAG_URL = '%s/tag' % BASE_URL
 TAG_API_URL = '%s/api/galleries/tagged' % BASE_URL
 LOGIN_URL = '%s/login/' % BASE_URL
+CHALLENGE_URL = '%s/challenge' % BASE_URL
 FAV_URL = '%s/favorites/' % BASE_URL

 u = urlparse(BASE_URL)
--- a/nhentai/parser.py
+++ b/nhentai/parser.py
@@ -5,6 +5,7 @@ import os
 import re
 import threadpool
 import requests
+import time
 from bs4 import BeautifulSoup
 from tabulate import tabulate

@@ -13,6 +14,10 @@ from nhentai.logger import logger


 session = requests.Session()
+session.headers.update({
+    'Referer': constant.LOGIN_URL,
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
+})


 def request(method, url, **kwargs):
@@ -23,17 +28,21 @@ def request(method, url, **kwargs):
    return getattr(session, method)(url, proxies=constant.PROXY, verify=False, **kwargs)


-def login(username, password):
-    global session
-    request('get', url=constant.LOGIN_URL)
-    session.headers.update({'Referer': constant.LOGIN_URL})
-    content = request('get', url=constant.LOGIN_URL).content
+def _get_csrf_token(content):
    html = BeautifulSoup(content, 'html.parser')
    csrf_token_elem = html.find('input', attrs={'name': 'csrfmiddlewaretoken'})
-
    if not csrf_token_elem:
        raise Exception('Cannot find csrf token to login')
-    csrf_token = csrf_token_elem.attrs['value']
+    return csrf_token_elem.attrs['value']
+
+
+def login(username, password):
+    csrf_token = _get_csrf_token(request('get', url=constant.LOGIN_URL).content)
+    if os.getenv('DEBUG'):
+        logger.info('Getting CSRF token ...')
+
+    if os.getenv('DEBUG'):
+        logger.info('CSRF token is {}'.format(csrf_token))

    login_dict = {
        'csrfmiddlewaretoken': csrf_token,
@@ -41,16 +50,26 @@ def login(username, password):
        'password': password,
    }
    resp = request('post', url=constant.LOGIN_URL, data=login_dict)
-    if 'Invalid username/email or password' in resp.text:
+
+    if 'You\'re loading pages way too quickly.' in resp.content:
+        csrf_token = _get_csrf_token(resp.content)
+        resp = request('post', url=resp.url, data={'csrfmiddlewaretoken': csrf_token, 'next': '/'})
+
+    if 'Invalid username/email or password' in resp.content:
        logger.error('Login failed, please check your username and password')
        exit(1)

+    if 'You\'re loading pages way too quickly.' in resp.content:
+        logger.error('You meet challenge again, please submit a issue at https://github.com/RicterZ/nhentai/issues')
+        exit(2)
+

 def login_parser():
    html = BeautifulSoup(request('get', constant.FAV_URL).content, 'html.parser')
    count = html.find('span', attrs={'class': 'count'})
    if not count:
        logger.error("Can't get your number of favorited doujins. Did the login failed?")
+        return

    count = int(count.text.strip('(').strip(')').replace(',', ''))
    if count == 0: