This commit is contained in:
RicterZ 2019-06-12 22:37:25 +08:00
parent 0cfec34e9e
commit 58b2b644c1
4 changed files with 119 additions and 134 deletions

View File

@ -149,18 +149,6 @@ def cmd_parser():
logger.info('Proxy \'{0}\' saved.'.format(args.proxy)) logger.info('Proxy \'{0}\' saved.'.format(args.proxy))
exit(0) exit(0)
'''
if args.login:
try:
_, _ = args.login.split(':', 1)
except ValueError:
logger.error('Invalid `username:password` pair.')
exit(1)
if not args.is_download:
logger.warning('YOU DO NOT SPECIFY `--download` OPTION !!!')
'''
if args.favorites: if args.favorites:
if not constant.COOKIE: if not constant.COOKIE:
logger.warning('Cookie has not been set, please use `nhentai --cookie \'COOKIE\'` to set it.') logger.warning('Cookie has not been set, please use `nhentai --cookie \'COOKIE\'` to set it.')

View File

@ -31,7 +31,10 @@ def main():
if not options.is_download: if not options.is_download:
logger.warning('You do not specify --download option') logger.warning('You do not specify --download option')
doujinshi_ids = favorites_parser() doujinshis = favorites_parser()
print_doujinshi(doujinshis)
if options.is_download and doujinshis:
doujinshi_ids = map(lambda d: d['id'], doujinshis)
elif options.tag: elif options.tag:
doujinshis = tag_parser(options.tag, max_page=options.max_page) doujinshis = tag_parser(options.tag, max_page=options.max_page)

View File

@ -10,25 +10,10 @@ from bs4 import BeautifulSoup
from tabulate import tabulate from tabulate import tabulate
import nhentai.constant as constant import nhentai.constant as constant
from nhentai.utils import request
from nhentai.logger import logger from nhentai.logger import logger
# Module-level HTTP session; the headers set here are sent with every call
# made through it.
session = requests.Session()
session.headers['Referer'] = constant.LOGIN_URL
session.headers['User-Agent'] = 'nhentai command line client (https://github.com/RicterZ/nhentai)'
def request(method, url, **kwargs):
    """Send an HTTP request through the module-level ``session``.

    Args:
        method: Name of the ``requests.Session`` attribute to call,
            e.g. ``'get'`` or ``'post'``.
        url: Target URL.
        **kwargs: Forwarded unchanged to the session method.

    Returns:
        Whatever the selected session method returns.

    Raises:
        AttributeError: If the session has no attribute named ``method``.
    """
    try:
        sender = getattr(session, method)
    except AttributeError:
        raise AttributeError('\'requests.Session\' object has no attribute \'{0}\''.format(method))

    # Refresh the cookie on every call so a value configured after import
    # is still picked up.
    session.headers.update({'Cookie': constant.COOKIE})
    # Certificate verification is switched off and the configured proxy is
    # always applied.
    return sender(url, proxies=constant.PROXY, verify=False, **kwargs)
def _get_csrf_token(content): def _get_csrf_token(content):
html = BeautifulSoup(content, 'html.parser') html = BeautifulSoup(content, 'html.parser')
csrf_token_elem = html.find('input', attrs={'name': 'csrfmiddlewaretoken'}) csrf_token_elem = html.find('input', attrs={'name': 'csrfmiddlewaretoken'})
@ -66,7 +51,22 @@ def login(username, password):
exit(2) exit(2)
def _get_title_and_id(response):
    """Extract id/title pairs from a gallery listing page.

    Args:
        response: HTML markup of a listing page, in any form accepted by
            ``BeautifulSoup``.

    Returns:
        A list of ``{'id': str, 'title': str}`` dicts, one per
        ``<div class="gallery">`` element, in document order. Titles of 85
        characters or more are cut to 82 characters followed by ``'...'``.
        Gallery elements lacking a caption, a link, or a ``/g/<digits>/``
        href are skipped.
    """
    result = []
    html = BeautifulSoup(response, 'html.parser')

    for gallery in html.find_all('div', attrs={'class': 'gallery'}):
        caption = gallery.find('div', attrs={'class': 'caption'})
        link = gallery.a
        if caption is None or link is None:
            # Malformed entry: skip it instead of failing the whole page.
            continue

        # Raw string: '\d' in a plain string literal is an invalid escape
        # sequence and triggers a deprecation warning.
        matched = re.search(r'/g/(\d+)/', link.get('href', ''))
        if matched is None:
            continue

        title = caption.text.strip()
        if len(title) >= 85:
            title = title[:82] + '...'
        result.append({'id': matched.group(1), 'title': title})

    return result
def favorites_parser(): def favorites_parser():
result = []
html = BeautifulSoup(request('get', constant.FAV_URL).content, 'html.parser') html = BeautifulSoup(request('get', constant.FAV_URL).content, 'html.parser')
count = html.find('span', attrs={'class': 'count'}) count = html.find('span', attrs={'class': 'count'})
if not count: if not count:
@ -89,20 +89,16 @@ def favorites_parser():
if os.getenv('DEBUG'): if os.getenv('DEBUG'):
pages = 1 pages = 1
ret = []
doujinshi_id = re.compile('data-id="([\d]+)"')
for page in range(1, pages + 1): for page in range(1, pages + 1):
try: try:
logger.info('Getting doujinshi ids of page %d' % page) logger.info('Getting doujinshi ids of page %d' % page)
resp = request('get', constant.FAV_URL + '?page=%d' % page).text resp = request('get', constant.FAV_URL + '?page=%d' % page).content
ids = doujinshi_id.findall(resp)
ret.extend(ids)
result.extend(_get_title_and_id(resp))
except Exception as e: except Exception as e:
logger.error('Error: %s, continue', str(e)) logger.error('Error: %s, continue', str(e))
return ret return result
def doujinshi_parser(id_): def doujinshi_parser(id_):
@ -175,7 +171,6 @@ def doujinshi_parser(id_):
def search_parser(keyword, page): def search_parser(keyword, page):
logger.debug('Searching doujinshis of keyword {0}'.format(keyword)) logger.debug('Searching doujinshis of keyword {0}'.format(keyword))
result = []
try: try:
response = request('get', url=constant.SEARCH_URL, params={'q': keyword, 'page': page}).content response = request('get', url=constant.SEARCH_URL, params={'q': keyword, 'page': page}).content
except requests.ConnectionError as e: except requests.ConnectionError as e:
@ -183,20 +178,95 @@ def search_parser(keyword, page):
logger.warn('If you are in China, please configure the proxy to fu*k GFW.') logger.warn('If you are in China, please configure the proxy to fu*k GFW.')
raise SystemExit raise SystemExit
html = BeautifulSoup(response, 'html.parser') result = _get_title_and_id(response)
doujinshi_search_result = html.find_all('div', attrs={'class': 'gallery'})
for doujinshi in doujinshi_search_result:
doujinshi_container = doujinshi.find('div', attrs={'class': 'caption'})
title = doujinshi_container.text.strip()
title = title if len(title) < 85 else title[:82] + '...'
id_ = re.search('/g/(\d+)/', doujinshi.a['href']).group(1)
result.append({'id': id_, 'title': title})
if not result: if not result:
logger.warn('Not found anything of keyword {}'.format(keyword)) logger.warn('Not found anything of keyword {}'.format(keyword))
return result return result
def print_doujinshi(doujinshi_list):
    """Log the given doujinshi as an rst-formatted ``id``/``doujinshi`` table.

    Args:
        doujinshi_list: Iterable of mappings providing ``'id'`` and
            ``'title'`` keys. A falsy value makes the function a no-op.
    """
    if doujinshi_list:
        rows = []
        for entry in doujinshi_list:
            rows.append((entry['id'], entry['title']))
        table = tabulate(tabular_data=rows, headers=['id', 'doujinshi'], tablefmt='rst')
        logger.info('Search Result\n' + table)
def tag_parser(tag_name, max_page=1):
    """Collect the doujinshi listed under a tag across several pages.

    The tag name is lower-cased and its spaces are replaced with ``-``
    before it is placed in the URL.

    Args:
        tag_name: Human-readable tag name.
        max_page: Number of listing pages to fetch, starting at page 1.

    Returns:
        A list of ``{'id', 'title'}`` dicts gathered from every fetched page.
        When a page yields nothing, fetching stops and whatever was gathered
        so far is returned; ``None`` is returned if nothing had been gathered
        yet (the original "nothing found" signal).
    """
    result = []
    tag_name = tag_name.lower().replace(' ', '-')

    for p in range(1, max_page + 1):
        logger.debug('Fetching page {0} for doujinshi with tag \'{1}\''.format(p, tag_name))
        response = request('get', url='%s/%s?page=%d' % (constant.TAG_URL, tag_name, p)).content
        page_result = _get_title_and_id(response)
        if not page_result:
            logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name))
            # Keep the entries from earlier pages instead of discarding them.
            return result or None
        # Accumulate; plain assignment here would keep only the last page.
        result.extend(page_result)

    if not result:
        logger.warn('No results for tag \'{}\''.format(tag_name))

    return result
def __api_suspended_search_parser(keyword, page):
    """Search doujinshi by keyword through the JSON search endpoint.

    The request is attempted up to five times. After the fifth failure the
    error is logged and the process exits with status 1.

    Args:
        keyword: Search query string.
        page: Result page number to request.

    Returns:
        A list of ``{'id', 'title'}`` dicts. English titles longer than 85
        characters are cut to 85 characters followed by ``'..'``.

    Raises:
        Exception: If the decoded response has no ``'result'`` key.
    """
    logger.debug('Searching doujinshis using keywords {0}'.format(keyword))

    max_attempts = 5
    for attempt in range(1, max_attempts + 1):
        try:
            response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page}).json()
        except Exception as e:
            if attempt >= max_attempts:
                logger.critical(str(e))
                logger.warn('If you are in China, please configure the proxy to fu*k GFW.')
                exit(1)
            continue
        break

    if 'result' not in response:
        raise Exception('No result in response')

    result = []
    for row in response['result']:
        title = row['title']['english']
        if len(title) > 85:
            title = title[:85] + '..'
        result.append({'id': row['id'], 'title': title})

    if not result:
        logger.warn('No results for keywords {}'.format(keyword))

    return result
def __api_suspended_tag_parser(tag_id, max_page=1):
    """Collect doujinshi for a tag id through the JSON tag endpoint.

    The first response supplies ``num_pages``; the number of pages fetched is
    ``max_page`` capped at that value.

    Args:
        tag_id: Numeric tag identifier sent as the ``tag_id`` query parameter.
        max_page: Upper bound on the number of pages to fetch.

    Returns:
        A list of ``{'id', 'title'}`` dicts. English titles longer than 85
        characters are cut to 85 characters followed by ``'..'``.
    """
    logger.info('Searching for doujinshi with tag id {0}'.format(tag_id))
    result = []
    params = {'sort': 'popular', 'tag_id': tag_id}
    response = request('get', url=constant.TAG_API_URL, params=params).json()
    page = max_page if max_page <= response['num_pages'] else int(response['num_pages'])

    for i in range(1, page + 1):
        logger.info('Getting page {} ...'.format(i))

        # The initial response is reused for page 1; later pages must be
        # requested explicitly, otherwise the same rows are appended on
        # every iteration.
        # NOTE(review): the `page` parameter name mirrors the search
        # endpoint used elsewhere in this file - confirm against the API.
        if i != 1:
            response = request('get', url=constant.TAG_API_URL,
                               params=dict(params, page=i)).json()

        for row in response['result']:
            title = row['title']['english']
            title = title[:85] + '..' if len(title) > 85 else title
            result.append({'id': row['id'], 'title': title})

    if not result:
        logger.warn('No results for tag id {}'.format(tag_id))

    return result
def __api_suspended_doujinshi_parser(id_): def __api_suspended_doujinshi_parser(id_):
if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()): if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()):
raise Exception('Doujinshi id({0}) is not valid'.format(id_)) raise Exception('Doujinshi id({0}) is not valid'.format(id_))
@ -244,94 +314,5 @@ def __api_suspended_doujinshi_parser(id_):
return doujinshi return doujinshi
def __api_suspended_search_parser(keyword, page):
    """Search doujinshi by keyword through the JSON search endpoint.

    The request is attempted up to five times. After the fifth failure the
    error is logged and the process exits with status 1.

    Args:
        keyword: Search query string.
        page: Result page number to request.

    Returns:
        A list of ``{'id', 'title'}`` dicts. English titles longer than 85
        characters are cut to 85 characters followed by ``'..'``.

    Raises:
        Exception: If the decoded response has no ``'result'`` key.
    """
    logger.debug('Searching doujinshis using keywords {0}'.format(keyword))

    max_attempts = 5
    for attempt in range(1, max_attempts + 1):
        try:
            response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page}).json()
        except Exception as e:
            if attempt >= max_attempts:
                logger.critical(str(e))
                logger.warn('If you are in China, please configure the proxy to fu*k GFW.')
                exit(1)
            continue
        break

    if 'result' not in response:
        raise Exception('No result in response')

    result = []
    for row in response['result']:
        title = row['title']['english']
        if len(title) > 85:
            title = title[:85] + '..'
        result.append({'id': row['id'], 'title': title})

    if not result:
        logger.warn('No results for keywords {}'.format(keyword))

    return result
def print_doujinshi(doujinshi_list):
    """Log the given doujinshi as an rst-formatted ``id``/``doujinshi`` table.

    Args:
        doujinshi_list: Iterable of mappings providing ``'id'`` and
            ``'title'`` keys. A falsy value makes the function a no-op.
    """
    if doujinshi_list:
        rows = []
        for entry in doujinshi_list:
            rows.append((entry['id'], entry['title']))
        table = tabulate(tabular_data=rows, headers=['id', 'doujinshi'], tablefmt='rst')
        logger.info('Search Result\n' + table)
def __api_suspended_tag_parser(tag_id, max_page=1):
    """Collect doujinshi for a tag id through the JSON tag endpoint.

    The first response supplies ``num_pages``; the number of pages fetched is
    ``max_page`` capped at that value.

    Args:
        tag_id: Numeric tag identifier sent as the ``tag_id`` query parameter.
        max_page: Upper bound on the number of pages to fetch.

    Returns:
        A list of ``{'id', 'title'}`` dicts. English titles longer than 85
        characters are cut to 85 characters followed by ``'..'``.
    """
    logger.info('Searching for doujinshi with tag id {0}'.format(tag_id))
    result = []
    params = {'sort': 'popular', 'tag_id': tag_id}
    response = request('get', url=constant.TAG_API_URL, params=params).json()
    page = max_page if max_page <= response['num_pages'] else int(response['num_pages'])

    for i in range(1, page + 1):
        logger.info('Getting page {} ...'.format(i))

        # The initial response is reused for page 1; later pages must be
        # requested explicitly, otherwise the same rows are appended on
        # every iteration.
        # NOTE(review): the `page` parameter name mirrors the search
        # endpoint used elsewhere in this file - confirm against the API.
        if i != 1:
            response = request('get', url=constant.TAG_API_URL,
                               params=dict(params, page=i)).json()

        for row in response['result']:
            title = row['title']['english']
            title = title[:85] + '..' if len(title) > 85 else title
            result.append({'id': row['id'], 'title': title})

    if not result:
        logger.warn('No results for tag id {}'.format(tag_id))

    return result
def tag_parser(tag_name, max_page=1):
    """Scrape the doujinshi listed under a tag, page by page.

    The tag name is lower-cased and its spaces are replaced with ``-``
    before it is placed in the URL.

    Args:
        tag_name: Human-readable tag name.
        max_page: Number of listing pages to fetch, starting at page 1.

    Returns:
        A list of ``{'title', 'id'}`` dicts, or ``None`` as soon as any
        fetched page contains no gallery elements.
    """
    tag_name = tag_name.lower().replace(' ', '-')
    result = []

    for p in range(1, max_page + 1):
        logger.debug('Fetching page {0} for doujinshi with tag \'{1}\''.format(p, tag_name))
        page_url = '%s/%s?page=%d' % (constant.TAG_URL, tag_name, p)
        html = BeautifulSoup(request('get', url=page_url).content, 'html.parser')
        galleries = html.find_all('div', attrs={'class': 'gallery'})

        if not galleries:
            logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name))
            return

        for gallery in galleries:
            # Strips leading/trailing '/' and 'g' characters from the href,
            # leaving the part in between as the id.
            gallery_id = gallery.a.attrs['href'].strip('/g')
            title = gallery.a.text.strip()
            if len(title) >= 85:
                title = title[:82] + '...'
            result.append({'title': title, 'id': gallery_id})

    if not result:
        logger.warn('No results for tag \'{}\''.format(tag_name))

    return result
if __name__ == '__main__': if __name__ == '__main__':
print(doujinshi_parser("32271")) print(doujinshi_parser("32271"))

View File

@ -6,9 +6,22 @@ import os
import string import string
import zipfile import zipfile
import shutil import shutil
import requests
from nhentai import constant
from nhentai.logger import logger from nhentai.logger import logger
def request(method, url, **kwargs):
    """Send an HTTP request using a fresh ``requests.Session``.

    Args:
        method: Name of the ``requests.Session`` method to call,
            e.g. ``'get'`` or ``'post'``.
        url: Target URL.
        **kwargs: Forwarded unchanged to the session method.

    Returns:
        Whatever the selected session method returns.

    Raises:
        AttributeError: If the session has no attribute named ``method``.
    """
    headers = {
        'Referer': constant.LOGIN_URL,
        'User-Agent': 'nhentai command line client (https://github.com/RicterZ/nhentai)',
        'Cookie': constant.COOKIE,
    }
    # A new session is built on every call, so the current cookie value is
    # always the one sent.
    session = requests.Session()
    session.headers.update(headers)

    sender = getattr(session, method)
    # Certificate verification is switched off and the configured proxy is
    # always applied.
    return sender(url, proxies=constant.PROXY, verify=False, **kwargs)
class _Singleton(type): class _Singleton(type):
""" A metaclass that creates a Singleton base class when called. """ """ A metaclass that creates a Singleton base class when called. """
_instances = {} _instances = {}