From debf287fb0042196c816181755d72d4f2819a4e9 Mon Sep 17 00:00:00 2001 From: Ricter Z Date: Mon, 5 Mar 2018 21:45:56 +0800 Subject: [PATCH] download user fav --- README.md | 35 ++++++++++++++---------- nhentai/cmdline.py | 35 +++++++++++++++++------- nhentai/command.py | 15 ++++++----- nhentai/constant.py | 2 ++ nhentai/parser.py | 65 ++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 121 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 788115a..1c3868f 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,8 @@ nhentai 🎉🎉 nhentai 现在支持 Windows 啦! 由于 [http://nhentai.net](http://nhentai.net) 下载下来的种子速度很慢,而且官方也提供在线观看本子的功能,所以可以利用本脚本下载本子。 -### 安装 + +### Installation git clone https://github.com/RicterZ/nhentai cd nhentai @@ -22,33 +23,39 @@ nhentai layman -fa glicOne sudo emerge net-misc/nhentai - -### 用法 -+ 下载指定 id 列表的本子: +### Usage +下载指定 id 列表的本子: +```bash +nhentai --id=123855,123866 +``` +下载某关键词第一页的本子: +```bash +nhentai --search="tomori" --page=1 --download +``` - nhentai --id=123855,123866 - - -+ 下载某关键词第一页的本子(不推荐): - - - nhentai --search="tomori" --page=1 --download +下载用户 favorites 内容: +```bash +nhentai --login "username:password" --download +``` +### Options `-t, --thread`:指定下载的线程数,最多为 10 线程。 `--path`:指定下载文件的输出路径,默认为当前目录。 `--timeout`:指定下载图片的超时时间,默认为 30 秒。 `--proxy`:指定下载的代理,例如: http://127.0.0.1:8080/ +`--login`:nhentai 账号的“用户名:密码”组合 -### 自建 nhentai 镜像 +### nHentai Mirror 如果想用自建镜像下载 nhentai 的本子,需要搭建 nhentai.net 和 i.nhentai.net 的反向代理。 例如用 h.loli.club 来做反向代理的话,需要 h.loli.club 反代 nhentai.net,i.h.loli.club 反带 i.nhentai.net。 然后利用环境变量来下载: - NHENTAI=http://h.loli.club nhentai --id 123456 - +```bash +NHENTAI=http://h.loli.club nhentai --id 123456 +``` ![](./images/search.png) ![](./images/download.png) diff --git a/nhentai/cmdline.py b/nhentai/cmdline.py index 6d049ef..7995e4f 100644 --- a/nhentai/cmdline.py +++ b/nhentai/cmdline.py @@ -34,7 +34,8 @@ def cmd_parser(): '\n NHENTAI=http://h.loli.club nhentai --id [ID ...]' '\n\nEnvironment 
Variable:\n' ' NHENTAI nhentai mirror url') - parser.add_option('--download', dest='is_download', action='store_true', help='download doujinshi (for search result)') + parser.add_option('--download', dest='is_download', action='store_true', + help='download doujinshi (for search result)') parser.add_option('--show-info', dest='is_show', action='store_true', help='just show the doujinshi information') parser.add_option('--id', type='string', dest='id', action='store', help='doujinshi ids set, e.g. 1,2,3') parser.add_option('--search', type='string', dest='keyword', action='store', help='search doujinshi by keyword') @@ -49,7 +50,11 @@ def cmd_parser(): help='timeout of download doujinshi') parser.add_option('--proxy', type='string', dest='proxy', action='store', default='', help='use proxy, example: http://127.0.0.1:1080') - parser.add_option('--html', dest='html_viewer', action='store_true', help='generate a html viewer at current directory') + parser.add_option('--html', dest='html_viewer', action='store_true', + help='generate a html viewer at current directory') + + parser.add_option('--login', '-l', type='str', dest='login', action='store', + help='username:password pair of nhentai account') try: sys.argv = list(map(lambda x: unicode(x.decode(sys.stdin.encoding)), sys.argv)) @@ -64,35 +69,45 @@ def cmd_parser(): generate_html() exit(0) + if args.login: + try: + _, _ = args.login.split(':', 1) + except ValueError: + logger.error('Invalid `username:password` pair.') + exit(1) + + if not args.is_download: + logger.warning('YOU DO NOT SPECIFY `--download` OPTION !!!') + if args.tags: logger.warning('`--tags` is under construction') - exit(0) + exit(1) if args.id: _ = map(lambda id: id.strip(), args.id.split(',')) - args.id = set(map(int, filter(lambda id: id.isdigit(), _))) + args.id = set(map(int, filter(lambda id_: id_.isdigit(), _))) - if (args.is_download or args.is_show) and not args.id and not args.keyword: + if (args.is_download or args.is_show) and not 
args.id and not args.keyword and not args.login: logger.critical('Doujinshi id(s) are required for downloading') parser.print_help() - exit(0) + exit(1) - if not args.keyword and not args.id: + if not args.keyword and not args.id and not args.login: parser.print_help() - exit(0) + exit(1) if args.threads <= 0: args.threads = 1 elif args.threads > 15: logger.critical('Maximum number of used threads is 15') - exit(0) + exit(1) if args.proxy: proxy_url = urlparse(args.proxy) if proxy_url.scheme not in ('http', 'https'): logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(proxy_url.scheme)) else: - constant.PROXY = {proxy_url.scheme: args.proxy} + constant.PROXY = {'http': args.proxy, 'https': args.proxy} return args diff --git a/nhentai/command.py b/nhentai/command.py index d96b99d..840b076 100644 --- a/nhentai/command.py +++ b/nhentai/command.py @@ -1,12 +1,11 @@ #!/usr/bin/env python2.7 # coding: utf-8 from __future__ import unicode_literals, print_function -import os import signal import platform from nhentai.cmdline import cmd_parser, banner -from nhentai.parser import doujinshi_parser, search_parser, print_doujinshi +from nhentai.parser import doujinshi_parser, search_parser, print_doujinshi, login_parser from nhentai.doujinshi import Doujinshi from nhentai.downloader import Downloader from nhentai.logger import logger @@ -22,6 +21,12 @@ def main(): doujinshi_ids = [] doujinshi_list = [] + if options.login: + username, password = options.login.split(':', 1) + logger.info('Login to nhentai use credential \'%s:%s\'' % (username, '*' * len(password))) + for doujinshi_info in login_parser(username=username, password=password): + doujinshi_list.append(Doujinshi(**doujinshi_info)) + if options.keyword: doujinshis = search_parser(options.keyword, options.page) print_doujinshi(doujinshis) @@ -31,11 +36,9 @@ def main(): doujinshi_ids = options.id if doujinshi_ids: - for id in doujinshi_ids: - doujinshi_info = doujinshi_parser(id) + for id_ in doujinshi_ids: + 
def _page_count(total, per_page=25):
    """Return the number of pages needed to list ``total`` items.

    Uses integer ceiling division, so 0 items need 0 pages and the result
    is always an ``int`` (safe to feed to ``range``).
    """
    return (total + per_page - 1) // per_page


def _login(session, username, password):
    """Submit the login form at ``constant.LOGIN_URL`` through ``session``.

    Raises ``Exception`` when the login page has no ``csrfmiddlewaretoken``
    input. Logs an error and exits with status 1 when the response contains
    the site's "invalid username or password" message.
    """
    # A single GET is enough: the Session keeps any cookies from the
    # response, and the same response body carries the CSRF form field.
    login_page = BeautifulSoup(session.get(constant.LOGIN_URL).content, 'html.parser')
    csrf_token_elem = login_page.find('input', attrs={'name': 'csrfmiddlewaretoken'})
    if not csrf_token_elem:
        raise Exception('Cannot find csrf token to login')

    login_dict = {
        'csrfmiddlewaretoken': csrf_token_elem.attrs['value'],
        'username_or_email': username,
        'password': password,
    }
    resp = session.post(constant.LOGIN_URL, data=login_dict)
    if 'Invalid username (or email) or password' in resp.text:
        logger.error('Login failed, please check your username and password')
        exit(1)


def _favorite_count(session):
    """Read the favorites counter from ``constant.FAV_URL``.

    Logs an error and exits with status 1 when the ``<span class="count">``
    element is missing or holds no digits, instead of failing later with an
    ``AttributeError`` on ``None``.
    """
    fav_page = BeautifulSoup(session.get(constant.FAV_URL).content, 'html.parser')
    count_elem = fav_page.find('span', attrs={'class': 'count'})
    # Keep only the digits so surrounding parentheses, whitespace or
    # thousands separators cannot break the int() conversion.
    digits = re.sub(r'\D', '', count_elem.text) if count_elem else ''
    if not digits:
        logger.error('Cannot get count of your favorites, maybe login failed.')
        exit(1)
    return int(digits)


def login_parser(username, password):
    """Log in and collect the parsed info of every favorite doujinshi.

    :param username: account name or e-mail sent as ``username_or_email``.
    :param password: account password.
    :return: list with one ``doujinshi_parser`` result per favorite id found;
        results are appended as the worker threads deliver them.
    :raises Exception: when the login page has no CSRF token field.

    Exits the process with status 1 when the credentials are rejected or the
    favorites counter cannot be read. A failure while handling one favorites
    page is logged and the remaining pages are still processed.
    """
    # The context manager closes the session's connections on every exit
    # path, including the SystemExit raised by exit(1) in the helpers.
    with requests.Session() as s:
        s.proxies = constant.PROXY
        s.verify = False
        s.headers.update({'Referer': constant.LOGIN_URL})

        _login(s, username, password)

        count = _favorite_count(s)
        pages = _page_count(count)
        logger.info('Your have %d favorites in %d pages.' % (count, pages))

        # DEBUG in the environment limits the crawl to the first page.
        if os.getenv('DEBUG'):
            pages = 1

        ret = []
        doujinshi_id = re.compile(r'data-id="(\d+)"')

        def _callback(_request, result):
            # threadpool passes (work_request, result); only the result is kept.
            ret.append(result)

        thread_pool = threadpool.ThreadPool(5)

        for page in range(1, pages + 1):
            try:
                logger.info('Getting doujinshi id of page %d' % page)
                # .text (not .content) so the text pattern also matches on
                # Python 3, where .content is bytes.
                resp = s.get(constant.FAV_URL + '?page=%d' % page).text
                ids = doujinshi_id.findall(resp)
                for req in threadpool.makeRequests(doujinshi_parser, ids, _callback):
                    thread_pool.putRequest(req)
                thread_pool.wait()
            except Exception as e:
                # Best effort: report the failing page and keep going.
                logger.error('Error: %s, continue', str(e))

        return ret