download user fav

Ricter Z 2018-03-05 21:45:56 +08:00
parent 308c5277b8
commit debf287fb0
5 changed files with 121 additions and 31 deletions

README.md

@@ -12,7 +12,8 @@ nhentai
🎉🎉 nhentai now supports Windows!

Since torrents downloaded from [http://nhentai.net](http://nhentai.net) are very slow, and the site itself supports reading doujinshi online, this script can be used to download them instead.

-### 安装
+### Installation

    git clone https://github.com/RicterZ/nhentai
    cd nhentai
@@ -23,32 +24,38 @@ nhentai
    layman -fa glicOne
    sudo emerge net-misc/nhentai

-### 用法
-+ Download doujinshi with the specified ids:
-    nhentai --id=123855,123866
-+ Download the first page of doujinshi matching a keyword (not recommended):
-    nhentai --search="tomori" --page=1 --download
+### Usage
+Download doujinshi with the specified ids:
+```bash
+nhentai --id=123855,123866
+```
+Download the first page of doujinshi matching a keyword:
+```bash
+nhentai --search="tomori" --page=1 --download
+```
+Download the doujinshi in your favorites:
+```bash
+nhentai --login "username:password" --download
+```
+### Options

`-t, --thread`: number of download threads, at most 10.
`--path`: output path for the downloaded files, defaults to the current directory.
`--timeout`: timeout for downloading images, defaults to 30 seconds.
`--proxy`: proxy used for downloading, e.g. http://127.0.0.1:8080/
+`--login`: the "username:password" pair of your nhentai account.
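For reference, these options can be combined in a single command. The sketch below is only illustrative: the flag spellings are taken from the list above and every value is a placeholder.

```bash
# Illustrative sketch only: keyword download with a custom thread count,
# output path, timeout and proxy, using the flags documented above.
nhentai --search="tomori" --page=1 --download -t 5 --path=./doujinshi --timeout=60 --proxy=http://127.0.0.1:8080/
```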
-### 自建 nhentai 镜像
+### nHentai Mirror
To download doujinshi through a self-hosted nhentai mirror, you need to set up reverse proxies for both nhentai.net and i.nhentai.net.
For example, if h.loli.club is used as the mirror, h.loli.club should reverse-proxy nhentai.net, and i.h.loli.club should reverse-proxy i.nhentai.net.
Then download through the mirror via the NHENTAI environment variable:

-    NHENTAI=http://h.loli.club nhentai --id 123456
+```bash
+NHENTAI=http://h.loli.club nhentai --id 123456
+```
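Because the mirror is selected purely through the NHENTAI environment variable, it should also combine with the other commands shown above. A hypothetical example, assuming h.loli.club is your mirror and the login pages are proxied as well:

```bash
# Hypothetical: fetch your favorites through a self-hosted mirror.
NHENTAI=http://h.loli.club nhentai --login "username:password" --download
```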
![](./images/search.png)
![](./images/download.png)

nhentai/cmdline.py

@@ -34,7 +34,8 @@ def cmd_parser():
                          '\n NHENTAI=http://h.loli.club nhentai --id [ID ...]'
                          '\n\nEnvironment Variable:\n'
                          '  NHENTAI                 nhentai mirror url')
-    parser.add_option('--download', dest='is_download', action='store_true', help='download doujinshi (for search result)')
+    parser.add_option('--download', dest='is_download', action='store_true',
+                      help='download doujinshi (for search result)')
    parser.add_option('--show-info', dest='is_show', action='store_true', help='just show the doujinshi information')
    parser.add_option('--id', type='string', dest='id', action='store', help='doujinshi ids set, e.g. 1,2,3')
    parser.add_option('--search', type='string', dest='keyword', action='store', help='search doujinshi by keyword')
@@ -49,7 +50,11 @@ def cmd_parser():
                      help='timeout of download doujinshi')
    parser.add_option('--proxy', type='string', dest='proxy', action='store', default='',
                      help='use proxy, example: http://127.0.0.1:1080')
-    parser.add_option('--html', dest='html_viewer', action='store_true', help='generate a html viewer at current directory')
+    parser.add_option('--html', dest='html_viewer', action='store_true',
+                      help='generate a html viewer at current directory')
+    parser.add_option('--login', '-l', type='str', dest='login', action='store',
+                      help='username:password pair of nhentai account')

    try:
        sys.argv = list(map(lambda x: unicode(x.decode(sys.stdin.encoding)), sys.argv))
@@ -64,35 +69,45 @@ def cmd_parser():
        generate_html()
        exit(0)

+    if args.login:
+        try:
+            _, _ = args.login.split(':', 1)
+        except ValueError:
+            logger.error('Invalid `username:password` pair.')
+            exit(1)
+
+        if not args.is_download:
+            logger.warning('YOU DO NOT SPECIFY `--download` OPTION !!!')

    if args.tags:
        logger.warning('`--tags` is under construction')
-        exit(0)
+        exit(1)

    if args.id:
        _ = map(lambda id: id.strip(), args.id.split(','))
-        args.id = set(map(int, filter(lambda id: id.isdigit(), _)))
+        args.id = set(map(int, filter(lambda id_: id_.isdigit(), _)))

-    if (args.is_download or args.is_show) and not args.id and not args.keyword:
+    if (args.is_download or args.is_show) and not args.id and not args.keyword and not args.login:
        logger.critical('Doujinshi id(s) are required for downloading')
        parser.print_help()
-        exit(0)
+        exit(1)

-    if not args.keyword and not args.id:
+    if not args.keyword and not args.id and not args.login:
        parser.print_help()
-        exit(0)
+        exit(1)

    if args.threads <= 0:
        args.threads = 1
    elif args.threads > 15:
        logger.critical('Maximum number of used threads is 15')
-        exit(0)
+        exit(1)

    if args.proxy:
        proxy_url = urlparse(args.proxy)
        if proxy_url.scheme not in ('http', 'https'):
            logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(proxy_url.scheme))
        else:
-            constant.PROXY = {proxy_url.scheme: args.proxy}
+            constant.PROXY = {'http': args.proxy, 'https': args.proxy}

    return args

nhentai/command.py

@@ -1,12 +1,11 @@
#!/usr/bin/env python2.7
# coding: utf-8
from __future__ import unicode_literals, print_function
-import os
import signal
import platform

from nhentai.cmdline import cmd_parser, banner
-from nhentai.parser import doujinshi_parser, search_parser, print_doujinshi
+from nhentai.parser import doujinshi_parser, search_parser, print_doujinshi, login_parser
from nhentai.doujinshi import Doujinshi
from nhentai.downloader import Downloader
from nhentai.logger import logger
@@ -22,6 +21,12 @@ def main():
    doujinshi_ids = []
    doujinshi_list = []

+    if options.login:
+        username, password = options.login.split(':', 1)
+        logger.info('Logging in to nhentai with credential \'%s:%s\'' % (username, '*' * len(password)))
+        for doujinshi_info in login_parser(username=username, password=password):
+            doujinshi_list.append(Doujinshi(**doujinshi_info))

    if options.keyword:
        doujinshis = search_parser(options.keyword, options.page)
        print_doujinshi(doujinshis)
@@ -31,11 +36,9 @@ def main():
        doujinshi_ids = options.id

    if doujinshi_ids:
-        for id in doujinshi_ids:
-            doujinshi_info = doujinshi_parser(id)
+        for id_ in doujinshi_ids:
+            doujinshi_info = doujinshi_parser(id_)
            doujinshi_list.append(Doujinshi(**doujinshi_info))
-    else:
-        exit(0)

    if not options.is_show:
        downloader = Downloader(path=options.output_dir,

nhentai/constant.py

@@ -7,6 +7,8 @@ BASE_URL = os.getenv('NHENTAI', 'https://nhentai.net')
DETAIL_URL = '%s/g' % BASE_URL
SEARCH_URL = '%s/search/' % BASE_URL

+LOGIN_URL = '%s/login/' % BASE_URL
+FAV_URL = '%s/favorites/' % BASE_URL

u = urlparse(BASE_URL)
IMAGE_URL = '%s://i.%s/galleries' % (u.scheme, u.hostname)

nhentai/parser.py

@@ -1,9 +1,11 @@
# coding: utf-8
from __future__ import unicode_literals, print_function

-from bs4 import BeautifulSoup
+import os
import re
+import threadpool
import requests
+from bs4 import BeautifulSoup
from tabulate import tabulate

import nhentai.constant as constant
@@ -17,6 +19,66 @@ def request(method, url, **kwargs):
    return requests.__dict__[method](url, proxies=constant.PROXY, verify=False, **kwargs)


+def login_parser(username, password):
+    s = requests.Session()
+    s.proxies = constant.PROXY
+    s.verify = False
+    s.headers.update({'Referer': constant.LOGIN_URL})
+
+    # fetch the login form to obtain the CSRF token required for posting credentials
+    s.get(constant.LOGIN_URL)
+    content = s.get(constant.LOGIN_URL).content
+    html = BeautifulSoup(content, 'html.parser')
+    csrf_token_elem = html.find('input', attrs={'name': 'csrfmiddlewaretoken'})
+
+    if not csrf_token_elem:
+        raise Exception('Cannot find csrf token to login')
+    csrf_token = csrf_token_elem.attrs['value']
+
+    login_dict = {
+        'csrfmiddlewaretoken': csrf_token,
+        'username_or_email': username,
+        'password': password,
+    }
+    resp = s.post(constant.LOGIN_URL, data=login_dict)
+    if 'Invalid username (or email) or password' in resp.text:
+        logger.error('Login failed, please check your username and password')
+        exit(1)
+
+    # read the total number of favorites from the favorites page (25 per page)
+    html = BeautifulSoup(s.get(constant.FAV_URL).content, 'html.parser')
+    count = html.find('span', attrs={'class': 'count'})
+    if not count:
+        logger.error('Cannot get count of your favorites, maybe login failed.')
+        exit(1)
+
+    count = int(count.text.strip('(').strip(')'))
+    pages = count / 25
+    pages += 1 if count % 25 else 0
+    logger.info('You have %d favorites in %d pages.' % (count, pages))
+
+    if os.getenv('DEBUG'):
+        pages = 1
+
+    ret = []
+    doujinshi_id = re.compile('data-id="([\d]+)"')
+
+    def _callback(request, result):
+        ret.append(result)
+
+    # parse the doujinshi ids found on each favorites page with a small thread pool
+    thread_pool = threadpool.ThreadPool(5)
+
+    for page in range(1, pages + 1):
+        try:
+            logger.info('Getting doujinshi id of page %d' % page)
+            resp = s.get(constant.FAV_URL + '?page=%d' % page).content
+            ids = doujinshi_id.findall(resp)
+            requests_ = threadpool.makeRequests(doujinshi_parser, ids, _callback)
+            [thread_pool.putRequest(req) for req in requests_]
+            thread_pool.wait()
+        except Exception as e:
+            logger.error('Error: %s, continue', str(e))
+
+    return ret
+
+
def doujinshi_parser(id_):
    if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()):
        raise Exception('Doujinshi id({0}) is not valid'.format(id_))
@@ -104,5 +166,6 @@ def print_doujinshi(doujinshi_list):
    logger.info('Search Result\n' +
                tabulate(tabular_data=doujinshi_list, headers=headers, tablefmt='rst'))

if __name__ == '__main__':
    print(doujinshi_parser("32271"))