Compare commits

...

12 Commits
0.3.0 ... 0.3.1

Author SHA1 Message Date
0660cb0fed update user-agent 2019-04-11 22:48:18 +08:00
680b004c24 update README 2019-04-11 22:47:49 +08:00
6709af2a20 0.3.1 - add login session 2019-04-11 22:44:26 +08:00
a3fead2852 pep-8 2019-04-11 22:43:42 +08:00
0728dd8c6d use text rather than content 2019-04-11 22:41:37 +08:00
9160b38c3f bypass the challenge 2019-04-11 22:39:20 +08:00
f74be0c665 add new tests 2019-04-11 22:10:16 +08:00
c30f562a83 Merge pull request #48 from onlymyflower/master
download ids from file
2019-04-11 22:09:30 +08:00
37547cc97f global login session #49 #46 2019-04-11 22:08:19 +08:00
f6fb90aab5 download ids from file 2019-03-06 16:46:47 +08:00
50be89db44 fix extension issue #44 2019-01-27 10:06:12 +08:00
fc0be35b2c 0.3.0 #40 2019-01-15 21:16:14 +08:00
9 changed files with 81 additions and 25 deletions

View File

@ -12,8 +12,10 @@ install:
- python setup.py install - python setup.py install
script: script:
- echo 268642 > /tmp/test.txt
- NHENTAI=https://nhentai.net nhentai --search umaru - NHENTAI=https://nhentai.net nhentai --search umaru
- NHENTAI=https://nhentai.net nhentai --id=152503,146134 -t 10 --output=/tmp/ - NHENTAI=https://nhentai.net nhentai --id=152503,146134 -t 10 --output=/tmp/
- NHENTAI=https://nhentai.net nhentai -l nhentai_test:nhentai --download --output=/tmp/ - NHENTAI=https://nhentai.net nhentai -l nhentai_test:nhentai --download --output=/tmp/
- NHENTAI=https://nhentai.net nhentai --tag lolicon - NHENTAI=https://nhentai.net nhentai --tag lolicon
- NHENTAI=https://nhentai.net nhentai --id 92066 --output=/tmp/ --cbz - NHENTAI=https://nhentai.net nhentai --id 92066 --output=/tmp/ --cbz
- NHENTAI=https://nhentai.net nhentai --file /tmp/test.txt

View File

@ -18,17 +18,24 @@ nHentai is a CLI tool for downloading doujinshi from [nhentai.net](http://nhenta
cd nhentai cd nhentai
python setup.py install python setup.py install
### Gentoo ### Installation (Gentoo)
layman -fa glicOne layman -fa glicOne
sudo emerge net-misc/nhentai sudo emerge net-misc/nhentai
### Usage ### Usage
**IMPORTANT**: To bypass the nhentai frequency limit, you should use `--login` option to log into nhentai.net.
Download specified doujinshi: Download specified doujinshi:
```bash ```bash
nhentai --id=123855,123866 nhentai --id=123855,123866
``` ```
Download doujinshi with ids specified in a file:
```bash
nhentai --file=doujinshi.txt
```
Search a keyword and download the first page: Search a keyword and download the first page:
```bash ```bash
nhentai --search="tomori" --page=1 --download nhentai --search="tomori" --page=1 --download

5
doujinshi.txt Normal file
View File

@ -0,0 +1,5 @@
184212
204944
222460
244502
261909

View File

@ -1,3 +1,3 @@
__version__ = '0.2.19' __version__ = '0.3.1'
__author__ = 'RicterZ' __author__ = 'RicterZ'
__email__ = 'ricterzheng@gmail.com' __email__ = 'ricterzheng@gmail.com'

View File

@ -37,6 +37,7 @@ def banner():
def cmd_parser(): def cmd_parser():
parser = OptionParser('\n nhentai --search [keyword] --download' parser = OptionParser('\n nhentai --search [keyword] --download'
'\n NHENTAI=http://h.loli.club nhentai --id [ID ...]' '\n NHENTAI=http://h.loli.club nhentai --id [ID ...]'
'\n nhentai --file [filename]'
'\n\nEnvironment Variable:\n' '\n\nEnvironment Variable:\n'
' NHENTAI nhentai mirror url') ' NHENTAI nhentai mirror url')
parser.add_option('--download', dest='is_download', action='store_true', parser.add_option('--download', dest='is_download', action='store_true',
@ -70,7 +71,8 @@ def cmd_parser():
help='Generate Comic Book CBZ File') help='Generate Comic Book CBZ File')
parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False, parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
help='Remove downloaded doujinshi dir when generated CBZ file.') help='Remove downloaded doujinshi dir when generated CBZ file.')
parser.add_option('--file', '-f', type='string', dest='file', action='store', help='Read gallery IDs from file.')
try: try:
sys.argv = list(map(lambda x: unicode(x.decode(sys.stdin.encoding)), sys.argv)) sys.argv = list(map(lambda x: unicode(x.decode(sys.stdin.encoding)), sys.argv))
except (NameError, TypeError): except (NameError, TypeError):
@ -98,6 +100,11 @@ def cmd_parser():
_ = map(lambda id: id.strip(), args.id.split(',')) _ = map(lambda id: id.strip(), args.id.split(','))
args.id = set(map(int, filter(lambda id_: id_.isdigit(), _))) args.id = set(map(int, filter(lambda id_: id_.isdigit(), _)))
if args.file:
with open(args.file, 'r') as f:
_ = map(lambda id: id.strip(), f.readlines())
args.id = set(map(int, filter(lambda id_: id_.isdigit(), _)))
if (args.is_download or args.is_show) and not args.id and not args.keyword and \ if (args.is_download or args.is_show) and not args.id and not args.keyword and \
not args.login and not args.tag: not args.login and not args.tag:
logger.critical('Doujinshi id(s) are required for downloading') logger.critical('Doujinshi id(s) are required for downloading')

View File

@ -5,7 +5,7 @@ import signal
import platform import platform
from nhentai.cmdline import cmd_parser, banner from nhentai.cmdline import cmd_parser, banner
from nhentai.parser import doujinshi_parser, search_parser, print_doujinshi, login_parser, tag_parser from nhentai.parser import doujinshi_parser, search_parser, print_doujinshi, login_parser, tag_parser, login
from nhentai.doujinshi import Doujinshi from nhentai.doujinshi import Doujinshi
from nhentai.downloader import Downloader from nhentai.downloader import Downloader
from nhentai.logger import logger from nhentai.logger import logger
@ -24,8 +24,11 @@ def main():
if options.login: if options.login:
username, password = options.login.split(':', 1) username, password = options.login.split(':', 1)
logger.info('Logging in to nhentai using credential pair \'%s:%s\'' % (username, '*' * len(password))) logger.info('Logging in to nhentai using credential pair \'%s:%s\'' % (username, '*' * len(password)))
for doujinshi_info in login_parser(username=username, password=password): login(username, password)
doujinshi_list.append(Doujinshi(**doujinshi_info))
if options.is_download:
for doujinshi_info in login_parser():
doujinshi_list.append(Doujinshi(**doujinshi_info))
if options.tag: if options.tag:
doujinshis = tag_parser(options.tag, max_page=options.max_page) doujinshis = tag_parser(options.tag, max_page=options.max_page)

View File

@ -14,6 +14,7 @@ SEARCH_URL = '%s/search/' % BASE_URL
TAG_URL = '%s/tag' % BASE_URL TAG_URL = '%s/tag' % BASE_URL
TAG_API_URL = '%s/api/galleries/tagged' % BASE_URL TAG_API_URL = '%s/api/galleries/tagged' % BASE_URL
LOGIN_URL = '%s/login/' % BASE_URL LOGIN_URL = '%s/login/' % BASE_URL
CHALLENGE_URL = '%s/challenge' % BASE_URL
FAV_URL = '%s/favorites/' % BASE_URL FAV_URL = '%s/favorites/' % BASE_URL
u = urlparse(BASE_URL) u = urlparse(BASE_URL)

View File

@ -59,8 +59,11 @@ class Doujinshi(object):
if self.downloader: if self.downloader:
download_queue = [] download_queue = []
for i in range(1, self.pages + 1): if len(self.ext) != self.pages:
download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i, self.ext)) logger.warning('Page count and ext count do not equal')
for i in range(1, min(self.pages, len(self.ext)) + 1):
download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i, self.ext[i-1]))
self.downloader.download(download_queue, self.filename) self.downloader.download(download_queue, self.filename)

View File

@ -13,42 +13,64 @@ import nhentai.constant as constant
from nhentai.logger import logger from nhentai.logger import logger
session = requests.Session()
session.headers.update({
'Referer': constant.LOGIN_URL,
'User-Agent': 'nhentai command line client (https://github.com/RicterZ/nhentai)',
})
def request(method, url, **kwargs): def request(method, url, **kwargs):
if not hasattr(requests, method): global session
raise AttributeError('\'requests\' object has no attribute \'{0}\''.format(method)) if not hasattr(session, method):
raise AttributeError('\'requests.Session\' object has no attribute \'{0}\''.format(method))
return requests.__dict__[method](url, proxies=constant.PROXY, verify=False, **kwargs) return getattr(session, method)(url, proxies=constant.PROXY, verify=False, **kwargs)
def login_parser(username, password): def _get_csrf_token(content):
s = requests.Session()
s.proxies = constant.PROXY
s.verify = False
s.headers.update({'Referer': constant.LOGIN_URL})
s.get(constant.LOGIN_URL)
content = s.get(constant.LOGIN_URL).content
html = BeautifulSoup(content, 'html.parser') html = BeautifulSoup(content, 'html.parser')
csrf_token_elem = html.find('input', attrs={'name': 'csrfmiddlewaretoken'}) csrf_token_elem = html.find('input', attrs={'name': 'csrfmiddlewaretoken'})
if not csrf_token_elem: if not csrf_token_elem:
raise Exception('Cannot find csrf token to login') raise Exception('Cannot find csrf token to login')
csrf_token = csrf_token_elem.attrs['value'] return csrf_token_elem.attrs['value']
def login(username, password):
csrf_token = _get_csrf_token(request('get', url=constant.LOGIN_URL).text)
if os.getenv('DEBUG'):
logger.info('Getting CSRF token ...')
if os.getenv('DEBUG'):
logger.info('CSRF token is {}'.format(csrf_token))
login_dict = { login_dict = {
'csrfmiddlewaretoken': csrf_token, 'csrfmiddlewaretoken': csrf_token,
'username_or_email': username, 'username_or_email': username,
'password': password, 'password': password,
} }
resp = s.post(constant.LOGIN_URL, data=login_dict) resp = request('post', url=constant.LOGIN_URL, data=login_dict)
if 'You\'re loading pages way too quickly.' in resp.text:
csrf_token = _get_csrf_token(resp.text)
resp = request('post', url=resp.url, data={'csrfmiddlewaretoken': csrf_token, 'next': '/'})
if 'Invalid username/email or password' in resp.text: if 'Invalid username/email or password' in resp.text:
logger.error('Login failed, please check your username and password') logger.error('Login failed, please check your username and password')
exit(1) exit(1)
html = BeautifulSoup(s.get(constant.FAV_URL).content, 'html.parser') if 'You\'re loading pages way too quickly.' in resp.text:
logger.error('You meet challenge insistently, please submit a issue'
' at https://github.com/RicterZ/nhentai/issues')
exit(2)
def login_parser():
html = BeautifulSoup(request('get', constant.FAV_URL).content, 'html.parser')
count = html.find('span', attrs={'class': 'count'}) count = html.find('span', attrs={'class': 'count'})
if not count: if not count:
logger.error("Can't get your number of favorited doujins. Did the login failed?") logger.error("Can't get your number of favorited doujins. Did the login failed?")
return
count = int(count.text.strip('(').strip(')').replace(',', '')) count = int(count.text.strip('(').strip(')').replace(',', ''))
if count == 0: if count == 0:
@ -77,7 +99,7 @@ def login_parser(username, password):
for page in range(1, pages + 1): for page in range(1, pages + 1):
try: try:
logger.info('Getting doujinshi ids of page %d' % page) logger.info('Getting doujinshi ids of page %d' % page)
resp = s.get(constant.FAV_URL + '?page=%d' % page).text resp = request('get', constant.FAV_URL + '?page=%d' % page).text
ids = doujinshi_id.findall(resp) ids = doujinshi_id.findall(resp)
requests_ = threadpool.makeRequests(doujinshi_parser, ids, _callback) requests_ = threadpool.makeRequests(doujinshi_parser, ids, _callback)
[thread_pool.putRequest(req) for req in requests_] [thread_pool.putRequest(req) for req in requests_]
@ -115,12 +137,18 @@ def doujinshi_parser(id_):
doujinshi_cover = html.find('div', attrs={'id': 'cover'}) doujinshi_cover = html.find('div', attrs={'id': 'cover'})
img_id = re.search('/galleries/([\d]+)/cover\.(jpg|png)$', doujinshi_cover.a.img.attrs['data-src']) img_id = re.search('/galleries/([\d]+)/cover\.(jpg|png)$', doujinshi_cover.a.img.attrs['data-src'])
ext = []
for i in html.find_all('div', attrs={'class': 'thumb-container'}):
_, ext_name = os.path.basename(i.img.attrs['data-src']).rsplit('.', 1)
ext.append(ext_name)
if not img_id: if not img_id:
logger.critical('Tried yo get image id failed') logger.critical('Tried yo get image id failed')
exit(1) exit(1)
doujinshi['img_id'] = img_id.group(1) doujinshi['img_id'] = img_id.group(1)
doujinshi['ext'] = img_id.group(2) doujinshi['ext'] = ext
pages = 0 pages = 0
for _ in doujinshi_info.find_all('div', class_=''): for _ in doujinshi_info.find_all('div', class_=''):