mirror of
https://github.com/RicterZ/nhentai.git
synced 2025-07-01 07:59:29 +02:00
Compare commits
15 Commits
Author | SHA1 | Date | |
---|---|---|---|
221ff6b32c | |||
bc6ef0cf5d | |||
c8c63cbc11 | |||
a63856d076 | |||
aa4986189f | |||
0fb81599dc | |||
e9f9651d07 | |||
1860b5f0cf | |||
eff4f3bf9b | |||
501840172e | |||
e5ed6d098a | |||
98606202fb | |||
5a3f1009c9 | |||
61945a6e97 | |||
443fcdc7da |
72
README.rst
72
README.rst
@ -61,7 +61,7 @@ Installation (NixOs)
|
||||
=====
|
||||
Usage
|
||||
=====
|
||||
**IMPORTANT**: To bypass the nhentai frequency limit, you should use `--cookie` option to store your cookie.
|
||||
**⚠️IMPORTANT⚠️**: To bypass the nhentai frequency limit, you should use the `--cookie` and `--useragent` options to store your cookie and user-agent.
|
||||
|
||||
*The default download folder will be the path where you run the command (CLI path).*
|
||||
|
||||
@ -70,6 +70,7 @@ Set your nhentai cookie against captcha:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
nhentai --useragent "USER AGENT of YOUR BROWSER"
|
||||
nhentai --cookie "YOUR COOKIE FROM nhentai.net"
|
||||
|
||||
**NOTE**
|
||||
@ -86,6 +87,10 @@ Set your nhentai cookie against captcha:
|
||||
.. |ve| unicode:: U+22EE .. https://www.compart.com/en/unicode/U+22EE
|
||||
.. |ld| unicode:: U+2014 .. https://www.compart.com/en/unicode/U+2014
|
||||
|
||||
.. image:: ./images/usage.png?raw=true
|
||||
:alt: nhentai
|
||||
:align: center
|
||||
|
||||
Download specified doujinshi:
|
||||
|
||||
.. code-block:: bash
|
||||
@ -138,24 +143,34 @@ Other options:
|
||||
|
||||
.. code-block::
|
||||
|
||||
Usage:
|
||||
nhentai --search [keyword] --download
|
||||
NHENTAI=http://h.loli.club nhentai --id [ID ...]
|
||||
nhentai --file [filename]
|
||||
|
||||
Environment Variable:
|
||||
NHENTAI nhentai mirror url
|
||||
|
||||
Options:
|
||||
# Operation options
|
||||
# Operation options, control the program behaviors
|
||||
-h, --help show this help message and exit
|
||||
-D, --download download doujinshi (for search results)
|
||||
-S, --show just show the doujinshi information
|
||||
|
||||
# Doujinshi options
|
||||
# Doujinshi options, specify id, keyword, etc.
|
||||
--id=ID doujinshi ids set, e.g. 1,2,3
|
||||
-s KEYWORD, --search=KEYWORD
|
||||
search doujinshi by keyword
|
||||
--tag=TAG download doujinshi by tag
|
||||
-F, --favorites list or download your favorites.
|
||||
|
||||
# Multi-page options
|
||||
--page=PAGE page number of search results
|
||||
--max-page=MAX_PAGE The max page when recursive download tagged doujinshi
|
||||
# Page options, control the page to fetch / download
|
||||
--page-all all search results
|
||||
--page=PAGE, --page-range=PAGE
|
||||
page number of search results. e.g. 1,2-5,14
|
||||
--sorting=SORTING sorting of doujinshi (recent / popular /
|
||||
popular-[today|week])
|
||||
|
||||
# Download options
|
||||
# Download options, the output directory, threads, timeout, delay, etc.
|
||||
-o OUTPUT_DIR, --output=OUTPUT_DIR
|
||||
output dir
|
||||
-t THREADS, --threads=THREADS
|
||||
@ -164,23 +179,36 @@ Other options:
|
||||
timeout for downloading doujinshi
|
||||
-d DELAY, --delay=DELAY
|
||||
slow down between downloading every doujinshi
|
||||
-p PROXY, --proxy=PROXY
|
||||
uses a proxy, for example: http://127.0.0.1:1080
|
||||
--proxy=PROXY store a proxy, for example: -p 'http://127.0.0.1:1080'
|
||||
-f FILE, --file=FILE read gallery IDs from file.
|
||||
--format=NAME_FORMAT format the saved folder name
|
||||
-r, --dry-run Dry run, skip file download.
|
||||
|
||||
# Generating options
|
||||
# Generate options, for generating html viewer, cbz file, pdf file, etc
|
||||
--html generate a html viewer at current directory
|
||||
--no-html don't generate HTML after downloading
|
||||
--gen-main generate a main viewer contain all the doujin in the folder
|
||||
--gen-main generate a main viewer containing all the doujin in the
|
||||
folder
|
||||
-C, --cbz generate Comic Book CBZ File
|
||||
-P --pdf generate PDF file
|
||||
--rm-origin-dir remove downloaded doujinshi dir when generated CBZ
|
||||
or PDF file.
|
||||
|
||||
# nHentai options
|
||||
--cookie=COOKIE set cookie of nhentai to bypass Google recaptcha
|
||||
-P, --pdf generate PDF file
|
||||
--rm-origin-dir remove downloaded doujinshi dir when generated CBZ or
|
||||
PDF file.
|
||||
--meta generate a metadata file in doujinshi format
|
||||
--regenerate-cbz regenerate the cbz file if exists
|
||||
|
||||
# nhentai options, set cookie, user-agent, language, remove caches, histories, etc
|
||||
--cookie=COOKIE set cookie of nhentai to bypass Cloudflare captcha
|
||||
--useragent=USERAGENT
|
||||
set useragent to bypass Cloudflare captcha
|
||||
--language=LANGUAGE set default language to parse doujinshis
|
||||
--clean-language set DEFAULT as language to parse doujinshis
|
||||
--save-download-history
|
||||
save downloaded doujinshis, which will be skipped if
|
||||
you re-download them
|
||||
--clean-download-history
|
||||
clean download history
|
||||
--template=VIEWER_TEMPLATE
|
||||
set viewer template
|
||||
|
||||
==============
|
||||
nHentai Mirror
|
||||
@ -210,14 +238,6 @@ Set `NHENTAI` env var to your nhentai mirror.
|
||||
:alt: nhentai
|
||||
:align: center
|
||||
|
||||
============
|
||||
あなたも変態
|
||||
============
|
||||
.. image:: ./images/image.jpg?raw=true
|
||||
:alt: nhentai
|
||||
:align: center
|
||||
|
||||
|
||||
|
||||
.. |travis| image:: https://travis-ci.org/RicterZ/nhentai.svg?branch=master
|
||||
:target: https://travis-ci.org/RicterZ/nhentai
|
||||
|
BIN
images/image.jpg
BIN
images/image.jpg
Binary file not shown.
Before Width: | Height: | Size: 34 KiB |
BIN
images/usage.png
Normal file
BIN
images/usage.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 679 KiB |
@ -1,3 +1,3 @@
|
||||
__version__ = '0.4.17'
|
||||
__version__ = '0.4.18'
|
||||
__author__ = 'RicterZ'
|
||||
__email__ = 'ricterzheng@gmail.com'
|
||||
|
@ -71,9 +71,9 @@ def cmd_parser():
|
||||
help='all search results')
|
||||
parser.add_option('--page', '--page-range', type='string', dest='page', action='store', default='',
|
||||
help='page number of search results. e.g. 1,2-5,14')
|
||||
parser.add_option('--sorting', dest='sorting', action='store', default='recent',
|
||||
parser.add_option('--sorting', dest='sorting', action='store', default='popular',
|
||||
help='sorting of doujinshi (recent / popular / popular-[today|week])',
|
||||
choices=['recent', 'popular', 'popular-today', 'popular-week'])
|
||||
choices=['recent', 'popular', 'popular-today', 'popular-week', 'date'])
|
||||
|
||||
# download options
|
||||
parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', default='./',
|
||||
@ -106,11 +106,13 @@ def cmd_parser():
|
||||
help='remove downloaded doujinshi dir when generated CBZ or PDF file.')
|
||||
parser.add_option('--meta', dest='generate_metadata', action='store_true',
|
||||
help='generate a metadata file in doujinshi format')
|
||||
parser.add_option('--regenerate-cbz', dest='regenerate_cbz', action='store_true', default=False,
|
||||
help='regenerate the cbz file if exists')
|
||||
|
||||
# nhentai options
|
||||
parser.add_option('--cookie', type='str', dest='cookie', action='store',
|
||||
help='set cookie of nhentai to bypass Cloudflare captcha')
|
||||
parser.add_option('--useragent', type='str', dest='useragent', action='store',
|
||||
parser.add_option('--useragent', '--user-agent', type='str', dest='useragent', action='store',
|
||||
help='set useragent to bypass Cloudflare captcha')
|
||||
parser.add_option('--language', type='str', dest='language', action='store',
|
||||
help='set default language to parse doujinshis')
|
||||
@ -122,6 +124,8 @@ def cmd_parser():
|
||||
help='clean download history')
|
||||
parser.add_option('--template', dest='viewer_template', action='store',
|
||||
help='set viewer template', default='')
|
||||
parser.add_option('--legacy', dest='legacy', action='store_true', default=False,
|
||||
help='use legacy searching method')
|
||||
|
||||
try:
|
||||
sys.argv = [unicode(i.decode(sys.stdin.encoding)) for i in sys.argv]
|
||||
@ -156,7 +160,7 @@ def cmd_parser():
|
||||
elif args.useragent is not None:
|
||||
constant.CONFIG['useragent'] = args.useragent
|
||||
write_config()
|
||||
logger.info('Useragent saved.')
|
||||
logger.info('User-Agent saved.')
|
||||
exit(0)
|
||||
elif args.language is not None:
|
||||
constant.CONFIG['language'] = args.language
|
||||
|
@ -8,7 +8,7 @@ import time
|
||||
|
||||
from nhentai import constant
|
||||
from nhentai.cmdline import cmd_parser, banner
|
||||
from nhentai.parser import doujinshi_parser, search_parser, print_doujinshi, favorites_parser
|
||||
from nhentai.parser import doujinshi_parser, search_parser, legacy_search_parser, print_doujinshi, favorites_parser
|
||||
from nhentai.doujinshi import Doujinshi
|
||||
from nhentai.downloader import Downloader
|
||||
from nhentai.logger import logger
|
||||
@ -55,8 +55,10 @@ def main():
|
||||
if constant.CONFIG['language']:
|
||||
logger.info('Using default language: {0}'.format(constant.CONFIG['language']))
|
||||
options.keyword += ' language:{}'.format(constant.CONFIG['language'])
|
||||
doujinshis = search_parser(options.keyword, sorting=options.sorting, page=page_list,
|
||||
is_page_all=options.page_all)
|
||||
|
||||
_search_parser = legacy_search_parser if options.legacy else search_parser
|
||||
doujinshis = _search_parser(options.keyword, sorting=options.sorting, page=page_list,
|
||||
is_page_all=options.page_all)
|
||||
|
||||
elif not doujinshi_ids:
|
||||
doujinshi_ids = options.id
|
||||
@ -71,27 +73,20 @@ def main():
|
||||
|
||||
doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))
|
||||
|
||||
if doujinshi_ids:
|
||||
for i, id_ in enumerate(doujinshi_ids):
|
||||
if options.delay:
|
||||
time.sleep(options.delay)
|
||||
|
||||
doujinshi_info = doujinshi_parser(id_)
|
||||
|
||||
if doujinshi_info:
|
||||
doujinshi_list.append(Doujinshi(name_format=options.name_format, **doujinshi_info))
|
||||
|
||||
if (i + 1) % 10 == 0:
|
||||
logger.info('Progress: %d / %d' % (i + 1, len(doujinshi_ids)))
|
||||
|
||||
if not options.is_show:
|
||||
downloader = Downloader(path=options.output_dir, size=options.threads,
|
||||
timeout=options.timeout, delay=options.delay)
|
||||
|
||||
for doujinshi in doujinshi_list:
|
||||
for doujinshi_id in doujinshi_ids:
|
||||
doujinshi_info = doujinshi_parser(doujinshi_id)
|
||||
if doujinshi_info:
|
||||
doujinshi = Doujinshi(name_format=options.name_format, **doujinshi_info)
|
||||
else:
|
||||
continue
|
||||
|
||||
if not options.dryrun:
|
||||
doujinshi.downloader = downloader
|
||||
doujinshi.download()
|
||||
doujinshi.download(regenerate_cbz=options.regenerate_cbz)
|
||||
|
||||
if options.generate_metadata:
|
||||
table = doujinshi.table
|
||||
@ -117,7 +112,13 @@ def main():
|
||||
logger.log(15, 'All done.')
|
||||
|
||||
else:
|
||||
[doujinshi.show() for doujinshi in doujinshi_list]
|
||||
for doujinshi_id in doujinshi_ids:
|
||||
doujinshi_info = doujinshi_parser(doujinshi_id)
|
||||
if doujinshi_info:
|
||||
doujinshi = Doujinshi(name_format=options.name_format, **doujinshi_info)
|
||||
else:
|
||||
continue
|
||||
doujinshi.show()
|
||||
|
||||
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
|
@ -14,6 +14,7 @@ BASE_URL = os.getenv('NHENTAI', 'https://nhentai.net')
|
||||
__api_suspended_DETAIL_URL = '%s/api/gallery' % BASE_URL
|
||||
|
||||
DETAIL_URL = '%s/g' % BASE_URL
|
||||
LEGACY_SEARCH_URL = '%s/search/' % BASE_URL
|
||||
SEARCH_URL = '%s/api/galleries/search' % BASE_URL
|
||||
|
||||
|
||||
|
@ -6,6 +6,7 @@ from nhentai.constant import DETAIL_URL, IMAGE_URL
|
||||
from nhentai.logger import logger
|
||||
from nhentai.utils import format_filename
|
||||
|
||||
|
||||
EXT_MAP = {
|
||||
'j': 'jpg',
|
||||
'p': 'png',
|
||||
@ -37,12 +38,13 @@ class Doujinshi(object):
|
||||
self.url = '%s/%d' % (DETAIL_URL, self.id)
|
||||
self.info = DoujinshiInfo(**kwargs)
|
||||
|
||||
name_format = name_format.replace('%i', str(self.id))
|
||||
name_format = name_format.replace('%a', self.info.artists)
|
||||
name_format = name_format.replace('%t', self.name)
|
||||
name_format = name_format.replace('%p', self.pretty_name)
|
||||
name_format = name_format.replace('%s', self.info.subtitle)
|
||||
self.filename = format_filename(name_format)
|
||||
name_format = name_format.replace('%i', format_filename(str(self.id)))
|
||||
name_format = name_format.replace('%a', format_filename(self.info.artists))
|
||||
|
||||
name_format = name_format.replace('%t', format_filename(self.name))
|
||||
name_format = name_format.replace('%p', format_filename(self.pretty_name))
|
||||
name_format = name_format.replace('%s', format_filename(self.info.subtitle))
|
||||
self.filename = format_filename(name_format, 255, True)
|
||||
|
||||
self.table = [
|
||||
["Parodies", self.info.parodies],
|
||||
@ -63,7 +65,7 @@ class Doujinshi(object):
|
||||
|
||||
logger.info(u'Print doujinshi information of {0}\n{1}'.format(self.id, tabulate(self.table)))
|
||||
|
||||
def download(self):
|
||||
def download(self, regenerate_cbz=False):
|
||||
logger.info('Starting to download doujinshi: %s' % self.name)
|
||||
if self.downloader:
|
||||
download_queue = []
|
||||
@ -73,7 +75,7 @@ class Doujinshi(object):
|
||||
for i in range(1, min(self.pages, len(self.ext)) + 1):
|
||||
download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i, self.ext[i - 1]))
|
||||
|
||||
self.downloader.download(download_queue, self.filename)
|
||||
self.downloader.download(download_queue, self.filename, regenerate_cbz=regenerate_cbz)
|
||||
else:
|
||||
logger.critical('Downloader has not been loaded')
|
||||
|
||||
|
@ -113,13 +113,18 @@ class Downloader(Singleton):
|
||||
else:
|
||||
logger.log(15, '{0} downloaded successfully'.format(data))
|
||||
|
||||
def download(self, queue, folder=''):
|
||||
def download(self, queue, folder='', regenerate_cbz=False):
|
||||
if not isinstance(folder, text):
|
||||
folder = str(folder)
|
||||
|
||||
if self.path:
|
||||
folder = os.path.join(self.path, folder)
|
||||
|
||||
if os.path.exists(folder + '.cbz'):
|
||||
if not regenerate_cbz:
|
||||
logger.warning('CBZ file \'{}.cbz\' exists, ignored download request'.format(folder))
|
||||
return
|
||||
|
||||
if not os.path.exists(folder):
|
||||
logger.warning('Path \'{0}\' does not exist, creating.'.format(folder))
|
||||
try:
|
||||
|
@ -177,9 +177,11 @@ def doujinshi_parser(id_):
|
||||
return doujinshi
|
||||
|
||||
|
||||
def old_search_parser(keyword, sorting='date', page=1):
|
||||
def legacy_search_parser(keyword, sorting='date', page=1, is_page_all=False):
|
||||
logger.warning('Using legacy searching method, `--all` options will not be supported')
|
||||
logger.debug('Searching doujinshis of keyword {0}'.format(keyword))
|
||||
response = request('get', url=constant.SEARCH_URL, params={'q': keyword, 'page': page, 'sort': sorting}).content
|
||||
response = request('get', url=constant.LEGACY_SEARCH_URL,
|
||||
params={'q': keyword, 'page': page, 'sort': sorting}).content
|
||||
|
||||
result = _get_title_and_id(response)
|
||||
if not result:
|
||||
@ -200,6 +202,7 @@ def print_doujinshi(doujinshi_list):
|
||||
def search_parser(keyword, sorting, page, is_page_all=False):
|
||||
# keyword = '+'.join([i.strip().replace(' ', '-').lower() for i in keyword.split(',')])
|
||||
result = []
|
||||
response = None
|
||||
if not page:
|
||||
page = [1]
|
||||
|
||||
@ -209,6 +212,7 @@ def search_parser(keyword, sorting, page, is_page_all=False):
|
||||
page = range(1, init_response['num_pages']+1)
|
||||
|
||||
total = '/{0}'.format(page[-1]) if is_page_all else ''
|
||||
not_exists_persist = False
|
||||
for p in page:
|
||||
i = 0
|
||||
|
||||
@ -220,18 +224,21 @@ def search_parser(keyword, sorting, page, is_page_all=False):
|
||||
response = request('get', url.replace('%2B', '+')).json()
|
||||
except Exception as e:
|
||||
logger.critical(str(e))
|
||||
|
||||
response = None
|
||||
break
|
||||
|
||||
if 'result' not in response:
|
||||
if response is None or 'result' not in response:
|
||||
logger.warning('No result in response in page {}'.format(p))
|
||||
break
|
||||
if not_exists_persist is True:
|
||||
break
|
||||
continue
|
||||
|
||||
for row in response['result']:
|
||||
title = row['title']['english']
|
||||
title = title[:85] + '..' if len(title) > 85 else title
|
||||
result.append({'id': row['id'], 'title': title})
|
||||
|
||||
not_exists_persist = False
|
||||
if not result:
|
||||
logger.warning('No results for keywords {}'.format(keyword))
|
||||
|
||||
|
@ -13,6 +13,9 @@ from nhentai.logger import logger
|
||||
from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database
|
||||
|
||||
|
||||
MAX_FIELD_LENGTH = 100
|
||||
|
||||
|
||||
def request(method, url, **kwargs):
|
||||
session = requests.Session()
|
||||
session.headers.update({
|
||||
@ -247,7 +250,7 @@ def unicode_truncate(s, length, encoding='utf-8'):
|
||||
return encoded.decode(encoding, 'ignore')
|
||||
|
||||
|
||||
def format_filename(s):
|
||||
def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
|
||||
"""
|
||||
It used to be a whitelist approach allowed only alphabet and a part of symbols.
|
||||
but most doujinshi's names include Japanese 2-byte characters and these was rejected.
|
||||
@ -255,15 +258,20 @@ def format_filename(s):
|
||||
if filename include forbidden characters (\'/:,;*?"<>|) ,it replace space character(' ').
|
||||
"""
|
||||
# maybe you can use `--format` to select a suitable filename
|
||||
ban_chars = '\\\'/:,;*?"<>|\t'
|
||||
filename = s.translate(str.maketrans(ban_chars, ' ' * len(ban_chars))).strip()
|
||||
filename = ' '.join(filename.split())
|
||||
|
||||
while filename.endswith('.'):
|
||||
filename = filename[:-1]
|
||||
if not _truncate_only:
|
||||
ban_chars = '\\\'/:,;*?"<>|\t'
|
||||
filename = s.translate(str.maketrans(ban_chars, ' ' * len(ban_chars))).strip()
|
||||
filename = ' '.join(filename.split())
|
||||
|
||||
if len(filename) > 100:
|
||||
filename = filename[:100] + u'…'
|
||||
while filename.endswith('.'):
|
||||
filename = filename[:-1]
|
||||
else:
|
||||
filename = s
|
||||
|
||||
# limit `length` chars
|
||||
if len(filename) >= length:
|
||||
filename = filename[:length - 1] + u'…'
|
||||
|
||||
# Remove [] from filename
|
||||
filename = filename.replace('[]', '').strip()
|
||||
|
Reference in New Issue
Block a user