Compare commits

..

9 Commits

Author SHA1 Message Date
927d5b1b39 update requirements 2023-02-05 23:45:33 +08:00
a8566482aa change log color and update images 2023-02-05 23:44:15 +08:00
8c900a833d update README 2023-02-05 23:25:41 +08:00
466fa4c094 rename some constants 2023-02-05 23:17:23 +08:00
2adf8ccc9d reformat files #266 2023-02-05 23:13:47 +08:00
06fdf0dade reformat files #266 2023-02-05 22:44:37 +08:00
a609243794 change logger 2023-02-05 07:07:19 +08:00
e89c2c0860 fix bug #265 2023-02-05 07:02:45 +08:00
e08b0659e5 improve #265 2023-02-05 06:55:03 +08:00
16 changed files with 201 additions and 201 deletions

View File

@ -95,7 +95,7 @@ Download specified doujinshi:
.. code-block:: bash
nhentai --id=123855,123866
nhentai --id 123855 123866 123877
Download doujinshi with ids specified in a file (doujinshi ids split by line):
@ -145,7 +145,7 @@ Other options:
Usage:
nhentai --search [keyword] --download
NHENTAI=http://h.loli.club nhentai --id [ID ...]
NHENTAI=https://nhentai-mirror-url/ nhentai --id [ID ...]
nhentai --file [filename]
Environment Variable:
@ -158,10 +158,10 @@ Other options:
-S, --show just show the doujinshi information
# Doujinshi options, specify id, keyword, etc.
--id=ID doujinshi ids set, e.g. 1,2,3
--id doujinshi ids set, e.g. 167680 167681 167682
-s KEYWORD, --search=KEYWORD
search doujinshi by keyword
-F, --favorites list or download your favorites.
-F, --favorites list or download your favorites
# Page options, control the page to fetch / download
--page-all all search results
@ -179,10 +179,10 @@ Other options:
timeout for downloading doujinshi
-d DELAY, --delay=DELAY
slow down between downloading every doujinshi
--proxy=PROXY store a proxy, for example: -p 'http://127.0.0.1:1080'
--proxy=PROXY store a proxy, for example: -p "http://127.0.0.1:1080"
-f FILE, --file=FILE read gallery IDs from file.
--format=NAME_FORMAT format the saved folder name
-r, --dry-run Dry run, skip file download.
--dry-run Dry run, skip file download
# Generate options, for generate html viewer, cbz file, pdf file, etc
--html generate a html viewer at current directory
@ -192,13 +192,13 @@ Other options:
-C, --cbz generate Comic Book CBZ File
-P, --pdf generate PDF file
--rm-origin-dir remove downloaded doujinshi dir when generated CBZ or
PDF file.
PDF file
--meta generate a metadata file in doujinshi format
--regenerate-cbz regenerate the cbz file if exists
# nhentai options, set cookie, user-agent, language, remove caches, histories, etc
--cookie=COOKIE set cookie of nhentai to bypass Cloudflare captcha
--useragent=USERAGENT
--useragent=USERAGENT, --user-agent=USERAGENT
set useragent to bypass Cloudflare captcha
--language=LANGUAGE set default language to parse doujinshis
--clean-language set DEFAULT as language to parse doujinshis
@ -209,6 +209,7 @@ Other options:
clean download history
--template=VIEWER_TEMPLATE
set viewer template
--legacy use legacy searching method
==============
nHentai Mirror
@ -225,7 +226,7 @@ Set `NHENTAI` env var to your nhentai mirror.
.. code-block:: bash
NHENTAI=http://h.loli.club nhentai --id 123456
NHENTAI=https://h.loli.club nhentai --id 123456
.. image:: ./images/search.png?raw=true

Binary file not shown.

Before

Width:  |  Height:  |  Size: 189 KiB

After

Width:  |  Height:  |  Size: 1.0 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 173 KiB

After

Width:  |  Height:  |  Size: 991 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 311 KiB

After

Width:  |  Height:  |  Size: 1.9 MiB

View File

@ -1,3 +1,3 @@
__version__ = '0.4.18'
__version__ = '0.5.0'
__author__ = 'RicterZ'
__email__ = 'ricterzheng@gmail.com'

View File

@ -17,13 +17,7 @@ from nhentai.logger import logger
def banner():
logger.info(u'''nHentai ver %s: あなたも変態。 いいね?
_ _ _ _
_ __ | | | | ___ _ __ | |_ __ _(_)
| '_ \| |_| |/ _ \ '_ \| __/ _` | |
| | | | _ | __/ | | | || (_| | |
|_| |_|_| |_|\___|_| |_|\__\__,_|_|
''' % __version__)
logger.debug(f'nHentai ver {__version__}: あなたも変態。 いいね?')
def load_config():
@ -46,11 +40,27 @@ def write_config():
f.write(json.dumps(constant.CONFIG))
def callback(option, opt_str, value, parser):
if option == '--id':
pass
value = []
for arg in parser.rargs:
if arg.isdigit():
value.append(int(arg))
elif arg.startswith('-'):
break
else:
logger.warning(f'Ignore invalid id {arg}')
setattr(parser.values, option.dest, value)
def cmd_parser():
load_config()
parser = OptionParser('\n nhentai --search [keyword] --download'
'\n NHENTAI=http://h.loli.club nhentai --id [ID ...]'
'\n NHENTAI=https://nhentai-mirror-url/ nhentai --id [ID ...]'
'\n nhentai --file [filename]'
'\n\nEnvironment Variable:\n'
' NHENTAI nhentai mirror url')
@ -60,11 +70,12 @@ def cmd_parser():
parser.add_option('--show', '-S', dest='is_show', action='store_true', help='just show the doujinshi information')
# doujinshi options
parser.add_option('--id', type='string', dest='id', action='store', help='doujinshi ids set, e.g. 1,2,3')
parser.add_option('--id', dest='id', action='callback', callback=callback,
help='doujinshi ids set, e.g. 167680 167681 167682')
parser.add_option('--search', '-s', type='string', dest='keyword', action='store',
help='search doujinshi by keyword')
parser.add_option('--favorites', '-F', action='store_true', dest='favorites',
help='list or download your favorites.')
help='list or download your favorites')
# page options
parser.add_option('--page-all', dest='page_all', action='store_true', default=False,
@ -85,11 +96,11 @@ def cmd_parser():
parser.add_option('--delay', '-d', type='int', dest='delay', action='store', default=0,
help='slow down between downloading every doujinshi')
parser.add_option('--proxy', type='string', dest='proxy', action='store',
help='store a proxy, for example: -p \'http://127.0.0.1:1080\'')
help='store a proxy, for example: -p "http://127.0.0.1:1080"')
parser.add_option('--file', '-f', type='string', dest='file', action='store', help='read gallery IDs from file.')
parser.add_option('--format', type='string', dest='name_format', action='store',
help='format the saved folder name', default='[%i][%a][%t]')
parser.add_option('--dry-run', '-r', action='store_true', dest='dryrun', help='Dry run, skip file download.')
parser.add_option('--dry-run', action='store_true', dest='dryrun', help='Dry run, skip file download')
# generate options
parser.add_option('--html', dest='html_viewer', action='store_true',
@ -103,7 +114,7 @@ def cmd_parser():
parser.add_option('--pdf', '-P', dest='is_pdf', action='store_true',
help='generate PDF file')
parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
help='remove downloaded doujinshi dir when generated CBZ or PDF file.')
help='remove downloaded doujinshi dir when generated CBZ or PDF file')
parser.add_option('--meta', dest='generate_metadata', action='store_true',
help='generate a metadata file in doujinshi format')
parser.add_option('--regenerate-cbz', dest='regenerate_cbz', action='store_true', default=False,
@ -127,13 +138,6 @@ def cmd_parser():
parser.add_option('--legacy', dest='legacy', action='store_true', default=False,
help='use legacy searching method')
try:
sys.argv = [unicode(i.decode(sys.stdin.encoding)) for i in sys.argv]
except (NameError, TypeError):
pass
except UnicodeDecodeError:
exit(0)
args, _ = parser.parse_args(sys.argv[1:])
if args.html_viewer:
@ -165,21 +169,22 @@ def cmd_parser():
elif args.language is not None:
constant.CONFIG['language'] = args.language
write_config()
logger.info('Default language now set to \'{0}\''.format(args.language))
logger.info(f'Default language now set to "{args.language}"')
exit(0)
# TODO: search without language
if args.proxy is not None:
proxy_url = urlparse(args.proxy)
if not args.proxy == '' and proxy_url.scheme not in ('http', 'https', 'socks5', 'socks5h', 'socks4', 'socks4a'):
logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(proxy_url.scheme))
if not args.proxy == '' and proxy_url.scheme not in ('http', 'https', 'socks5', 'socks5h',
'socks4', 'socks4a'):
logger.error(f'Invalid protocol "{proxy_url.scheme}" of proxy, ignored')
exit(0)
else:
constant.CONFIG['proxy'] = {
'http': args.proxy,
'https': args.proxy,
}
logger.info('Proxy now set to \'{0}\'.'.format(args.proxy))
logger.info(f'Proxy now set to "{args.proxy}"')
write_config()
exit(0)
@ -188,8 +193,8 @@ def cmd_parser():
args.viewer_template = 'default'
if not os.path.exists(os.path.join(os.path.dirname(__file__),
'viewer/{}/index.html'.format(args.viewer_template))):
logger.error('Template \'{}\' does not exists'.format(args.viewer_template))
f'viewer/{args.viewer_template}/index.html')):
logger.error(f'Template "{args.viewer_template}" does not exists')
exit(1)
else:
constant.CONFIG['template'] = args.viewer_template
@ -202,10 +207,6 @@ def cmd_parser():
logger.warning('Cookie has not been set, please use `nhentai --cookie \'COOKIE\'` to set it.')
exit(1)
if args.id:
_ = [i.strip() for i in args.id.split(',')]
args.id = set(int(i) for i in _ if i.isdigit())
if args.file:
with open(args.file, 'r') as f:
_ = [i.strip() for i in f.readlines()]

View File

@ -1,10 +1,8 @@
#!/usr/bin/env python2.7
# coding: utf-8
import sys
import signal
import platform
import time
import urllib3.exceptions
from nhentai import constant
from nhentai.cmdline import cmd_parser, banner
@ -25,16 +23,16 @@ def main():
exit(1)
options = cmd_parser()
logger.info('Using mirror: {0}'.format(BASE_URL))
logger.info(f'Using mirror: {BASE_URL}')
# CONFIG['proxy'] will be changed after cmd_parser()
if constant.CONFIG['proxy']['http']:
logger.info('Using proxy: {0}'.format(constant.CONFIG['proxy']['http']))
logger.info(f'Using proxy: {constant.CONFIG["proxy"]["http"]}')
if not constant.CONFIG['template']:
constant.CONFIG['template'] = 'default'
logger.info('Using viewer template "{}"'.format(constant.CONFIG['template']))
logger.info(f'Using viewer template "{constant.CONFIG["template"]}"')
# check your cookie
check_cookie()
@ -53,8 +51,8 @@ def main():
elif options.keyword:
if constant.CONFIG['language']:
logger.info('Using default language: {0}'.format(constant.CONFIG['language']))
options.keyword += ' language:{}'.format(constant.CONFIG['language'])
logger.info(f'Using default language: {constant.CONFIG["language"]}')
options.keyword += f' language:{constant.CONFIG["language"]}'
_search_parser = legacy_search_parser if options.legacy else search_parser
doujinshis = _search_parser(options.keyword, sorting=options.sorting, page=page_list,
@ -107,9 +105,9 @@ def main():
generate_main_html(options.output_dir)
if not platform.system() == 'Windows':
logger.log(15, '🍻 All done.')
logger.log(16, '🍻 All done.')
else:
logger.log(15, 'All done.')
logger.log(16, 'All done.')
else:
for doujinshi_id in doujinshi_ids:
@ -121,6 +119,7 @@ def main():
doujinshi.show()
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
signal.signal(signal.SIGINT, signal_handler)
if __name__ == '__main__':

View File

@ -1,35 +1,29 @@
# coding: utf-8
import os
import tempfile
try:
from urlparse import urlparse
except ImportError:
from urllib.parse import urlparse
from urllib.parse import urlparse
BASE_URL = os.getenv('NHENTAI', 'https://nhentai.net')
__api_suspended_DETAIL_URL = '%s/api/gallery' % BASE_URL
DETAIL_URL = f'{BASE_URL}/g'
LEGACY_SEARCH_URL = f'{BASE_URL}/search/'
SEARCH_URL = f'{BASE_URL}/api/galleries/search'
DETAIL_URL = '%s/g' % BASE_URL
LEGACY_SEARCH_URL = '%s/search/' % BASE_URL
SEARCH_URL = '%s/api/galleries/search' % BASE_URL
TAG_API_URL = f'{BASE_URL}/api/galleries/tagged'
LOGIN_URL = f'{BASE_URL}/login/'
CHALLENGE_URL = f'{BASE_URL}/challenge'
FAV_URL = f'{BASE_URL}/favorites/'
TAG_API_URL = '%s/api/galleries/tagged' % BASE_URL
LOGIN_URL = '%s/login/' % BASE_URL
CHALLENGE_URL = '%s/challenge' % BASE_URL
FAV_URL = '%s/favorites/' % BASE_URL
u = urlparse(BASE_URL)
IMAGE_URL = '%s://i.%s/galleries' % (u.scheme, u.hostname)
IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i.{urlparse(BASE_URL).hostname}/galleries'
NHENTAI_HOME = os.path.join(os.getenv('HOME', tempfile.gettempdir()), '.nhentai')
NHENTAI_HISTORY = os.path.join(NHENTAI_HOME, 'history.sqlite3')
NHENTAI_CONFIG_FILE = os.path.join(NHENTAI_HOME, 'config.json')
__api_suspended_DETAIL_URL = f'{BASE_URL}/api/gallery'
CONFIG = {
'proxy': {'http': '', 'https': ''},
'cookie': '',
@ -38,9 +32,9 @@ CONFIG = {
'useragent': 'nhentai command line client (https://github.com/RicterZ/nhentai)'
}
LANGUAGEISO ={
'english' : 'en',
'chinese' : 'zh',
'japanese' : 'ja',
'translated' : 'translated'
LANGUAGE_ISO = {
'english': 'en',
'chinese': 'zh',
'japanese': 'ja',
'translated': 'translated'
}

View File

@ -35,7 +35,7 @@ class Doujinshi(object):
self.ext = ext
self.pages = pages
self.downloader = None
self.url = '%s/%d' % (DETAIL_URL, self.id)
self.url = f'{DETAIL_URL}/{self.id}'
self.info = DoujinshiInfo(**kwargs)
name_format = name_format.replace('%i', format_filename(str(self.id)))
@ -59,23 +59,22 @@ class Doujinshi(object):
]
def __repr__(self):
return '<Doujinshi: {0}>'.format(self.name)
return f'<Doujinshi: {self.name}>'
def show(self):
logger.info(u'Print doujinshi information of {0}\n{1}'.format(self.id, tabulate(self.table)))
logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}')
def download(self, regenerate_cbz=False):
logger.info('Starting to download doujinshi: %s' % self.name)
logger.info(f'Starting to download doujinshi: {self.name}')
if self.downloader:
download_queue = []
if len(self.ext) != self.pages:
logger.warning('Page count and ext count do not equal')
for i in range(1, min(self.pages, len(self.ext)) + 1):
download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i, self.ext[i - 1]))
download_queue.append(f'{IMAGE_URL}/{self.img_id}/{i}.{self.ext[i-1]}')
self.downloader.download(download_queue, self.filename, regenerate_cbz=regenerate_cbz)
self.downloader.start_download(download_queue, self.filename, regenerate_cbz=regenerate_cbz)
else:
logger.critical('Downloader has not been loaded')
@ -87,4 +86,4 @@ if __name__ == '__main__':
try:
test.download()
except Exception as e:
print('Exception: %s' % str(e))
print(f'Exception: {e}')

View File

@ -3,23 +3,20 @@
import multiprocessing
import signal
from future.builtins import str as text
import sys
import os
import requests
import time
import urllib3.exceptions
try:
from urllib.parse import urlparse
except ImportError:
from urlparse import urlparse
from urllib.parse import urlparse
from nhentai import constant
from nhentai.logger import logger
from nhentai.parser import request
from nhentai.utils import Singleton
requests.packages.urllib3.disable_warnings()
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
semaphore = multiprocessing.Semaphore(1)
@ -27,6 +24,21 @@ class NHentaiImageNotExistException(Exception):
pass
def download_callback(result):
result, data = result
if result == 0:
logger.warning('fatal errors occurred, ignored')
elif result == -1:
logger.warning(f'url {data} return status code 404')
elif result == -2:
logger.warning('Ctrl-C pressed, exiting sub processes ...')
elif result == -3:
# workers won't be run, just pass
pass
else:
logger.log(16, f'{data} downloaded successfully')
class Downloader(Singleton):
def __init__(self, path='', size=5, timeout=30, delay=0):
@ -35,20 +47,21 @@ class Downloader(Singleton):
self.timeout = timeout
self.delay = delay
def download_(self, url, folder='', filename='', retried=0, proxy=None):
def download(self, url, folder='', filename='', retried=0, proxy=None):
if self.delay:
time.sleep(self.delay)
logger.info('Starting to download {0} ...'.format(url))
logger.info(f'Starting to download {url} ...')
filename = filename if filename else os.path.basename(urlparse(url).path)
base_filename, extension = os.path.splitext(filename)
save_file_path = os.path.join(folder, base_filename.zfill(3) + extension)
try:
if os.path.exists(os.path.join(folder, base_filename.zfill(3) + extension)):
logger.warning('File: {0} exists, ignoring'.format(os.path.join(folder, base_filename.zfill(3) +
extension)))
if os.path.exists(save_file_path):
logger.warning(f'Ignored exists file: {save_file_path}')
return 1, url
response = None
with open(os.path.join(folder, base_filename.zfill(3) + extension), "wb") as f:
with open(save_file_path, "wb") as f:
i = 0
while i < 10:
try:
@ -77,14 +90,14 @@ class Downloader(Singleton):
except (requests.HTTPError, requests.Timeout) as e:
if retried < 3:
logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
return 0, self.download_(url=url, folder=folder, filename=filename,
retried=retried+1, proxy=proxy)
logger.warning(f'Warning: {e}, retrying({retried}) ...')
return 0, self.download(url=url, folder=folder, filename=filename,
retried=retried+1, proxy=proxy)
else:
return 0, None
except NHentaiImageNotExistException as e:
os.remove(os.path.join(folder, base_filename.zfill(3) + extension))
os.remove(save_file_path)
return -1, url
except Exception as e:
@ -98,23 +111,8 @@ class Downloader(Singleton):
return 1, url
def _download_callback(self, result):
result, data = result
if result == 0:
logger.warning('fatal errors occurred, ignored')
# exit(1)
elif result == -1:
logger.warning('url {} return status code 404'.format(data))
elif result == -2:
logger.warning('Ctrl-C pressed, exiting sub processes ...')
elif result == -3:
# workers wont be run, just pass
pass
else:
logger.log(15, '{0} downloaded successfully'.format(data))
def download(self, queue, folder='', regenerate_cbz=False):
if not isinstance(folder, text):
def start_download(self, queue, folder='', regenerate_cbz=False):
if not isinstance(folder, (str, )):
folder = str(folder)
if self.path:
@ -122,18 +120,17 @@ class Downloader(Singleton):
if os.path.exists(folder + '.cbz'):
if not regenerate_cbz:
logger.warning('CBZ file \'{}.cbz\' exists, ignored download request'.format(folder))
logger.warning(f'CBZ file "{folder}.cbz" exists, ignored download request')
return
if not os.path.exists(folder):
logger.warning('Path \'{0}\' does not exist, creating.'.format(folder))
try:
os.makedirs(folder)
except EnvironmentError as e:
logger.critical('{0}'.format(str(e)))
logger.critical(str(e))
else:
logger.warning('Path \'{0}\' already exist.'.format(folder))
logger.warning(f'Path "{folder}" already exist.')
queue = [(self, url, folder, constant.CONFIG['proxy']) for url in queue]
@ -146,7 +143,7 @@ class Downloader(Singleton):
def download_wrapper(obj, url, folder='', proxy=None):
if sys.platform == 'darwin' or semaphore.get_value():
return Downloader.download_(obj, url=url, folder=folder, proxy=proxy)
return Downloader.download(obj, url=url, folder=folder, proxy=proxy)
else:
return -3, None
@ -155,7 +152,7 @@ def init_worker():
signal.signal(signal.SIGINT, subprocess_signal)
def subprocess_signal(signal, frame):
def subprocess_signal(sig, frame):
if semaphore.acquire(timeout=1):
logger.warning('Ctrl-C pressed, exiting sub processes ...')

View File

@ -34,7 +34,7 @@ class ColorizingStreamHandler(logging.StreamHandler):
# levels to (background, foreground, bold/intense)
level_map = {
logging.DEBUG: (None, 'blue', False),
logging.INFO: (None, 'green', False),
logging.INFO: (None, 'white', False),
logging.WARNING: (None, 'yellow', False),
logging.ERROR: (None, 'red', False),
logging.CRITICAL: ('red', 'white', False)
@ -160,18 +160,18 @@ class ColorizingStreamHandler(logging.StreamHandler):
return self.colorize(message, record)
logging.addLevelName(15, "INFO")
logging.addLevelName(16, "SUCCESS")
logger = logging.getLogger('nhentai')
LOGGER_HANDLER = ColorizingStreamHandler(sys.stdout)
FORMATTER = logging.Formatter("\r[%(asctime)s] [%(levelname)s] %(message)s", "%H:%M:%S")
FORMATTER = logging.Formatter("\r[%(asctime)s] %(funcName)s: %(message)s", "%H:%M:%S")
LOGGER_HANDLER.setFormatter(FORMATTER)
LOGGER_HANDLER.level_map[logging.getLevelName("INFO")] = (None, "cyan", False)
LOGGER_HANDLER.level_map[logging.getLevelName("SUCCESS")] = (None, "green", False)
logger.addHandler(LOGGER_HANDLER)
logger.setLevel(logging.DEBUG)
if __name__ == '__main__':
logger.log(15, 'nhentai')
logger.log(16, 'nhentai')
logger.info('info')
logger.warning('warning')
logger.debug('debug')

View File

@ -26,7 +26,7 @@ def login(username, password):
logger.info('Getting CSRF token ...')
if os.getenv('DEBUG'):
logger.info('CSRF token is {}'.format(csrf_token))
logger.info(f'CSRF token is {csrf_token}')
login_dict = {
'csrfmiddlewaretoken': csrf_token,
@ -56,7 +56,7 @@ def _get_title_and_id(response):
doujinshi_container = doujinshi.find('div', attrs={'class': 'caption'})
title = doujinshi_container.text.strip()
title = title if len(title) < 85 else title[:82] + '...'
id_ = re.search('/g/(\d+)/', doujinshi.a['href']).group(1)
id_ = re.search('/g/([0-9]+)/', doujinshi.a['href']).group(1)
result.append({'id': id_, 'title': title})
return result
@ -67,7 +67,7 @@ def favorites_parser(page=None):
html = BeautifulSoup(request('get', constant.FAV_URL).content, 'html.parser')
count = html.find('span', attrs={'class': 'count'})
if not count:
logger.error("Can't get your number of favorited doujins. Did the login failed?")
logger.error("Can't get your number of favorite doujinshis. Did the login failed?")
return []
count = int(count.text.strip('(').strip(')').replace(',', ''))
@ -84,7 +84,7 @@ def favorites_parser(page=None):
else:
pages = 1
logger.info('You have %d favorites in %d pages.' % (count, pages))
logger.info(f'You have {count} favorites in {pages} pages.')
if os.getenv('DEBUG'):
pages = 1
@ -93,40 +93,40 @@ def favorites_parser(page=None):
for page in page_range_list:
try:
logger.info('Getting doujinshi ids of page %d' % page)
resp = request('get', constant.FAV_URL + '?page=%d' % page).content
logger.info(f'Getting doujinshi ids of page {page}')
resp = request('get', f'{constant.FAV_URL}?page={page}').content
result.extend(_get_title_and_id(resp))
except Exception as e:
logger.error('Error: %s, continue', str(e))
logger.error(f'Error: {e}, continue')
return result
def doujinshi_parser(id_):
if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()):
raise Exception('Doujinshi id({0}) is not valid'.format(id_))
raise Exception(f'Doujinshi id({id_}) is not valid')
id_ = int(id_)
logger.log(15, 'Fetching doujinshi information of id {0}'.format(id_))
logger.info(f'Fetching doujinshi information of id {id_}')
doujinshi = dict()
doujinshi['id'] = id_
url = '{0}/{1}/'.format(constant.DETAIL_URL, id_)
url = f'{constant.DETAIL_URL}/{id_}/'
try:
response = request('get', url)
if response.status_code in (200, ):
response = response.content
elif response.status_code in (404,):
logger.error("Doujinshi with id {0} cannot be found".format(id_))
logger.error(f'Doujinshi with id {id_} cannot be found')
return []
else:
logger.debug('Slow down and retry ({}) ...'.format(id_))
logger.debug(f'Slow down and retry ({id_}) ...')
time.sleep(1)
return doujinshi_parser(str(id_))
except Exception as e:
logger.warning('Error: {}, ignored'.format(str(e)))
logger.warning(f'Error: {e}, ignored')
return None
html = BeautifulSoup(response, 'html.parser')
@ -156,6 +156,7 @@ def doujinshi_parser(id_):
doujinshi['img_id'] = img_id.group(1)
doujinshi['ext'] = ext
pages = 0
for _ in doujinshi_info.find_all('div', class_='tag-container field-name'):
if re.search('Pages:', _.text):
pages = _.find('span', class_='name').string
@ -177,15 +178,38 @@ def doujinshi_parser(id_):
return doujinshi
def legacy_search_parser(keyword, sorting='date', page=1, is_page_all=False):
logger.warning('Using legacy searching method, `--all` options will not be supported')
logger.debug('Searching doujinshis of keyword {0}'.format(keyword))
response = request('get', url=constant.LEGACY_SEARCH_URL,
params={'q': keyword, 'page': page, 'sort': sorting}).content
def legacy_search_parser(keyword, sorting, page, is_page_all=False):
logger.debug(f'Searching doujinshis of keyword {keyword}')
response = None
result = []
if is_page_all and len(page) != 1:
# `--page-all` option will override the `--page` option
page = [1]
for p in page:
logger.debug(f'Fetching page {p} ...')
response = request('get', url=constant.LEGACY_SEARCH_URL,
params={'q': keyword, 'page': p, 'sort': sorting}).content
result.extend(_get_title_and_id(response))
result = _get_title_and_id(response)
if not result:
logger.warning('Not found anything of keyword {}'.format(keyword))
logger.warning(f'Not found anything of keyword {keyword} on page {page[0]}')
return result
if is_page_all:
html = BeautifulSoup(response, 'lxml')
pagination = html.find(attrs={'class': 'pagination'})
next_page = pagination.find(attrs={'class': 'next'})
if next_page is None:
logger.warning('Reached the last page')
return result
else:
next_page = re.findall('page=([0-9]+)', next_page.attrs['href'])[0]
result.extend(legacy_search_parser(keyword, sorting, [next_page], is_page_all))
return result
return result
@ -195,12 +219,11 @@ def print_doujinshi(doujinshi_list):
return
doujinshi_list = [(i['id'], i['title']) for i in doujinshi_list]
headers = ['id', 'doujinshi']
logger.info('Search Result || Found %i doujinshis \n' % doujinshi_list.__len__() +
tabulate(tabular_data=doujinshi_list, headers=headers, tablefmt='rst'))
logger.info(f'Search Result || Found {doujinshi_list.__len__()} doujinshis')
print(tabulate(tabular_data=doujinshi_list, headers=headers, tablefmt='rst'))
def search_parser(keyword, sorting, page, is_page_all=False):
# keyword = '+'.join([i.strip().replace(' ', '-').lower() for i in keyword.split(',')])
result = []
response = None
if not page:
@ -211,12 +234,12 @@ def search_parser(keyword, sorting, page, is_page_all=False):
init_response = request('get', url.replace('%2B', '+')).json()
page = range(1, init_response['num_pages']+1)
total = '/{0}'.format(page[-1]) if is_page_all else ''
total = f'/{page[-1]}' if is_page_all else ''
not_exists_persist = False
for p in page:
i = 0
logger.info('Searching doujinshis using keywords "{0}" on page {1}{2}'.format(keyword, p, total))
logger.info(f'Searching doujinshis using keywords "{keyword}" on page {p}{total}')
while i < 3:
try:
url = request('get', url=constant.SEARCH_URL, params={'query': keyword,
@ -228,7 +251,7 @@ def search_parser(keyword, sorting, page, is_page_all=False):
break
if response is None or 'result' not in response:
logger.warning('No result in response in page {}'.format(p))
logger.warning(f'No result in response in page {p}')
if not_exists_persist is True:
break
continue
@ -240,20 +263,20 @@ def search_parser(keyword, sorting, page, is_page_all=False):
not_exists_persist = False
if not result:
logger.warning('No results for keywords {}'.format(keyword))
logger.warning(f'No results for keywords {keyword}')
return result
def __api_suspended_doujinshi_parser(id_):
if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()):
raise Exception('Doujinshi id({0}) is not valid'.format(id_))
raise Exception(f'Doujinshi id({id_}) is not valid')
id_ = int(id_)
logger.log(15, 'Fetching information of doujinshi id {0}'.format(id_))
logger.info(f'Fetching information of doujinshi id {id_}')
doujinshi = dict()
doujinshi['id'] = id_
url = '{0}/{1}'.format(constant.DETAIL_URL, id_)
url = f'{constant.DETAIL_URL}/{id_}'
i = 0
while 5 > i:
try:

View File

@ -2,10 +2,10 @@
import json
import os
from xml.sax.saxutils import escape
from nhentai.constant import LANGUAGEISO
from nhentai.constant import LANGUAGE_ISO
def serialize_json(doujinshi, dir):
def serialize_json(doujinshi, output_dir):
metadata = {'title': doujinshi.name,
'subtitle': doujinshi.info.subtitle}
if doujinshi.info.date:
@ -26,13 +26,13 @@ def serialize_json(doujinshi, dir):
metadata['URL'] = doujinshi.url
metadata['Pages'] = doujinshi.pages
with open(os.path.join(dir, 'metadata.json'), 'w') as f:
with open(os.path.join(output_dir, 'metadata.json'), 'w') as f:
json.dump(metadata, f, separators=(',', ':'))
def serialize_comic_xml(doujinshi, dir):
def serialize_comic_xml(doujinshi, output_dir):
from iso8601 import parse_date
with open(os.path.join(dir, 'ComicInfo.xml'), 'w', encoding="utf-8") as f:
with open(os.path.join(output_dir, 'ComicInfo.xml'), 'w', encoding="utf-8") as f:
f.write('<?xml version="1.0" encoding="utf-8"?>\n')
f.write('<ComicInfo xmlns:xsd="http://www.w3.org/2001/XMLSchema" '
'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">\n')
@ -67,14 +67,14 @@ def serialize_comic_xml(doujinshi, dir):
if doujinshi.info.languages:
languages = [i.strip() for i in doujinshi.info.languages.split(',')]
xml_write_simple_tag(f, 'Translated', 'Yes' if 'translated' in languages else 'No')
[xml_write_simple_tag(f, 'LanguageISO', LANGUAGEISO[i]) for i in languages
if (i != 'translated' and i in LANGUAGEISO)]
[xml_write_simple_tag(f, 'LanguageISO', LANGUAGE_ISO[i]) for i in languages
if (i != 'translated' and i in LANGUAGE_ISO)]
f.write('</ComicInfo>')
def xml_write_simple_tag(f, name, val, indent=1):
f.write('{}<{}>{}</{}>\n'.format(' ' * indent, name, escape(str(val)), name))
f.write(f'{" "*indent}<{name}>{escape(str(val))}</{name}>\n')
def merge_json():

View File

@ -36,11 +36,11 @@ def check_cookie():
logger.error('Blocked by Cloudflare captcha, please set your cookie and useragent')
exit(-1)
username = re.findall('"/users/\d+/(.*?)"', response.text)
username = re.findall('"/users/[0-9]+/(.*?)"', response.text)
if not username:
logger.warning('Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie')
else:
logger.info('Login successfully! Your username: {}'.format(username[0]))
logger.log(16, f'Login successfully! Your username: {username[0]}')
class _Singleton(type):
@ -82,11 +82,11 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
doujinshi_dir = '.'
if not os.path.exists(doujinshi_dir):
logger.warning('Path \'{0}\' does not exist, creating.'.format(doujinshi_dir))
logger.warning(f'Path "{doujinshi_dir}" does not exist, creating.')
try:
os.makedirs(doujinshi_dir)
except EnvironmentError as e:
logger.critical('{0}'.format(str(e)))
logger.critical(e)
file_list = os.listdir(doujinshi_dir)
file_list.sort()
@ -94,18 +94,15 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
for image in file_list:
if not os.path.splitext(image)[1] in ('.jpg', '.png'):
continue
image_html += f'<img src="{image}" class="image-item"/>\n'
image_html += '<img src="{0}" class="image-item"/>\n' \
.format(image)
html = readfile('viewer/{}/index.html'.format(template))
css = readfile('viewer/{}/styles.css'.format(template))
js = readfile('viewer/{}/scripts.js'.format(template))
html = readfile(f'viewer/{template}/index.html')
css = readfile(f'viewer/{template}/styles.css')
js = readfile(f'viewer/{template}/scripts.js')
if doujinshi_obj is not None:
serialize_json(doujinshi_obj, doujinshi_dir)
name = doujinshi_obj.name
if sys.version_info < (3, 0):
name = doujinshi_obj.name.encode('utf-8')
else:
name = {'title': 'nHentai HTML Viewer'}
@ -118,14 +115,14 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
with open(os.path.join(doujinshi_dir, 'index.html'), 'wb') as f:
f.write(data.encode('utf-8'))
logger.log(15, 'HTML Viewer has been written to \'{0}\''.format(os.path.join(doujinshi_dir, 'index.html')))
logger.log(16, f'HTML Viewer has been written to "{os.path.join(doujinshi_dir, "index.html")}"')
except Exception as e:
logger.warning('Writing HTML Viewer failed ({})'.format(str(e)))
logger.warning(f'Writing HTML Viewer failed ({e})')
def generate_main_html(output_dir='./'):
"""
Generate a main html to show all the contain doujinshi.
Generate a main html to show all the contains doujinshi.
With a link to their `index.html`.
Default output folder will be the CLI path.
"""
@ -154,7 +151,7 @@ def generate_main_html(output_dir='./'):
files.sort()
if 'index.html' in files:
logger.info('Add doujinshi \'{}\''.format(folder))
logger.info(f'Add doujinshi "{folder}"')
else:
continue
@ -178,10 +175,9 @@ def generate_main_html(output_dir='./'):
f.write(data.encode('utf-8'))
shutil.copy(os.path.dirname(__file__) + '/viewer/logo.png', './')
set_js_database()
logger.log(
15, 'Main Viewer has been written to \'{0}main.html\''.format(output_dir))
logger.log(16, f'Main Viewer has been written to "{output_dir}main.html"')
except Exception as e:
logger.warning('Writing Main Viewer failed ({})'.format(str(e)))
logger.warning(f'Writing Main Viewer failed ({e})')
def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_comic_info=True):
@ -189,7 +185,7 @@ def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
if write_comic_info:
serialize_comic_xml(doujinshi_obj, doujinshi_dir)
cbz_filename = os.path.join(os.path.join(doujinshi_dir, '..'), '{}.cbz'.format(doujinshi_obj.filename))
cbz_filename = os.path.join(os.path.join(doujinshi_dir, '..'), f'{doujinshi_obj.filename}.cbz')
else:
cbz_filename = './doujinshi.cbz'
doujinshi_dir = '.'
@ -197,7 +193,7 @@ def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_
file_list = os.listdir(doujinshi_dir)
file_list.sort()
logger.info('Writing CBZ file to path: {}'.format(cbz_filename))
logger.info(f'Writing CBZ file to path: {cbz_filename}')
with zipfile.ZipFile(cbz_filename, 'w') as cbz_pf:
for image in file_list:
image_path = os.path.join(doujinshi_dir, image)
@ -206,7 +202,7 @@ def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_
if rm_origin_dir:
shutil.rmtree(doujinshi_dir, ignore_errors=True)
logger.log(15, 'Comic Book CBZ file has been written to \'{0}\''.format(doujinshi_dir))
logger.log(16, f'Comic Book CBZ file has been written to "{doujinshi_dir}"')
def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False):
@ -218,7 +214,7 @@ def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False):
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
pdf_filename = os.path.join(
os.path.join(doujinshi_dir, '..'),
'{}.pdf'.format(doujinshi_obj.filename)
f'{doujinshi_obj.filename}.pdf'
)
else:
pdf_filename = './doujinshi.pdf'
@ -227,7 +223,7 @@ def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False):
file_list = os.listdir(doujinshi_dir)
file_list.sort()
logger.info('Writing PDF file to path: {}'.format(pdf_filename))
logger.info(f'Writing PDF file to path: {pdf_filename}')
with open(pdf_filename, 'wb') as pdf_f:
full_path_list = (
[os.path.join(doujinshi_dir, image) for image in file_list]
@ -237,19 +233,12 @@ def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False):
if rm_origin_dir:
shutil.rmtree(doujinshi_dir, ignore_errors=True)
logger.log(15, 'PDF file has been written to \'{0}\''.format(doujinshi_dir))
logger.log(16, f'PDF file has been written to "{doujinshi_dir}"')
except ImportError:
logger.error("Please install img2pdf package by using pip.")
def unicode_truncate(s, length, encoding='utf-8'):
"""https://stackoverflow.com/questions/1809531/truncating-unicode-so-it-fits-a-maximum-size-when-encoded-for-wire-transfer
"""
encoded = s.encode(encoding)[:length]
return encoded.decode(encoding, 'ignore')
def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
"""
It used to be a whitelist approach allowed only alphabet and a part of symbols.
@ -323,7 +312,7 @@ def generate_metadata_file(output_dir, table, doujinshi_obj=None):
'LANGUAGE', 'TAGS', 'URL', 'PAGES']
for i in range(len(fields)):
f.write('{}: '.format(fields[i]))
f.write(f'{fields[i]}: ')
if fields[i] in special_fields:
f.write(str(table[special_fields.index(fields[i])][1]))
f.write('\n')

View File

@ -2,5 +2,5 @@ requests>=2.5.0
soupsieve
BeautifulSoup4>=4.0.0
tabulate>=0.7.5
future>=0.15.2
iso8601 >= 0.1
urllib3

View File

@ -1,6 +1,4 @@
# coding: utf-8
from __future__ import print_function, unicode_literals
import sys
import codecs
from setuptools import setup, find_packages
from nhentai import __version__, __author__, __email__
@ -11,9 +9,8 @@ with open('requirements.txt') as f:
def long_description():
with codecs.open('README.rst', 'rb') as readme:
if not sys.version_info < (3, 0, 0):
return readme.read().decode('utf-8')
with codecs.open('README.rst', 'r') as readme:
return readme.read()
setup(