mirror of
https://github.com/RicterZ/nhentai.git
synced 2025-07-01 16:09:28 +02:00
Compare commits
13 Commits
Author | SHA1 | Date | |
---|---|---|---|
a83c571ec4 | |||
e7ff5dab3d | |||
a166898b60 | |||
ce25051fa3 | |||
41fba6b5ac | |||
8944ece4a8 | |||
6b4c4bdc70 | |||
d1d0c22af8 | |||
803957ba88 | |||
13b584a820 | |||
be08fcf4cb | |||
b585225308 | |||
54af682848 |
@ -129,7 +129,7 @@ Download your favorites with delay:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
nhentai --favorites --download --delay 1
|
||||
nhentai --favorites --download --delay 1 --page 3-5,7
|
||||
|
||||
Format output doujinshi folder name:
|
||||
|
||||
|
@ -1,3 +1,3 @@
|
||||
__version__ = '0.5.17.2'
|
||||
__version__ = '0.5.19'
|
||||
__author__ = 'RicterZ'
|
||||
__email__ = 'ricterzheng@gmail.com'
|
||||
|
@ -11,6 +11,7 @@ from optparse import OptionParser
|
||||
from nhentai import __version__
|
||||
from nhentai.utils import generate_html, generate_main_html, DB
|
||||
from nhentai.logger import logger
|
||||
from nhentai.constant import PATH_SEPARATOR
|
||||
|
||||
|
||||
def banner():
|
||||
@ -64,7 +65,8 @@ def cmd_parser():
|
||||
# operation options
|
||||
parser.add_option('--download', '-D', dest='is_download', action='store_true',
|
||||
help='download doujinshi (for search results)')
|
||||
parser.add_option('--show', '-S', dest='is_show', action='store_true', help='just show the doujinshi information')
|
||||
parser.add_option('--show', '-S', dest='is_show', action='store_true',
|
||||
help='just show the doujinshi information')
|
||||
|
||||
# doujinshi options
|
||||
parser.add_option('--id', dest='id', action='callback', callback=callback,
|
||||
@ -79,14 +81,15 @@ def cmd_parser():
|
||||
# page options
|
||||
parser.add_option('--page-all', dest='page_all', action='store_true', default=False,
|
||||
help='all search results')
|
||||
parser.add_option('--page', '--page-range', type='string', dest='page', action='store', default='1',
|
||||
parser.add_option('--page', '--page-range', type='string', dest='page', action='store',
|
||||
help='page number of search results. e.g. 1,2-5,14')
|
||||
parser.add_option('--sorting', '--sort', dest='sorting', action='store', default='popular',
|
||||
help='sorting of doujinshi (recent / popular / popular-[today|week])',
|
||||
choices=['recent', 'popular', 'popular-today', 'popular-week', 'date'])
|
||||
|
||||
# download options
|
||||
parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', default='./',
|
||||
parser.add_option('--output', '-o', type='string', dest='output_dir', action='store',
|
||||
default=f'.{PATH_SEPARATOR}',
|
||||
help='output dir')
|
||||
parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
|
||||
help='thread count for downloading doujinshi')
|
||||
@ -96,7 +99,8 @@ def cmd_parser():
|
||||
help='slow down between downloading every doujinshi')
|
||||
parser.add_option('--proxy', type='string', dest='proxy', action='store',
|
||||
help='store a proxy, for example: -p "http://127.0.0.1:1080"')
|
||||
parser.add_option('--file', '-f', type='string', dest='file', action='store', help='read gallery IDs from file.')
|
||||
parser.add_option('--file', '-f', type='string', dest='file', action='store',
|
||||
help='read gallery IDs from file.')
|
||||
parser.add_option('--format', type='string', dest='name_format', action='store',
|
||||
help='format the saved folder name', default='[%i][%a][%t]')
|
||||
parser.add_option('--dry-run', action='store_true', dest='dryrun', help='Dry run, skip file download')
|
||||
|
@ -48,7 +48,7 @@ def main():
|
||||
if not options.is_download:
|
||||
logger.warning('You do not specify --download option')
|
||||
|
||||
doujinshis = favorites_parser() if options.page_all else favorites_parser(page=page_list)
|
||||
doujinshis = favorites_parser(page=page_list) if options.page else favorites_parser()
|
||||
|
||||
elif options.keyword:
|
||||
if constant.CONFIG['language']:
|
||||
|
@ -35,11 +35,16 @@ LOGIN_URL = f'{BASE_URL}/login/'
|
||||
CHALLENGE_URL = f'{BASE_URL}/challenge'
|
||||
FAV_URL = f'{BASE_URL}/favorites/'
|
||||
|
||||
PATH_SEPARATOR = os.path.sep
|
||||
|
||||
IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i.{urlparse(BASE_URL).hostname}/galleries'
|
||||
|
||||
IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i1.{urlparse(BASE_URL).hostname}/galleries'
|
||||
IMAGE_URL_MIRRORS = [
|
||||
f'{urlparse(BASE_URL).scheme}://i2.{urlparse(BASE_URL).hostname}',
|
||||
f'{urlparse(BASE_URL).scheme}://i3.{urlparse(BASE_URL).hostname}',
|
||||
f'{urlparse(BASE_URL).scheme}://i4.{urlparse(BASE_URL).hostname}',
|
||||
f'{urlparse(BASE_URL).scheme}://i5.{urlparse(BASE_URL).hostname}',
|
||||
f'{urlparse(BASE_URL).scheme}://i6.{urlparse(BASE_URL).hostname}',
|
||||
f'{urlparse(BASE_URL).scheme}://i7.{urlparse(BASE_URL).hostname}',
|
||||
]
|
||||
|
||||
|
@ -65,7 +65,7 @@ class Doujinshi(object):
|
||||
['Groups', self.info.groups],
|
||||
['Languages', self.info.languages],
|
||||
['Tags', self.info.tags],
|
||||
['Favorite Counts', self.info.favorite_counts],
|
||||
['Favorite Counts', self.favorite_counts],
|
||||
['URL', self.url],
|
||||
['Pages', self.pages],
|
||||
]
|
||||
|
@ -47,7 +47,10 @@ class Downloader(Singleton):
|
||||
for completed_task in asyncio.as_completed(tasks):
|
||||
try:
|
||||
result = await completed_task
|
||||
logger.info(f'{result[1]} download completed')
|
||||
if result[1]:
|
||||
logger.info(f'{result[1]} download completed')
|
||||
else:
|
||||
logger.warning(f'{result[1]} download failed, return value {result[0]}')
|
||||
except Exception as e:
|
||||
logger.error(f'An error occurred: {e}')
|
||||
|
||||
@ -85,11 +88,11 @@ class Downloader(Singleton):
|
||||
|
||||
if not await self.save(filename, response):
|
||||
logger.error(f'Can not download image {url}')
|
||||
return 1, None
|
||||
return 1, url
|
||||
|
||||
except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e:
|
||||
if retried < 3:
|
||||
logger.info(f'Download {filename} failed, retrying({retried + 1}) times...')
|
||||
logger.warning(f'Download {filename} failed, retrying({retried + 1}) times...')
|
||||
return await self.download(
|
||||
url=url,
|
||||
folder=folder,
|
||||
@ -98,7 +101,8 @@ class Downloader(Singleton):
|
||||
proxy=proxy,
|
||||
)
|
||||
else:
|
||||
return 0, None
|
||||
logger.warning(f'Download {filename} failed with 3 times retried, skipped')
|
||||
return 0, url
|
||||
|
||||
except NHentaiImageNotExistException as e:
|
||||
os.remove(save_file_path)
|
||||
@ -110,10 +114,10 @@ class Downloader(Singleton):
|
||||
logger.error(f"Exception type: {type(e)}")
|
||||
traceback.print_stack()
|
||||
logger.critical(str(e))
|
||||
return 0, None
|
||||
return 0, url
|
||||
|
||||
except KeyboardInterrupt:
|
||||
return -3, None
|
||||
return -3, url
|
||||
|
||||
return 1, url
|
||||
|
||||
@ -152,6 +156,7 @@ class Downloader(Singleton):
|
||||
return True
|
||||
|
||||
digit_length = len(str(len(queue)))
|
||||
logger.info(f'Total download pages: {len(queue)}')
|
||||
coroutines = [
|
||||
self._semaphore_download(url, filename=os.path.basename(urlparse(url).path), length=digit_length)
|
||||
for url in queue
|
||||
|
@ -141,23 +141,26 @@ def doujinshi_parser(id_, counter=0):
|
||||
title = doujinshi_info.find('h1').text
|
||||
pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text
|
||||
subtitle = doujinshi_info.find('h2')
|
||||
favorite_counts = str(doujinshi_info.find('span', class_='nobold').find('span', class_='count'))
|
||||
if favorite_counts is None:
|
||||
favorite_counts = '0'
|
||||
favorite_counts = doujinshi_info.find('span', class_='nobold').find('span', class_='count')
|
||||
|
||||
doujinshi['name'] = title
|
||||
doujinshi['pretty_name'] = pretty_name
|
||||
doujinshi['subtitle'] = subtitle.text if subtitle else ''
|
||||
doujinshi['favorite_counts'] = favorite_counts.strip()
|
||||
doujinshi['favorite_counts'] = int(favorite_counts.text.strip()) if favorite_counts else 0
|
||||
|
||||
doujinshi_cover = html.find('div', attrs={'id': 'cover'})
|
||||
img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
|
||||
doujinshi_cover.a.img.attrs['data-src'])
|
||||
# img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
|
||||
# doujinshi_cover.a.img.attrs['data-src'])
|
||||
img_id = re.search(r'/galleries/(\d+)/cover\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
|
||||
|
||||
ext = []
|
||||
for i in html.find_all('div', attrs={'class': 'thumb-container'}):
|
||||
_, ext_name = os.path.basename(i.img.attrs['data-src']).rsplit('.', 1)
|
||||
ext.append(ext_name)
|
||||
base_name = os.path.basename(i.img.attrs['data-src'])
|
||||
ext_name = base_name.split('.')
|
||||
if len(ext_name) == 3:
|
||||
ext.append(ext_name[1])
|
||||
else:
|
||||
ext.append(ext_name[-1])
|
||||
|
||||
if not img_id:
|
||||
logger.critical(f'Tried yo get image id failed of id: {id_}')
|
||||
|
@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
import json
|
||||
import os
|
||||
|
||||
from nhentai.constant import PATH_SEPARATOR
|
||||
from xml.sax.saxutils import escape
|
||||
from nhentai.constant import LANGUAGE_ISO
|
||||
|
||||
@ -82,7 +84,7 @@ def xml_write_simple_tag(f, name, val, indent=1):
|
||||
|
||||
def merge_json():
|
||||
lst = []
|
||||
output_dir = "./"
|
||||
output_dir = f".{PATH_SEPARATOR}"
|
||||
os.chdir(output_dir)
|
||||
doujinshi_dirs = next(os.walk('.'))[1]
|
||||
for folder in doujinshi_dirs:
|
||||
|
@ -14,10 +14,12 @@ from typing import Tuple
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
from nhentai import constant
|
||||
from nhentai.constant import PATH_SEPARATOR
|
||||
from nhentai.logger import logger
|
||||
from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database
|
||||
|
||||
MAX_FIELD_LENGTH = 100
|
||||
EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
|
||||
|
||||
|
||||
def request(method, url, **kwargs):
|
||||
@ -99,9 +101,9 @@ def parse_doujinshi_obj(
|
||||
file_type: str = ''
|
||||
) -> Tuple[str, str]:
|
||||
|
||||
filename = f'./doujinshi.{file_type}'
|
||||
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
|
||||
filename = f'.{PATH_SEPARATOR}doujinshi.{file_type}'
|
||||
if doujinshi_obj is not None:
|
||||
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
|
||||
_filename = f'{doujinshi_obj.filename}.{file_type}'
|
||||
|
||||
if file_type == 'cbz':
|
||||
@ -111,6 +113,8 @@ def parse_doujinshi_obj(
|
||||
_filename = _filename.replace('/', '-')
|
||||
|
||||
filename = os.path.join(output_dir, _filename)
|
||||
else:
|
||||
doujinshi_dir = f'.{PATH_SEPARATOR}'
|
||||
|
||||
return doujinshi_dir, filename
|
||||
|
||||
@ -130,7 +134,7 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
|
||||
file_list.sort()
|
||||
|
||||
for image in file_list:
|
||||
if not os.path.splitext(image)[1] in ('.jpg', '.png', '.webp'):
|
||||
if not os.path.splitext(image)[1] in EXTENSIONS:
|
||||
continue
|
||||
image_html += f'<img src="{image}" class="image-item"/>\n'
|
||||
|
||||
@ -175,7 +179,7 @@ def move_to_folder(output_dir='.', doujinshi_obj=None, file_type=None):
|
||||
shutil.move(filename, os.path.join(doujinshi_dir, os.path.basename(filename)))
|
||||
|
||||
|
||||
def generate_main_html(output_dir='./'):
|
||||
def generate_main_html(output_dir=f'.{PATH_SEPARATOR}'):
|
||||
"""
|
||||
Generate a main html to show all the contains doujinshi.
|
||||
With a link to their `index.html`.
|
||||
@ -256,7 +260,7 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa
|
||||
import img2pdf
|
||||
|
||||
"""Write images to a PDF file using img2pdf."""
|
||||
file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp'))]
|
||||
file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(EXTENSIONS)]
|
||||
file_list.sort()
|
||||
|
||||
logger.info(f'Writing PDF file to path: {filename}')
|
||||
@ -308,7 +312,8 @@ def signal_handler(_signal, _frame):
|
||||
def paging(page_string):
|
||||
# 1,3-5,14 -> [1, 3, 4, 5, 14]
|
||||
if not page_string:
|
||||
return []
|
||||
# default, the first page
|
||||
return [1]
|
||||
|
||||
page_list = []
|
||||
for i in page_string.split(','):
|
||||
@ -333,7 +338,7 @@ def generate_metadata_file(output_dir, doujinshi_obj):
|
||||
|
||||
fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
|
||||
'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
|
||||
'TAGS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
|
||||
'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
|
||||
'SERIES', 'PARODY', 'URL']
|
||||
|
||||
temp_dict = CaseInsensitiveDict(dict(doujinshi_obj.table))
|
||||
|
@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "nhentai"
|
||||
version = "0.5.17.2"
|
||||
version = "0.5.19"
|
||||
description = "nhentai doujinshi downloader"
|
||||
authors = ["Ricter Z <ricterzheng@gmail.com>"]
|
||||
license = "MIT"
|
||||
|
Reference in New Issue
Block a user