Compare commits

...

13 Commits

Author SHA1 Message Date
a83c571ec4 0.5.19 2025-01-15 19:47:24 +08:00
e7ff5dab3d Merge pull request #373 from nicojust/fix-favorite-metadata-output
fix favorite_counts output in metadata
2025-01-15 12:26:24 +08:00
a166898b60 fix #374 2025-01-15 12:26:01 +08:00
ce25051fa3 fix: output favorite_counts as an int 2025-01-13 19:51:40 +01:00
41fba6b5ac fix: add missing favorite_counts in metadata file 2025-01-13 19:51:04 +01:00
8944ece4a8 use os.path.sep as path separator 2025-01-11 08:48:43 +08:00
6b4c4bdc70 0.5.18 2025-01-11 08:35:40 +08:00
d1d0c22af8 fix #349 2025-01-11 08:34:30 +08:00
803957ba88 fix #349 2025-01-11 08:33:59 +08:00
13b584a820 fix #371 and #324 2025-01-11 08:02:36 +08:00
be08fcf4cb fix #368 2025-01-11 07:54:28 +08:00
b585225308 fix #370 2025-01-11 07:52:51 +08:00
54af682848 fix #369 2025-01-11 07:50:41 +08:00
11 changed files with 57 additions and 33 deletions

View File

@ -129,7 +129,7 @@ Download your favorites with delay:
.. code-block:: bash
nhentai --favorites --download --delay 1
nhentai --favorites --download --delay 1 --page 3-5,7
Format output doujinshi folder name:

View File

@ -1,3 +1,3 @@
__version__ = '0.5.17.2'
__version__ = '0.5.19'
__author__ = 'RicterZ'
__email__ = 'ricterzheng@gmail.com'

View File

@ -11,6 +11,7 @@ from optparse import OptionParser
from nhentai import __version__
from nhentai.utils import generate_html, generate_main_html, DB
from nhentai.logger import logger
from nhentai.constant import PATH_SEPARATOR
def banner():
@ -64,7 +65,8 @@ def cmd_parser():
# operation options
parser.add_option('--download', '-D', dest='is_download', action='store_true',
help='download doujinshi (for search results)')
parser.add_option('--show', '-S', dest='is_show', action='store_true', help='just show the doujinshi information')
parser.add_option('--show', '-S', dest='is_show', action='store_true',
help='just show the doujinshi information')
# doujinshi options
parser.add_option('--id', dest='id', action='callback', callback=callback,
@ -79,14 +81,15 @@ def cmd_parser():
# page options
parser.add_option('--page-all', dest='page_all', action='store_true', default=False,
help='all search results')
parser.add_option('--page', '--page-range', type='string', dest='page', action='store', default='1',
parser.add_option('--page', '--page-range', type='string', dest='page', action='store',
help='page number of search results. e.g. 1,2-5,14')
parser.add_option('--sorting', '--sort', dest='sorting', action='store', default='popular',
help='sorting of doujinshi (recent / popular / popular-[today|week])',
choices=['recent', 'popular', 'popular-today', 'popular-week', 'date'])
# download options
parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', default='./',
parser.add_option('--output', '-o', type='string', dest='output_dir', action='store',
default=f'.{PATH_SEPARATOR}',
help='output dir')
parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
help='thread count for downloading doujinshi')
@ -96,7 +99,8 @@ def cmd_parser():
help='slow down between downloading every doujinshi')
parser.add_option('--proxy', type='string', dest='proxy', action='store',
help='store a proxy, for example: -p "http://127.0.0.1:1080"')
parser.add_option('--file', '-f', type='string', dest='file', action='store', help='read gallery IDs from file.')
parser.add_option('--file', '-f', type='string', dest='file', action='store',
help='read gallery IDs from file.')
parser.add_option('--format', type='string', dest='name_format', action='store',
help='format the saved folder name', default='[%i][%a][%t]')
parser.add_option('--dry-run', action='store_true', dest='dryrun', help='Dry run, skip file download')

View File

@ -48,7 +48,7 @@ def main():
if not options.is_download:
logger.warning('You do not specify --download option')
doujinshis = favorites_parser() if options.page_all else favorites_parser(page=page_list)
doujinshis = favorites_parser(page=page_list) if options.page else favorites_parser()
elif options.keyword:
if constant.CONFIG['language']:

View File

@ -35,11 +35,16 @@ LOGIN_URL = f'{BASE_URL}/login/'
CHALLENGE_URL = f'{BASE_URL}/challenge'
FAV_URL = f'{BASE_URL}/favorites/'
PATH_SEPARATOR = os.path.sep
IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i.{urlparse(BASE_URL).hostname}/galleries'
IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i1.{urlparse(BASE_URL).hostname}/galleries'
IMAGE_URL_MIRRORS = [
f'{urlparse(BASE_URL).scheme}://i2.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i3.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i4.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i5.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i6.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i7.{urlparse(BASE_URL).hostname}',
]

View File

@ -65,7 +65,7 @@ class Doujinshi(object):
['Groups', self.info.groups],
['Languages', self.info.languages],
['Tags', self.info.tags],
['Favorite Counts', self.info.favorite_counts],
['Favorite Counts', self.favorite_counts],
['URL', self.url],
['Pages', self.pages],
]

View File

@ -47,7 +47,10 @@ class Downloader(Singleton):
for completed_task in asyncio.as_completed(tasks):
try:
result = await completed_task
logger.info(f'{result[1]} download completed')
if result[1]:
logger.info(f'{result[1]} download completed')
else:
logger.warning(f'{result[1]} download failed, return value {result[0]}')
except Exception as e:
logger.error(f'An error occurred: {e}')
@ -85,11 +88,11 @@ class Downloader(Singleton):
if not await self.save(filename, response):
logger.error(f'Can not download image {url}')
return 1, None
return 1, url
except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e:
if retried < 3:
logger.info(f'Download {filename} failed, retrying({retried + 1}) times...')
logger.warning(f'Download {filename} failed, retrying({retried + 1}) times...')
return await self.download(
url=url,
folder=folder,
@ -98,7 +101,8 @@ class Downloader(Singleton):
proxy=proxy,
)
else:
return 0, None
logger.warning(f'Download {filename} failed with 3 times retried, skipped')
return 0, url
except NHentaiImageNotExistException as e:
os.remove(save_file_path)
@ -110,10 +114,10 @@ class Downloader(Singleton):
logger.error(f"Exception type: {type(e)}")
traceback.print_stack()
logger.critical(str(e))
return 0, None
return 0, url
except KeyboardInterrupt:
return -3, None
return -3, url
return 1, url
@ -152,6 +156,7 @@ class Downloader(Singleton):
return True
digit_length = len(str(len(queue)))
logger.info(f'Total download pages: {len(queue)}')
coroutines = [
self._semaphore_download(url, filename=os.path.basename(urlparse(url).path), length=digit_length)
for url in queue

View File

@ -141,23 +141,26 @@ def doujinshi_parser(id_, counter=0):
title = doujinshi_info.find('h1').text
pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text
subtitle = doujinshi_info.find('h2')
favorite_counts = str(doujinshi_info.find('span', class_='nobold').find('span', class_='count'))
if favorite_counts is None:
favorite_counts = '0'
favorite_counts = doujinshi_info.find('span', class_='nobold').find('span', class_='count')
doujinshi['name'] = title
doujinshi['pretty_name'] = pretty_name
doujinshi['subtitle'] = subtitle.text if subtitle else ''
doujinshi['favorite_counts'] = favorite_counts.strip()
doujinshi['favorite_counts'] = int(favorite_counts.text.strip()) if favorite_counts else 0
doujinshi_cover = html.find('div', attrs={'id': 'cover'})
img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
doujinshi_cover.a.img.attrs['data-src'])
# img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
# doujinshi_cover.a.img.attrs['data-src'])
img_id = re.search(r'/galleries/(\d+)/cover\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
ext = []
for i in html.find_all('div', attrs={'class': 'thumb-container'}):
_, ext_name = os.path.basename(i.img.attrs['data-src']).rsplit('.', 1)
ext.append(ext_name)
base_name = os.path.basename(i.img.attrs['data-src'])
ext_name = base_name.split('.')
if len(ext_name) == 3:
ext.append(ext_name[1])
else:
ext.append(ext_name[-1])
if not img_id:
logger.critical(f'Tried yo get image id failed of id: {id_}')

View File

@ -1,6 +1,8 @@
# coding: utf-8
import json
import os
from nhentai.constant import PATH_SEPARATOR
from xml.sax.saxutils import escape
from nhentai.constant import LANGUAGE_ISO
@ -82,7 +84,7 @@ def xml_write_simple_tag(f, name, val, indent=1):
def merge_json():
lst = []
output_dir = "./"
output_dir = f".{PATH_SEPARATOR}"
os.chdir(output_dir)
doujinshi_dirs = next(os.walk('.'))[1]
for folder in doujinshi_dirs:

View File

@ -14,10 +14,12 @@ from typing import Tuple
from requests.structures import CaseInsensitiveDict
from nhentai import constant
from nhentai.constant import PATH_SEPARATOR
from nhentai.logger import logger
from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database
MAX_FIELD_LENGTH = 100
EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
def request(method, url, **kwargs):
@ -99,9 +101,9 @@ def parse_doujinshi_obj(
file_type: str = ''
) -> Tuple[str, str]:
filename = f'./doujinshi.{file_type}'
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
filename = f'.{PATH_SEPARATOR}doujinshi.{file_type}'
if doujinshi_obj is not None:
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
_filename = f'{doujinshi_obj.filename}.{file_type}'
if file_type == 'cbz':
@ -111,6 +113,8 @@ def parse_doujinshi_obj(
_filename = _filename.replace('/', '-')
filename = os.path.join(output_dir, _filename)
else:
doujinshi_dir = f'.{PATH_SEPARATOR}'
return doujinshi_dir, filename
@ -130,7 +134,7 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
file_list.sort()
for image in file_list:
if not os.path.splitext(image)[1] in ('.jpg', '.png', '.webp'):
if not os.path.splitext(image)[1] in EXTENSIONS:
continue
image_html += f'<img src="{image}" class="image-item"/>\n'
@ -175,7 +179,7 @@ def move_to_folder(output_dir='.', doujinshi_obj=None, file_type=None):
shutil.move(filename, os.path.join(doujinshi_dir, os.path.basename(filename)))
def generate_main_html(output_dir='./'):
def generate_main_html(output_dir=f'.{PATH_SEPARATOR}'):
"""
Generate a main html to show all the contains doujinshi.
With a link to their `index.html`.
@ -256,7 +260,7 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa
import img2pdf
"""Write images to a PDF file using img2pdf."""
file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp'))]
file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(EXTENSIONS)]
file_list.sort()
logger.info(f'Writing PDF file to path: {filename}')
@ -308,7 +312,8 @@ def signal_handler(_signal, _frame):
def paging(page_string):
# 1,3-5,14 -> [1, 3, 4, 5, 14]
if not page_string:
return []
# default, the first page
return [1]
page_list = []
for i in page_string.split(','):
@ -333,7 +338,7 @@ def generate_metadata_file(output_dir, doujinshi_obj):
fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
'TAGS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
'SERIES', 'PARODY', 'URL']
temp_dict = CaseInsensitiveDict(dict(doujinshi_obj.table))

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "nhentai"
version = "0.5.17.2"
version = "0.5.19"
description = "nhentai doujinshi downloader"
authors = ["Ricter Z <ricterzheng@gmail.com>"]
license = "MIT"
@ -22,4 +22,4 @@ requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.poetry.scripts]
nhentai = 'nhentai.command:main'
nhentai = 'nhentai.command:main'