Compare commits

..

13 Commits

Author SHA1 Message Date
a83c571ec4 0.5.19 2025-01-15 19:47:24 +08:00
e7ff5dab3d Merge pull request #373 from nicojust/fix-favorite-metadata-output
fix favorite_counts output in metadata
2025-01-15 12:26:24 +08:00
a166898b60 fix #374 2025-01-15 12:26:01 +08:00
ce25051fa3 fix: output favorite_counts as an int 2025-01-13 19:51:40 +01:00
41fba6b5ac fix: add missing favorite_counts in metadata file 2025-01-13 19:51:04 +01:00
8944ece4a8 use os.path.sep as path separator 2025-01-11 08:48:43 +08:00
6b4c4bdc70 0.5.18 2025-01-11 08:35:40 +08:00
d1d0c22af8 fix #349 2025-01-11 08:34:30 +08:00
803957ba88 fix #349 2025-01-11 08:33:59 +08:00
13b584a820 fix #371 and #324 2025-01-11 08:02:36 +08:00
be08fcf4cb fix #368 2025-01-11 07:54:28 +08:00
b585225308 fix #370 2025-01-11 07:52:51 +08:00
54af682848 fix #369 2025-01-11 07:50:41 +08:00
11 changed files with 57 additions and 33 deletions

View File

@ -129,7 +129,7 @@ Download your favorites with delay:
.. code-block:: bash .. code-block:: bash
nhentai --favorites --download --delay 1 nhentai --favorites --download --delay 1 --page 3-5,7
Format output doujinshi folder name: Format output doujinshi folder name:

View File

@ -1,3 +1,3 @@
__version__ = '0.5.17.2' __version__ = '0.5.19'
__author__ = 'RicterZ' __author__ = 'RicterZ'
__email__ = 'ricterzheng@gmail.com' __email__ = 'ricterzheng@gmail.com'

View File

@ -11,6 +11,7 @@ from optparse import OptionParser
from nhentai import __version__ from nhentai import __version__
from nhentai.utils import generate_html, generate_main_html, DB from nhentai.utils import generate_html, generate_main_html, DB
from nhentai.logger import logger from nhentai.logger import logger
from nhentai.constant import PATH_SEPARATOR
def banner(): def banner():
@ -64,7 +65,8 @@ def cmd_parser():
# operation options # operation options
parser.add_option('--download', '-D', dest='is_download', action='store_true', parser.add_option('--download', '-D', dest='is_download', action='store_true',
help='download doujinshi (for search results)') help='download doujinshi (for search results)')
parser.add_option('--show', '-S', dest='is_show', action='store_true', help='just show the doujinshi information') parser.add_option('--show', '-S', dest='is_show', action='store_true',
help='just show the doujinshi information')
# doujinshi options # doujinshi options
parser.add_option('--id', dest='id', action='callback', callback=callback, parser.add_option('--id', dest='id', action='callback', callback=callback,
@ -79,14 +81,15 @@ def cmd_parser():
# page options # page options
parser.add_option('--page-all', dest='page_all', action='store_true', default=False, parser.add_option('--page-all', dest='page_all', action='store_true', default=False,
help='all search results') help='all search results')
parser.add_option('--page', '--page-range', type='string', dest='page', action='store', default='1', parser.add_option('--page', '--page-range', type='string', dest='page', action='store',
help='page number of search results. e.g. 1,2-5,14') help='page number of search results. e.g. 1,2-5,14')
parser.add_option('--sorting', '--sort', dest='sorting', action='store', default='popular', parser.add_option('--sorting', '--sort', dest='sorting', action='store', default='popular',
help='sorting of doujinshi (recent / popular / popular-[today|week])', help='sorting of doujinshi (recent / popular / popular-[today|week])',
choices=['recent', 'popular', 'popular-today', 'popular-week', 'date']) choices=['recent', 'popular', 'popular-today', 'popular-week', 'date'])
# download options # download options
parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', default='./', parser.add_option('--output', '-o', type='string', dest='output_dir', action='store',
default=f'.{PATH_SEPARATOR}',
help='output dir') help='output dir')
parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5, parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
help='thread count for downloading doujinshi') help='thread count for downloading doujinshi')
@ -96,7 +99,8 @@ def cmd_parser():
help='slow down between downloading every doujinshi') help='slow down between downloading every doujinshi')
parser.add_option('--proxy', type='string', dest='proxy', action='store', parser.add_option('--proxy', type='string', dest='proxy', action='store',
help='store a proxy, for example: -p "http://127.0.0.1:1080"') help='store a proxy, for example: -p "http://127.0.0.1:1080"')
parser.add_option('--file', '-f', type='string', dest='file', action='store', help='read gallery IDs from file.') parser.add_option('--file', '-f', type='string', dest='file', action='store',
help='read gallery IDs from file.')
parser.add_option('--format', type='string', dest='name_format', action='store', parser.add_option('--format', type='string', dest='name_format', action='store',
help='format the saved folder name', default='[%i][%a][%t]') help='format the saved folder name', default='[%i][%a][%t]')
parser.add_option('--dry-run', action='store_true', dest='dryrun', help='Dry run, skip file download') parser.add_option('--dry-run', action='store_true', dest='dryrun', help='Dry run, skip file download')

View File

@ -48,7 +48,7 @@ def main():
if not options.is_download: if not options.is_download:
logger.warning('You do not specify --download option') logger.warning('You do not specify --download option')
doujinshis = favorites_parser() if options.page_all else favorites_parser(page=page_list) doujinshis = favorites_parser(page=page_list) if options.page else favorites_parser()
elif options.keyword: elif options.keyword:
if constant.CONFIG['language']: if constant.CONFIG['language']:

View File

@ -35,11 +35,16 @@ LOGIN_URL = f'{BASE_URL}/login/'
CHALLENGE_URL = f'{BASE_URL}/challenge' CHALLENGE_URL = f'{BASE_URL}/challenge'
FAV_URL = f'{BASE_URL}/favorites/' FAV_URL = f'{BASE_URL}/favorites/'
PATH_SEPARATOR = os.path.sep
IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i.{urlparse(BASE_URL).hostname}/galleries'
IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i1.{urlparse(BASE_URL).hostname}/galleries'
IMAGE_URL_MIRRORS = [ IMAGE_URL_MIRRORS = [
f'{urlparse(BASE_URL).scheme}://i2.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i3.{urlparse(BASE_URL).hostname}', f'{urlparse(BASE_URL).scheme}://i3.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i4.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i5.{urlparse(BASE_URL).hostname}', f'{urlparse(BASE_URL).scheme}://i5.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i6.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i7.{urlparse(BASE_URL).hostname}', f'{urlparse(BASE_URL).scheme}://i7.{urlparse(BASE_URL).hostname}',
] ]

View File

@ -65,7 +65,7 @@ class Doujinshi(object):
['Groups', self.info.groups], ['Groups', self.info.groups],
['Languages', self.info.languages], ['Languages', self.info.languages],
['Tags', self.info.tags], ['Tags', self.info.tags],
['Favorite Counts', self.info.favorite_counts], ['Favorite Counts', self.favorite_counts],
['URL', self.url], ['URL', self.url],
['Pages', self.pages], ['Pages', self.pages],
] ]

View File

@ -47,7 +47,10 @@ class Downloader(Singleton):
for completed_task in asyncio.as_completed(tasks): for completed_task in asyncio.as_completed(tasks):
try: try:
result = await completed_task result = await completed_task
logger.info(f'{result[1]} download completed') if result[1]:
logger.info(f'{result[1]} download completed')
else:
logger.warning(f'{result[1]} download failed, return value {result[0]}')
except Exception as e: except Exception as e:
logger.error(f'An error occurred: {e}') logger.error(f'An error occurred: {e}')
@ -85,11 +88,11 @@ class Downloader(Singleton):
if not await self.save(filename, response): if not await self.save(filename, response):
logger.error(f'Can not download image {url}') logger.error(f'Can not download image {url}')
return 1, None return 1, url
except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e: except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e:
if retried < 3: if retried < 3:
logger.info(f'Download {filename} failed, retrying({retried + 1}) times...') logger.warning(f'Download {filename} failed, retrying({retried + 1}) times...')
return await self.download( return await self.download(
url=url, url=url,
folder=folder, folder=folder,
@ -98,7 +101,8 @@ class Downloader(Singleton):
proxy=proxy, proxy=proxy,
) )
else: else:
return 0, None logger.warning(f'Download {filename} failed with 3 times retried, skipped')
return 0, url
except NHentaiImageNotExistException as e: except NHentaiImageNotExistException as e:
os.remove(save_file_path) os.remove(save_file_path)
@ -110,10 +114,10 @@ class Downloader(Singleton):
logger.error(f"Exception type: {type(e)}") logger.error(f"Exception type: {type(e)}")
traceback.print_stack() traceback.print_stack()
logger.critical(str(e)) logger.critical(str(e))
return 0, None return 0, url
except KeyboardInterrupt: except KeyboardInterrupt:
return -3, None return -3, url
return 1, url return 1, url
@ -152,6 +156,7 @@ class Downloader(Singleton):
return True return True
digit_length = len(str(len(queue))) digit_length = len(str(len(queue)))
logger.info(f'Total download pages: {len(queue)}')
coroutines = [ coroutines = [
self._semaphore_download(url, filename=os.path.basename(urlparse(url).path), length=digit_length) self._semaphore_download(url, filename=os.path.basename(urlparse(url).path), length=digit_length)
for url in queue for url in queue

View File

@ -141,23 +141,26 @@ def doujinshi_parser(id_, counter=0):
title = doujinshi_info.find('h1').text title = doujinshi_info.find('h1').text
pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text
subtitle = doujinshi_info.find('h2') subtitle = doujinshi_info.find('h2')
favorite_counts = str(doujinshi_info.find('span', class_='nobold').find('span', class_='count')) favorite_counts = doujinshi_info.find('span', class_='nobold').find('span', class_='count')
if favorite_counts is None:
favorite_counts = '0'
doujinshi['name'] = title doujinshi['name'] = title
doujinshi['pretty_name'] = pretty_name doujinshi['pretty_name'] = pretty_name
doujinshi['subtitle'] = subtitle.text if subtitle else '' doujinshi['subtitle'] = subtitle.text if subtitle else ''
doujinshi['favorite_counts'] = favorite_counts.strip() doujinshi['favorite_counts'] = int(favorite_counts.text.strip()) if favorite_counts else 0
doujinshi_cover = html.find('div', attrs={'id': 'cover'}) doujinshi_cover = html.find('div', attrs={'id': 'cover'})
img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$', # img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
doujinshi_cover.a.img.attrs['data-src']) # doujinshi_cover.a.img.attrs['data-src'])
img_id = re.search(r'/galleries/(\d+)/cover\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
ext = [] ext = []
for i in html.find_all('div', attrs={'class': 'thumb-container'}): for i in html.find_all('div', attrs={'class': 'thumb-container'}):
_, ext_name = os.path.basename(i.img.attrs['data-src']).rsplit('.', 1) base_name = os.path.basename(i.img.attrs['data-src'])
ext.append(ext_name) ext_name = base_name.split('.')
if len(ext_name) == 3:
ext.append(ext_name[1])
else:
ext.append(ext_name[-1])
if not img_id: if not img_id:
logger.critical(f'Tried yo get image id failed of id: {id_}') logger.critical(f'Tried yo get image id failed of id: {id_}')

View File

@ -1,6 +1,8 @@
# coding: utf-8 # coding: utf-8
import json import json
import os import os
from nhentai.constant import PATH_SEPARATOR
from xml.sax.saxutils import escape from xml.sax.saxutils import escape
from nhentai.constant import LANGUAGE_ISO from nhentai.constant import LANGUAGE_ISO
@ -82,7 +84,7 @@ def xml_write_simple_tag(f, name, val, indent=1):
def merge_json(): def merge_json():
lst = [] lst = []
output_dir = "./" output_dir = f".{PATH_SEPARATOR}"
os.chdir(output_dir) os.chdir(output_dir)
doujinshi_dirs = next(os.walk('.'))[1] doujinshi_dirs = next(os.walk('.'))[1]
for folder in doujinshi_dirs: for folder in doujinshi_dirs:

View File

@ -14,10 +14,12 @@ from typing import Tuple
from requests.structures import CaseInsensitiveDict from requests.structures import CaseInsensitiveDict
from nhentai import constant from nhentai import constant
from nhentai.constant import PATH_SEPARATOR
from nhentai.logger import logger from nhentai.logger import logger
from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database
MAX_FIELD_LENGTH = 100 MAX_FIELD_LENGTH = 100
EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
def request(method, url, **kwargs): def request(method, url, **kwargs):
@ -99,9 +101,9 @@ def parse_doujinshi_obj(
file_type: str = '' file_type: str = ''
) -> Tuple[str, str]: ) -> Tuple[str, str]:
filename = f'./doujinshi.{file_type}' filename = f'.{PATH_SEPARATOR}doujinshi.{file_type}'
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
if doujinshi_obj is not None: if doujinshi_obj is not None:
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
_filename = f'{doujinshi_obj.filename}.{file_type}' _filename = f'{doujinshi_obj.filename}.{file_type}'
if file_type == 'cbz': if file_type == 'cbz':
@ -111,6 +113,8 @@ def parse_doujinshi_obj(
_filename = _filename.replace('/', '-') _filename = _filename.replace('/', '-')
filename = os.path.join(output_dir, _filename) filename = os.path.join(output_dir, _filename)
else:
doujinshi_dir = f'.{PATH_SEPARATOR}'
return doujinshi_dir, filename return doujinshi_dir, filename
@ -130,7 +134,7 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
file_list.sort() file_list.sort()
for image in file_list: for image in file_list:
if not os.path.splitext(image)[1] in ('.jpg', '.png', '.webp'): if not os.path.splitext(image)[1] in EXTENSIONS:
continue continue
image_html += f'<img src="{image}" class="image-item"/>\n' image_html += f'<img src="{image}" class="image-item"/>\n'
@ -175,7 +179,7 @@ def move_to_folder(output_dir='.', doujinshi_obj=None, file_type=None):
shutil.move(filename, os.path.join(doujinshi_dir, os.path.basename(filename))) shutil.move(filename, os.path.join(doujinshi_dir, os.path.basename(filename)))
def generate_main_html(output_dir='./'): def generate_main_html(output_dir=f'.{PATH_SEPARATOR}'):
""" """
Generate a main html to show all the contains doujinshi. Generate a main html to show all the contains doujinshi.
With a link to their `index.html`. With a link to their `index.html`.
@ -256,7 +260,7 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa
import img2pdf import img2pdf
"""Write images to a PDF file using img2pdf.""" """Write images to a PDF file using img2pdf."""
file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp'))] file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(EXTENSIONS)]
file_list.sort() file_list.sort()
logger.info(f'Writing PDF file to path: {filename}') logger.info(f'Writing PDF file to path: {filename}')
@ -308,7 +312,8 @@ def signal_handler(_signal, _frame):
def paging(page_string): def paging(page_string):
# 1,3-5,14 -> [1, 3, 4, 5, 14] # 1,3-5,14 -> [1, 3, 4, 5, 14]
if not page_string: if not page_string:
return [] # default, the first page
return [1]
page_list = [] page_list = []
for i in page_string.split(','): for i in page_string.split(','):
@ -333,7 +338,7 @@ def generate_metadata_file(output_dir, doujinshi_obj):
fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR', fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES', 'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
'TAGS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS', 'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
'SERIES', 'PARODY', 'URL'] 'SERIES', 'PARODY', 'URL']
temp_dict = CaseInsensitiveDict(dict(doujinshi_obj.table)) temp_dict = CaseInsensitiveDict(dict(doujinshi_obj.table))

View File

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "nhentai" name = "nhentai"
version = "0.5.17.2" version = "0.5.19"
description = "nhentai doujinshi downloader" description = "nhentai doujinshi downloader"
authors = ["Ricter Z <ricterzheng@gmail.com>"] authors = ["Ricter Z <ricterzheng@gmail.com>"]
license = "MIT" license = "MIT"
@ -22,4 +22,4 @@ requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api" build-backend = "poetry.core.masonry.api"
[tool.poetry.scripts] [tool.poetry.scripts]
nhentai = 'nhentai.command:main' nhentai = 'nhentai.command:main'