Compare commits

..

25 Commits

Author SHA1 Message Date
a83c571ec4 0.5.19 2025-01-15 19:47:24 +08:00
e7ff5dab3d Merge pull request #373 from nicojust/fix-favorite-metadata-output
fix favorite_counts output in metadata
2025-01-15 12:26:24 +08:00
a166898b60 fix #374 2025-01-15 12:26:01 +08:00
ce25051fa3 fix: output favorite_counts as an int 2025-01-13 19:51:40 +01:00
41fba6b5ac fix: add missing favorite_counts in metadata file 2025-01-13 19:51:04 +01:00
8944ece4a8 use os.path.sep as path separator 2025-01-11 08:48:43 +08:00
6b4c4bdc70 0.5.18 2025-01-11 08:35:40 +08:00
d1d0c22af8 fix #349 2025-01-11 08:34:30 +08:00
803957ba88 fix #349 2025-01-11 08:33:59 +08:00
13b584a820 fix #371 and #324 2025-01-11 08:02:36 +08:00
be08fcf4cb fix #368 2025-01-11 07:54:28 +08:00
b585225308 fix #370 2025-01-11 07:52:51 +08:00
54af682848 fix #369 2025-01-11 07:50:41 +08:00
d74fd103f0 remove setup.py 2025-01-08 09:35:44 +08:00
0cb2411955 Merge branch 'master' of github.com:RicterZ/nhentai 2025-01-08 09:17:01 +08:00
de08d3daaa 0.5.17.1 2025-01-07 14:26:38 +08:00
946b85ace9 tiny fix 2024-12-21 09:32:33 +08:00
5bde24f159 remove debug print 2024-12-21 09:18:34 +08:00
3cae13e76f fix #363 2024-12-18 23:37:00 +08:00
7483b8f923 workaround of #359 2024-12-11 23:58:48 +08:00
eae42c8eb5 fix #356 2024-12-11 23:57:01 +08:00
b841747761 fix #356 2024-12-11 23:47:48 +08:00
1f3528afad try to fix #361 2024-12-09 14:36:44 +08:00
bb41e502c1 0.5.17 for fix #360 2024-12-09 09:26:33 +08:00
7089144ac6 fix #360 #359 2024-12-09 09:25:40 +08:00
15 changed files with 113 additions and 147 deletions

View File

@ -5,7 +5,7 @@ COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
RUN python setup.py install
RUN pip install .
WORKDIR /output
ENTRYPOINT ["nhentai"]

View File

@ -59,7 +59,7 @@ On Gentoo Linux:
.. code-block::
layman -fa glicOne
layman -fa glibOne
sudo emerge net-misc/nhentai
On NixOS:
@ -129,7 +129,7 @@ Download your favorites with delay:
.. code-block:: bash
nhentai --favorites --download --delay 1
nhentai --favorites --download --delay 1 --page 3-5,7
Format output doujinshi folder name:

View File

@ -1,3 +1,3 @@
__version__ = '0.5.16'
__version__ = '0.5.19'
__author__ = 'RicterZ'
__email__ = 'ricterzheng@gmail.com'

View File

@ -11,6 +11,7 @@ from optparse import OptionParser
from nhentai import __version__
from nhentai.utils import generate_html, generate_main_html, DB
from nhentai.logger import logger
from nhentai.constant import PATH_SEPARATOR
def banner():
@ -37,7 +38,7 @@ def write_config():
f.write(json.dumps(constant.CONFIG))
def callback(option, opt_str, value, parser):
def callback(option, _opt_str, _value, parser):
if option == '--id':
pass
value = []
@ -64,7 +65,8 @@ def cmd_parser():
# operation options
parser.add_option('--download', '-D', dest='is_download', action='store_true',
help='download doujinshi (for search results)')
parser.add_option('--show', '-S', dest='is_show', action='store_true', help='just show the doujinshi information')
parser.add_option('--show', '-S', dest='is_show', action='store_true',
help='just show the doujinshi information')
# doujinshi options
parser.add_option('--id', dest='id', action='callback', callback=callback,
@ -79,14 +81,15 @@ def cmd_parser():
# page options
parser.add_option('--page-all', dest='page_all', action='store_true', default=False,
help='all search results')
parser.add_option('--page', '--page-range', type='string', dest='page', action='store', default='1',
parser.add_option('--page', '--page-range', type='string', dest='page', action='store',
help='page number of search results. e.g. 1,2-5,14')
parser.add_option('--sorting', '--sort', dest='sorting', action='store', default='popular',
help='sorting of doujinshi (recent / popular / popular-[today|week])',
choices=['recent', 'popular', 'popular-today', 'popular-week', 'date'])
# download options
parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', default='./',
parser.add_option('--output', '-o', type='string', dest='output_dir', action='store',
default=f'.{PATH_SEPARATOR}',
help='output dir')
parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
help='thread count for downloading doujinshi')
@ -96,7 +99,8 @@ def cmd_parser():
help='slow down between downloading every doujinshi')
parser.add_option('--proxy', type='string', dest='proxy', action='store',
help='store a proxy, for example: -p "http://127.0.0.1:1080"')
parser.add_option('--file', '-f', type='string', dest='file', action='store', help='read gallery IDs from file.')
parser.add_option('--file', '-f', type='string', dest='file', action='store',
help='read gallery IDs from file.')
parser.add_option('--format', type='string', dest='name_format', action='store',
help='format the saved folder name', default='[%i][%a][%t]')
parser.add_option('--dry-run', action='store_true', dest='dryrun', help='Dry run, skip file download')

View File

@ -48,7 +48,7 @@ def main():
if not options.is_download:
logger.warning('You do not specify --download option')
doujinshis = favorites_parser() if options.page_all else favorites_parser(page=page_list)
doujinshis = favorites_parser(page=page_list) if options.page else favorites_parser()
elif options.keyword:
if constant.CONFIG['language']:

View File

@ -35,11 +35,16 @@ LOGIN_URL = f'{BASE_URL}/login/'
CHALLENGE_URL = f'{BASE_URL}/challenge'
FAV_URL = f'{BASE_URL}/favorites/'
PATH_SEPARATOR = os.path.sep
IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i.{urlparse(BASE_URL).hostname}/galleries'
IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i1.{urlparse(BASE_URL).hostname}/galleries'
IMAGE_URL_MIRRORS = [
f'{urlparse(BASE_URL).scheme}://i2.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i3.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i4.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i5.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i6.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i7.{urlparse(BASE_URL).hostname}',
]

View File

@ -57,7 +57,7 @@ class Doujinshi(object):
self.table = [
['Parodies', self.info.parodies],
['Doujinshi', self.name],
['Title', self.name],
['Subtitle', self.info.subtitle],
['Date', self.info.date],
['Characters', self.info.characters],
@ -65,7 +65,7 @@ class Doujinshi(object):
['Groups', self.info.groups],
['Languages', self.info.languages],
['Tags', self.info.tags],
['Favorite Counts', self.info.favorite_counts],
['Favorite Counts', self.favorite_counts],
['URL', self.url],
['Pages', self.pages],
]

View File

@ -13,6 +13,7 @@ from nhentai.utils import Singleton, async_request
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
class NHentaiImageNotExistException(Exception):
pass
@ -32,35 +33,40 @@ def download_callback(result):
logger.log(16, f'{data} downloaded successfully')
class Downloader(Singleton):
def __init__(self, path='', threads=5, timeout=30, delay=0):
self.threads = threads
self.path = str(path)
self.timeout = timeout
self.delay = delay
self.folder = None
self.semaphore = None
async def fiber(self, tasks):
self.semaphore = asyncio.Semaphore(self.threads)
for completed_task in asyncio.as_completed(tasks):
try:
result = await completed_task
logger.info(f'{result[1]} download completed')
if result[1]:
logger.info(f'{result[1]} download completed')
else:
logger.warning(f'{result[1]} download failed, return value {result[0]}')
except Exception as e:
logger.error(f'An error occurred: {e}')
async def _semaphore_download(self, *args, **kwargs):
async with self.semaphore:
return await self.download(*args, **kwargs)
async def download(self, url, folder='', filename='', retried=0, proxy=None):
async def download(self, url, folder='', filename='', retried=0, proxy=None, length=0):
logger.info(f'Starting to download {url} ...')
if self.delay:
await asyncio.sleep(self.delay)
filename = filename if filename else os.path.basename(urlparse(url).path)
base_filename, extension = os.path.splitext(filename)
filename = base_filename.zfill(length) + extension
save_file_path = os.path.join(self.folder, filename)
@ -82,11 +88,11 @@ class Downloader(Singleton):
if not await self.save(filename, response):
logger.error(f'Can not download image {url}')
return 1, None
return 1, url
except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e:
if retried < 3:
logger.info(f'Download {filename} failed, retrying({retried + 1}) times...')
logger.warning(f'Download {filename} failed, retrying({retried + 1}) times...')
return await self.download(
url=url,
folder=folder,
@ -95,7 +101,8 @@ class Downloader(Singleton):
proxy=proxy,
)
else:
return 0, None
logger.warning(f'Download {filename} failed with 3 times retried, skipped')
return 0, url
except NHentaiImageNotExistException as e:
os.remove(save_file_path)
@ -107,10 +114,10 @@ class Downloader(Singleton):
logger.error(f"Exception type: {type(e)}")
traceback.print_stack()
logger.critical(str(e))
return 0, None
return 0, url
except KeyboardInterrupt:
return -3, None
return -3, url
return 1, url
@ -129,7 +136,6 @@ class Downloader(Singleton):
f.write(chunk)
return True
def start_download(self, queue, folder='') -> bool:
if not isinstance(folder, (str,)):
folder = str(folder)
@ -149,9 +155,10 @@ class Downloader(Singleton):
# Assuming we want to continue with rest of process.
return True
digit_length = len(str(len(queue)))
logger.info(f'Total download pages: {len(queue)}')
coroutines = [
self._semaphore_download(url, filename=os.path.basename(urlparse(url).path))
self._semaphore_download(url, filename=os.path.basename(urlparse(url).path), length=digit_length)
for url in queue
]

View File

@ -135,37 +135,36 @@ def doujinshi_parser(id_, counter=0):
logger.warning(f'Error: {e}, ignored')
return None
# print(response)
html = BeautifulSoup(response, 'html.parser')
doujinshi_info = html.find('div', attrs={'id': 'info'})
title = doujinshi_info.find('h1').text
pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text
subtitle = doujinshi_info.find('h2')
favorite_counts = doujinshi_info.find('span', class_='nobold').find('span', class_='count')\
if favorite_counts:
favorite_counts = favorite_counts.text.strip()
else:
favorite_counts = 0
favorite_counts = doujinshi_info.find('span', class_='nobold').find('span', class_='count')
doujinshi['name'] = title
doujinshi['pretty_name'] = pretty_name
doujinshi['subtitle'] = subtitle.text if subtitle else ''
doujinshi['favorite_counts'] = favorite_counts
doujinshi['favorite_counts'] = int(favorite_counts.text.strip()) if favorite_counts else 0
doujinshi_cover = html.find('div', attrs={'id': 'cover'})
img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
doujinshi_cover.a.img.attrs['data-src'])
# img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
# doujinshi_cover.a.img.attrs['data-src'])
img_id = re.search(r'/galleries/(\d+)/cover\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
ext = []
for i in html.find_all('div', attrs={'class': 'thumb-container'}):
_, ext_name = os.path.basename(i.img.attrs['data-src']).rsplit('.', 1)
ext.append(ext_name)
base_name = os.path.basename(i.img.attrs['data-src'])
ext_name = base_name.split('.')
if len(ext_name) == 3:
ext.append(ext_name[1])
else:
ext.append(ext_name[-1])
if not img_id:
logger.critical('Tried yo get image id failed')
sys.exit(1)
logger.critical(f'Tried yo get image id failed of id: {id_}')
return None
doujinshi['img_id'] = img_id.group(1)
doujinshi['ext'] = ext
@ -192,53 +191,6 @@ def doujinshi_parser(id_, counter=0):
return doujinshi
def legacy_doujinshi_parser(id_):
if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()):
raise Exception(f'Doujinshi id({id_}) is not valid')
id_ = int(id_)
logger.info(f'Fetching information of doujinshi id {id_}')
doujinshi = dict()
doujinshi['id'] = id_
url = f'{constant.DETAIL_URL}/{id_}'
i = 0
while 5 > i:
try:
response = request('get', url).json()
except Exception as e:
i += 1
if not i < 5:
logger.critical(str(e))
sys.exit(1)
continue
break
doujinshi['name'] = response['title']['english']
doujinshi['subtitle'] = response['title']['japanese']
doujinshi['img_id'] = response['media_id']
doujinshi['ext'] = ''.join([i['t'] for i in response['images']['pages']])
doujinshi['pages'] = len(response['images']['pages'])
# gain information of the doujinshi
needed_fields = ['character', 'artist', 'language', 'tag', 'parody', 'group', 'category']
for tag in response['tags']:
tag_type = tag['type']
if tag_type in needed_fields:
if tag_type == 'tag':
if tag_type not in doujinshi:
doujinshi[tag_type] = {}
tag['name'] = tag['name'].replace(' ', '-')
tag['name'] = tag['name'].lower()
doujinshi[tag_type][tag['name']] = tag['id']
elif tag_type not in doujinshi:
doujinshi[tag_type] = tag['name']
else:
doujinshi[tag_type] += ', ' + tag['name']
return doujinshi
def print_doujinshi(doujinshi_list):
if not doujinshi_list:
return

View File

@ -1,6 +1,8 @@
# coding: utf-8
import json
import os
from nhentai.constant import PATH_SEPARATOR
from xml.sax.saxutils import escape
from nhentai.constant import LANGUAGE_ISO
@ -82,7 +84,7 @@ def xml_write_simple_tag(f, name, val, indent=1):
def merge_json():
lst = []
output_dir = "./"
output_dir = f".{PATH_SEPARATOR}"
os.chdir(output_dir)
doujinshi_dirs = next(os.walk('.'))[1]
for folder in doujinshi_dirs:

View File

@ -5,19 +5,21 @@ import re
import os
import zipfile
import shutil
import copy
import httpx
import requests
import sqlite3
import urllib.parse
from typing import Optional, Tuple
from typing import Tuple
from requests.structures import CaseInsensitiveDict
from nhentai import constant
from nhentai.constant import PATH_SEPARATOR
from nhentai.logger import logger
from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database
MAX_FIELD_LENGTH = 100
EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
def request(method, url, **kwargs):
@ -99,9 +101,9 @@ def parse_doujinshi_obj(
file_type: str = ''
) -> Tuple[str, str]:
filename = f'./doujinshi.{file_type}'
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
filename = f'.{PATH_SEPARATOR}doujinshi.{file_type}'
if doujinshi_obj is not None:
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
_filename = f'{doujinshi_obj.filename}.{file_type}'
if file_type == 'cbz':
@ -111,6 +113,8 @@ def parse_doujinshi_obj(
_filename = _filename.replace('/', '-')
filename = os.path.join(output_dir, _filename)
else:
doujinshi_dir = f'.{PATH_SEPARATOR}'
return doujinshi_dir, filename
@ -130,7 +134,7 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
file_list.sort()
for image in file_list:
if not os.path.splitext(image)[1] in ('.jpg', '.png', '.webp'):
if not os.path.splitext(image)[1] in EXTENSIONS:
continue
image_html += f'<img src="{image}" class="image-item"/>\n'
@ -175,7 +179,7 @@ def move_to_folder(output_dir='.', doujinshi_obj=None, file_type=None):
shutil.move(filename, os.path.join(doujinshi_dir, os.path.basename(filename)))
def generate_main_html(output_dir='./'):
def generate_main_html(output_dir=f'.{PATH_SEPARATOR}'):
"""
Generate a main html to show all the contains doujinshi.
With a link to their `index.html`.
@ -256,7 +260,7 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa
import img2pdf
"""Write images to a PDF file using img2pdf."""
file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp'))]
file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(EXTENSIONS)]
file_list.sort()
logger.info(f'Writing PDF file to path: {filename}')
@ -277,7 +281,7 @@ def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
It used to be a whitelist approach allowed only alphabet and a part of symbols.
but most doujinshi's names include Japanese 2-byte characters and these was rejected.
so it is using blacklist approach now.
if filename include forbidden characters (\'/:,;*?"<>|) ,it replace space character(' ').
if filename include forbidden characters (\'/:,;*?"<>|) ,it replaces space character(" ").
"""
# maybe you can use `--format` to select a suitable filename
@ -300,7 +304,7 @@ def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
return filename
def signal_handler(signal, frame):
def signal_handler(_signal, _frame):
logger.error('Ctrl-C signal received. Stopping...')
sys.exit(1)
@ -308,7 +312,8 @@ def signal_handler(signal, frame):
def paging(page_string):
# 1,3-5,14 -> [1, 3, 4, 5, 14]
if not page_string:
return []
# default, the first page
return [1]
page_list = []
for i in page_string.split(','):
@ -333,16 +338,16 @@ def generate_metadata_file(output_dir, doujinshi_obj):
fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
'TAGS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
'SERIES', 'PARODY', 'URL']
special_fields = ['PARODY', 'TITLE', 'ORIGINAL TITLE', 'DATE', 'CHARACTERS', 'AUTHOR', 'GROUPS',
'LANGUAGE', 'TAGS', 'URL', 'PAGES']
for i in range(len(fields)):
f.write(f'{fields[i]}: ')
if fields[i] in special_fields:
f.write(str(doujinshi_obj.table[special_fields.index(fields[i])][1]))
f.write('\n')
temp_dict = CaseInsensitiveDict(dict(doujinshi_obj.table))
for i in fields:
v = temp_dict.get(i)
v = temp_dict.get(f'{i}s') if v is None else v
v = doujinshi_obj.info.get(i.lower(), None) if v is None else v
v = doujinshi_obj.info.get(f'{i.lower()}s', "Unknown") if v is None else v
f.write(f'{i}: {v}\n')
f.close()
logger.log(16, f'Metadata Info has been written to "{info_txt_path}"')

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "nhentai"
version = "0.5.15"
version = "0.5.19"
description = "nhentai doujinshi downloader"
authors = ["Ricter Z <ricterzheng@gmail.com>"]
license = "MIT"
@ -20,3 +20,6 @@ httpx = "0.27.2"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.poetry.scripts]
nhentai = 'nhentai.command:main'

29
qodana.yaml Executable file
View File

@ -0,0 +1,29 @@
#-------------------------------------------------------------------------------#
# Qodana analysis is configured by qodana.yaml file #
# https://www.jetbrains.com/help/qodana/qodana-yaml.html #
#-------------------------------------------------------------------------------#
version: "1.0"
#Specify inspection profile for code analysis
profile:
name: qodana.starter
#Enable inspections
#include:
# - name: <SomeEnabledInspectionId>
#Disable inspections
#exclude:
# - name: <SomeDisabledInspectionId>
# paths:
# - <path/where/not/run/inspection>
#Execute shell command before Qodana execution (Applied in CI/CD pipeline)
#bootstrap: sh ./prepare-qodana.sh
#Install IDE plugins before Qodana execution (Applied in CI/CD pipeline)
#plugins:
# - id: <plugin.id> #(plugin id can be found at https://plugins.jetbrains.com)
#Specify Qodana linter for analysis (Applied in CI/CD pipeline)
linter: jetbrains/qodana-python:2024.3

View File

@ -1,3 +0,0 @@
[metadata]
description_file = README.rst

View File

@ -1,38 +0,0 @@
# coding: utf-8
import codecs
from setuptools import setup, find_packages
from nhentai import __version__, __author__, __email__
with open('requirements.txt') as f:
requirements = [l for l in f.read().splitlines() if l]
def long_description():
with codecs.open('README.rst', 'rb') as readme:
return readme.read().decode('utf-8')
setup(
name='nhentai',
version=__version__,
packages=find_packages(),
author=__author__,
author_email=__email__,
keywords=['nhentai', 'doujinshi', 'downloader'],
description='nhentai.net doujinshis downloader',
long_description=long_description(),
url='https://github.com/RicterZ/nhentai',
download_url='https://github.com/RicterZ/nhentai/tarball/master',
include_package_data=True,
zip_safe=False,
install_requires=requirements,
entry_points={
'console_scripts': [
'nhentai = nhentai.command:main',
]
},
license='MIT',
)