Compare commits

...

20 Commits

Author SHA1 Message Date
d74fd103f0 remove setup.py 2025-01-08 09:35:44 +08:00
0cb2411955 Merge branch 'master' of github.com:RicterZ/nhentai 2025-01-08 09:17:01 +08:00
de08d3daaa 0.5.17.1 2025-01-07 14:26:38 +08:00
946b85ace9 tiny fix 2024-12-21 09:32:33 +08:00
5bde24f159 remove debug print 2024-12-21 09:18:34 +08:00
3cae13e76f fix #363 2024-12-18 23:37:00 +08:00
7483b8f923 workaround of #359 2024-12-11 23:58:48 +08:00
eae42c8eb5 fix #356 2024-12-11 23:57:01 +08:00
b841747761 fix #356 2024-12-11 23:47:48 +08:00
1f3528afad try to fix #361 2024-12-09 14:36:44 +08:00
bb41e502c1 0.5.17 for fix #360 2024-12-09 09:26:33 +08:00
7089144ac6 fix #360 #359 2024-12-09 09:25:40 +08:00
0a9f7c3d3e 0.5.15 fix some bugs 2024-12-04 11:04:04 +08:00
40536ad456 Merge branch 'master' of github.com:RicterZ/nhentai 2024-12-04 11:03:48 +08:00
edb571c9dd fix #358 2024-12-04 11:00:50 +08:00
b2befd3473 Merge pull request #357 from FelixJS123/favorite_metadata
add favorites count metadata
2024-12-04 10:47:32 +08:00
c2e880f172 fix asyncio proxies settings and update httpx version 2024-12-04 10:46:45 +08:00
841988bc29 Updated README 2024-11-30 22:58:54 -08:00
390948e252 add favorites count metadata 2024-11-30 22:53:45 -08:00
b9b8468bfe 0.5.14 2024-12-01 10:37:59 +08:00
15 changed files with 206 additions and 128 deletions

View File

@ -5,7 +5,7 @@ COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
RUN python setup.py install
RUN pip install .
WORKDIR /output
ENTRYPOINT ["nhentai"]

View File

@ -59,7 +59,7 @@ On Gentoo Linux:
.. code-block::
layman -fa glicOne
layman -fa glibOne
sudo emerge net-misc/nhentai
On NixOS:
@ -140,6 +140,7 @@ Format output doujinshi folder name:
Supported doujinshi folder formatter:
- %i: Doujinshi id
- %f: Doujinshi favorite count
- %t: Doujinshi name
- %s: Doujinshi subtitle (translated name)
- %a: Doujinshi authors' name

View File

@ -1,3 +1,3 @@
__version__ = '0.5.12'
__version__ = '0.5.17.2'
__author__ = 'RicterZ'
__email__ = 'ricterzheng@gmail.com'

View File

@ -37,7 +37,7 @@ def write_config():
f.write(json.dumps(constant.CONFIG))
def callback(option, opt_str, value, parser):
def callback(option, _opt_str, _value, parser):
if option == '--id':
pass
value = []

View File

@ -29,11 +29,12 @@ class DoujinshiInfo(dict):
class Doujinshi(object):
def __init__(self, name=None, pretty_name=None, id=None, img_id=None,
def __init__(self, name=None, pretty_name=None, id=None, favorite_counts=0, img_id=None,
ext='', pages=0, name_format='[%i][%a][%t]', **kwargs):
self.name = name
self.pretty_name = pretty_name
self.id = id
self.favorite_counts = favorite_counts
self.img_id = img_id
self.ext = ext
self.pages = pages
@ -45,6 +46,7 @@ class Doujinshi(object):
name_format = name_format.replace('%ag', format_filename(ag_value))
name_format = name_format.replace('%i', format_filename(str(self.id)))
name_format = name_format.replace('%f', format_filename(str(self.favorite_counts)))
name_format = name_format.replace('%a', format_filename(self.info.artists))
name_format = name_format.replace('%g', format_filename(self.info.groups))
@ -55,7 +57,7 @@ class Doujinshi(object):
self.table = [
['Parodies', self.info.parodies],
['Doujinshi', self.name],
['Title', self.name],
['Subtitle', self.info.subtitle],
['Date', self.info.date],
['Characters', self.info.characters],
@ -63,6 +65,7 @@ class Doujinshi(object):
['Groups', self.info.groups],
['Languages', self.info.languages],
['Tags', self.info.tags],
['Favorite Counts', self.info.favorite_counts],
['URL', self.url],
['Pages', self.pages],
]

View File

@ -13,6 +13,7 @@ from nhentai.utils import Singleton, async_request
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
class NHentaiImageNotExistException(Exception):
pass
@ -32,33 +33,37 @@ def download_callback(result):
logger.log(16, f'{data} downloaded successfully')
async def fiber(tasks):
for completed_task in asyncio.as_completed(tasks):
try:
result = await completed_task
logger.info(f'{result[1]} download completed')
except Exception as e:
logger.error(f'An error occurred: {e}')
class Downloader(Singleton):
def __init__(self, path='', threads=5, timeout=30, delay=0):
self.threads = threads
self.path = str(path)
self.timeout = timeout
self.delay = delay
self.folder = None
self.semaphore = None
async def _semaphore_download(self, semaphore, *args, **kwargs):
async with semaphore:
async def fiber(self, tasks):
self.semaphore = asyncio.Semaphore(self.threads)
for completed_task in asyncio.as_completed(tasks):
try:
result = await completed_task
logger.info(f'{result[1]} download completed')
except Exception as e:
logger.error(f'An error occurred: {e}')
async def _semaphore_download(self, *args, **kwargs):
async with self.semaphore:
return await self.download(*args, **kwargs)
async def download(self, url, folder='', filename='', retried=0, proxy=None):
async def download(self, url, folder='', filename='', retried=0, proxy=None, length=0):
logger.info(f'Starting to download {url} ...')
if self.delay:
await asyncio.sleep(self.delay)
filename = filename if filename else os.path.basename(urlparse(url).path)
base_filename, extension = os.path.splitext(filename)
filename = base_filename.zfill(length) + extension
save_file_path = os.path.join(self.folder, filename)
@ -127,7 +132,6 @@ class Downloader(Singleton):
f.write(chunk)
return True
def start_download(self, queue, folder='') -> bool:
if not isinstance(folder, (str,)):
folder = str(folder)
@ -147,14 +151,13 @@ class Downloader(Singleton):
# Assuming we want to continue with rest of process.
return True
semaphore = asyncio.Semaphore(self.threads)
digit_length = len(str(len(queue)))
coroutines = [
self._semaphore_download(semaphore, url, filename=os.path.basename(urlparse(url).path))
self._semaphore_download(url, filename=os.path.basename(urlparse(url).path), length=digit_length)
for url in queue
]
# Prevent coroutines infection
asyncio.run(fiber(coroutines))
asyncio.run(self.fiber(coroutines))
return True

View File

@ -135,17 +135,20 @@ def doujinshi_parser(id_, counter=0):
logger.warning(f'Error: {e}, ignored')
return None
# print(response)
html = BeautifulSoup(response, 'html.parser')
doujinshi_info = html.find('div', attrs={'id': 'info'})
title = doujinshi_info.find('h1').text
pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text
subtitle = doujinshi_info.find('h2')
favorite_counts = str(doujinshi_info.find('span', class_='nobold').find('span', class_='count'))
if favorite_counts is None:
favorite_counts = '0'
doujinshi['name'] = title
doujinshi['pretty_name'] = pretty_name
doujinshi['subtitle'] = subtitle.text if subtitle else ''
doujinshi['favorite_counts'] = favorite_counts.strip()
doujinshi_cover = html.find('div', attrs={'id': 'cover'})
img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
@ -157,8 +160,8 @@ def doujinshi_parser(id_, counter=0):
ext.append(ext_name)
if not img_id:
logger.critical('Tried yo get image id failed')
sys.exit(1)
logger.critical(f'Tried yo get image id failed of id: {id_}')
return None
doujinshi['img_id'] = img_id.group(1)
doujinshi['ext'] = ext
@ -185,53 +188,6 @@ def doujinshi_parser(id_, counter=0):
return doujinshi
def legacy_doujinshi_parser(id_):
if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()):
raise Exception(f'Doujinshi id({id_}) is not valid')
id_ = int(id_)
logger.info(f'Fetching information of doujinshi id {id_}')
doujinshi = dict()
doujinshi['id'] = id_
url = f'{constant.DETAIL_URL}/{id_}'
i = 0
while 5 > i:
try:
response = request('get', url).json()
except Exception as e:
i += 1
if not i < 5:
logger.critical(str(e))
sys.exit(1)
continue
break
doujinshi['name'] = response['title']['english']
doujinshi['subtitle'] = response['title']['japanese']
doujinshi['img_id'] = response['media_id']
doujinshi['ext'] = ''.join([i['t'] for i in response['images']['pages']])
doujinshi['pages'] = len(response['images']['pages'])
# gain information of the doujinshi
needed_fields = ['character', 'artist', 'language', 'tag', 'parody', 'group', 'category']
for tag in response['tags']:
tag_type = tag['type']
if tag_type in needed_fields:
if tag_type == 'tag':
if tag_type not in doujinshi:
doujinshi[tag_type] = {}
tag['name'] = tag['name'].replace(' ', '-')
tag['name'] = tag['name'].lower()
doujinshi[tag_type][tag['name']] = tag['id']
elif tag_type not in doujinshi:
doujinshi[tag_type] = tag['name']
else:
doujinshi[tag_type] += ', ' + tag['name']
return doujinshi
def print_doujinshi(doujinshi_list):
if not doujinshi_list:
return

View File

@ -8,6 +8,8 @@ from nhentai.constant import LANGUAGE_ISO
def serialize_json(doujinshi, output_dir):
metadata = {'title': doujinshi.name,
'subtitle': doujinshi.info.subtitle}
if doujinshi.info.favorite_counts:
metadata['favorite_counts'] = doujinshi.favorite_counts
if doujinshi.info.date:
metadata['upload_date'] = doujinshi.info.date
if doujinshi.info.parodies:
@ -44,6 +46,7 @@ def serialize_comic_xml(doujinshi, output_dir):
xml_write_simple_tag(f, 'PageCount', doujinshi.pages)
xml_write_simple_tag(f, 'URL', doujinshi.url)
xml_write_simple_tag(f, 'NhentaiId', doujinshi.id)
xml_write_simple_tag(f, 'Favorites', doujinshi.favorite_counts)
xml_write_simple_tag(f, 'Genre', doujinshi.info.categories)
xml_write_simple_tag(f, 'BlackAndWhite', 'No' if doujinshi.info.tags and

View File

@ -10,7 +10,8 @@ import httpx
import requests
import sqlite3
import urllib.parse
from typing import Optional, Tuple
from typing import Tuple
from requests.structures import CaseInsensitiveDict
from nhentai import constant
from nhentai.logger import logger
@ -46,6 +47,10 @@ async def async_request(method, url, proxies = None, **kwargs):
if proxies.get('http') == '' and proxies.get('https') == '':
proxies = None
if proxies:
_proxies = {f'{k}://': v for k, v in proxies.items() if v}
proxies = _proxies
async with httpx.AsyncClient(headers=headers, verify=False, proxies=proxies, **kwargs) as client:
response = await client.request(method, url, **kwargs)
@ -272,7 +277,7 @@ def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
It used to be a whitelist approach allowed only alphabet and a part of symbols.
but most doujinshi's names include Japanese 2-byte characters and these was rejected.
so it is using blacklist approach now.
if filename include forbidden characters (\'/:,;*?"<>|) ,it replace space character(' ').
if filename include forbidden characters (\'/:,;*?"<>|) ,it replaces space character(" ").
"""
# maybe you can use `--format` to select a suitable filename
@ -295,7 +300,7 @@ def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
return filename
def signal_handler(signal, frame):
def signal_handler(_signal, _frame):
logger.error('Ctrl-C signal received. Stopping...')
sys.exit(1)
@ -330,14 +335,14 @@ def generate_metadata_file(output_dir, doujinshi_obj):
'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
'TAGS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
'SERIES', 'PARODY', 'URL']
special_fields = ['PARODY', 'TITLE', 'ORIGINAL TITLE', 'DATE', 'CHARACTERS', 'AUTHOR', 'GROUPS',
'LANGUAGE', 'TAGS', 'URL', 'PAGES']
for i in range(len(fields)):
f.write(f'{fields[i]}: ')
if fields[i] in special_fields:
f.write(str(doujinshi_obj.table[special_fields.index(fields[i])][1]))
f.write('\n')
temp_dict = CaseInsensitiveDict(dict(doujinshi_obj.table))
for i in fields:
v = temp_dict.get(i)
v = temp_dict.get(f'{i}s') if v is None else v
v = doujinshi_obj.info.get(i.lower(), None) if v is None else v
v = doujinshi_obj.info.get(f'{i.lower()}s', "Unknown") if v is None else v
f.write(f'{i}: {v}\n')
f.close()
logger.log(16, f'Metadata Info has been written to "{info_txt_path}"')

119
poetry.lock generated
View File

@ -1,4 +1,26 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
[[package]]
name = "anyio"
version = "4.5.2"
description = "High level compatibility layer for multiple asynchronous event loop implementations"
optional = false
python-versions = ">=3.8"
files = [
{file = "anyio-4.5.2-py3-none-any.whl", hash = "sha256:c011ee36bc1e8ba40e5a81cb9df91925c218fe9b778554e0b56a21e1b5d4716f"},
{file = "anyio-4.5.2.tar.gz", hash = "sha256:23009af4ed04ce05991845451e11ef02fc7c5ed29179ac9a420e5ad0ac7ddc5b"},
]
[package.dependencies]
exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
idna = ">=2.8"
sniffio = ">=1.1"
typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""}
[package.extras]
doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21.0b1)"]
trio = ["trio (>=0.26.1)"]
[[package]]
name = "beautifulsoup4"
@ -126,6 +148,77 @@ files = [
{file = "charset_normalizer-3.0.1-py3-none-any.whl", hash = "sha256:7e189e2e1d3ed2f4aebabd2d5b0f931e883676e51c7624826e0a4e5fe8a0bf24"},
]
[[package]]
name = "exceptiongroup"
version = "1.2.2"
description = "Backport of PEP 654 (exception groups)"
optional = false
python-versions = ">=3.7"
files = [
{file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
{file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
]
[package.extras]
test = ["pytest (>=6)"]
[[package]]
name = "h11"
version = "0.14.0"
description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
optional = false
python-versions = ">=3.7"
files = [
{file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
]
[[package]]
name = "httpcore"
version = "1.0.7"
description = "A minimal low-level HTTP client."
optional = false
python-versions = ">=3.8"
files = [
{file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"},
{file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"},
]
[package.dependencies]
certifi = "*"
h11 = ">=0.13,<0.15"
[package.extras]
asyncio = ["anyio (>=4.0,<5.0)"]
http2 = ["h2 (>=3,<5)"]
socks = ["socksio (==1.*)"]
trio = ["trio (>=0.22.0,<1.0)"]
[[package]]
name = "httpx"
version = "0.27.2"
description = "The next generation HTTP client."
optional = false
python-versions = ">=3.8"
files = [
{file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"},
{file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"},
]
[package.dependencies]
anyio = "*"
certifi = "*"
httpcore = "==1.*"
idna = "*"
sniffio = "*"
[package.extras]
brotli = ["brotli", "brotlicffi"]
cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
http2 = ["h2 (>=3,<5)"]
socks = ["socksio (==1.*)"]
zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "idna"
version = "3.7"
@ -169,6 +262,17 @@ urllib3 = ">=1.21.1,<3"
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
[[package]]
name = "sniffio"
version = "1.3.1"
description = "Sniff out which async library your code is running under"
optional = false
python-versions = ">=3.7"
files = [
{file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
]
[[package]]
name = "soupsieve"
version = "2.4"
@ -194,6 +298,17 @@ files = [
[package.extras]
widechars = ["wcwidth"]
[[package]]
name = "typing-extensions"
version = "4.12.2"
description = "Backported and Experimental Type Hints for Python 3.8+"
optional = false
python-versions = ">=3.8"
files = [
{file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
{file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
]
[[package]]
name = "urllib3"
version = "1.26.19"
@ -213,4 +328,4 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.8"
content-hash = "0a1d5abd47a669c7a1f2dc7b43824a449e29ba94908a4338d2ea0f2dfb4f805e"
content-hash = "a69dbf5dcfd6dcc5afc0fd2de4ab153841f7d210d4be60c426e332e36a79d679"

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "nhentai"
version = "0.5.12"
version = "0.5.17.2"
description = "nhentai doujinshi downloader"
authors = ["Ricter Z <ricterzheng@gmail.com>"]
license = "MIT"
@ -14,8 +14,12 @@ beautifulsoup4 = "^4.11.2"
tabulate = "^0.9.0"
iso8601 = "^1.1.0"
urllib3 = "^1.26.14"
httpx = "0.27.2"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.poetry.scripts]
nhentai = 'nhentai.command:main'

29
qodana.yaml Executable file
View File

@ -0,0 +1,29 @@
#-------------------------------------------------------------------------------#
# Qodana analysis is configured by qodana.yaml file #
# https://www.jetbrains.com/help/qodana/qodana-yaml.html #
#-------------------------------------------------------------------------------#
version: "1.0"
#Specify inspection profile for code analysis
profile:
name: qodana.starter
#Enable inspections
#include:
# - name: <SomeEnabledInspectionId>
#Disable inspections
#exclude:
# - name: <SomeDisabledInspectionId>
# paths:
# - <path/where/not/run/inspection>
#Execute shell command before Qodana execution (Applied in CI/CD pipeline)
#bootstrap: sh ./prepare-qodana.sh
#Install IDE plugins before Qodana execution (Applied in CI/CD pipeline)
#plugins:
# - id: <plugin.id> #(plugin id can be found at https://plugins.jetbrains.com)
#Specify Qodana linter for analysis (Applied in CI/CD pipeline)
linter: jetbrains/qodana-python:2024.3

View File

@ -1,4 +1,4 @@
httpx
httpx==0.27.2
requests
soupsieve
setuptools

View File

@ -1,3 +0,0 @@
[metadata]
description_file = README.rst

View File

@ -1,38 +0,0 @@
# coding: utf-8
import codecs
from setuptools import setup, find_packages
from nhentai import __version__, __author__, __email__
with open('requirements.txt') as f:
requirements = [l for l in f.read().splitlines() if l]
def long_description():
with codecs.open('README.rst', 'rb') as readme:
return readme.read().decode('utf-8')
setup(
name='nhentai',
version=__version__,
packages=find_packages(),
author=__author__,
author_email=__email__,
keywords=['nhentai', 'doujinshi', 'downloader'],
description='nhentai.net doujinshis downloader',
long_description=long_description(),
url='https://github.com/RicterZ/nhentai',
download_url='https://github.com/RicterZ/nhentai/tarball/master',
include_package_data=True,
zip_safe=False,
install_requires=requirements,
entry_points={
'console_scripts': [
'nhentai = nhentai.command:main',
]
},
license='MIT',
)