Compare commits

...

10 Commits

Author SHA1 Message Date
405d879db6 0.5.16 2024-12-08 12:32:10 +08:00
41342a6da0 fix #359 2024-12-08 12:31:58 +08:00
0a9f7c3d3e 0.5.15 fix some bugs 2024-12-04 11:04:04 +08:00
40536ad456 Merge branch 'master' of github.com:RicterZ/nhentai 2024-12-04 11:03:48 +08:00
edb571c9dd fix #358 2024-12-04 11:00:50 +08:00
b2befd3473 Merge pull request #357 from FelixJS123/favorite_metadata
add favorites count metadata
2024-12-04 10:47:32 +08:00
c2e880f172 fix asyncio proxies settings and update httpx version 2024-12-04 10:46:45 +08:00
841988bc29 Updated README 2024-11-30 22:58:54 -08:00
390948e252 add favorites count metadata 2024-11-30 22:53:45 -08:00
b9b8468bfe 0.5.14 2024-12-01 10:37:59 +08:00
10 changed files with 155 additions and 19 deletions

View File

@ -140,6 +140,7 @@ Format output doujinshi folder name:
Supported doujinshi folder formatter:
- %i: Doujinshi id
- %f: Doujinshi favorite count
- %t: Doujinshi name
- %s: Doujinshi subtitle (translated name)
- %a: Doujinshi authors' name

View File

@ -1,3 +1,3 @@
__version__ = '0.5.12'
__version__ = '0.5.16'
__author__ = 'RicterZ'
__email__ = 'ricterzheng@gmail.com'

View File

@ -29,11 +29,12 @@ class DoujinshiInfo(dict):
class Doujinshi(object):
def __init__(self, name=None, pretty_name=None, id=None, img_id=None,
def __init__(self, name=None, pretty_name=None, id=None, favorite_counts=0, img_id=None,
ext='', pages=0, name_format='[%i][%a][%t]', **kwargs):
self.name = name
self.pretty_name = pretty_name
self.id = id
self.favorite_counts = favorite_counts
self.img_id = img_id
self.ext = ext
self.pages = pages
@ -45,6 +46,7 @@ class Doujinshi(object):
name_format = name_format.replace('%ag', format_filename(ag_value))
name_format = name_format.replace('%i', format_filename(str(self.id)))
name_format = name_format.replace('%f', format_filename(str(self.favorite_counts)))
name_format = name_format.replace('%a', format_filename(self.info.artists))
name_format = name_format.replace('%g', format_filename(self.info.groups))
@ -63,6 +65,7 @@ class Doujinshi(object):
['Groups', self.info.groups],
['Languages', self.info.languages],
['Tags', self.info.tags],
['Favorite Counts', self.info.favorite_counts],
['URL', self.url],
['Pages', self.pages],
]

View File

@ -32,14 +32,6 @@ def download_callback(result):
logger.log(16, f'{data} downloaded successfully')
async def fiber(tasks):
for completed_task in asyncio.as_completed(tasks):
try:
result = await completed_task
logger.info(f'{result[1]} download completed')
except Exception as e:
logger.error(f'An error occurred: {e}')
class Downloader(Singleton):
def __init__(self, path='', threads=5, timeout=30, delay=0):
@ -48,8 +40,18 @@ class Downloader(Singleton):
self.timeout = timeout
self.delay = delay
async def _semaphore_download(self, semaphore, *args, **kwargs):
async with semaphore:
async def fiber(self, tasks):
self.semaphore = asyncio.Semaphore(self.threads)
for completed_task in asyncio.as_completed(tasks):
try:
result = await completed_task
logger.info(f'{result[1]} download completed')
except Exception as e:
logger.error(f'An error occurred: {e}')
async def _semaphore_download(self, *args, **kwargs):
async with self.semaphore:
return await self.download(*args, **kwargs)
async def download(self, url, folder='', filename='', retried=0, proxy=None):
@ -147,14 +149,13 @@ class Downloader(Singleton):
# Assuming we want to continue with rest of process.
return True
semaphore = asyncio.Semaphore(self.threads)
coroutines = [
self._semaphore_download(semaphore, url, filename=os.path.basename(urlparse(url).path))
self._semaphore_download(url, filename=os.path.basename(urlparse(url).path))
for url in queue
]
# Prevent coroutines infection
asyncio.run(fiber(coroutines))
asyncio.run(self.fiber(coroutines))
return True

View File

@ -142,10 +142,17 @@ def doujinshi_parser(id_, counter=0):
title = doujinshi_info.find('h1').text
pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text
subtitle = doujinshi_info.find('h2')
favorite_counts = doujinshi_info.find('span', class_='nobold').find('span', class_='count')\
if favorite_counts:
favorite_counts = favorite_counts.text.strip()
else:
favorite_counts = 0
doujinshi['name'] = title
doujinshi['pretty_name'] = pretty_name
doujinshi['subtitle'] = subtitle.text if subtitle else ''
doujinshi['favorite_counts'] = favorite_counts
doujinshi_cover = html.find('div', attrs={'id': 'cover'})
img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',

View File

@ -8,6 +8,8 @@ from nhentai.constant import LANGUAGE_ISO
def serialize_json(doujinshi, output_dir):
metadata = {'title': doujinshi.name,
'subtitle': doujinshi.info.subtitle}
if doujinshi.info.favorite_counts:
metadata['favorite_counts'] = doujinshi.favorite_counts
if doujinshi.info.date:
metadata['upload_date'] = doujinshi.info.date
if doujinshi.info.parodies:
@ -44,6 +46,7 @@ def serialize_comic_xml(doujinshi, output_dir):
xml_write_simple_tag(f, 'PageCount', doujinshi.pages)
xml_write_simple_tag(f, 'URL', doujinshi.url)
xml_write_simple_tag(f, 'NhentaiId', doujinshi.id)
xml_write_simple_tag(f, 'Favorites', doujinshi.favorite_counts)
xml_write_simple_tag(f, 'Genre', doujinshi.info.categories)
xml_write_simple_tag(f, 'BlackAndWhite', 'No' if doujinshi.info.tags and

View File

@ -5,6 +5,7 @@ import re
import os
import zipfile
import shutil
import copy
import httpx
import requests
@ -46,6 +47,10 @@ async def async_request(method, url, proxies = None, **kwargs):
if proxies.get('http') == '' and proxies.get('https') == '':
proxies = None
if proxies:
_proxies = {f'{k}://': v for k, v in proxies.items() if v}
proxies = _proxies
async with httpx.AsyncClient(headers=headers, verify=False, proxies=proxies, **kwargs) as client:
response = await client.request(method, url, **kwargs)

119
poetry.lock generated
View File

@ -1,4 +1,26 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
[[package]]
name = "anyio"
version = "4.5.2"
description = "High level compatibility layer for multiple asynchronous event loop implementations"
optional = false
python-versions = ">=3.8"
files = [
{file = "anyio-4.5.2-py3-none-any.whl", hash = "sha256:c011ee36bc1e8ba40e5a81cb9df91925c218fe9b778554e0b56a21e1b5d4716f"},
{file = "anyio-4.5.2.tar.gz", hash = "sha256:23009af4ed04ce05991845451e11ef02fc7c5ed29179ac9a420e5ad0ac7ddc5b"},
]
[package.dependencies]
exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
idna = ">=2.8"
sniffio = ">=1.1"
typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""}
[package.extras]
doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21.0b1)"]
trio = ["trio (>=0.26.1)"]
[[package]]
name = "beautifulsoup4"
@ -126,6 +148,77 @@ files = [
{file = "charset_normalizer-3.0.1-py3-none-any.whl", hash = "sha256:7e189e2e1d3ed2f4aebabd2d5b0f931e883676e51c7624826e0a4e5fe8a0bf24"},
]
[[package]]
name = "exceptiongroup"
version = "1.2.2"
description = "Backport of PEP 654 (exception groups)"
optional = false
python-versions = ">=3.7"
files = [
{file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
{file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
]
[package.extras]
test = ["pytest (>=6)"]
[[package]]
name = "h11"
version = "0.14.0"
description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
optional = false
python-versions = ">=3.7"
files = [
{file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
]
[[package]]
name = "httpcore"
version = "1.0.7"
description = "A minimal low-level HTTP client."
optional = false
python-versions = ">=3.8"
files = [
{file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"},
{file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"},
]
[package.dependencies]
certifi = "*"
h11 = ">=0.13,<0.15"
[package.extras]
asyncio = ["anyio (>=4.0,<5.0)"]
http2 = ["h2 (>=3,<5)"]
socks = ["socksio (==1.*)"]
trio = ["trio (>=0.22.0,<1.0)"]
[[package]]
name = "httpx"
version = "0.27.2"
description = "The next generation HTTP client."
optional = false
python-versions = ">=3.8"
files = [
{file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"},
{file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"},
]
[package.dependencies]
anyio = "*"
certifi = "*"
httpcore = "==1.*"
idna = "*"
sniffio = "*"
[package.extras]
brotli = ["brotli", "brotlicffi"]
cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
http2 = ["h2 (>=3,<5)"]
socks = ["socksio (==1.*)"]
zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "idna"
version = "3.7"
@ -169,6 +262,17 @@ urllib3 = ">=1.21.1,<3"
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
[[package]]
name = "sniffio"
version = "1.3.1"
description = "Sniff out which async library your code is running under"
optional = false
python-versions = ">=3.7"
files = [
{file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
]
[[package]]
name = "soupsieve"
version = "2.4"
@ -194,6 +298,17 @@ files = [
[package.extras]
widechars = ["wcwidth"]
[[package]]
name = "typing-extensions"
version = "4.12.2"
description = "Backported and Experimental Type Hints for Python 3.8+"
optional = false
python-versions = ">=3.8"
files = [
{file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
{file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
]
[[package]]
name = "urllib3"
version = "1.26.19"
@ -213,4 +328,4 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.8"
content-hash = "0a1d5abd47a669c7a1f2dc7b43824a449e29ba94908a4338d2ea0f2dfb4f805e"
content-hash = "a69dbf5dcfd6dcc5afc0fd2de4ab153841f7d210d4be60c426e332e36a79d679"

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "nhentai"
version = "0.5.12"
version = "0.5.15"
description = "nhentai doujinshi downloader"
authors = ["Ricter Z <ricterzheng@gmail.com>"]
license = "MIT"
@ -14,6 +14,7 @@ beautifulsoup4 = "^4.11.2"
tabulate = "^0.9.0"
iso8601 = "^1.1.0"
urllib3 = "^1.26.14"
httpx = "0.27.2"
[build-system]

View File

@ -1,4 +1,4 @@
httpx
httpx==0.27.2
requests
soupsieve
setuptools