Compare commits


42 Commits

SHA1 Message Date
405d879db6 0.5.16 2024-12-08 12:32:10 +08:00
41342a6da0 fix #359 2024-12-08 12:31:58 +08:00
0a9f7c3d3e 0.5.15 fix some bugs 2024-12-04 11:04:04 +08:00
40536ad456 Merge branch 'master' of github.com:RicterZ/nhentai 2024-12-04 11:03:48 +08:00
edb571c9dd fix #358 2024-12-04 11:00:50 +08:00
b2befd3473 Merge pull request #357 from FelixJS123/favorite_metadata
add favorites count metadata
2024-12-04 10:47:32 +08:00
c2e880f172 fix asyncio proxies settings and update httpx version 2024-12-04 10:46:45 +08:00
841988bc29 Updated README 2024-11-30 22:58:54 -08:00
390948e252 add favorites count metadata 2024-11-30 22:53:45 -08:00
b9b8468bfe 0.5.14 2024-12-01 10:37:59 +08:00
3d6263cf11 Merge pull request #354 from normalizedwater546/master
asyncio: fix downloader being run sequentially + httpx: fix proxy and missing headers
2024-11-24 13:50:22 +08:00
e3410f5a9a fix: add headers, proxy to async_request 2024-11-23 13:11:25 +00:00
feb7f45533 fix: semaphore bound to different event loop 2024-11-23 12:19:36 +00:00
0754caaeb7 fix: update threads argument 2024-11-23 11:20:58 +00:00
49e5a3094a fix: recent asyncio change resulting in sequential downloads
This was due to AsyncIO completely ignoring the thread (size) argument, and not updating sleep to be non-blocking.
2024-11-23 11:17:09 +00:00
c044b64beb Merge pull request #353 from hzxjy1/master
Fix issue #7
2024-11-19 02:10:34 +08:00
f8334c09b5 Add dependence httpx 2024-11-19 01:16:51 +08:00
c90c486fb4 Add a fix fatch for downloader 2024-11-19 01:13:16 +08:00
90b17832cc Merge pull request #351 from hzxjy1/master
Use coroutine in url download
2024-11-17 10:10:54 +08:00
14c6db9cc3 Use coroutine in url download and improve the extensibility of class Downloader 2024-11-16 15:57:59 +08:00
f30ff59b2b Merge pull request #348 from JustAHumanBean/webp
add webp support
2024-11-08 16:33:21 +08:00
1504ee779f Update utils.py 2024-11-08 07:49:20 +00:00
98d9eecf6d Update parser.py 2024-11-08 07:47:50 +00:00
e16e623b9d Update doujinshi.py 2024-11-08 07:46:53 +00:00
c3f3182df3 0.5.12 2024-10-01 22:55:01 +09:00
12aad842f8 fix #347 2024-10-01 22:42:26 +09:00
f9f76ab0f5 0.5.11 2024-10-01 12:48:28 +09:00
744a9e4418 Merge branch 'master' of github.com:RicterZ/nhentai 2024-10-01 12:47:48 +09:00
c3e9fff491 fix bug #345 2024-10-01 12:47:13 +09:00
a84e2c5714 fix bug #341 2024-10-01 12:47:10 +09:00
c814c35c50 fix bug #341 2024-10-01 12:39:28 +09:00
e2f71437e2 fix setuptools warning 2024-09-22 16:37:49 +08:00
2fa45ae4df 0.5.10 2024-09-22 16:36:50 +08:00
17bc33c6cb fix arguments pass issue #344 2024-09-22 16:34:53 +08:00
09bb8460f6 fix overwrite issue #344 2024-09-22 16:32:01 +08:00
eb5b93d654 fix: pdf/cbz file already exists, but download process continues 2024-09-22 07:33:52 +00:00
cb6cf6df1a regression: pdf/cbz file already exists, but origin files are downloaded anyways.
- call download with `--cbz --rm-origin-dir`, and run command twice.
- user should pass `--regenerate` option to get back origin dir.
2024-09-22 07:24:16 +00:00
98a66a3cb0 0.5.9 2024-09-22 15:09:36 +08:00
02d47632cf fix bug of move-to-dir 2024-09-22 15:07:53 +08:00
f932b1fbbe update README: mirror setup 2024-09-22 14:45:07 +08:00
fd9e92f9d4 update README 2024-09-22 14:44:42 +08:00
a8a48c6ce7 Merge pull request #343 from RicterZ/pull-342
improve #342
2024-09-22 14:42:32 +08:00
15 changed files with 317 additions and 142 deletions

.gitignore

@@ -8,3 +8,4 @@ dist/
 output/
 venv/
 .vscode/
+test-output

README.rst

@@ -140,6 +140,7 @@ Format output doujinshi folder name:
 Supported doujinshi folder formatter:
 - %i: Doujinshi id
+- %f: Doujinshi favorite count
 - %t: Doujinshi name
 - %s: Doujinshi subtitle (translated name)
 - %a: Doujinshi authors' name
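
The new %f placeholder joins the same sequential str.replace() chain the formatter already uses (see the nhentai/doujinshi.py hunk further down). A minimal sketch of the substitution, with made-up values:

    name_format = '[%i][%f] %t'
    name_format = name_format.replace('%i', '167680')
    name_format = name_format.replace('%f', '1234')
    name_format = name_format.replace('%t', 'example title')
    print(name_format)  # [167680][1234] example title
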
@@ -161,25 +162,21 @@ Other options:
     NHENTAI               nhentai mirror url
 Options:
-  # Operation options, control the program behaviors
   -h, --help            show this help message and exit
   -D, --download        download doujinshi (for search results)
   -S, --show            just show the doujinshi information
-  # Doujinshi options, specify id, keyword, etc.
   --id                  doujinshi ids set, e.g. 167680 167681 167682
   -s KEYWORD, --search=KEYWORD
                         search doujinshi by keyword
   -F, --favorites       list or download your favorites
-  # Page options, control the page to fetch / download
+  -a ARTIST, --artist=ARTIST
+                        list doujinshi by artist name
   --page-all            all search results
   --page=PAGE, --page-range=PAGE
                         page number of search results. e.g. 1,2-5,14
-  --sorting=SORTING     sorting of doujinshi (recent / popular /
+  --sorting=SORTING, --sort=SORTING
+                        sorting of doujinshi (recent / popular /
                         popular-[today|week])
-  # Download options, the output directory, threads, timeout, delay, etc.
   -o OUTPUT_DIR, --output=OUTPUT_DIR
                         output dir
   -t THREADS, --threads=THREADS
@@ -192,8 +189,6 @@ Other options:
   -f FILE, --file=FILE  read gallery IDs from file.
   --format=NAME_FORMAT  format the saved folder name
   --dry-run             Dry run, skip file download
-  # Generate options, for generate html viewer, cbz file, pdf file, etc
   --html                generate a html viewer at current directory
   --no-html             don't generate HTML after downloading
   --gen-main            generate a main viewer contain all the doujin in the
@@ -202,12 +197,10 @@ Other options:
   -P, --pdf             generate PDF file
   --rm-origin-dir       remove downloaded doujinshi dir when generated CBZ or
                         PDF file
-  --move-to-folder      remove files in doujinshi dir then move new file to folder
-                        when generated CBZ or PDF file
+  --move-to-folder      remove files in doujinshi dir then move new file to
+                        folder when generated CBZ or PDF file
   --meta                generate a metadata file in doujinshi format
-  --regenerate-cbz      regenerate the cbz file if exists
+  --regenerate          regenerate the cbz or pdf file if exists
-  # nhentai options, set cookie, user-agent, language, remove caches, histories, etc
   --cookie=COOKIE       set cookie of nhentai to bypass Cloudflare captcha
   --useragent=USERAGENT, --user-agent=USERAGENT
                         set useragent to bypass Cloudflare captcha
@@ -231,6 +224,9 @@ For example:
 .. code-block::
     i.h.loli.club -> i.nhentai.net
+    i3.h.loli.club -> i3.nhentai.net
+    i5.h.loli.club -> i5.nhentai.net
+    i7.h.loli.club -> i7.nhentai.net
     h.loli.club -> nhentai.net
 Set `NHENTAI` env var to your nhentai mirror.
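
The mirror mapping above works because the image hosts are derived from `BASE_URL` with `urlparse`, as the nhentai/constant.py hunk below shows, so a mirror must expose matching i./i3./i5./i7. subdomains. A quick sketch of the derivation (the mirror hostname reuses the example above):

    from urllib.parse import urlparse

    BASE_URL = 'https://h.loli.club'  # value taken from the NHENTAI env var
    IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i.{urlparse(BASE_URL).hostname}/galleries'
    print(IMAGE_URL)  # https://i.h.loli.club/galleries
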

nhentai/__init__.py

@@ -1,3 +1,3 @@
-__version__ = '0.5.8'
+__version__ = '0.5.16'
 __author__ = 'RicterZ'
 __email__ = 'ricterzheng@gmail.com'

nhentai/command.py

@@ -1,4 +1,6 @@
 # coding: utf-8
+import os
+import shutil
 import sys
 import signal
 import platform
@@ -12,7 +14,7 @@ from nhentai.downloader import Downloader
 from nhentai.logger import logger
 from nhentai.constant import BASE_URL
 from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \
-    paging, check_cookie, signal_handler, DB
+    paging, check_cookie, signal_handler, DB, move_to_folder

 def main():
@@ -75,7 +77,7 @@ def main():
         doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))

     if not options.is_show:
-        downloader = Downloader(path=options.output_dir, size=options.threads,
+        downloader = Downloader(path=options.output_dir, threads=options.threads,
                                 timeout=options.timeout, delay=options.delay)

         for doujinshi_id in doujinshi_ids:
@@ -92,6 +94,7 @@ def main():
                 doujinshi.download()
             else:
                 logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
+                continue

             if options.generate_metadata:
                 generate_metadata_file(options.output_dir, doujinshi)
@@ -104,12 +107,22 @@ def main():
                 generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template'])

             if options.is_cbz:
-                generate_doc('cbz', options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder,
-                             options.regenerate)
+                generate_doc('cbz', options.output_dir, doujinshi, options.regenerate)

             if options.is_pdf:
-                generate_doc('pdf', options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder,
-                             options.regenerate)
+                generate_doc('pdf', options.output_dir, doujinshi, options.regenerate)
+
+            if options.move_to_folder:
+                if options.is_cbz:
+                    move_to_folder(options.output_dir, doujinshi, 'cbz')
+                if options.is_pdf:
+                    move_to_folder(options.output_dir, doujinshi, 'pdf')
+
+            if options.rm_origin_dir:
+                if options.move_to_folder:
+                    logger.critical('You specified both --move-to-folder and --rm-origin-dir options, '
+                                    'you will not get anything :(')
+                shutil.rmtree(os.path.join(options.output_dir, doujinshi.filename), ignore_errors=True)

     if options.main_viewer:
         generate_main_html(options.output_dir)
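
Note the ordering that makes the new warning necessary: --move-to-folder first moves the generated CBZ/PDF into the doujinshi directory, and --rm-origin-dir then deletes that entire directory with shutil.rmtree, taking the freshly moved file with it; hence "you will not get anything" when both flags are combined.
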

nhentai/constant.py

@@ -38,9 +38,9 @@ FAV_URL = f'{BASE_URL}/favorites/'
 IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i.{urlparse(BASE_URL).hostname}/galleries'
 IMAGE_URL_MIRRORS = [
-    f'{urlparse(BASE_URL).scheme}://i3.{urlparse(BASE_URL).hostname}'
-    f'{urlparse(BASE_URL).scheme}://i5.{urlparse(BASE_URL).hostname}'
-    f'{urlparse(BASE_URL).scheme}://i7.{urlparse(BASE_URL).hostname}'
+    f'{urlparse(BASE_URL).scheme}://i3.{urlparse(BASE_URL).hostname}',
+    f'{urlparse(BASE_URL).scheme}://i5.{urlparse(BASE_URL).hostname}',
+    f'{urlparse(BASE_URL).scheme}://i7.{urlparse(BASE_URL).hostname}',
 ]

 NHENTAI_HOME = get_nhentai_home()
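
The only change in this hunk is three trailing commas, but it matters: adjacent string literals in Python are implicitly concatenated, so the old list held a single run-together URL instead of three mirror hosts. A standalone sketch of the pitfall (hostnames are placeholders):

    broken = [
        'https://i3.example.net'   # no comma: concatenates with the next literal
        'https://i5.example.net'
    ]
    fixed = [
        'https://i3.example.net',
        'https://i5.example.net',
    ]
    print(len(broken), len(fixed))  # 1 2
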

nhentai/doujinshi.py

@@ -12,6 +12,7 @@ EXT_MAP = {
     'j': 'jpg',
     'p': 'png',
     'g': 'gif',
+    'w': 'webp',
 }
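
For context, the gallery metadata encodes each page's image type as a single-letter code that EXT_MAP expands to a file extension; the new 'w' entry is what lets webp pages resolve. A trivial sketch, with the mapping copied from the hunk above:

    EXT_MAP = {'j': 'jpg', 'p': 'png', 'g': 'gif', 'w': 'webp'}
    print(EXT_MAP['w'])  # webp
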
@@ -28,11 +29,12 @@ class DoujinshiInfo(dict):

 class Doujinshi(object):
-    def __init__(self, name=None, pretty_name=None, id=None, img_id=None,
+    def __init__(self, name=None, pretty_name=None, id=None, favorite_counts=0, img_id=None,
                  ext='', pages=0, name_format='[%i][%a][%t]', **kwargs):
         self.name = name
         self.pretty_name = pretty_name
         self.id = id
+        self.favorite_counts = favorite_counts
         self.img_id = img_id
         self.ext = ext
         self.pages = pages
@@ -44,6 +46,7 @@ class Doujinshi(object):
         name_format = name_format.replace('%ag', format_filename(ag_value))

         name_format = name_format.replace('%i', format_filename(str(self.id)))
+        name_format = name_format.replace('%f', format_filename(str(self.favorite_counts)))
         name_format = name_format.replace('%a', format_filename(self.info.artists))
         name_format = name_format.replace('%g', format_filename(self.info.groups))
@@ -62,6 +65,7 @@ class Doujinshi(object):
             ['Groups', self.info.groups],
             ['Languages', self.info.languages],
             ['Tags', self.info.tags],
+            ['Favorite Counts', self.info.favorite_counts],
             ['URL', self.url],
             ['Pages', self.pages],
         ]
@@ -75,26 +79,28 @@ class Doujinshi(object):
     def check_if_need_download(self, options):
         base_path = os.path.join(self.downloader.path, self.filename)

-        # doujinshi directory is not exist, we need to download definitely
-        if not (os.path.exists(base_path) and os.path.isdir(base_path)):
-            return True
-
-        # regenerate, we need to re-download from nhentai
+        # regenerate, re-download
         if options.regenerate:
             return True

+        # pdf or cbz file exists, skip re-download
+        # doujinshi directory may not exist b/c of --rm-origin-dir option set.
+        # user should pass --regenerate option to get back origin dir.
+        ret_pdf = ret_cbz = None
         if options.is_pdf:
-            file_ext = 'pdf'
-        elif options.is_cbz:
-            file_ext = 'cbz'
-        else:
-            # re-download
-            return True
+            ret_pdf = os.path.exists(f'{base_path}.pdf') or os.path.exists(f'{base_path}/{self.filename}.pdf')

-        # pdf or cbz file exists, we needn't to re-download it
-        if os.path.exists(f'{base_path}.{file_ext}') or os.path.exists(f'{base_path}/{self.filename}.{file_ext}'):
+        if options.is_cbz:
+            ret_cbz = os.path.exists(f'{base_path}.cbz') or os.path.exists(f'{base_path}/{self.filename}.cbz')
+
+        ret = list(filter(lambda s: s is not None, [ret_cbz, ret_pdf]))
+        if ret and all(ret):
             return False

+        # doujinshi directory doesn't exist, re-download
+        if not (os.path.exists(base_path) and os.path.isdir(base_path)):
+            return True
+
         # fallback
         return True
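
A standalone sketch of the new decision order (plain arguments stand in for the project's options object): --regenerate always re-downloads; otherwise every requested output format must already exist before the download is skipped, and the origin-directory check is demoted to a fallback, so a directory removed by --rm-origin-dir no longer forces a pointless re-download:

    import os

    def need_download(base_path, filename, is_pdf, is_cbz, regenerate):
        if regenerate:
            return True
        checks = []
        for wanted, ext in ((is_pdf, 'pdf'), (is_cbz, 'cbz')):
            if wanted:
                checks.append(os.path.exists(f'{base_path}.{ext}') or
                              os.path.exists(f'{base_path}/{filename}.{ext}'))
        if checks and all(checks):
            return False  # every requested file already exists: skip
        return True       # nothing requested, or something is missing
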

nhentai/downloader.py

@@ -1,24 +1,17 @@
 # coding: utf-
-import multiprocessing
-import signal
-import sys
 import os
-import requests
-import time
+import asyncio
+import httpx
 import urllib3.exceptions

 from urllib.parse import urlparse

 from nhentai import constant
 from nhentai.logger import logger
-from nhentai.parser import request
-from nhentai.utils import Singleton
+from nhentai.utils import Singleton, async_request

 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

-semaphore = multiprocessing.Semaphore(1)
-

 class NHentaiImageNotExistException(Exception):
     pass
@@ -39,64 +32,68 @@ def download_callback(result):
     logger.log(16, f'{data} downloaded successfully')


 class Downloader(Singleton):
-    def __init__(self, path='', size=5, timeout=30, delay=0):
-        self.size = size
+    def __init__(self, path='', threads=5, timeout=30, delay=0):
+        self.threads = threads
         self.path = str(path)
         self.timeout = timeout
         self.delay = delay

-    def download(self, url, folder='', filename='', retried=0, proxy=None):
-        if self.delay:
-            time.sleep(self.delay)
-        logger.info(f'Starting to download {url} ...')
-        filename = filename if filename else os.path.basename(urlparse(url).path)
-        base_filename, extension = os.path.splitext(filename)
-
-        save_file_path = os.path.join(folder, base_filename.zfill(3) + extension)
+    async def fiber(self, tasks):
+        self.semaphore = asyncio.Semaphore(self.threads)
+        for completed_task in asyncio.as_completed(tasks):
+            try:
+                result = await completed_task
+                logger.info(f'{result[1]} download completed')
+            except Exception as e:
+                logger.error(f'An error occurred: {e}')
+
+    async def _semaphore_download(self, *args, **kwargs):
+        async with self.semaphore:
+            return await self.download(*args, **kwargs)
+
+    async def download(self, url, folder='', filename='', retried=0, proxy=None):
+        logger.info(f'Starting to download {url} ...')
+
+        if self.delay:
+            await asyncio.sleep(self.delay)
+
+        filename = filename if filename else os.path.basename(urlparse(url).path)
+
+        save_file_path = os.path.join(self.folder, filename)
         try:
             if os.path.exists(save_file_path):
                 logger.warning(f'Skipped download: {save_file_path} already exists')
                 return 1, url

-            response = None
-            with open(save_file_path, "wb") as f:
-                i = 0
-                while i < 10:
-                    try:
-                        response = request('get', url, stream=True, timeout=self.timeout, proxies=proxy)
-                        if response.status_code != 200:
-                            path = urlparse(url).path
-                            for mirror in constant.IMAGE_URL_MIRRORS:
-                                print(f'{mirror}{path}')
-                                mirror_url = f'{mirror}{path}'
-                                response = request('get', mirror_url, stream=True,
-                                                   timeout=self.timeout, proxies=proxy)
-                                if response.status_code == 200:
-                                    break
-                    except Exception as e:
-                        i += 1
-                        if not i < 10:
-                            logger.critical(str(e))
-                            return 0, None
-                        continue
-                    break
-
-                length = response.headers.get('content-length')
-                if length is None:
-                    f.write(response.content)
-                else:
-                    for chunk in response.iter_content(2048):
-                        f.write(chunk)
-        except (requests.HTTPError, requests.Timeout) as e:
+            response = await async_request('GET', url, timeout=self.timeout, proxies=proxy)
+
+            if response.status_code != 200:
+                path = urlparse(url).path
+                for mirror in constant.IMAGE_URL_MIRRORS:
+                    logger.info(f"Try mirror: {mirror}{path}")
+                    mirror_url = f'{mirror}{path}'
+                    response = await async_request('GET', mirror_url, timeout=self.timeout, proxies=proxy)
+                    if response.status_code == 200:
+                        break
+
+            if not await self.save(filename, response):
+                logger.error(f'Can not download image {url}')
+                return 1, None
+
+        except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e:
             if retried < 3:
-                logger.warning(f'Warning: {e}, retrying({retried}) ...')
-                return 0, self.download(url=url, folder=folder, filename=filename,
-                                        retried=retried+1, proxy=proxy)
+                logger.info(f'Download {filename} failed, retrying({retried + 1}) times...')
+                return await self.download(
+                    url=url,
+                    folder=folder,
+                    filename=filename,
+                    retried=retried + 1,
+                    proxy=proxy,
+                )
             else:
                 return 0, None
@@ -106,6 +103,8 @@ class Downloader(Singleton):
         except Exception as e:
             import traceback
+
+            logger.error(f"Exception type: {type(e)}")
             traceback.print_stack()
             logger.critical(str(e))
             return 0, None
@@ -115,8 +114,24 @@ class Downloader(Singleton):

         return 1, url

+    async def save(self, save_file_path, response) -> bool:
+        if response is None:
+            logger.error('Error: Response is None')
+            return False
+        save_file_path = os.path.join(self.folder, save_file_path)
+        with open(save_file_path, 'wb') as f:
+            if response is not None:
+                length = response.headers.get('content-length')
+                if length is None:
+                    f.write(response.content)
+                else:
+                    async for chunk in response.aiter_bytes(2048):
+                        f.write(chunk)
+        return True
+
     def start_download(self, queue, folder='') -> bool:
-        if not isinstance(folder, (str, )):
+        if not isinstance(folder, (str,)):
             folder = str(folder)

         if self.path:
@@ -128,34 +143,19 @@ class Downloader(Singleton):
                 os.makedirs(folder)
             except EnvironmentError as e:
                 logger.critical(str(e))
+        self.folder = folder

         if os.getenv('DEBUG', None) == 'NODOWNLOAD':
             # Assuming we want to continue with rest of process.
             return True

-        queue = [(self, url, folder, constant.CONFIG['proxy']) for url in queue]
-
-        pool = multiprocessing.Pool(self.size, init_worker)
-        [pool.apply_async(download_wrapper, args=item) for item in queue]
-
-        pool.close()
-        pool.join()
+        coroutines = [
+            self._semaphore_download(url, filename=os.path.basename(urlparse(url).path))
+            for url in queue
+        ]
+
+        # Prevent coroutines infection
+        asyncio.run(self.fiber(coroutines))

         return True
-
-
-def download_wrapper(obj, url, folder='', proxy=None):
-    if sys.platform == 'darwin' or semaphore.get_value():
-        return Downloader.download(obj, url=url, folder=folder, proxy=proxy)
-    else:
-        return -3, None
-
-
-def init_worker():
-    signal.signal(signal.SIGINT, subprocess_signal)
-
-
-def subprocess_signal(sig, frame):
-    if semaphore.acquire(timeout=1):
-        logger.warning('Ctrl-C pressed, exiting sub processes ...')
-        raise KeyboardInterrupt
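
Two commits above ("semaphore bound to different event loop" and "recent asyncio change resulting in sequential downloads") are both visible in this rewrite: the semaphore is created inside fiber(), i.e. inside the loop started by asyncio.run(), and the delay awaits asyncio.sleep() instead of calling time.sleep(), which would block the whole loop and serialize the downloads again. A self-contained sketch of the pattern (names are illustrative, not the project's API):

    import asyncio

    async def worker(sem, i):
        async with sem:               # at most `threads` downloads in flight
            await asyncio.sleep(0.1)  # non-blocking stand-in for one download
            return i

    async def fiber(n, threads=5):
        # Created inside the running loop, so it is bound to the right loop
        # (constructing it before asyncio.run() broke on older Pythons).
        sem = asyncio.Semaphore(threads)
        for fut in asyncio.as_completed([worker(sem, i) for i in range(n)]):
            print(await fut)          # results arrive as they complete

    asyncio.run(fiber(10))
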

nhentai/parser.py

@@ -142,13 +142,20 @@ def doujinshi_parser(id_, counter=0):
     title = doujinshi_info.find('h1').text
     pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text
     subtitle = doujinshi_info.find('h2')
+    favorite_counts = doujinshi_info.find('span', class_='nobold').find('span', class_='count')
+    if favorite_counts:
+        favorite_counts = favorite_counts.text.strip()
+    else:
+        favorite_counts = 0

     doujinshi['name'] = title
     doujinshi['pretty_name'] = pretty_name
     doujinshi['subtitle'] = subtitle.text if subtitle else ''
+    doujinshi['favorite_counts'] = favorite_counts

     doujinshi_cover = html.find('div', attrs={'id': 'cover'})
-    img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif)$',
+    img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
                        doujinshi_cover.a.img.attrs['data-src'])

     ext = []

nhentai/serializer.py

@@ -8,6 +8,8 @@ from nhentai.constant import LANGUAGE_ISO
 def serialize_json(doujinshi, output_dir):
     metadata = {'title': doujinshi.name,
                 'subtitle': doujinshi.info.subtitle}
+    if doujinshi.info.favorite_counts:
+        metadata['favorite_counts'] = doujinshi.favorite_counts
     if doujinshi.info.date:
         metadata['upload_date'] = doujinshi.info.date
     if doujinshi.info.parodies:
@@ -44,6 +46,7 @@ def serialize_comic_xml(doujinshi, output_dir):
         xml_write_simple_tag(f, 'PageCount', doujinshi.pages)
         xml_write_simple_tag(f, 'URL', doujinshi.url)
         xml_write_simple_tag(f, 'NhentaiId', doujinshi.id)
+        xml_write_simple_tag(f, 'Favorites', doujinshi.favorite_counts)
         xml_write_simple_tag(f, 'Genre', doujinshi.info.categories)

         xml_write_simple_tag(f, 'BlackAndWhite', 'No' if doujinshi.info.tags and

nhentai/utils.py

@@ -5,7 +5,9 @@ import re
 import os
 import zipfile
 import shutil
+import copy

+import httpx
 import requests
 import sqlite3
 import urllib.parse
@@ -32,8 +34,32 @@ def request(method, url, **kwargs):
     return getattr(session, method)(url, verify=False, **kwargs)


+async def async_request(method, url, proxies=None, **kwargs):
+    headers = {
+        'Referer': constant.LOGIN_URL,
+        'User-Agent': constant.CONFIG['useragent'],
+        'Cookie': constant.CONFIG['cookie'],
+    }
+
+    if proxies is None:
+        proxies = constant.CONFIG['proxy']
+
+    if proxies.get('http') == '' and proxies.get('https') == '':
+        proxies = None
+
+    if proxies:
+        _proxies = {f'{k}://': v for k, v in proxies.items() if v}
+        proxies = _proxies
+
+    async with httpx.AsyncClient(headers=headers, verify=False, proxies=proxies, **kwargs) as client:
+        response = await client.request(method, url, **kwargs)
+
+    return response
+
+
 def check_cookie():
     response = request('get', constant.BASE_URL)
     if response.status_code == 403 and 'Just a moment...' in response.text:
         logger.error('Blocked by Cloudflare captcha, please set your cookie and useragent')
         sys.exit(1)
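
async_request also has to translate the requests-style proxy mapping kept in CONFIG into the scheme:// keys that httpx 0.27 accepts for its proxies argument, which is what the dict comprehension above does. A sketch of the conversion (the proxy URL is a placeholder):

    import httpx

    requests_style = {'http': 'http://127.0.0.1:7890', 'https': 'http://127.0.0.1:7890'}
    httpx_style = {f'{k}://': v for k, v in requests_style.items() if v}
    # -> {'http://': 'http://127.0.0.1:7890', 'https://': 'http://127.0.0.1:7890'}
    client = httpx.AsyncClient(proxies=httpx_style)
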
@@ -72,8 +98,8 @@ def parse_doujinshi_obj(
         doujinshi_obj=None,
         file_type: str = ''
 ) -> Tuple[str, str]:
-    filename = './doujinshi' + file_type
+    filename = f'./doujinshi.{file_type}'
     doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
     if doujinshi_obj is not None:
         _filename = f'{doujinshi_obj.filename}.{file_type}'
@@ -104,7 +130,7 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
     file_list.sort()

     for image in file_list:
-        if not os.path.splitext(image)[1] in ('.jpg', '.png'):
+        if not os.path.splitext(image)[1] in ('.jpg', '.png', '.webp'):
             continue
         image_html += f'<img src="{image}" class="image-item"/>\n'
@@ -128,6 +154,27 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
         logger.warning(f'Writing HTML Viewer failed ({e})')


+def move_to_folder(output_dir='.', doujinshi_obj=None, file_type=None):
+    if not file_type:
+        raise RuntimeError('no file_type specified')
+
+    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
+
+    for fn in os.listdir(doujinshi_dir):
+        file_path = os.path.join(doujinshi_dir, fn)
+        _, ext = os.path.splitext(file_path)
+        if ext in ['.pdf', '.cbz']:
+            continue
+
+        if os.path.isfile(file_path):
+            try:
+                os.remove(file_path)
+            except Exception as e:
+                print(f"Error deleting file: {e}")
+
+    shutil.move(filename, os.path.join(doujinshi_dir, os.path.basename(filename)))
+
+
 def generate_main_html(output_dir='./'):
     """
     Generate a main html to show all the contains doujinshi.
@@ -185,8 +232,7 @@ def generate_main_html(output_dir='./'):
         logger.warning(f'Writing Main Viewer failed ({e})')


-def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, rm_origin_dir=False,
-                 move_to_folder=False, regenerate=False):
+def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):

     doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
@@ -210,7 +256,7 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
         import img2pdf

         """Write images to a PDF file using img2pdf."""
-        file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif'))]
+        file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp'))]
         file_list.sort()

         logger.info(f'Writing PDF file to path: {filename}')
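
With webp added to the filter, the PDF branch simply hands the sorted file list to img2pdf in one call. A minimal standalone sketch of that step (directory and output names are hypothetical):

    import os
    import img2pdf  # third-party: pip install img2pdf

    dirname = 'gallery'
    files = sorted(f for f in os.listdir(dirname)
                   if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp')))
    with open('gallery.pdf', 'wb') as out:
        out.write(img2pdf.convert([os.path.join(dirname, f) for f in files]))
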
@@ -225,20 +271,6 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, rm_origin_dir=False,
     except ImportError:
         logger.error("Please install img2pdf package by using pip.")

-    if rm_origin_dir:
-        shutil.rmtree(doujinshi_dir, ignore_errors=True)
-
-    if move_to_folder:
-        for filename in os.listdir(doujinshi_dir):
-            file_path = os.path.join(doujinshi_dir, filename)
-            if os.path.isfile(file_path):
-                try:
-                    os.remove(file_path)
-                except Exception as e:
-                    print(f"Error deleting file: {e}")
-
-        shutil.move(filename, doujinshi_dir)
-

 def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
     """
@@ -303,7 +335,7 @@ def generate_metadata_file(output_dir, doujinshi_obj):
               'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
               'TAGS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
               'SERIES', 'PARODY', 'URL']
-    special_fields = ['PARODY', 'TITLE', 'ORIGINAL TITLE', 'CHARACTERS', 'AUTHOR', 'GROUPS',
+    special_fields = ['PARODY', 'TITLE', 'ORIGINAL TITLE', 'DATE', 'CHARACTERS', 'AUTHOR', 'GROUPS',
                       'LANGUAGE', 'TAGS', 'URL', 'PAGES']

     for i in range(len(fields)):

poetry.lock

@@ -1,4 +1,26 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
+
+[[package]]
+name = "anyio"
+version = "4.5.2"
+description = "High level compatibility layer for multiple asynchronous event loop implementations"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "anyio-4.5.2-py3-none-any.whl", hash = "sha256:c011ee36bc1e8ba40e5a81cb9df91925c218fe9b778554e0b56a21e1b5d4716f"},
+    {file = "anyio-4.5.2.tar.gz", hash = "sha256:23009af4ed04ce05991845451e11ef02fc7c5ed29179ac9a420e5ad0ac7ddc5b"},
+]
+
+[package.dependencies]
+exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
+idna = ">=2.8"
+sniffio = ">=1.1"
+typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""}
+
+[package.extras]
+doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
+test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21.0b1)"]
+trio = ["trio (>=0.26.1)"]

 [[package]]
 name = "beautifulsoup4"
@@ -126,6 +148,77 @@ files = [
     {file = "charset_normalizer-3.0.1-py3-none-any.whl", hash = "sha256:7e189e2e1d3ed2f4aebabd2d5b0f931e883676e51c7624826e0a4e5fe8a0bf24"},
 ]

+[[package]]
+name = "exceptiongroup"
+version = "1.2.2"
+description = "Backport of PEP 654 (exception groups)"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
+    {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
+]
+
+[package.extras]
+test = ["pytest (>=6)"]
+
+[[package]]
+name = "h11"
+version = "0.14.0"
+description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
+    {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
+]
+
+[[package]]
+name = "httpcore"
+version = "1.0.7"
+description = "A minimal low-level HTTP client."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"},
+    {file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"},
+]
+
+[package.dependencies]
+certifi = "*"
+h11 = ">=0.13,<0.15"
+
+[package.extras]
+asyncio = ["anyio (>=4.0,<5.0)"]
+http2 = ["h2 (>=3,<5)"]
+socks = ["socksio (==1.*)"]
+trio = ["trio (>=0.22.0,<1.0)"]
+
+[[package]]
+name = "httpx"
+version = "0.27.2"
+description = "The next generation HTTP client."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"},
+    {file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"},
+]
+
+[package.dependencies]
+anyio = "*"
+certifi = "*"
+httpcore = "==1.*"
+idna = "*"
+sniffio = "*"
+
+[package.extras]
+brotli = ["brotli", "brotlicffi"]
+cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
+http2 = ["h2 (>=3,<5)"]
+socks = ["socksio (==1.*)"]
+zstd = ["zstandard (>=0.18.0)"]
+
 [[package]]
 name = "idna"
 version = "3.7"
@@ -169,6 +262,17 @@ urllib3 = ">=1.21.1,<3"
 socks = ["PySocks (>=1.5.6,!=1.5.7)"]
 use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]

+[[package]]
+name = "sniffio"
+version = "1.3.1"
+description = "Sniff out which async library your code is running under"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
+    {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
+]
+
 [[package]]
 name = "soupsieve"
 version = "2.4"
@@ -194,6 +298,17 @@ files = [
 [package.extras]
 widechars = ["wcwidth"]

+[[package]]
+name = "typing-extensions"
+version = "4.12.2"
+description = "Backported and Experimental Type Hints for Python 3.8+"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
+    {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
+]
+
 [[package]]
 name = "urllib3"
 version = "1.26.19"
@@ -213,4 +328,4 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.8"
-content-hash = "0a1d5abd47a669c7a1f2dc7b43824a449e29ba94908a4338d2ea0f2dfb4f805e"
+content-hash = "a69dbf5dcfd6dcc5afc0fd2de4ab153841f7d210d4be60c426e332e36a79d679"

pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "nhentai"
-version = "0.5.8"
+version = "0.5.15"
 description = "nhentai doujinshi downloader"
 authors = ["Ricter Z <ricterzheng@gmail.com>"]
 license = "MIT"
@@ -14,6 +14,7 @@ beautifulsoup4 = "^4.11.2"
 tabulate = "^0.9.0"
 iso8601 = "^1.1.0"
 urllib3 = "^1.26.14"
+httpx = "0.27.2"

 [build-system]

requirements.txt

@@ -1,3 +1,4 @@
+httpx==0.27.2
 requests
 soupsieve
 setuptools

setup.cfg

@@ -1,3 +1,3 @@
 [metadata]
-description-file = README.rst
+description_file = README.rst

tests/test_download.py

@@ -20,7 +20,7 @@ class TestDownload(unittest.TestCase):
     def test_download(self):
         did = 440546
         info = Doujinshi(**doujinshi_parser(did), name_format='%i')
-        info.downloader = Downloader(path='/tmp', size=5)
+        info.downloader = Downloader(path='/tmp', threads=5)
         info.download()
         self.assertTrue(os.path.exists(f'/tmp/{did}/001.jpg'))