mirror of
https://github.com/RicterZ/nhentai.git
synced 2025-07-01 07:59:29 +02:00
Compare commits
17 Commits
Author | SHA1 | Date | |
---|---|---|---|
3d6263cf11 | |||
e3410f5a9a | |||
feb7f45533 | |||
0754caaeb7 | |||
49e5a3094a | |||
c044b64beb | |||
f8334c09b5 | |||
c90c486fb4 | |||
90b17832cc | |||
14c6db9cc3 | |||
f30ff59b2b | |||
1504ee779f | |||
98d9eecf6d | |||
e16e623b9d | |||
c3f3182df3 | |||
12aad842f8 | |||
f9f76ab0f5 |
@ -1,3 +1,3 @@
|
||||
__version__ = '0.5.10'
|
||||
__version__ = '0.5.12'
|
||||
__author__ = 'RicterZ'
|
||||
__email__ = 'ricterzheng@gmail.com'
|
||||
|
@ -77,7 +77,7 @@ def main():
|
||||
doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))
|
||||
|
||||
if not options.is_show:
|
||||
downloader = Downloader(path=options.output_dir, size=options.threads,
|
||||
downloader = Downloader(path=options.output_dir, threads=options.threads,
|
||||
timeout=options.timeout, delay=options.delay)
|
||||
|
||||
for doujinshi_id in doujinshi_ids:
|
||||
|
@ -12,6 +12,7 @@ EXT_MAP = {
|
||||
'j': 'jpg',
|
||||
'p': 'png',
|
||||
'g': 'gif',
|
||||
'w': 'webp',
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,24 +1,17 @@
|
||||
# coding: utf-
|
||||
|
||||
import multiprocessing
|
||||
import signal
|
||||
|
||||
import sys
|
||||
import os
|
||||
import requests
|
||||
import time
|
||||
import asyncio
|
||||
import httpx
|
||||
import urllib3.exceptions
|
||||
|
||||
from urllib.parse import urlparse
|
||||
from nhentai import constant
|
||||
from nhentai.logger import logger
|
||||
from nhentai.parser import request
|
||||
from nhentai.utils import Singleton
|
||||
from nhentai.utils import Singleton, async_request
|
||||
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
semaphore = multiprocessing.Semaphore(1)
|
||||
|
||||
|
||||
class NHentaiImageNotExistException(Exception):
|
||||
pass
|
||||
@ -39,64 +32,66 @@ def download_callback(result):
|
||||
logger.log(16, f'{data} downloaded successfully')
|
||||
|
||||
|
||||
class Downloader(Singleton):
|
||||
async def fiber(tasks):
|
||||
for completed_task in asyncio.as_completed(tasks):
|
||||
try:
|
||||
result = await completed_task
|
||||
logger.info(f'{result[1]} download completed')
|
||||
except Exception as e:
|
||||
logger.error(f'An error occurred: {e}')
|
||||
|
||||
def __init__(self, path='', size=5, timeout=30, delay=0):
|
||||
self.size = size
|
||||
|
||||
class Downloader(Singleton):
|
||||
def __init__(self, path='', threads=5, timeout=30, delay=0):
|
||||
self.threads = threads
|
||||
self.path = str(path)
|
||||
self.timeout = timeout
|
||||
self.delay = delay
|
||||
|
||||
def download(self, url, folder='', filename='', retried=0, proxy=None):
|
||||
if self.delay:
|
||||
time.sleep(self.delay)
|
||||
logger.info(f'Starting to download {url} ...')
|
||||
filename = filename if filename else os.path.basename(urlparse(url).path)
|
||||
base_filename, extension = os.path.splitext(filename)
|
||||
async def _semaphore_download(self, semaphore, *args, **kwargs):
|
||||
async with semaphore:
|
||||
return await self.download(*args, **kwargs)
|
||||
|
||||
async def download(self, url, folder='', filename='', retried=0, proxy=None):
|
||||
logger.info(f'Starting to download {url} ...')
|
||||
|
||||
if self.delay:
|
||||
await asyncio.sleep(self.delay)
|
||||
|
||||
filename = filename if filename else os.path.basename(urlparse(url).path)
|
||||
|
||||
save_file_path = os.path.join(self.folder, filename)
|
||||
|
||||
save_file_path = os.path.join(folder, base_filename.zfill(3) + extension)
|
||||
try:
|
||||
if os.path.exists(save_file_path):
|
||||
logger.warning(f'Skipped download: {save_file_path} already exists')
|
||||
return 1, url
|
||||
|
||||
response = None
|
||||
with open(save_file_path, "wb") as f:
|
||||
i = 0
|
||||
while i < 10:
|
||||
try:
|
||||
response = request('get', url, stream=True, timeout=self.timeout, proxies=proxy)
|
||||
if response.status_code != 200:
|
||||
path = urlparse(url).path
|
||||
for mirror in constant.IMAGE_URL_MIRRORS:
|
||||
print(f'{mirror}{path}')
|
||||
mirror_url = f'{mirror}{path}'
|
||||
response = request('get', mirror_url, stream=True,
|
||||
timeout=self.timeout, proxies=proxy)
|
||||
if response.status_code == 200:
|
||||
break
|
||||
response = await async_request('GET', url, timeout=self.timeout, proxies=proxy)
|
||||
|
||||
except Exception as e:
|
||||
i += 1
|
||||
if not i < 10:
|
||||
logger.critical(str(e))
|
||||
return 0, None
|
||||
continue
|
||||
if response.status_code != 200:
|
||||
path = urlparse(url).path
|
||||
for mirror in constant.IMAGE_URL_MIRRORS:
|
||||
logger.info(f"Try mirror: {mirror}{path}")
|
||||
mirror_url = f'{mirror}{path}'
|
||||
response = await async_request('GET', mirror_url, timeout=self.timeout, proxies=proxy)
|
||||
if response.status_code == 200:
|
||||
break
|
||||
|
||||
break
|
||||
if not await self.save(filename, response):
|
||||
logger.error(f'Can not download image {url}')
|
||||
return 1, None
|
||||
|
||||
length = response.headers.get('content-length')
|
||||
if length is None:
|
||||
f.write(response.content)
|
||||
else:
|
||||
for chunk in response.iter_content(2048):
|
||||
f.write(chunk)
|
||||
|
||||
except (requests.HTTPError, requests.Timeout) as e:
|
||||
except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e:
|
||||
if retried < 3:
|
||||
logger.warning(f'Warning: {e}, retrying({retried}) ...')
|
||||
return 0, self.download(url=url, folder=folder, filename=filename,
|
||||
retried=retried+1, proxy=proxy)
|
||||
logger.info(f'Download {filename} failed, retrying({retried + 1}) times...')
|
||||
return await self.download(
|
||||
url=url,
|
||||
folder=folder,
|
||||
filename=filename,
|
||||
retried=retried + 1,
|
||||
proxy=proxy,
|
||||
)
|
||||
else:
|
||||
return 0, None
|
||||
|
||||
@ -106,6 +101,8 @@ class Downloader(Singleton):
|
||||
|
||||
except Exception as e:
|
||||
import traceback
|
||||
|
||||
logger.error(f"Exception type: {type(e)}")
|
||||
traceback.print_stack()
|
||||
logger.critical(str(e))
|
||||
return 0, None
|
||||
@ -115,8 +112,24 @@ class Downloader(Singleton):
|
||||
|
||||
return 1, url
|
||||
|
||||
async def save(self, save_file_path, response) -> bool:
|
||||
if response is None:
|
||||
logger.error('Error: Response is None')
|
||||
return False
|
||||
save_file_path = os.path.join(self.folder, save_file_path)
|
||||
with open(save_file_path, 'wb') as f:
|
||||
if response is not None:
|
||||
length = response.headers.get('content-length')
|
||||
if length is None:
|
||||
f.write(response.content)
|
||||
else:
|
||||
async for chunk in response.aiter_bytes(2048):
|
||||
f.write(chunk)
|
||||
return True
|
||||
|
||||
|
||||
def start_download(self, queue, folder='') -> bool:
|
||||
if not isinstance(folder, (str, )):
|
||||
if not isinstance(folder, (str,)):
|
||||
folder = str(folder)
|
||||
|
||||
if self.path:
|
||||
@ -128,34 +141,20 @@ class Downloader(Singleton):
|
||||
os.makedirs(folder)
|
||||
except EnvironmentError as e:
|
||||
logger.critical(str(e))
|
||||
self.folder = folder
|
||||
|
||||
if os.getenv('DEBUG', None) == 'NODOWNLOAD':
|
||||
# Assuming we want to continue with rest of process.
|
||||
return True
|
||||
queue = [(self, url, folder, constant.CONFIG['proxy']) for url in queue]
|
||||
|
||||
pool = multiprocessing.Pool(self.size, init_worker)
|
||||
[pool.apply_async(download_wrapper, args=item) for item in queue]
|
||||
semaphore = asyncio.Semaphore(self.threads)
|
||||
|
||||
pool.close()
|
||||
pool.join()
|
||||
coroutines = [
|
||||
self._semaphore_download(semaphore, url, filename=os.path.basename(urlparse(url).path))
|
||||
for url in queue
|
||||
]
|
||||
|
||||
# Prevent coroutines infection
|
||||
asyncio.run(fiber(coroutines))
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def download_wrapper(obj, url, folder='', proxy=None):
|
||||
if sys.platform == 'darwin' or semaphore.get_value():
|
||||
return Downloader.download(obj, url=url, folder=folder, proxy=proxy)
|
||||
else:
|
||||
return -3, None
|
||||
|
||||
|
||||
def init_worker():
|
||||
signal.signal(signal.SIGINT, subprocess_signal)
|
||||
|
||||
|
||||
def subprocess_signal(sig, frame):
|
||||
if semaphore.acquire(timeout=1):
|
||||
logger.warning('Ctrl-C pressed, exiting sub processes ...')
|
||||
|
||||
raise KeyboardInterrupt
|
||||
|
@ -148,7 +148,7 @@ def doujinshi_parser(id_, counter=0):
|
||||
doujinshi['subtitle'] = subtitle.text if subtitle else ''
|
||||
|
||||
doujinshi_cover = html.find('div', attrs={'id': 'cover'})
|
||||
img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif)$',
|
||||
img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
|
||||
doujinshi_cover.a.img.attrs['data-src'])
|
||||
|
||||
ext = []
|
||||
|
@ -6,6 +6,7 @@ import os
|
||||
import zipfile
|
||||
import shutil
|
||||
|
||||
import httpx
|
||||
import requests
|
||||
import sqlite3
|
||||
import urllib.parse
|
||||
@ -32,8 +33,28 @@ def request(method, url, **kwargs):
|
||||
return getattr(session, method)(url, verify=False, **kwargs)
|
||||
|
||||
|
||||
async def async_request(method, url, proxies = None, **kwargs):
|
||||
headers = {
|
||||
'Referer': constant.LOGIN_URL,
|
||||
'User-Agent': constant.CONFIG['useragent'],
|
||||
'Cookie': constant.CONFIG['cookie'],
|
||||
}
|
||||
|
||||
if proxies is None:
|
||||
proxies = constant.CONFIG['proxy']
|
||||
|
||||
if proxies.get('http') == '' and proxies.get('https') == '':
|
||||
proxies = None
|
||||
|
||||
async with httpx.AsyncClient(headers=headers, verify=False, proxies=proxies, **kwargs) as client:
|
||||
response = await client.request(method, url, **kwargs)
|
||||
|
||||
return response
|
||||
|
||||
|
||||
def check_cookie():
|
||||
response = request('get', constant.BASE_URL)
|
||||
|
||||
if response.status_code == 403 and 'Just a moment...' in response.text:
|
||||
logger.error('Blocked by Cloudflare captcha, please set your cookie and useragent')
|
||||
sys.exit(1)
|
||||
@ -104,7 +125,7 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
|
||||
file_list.sort()
|
||||
|
||||
for image in file_list:
|
||||
if not os.path.splitext(image)[1] in ('.jpg', '.png'):
|
||||
if not os.path.splitext(image)[1] in ('.jpg', '.png', '.webp'):
|
||||
continue
|
||||
image_html += f'<img src="{image}" class="image-item"/>\n'
|
||||
|
||||
@ -230,7 +251,7 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa
|
||||
import img2pdf
|
||||
|
||||
"""Write images to a PDF file using img2pdf."""
|
||||
file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif'))]
|
||||
file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp'))]
|
||||
file_list.sort()
|
||||
|
||||
logger.info(f'Writing PDF file to path: {filename}')
|
||||
@ -309,7 +330,7 @@ def generate_metadata_file(output_dir, doujinshi_obj):
|
||||
'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
|
||||
'TAGS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
|
||||
'SERIES', 'PARODY', 'URL']
|
||||
special_fields = ['PARODY', 'TITLE', 'ORIGINAL TITLE', 'CHARACTERS', 'AUTHOR', 'GROUPS',
|
||||
special_fields = ['PARODY', 'TITLE', 'ORIGINAL TITLE', 'DATE', 'CHARACTERS', 'AUTHOR', 'GROUPS',
|
||||
'LANGUAGE', 'TAGS', 'URL', 'PAGES']
|
||||
|
||||
for i in range(len(fields)):
|
||||
|
@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "nhentai"
|
||||
version = "0.5.10"
|
||||
version = "0.5.12"
|
||||
description = "nhentai doujinshi downloader"
|
||||
authors = ["Ricter Z <ricterzheng@gmail.com>"]
|
||||
license = "MIT"
|
||||
|
@ -1,3 +1,4 @@
|
||||
httpx
|
||||
requests
|
||||
soupsieve
|
||||
setuptools
|
||||
|
@ -20,7 +20,7 @@ class TestDownload(unittest.TestCase):
|
||||
def test_download(self):
|
||||
did = 440546
|
||||
info = Doujinshi(**doujinshi_parser(did), name_format='%i')
|
||||
info.downloader = Downloader(path='/tmp', size=5)
|
||||
info.downloader = Downloader(path='/tmp', threads=5)
|
||||
info.download()
|
||||
|
||||
self.assertTrue(os.path.exists(f'/tmp/{did}/001.jpg'))
|
||||
|
Reference in New Issue
Block a user