mirror of
https://github.com/RicterZ/nhentai.git
synced 2025-04-20 02:41:19 +02:00
Merge pull request #354 from normalizedwater546/master
asyncio: fix downloader being run sequentially + httpx: fix proxy and missing headers
This commit is contained in:
commit
3d6263cf11
@ -77,7 +77,7 @@ def main():
|
|||||||
doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))
|
doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))
|
||||||
|
|
||||||
if not options.is_show:
|
if not options.is_show:
|
||||||
downloader = Downloader(path=options.output_dir, size=options.threads,
|
downloader = Downloader(path=options.output_dir, threads=options.threads,
|
||||||
timeout=options.timeout, delay=options.delay)
|
timeout=options.timeout, delay=options.delay)
|
||||||
|
|
||||||
for doujinshi_id in doujinshi_ids:
|
for doujinshi_id in doujinshi_ids:
|
||||||
|
@ -1,22 +1,17 @@
|
|||||||
# coding: utf-
|
# coding: utf-
|
||||||
|
|
||||||
import multiprocessing
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import time
|
import asyncio
|
||||||
|
import httpx
|
||||||
import urllib3.exceptions
|
import urllib3.exceptions
|
||||||
|
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from nhentai import constant
|
from nhentai import constant
|
||||||
from nhentai.logger import logger
|
from nhentai.logger import logger
|
||||||
from nhentai.utils import Singleton
|
from nhentai.utils import Singleton, async_request
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import httpx
|
|
||||||
|
|
||||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||||
semaphore = multiprocessing.Semaphore(1)
|
|
||||||
|
|
||||||
|
|
||||||
class NHentaiImageNotExistException(Exception):
|
class NHentaiImageNotExistException(Exception):
|
||||||
pass
|
pass
|
||||||
@ -37,17 +32,32 @@ def download_callback(result):
|
|||||||
logger.log(16, f'{data} downloaded successfully')
|
logger.log(16, f'{data} downloaded successfully')
|
||||||
|
|
||||||
|
|
||||||
|
async def fiber(tasks):
|
||||||
|
for completed_task in asyncio.as_completed(tasks):
|
||||||
|
try:
|
||||||
|
result = await completed_task
|
||||||
|
logger.info(f'{result[1]} download completed')
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f'An error occurred: {e}')
|
||||||
|
|
||||||
|
|
||||||
class Downloader(Singleton):
|
class Downloader(Singleton):
|
||||||
def __init__(self, path='', size=5, timeout=30, delay=0):
|
def __init__(self, path='', threads=5, timeout=30, delay=0):
|
||||||
self.size = size
|
self.threads = threads
|
||||||
self.path = str(path)
|
self.path = str(path)
|
||||||
self.timeout = timeout
|
self.timeout = timeout
|
||||||
self.delay = delay
|
self.delay = delay
|
||||||
|
|
||||||
|
async def _semaphore_download(self, semaphore, *args, **kwargs):
|
||||||
|
async with semaphore:
|
||||||
|
return await self.download(*args, **kwargs)
|
||||||
|
|
||||||
async def download(self, url, folder='', filename='', retried=0, proxy=None):
|
async def download(self, url, folder='', filename='', retried=0, proxy=None):
|
||||||
if self.delay:
|
|
||||||
time.sleep(self.delay)
|
|
||||||
logger.info(f'Starting to download {url} ...')
|
logger.info(f'Starting to download {url} ...')
|
||||||
|
|
||||||
|
if self.delay:
|
||||||
|
await asyncio.sleep(self.delay)
|
||||||
|
|
||||||
filename = filename if filename else os.path.basename(urlparse(url).path)
|
filename = filename if filename else os.path.basename(urlparse(url).path)
|
||||||
|
|
||||||
save_file_path = os.path.join(self.folder, filename)
|
save_file_path = os.path.join(self.folder, filename)
|
||||||
@ -57,14 +67,14 @@ class Downloader(Singleton):
|
|||||||
logger.warning(f'Skipped download: {save_file_path} already exists')
|
logger.warning(f'Skipped download: {save_file_path} already exists')
|
||||||
return 1, url
|
return 1, url
|
||||||
|
|
||||||
response = await self.async_request(url, self.timeout) # TODO: Add proxy
|
response = await async_request('GET', url, timeout=self.timeout, proxies=proxy)
|
||||||
|
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
path = urlparse(url).path
|
path = urlparse(url).path
|
||||||
for mirror in constant.IMAGE_URL_MIRRORS:
|
for mirror in constant.IMAGE_URL_MIRRORS:
|
||||||
logger.info(f"Try mirror: {mirror}{path}")
|
logger.info(f"Try mirror: {mirror}{path}")
|
||||||
mirror_url = f'{mirror}{path}'
|
mirror_url = f'{mirror}{path}'
|
||||||
response = await self.async_request(mirror_url, self.timeout)
|
response = await async_request('GET', mirror_url, timeout=self.timeout, proxies=proxy)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -117,13 +127,9 @@ class Downloader(Singleton):
|
|||||||
f.write(chunk)
|
f.write(chunk)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
async def async_request(self, url, timeout):
|
|
||||||
async with httpx.AsyncClient() as client:
|
|
||||||
return await client.get(url, timeout=timeout)
|
|
||||||
|
|
||||||
def start_download(self, queue, folder='') -> bool:
|
def start_download(self, queue, folder='') -> bool:
|
||||||
logger.warning("Proxy temporarily unavailable, it will be fixed later. ")
|
if not isinstance(folder, (str,)):
|
||||||
if not isinstance(folder, (str, )):
|
|
||||||
folder = str(folder)
|
folder = str(folder)
|
||||||
|
|
||||||
if self.path:
|
if self.path:
|
||||||
@ -141,19 +147,14 @@ class Downloader(Singleton):
|
|||||||
# Assuming we want to continue with rest of process.
|
# Assuming we want to continue with rest of process.
|
||||||
return True
|
return True
|
||||||
|
|
||||||
async def fiber(tasks):
|
semaphore = asyncio.Semaphore(self.threads)
|
||||||
for completed_task in asyncio.as_completed(tasks):
|
|
||||||
try:
|
|
||||||
result = await completed_task
|
|
||||||
logger.info(f'{result[1]} download completed')
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f'An error occurred: {e}')
|
|
||||||
|
|
||||||
tasks = [
|
coroutines = [
|
||||||
self.download(url, filename=os.path.basename(urlparse(url).path))
|
self._semaphore_download(semaphore, url, filename=os.path.basename(urlparse(url).path))
|
||||||
for url in queue
|
for url in queue
|
||||||
]
|
]
|
||||||
|
|
||||||
# Prevent coroutines infection
|
# Prevent coroutines infection
|
||||||
asyncio.run(fiber(tasks))
|
asyncio.run(fiber(coroutines))
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
@ -6,6 +6,7 @@ import os
|
|||||||
import zipfile
|
import zipfile
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
|
import httpx
|
||||||
import requests
|
import requests
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
@ -32,8 +33,28 @@ def request(method, url, **kwargs):
|
|||||||
return getattr(session, method)(url, verify=False, **kwargs)
|
return getattr(session, method)(url, verify=False, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
async def async_request(method, url, proxies = None, **kwargs):
|
||||||
|
headers = {
|
||||||
|
'Referer': constant.LOGIN_URL,
|
||||||
|
'User-Agent': constant.CONFIG['useragent'],
|
||||||
|
'Cookie': constant.CONFIG['cookie'],
|
||||||
|
}
|
||||||
|
|
||||||
|
if proxies is None:
|
||||||
|
proxies = constant.CONFIG['proxy']
|
||||||
|
|
||||||
|
if proxies.get('http') == '' and proxies.get('https') == '':
|
||||||
|
proxies = None
|
||||||
|
|
||||||
|
async with httpx.AsyncClient(headers=headers, verify=False, proxies=proxies, **kwargs) as client:
|
||||||
|
response = await client.request(method, url, **kwargs)
|
||||||
|
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
def check_cookie():
|
def check_cookie():
|
||||||
response = request('get', constant.BASE_URL)
|
response = request('get', constant.BASE_URL)
|
||||||
|
|
||||||
if response.status_code == 403 and 'Just a moment...' in response.text:
|
if response.status_code == 403 and 'Just a moment...' in response.text:
|
||||||
logger.error('Blocked by Cloudflare captcha, please set your cookie and useragent')
|
logger.error('Blocked by Cloudflare captcha, please set your cookie and useragent')
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
@ -20,7 +20,7 @@ class TestDownload(unittest.TestCase):
|
|||||||
def test_download(self):
|
def test_download(self):
|
||||||
did = 440546
|
did = 440546
|
||||||
info = Doujinshi(**doujinshi_parser(did), name_format='%i')
|
info = Doujinshi(**doujinshi_parser(did), name_format='%i')
|
||||||
info.downloader = Downloader(path='/tmp', size=5)
|
info.downloader = Downloader(path='/tmp', threads=5)
|
||||||
info.download()
|
info.download()
|
||||||
|
|
||||||
self.assertTrue(os.path.exists(f'/tmp/{did}/001.jpg'))
|
self.assertTrue(os.path.exists(f'/tmp/{did}/001.jpg'))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user