mirror of
				https://github.com/RicterZ/nhentai.git
				synced 2025-11-03 18:50:53 +01:00 
			
		
		
		
	Merge pull request #354 from normalizedwater546/master
asyncio: fix downloader being run sequentially + httpx: fix proxy and missing headers
This commit is contained in:
		@@ -77,7 +77,7 @@ def main():
 | 
			
		||||
        doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))
 | 
			
		||||
 | 
			
		||||
    if not options.is_show:
 | 
			
		||||
        downloader = Downloader(path=options.output_dir, size=options.threads,
 | 
			
		||||
        downloader = Downloader(path=options.output_dir, threads=options.threads,
 | 
			
		||||
                                timeout=options.timeout, delay=options.delay)
 | 
			
		||||
 | 
			
		||||
        for doujinshi_id in doujinshi_ids:
 | 
			
		||||
 
 | 
			
		||||
@@ -1,22 +1,17 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
 | 
			
		||||
import multiprocessing
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import time
 | 
			
		||||
import asyncio
 | 
			
		||||
import httpx
 | 
			
		||||
import urllib3.exceptions
 | 
			
		||||
 | 
			
		||||
from urllib.parse import urlparse
 | 
			
		||||
from nhentai import constant
 | 
			
		||||
from nhentai.logger import logger
 | 
			
		||||
from nhentai.utils import Singleton
 | 
			
		||||
from nhentai.utils import Singleton, async_request
 | 
			
		||||
 | 
			
		||||
import asyncio
 | 
			
		||||
import httpx
 | 
			
		||||
 | 
			
		||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 | 
			
		||||
semaphore = multiprocessing.Semaphore(1)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class NHentaiImageNotExistException(Exception):
 | 
			
		||||
    pass
 | 
			
		||||
@@ -37,17 +32,32 @@ def download_callback(result):
 | 
			
		||||
        logger.log(16, f'{data} downloaded successfully')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
async def fiber(tasks):
 | 
			
		||||
    for completed_task in asyncio.as_completed(tasks):
 | 
			
		||||
        try:
 | 
			
		||||
            result = await completed_task
 | 
			
		||||
            logger.info(f'{result[1]} download completed')
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            logger.error(f'An error occurred: {e}')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Downloader(Singleton):
 | 
			
		||||
    def __init__(self, path='', size=5, timeout=30, delay=0):
 | 
			
		||||
        self.size = size
 | 
			
		||||
    def __init__(self, path='', threads=5, timeout=30, delay=0):
 | 
			
		||||
        self.threads = threads
 | 
			
		||||
        self.path = str(path)
 | 
			
		||||
        self.timeout = timeout
 | 
			
		||||
        self.delay = delay
 | 
			
		||||
 | 
			
		||||
    async def _semaphore_download(self, semaphore, *args, **kwargs):
 | 
			
		||||
        async with semaphore:
 | 
			
		||||
            return await self.download(*args, **kwargs)
 | 
			
		||||
 | 
			
		||||
    async def download(self, url, folder='', filename='', retried=0, proxy=None):
 | 
			
		||||
        if self.delay:
 | 
			
		||||
            time.sleep(self.delay)
 | 
			
		||||
        logger.info(f'Starting to download {url} ...')
 | 
			
		||||
 | 
			
		||||
        if self.delay:
 | 
			
		||||
            await asyncio.sleep(self.delay)
 | 
			
		||||
 | 
			
		||||
        filename = filename if filename else os.path.basename(urlparse(url).path)
 | 
			
		||||
 | 
			
		||||
        save_file_path = os.path.join(self.folder, filename)
 | 
			
		||||
@@ -57,14 +67,14 @@ class Downloader(Singleton):
 | 
			
		||||
                logger.warning(f'Skipped download: {save_file_path} already exists')
 | 
			
		||||
                return 1, url
 | 
			
		||||
 | 
			
		||||
            response = await self.async_request(url, self.timeout)  # TODO: Add proxy
 | 
			
		||||
            response = await async_request('GET', url, timeout=self.timeout, proxies=proxy)
 | 
			
		||||
 | 
			
		||||
            if response.status_code != 200:
 | 
			
		||||
                path = urlparse(url).path
 | 
			
		||||
                for mirror in constant.IMAGE_URL_MIRRORS:
 | 
			
		||||
                    logger.info(f"Try mirror: {mirror}{path}")
 | 
			
		||||
                    mirror_url = f'{mirror}{path}'
 | 
			
		||||
                    response = await self.async_request(mirror_url, self.timeout)
 | 
			
		||||
                    response = await async_request('GET', mirror_url, timeout=self.timeout, proxies=proxy)
 | 
			
		||||
                    if response.status_code == 200:
 | 
			
		||||
                        break
 | 
			
		||||
 | 
			
		||||
@@ -117,12 +127,8 @@ class Downloader(Singleton):
 | 
			
		||||
                        f.write(chunk)
 | 
			
		||||
        return True
 | 
			
		||||
 | 
			
		||||
    async def async_request(self, url, timeout):
 | 
			
		||||
        async with httpx.AsyncClient() as client:
 | 
			
		||||
            return await client.get(url, timeout=timeout)
 | 
			
		||||
 | 
			
		||||
    def start_download(self, queue, folder='') -> bool:
 | 
			
		||||
        logger.warning("Proxy temporarily unavailable, it will be fixed later. ")
 | 
			
		||||
        if not isinstance(folder, (str,)):
 | 
			
		||||
            folder = str(folder)
 | 
			
		||||
 | 
			
		||||
@@ -141,19 +147,14 @@ class Downloader(Singleton):
 | 
			
		||||
            # Assuming we want to continue with rest of process.
 | 
			
		||||
            return True
 | 
			
		||||
 | 
			
		||||
        async def fiber(tasks):
 | 
			
		||||
            for completed_task in asyncio.as_completed(tasks):
 | 
			
		||||
                try:
 | 
			
		||||
                    result = await completed_task
 | 
			
		||||
                    logger.info(f'{result[1]} download completed')
 | 
			
		||||
                except Exception as e:
 | 
			
		||||
                    logger.error(f'An error occurred: {e}')
 | 
			
		||||
        semaphore = asyncio.Semaphore(self.threads)
 | 
			
		||||
 | 
			
		||||
        tasks = [
 | 
			
		||||
            self.download(url, filename=os.path.basename(urlparse(url).path))
 | 
			
		||||
        coroutines = [
 | 
			
		||||
            self._semaphore_download(semaphore, url, filename=os.path.basename(urlparse(url).path))
 | 
			
		||||
            for url in queue
 | 
			
		||||
        ]
 | 
			
		||||
 | 
			
		||||
        # Prevent coroutines infection
 | 
			
		||||
        asyncio.run(fiber(tasks))
 | 
			
		||||
        asyncio.run(fiber(coroutines))
 | 
			
		||||
 | 
			
		||||
        return True
 | 
			
		||||
 
 | 
			
		||||
@@ -6,6 +6,7 @@ import os
 | 
			
		||||
import zipfile
 | 
			
		||||
import shutil
 | 
			
		||||
 | 
			
		||||
import httpx
 | 
			
		||||
import requests
 | 
			
		||||
import sqlite3
 | 
			
		||||
import urllib.parse
 | 
			
		||||
@@ -32,8 +33,28 @@ def request(method, url, **kwargs):
 | 
			
		||||
    return getattr(session, method)(url, verify=False, **kwargs)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
async def async_request(method, url, proxies = None, **kwargs):
 | 
			
		||||
    headers = {
 | 
			
		||||
        'Referer': constant.LOGIN_URL,
 | 
			
		||||
        'User-Agent': constant.CONFIG['useragent'],
 | 
			
		||||
        'Cookie': constant.CONFIG['cookie'],
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if proxies is None:
 | 
			
		||||
        proxies = constant.CONFIG['proxy']
 | 
			
		||||
 | 
			
		||||
    if proxies.get('http') == '' and proxies.get('https') == '':
 | 
			
		||||
        proxies = None
 | 
			
		||||
 | 
			
		||||
    async with httpx.AsyncClient(headers=headers, verify=False, proxies=proxies, **kwargs) as client:
 | 
			
		||||
        response = await client.request(method, url, **kwargs)
 | 
			
		||||
 | 
			
		||||
    return response
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def check_cookie():
 | 
			
		||||
    response = request('get', constant.BASE_URL)
 | 
			
		||||
 | 
			
		||||
    if response.status_code == 403 and 'Just a moment...' in response.text:
 | 
			
		||||
        logger.error('Blocked by Cloudflare captcha, please set your cookie and useragent')
 | 
			
		||||
        sys.exit(1)
 | 
			
		||||
 
 | 
			
		||||
@@ -20,7 +20,7 @@ class TestDownload(unittest.TestCase):
 | 
			
		||||
    def test_download(self):
 | 
			
		||||
        did = 440546
 | 
			
		||||
        info = Doujinshi(**doujinshi_parser(did), name_format='%i')
 | 
			
		||||
        info.downloader = Downloader(path='/tmp', size=5)
 | 
			
		||||
        info.downloader = Downloader(path='/tmp', threads=5)
 | 
			
		||||
        info.download()
 | 
			
		||||
 | 
			
		||||
        self.assertTrue(os.path.exists(f'/tmp/{did}/001.jpg'))
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user