mirror of
				https://github.com/RicterZ/nhentai.git
				synced 2025-11-04 02:50:55 +01:00 
			
		
		
		
	Merge pull request #354 from normalizedwater546/master
asyncio: fix downloader being run sequentially + httpx: fix proxy and missing headers
This commit is contained in:
		@@ -77,7 +77,7 @@ def main():
 | 
				
			|||||||
        doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))
 | 
					        doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if not options.is_show:
 | 
					    if not options.is_show:
 | 
				
			||||||
        downloader = Downloader(path=options.output_dir, size=options.threads,
 | 
					        downloader = Downloader(path=options.output_dir, threads=options.threads,
 | 
				
			||||||
                                timeout=options.timeout, delay=options.delay)
 | 
					                                timeout=options.timeout, delay=options.delay)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for doujinshi_id in doujinshi_ids:
 | 
					        for doujinshi_id in doujinshi_ids:
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,22 +1,17 @@
 | 
				
			|||||||
# coding: utf-
 | 
					# coding: utf-
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import multiprocessing
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
import time
 | 
					import asyncio
 | 
				
			||||||
 | 
					import httpx
 | 
				
			||||||
import urllib3.exceptions
 | 
					import urllib3.exceptions
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from urllib.parse import urlparse
 | 
					from urllib.parse import urlparse
 | 
				
			||||||
from nhentai import constant
 | 
					from nhentai import constant
 | 
				
			||||||
from nhentai.logger import logger
 | 
					from nhentai.logger import logger
 | 
				
			||||||
from nhentai.utils import Singleton
 | 
					from nhentai.utils import Singleton, async_request
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import asyncio
 | 
					 | 
				
			||||||
import httpx
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 | 
					urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 | 
				
			||||||
semaphore = multiprocessing.Semaphore(1)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
class NHentaiImageNotExistException(Exception):
 | 
					class NHentaiImageNotExistException(Exception):
 | 
				
			||||||
    pass
 | 
					    pass
 | 
				
			||||||
@@ -37,17 +32,32 @@ def download_callback(result):
 | 
				
			|||||||
        logger.log(16, f'{data} downloaded successfully')
 | 
					        logger.log(16, f'{data} downloaded successfully')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					async def fiber(tasks):
 | 
				
			||||||
 | 
					    for completed_task in asyncio.as_completed(tasks):
 | 
				
			||||||
 | 
					        try:
 | 
				
			||||||
 | 
					            result = await completed_task
 | 
				
			||||||
 | 
					            logger.info(f'{result[1]} download completed')
 | 
				
			||||||
 | 
					        except Exception as e:
 | 
				
			||||||
 | 
					            logger.error(f'An error occurred: {e}')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Downloader(Singleton):
 | 
					class Downloader(Singleton):
 | 
				
			||||||
    def __init__(self, path='', size=5, timeout=30, delay=0):
 | 
					    def __init__(self, path='', threads=5, timeout=30, delay=0):
 | 
				
			||||||
        self.size = size
 | 
					        self.threads = threads
 | 
				
			||||||
        self.path = str(path)
 | 
					        self.path = str(path)
 | 
				
			||||||
        self.timeout = timeout
 | 
					        self.timeout = timeout
 | 
				
			||||||
        self.delay = delay
 | 
					        self.delay = delay
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    async def _semaphore_download(self, semaphore, *args, **kwargs):
 | 
				
			||||||
 | 
					        async with semaphore:
 | 
				
			||||||
 | 
					            return await self.download(*args, **kwargs)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    async def download(self, url, folder='', filename='', retried=0, proxy=None):
 | 
					    async def download(self, url, folder='', filename='', retried=0, proxy=None):
 | 
				
			||||||
        if self.delay:
 | 
					 | 
				
			||||||
            time.sleep(self.delay)
 | 
					 | 
				
			||||||
        logger.info(f'Starting to download {url} ...')
 | 
					        logger.info(f'Starting to download {url} ...')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if self.delay:
 | 
				
			||||||
 | 
					            await asyncio.sleep(self.delay)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        filename = filename if filename else os.path.basename(urlparse(url).path)
 | 
					        filename = filename if filename else os.path.basename(urlparse(url).path)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        save_file_path = os.path.join(self.folder, filename)
 | 
					        save_file_path = os.path.join(self.folder, filename)
 | 
				
			||||||
@@ -57,14 +67,14 @@ class Downloader(Singleton):
 | 
				
			|||||||
                logger.warning(f'Skipped download: {save_file_path} already exists')
 | 
					                logger.warning(f'Skipped download: {save_file_path} already exists')
 | 
				
			||||||
                return 1, url
 | 
					                return 1, url
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            response = await self.async_request(url, self.timeout)  # TODO: Add proxy
 | 
					            response = await async_request('GET', url, timeout=self.timeout, proxies=proxy)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            if response.status_code != 200:
 | 
					            if response.status_code != 200:
 | 
				
			||||||
                path = urlparse(url).path
 | 
					                path = urlparse(url).path
 | 
				
			||||||
                for mirror in constant.IMAGE_URL_MIRRORS:
 | 
					                for mirror in constant.IMAGE_URL_MIRRORS:
 | 
				
			||||||
                    logger.info(f"Try mirror: {mirror}{path}")
 | 
					                    logger.info(f"Try mirror: {mirror}{path}")
 | 
				
			||||||
                    mirror_url = f'{mirror}{path}'
 | 
					                    mirror_url = f'{mirror}{path}'
 | 
				
			||||||
                    response = await self.async_request(mirror_url, self.timeout)
 | 
					                    response = await async_request('GET', mirror_url, timeout=self.timeout, proxies=proxy)
 | 
				
			||||||
                    if response.status_code == 200:
 | 
					                    if response.status_code == 200:
 | 
				
			||||||
                        break
 | 
					                        break
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -117,12 +127,8 @@ class Downloader(Singleton):
 | 
				
			|||||||
                        f.write(chunk)
 | 
					                        f.write(chunk)
 | 
				
			||||||
        return True
 | 
					        return True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    async def async_request(self, url, timeout):
 | 
					 | 
				
			||||||
        async with httpx.AsyncClient() as client:
 | 
					 | 
				
			||||||
            return await client.get(url, timeout=timeout)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def start_download(self, queue, folder='') -> bool:
 | 
					    def start_download(self, queue, folder='') -> bool:
 | 
				
			||||||
        logger.warning("Proxy temporarily unavailable, it will be fixed later. ")
 | 
					 | 
				
			||||||
        if not isinstance(folder, (str,)):
 | 
					        if not isinstance(folder, (str,)):
 | 
				
			||||||
            folder = str(folder)
 | 
					            folder = str(folder)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -141,19 +147,14 @@ class Downloader(Singleton):
 | 
				
			|||||||
            # Assuming we want to continue with rest of process.
 | 
					            # Assuming we want to continue with rest of process.
 | 
				
			||||||
            return True
 | 
					            return True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        async def fiber(tasks):
 | 
					        semaphore = asyncio.Semaphore(self.threads)
 | 
				
			||||||
            for completed_task in asyncio.as_completed(tasks):
 | 
					 | 
				
			||||||
                try:
 | 
					 | 
				
			||||||
                    result = await completed_task
 | 
					 | 
				
			||||||
                    logger.info(f'{result[1]} download completed')
 | 
					 | 
				
			||||||
                except Exception as e:
 | 
					 | 
				
			||||||
                    logger.error(f'An error occurred: {e}')
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        tasks = [
 | 
					        coroutines = [
 | 
				
			||||||
            self.download(url, filename=os.path.basename(urlparse(url).path))
 | 
					            self._semaphore_download(semaphore, url, filename=os.path.basename(urlparse(url).path))
 | 
				
			||||||
            for url in queue
 | 
					            for url in queue
 | 
				
			||||||
        ]
 | 
					        ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Prevent coroutines infection
 | 
					        # Prevent coroutines infection
 | 
				
			||||||
        asyncio.run(fiber(tasks))
 | 
					        asyncio.run(fiber(coroutines))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return True
 | 
					        return True
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -6,6 +6,7 @@ import os
 | 
				
			|||||||
import zipfile
 | 
					import zipfile
 | 
				
			||||||
import shutil
 | 
					import shutil
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import httpx
 | 
				
			||||||
import requests
 | 
					import requests
 | 
				
			||||||
import sqlite3
 | 
					import sqlite3
 | 
				
			||||||
import urllib.parse
 | 
					import urllib.parse
 | 
				
			||||||
@@ -32,8 +33,28 @@ def request(method, url, **kwargs):
 | 
				
			|||||||
    return getattr(session, method)(url, verify=False, **kwargs)
 | 
					    return getattr(session, method)(url, verify=False, **kwargs)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					async def async_request(method, url, proxies = None, **kwargs):
 | 
				
			||||||
 | 
					    headers = {
 | 
				
			||||||
 | 
					        'Referer': constant.LOGIN_URL,
 | 
				
			||||||
 | 
					        'User-Agent': constant.CONFIG['useragent'],
 | 
				
			||||||
 | 
					        'Cookie': constant.CONFIG['cookie'],
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if proxies is None:
 | 
				
			||||||
 | 
					        proxies = constant.CONFIG['proxy']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if proxies.get('http') == '' and proxies.get('https') == '':
 | 
				
			||||||
 | 
					        proxies = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    async with httpx.AsyncClient(headers=headers, verify=False, proxies=proxies, **kwargs) as client:
 | 
				
			||||||
 | 
					        response = await client.request(method, url, **kwargs)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return response
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def check_cookie():
 | 
					def check_cookie():
 | 
				
			||||||
    response = request('get', constant.BASE_URL)
 | 
					    response = request('get', constant.BASE_URL)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if response.status_code == 403 and 'Just a moment...' in response.text:
 | 
					    if response.status_code == 403 and 'Just a moment...' in response.text:
 | 
				
			||||||
        logger.error('Blocked by Cloudflare captcha, please set your cookie and useragent')
 | 
					        logger.error('Blocked by Cloudflare captcha, please set your cookie and useragent')
 | 
				
			||||||
        sys.exit(1)
 | 
					        sys.exit(1)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -20,7 +20,7 @@ class TestDownload(unittest.TestCase):
 | 
				
			|||||||
    def test_download(self):
 | 
					    def test_download(self):
 | 
				
			||||||
        did = 440546
 | 
					        did = 440546
 | 
				
			||||||
        info = Doujinshi(**doujinshi_parser(did), name_format='%i')
 | 
					        info = Doujinshi(**doujinshi_parser(did), name_format='%i')
 | 
				
			||||||
        info.downloader = Downloader(path='/tmp', size=5)
 | 
					        info.downloader = Downloader(path='/tmp', threads=5)
 | 
				
			||||||
        info.download()
 | 
					        info.download()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.assertTrue(os.path.exists(f'/tmp/{did}/001.jpg'))
 | 
					        self.assertTrue(os.path.exists(f'/tmp/{did}/001.jpg'))
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user