Compare commits


1 Commit

Author    SHA1          Message               Date
          cac07a517e    remove debug print    2024-11-09 11:51:03 +08:00
12 changed files with 83 additions and 234 deletions

View File

@@ -140,7 +140,6 @@ Format output doujinshi folder name:
Supported doujinshi folder formatter:
- %i: Doujinshi id
- %f: Doujinshi favorite count
- %t: Doujinshi name
- %s: Doujinshi subtitle (translated name)
- %a: Doujinshi authors' name
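
For reference, a minimal sketch of how placeholders like these can be expanded into a folder name. The helper and values below are invented for illustration; the project does the substitution with its own format_filename sanitizer, as the Doujinshi class hunk further down shows.

import re

def sanitize(value) -> str:
    # Assumed stand-in for format_filename(): drop characters that are
    # unsafe in file and folder names.
    return re.sub(r'[\\/:*?"<>|]', '', str(value)).strip()

def expand_format(name_format, doujinshi_id, artists, title) -> str:
    # Each %-placeholder is replaced by its sanitized value, mirroring the
    # str.replace() chain visible in the Doujinshi class below.
    name_format = name_format.replace('%i', sanitize(doujinshi_id))
    name_format = name_format.replace('%a', sanitize(artists))
    name_format = name_format.replace('%t', sanitize(title))
    return name_format

print(expand_format('[%i][%a][%t]', 440546, 'artist-a', 'example title'))
# [440546][artist-a][example title]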

View File

@@ -1,3 +1,3 @@
__version__ = '0.5.15'
__version__ = '0.5.12'
__author__ = 'RicterZ'
__email__ = 'ricterzheng@gmail.com'

View File

@@ -77,7 +77,7 @@ def main():
doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))
if not options.is_show:
downloader = Downloader(path=options.output_dir, threads=options.threads,
downloader = Downloader(path=options.output_dir, size=options.threads,
timeout=options.timeout, delay=options.delay)
for doujinshi_id in doujinshi_ids:
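
Note on the hunk above: the worker-count keyword changes from threads= to size= in this commit. A hedged sketch of the updated call, with invented path and numbers:

# After this change the worker count is passed as `size`; calling with the old
# `threads` keyword would raise a TypeError against the new signature.
downloader = Downloader(path='./downloads', size=5, timeout=30, delay=1)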

View File

@@ -29,12 +29,11 @@ class DoujinshiInfo(dict):
class Doujinshi(object):
def __init__(self, name=None, pretty_name=None, id=None, favorite_counts=0, img_id=None,
def __init__(self, name=None, pretty_name=None, id=None, img_id=None,
ext='', pages=0, name_format='[%i][%a][%t]', **kwargs):
self.name = name
self.pretty_name = pretty_name
self.id = id
self.favorite_counts = favorite_counts
self.img_id = img_id
self.ext = ext
self.pages = pages
@@ -46,7 +45,6 @@ class Doujinshi(object):
name_format = name_format.replace('%ag', format_filename(ag_value))
name_format = name_format.replace('%i', format_filename(str(self.id)))
name_format = name_format.replace('%f', format_filename(str(self.favorite_counts)))
name_format = name_format.replace('%a', format_filename(self.info.artists))
name_format = name_format.replace('%g', format_filename(self.info.groups))
@@ -65,7 +63,6 @@ class Doujinshi(object):
['Groups', self.info.groups],
['Languages', self.info.languages],
['Tags', self.info.tags],
['Favorite Counts', self.info.favorite_counts],
['URL', self.url],
['Pages', self.pages],
]
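
A hedged construction sketch for the reduced signature above (all field values are invented):

# favorite_counts is no longer a dedicated parameter; any extra keyword now
# simply falls through to **kwargs, and '%f' is no longer substituted in
# name_format.
doujinshi = Doujinshi(name='example title', pretty_name='example',
                      id=440546, img_id=1234567, ext='jpg', pages=25,
                      name_format='[%i][%a][%t]')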

View File

@@ -1,17 +1,24 @@
# coding: utf-8
import multiprocessing
import signal
import sys
import os
import asyncio
import httpx
import requests
import time
import urllib3.exceptions
from urllib.parse import urlparse
from nhentai import constant
from nhentai.logger import logger
from nhentai.utils import Singleton, async_request
from nhentai.parser import request
from nhentai.utils import Singleton
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
semaphore = multiprocessing.Semaphore(1)
class NHentaiImageNotExistException(Exception):
pass
@@ -32,68 +39,64 @@ def download_callback(result):
logger.log(16, f'{data} downloaded successfully')
class Downloader(Singleton):
def __init__(self, path='', threads=5, timeout=30, delay=0):
self.threads = threads
def __init__(self, path='', size=5, timeout=30, delay=0):
self.size = size
self.path = str(path)
self.timeout = timeout
self.delay = delay
async def fiber(self, tasks):
self.semaphore = asyncio.Semaphore(self.threads)
for completed_task in asyncio.as_completed(tasks):
try:
result = await completed_task
logger.info(f'{result[1]} download completed')
except Exception as e:
logger.error(f'An error occurred: {e}')
async def _semaphore_download(self, *args, **kwargs):
async with self.semaphore:
return await self.download(*args, **kwargs)
async def download(self, url, folder='', filename='', retried=0, proxy=None):
logger.info(f'Starting to download {url} ...')
def download(self, url, folder='', filename='', retried=0, proxy=None):
if self.delay:
await asyncio.sleep(self.delay)
time.sleep(self.delay)
logger.info(f'Starting to download {url} ...')
filename = filename if filename else os.path.basename(urlparse(url).path)
base_filename, extension = os.path.splitext(filename)
save_file_path = os.path.join(self.folder, filename)
save_file_path = os.path.join(folder, base_filename.zfill(3) + extension)
try:
if os.path.exists(save_file_path):
logger.warning(f'Skipped download: {save_file_path} already exists')
return 1, url
response = await async_request('GET', url, timeout=self.timeout, proxies=proxy)
response = None
with open(save_file_path, "wb") as f:
i = 0
while i < 10:
try:
response = request('get', url, stream=True, timeout=self.timeout, proxies=proxy)
if response.status_code != 200:
path = urlparse(url).path
for mirror in constant.IMAGE_URL_MIRRORS:
# print(f'{mirror}{path}')
mirror_url = f'{mirror}{path}'
response = request('get', mirror_url, stream=True,
timeout=self.timeout, proxies=proxy)
if response.status_code == 200:
break
if response.status_code != 200:
path = urlparse(url).path
for mirror in constant.IMAGE_URL_MIRRORS:
logger.info(f"Try mirror: {mirror}{path}")
mirror_url = f'{mirror}{path}'
response = await async_request('GET', mirror_url, timeout=self.timeout, proxies=proxy)
if response.status_code == 200:
break
except Exception as e:
i += 1
if not i < 10:
logger.critical(str(e))
return 0, None
continue
if not await self.save(filename, response):
logger.error(f'Can not download image {url}')
return 1, None
break
except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e:
length = response.headers.get('content-length')
if length is None:
f.write(response.content)
else:
for chunk in response.iter_content(2048):
f.write(chunk)
except (requests.HTTPError, requests.Timeout) as e:
if retried < 3:
logger.info(f'Download {filename} failed, retrying({retried + 1}) times...')
return await self.download(
url=url,
folder=folder,
filename=filename,
retried=retried + 1,
proxy=proxy,
)
logger.warning(f'Warning: {e}, retrying({retried}) ...')
return 0, self.download(url=url, folder=folder, filename=filename,
retried=retried+1, proxy=proxy)
else:
return 0, None
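
The loop above retries the primary URL up to ten times and, on a non-200 response, walks the configured image mirrors with the same path. A standalone sketch of that pattern; the mirror hosts and URL handling here are placeholders, not the project's actual constants:

import requests
from urllib.parse import urlparse

MIRRORS = ['https://mirror-1.example.com', 'https://mirror-2.example.com']  # placeholder hosts

def fetch_with_mirrors(url, timeout=30, max_attempts=10):
    # Try the primary host; fall back to each mirror on a non-200 status,
    # and repeat the whole sequence up to max_attempts times.
    for attempt in range(max_attempts):
        try:
            response = requests.get(url, stream=True, timeout=timeout)
            if response.status_code != 200:
                path = urlparse(url).path
                for mirror in MIRRORS:
                    response = requests.get(f'{mirror}{path}', stream=True, timeout=timeout)
                    if response.status_code == 200:
                        break
            if response.status_code == 200:
                return response
        except requests.RequestException:
            if attempt == max_attempts - 1:
                raise
    return None
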
@@ -103,8 +106,6 @@ class Downloader(Singleton):
except Exception as e:
import traceback
logger.error(f"Exception type: {type(e)}")
traceback.print_stack()
logger.critical(str(e))
return 0, None
@@ -114,24 +115,8 @@ class Downloader(Singleton):
return 1, url
async def save(self, save_file_path, response) -> bool:
if response is None:
logger.error('Error: Response is None')
return False
save_file_path = os.path.join(self.folder, save_file_path)
with open(save_file_path, 'wb') as f:
if response is not None:
length = response.headers.get('content-length')
if length is None:
f.write(response.content)
else:
async for chunk in response.aiter_bytes(2048):
f.write(chunk)
return True
def start_download(self, queue, folder='') -> bool:
if not isinstance(folder, (str,)):
if not isinstance(folder, (str, )):
folder = str(folder)
if self.path:
@@ -143,19 +128,34 @@ class Downloader(Singleton):
os.makedirs(folder)
except EnvironmentError as e:
logger.critical(str(e))
self.folder = folder
if os.getenv('DEBUG', None) == 'NODOWNLOAD':
# Assuming we want to continue with rest of process.
return True
queue = [(self, url, folder, constant.CONFIG['proxy']) for url in queue]
pool = multiprocessing.Pool(self.size, init_worker)
[pool.apply_async(download_wrapper, args=item) for item in queue]
coroutines = [
self._semaphore_download(url, filename=os.path.basename(urlparse(url).path))
for url in queue
]
# Prevent coroutines infection
asyncio.run(self.fiber(coroutines))
pool.close()
pool.join()
return True
def download_wrapper(obj, url, folder='', proxy=None):
if sys.platform == 'darwin' or semaphore.get_value():
return Downloader.download(obj, url=url, folder=folder, proxy=proxy)
else:
return -3, None
def init_worker():
signal.signal(signal.SIGINT, subprocess_signal)
def subprocess_signal(sig, frame):
if semaphore.acquire(timeout=1):
logger.warning('Ctrl-C pressed, exiting sub processes ...')
raise KeyboardInterrupt
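
start_download above fans the URLs out to a multiprocessing pool and installs a SIGINT handler in every worker so Ctrl-C can shut the sub-processes down. A reduced, self-contained sketch of that pool/initializer pattern; the worker function and the task list are invented:

import multiprocessing
import signal

def _handle_sigint(sig, frame):
    # Raised inside the worker; the parent process then sees the interrupt.
    raise KeyboardInterrupt

def init_worker():
    signal.signal(signal.SIGINT, _handle_sigint)

def download_one(url):
    # Placeholder worker; the real code calls Downloader.download() here.
    return 0, url

if __name__ == '__main__':
    urls = [f'https://example.com/{i}.jpg' for i in range(3)]  # invented task list
    pool = multiprocessing.Pool(5, init_worker)
    results = [pool.apply_async(download_one, args=(url,)) for url in urls]
    pool.close()
    pool.join()
    print([r.get() for r in results])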

View File

@@ -142,11 +142,10 @@ def doujinshi_parser(id_, counter=0):
title = doujinshi_info.find('h1').text
pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text
subtitle = doujinshi_info.find('h2')
favorite_counts = doujinshi_info.find('span', class_='nobold').find('span', class_='count').text.strip()
doujinshi['name'] = title
doujinshi['pretty_name'] = pretty_name
doujinshi['subtitle'] = subtitle.text if subtitle else ''
doujinshi['favorite_counts'] = favorite_counts
doujinshi_cover = html.find('div', attrs={'id': 'cover'})
img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
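
The parser reads the title, pretty name and subtitle out of the gallery page with BeautifulSoup (the favorite-count lookup is what this commit removes). A self-contained sketch of the same find() chain against a small invented HTML fragment:

from bs4 import BeautifulSoup

# Invented fragment shaped roughly like the gallery info block being parsed.
html = BeautifulSoup(
    '<div id="info"><h1><span class="pretty">Pretty Title</span> full title</h1>'
    '<h2>translated subtitle</h2></div>',
    'html.parser'
)
info = html.find('div', attrs={'id': 'info'})
title = info.find('h1').text                                  # 'Pretty Title full title'
pretty_name = info.find('h1').find('span', attrs={'class': 'pretty'}).text
subtitle = info.find('h2')
print(title, '|', pretty_name, '|', subtitle.text if subtitle else '')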

View File

@@ -8,8 +8,6 @@ from nhentai.constant import LANGUAGE_ISO
def serialize_json(doujinshi, output_dir):
metadata = {'title': doujinshi.name,
'subtitle': doujinshi.info.subtitle}
if doujinshi.info.favorite_counts:
metadata['favorite_counts'] = doujinshi.favorite_counts
if doujinshi.info.date:
metadata['upload_date'] = doujinshi.info.date
if doujinshi.info.parodies:
@@ -46,7 +44,6 @@ def serialize_comic_xml(doujinshi, output_dir):
xml_write_simple_tag(f, 'PageCount', doujinshi.pages)
xml_write_simple_tag(f, 'URL', doujinshi.url)
xml_write_simple_tag(f, 'NhentaiId', doujinshi.id)
xml_write_simple_tag(f, 'Favorites', doujinshi.favorite_counts)
xml_write_simple_tag(f, 'Genre', doujinshi.info.categories)
xml_write_simple_tag(f, 'BlackAndWhite', 'No' if doujinshi.info.tags and
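
serialize_json and serialize_comic_xml add optional fields only when the doujinshi actually has them; the favorite-count entries are what this commit drops. A minimal sketch of that conditional-field pattern with invented keys and values:

import json

info = {'title': 'example title', 'subtitle': 'translated subtitle',
        'date': '2024-11-09', 'parodies': ''}   # invented stand-in for the parsed info

metadata = {'title': info['title'], 'subtitle': info['subtitle']}
if info.get('date'):
    metadata['upload_date'] = info['date']
if info.get('parodies'):      # empty value: the key is simply omitted
    metadata['parody'] = info['parodies']
print(json.dumps(metadata, indent=2))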

View File

@@ -5,9 +5,7 @@ import re
import os
import zipfile
import shutil
import copy
import httpx
import requests
import sqlite3
import urllib.parse
@@ -34,32 +32,8 @@ def request(method, url, **kwargs):
return getattr(session, method)(url, verify=False, **kwargs)
async def async_request(method, url, proxies = None, **kwargs):
headers = {
'Referer': constant.LOGIN_URL,
'User-Agent': constant.CONFIG['useragent'],
'Cookie': constant.CONFIG['cookie'],
}
if proxies is None:
proxies = constant.CONFIG['proxy']
if proxies.get('http') == '' and proxies.get('https') == '':
proxies = None
if proxies:
_proxies = {f'{k}://': v for k, v in proxies.items() if v}
proxies = _proxies
async with httpx.AsyncClient(headers=headers, verify=False, proxies=proxies, **kwargs) as client:
response = await client.request(method, url, **kwargs)
return response
def check_cookie():
response = request('get', constant.BASE_URL)
if response.status_code == 403 and 'Just a moment...' in response.text:
logger.error('Blocked by Cloudflare captcha, please set your cookie and useragent')
sys.exit(1)
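
With async_request gone, all traffic goes through the synchronous request() helper, which looks the HTTP method up on a shared requests session via getattr. A small sketch of that dispatch idea; the session setup and URL are invented:

import requests

session = requests.Session()
session.headers.update({'User-Agent': 'example-agent'})   # the real helper also sets Referer and Cookie

def request(method, url, **kwargs):
    # getattr(session, 'get') / getattr(session, 'post') returns the bound method.
    return getattr(session, method)(url, verify=False, **kwargs)

response = request('get', 'https://example.com')
print(response.status_code)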

poetry.lock (generated, 119 changed lines)
View File

@@ -1,26 +1,4 @@
# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
[[package]]
name = "anyio"
version = "4.5.2"
description = "High level compatibility layer for multiple asynchronous event loop implementations"
optional = false
python-versions = ">=3.8"
files = [
{file = "anyio-4.5.2-py3-none-any.whl", hash = "sha256:c011ee36bc1e8ba40e5a81cb9df91925c218fe9b778554e0b56a21e1b5d4716f"},
{file = "anyio-4.5.2.tar.gz", hash = "sha256:23009af4ed04ce05991845451e11ef02fc7c5ed29179ac9a420e5ad0ac7ddc5b"},
]
[package.dependencies]
exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
idna = ">=2.8"
sniffio = ">=1.1"
typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""}
[package.extras]
doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21.0b1)"]
trio = ["trio (>=0.26.1)"]
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]]
name = "beautifulsoup4"
@@ -148,77 +126,6 @@ files = [
{file = "charset_normalizer-3.0.1-py3-none-any.whl", hash = "sha256:7e189e2e1d3ed2f4aebabd2d5b0f931e883676e51c7624826e0a4e5fe8a0bf24"},
]
[[package]]
name = "exceptiongroup"
version = "1.2.2"
description = "Backport of PEP 654 (exception groups)"
optional = false
python-versions = ">=3.7"
files = [
{file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
{file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
]
[package.extras]
test = ["pytest (>=6)"]
[[package]]
name = "h11"
version = "0.14.0"
description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
optional = false
python-versions = ">=3.7"
files = [
{file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
]
[[package]]
name = "httpcore"
version = "1.0.7"
description = "A minimal low-level HTTP client."
optional = false
python-versions = ">=3.8"
files = [
{file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"},
{file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"},
]
[package.dependencies]
certifi = "*"
h11 = ">=0.13,<0.15"
[package.extras]
asyncio = ["anyio (>=4.0,<5.0)"]
http2 = ["h2 (>=3,<5)"]
socks = ["socksio (==1.*)"]
trio = ["trio (>=0.22.0,<1.0)"]
[[package]]
name = "httpx"
version = "0.27.2"
description = "The next generation HTTP client."
optional = false
python-versions = ">=3.8"
files = [
{file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"},
{file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"},
]
[package.dependencies]
anyio = "*"
certifi = "*"
httpcore = "==1.*"
idna = "*"
sniffio = "*"
[package.extras]
brotli = ["brotli", "brotlicffi"]
cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
http2 = ["h2 (>=3,<5)"]
socks = ["socksio (==1.*)"]
zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "idna"
version = "3.7"
@@ -262,17 +169,6 @@ urllib3 = ">=1.21.1,<3"
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
[[package]]
name = "sniffio"
version = "1.3.1"
description = "Sniff out which async library your code is running under"
optional = false
python-versions = ">=3.7"
files = [
{file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
]
[[package]]
name = "soupsieve"
version = "2.4"
@@ -298,17 +194,6 @@ files = [
[package.extras]
widechars = ["wcwidth"]
[[package]]
name = "typing-extensions"
version = "4.12.2"
description = "Backported and Experimental Type Hints for Python 3.8+"
optional = false
python-versions = ">=3.8"
files = [
{file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
{file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
]
[[package]]
name = "urllib3"
version = "1.26.19"
@@ -328,4 +213,4 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.8"
content-hash = "a69dbf5dcfd6dcc5afc0fd2de4ab153841f7d210d4be60c426e332e36a79d679"
content-hash = "0a1d5abd47a669c7a1f2dc7b43824a449e29ba94908a4338d2ea0f2dfb4f805e"

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "nhentai"
version = "0.5.15"
version = "0.5.12"
description = "nhentai doujinshi downloader"
authors = ["Ricter Z <ricterzheng@gmail.com>"]
license = "MIT"
@@ -14,7 +14,6 @@ beautifulsoup4 = "^4.11.2"
tabulate = "^0.9.0"
iso8601 = "^1.1.0"
urllib3 = "^1.26.14"
httpx = "0.27.2"
[build-system]

View File

@@ -1,4 +1,3 @@
httpx==0.27.2
requests
soupsieve
setuptools

View File

@@ -20,7 +20,7 @@ class TestDownload(unittest.TestCase):
def test_download(self):
did = 440546
info = Doujinshi(**doujinshi_parser(did), name_format='%i')
info.downloader = Downloader(path='/tmp', threads=5)
info.downloader = Downloader(path='/tmp', size=5)
info.download()
self.assertTrue(os.path.exists(f'/tmp/{did}/001.jpg'))