mirror of
https://github.com/RicterZ/nhentai.git
synced 2025-07-01 16:09:28 +02:00
Compare commits
32 Commits
Author | SHA1 | Date | |
---|---|---|---|
3d6263cf11 | |||
e3410f5a9a | |||
feb7f45533 | |||
0754caaeb7 | |||
49e5a3094a | |||
c044b64beb | |||
f8334c09b5 | |||
c90c486fb4 | |||
90b17832cc | |||
14c6db9cc3 | |||
f30ff59b2b | |||
1504ee779f | |||
98d9eecf6d | |||
e16e623b9d | |||
c3f3182df3 | |||
12aad842f8 | |||
f9f76ab0f5 | |||
744a9e4418 | |||
c3e9fff491 | |||
a84e2c5714 | |||
c814c35c50 | |||
e2f71437e2 | |||
2fa45ae4df | |||
17bc33c6cb | |||
09bb8460f6 | |||
eb5b93d654 | |||
cb6cf6df1a | |||
98a66a3cb0 | |||
02d47632cf | |||
f932b1fbbe | |||
fd9e92f9d4 | |||
a8a48c6ce7 |
1
.gitignore
vendored
1
.gitignore
vendored
@ -8,3 +8,4 @@ dist/
|
|||||||
output/
|
output/
|
||||||
venv/
|
venv/
|
||||||
.vscode/
|
.vscode/
|
||||||
|
test-output
|
25
README.rst
25
README.rst
@ -161,25 +161,21 @@ Other options:
|
|||||||
NHENTAI nhentai mirror url
|
NHENTAI nhentai mirror url
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
# Operation options, control the program behaviors
|
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
-D, --download download doujinshi (for search results)
|
-D, --download download doujinshi (for search results)
|
||||||
-S, --show just show the doujinshi information
|
-S, --show just show the doujinshi information
|
||||||
|
|
||||||
# Doujinshi options, specify id, keyword, etc.
|
|
||||||
--id doujinshi ids set, e.g. 167680 167681 167682
|
--id doujinshi ids set, e.g. 167680 167681 167682
|
||||||
-s KEYWORD, --search=KEYWORD
|
-s KEYWORD, --search=KEYWORD
|
||||||
search doujinshi by keyword
|
search doujinshi by keyword
|
||||||
-F, --favorites list or download your favorites
|
-F, --favorites list or download your favorites
|
||||||
|
-a ARTIST, --artist=ARTIST
|
||||||
# Page options, control the page to fetch / download
|
list doujinshi by artist name
|
||||||
--page-all all search results
|
--page-all all search results
|
||||||
--page=PAGE, --page-range=PAGE
|
--page=PAGE, --page-range=PAGE
|
||||||
page number of search results. e.g. 1,2-5,14
|
page number of search results. e.g. 1,2-5,14
|
||||||
--sorting=SORTING sorting of doujinshi (recent / popular /
|
--sorting=SORTING, --sort=SORTING
|
||||||
|
sorting of doujinshi (recent / popular /
|
||||||
popular-[today|week])
|
popular-[today|week])
|
||||||
|
|
||||||
# Download options, the output directory, threads, timeout, delay, etc.
|
|
||||||
-o OUTPUT_DIR, --output=OUTPUT_DIR
|
-o OUTPUT_DIR, --output=OUTPUT_DIR
|
||||||
output dir
|
output dir
|
||||||
-t THREADS, --threads=THREADS
|
-t THREADS, --threads=THREADS
|
||||||
@ -192,8 +188,6 @@ Other options:
|
|||||||
-f FILE, --file=FILE read gallery IDs from file.
|
-f FILE, --file=FILE read gallery IDs from file.
|
||||||
--format=NAME_FORMAT format the saved folder name
|
--format=NAME_FORMAT format the saved folder name
|
||||||
--dry-run Dry run, skip file download
|
--dry-run Dry run, skip file download
|
||||||
|
|
||||||
# Generate options, for generate html viewer, cbz file, pdf file, etc
|
|
||||||
--html generate a html viewer at current directory
|
--html generate a html viewer at current directory
|
||||||
--no-html don't generate HTML after downloading
|
--no-html don't generate HTML after downloading
|
||||||
--gen-main generate a main viewer contain all the doujin in the
|
--gen-main generate a main viewer contain all the doujin in the
|
||||||
@ -202,12 +196,10 @@ Other options:
|
|||||||
-P, --pdf generate PDF file
|
-P, --pdf generate PDF file
|
||||||
--rm-origin-dir remove downloaded doujinshi dir when generated CBZ or
|
--rm-origin-dir remove downloaded doujinshi dir when generated CBZ or
|
||||||
PDF file
|
PDF file
|
||||||
--move-to-folder remove files in doujinshi dir then move new file to folder
|
--move-to-folder remove files in doujinshi dir then move new file to
|
||||||
when generated CBZ or PDF file
|
folder when generated CBZ or PDF file
|
||||||
--meta generate a metadata file in doujinshi format
|
--meta generate a metadata file in doujinshi format
|
||||||
--regenerate-cbz regenerate the cbz file if exists
|
--regenerate regenerate the cbz or pdf file if exists
|
||||||
|
|
||||||
# nhentai options, set cookie, user-agent, language, remove caches, histories, etc
|
|
||||||
--cookie=COOKIE set cookie of nhentai to bypass Cloudflare captcha
|
--cookie=COOKIE set cookie of nhentai to bypass Cloudflare captcha
|
||||||
--useragent=USERAGENT, --user-agent=USERAGENT
|
--useragent=USERAGENT, --user-agent=USERAGENT
|
||||||
set useragent to bypass Cloudflare captcha
|
set useragent to bypass Cloudflare captcha
|
||||||
@ -231,6 +223,9 @@ For example:
|
|||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
i.h.loli.club -> i.nhentai.net
|
i.h.loli.club -> i.nhentai.net
|
||||||
|
i3.h.loli.club -> i3.nhentai.net
|
||||||
|
i5.h.loli.club -> i5.nhentai.net
|
||||||
|
i7.h.loli.club -> i7.nhentai.net
|
||||||
h.loli.club -> nhentai.net
|
h.loli.club -> nhentai.net
|
||||||
|
|
||||||
Set `NHENTAI` env var to your nhentai mirror.
|
Set `NHENTAI` env var to your nhentai mirror.
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
__version__ = '0.5.8'
|
__version__ = '0.5.12'
|
||||||
__author__ = 'RicterZ'
|
__author__ = 'RicterZ'
|
||||||
__email__ = 'ricterzheng@gmail.com'
|
__email__ = 'ricterzheng@gmail.com'
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
import signal
|
import signal
|
||||||
import platform
|
import platform
|
||||||
@ -12,7 +14,7 @@ from nhentai.downloader import Downloader
|
|||||||
from nhentai.logger import logger
|
from nhentai.logger import logger
|
||||||
from nhentai.constant import BASE_URL
|
from nhentai.constant import BASE_URL
|
||||||
from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \
|
from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \
|
||||||
paging, check_cookie, signal_handler, DB
|
paging, check_cookie, signal_handler, DB, move_to_folder
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@ -75,7 +77,7 @@ def main():
|
|||||||
doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))
|
doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))
|
||||||
|
|
||||||
if not options.is_show:
|
if not options.is_show:
|
||||||
downloader = Downloader(path=options.output_dir, size=options.threads,
|
downloader = Downloader(path=options.output_dir, threads=options.threads,
|
||||||
timeout=options.timeout, delay=options.delay)
|
timeout=options.timeout, delay=options.delay)
|
||||||
|
|
||||||
for doujinshi_id in doujinshi_ids:
|
for doujinshi_id in doujinshi_ids:
|
||||||
@ -92,6 +94,7 @@ def main():
|
|||||||
doujinshi.download()
|
doujinshi.download()
|
||||||
else:
|
else:
|
||||||
logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
|
logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
|
||||||
|
continue
|
||||||
|
|
||||||
if options.generate_metadata:
|
if options.generate_metadata:
|
||||||
generate_metadata_file(options.output_dir, doujinshi)
|
generate_metadata_file(options.output_dir, doujinshi)
|
||||||
@ -104,12 +107,22 @@ def main():
|
|||||||
generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template'])
|
generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template'])
|
||||||
|
|
||||||
if options.is_cbz:
|
if options.is_cbz:
|
||||||
generate_doc('cbz', options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder,
|
generate_doc('cbz', options.output_dir, doujinshi, options.regenerate)
|
||||||
options.regenerate)
|
|
||||||
|
|
||||||
if options.is_pdf:
|
if options.is_pdf:
|
||||||
generate_doc('pdf', options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder,
|
generate_doc('pdf', options.output_dir, doujinshi, options.regenerate)
|
||||||
options.regenerate)
|
|
||||||
|
if options.move_to_folder:
|
||||||
|
if options.is_cbz:
|
||||||
|
move_to_folder(options.output_dir, doujinshi, 'cbz')
|
||||||
|
if options.is_pdf:
|
||||||
|
move_to_folder(options.output_dir, doujinshi, 'pdf')
|
||||||
|
|
||||||
|
if options.rm_origin_dir:
|
||||||
|
if options.move_to_folder:
|
||||||
|
logger.critical('You specified both --move-to-folder and --rm-origin-dir options, '
|
||||||
|
'you will not get anything :(')
|
||||||
|
shutil.rmtree(os.path.join(options.output_dir, doujinshi.filename), ignore_errors=True)
|
||||||
|
|
||||||
if options.main_viewer:
|
if options.main_viewer:
|
||||||
generate_main_html(options.output_dir)
|
generate_main_html(options.output_dir)
|
||||||
|
@ -38,9 +38,9 @@ FAV_URL = f'{BASE_URL}/favorites/'
|
|||||||
|
|
||||||
IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i.{urlparse(BASE_URL).hostname}/galleries'
|
IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i.{urlparse(BASE_URL).hostname}/galleries'
|
||||||
IMAGE_URL_MIRRORS = [
|
IMAGE_URL_MIRRORS = [
|
||||||
f'{urlparse(BASE_URL).scheme}://i3.{urlparse(BASE_URL).hostname}'
|
f'{urlparse(BASE_URL).scheme}://i3.{urlparse(BASE_URL).hostname}',
|
||||||
f'{urlparse(BASE_URL).scheme}://i5.{urlparse(BASE_URL).hostname}'
|
f'{urlparse(BASE_URL).scheme}://i5.{urlparse(BASE_URL).hostname}',
|
||||||
f'{urlparse(BASE_URL).scheme}://i7.{urlparse(BASE_URL).hostname}'
|
f'{urlparse(BASE_URL).scheme}://i7.{urlparse(BASE_URL).hostname}',
|
||||||
]
|
]
|
||||||
|
|
||||||
NHENTAI_HOME = get_nhentai_home()
|
NHENTAI_HOME = get_nhentai_home()
|
||||||
|
@ -12,6 +12,7 @@ EXT_MAP = {
|
|||||||
'j': 'jpg',
|
'j': 'jpg',
|
||||||
'p': 'png',
|
'p': 'png',
|
||||||
'g': 'gif',
|
'g': 'gif',
|
||||||
|
'w': 'webp',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -75,26 +76,28 @@ class Doujinshi(object):
|
|||||||
def check_if_need_download(self, options):
|
def check_if_need_download(self, options):
|
||||||
base_path = os.path.join(self.downloader.path, self.filename)
|
base_path = os.path.join(self.downloader.path, self.filename)
|
||||||
|
|
||||||
# doujinshi directory is not exist, we need to download definitely
|
# regenerate, re-download
|
||||||
if not (os.path.exists(base_path) and os.path.isdir(base_path)):
|
|
||||||
return True
|
|
||||||
|
|
||||||
# regenerate, we need to re-download from nhentai
|
|
||||||
if options.regenerate:
|
if options.regenerate:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
# pdf or cbz file exists, skip re-download
|
||||||
|
# doujinshi directory may not exist b/c of --rm-origin-dir option set.
|
||||||
|
# user should pass --regenerate option to get back origin dir.
|
||||||
|
ret_pdf = ret_cbz = None
|
||||||
if options.is_pdf:
|
if options.is_pdf:
|
||||||
file_ext = 'pdf'
|
ret_pdf = os.path.exists(f'{base_path}.pdf') or os.path.exists(f'{base_path}/{self.filename}.pdf')
|
||||||
elif options.is_cbz:
|
|
||||||
file_ext = 'cbz'
|
|
||||||
else:
|
|
||||||
# re-download
|
|
||||||
return True
|
|
||||||
|
|
||||||
# pdf or cbz file exists, we needn't to re-download it
|
if options.is_cbz:
|
||||||
if os.path.exists(f'{base_path}.{file_ext}') or os.path.exists(f'{base_path}/{self.filename}.{file_ext}'):
|
ret_cbz = os.path.exists(f'{base_path}.cbz') or os.path.exists(f'{base_path}/{self.filename}.cbz')
|
||||||
|
|
||||||
|
ret = list(filter(lambda s: s is not None, [ret_cbz, ret_pdf]))
|
||||||
|
if ret and all(ret):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# doujinshi directory doesn't exist, re-download
|
||||||
|
if not (os.path.exists(base_path) and os.path.isdir(base_path)):
|
||||||
|
return True
|
||||||
|
|
||||||
# fallback
|
# fallback
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -1,24 +1,17 @@
|
|||||||
# coding: utf-
|
# coding: utf-
|
||||||
|
|
||||||
import multiprocessing
|
|
||||||
import signal
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import os
|
import os
|
||||||
import requests
|
import asyncio
|
||||||
import time
|
import httpx
|
||||||
import urllib3.exceptions
|
import urllib3.exceptions
|
||||||
|
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from nhentai import constant
|
from nhentai import constant
|
||||||
from nhentai.logger import logger
|
from nhentai.logger import logger
|
||||||
from nhentai.parser import request
|
from nhentai.utils import Singleton, async_request
|
||||||
from nhentai.utils import Singleton
|
|
||||||
|
|
||||||
|
|
||||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||||
semaphore = multiprocessing.Semaphore(1)
|
|
||||||
|
|
||||||
|
|
||||||
class NHentaiImageNotExistException(Exception):
|
class NHentaiImageNotExistException(Exception):
|
||||||
pass
|
pass
|
||||||
@ -39,64 +32,66 @@ def download_callback(result):
|
|||||||
logger.log(16, f'{data} downloaded successfully')
|
logger.log(16, f'{data} downloaded successfully')
|
||||||
|
|
||||||
|
|
||||||
class Downloader(Singleton):
|
async def fiber(tasks):
|
||||||
|
for completed_task in asyncio.as_completed(tasks):
|
||||||
|
try:
|
||||||
|
result = await completed_task
|
||||||
|
logger.info(f'{result[1]} download completed')
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f'An error occurred: {e}')
|
||||||
|
|
||||||
def __init__(self, path='', size=5, timeout=30, delay=0):
|
|
||||||
self.size = size
|
class Downloader(Singleton):
|
||||||
|
def __init__(self, path='', threads=5, timeout=30, delay=0):
|
||||||
|
self.threads = threads
|
||||||
self.path = str(path)
|
self.path = str(path)
|
||||||
self.timeout = timeout
|
self.timeout = timeout
|
||||||
self.delay = delay
|
self.delay = delay
|
||||||
|
|
||||||
def download(self, url, folder='', filename='', retried=0, proxy=None):
|
async def _semaphore_download(self, semaphore, *args, **kwargs):
|
||||||
if self.delay:
|
async with semaphore:
|
||||||
time.sleep(self.delay)
|
return await self.download(*args, **kwargs)
|
||||||
logger.info(f'Starting to download {url} ...')
|
|
||||||
filename = filename if filename else os.path.basename(urlparse(url).path)
|
async def download(self, url, folder='', filename='', retried=0, proxy=None):
|
||||||
base_filename, extension = os.path.splitext(filename)
|
logger.info(f'Starting to download {url} ...')
|
||||||
|
|
||||||
|
if self.delay:
|
||||||
|
await asyncio.sleep(self.delay)
|
||||||
|
|
||||||
|
filename = filename if filename else os.path.basename(urlparse(url).path)
|
||||||
|
|
||||||
|
save_file_path = os.path.join(self.folder, filename)
|
||||||
|
|
||||||
save_file_path = os.path.join(folder, base_filename.zfill(3) + extension)
|
|
||||||
try:
|
try:
|
||||||
if os.path.exists(save_file_path):
|
if os.path.exists(save_file_path):
|
||||||
logger.warning(f'Skipped download: {save_file_path} already exists')
|
logger.warning(f'Skipped download: {save_file_path} already exists')
|
||||||
return 1, url
|
return 1, url
|
||||||
|
|
||||||
response = None
|
response = await async_request('GET', url, timeout=self.timeout, proxies=proxy)
|
||||||
with open(save_file_path, "wb") as f:
|
|
||||||
i = 0
|
|
||||||
while i < 10:
|
|
||||||
try:
|
|
||||||
response = request('get', url, stream=True, timeout=self.timeout, proxies=proxy)
|
|
||||||
if response.status_code != 200:
|
|
||||||
path = urlparse(url).path
|
|
||||||
for mirror in constant.IMAGE_URL_MIRRORS:
|
|
||||||
print(f'{mirror}{path}')
|
|
||||||
mirror_url = f'{mirror}{path}'
|
|
||||||
response = request('get', mirror_url, stream=True,
|
|
||||||
timeout=self.timeout, proxies=proxy)
|
|
||||||
if response.status_code == 200:
|
|
||||||
break
|
|
||||||
|
|
||||||
except Exception as e:
|
if response.status_code != 200:
|
||||||
i += 1
|
path = urlparse(url).path
|
||||||
if not i < 10:
|
for mirror in constant.IMAGE_URL_MIRRORS:
|
||||||
logger.critical(str(e))
|
logger.info(f"Try mirror: {mirror}{path}")
|
||||||
return 0, None
|
mirror_url = f'{mirror}{path}'
|
||||||
continue
|
response = await async_request('GET', mirror_url, timeout=self.timeout, proxies=proxy)
|
||||||
|
if response.status_code == 200:
|
||||||
|
break
|
||||||
|
|
||||||
break
|
if not await self.save(filename, response):
|
||||||
|
logger.error(f'Can not download image {url}')
|
||||||
|
return 1, None
|
||||||
|
|
||||||
length = response.headers.get('content-length')
|
except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e:
|
||||||
if length is None:
|
|
||||||
f.write(response.content)
|
|
||||||
else:
|
|
||||||
for chunk in response.iter_content(2048):
|
|
||||||
f.write(chunk)
|
|
||||||
|
|
||||||
except (requests.HTTPError, requests.Timeout) as e:
|
|
||||||
if retried < 3:
|
if retried < 3:
|
||||||
logger.warning(f'Warning: {e}, retrying({retried}) ...')
|
logger.info(f'Download {filename} failed, retrying({retried + 1}) times...')
|
||||||
return 0, self.download(url=url, folder=folder, filename=filename,
|
return await self.download(
|
||||||
retried=retried+1, proxy=proxy)
|
url=url,
|
||||||
|
folder=folder,
|
||||||
|
filename=filename,
|
||||||
|
retried=retried + 1,
|
||||||
|
proxy=proxy,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
return 0, None
|
return 0, None
|
||||||
|
|
||||||
@ -106,6 +101,8 @@ class Downloader(Singleton):
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
|
logger.error(f"Exception type: {type(e)}")
|
||||||
traceback.print_stack()
|
traceback.print_stack()
|
||||||
logger.critical(str(e))
|
logger.critical(str(e))
|
||||||
return 0, None
|
return 0, None
|
||||||
@ -115,8 +112,24 @@ class Downloader(Singleton):
|
|||||||
|
|
||||||
return 1, url
|
return 1, url
|
||||||
|
|
||||||
|
async def save(self, save_file_path, response) -> bool:
|
||||||
|
if response is None:
|
||||||
|
logger.error('Error: Response is None')
|
||||||
|
return False
|
||||||
|
save_file_path = os.path.join(self.folder, save_file_path)
|
||||||
|
with open(save_file_path, 'wb') as f:
|
||||||
|
if response is not None:
|
||||||
|
length = response.headers.get('content-length')
|
||||||
|
if length is None:
|
||||||
|
f.write(response.content)
|
||||||
|
else:
|
||||||
|
async for chunk in response.aiter_bytes(2048):
|
||||||
|
f.write(chunk)
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
def start_download(self, queue, folder='') -> bool:
|
def start_download(self, queue, folder='') -> bool:
|
||||||
if not isinstance(folder, (str, )):
|
if not isinstance(folder, (str,)):
|
||||||
folder = str(folder)
|
folder = str(folder)
|
||||||
|
|
||||||
if self.path:
|
if self.path:
|
||||||
@ -128,34 +141,20 @@ class Downloader(Singleton):
|
|||||||
os.makedirs(folder)
|
os.makedirs(folder)
|
||||||
except EnvironmentError as e:
|
except EnvironmentError as e:
|
||||||
logger.critical(str(e))
|
logger.critical(str(e))
|
||||||
|
self.folder = folder
|
||||||
|
|
||||||
if os.getenv('DEBUG', None) == 'NODOWNLOAD':
|
if os.getenv('DEBUG', None) == 'NODOWNLOAD':
|
||||||
# Assuming we want to continue with rest of process.
|
# Assuming we want to continue with rest of process.
|
||||||
return True
|
return True
|
||||||
queue = [(self, url, folder, constant.CONFIG['proxy']) for url in queue]
|
|
||||||
|
|
||||||
pool = multiprocessing.Pool(self.size, init_worker)
|
semaphore = asyncio.Semaphore(self.threads)
|
||||||
[pool.apply_async(download_wrapper, args=item) for item in queue]
|
|
||||||
|
|
||||||
pool.close()
|
coroutines = [
|
||||||
pool.join()
|
self._semaphore_download(semaphore, url, filename=os.path.basename(urlparse(url).path))
|
||||||
|
for url in queue
|
||||||
|
]
|
||||||
|
|
||||||
|
# Prevent coroutines infection
|
||||||
|
asyncio.run(fiber(coroutines))
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def download_wrapper(obj, url, folder='', proxy=None):
|
|
||||||
if sys.platform == 'darwin' or semaphore.get_value():
|
|
||||||
return Downloader.download(obj, url=url, folder=folder, proxy=proxy)
|
|
||||||
else:
|
|
||||||
return -3, None
|
|
||||||
|
|
||||||
|
|
||||||
def init_worker():
|
|
||||||
signal.signal(signal.SIGINT, subprocess_signal)
|
|
||||||
|
|
||||||
|
|
||||||
def subprocess_signal(sig, frame):
|
|
||||||
if semaphore.acquire(timeout=1):
|
|
||||||
logger.warning('Ctrl-C pressed, exiting sub processes ...')
|
|
||||||
|
|
||||||
raise KeyboardInterrupt
|
|
||||||
|
@ -148,7 +148,7 @@ def doujinshi_parser(id_, counter=0):
|
|||||||
doujinshi['subtitle'] = subtitle.text if subtitle else ''
|
doujinshi['subtitle'] = subtitle.text if subtitle else ''
|
||||||
|
|
||||||
doujinshi_cover = html.find('div', attrs={'id': 'cover'})
|
doujinshi_cover = html.find('div', attrs={'id': 'cover'})
|
||||||
img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif)$',
|
img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
|
||||||
doujinshi_cover.a.img.attrs['data-src'])
|
doujinshi_cover.a.img.attrs['data-src'])
|
||||||
|
|
||||||
ext = []
|
ext = []
|
||||||
|
@ -6,6 +6,7 @@ import os
|
|||||||
import zipfile
|
import zipfile
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
|
import httpx
|
||||||
import requests
|
import requests
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
@ -32,8 +33,28 @@ def request(method, url, **kwargs):
|
|||||||
return getattr(session, method)(url, verify=False, **kwargs)
|
return getattr(session, method)(url, verify=False, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
async def async_request(method, url, proxies = None, **kwargs):
|
||||||
|
headers = {
|
||||||
|
'Referer': constant.LOGIN_URL,
|
||||||
|
'User-Agent': constant.CONFIG['useragent'],
|
||||||
|
'Cookie': constant.CONFIG['cookie'],
|
||||||
|
}
|
||||||
|
|
||||||
|
if proxies is None:
|
||||||
|
proxies = constant.CONFIG['proxy']
|
||||||
|
|
||||||
|
if proxies.get('http') == '' and proxies.get('https') == '':
|
||||||
|
proxies = None
|
||||||
|
|
||||||
|
async with httpx.AsyncClient(headers=headers, verify=False, proxies=proxies, **kwargs) as client:
|
||||||
|
response = await client.request(method, url, **kwargs)
|
||||||
|
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
def check_cookie():
|
def check_cookie():
|
||||||
response = request('get', constant.BASE_URL)
|
response = request('get', constant.BASE_URL)
|
||||||
|
|
||||||
if response.status_code == 403 and 'Just a moment...' in response.text:
|
if response.status_code == 403 and 'Just a moment...' in response.text:
|
||||||
logger.error('Blocked by Cloudflare captcha, please set your cookie and useragent')
|
logger.error('Blocked by Cloudflare captcha, please set your cookie and useragent')
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
@ -72,8 +93,8 @@ def parse_doujinshi_obj(
|
|||||||
doujinshi_obj=None,
|
doujinshi_obj=None,
|
||||||
file_type: str = ''
|
file_type: str = ''
|
||||||
) -> Tuple[str, str]:
|
) -> Tuple[str, str]:
|
||||||
filename = './doujinshi' + file_type
|
|
||||||
|
|
||||||
|
filename = f'./doujinshi.{file_type}'
|
||||||
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
|
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
|
||||||
if doujinshi_obj is not None:
|
if doujinshi_obj is not None:
|
||||||
_filename = f'{doujinshi_obj.filename}.{file_type}'
|
_filename = f'{doujinshi_obj.filename}.{file_type}'
|
||||||
@ -104,7 +125,7 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
|
|||||||
file_list.sort()
|
file_list.sort()
|
||||||
|
|
||||||
for image in file_list:
|
for image in file_list:
|
||||||
if not os.path.splitext(image)[1] in ('.jpg', '.png'):
|
if not os.path.splitext(image)[1] in ('.jpg', '.png', '.webp'):
|
||||||
continue
|
continue
|
||||||
image_html += f'<img src="{image}" class="image-item"/>\n'
|
image_html += f'<img src="{image}" class="image-item"/>\n'
|
||||||
|
|
||||||
@ -128,6 +149,27 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
|
|||||||
logger.warning(f'Writing HTML Viewer failed ({e})')
|
logger.warning(f'Writing HTML Viewer failed ({e})')
|
||||||
|
|
||||||
|
|
||||||
|
def move_to_folder(output_dir='.', doujinshi_obj=None, file_type=None):
|
||||||
|
if not file_type:
|
||||||
|
raise RuntimeError('no file_type specified')
|
||||||
|
|
||||||
|
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
|
||||||
|
|
||||||
|
for fn in os.listdir(doujinshi_dir):
|
||||||
|
file_path = os.path.join(doujinshi_dir, fn)
|
||||||
|
_, ext = os.path.splitext(file_path)
|
||||||
|
if ext in ['.pdf', '.cbz']:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if os.path.isfile(file_path):
|
||||||
|
try:
|
||||||
|
os.remove(file_path)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error deleting file: {e}")
|
||||||
|
|
||||||
|
shutil.move(filename, os.path.join(doujinshi_dir, os.path.basename(filename)))
|
||||||
|
|
||||||
|
|
||||||
def generate_main_html(output_dir='./'):
|
def generate_main_html(output_dir='./'):
|
||||||
"""
|
"""
|
||||||
Generate a main html to show all the contains doujinshi.
|
Generate a main html to show all the contains doujinshi.
|
||||||
@ -185,8 +227,7 @@ def generate_main_html(output_dir='./'):
|
|||||||
logger.warning(f'Writing Main Viewer failed ({e})')
|
logger.warning(f'Writing Main Viewer failed ({e})')
|
||||||
|
|
||||||
|
|
||||||
def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, rm_origin_dir=False,
|
def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
|
||||||
move_to_folder=False, regenerate=False):
|
|
||||||
|
|
||||||
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
|
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
|
||||||
|
|
||||||
@ -210,7 +251,7 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, rm_origin_dir
|
|||||||
import img2pdf
|
import img2pdf
|
||||||
|
|
||||||
"""Write images to a PDF file using img2pdf."""
|
"""Write images to a PDF file using img2pdf."""
|
||||||
file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif'))]
|
file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp'))]
|
||||||
file_list.sort()
|
file_list.sort()
|
||||||
|
|
||||||
logger.info(f'Writing PDF file to path: {filename}')
|
logger.info(f'Writing PDF file to path: {filename}')
|
||||||
@ -225,20 +266,6 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, rm_origin_dir
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
logger.error("Please install img2pdf package by using pip.")
|
logger.error("Please install img2pdf package by using pip.")
|
||||||
|
|
||||||
if rm_origin_dir:
|
|
||||||
shutil.rmtree(doujinshi_dir, ignore_errors=True)
|
|
||||||
|
|
||||||
if move_to_folder:
|
|
||||||
for filename in os.listdir(doujinshi_dir):
|
|
||||||
file_path = os.path.join(doujinshi_dir, filename)
|
|
||||||
if os.path.isfile(file_path):
|
|
||||||
try:
|
|
||||||
os.remove(file_path)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error deleting file: {e}")
|
|
||||||
|
|
||||||
shutil.move(filename, doujinshi_dir)
|
|
||||||
|
|
||||||
|
|
||||||
def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
|
def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
|
||||||
"""
|
"""
|
||||||
@ -303,7 +330,7 @@ def generate_metadata_file(output_dir, doujinshi_obj):
|
|||||||
'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
|
'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
|
||||||
'TAGS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
|
'TAGS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
|
||||||
'SERIES', 'PARODY', 'URL']
|
'SERIES', 'PARODY', 'URL']
|
||||||
special_fields = ['PARODY', 'TITLE', 'ORIGINAL TITLE', 'CHARACTERS', 'AUTHOR', 'GROUPS',
|
special_fields = ['PARODY', 'TITLE', 'ORIGINAL TITLE', 'DATE', 'CHARACTERS', 'AUTHOR', 'GROUPS',
|
||||||
'LANGUAGE', 'TAGS', 'URL', 'PAGES']
|
'LANGUAGE', 'TAGS', 'URL', 'PAGES']
|
||||||
|
|
||||||
for i in range(len(fields)):
|
for i in range(len(fields)):
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "nhentai"
|
name = "nhentai"
|
||||||
version = "0.5.8"
|
version = "0.5.12"
|
||||||
description = "nhentai doujinshi downloader"
|
description = "nhentai doujinshi downloader"
|
||||||
authors = ["Ricter Z <ricterzheng@gmail.com>"]
|
authors = ["Ricter Z <ricterzheng@gmail.com>"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
httpx
|
||||||
requests
|
requests
|
||||||
soupsieve
|
soupsieve
|
||||||
setuptools
|
setuptools
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
[metadata]
|
[metadata]
|
||||||
description-file = README.rst
|
description_file = README.rst
|
||||||
|
|
||||||
|
@ -20,7 +20,7 @@ class TestDownload(unittest.TestCase):
|
|||||||
def test_download(self):
|
def test_download(self):
|
||||||
did = 440546
|
did = 440546
|
||||||
info = Doujinshi(**doujinshi_parser(did), name_format='%i')
|
info = Doujinshi(**doujinshi_parser(did), name_format='%i')
|
||||||
info.downloader = Downloader(path='/tmp', size=5)
|
info.downloader = Downloader(path='/tmp', threads=5)
|
||||||
info.download()
|
info.download()
|
||||||
|
|
||||||
self.assertTrue(os.path.exists(f'/tmp/{did}/001.jpg'))
|
self.assertTrue(os.path.exists(f'/tmp/{did}/001.jpg'))
|
||||||
|
Reference in New Issue
Block a user