mirror of
https://github.com/RicterZ/nhentai.git
synced 2025-07-01 16:09:28 +02:00
Compare commits
36 Commits
Author | SHA1 | Date | |
---|---|---|---|
d1d0c22af8 | |||
803957ba88 | |||
13b584a820 | |||
be08fcf4cb | |||
b585225308 | |||
54af682848 | |||
d74fd103f0 | |||
0cb2411955 | |||
de08d3daaa | |||
946b85ace9 | |||
5bde24f159 | |||
3cae13e76f | |||
7483b8f923 | |||
eae42c8eb5 | |||
b841747761 | |||
1f3528afad | |||
bb41e502c1 | |||
7089144ac6 | |||
0a9f7c3d3e | |||
40536ad456 | |||
edb571c9dd | |||
b2befd3473 | |||
c2e880f172 | |||
841988bc29 | |||
390948e252 | |||
b9b8468bfe | |||
3d6263cf11 | |||
e3410f5a9a | |||
feb7f45533 | |||
0754caaeb7 | |||
49e5a3094a | |||
c044b64beb | |||
f8334c09b5 | |||
c90c486fb4 | |||
90b17832cc | |||
14c6db9cc3 |
@ -5,7 +5,7 @@ COPY requirements.txt ./
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY . .
|
||||
RUN python setup.py install
|
||||
RUN pip install .
|
||||
|
||||
WORKDIR /output
|
||||
ENTRYPOINT ["nhentai"]
|
||||
|
@ -59,7 +59,7 @@ On Gentoo Linux:
|
||||
|
||||
.. code-block::
|
||||
|
||||
layman -fa glicOne
|
||||
layman -fa glibOne
|
||||
sudo emerge net-misc/nhentai
|
||||
|
||||
On NixOS:
|
||||
@ -129,7 +129,7 @@ Download your favorites with delay:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
nhentai --favorites --download --delay 1
|
||||
nhentai --favorites --download --delay 1 --page 3-5,7
|
||||
|
||||
Format output doujinshi folder name:
|
||||
|
||||
@ -140,6 +140,7 @@ Format output doujinshi folder name:
|
||||
Supported doujinshi folder formatter:
|
||||
|
||||
- %i: Doujinshi id
|
||||
- %f: Doujinshi favorite count
|
||||
- %t: Doujinshi name
|
||||
- %s: Doujinshi subtitle (translated name)
|
||||
- %a: Doujinshi authors' name
|
||||
|
@ -1,3 +1,3 @@
|
||||
__version__ = '0.5.12'
|
||||
__version__ = '0.5.17.2'
|
||||
__author__ = 'RicterZ'
|
||||
__email__ = 'ricterzheng@gmail.com'
|
||||
|
@ -37,7 +37,7 @@ def write_config():
|
||||
f.write(json.dumps(constant.CONFIG))
|
||||
|
||||
|
||||
def callback(option, opt_str, value, parser):
|
||||
def callback(option, _opt_str, _value, parser):
|
||||
if option == '--id':
|
||||
pass
|
||||
value = []
|
||||
@ -79,7 +79,7 @@ def cmd_parser():
|
||||
# page options
|
||||
parser.add_option('--page-all', dest='page_all', action='store_true', default=False,
|
||||
help='all search results')
|
||||
parser.add_option('--page', '--page-range', type='string', dest='page', action='store', default='1',
|
||||
parser.add_option('--page', '--page-range', type='string', dest='page', action='store',
|
||||
help='page number of search results. e.g. 1,2-5,14')
|
||||
parser.add_option('--sorting', '--sort', dest='sorting', action='store', default='popular',
|
||||
help='sorting of doujinshi (recent / popular / popular-[today|week])',
|
||||
|
@ -48,7 +48,7 @@ def main():
|
||||
if not options.is_download:
|
||||
logger.warning('You do not specify --download option')
|
||||
|
||||
doujinshis = favorites_parser() if options.page_all else favorites_parser(page=page_list)
|
||||
doujinshis = favorites_parser(page=page_list) if options.page else favorites_parser()
|
||||
|
||||
elif options.keyword:
|
||||
if constant.CONFIG['language']:
|
||||
@ -77,7 +77,7 @@ def main():
|
||||
doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))
|
||||
|
||||
if not options.is_show:
|
||||
downloader = Downloader(path=options.output_dir, size=options.threads,
|
||||
downloader = Downloader(path=options.output_dir, threads=options.threads,
|
||||
timeout=options.timeout, delay=options.delay)
|
||||
|
||||
for doujinshi_id in doujinshi_ids:
|
||||
|
@ -38,8 +38,12 @@ FAV_URL = f'{BASE_URL}/favorites/'
|
||||
|
||||
IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i.{urlparse(BASE_URL).hostname}/galleries'
|
||||
IMAGE_URL_MIRRORS = [
|
||||
f'{urlparse(BASE_URL).scheme}://i1.{urlparse(BASE_URL).hostname}',
|
||||
f'{urlparse(BASE_URL).scheme}://i2.{urlparse(BASE_URL).hostname}',
|
||||
f'{urlparse(BASE_URL).scheme}://i3.{urlparse(BASE_URL).hostname}',
|
||||
f'{urlparse(BASE_URL).scheme}://i4.{urlparse(BASE_URL).hostname}',
|
||||
f'{urlparse(BASE_URL).scheme}://i5.{urlparse(BASE_URL).hostname}',
|
||||
f'{urlparse(BASE_URL).scheme}://i6.{urlparse(BASE_URL).hostname}',
|
||||
f'{urlparse(BASE_URL).scheme}://i7.{urlparse(BASE_URL).hostname}',
|
||||
]
|
||||
|
||||
|
@ -29,11 +29,12 @@ class DoujinshiInfo(dict):
|
||||
|
||||
|
||||
class Doujinshi(object):
|
||||
def __init__(self, name=None, pretty_name=None, id=None, img_id=None,
|
||||
def __init__(self, name=None, pretty_name=None, id=None, favorite_counts=0, img_id=None,
|
||||
ext='', pages=0, name_format='[%i][%a][%t]', **kwargs):
|
||||
self.name = name
|
||||
self.pretty_name = pretty_name
|
||||
self.id = id
|
||||
self.favorite_counts = favorite_counts
|
||||
self.img_id = img_id
|
||||
self.ext = ext
|
||||
self.pages = pages
|
||||
@ -45,6 +46,7 @@ class Doujinshi(object):
|
||||
name_format = name_format.replace('%ag', format_filename(ag_value))
|
||||
|
||||
name_format = name_format.replace('%i', format_filename(str(self.id)))
|
||||
name_format = name_format.replace('%f', format_filename(str(self.favorite_counts)))
|
||||
name_format = name_format.replace('%a', format_filename(self.info.artists))
|
||||
name_format = name_format.replace('%g', format_filename(self.info.groups))
|
||||
|
||||
@ -55,7 +57,7 @@ class Doujinshi(object):
|
||||
|
||||
self.table = [
|
||||
['Parodies', self.info.parodies],
|
||||
['Doujinshi', self.name],
|
||||
['Title', self.name],
|
||||
['Subtitle', self.info.subtitle],
|
||||
['Date', self.info.date],
|
||||
['Characters', self.info.characters],
|
||||
@ -63,6 +65,7 @@ class Doujinshi(object):
|
||||
['Groups', self.info.groups],
|
||||
['Languages', self.info.languages],
|
||||
['Tags', self.info.tags],
|
||||
['Favorite Counts', self.info.favorite_counts],
|
||||
['URL', self.url],
|
||||
['Pages', self.pages],
|
||||
]
|
||||
|
@ -1,23 +1,17 @@
|
||||
# coding: utf-
|
||||
|
||||
import multiprocessing
|
||||
import signal
|
||||
|
||||
import sys
|
||||
import os
|
||||
import requests
|
||||
import time
|
||||
import asyncio
|
||||
import httpx
|
||||
import urllib3.exceptions
|
||||
|
||||
from urllib.parse import urlparse
|
||||
from nhentai import constant
|
||||
from nhentai.logger import logger
|
||||
from nhentai.parser import request
|
||||
from nhentai.utils import Singleton
|
||||
from nhentai.utils import Singleton, async_request
|
||||
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
semaphore = multiprocessing.Semaphore(1)
|
||||
|
||||
|
||||
class NHentaiImageNotExistException(Exception):
|
||||
@ -40,65 +34,75 @@ def download_callback(result):
|
||||
|
||||
|
||||
class Downloader(Singleton):
|
||||
|
||||
def __init__(self, path='', size=5, timeout=30, delay=0):
|
||||
self.size = size
|
||||
def __init__(self, path='', threads=5, timeout=30, delay=0):
|
||||
self.threads = threads
|
||||
self.path = str(path)
|
||||
self.timeout = timeout
|
||||
self.delay = delay
|
||||
self.folder = None
|
||||
self.semaphore = None
|
||||
|
||||
def download(self, url, folder='', filename='', retried=0, proxy=None):
|
||||
if self.delay:
|
||||
time.sleep(self.delay)
|
||||
async def fiber(self, tasks):
    """Await every download coroutine and log the outcome of each one.

    Builds the ``asyncio.Semaphore`` (sized by ``self.threads``) that
    ``_semaphore_download`` acquires, then consumes ``tasks`` in the
    order in which they finish.

    :param tasks: awaitables that each resolve to a ``(status, url)``
        tuple, as returned by ``download``.
    """
    self.semaphore = asyncio.Semaphore(self.threads)

    for completed_task in asyncio.as_completed(tasks):
        try:
            status, url = await completed_task
            # The status code decides success; the URL is returned for
            # failed downloads too, so it cannot be used as the success flag.
            if status > 0:
                logger.info(f'{url} download completed')
            else:
                logger.warning(f'{url} download failed, return value {status}')
        except Exception as e:
            # One failing task must not stop the remaining downloads.
            logger.error(f'An error occurred: {e}')
|
||||
|
||||
async def _semaphore_download(self, *args, **kwargs):
    """Run ``download`` while holding one slot of ``self.semaphore``.

    All positional and keyword arguments are passed through to
    ``download`` unchanged, and its return value is returned as is.
    """
    limiter = self.semaphore
    async with limiter:
        outcome = await self.download(*args, **kwargs)
    return outcome
|
||||
|
||||
async def download(self, url, folder='', filename='', retried=0, proxy=None, length=0):
|
||||
logger.info(f'Starting to download {url} ...')
|
||||
|
||||
if self.delay:
|
||||
await asyncio.sleep(self.delay)
|
||||
|
||||
filename = filename if filename else os.path.basename(urlparse(url).path)
|
||||
base_filename, extension = os.path.splitext(filename)
|
||||
filename = base_filename.zfill(length) + extension
|
||||
|
||||
save_file_path = os.path.join(self.folder, filename)
|
||||
|
||||
save_file_path = os.path.join(folder, base_filename.zfill(3) + extension)
|
||||
try:
|
||||
if os.path.exists(save_file_path):
|
||||
logger.warning(f'Skipped download: {save_file_path} already exists')
|
||||
return 1, url
|
||||
|
||||
response = None
|
||||
with open(save_file_path, "wb") as f:
|
||||
i = 0
|
||||
while i < 10:
|
||||
try:
|
||||
response = request('get', url, stream=True, timeout=self.timeout, proxies=proxy)
|
||||
response = await async_request('GET', url, timeout=self.timeout, proxies=proxy)
|
||||
|
||||
if response.status_code != 200:
|
||||
path = urlparse(url).path
|
||||
for mirror in constant.IMAGE_URL_MIRRORS:
|
||||
# print(f'{mirror}{path}')
|
||||
logger.info(f"Try mirror: {mirror}{path}")
|
||||
mirror_url = f'{mirror}{path}'
|
||||
response = request('get', mirror_url, stream=True,
|
||||
timeout=self.timeout, proxies=proxy)
|
||||
response = await async_request('GET', mirror_url, timeout=self.timeout, proxies=proxy)
|
||||
if response.status_code == 200:
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
i += 1
|
||||
if not i < 10:
|
||||
logger.critical(str(e))
|
||||
return 0, None
|
||||
continue
|
||||
if not await self.save(filename, response):
|
||||
logger.error(f'Can not download image {url}')
|
||||
return 1, url
|
||||
|
||||
break
|
||||
|
||||
length = response.headers.get('content-length')
|
||||
if length is None:
|
||||
f.write(response.content)
|
||||
else:
|
||||
for chunk in response.iter_content(2048):
|
||||
f.write(chunk)
|
||||
|
||||
except (requests.HTTPError, requests.Timeout) as e:
|
||||
except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e:
|
||||
if retried < 3:
|
||||
logger.warning(f'Warning: {e}, retrying({retried}) ...')
|
||||
return 0, self.download(url=url, folder=folder, filename=filename,
|
||||
retried=retried+1, proxy=proxy)
|
||||
logger.warning(f'Download {filename} failed, retrying({retried + 1}) times...')
|
||||
return await self.download(
|
||||
url=url,
|
||||
folder=folder,
|
||||
filename=filename,
|
||||
retried=retried + 1,
|
||||
proxy=proxy,
|
||||
)
|
||||
else:
|
||||
return 0, None
|
||||
logger.warning(f'Download {filename} failed with 3 times retried, skipped')
|
||||
return 0, url
|
||||
|
||||
except NHentaiImageNotExistException as e:
|
||||
os.remove(save_file_path)
|
||||
@ -106,17 +110,34 @@ class Downloader(Singleton):
|
||||
|
||||
except Exception as e:
|
||||
import traceback
|
||||
|
||||
logger.error(f"Exception type: {type(e)}")
|
||||
traceback.print_stack()
|
||||
logger.critical(str(e))
|
||||
return 0, None
|
||||
return 0, url
|
||||
|
||||
except KeyboardInterrupt:
|
||||
return -3, None
|
||||
return -3, url
|
||||
|
||||
return 1, url
|
||||
|
||||
async def save(self, save_file_path, response) -> bool:
    """Write the body of ``response`` to a file inside ``self.folder``.

    :param save_file_path: file name; it is joined onto ``self.folder``.
    :param response: object exposing ``headers``, ``content`` and
        ``aiter_bytes``; may be ``None``.
    :return: ``False`` (after logging an error) when ``response`` is
        ``None``, otherwise ``True`` once the file has been written.
    """
    if response is None:
        logger.error('Error: Response is None')
        return False

    save_file_path = os.path.join(self.folder, save_file_path)
    with open(save_file_path, 'wb') as f:
        # response is known to be non-None here, so no second check is needed.
        if response.headers.get('content-length') is None:
            f.write(response.content)
        else:
            async for chunk in response.aiter_bytes(2048):
                f.write(chunk)
    return True
|
||||
|
||||
def start_download(self, queue, folder='') -> bool:
|
||||
if not isinstance(folder, (str, )):
|
||||
if not isinstance(folder, (str,)):
|
||||
folder = str(folder)
|
||||
|
||||
if self.path:
|
||||
@ -128,34 +149,20 @@ class Downloader(Singleton):
|
||||
os.makedirs(folder)
|
||||
except EnvironmentError as e:
|
||||
logger.critical(str(e))
|
||||
self.folder = folder
|
||||
|
||||
if os.getenv('DEBUG', None) == 'NODOWNLOAD':
|
||||
# Assuming we want to continue with rest of process.
|
||||
return True
|
||||
queue = [(self, url, folder, constant.CONFIG['proxy']) for url in queue]
|
||||
|
||||
pool = multiprocessing.Pool(self.size, init_worker)
|
||||
[pool.apply_async(download_wrapper, args=item) for item in queue]
|
||||
digit_length = len(str(len(queue)))
|
||||
logger.info(f'Total download pages: {len(queue)}')
|
||||
coroutines = [
|
||||
self._semaphore_download(url, filename=os.path.basename(urlparse(url).path), length=digit_length)
|
||||
for url in queue
|
||||
]
|
||||
|
||||
pool.close()
|
||||
pool.join()
|
||||
# Prevent coroutines infection
|
||||
asyncio.run(self.fiber(coroutines))
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def download_wrapper(obj, url, folder='', proxy=None):
|
||||
if sys.platform == 'darwin' or semaphore.get_value():
|
||||
return Downloader.download(obj, url=url, folder=folder, proxy=proxy)
|
||||
else:
|
||||
return -3, None
|
||||
|
||||
|
||||
def init_worker():
|
||||
signal.signal(signal.SIGINT, subprocess_signal)
|
||||
|
||||
|
||||
def subprocess_signal(sig, frame):
|
||||
if semaphore.acquire(timeout=1):
|
||||
logger.warning('Ctrl-C pressed, exiting sub processes ...')
|
||||
|
||||
raise KeyboardInterrupt
|
||||
|
@ -135,30 +135,38 @@ def doujinshi_parser(id_, counter=0):
|
||||
logger.warning(f'Error: {e}, ignored')
|
||||
return None
|
||||
|
||||
# print(response)
|
||||
html = BeautifulSoup(response, 'html.parser')
|
||||
doujinshi_info = html.find('div', attrs={'id': 'info'})
|
||||
|
||||
title = doujinshi_info.find('h1').text
|
||||
pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text
|
||||
subtitle = doujinshi_info.find('h2')
|
||||
favorite_counts = str(doujinshi_info.find('span', class_='nobold').find('span', class_='count'))
|
||||
if favorite_counts is None:
|
||||
favorite_counts = '0'
|
||||
|
||||
doujinshi['name'] = title
|
||||
doujinshi['pretty_name'] = pretty_name
|
||||
doujinshi['subtitle'] = subtitle.text if subtitle else ''
|
||||
doujinshi['favorite_counts'] = favorite_counts.strip()
|
||||
|
||||
doujinshi_cover = html.find('div', attrs={'id': 'cover'})
|
||||
img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
|
||||
doujinshi_cover.a.img.attrs['data-src'])
|
||||
# img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
|
||||
# doujinshi_cover.a.img.attrs['data-src'])
|
||||
img_id = re.search(r'/galleries/(\d+)/cover\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
|
||||
|
||||
ext = []
|
||||
for i in html.find_all('div', attrs={'class': 'thumb-container'}):
|
||||
_, ext_name = os.path.basename(i.img.attrs['data-src']).rsplit('.', 1)
|
||||
ext.append(ext_name)
|
||||
base_name = os.path.basename(i.img.attrs['data-src'])
|
||||
ext_name = base_name.split('.')
|
||||
if len(ext_name) == 3:
|
||||
ext.append(ext_name[1])
|
||||
else:
|
||||
ext.append(ext_name[-1])
|
||||
|
||||
if not img_id:
|
||||
logger.critical('Tried yo get image id failed')
|
||||
sys.exit(1)
|
||||
logger.critical(f'Tried yo get image id failed of id: {id_}')
|
||||
return None
|
||||
|
||||
doujinshi['img_id'] = img_id.group(1)
|
||||
doujinshi['ext'] = ext
|
||||
@ -185,53 +193,6 @@ def doujinshi_parser(id_, counter=0):
|
||||
return doujinshi
|
||||
|
||||
|
||||
def legacy_doujinshi_parser(id_):
|
||||
if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()):
|
||||
raise Exception(f'Doujinshi id({id_}) is not valid')
|
||||
|
||||
id_ = int(id_)
|
||||
logger.info(f'Fetching information of doujinshi id {id_}')
|
||||
doujinshi = dict()
|
||||
doujinshi['id'] = id_
|
||||
url = f'{constant.DETAIL_URL}/{id_}'
|
||||
i = 0
|
||||
while 5 > i:
|
||||
try:
|
||||
response = request('get', url).json()
|
||||
except Exception as e:
|
||||
i += 1
|
||||
if not i < 5:
|
||||
logger.critical(str(e))
|
||||
sys.exit(1)
|
||||
continue
|
||||
break
|
||||
|
||||
doujinshi['name'] = response['title']['english']
|
||||
doujinshi['subtitle'] = response['title']['japanese']
|
||||
doujinshi['img_id'] = response['media_id']
|
||||
doujinshi['ext'] = ''.join([i['t'] for i in response['images']['pages']])
|
||||
doujinshi['pages'] = len(response['images']['pages'])
|
||||
|
||||
# gain information of the doujinshi
|
||||
needed_fields = ['character', 'artist', 'language', 'tag', 'parody', 'group', 'category']
|
||||
for tag in response['tags']:
|
||||
tag_type = tag['type']
|
||||
if tag_type in needed_fields:
|
||||
if tag_type == 'tag':
|
||||
if tag_type not in doujinshi:
|
||||
doujinshi[tag_type] = {}
|
||||
|
||||
tag['name'] = tag['name'].replace(' ', '-')
|
||||
tag['name'] = tag['name'].lower()
|
||||
doujinshi[tag_type][tag['name']] = tag['id']
|
||||
elif tag_type not in doujinshi:
|
||||
doujinshi[tag_type] = tag['name']
|
||||
else:
|
||||
doujinshi[tag_type] += ', ' + tag['name']
|
||||
|
||||
return doujinshi
|
||||
|
||||
|
||||
def print_doujinshi(doujinshi_list):
|
||||
if not doujinshi_list:
|
||||
return
|
||||
|
@ -8,6 +8,8 @@ from nhentai.constant import LANGUAGE_ISO
|
||||
def serialize_json(doujinshi, output_dir):
|
||||
metadata = {'title': doujinshi.name,
|
||||
'subtitle': doujinshi.info.subtitle}
|
||||
if doujinshi.info.favorite_counts:
|
||||
metadata['favorite_counts'] = doujinshi.favorite_counts
|
||||
if doujinshi.info.date:
|
||||
metadata['upload_date'] = doujinshi.info.date
|
||||
if doujinshi.info.parodies:
|
||||
@ -44,6 +46,7 @@ def serialize_comic_xml(doujinshi, output_dir):
|
||||
xml_write_simple_tag(f, 'PageCount', doujinshi.pages)
|
||||
xml_write_simple_tag(f, 'URL', doujinshi.url)
|
||||
xml_write_simple_tag(f, 'NhentaiId', doujinshi.id)
|
||||
xml_write_simple_tag(f, 'Favorites', doujinshi.favorite_counts)
|
||||
xml_write_simple_tag(f, 'Genre', doujinshi.info.categories)
|
||||
|
||||
xml_write_simple_tag(f, 'BlackAndWhite', 'No' if doujinshi.info.tags and
|
||||
|
@ -6,16 +6,19 @@ import os
|
||||
import zipfile
|
||||
import shutil
|
||||
|
||||
import httpx
|
||||
import requests
|
||||
import sqlite3
|
||||
import urllib.parse
|
||||
from typing import Optional, Tuple
|
||||
from typing import Tuple
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
from nhentai import constant
|
||||
from nhentai.logger import logger
|
||||
from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database
|
||||
|
||||
MAX_FIELD_LENGTH = 100
|
||||
EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
|
||||
|
||||
|
||||
def request(method, url, **kwargs):
|
||||
@ -32,8 +35,32 @@ def request(method, url, **kwargs):
|
||||
return getattr(session, method)(url, verify=False, **kwargs)
|
||||
|
||||
|
||||
async def async_request(method, url, proxies=None, **kwargs):
    """Send one HTTP request through a short-lived ``httpx.AsyncClient``.

    The ``Referer``, ``User-Agent`` and ``Cookie`` headers are taken from
    ``constant``; certificate verification is disabled.

    :param method: HTTP method name, e.g. ``'GET'``.
    :param url: target URL.
    :param proxies: mapping of scheme (``'http'`` / ``'https'``) to proxy
        URL. ``None`` falls back to ``constant.CONFIG['proxy']``.
    :param kwargs: per-request options forwarded to
        ``AsyncClient.request`` (for example ``timeout``).
    :return: the response returned by ``AsyncClient.request``.
    """
    headers = {
        'Referer': constant.LOGIN_URL,
        'User-Agent': constant.CONFIG['useragent'],
        'Cookie': constant.CONFIG['cookie'],
    }

    if proxies is None:
        proxies = constant.CONFIG['proxy']

    # Two empty strings mean "no proxy configured".
    if proxies.get('http') == '' and proxies.get('https') == '':
        proxies = None

    if proxies:
        # Re-key by URL prefix ("http://") and drop entries without a value.
        proxies = {f'{k}://': v for k, v in proxies.items() if v}

    # kwargs are request-level options, so they are passed to request() only.
    # Handing them to the AsyncClient constructor as well raised TypeError for
    # any option the constructor does not accept (json=, data=, ...).
    async with httpx.AsyncClient(headers=headers, verify=False, proxies=proxies) as client:
        response = await client.request(method, url, **kwargs)

    return response
|
||||
|
||||
|
||||
def check_cookie():
|
||||
response = request('get', constant.BASE_URL)
|
||||
|
||||
if response.status_code == 403 and 'Just a moment...' in response.text:
|
||||
logger.error('Blocked by Cloudflare captcha, please set your cookie and useragent')
|
||||
sys.exit(1)
|
||||
@ -74,8 +101,8 @@ def parse_doujinshi_obj(
|
||||
) -> Tuple[str, str]:
|
||||
|
||||
filename = f'./doujinshi.{file_type}'
|
||||
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
|
||||
if doujinshi_obj is not None:
|
||||
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
|
||||
_filename = f'{doujinshi_obj.filename}.{file_type}'
|
||||
|
||||
if file_type == 'cbz':
|
||||
@ -85,6 +112,8 @@ def parse_doujinshi_obj(
|
||||
_filename = _filename.replace('/', '-')
|
||||
|
||||
filename = os.path.join(output_dir, _filename)
|
||||
else:
|
||||
doujinshi_dir = './'
|
||||
|
||||
return doujinshi_dir, filename
|
||||
|
||||
@ -104,7 +133,7 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
|
||||
file_list.sort()
|
||||
|
||||
for image in file_list:
|
||||
if not os.path.splitext(image)[1] in ('.jpg', '.png', '.webp'):
|
||||
if not os.path.splitext(image)[1] in EXTENSIONS:
|
||||
continue
|
||||
image_html += f'<img src="{image}" class="image-item"/>\n'
|
||||
|
||||
@ -230,7 +259,7 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa
|
||||
import img2pdf
|
||||
|
||||
"""Write images to a PDF file using img2pdf."""
|
||||
file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp'))]
|
||||
file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(EXTENSIONS)]
|
||||
file_list.sort()
|
||||
|
||||
logger.info(f'Writing PDF file to path: {filename}')
|
||||
@ -251,7 +280,7 @@ def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
|
||||
It used to be a whitelist approach that allowed only alphabetic characters and a subset of symbols.
|
||||
but most doujinshi names include Japanese 2-byte characters, and these were rejected.
|
||||
so it now uses a blacklist approach.
|
||||
if filename include forbidden characters (\'/:,;*?"<>|) ,it replace space character(' ').
|
||||
If the filename includes forbidden characters (\'/:,;*?"<>|), they are replaced with a space character (" ").
|
||||
"""
|
||||
# maybe you can use `--format` to select a suitable filename
|
||||
|
||||
@ -274,7 +303,7 @@ def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
|
||||
return filename
|
||||
|
||||
|
||||
def signal_handler(signal, frame):
|
||||
def signal_handler(_signal, _frame):
|
||||
logger.error('Ctrl-C signal received. Stopping...')
|
||||
sys.exit(1)
|
||||
|
||||
@ -282,7 +311,8 @@ def signal_handler(signal, frame):
|
||||
def paging(page_string):
|
||||
# 1,3-5,14 -> [1, 3, 4, 5, 14]
|
||||
if not page_string:
|
||||
return []
|
||||
# default, the first page
|
||||
return [1]
|
||||
|
||||
page_list = []
|
||||
for i in page_string.split(','):
|
||||
@ -309,14 +339,14 @@ def generate_metadata_file(output_dir, doujinshi_obj):
|
||||
'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
|
||||
'TAGS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
|
||||
'SERIES', 'PARODY', 'URL']
|
||||
special_fields = ['PARODY', 'TITLE', 'ORIGINAL TITLE', 'DATE', 'CHARACTERS', 'AUTHOR', 'GROUPS',
|
||||
'LANGUAGE', 'TAGS', 'URL', 'PAGES']
|
||||
|
||||
for i in range(len(fields)):
|
||||
f.write(f'{fields[i]}: ')
|
||||
if fields[i] in special_fields:
|
||||
f.write(str(doujinshi_obj.table[special_fields.index(fields[i])][1]))
|
||||
f.write('\n')
|
||||
temp_dict = CaseInsensitiveDict(dict(doujinshi_obj.table))
|
||||
for i in fields:
|
||||
v = temp_dict.get(i)
|
||||
v = temp_dict.get(f'{i}s') if v is None else v
|
||||
v = doujinshi_obj.info.get(i.lower(), None) if v is None else v
|
||||
v = doujinshi_obj.info.get(f'{i.lower()}s', "Unknown") if v is None else v
|
||||
f.write(f'{i}: {v}\n')
|
||||
|
||||
f.close()
|
||||
logger.log(16, f'Metadata Info has been written to "{info_txt_path}"')
|
||||
|
119
poetry.lock
generated
119
poetry.lock
generated
@ -1,4 +1,26 @@
|
||||
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "anyio"
|
||||
version = "4.5.2"
|
||||
description = "High level compatibility layer for multiple asynchronous event loop implementations"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "anyio-4.5.2-py3-none-any.whl", hash = "sha256:c011ee36bc1e8ba40e5a81cb9df91925c218fe9b778554e0b56a21e1b5d4716f"},
|
||||
{file = "anyio-4.5.2.tar.gz", hash = "sha256:23009af4ed04ce05991845451e11ef02fc7c5ed29179ac9a420e5ad0ac7ddc5b"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
|
||||
idna = ">=2.8"
|
||||
sniffio = ">=1.1"
|
||||
typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""}
|
||||
|
||||
[package.extras]
|
||||
doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
|
||||
test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21.0b1)"]
|
||||
trio = ["trio (>=0.26.1)"]
|
||||
|
||||
[[package]]
|
||||
name = "beautifulsoup4"
|
||||
@ -126,6 +148,77 @@ files = [
|
||||
{file = "charset_normalizer-3.0.1-py3-none-any.whl", hash = "sha256:7e189e2e1d3ed2f4aebabd2d5b0f931e883676e51c7624826e0a4e5fe8a0bf24"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "exceptiongroup"
|
||||
version = "1.2.2"
|
||||
description = "Backport of PEP 654 (exception groups)"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
|
||||
{file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
test = ["pytest (>=6)"]
|
||||
|
||||
[[package]]
|
||||
name = "h11"
|
||||
version = "0.14.0"
|
||||
description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
|
||||
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "httpcore"
|
||||
version = "1.0.7"
|
||||
description = "A minimal low-level HTTP client."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"},
|
||||
{file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
certifi = "*"
|
||||
h11 = ">=0.13,<0.15"
|
||||
|
||||
[package.extras]
|
||||
asyncio = ["anyio (>=4.0,<5.0)"]
|
||||
http2 = ["h2 (>=3,<5)"]
|
||||
socks = ["socksio (==1.*)"]
|
||||
trio = ["trio (>=0.22.0,<1.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "httpx"
|
||||
version = "0.27.2"
|
||||
description = "The next generation HTTP client."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"},
|
||||
{file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
anyio = "*"
|
||||
certifi = "*"
|
||||
httpcore = "==1.*"
|
||||
idna = "*"
|
||||
sniffio = "*"
|
||||
|
||||
[package.extras]
|
||||
brotli = ["brotli", "brotlicffi"]
|
||||
cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
|
||||
http2 = ["h2 (>=3,<5)"]
|
||||
socks = ["socksio (==1.*)"]
|
||||
zstd = ["zstandard (>=0.18.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "3.7"
|
||||
@ -169,6 +262,17 @@ urllib3 = ">=1.21.1,<3"
|
||||
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
|
||||
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
|
||||
|
||||
[[package]]
|
||||
name = "sniffio"
|
||||
version = "1.3.1"
|
||||
description = "Sniff out which async library your code is running under"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
|
||||
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "soupsieve"
|
||||
version = "2.4"
|
||||
@ -194,6 +298,17 @@ files = [
|
||||
[package.extras]
|
||||
widechars = ["wcwidth"]
|
||||
|
||||
[[package]]
|
||||
name = "typing-extensions"
|
||||
version = "4.12.2"
|
||||
description = "Backported and Experimental Type Hints for Python 3.8+"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
|
||||
{file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "urllib3"
|
||||
version = "1.26.19"
|
||||
@ -213,4 +328,4 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.8"
|
||||
content-hash = "0a1d5abd47a669c7a1f2dc7b43824a449e29ba94908a4338d2ea0f2dfb4f805e"
|
||||
content-hash = "a69dbf5dcfd6dcc5afc0fd2de4ab153841f7d210d4be60c426e332e36a79d679"
|
||||
|
@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "nhentai"
|
||||
version = "0.5.12"
|
||||
version = "0.5.17.2"
|
||||
description = "nhentai doujinshi downloader"
|
||||
authors = ["Ricter Z <ricterzheng@gmail.com>"]
|
||||
license = "MIT"
|
||||
@ -14,8 +14,12 @@ beautifulsoup4 = "^4.11.2"
|
||||
tabulate = "^0.9.0"
|
||||
iso8601 = "^1.1.0"
|
||||
urllib3 = "^1.26.14"
|
||||
httpx = "0.27.2"
|
||||
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.poetry.scripts]
|
||||
nhentai = 'nhentai.command:main'
|
29
qodana.yaml
Executable file
29
qodana.yaml
Executable file
@ -0,0 +1,29 @@
|
||||
#-------------------------------------------------------------------------------#
|
||||
# Qodana analysis is configured by qodana.yaml file #
|
||||
# https://www.jetbrains.com/help/qodana/qodana-yaml.html #
|
||||
#-------------------------------------------------------------------------------#
|
||||
version: "1.0"
|
||||
|
||||
#Specify inspection profile for code analysis
|
||||
profile:
|
||||
name: qodana.starter
|
||||
|
||||
#Enable inspections
|
||||
#include:
|
||||
# - name: <SomeEnabledInspectionId>
|
||||
|
||||
#Disable inspections
|
||||
#exclude:
|
||||
# - name: <SomeDisabledInspectionId>
|
||||
# paths:
|
||||
# - <path/where/not/run/inspection>
|
||||
|
||||
#Execute shell command before Qodana execution (Applied in CI/CD pipeline)
|
||||
#bootstrap: sh ./prepare-qodana.sh
|
||||
|
||||
#Install IDE plugins before Qodana execution (Applied in CI/CD pipeline)
|
||||
#plugins:
|
||||
# - id: <plugin.id> #(plugin id can be found at https://plugins.jetbrains.com)
|
||||
|
||||
#Specify Qodana linter for analysis (Applied in CI/CD pipeline)
|
||||
linter: jetbrains/qodana-python:2024.3
|
@ -1,3 +1,4 @@
|
||||
httpx==0.27.2
|
||||
requests
|
||||
soupsieve
|
||||
setuptools
|
||||
|
38
setup.py
38
setup.py
@ -1,38 +0,0 @@
|
||||
# coding: utf-8
|
||||
import codecs
|
||||
from setuptools import setup, find_packages
|
||||
from nhentai import __version__, __author__, __email__
|
||||
|
||||
|
||||
with open('requirements.txt') as f:
|
||||
requirements = [l for l in f.read().splitlines() if l]
|
||||
|
||||
|
||||
def long_description():
|
||||
with codecs.open('README.rst', 'rb') as readme:
|
||||
return readme.read().decode('utf-8')
|
||||
|
||||
|
||||
setup(
|
||||
name='nhentai',
|
||||
version=__version__,
|
||||
packages=find_packages(),
|
||||
|
||||
author=__author__,
|
||||
author_email=__email__,
|
||||
keywords=['nhentai', 'doujinshi', 'downloader'],
|
||||
description='nhentai.net doujinshis downloader',
|
||||
long_description=long_description(),
|
||||
url='https://github.com/RicterZ/nhentai',
|
||||
download_url='https://github.com/RicterZ/nhentai/tarball/master',
|
||||
include_package_data=True,
|
||||
zip_safe=False,
|
||||
|
||||
install_requires=requirements,
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
'nhentai = nhentai.command:main',
|
||||
]
|
||||
},
|
||||
license='MIT',
|
||||
)
|
@ -20,7 +20,7 @@ class TestDownload(unittest.TestCase):
|
||||
def test_download(self):
|
||||
did = 440546
|
||||
info = Doujinshi(**doujinshi_parser(did), name_format='%i')
|
||||
info.downloader = Downloader(path='/tmp', size=5)
|
||||
info.downloader = Downloader(path='/tmp', threads=5)
|
||||
info.download()
|
||||
|
||||
self.assertTrue(os.path.exists(f'/tmp/{did}/001.jpg'))
|
||||
|
Reference in New Issue
Block a user