mirror of
https://github.com/RicterZ/nhentai.git
synced 2025-07-02 00:19:29 +02:00
Compare commits
80 Commits
Author | SHA1 | Date | |
---|---|---|---|
a83c571ec4 | |||
e7ff5dab3d | |||
a166898b60 | |||
ce25051fa3 | |||
41fba6b5ac | |||
8944ece4a8 | |||
6b4c4bdc70 | |||
d1d0c22af8 | |||
803957ba88 | |||
13b584a820 | |||
be08fcf4cb | |||
b585225308 | |||
54af682848 | |||
d74fd103f0 | |||
0cb2411955 | |||
de08d3daaa | |||
946b85ace9 | |||
5bde24f159 | |||
3cae13e76f | |||
7483b8f923 | |||
eae42c8eb5 | |||
b841747761 | |||
1f3528afad | |||
bb41e502c1 | |||
7089144ac6 | |||
0a9f7c3d3e | |||
40536ad456 | |||
edb571c9dd | |||
b2befd3473 | |||
c2e880f172 | |||
841988bc29 | |||
390948e252 | |||
b9b8468bfe | |||
3d6263cf11 | |||
e3410f5a9a | |||
feb7f45533 | |||
0754caaeb7 | |||
49e5a3094a | |||
c044b64beb | |||
f8334c09b5 | |||
c90c486fb4 | |||
90b17832cc | |||
14c6db9cc3 | |||
f30ff59b2b | |||
1504ee779f | |||
98d9eecf6d | |||
e16e623b9d | |||
c3f3182df3 | |||
12aad842f8 | |||
f9f76ab0f5 | |||
744a9e4418 | |||
c3e9fff491 | |||
a84e2c5714 | |||
c814c35c50 | |||
e2f71437e2 | |||
2fa45ae4df | |||
17bc33c6cb | |||
09bb8460f6 | |||
eb5b93d654 | |||
cb6cf6df1a | |||
98a66a3cb0 | |||
02d47632cf | |||
f932b1fbbe | |||
fd9e92f9d4 | |||
a8a48c6ce7 | |||
f6e9d08fc7 | |||
9c1c2ea069 | |||
984ae4262c | |||
cbf9448ed9 | |||
16bac45f02 | |||
7fa9193112 | |||
a05a308e71 | |||
5a29eaf775 | |||
497eb6fe50 | |||
4bfe104714 | |||
12364e980c | |||
b51e812449 | |||
0ed5fa1931 | |||
7f655b0f10 | |||
dec3f44542 |
1
.gitignore
vendored
1
.gitignore
vendored
@ -8,3 +8,4 @@ dist/
|
||||
output/
|
||||
venv/
|
||||
.vscode/
|
||||
test-output
|
@ -5,7 +5,7 @@ COPY requirements.txt ./
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY . .
|
||||
RUN python setup.py install
|
||||
RUN pip install .
|
||||
|
||||
WORKDIR /output
|
||||
ENTRYPOINT ["nhentai"]
|
||||
|
30
README.rst
30
README.rst
@ -59,7 +59,7 @@ On Gentoo Linux:
|
||||
|
||||
.. code-block::
|
||||
|
||||
layman -fa glicOne
|
||||
layman -fa glibOne
|
||||
sudo emerge net-misc/nhentai
|
||||
|
||||
On NixOS:
|
||||
@ -129,7 +129,7 @@ Download your favorites with delay:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
nhentai --favorites --download --delay 1
|
||||
nhentai --favorites --download --delay 1 --page 3-5,7
|
||||
|
||||
Format output doujinshi folder name:
|
||||
|
||||
@ -140,6 +140,7 @@ Format output doujinshi folder name:
|
||||
Supported doujinshi folder formatter:
|
||||
|
||||
- %i: Doujinshi id
|
||||
- %f: Doujinshi favorite count
|
||||
- %t: Doujinshi name
|
||||
- %s: Doujinshi subtitle (translated name)
|
||||
- %a: Doujinshi authors' name
|
||||
@ -161,25 +162,21 @@ Other options:
|
||||
NHENTAI nhentai mirror url
|
||||
|
||||
Options:
|
||||
# Operation options, control the program behaviors
|
||||
-h, --help show this help message and exit
|
||||
-D, --download download doujinshi (for search results)
|
||||
-S, --show just show the doujinshi information
|
||||
|
||||
# Doujinshi options, specify id, keyword, etc.
|
||||
--id doujinshi ids set, e.g. 167680 167681 167682
|
||||
-s KEYWORD, --search=KEYWORD
|
||||
search doujinshi by keyword
|
||||
-F, --favorites list or download your favorites
|
||||
|
||||
# Page options, control the page to fetch / download
|
||||
-a ARTIST, --artist=ARTIST
|
||||
list doujinshi by artist name
|
||||
--page-all all search results
|
||||
--page=PAGE, --page-range=PAGE
|
||||
page number of search results. e.g. 1,2-5,14
|
||||
--sorting=SORTING sorting of doujinshi (recent / popular /
|
||||
--sorting=SORTING, --sort=SORTING
|
||||
sorting of doujinshi (recent / popular /
|
||||
popular-[today|week])
|
||||
|
||||
# Download options, the output directory, threads, timeout, delay, etc.
|
||||
-o OUTPUT_DIR, --output=OUTPUT_DIR
|
||||
output dir
|
||||
-t THREADS, --threads=THREADS
|
||||
@ -192,8 +189,6 @@ Other options:
|
||||
-f FILE, --file=FILE read gallery IDs from file.
|
||||
--format=NAME_FORMAT format the saved folder name
|
||||
--dry-run Dry run, skip file download
|
||||
|
||||
# Generate options, for generate html viewer, cbz file, pdf file, etc
|
||||
--html generate a html viewer at current directory
|
||||
--no-html don't generate HTML after downloading
|
||||
--gen-main generate a main viewer contain all the doujin in the
|
||||
@ -202,12 +197,10 @@ Other options:
|
||||
-P, --pdf generate PDF file
|
||||
--rm-origin-dir remove downloaded doujinshi dir when generated CBZ or
|
||||
PDF file
|
||||
--move-to-folder remove files in doujinshi dir then move new file to folder
|
||||
when generated CBZ or PDF file
|
||||
--move-to-folder remove files in doujinshi dir then move new file to
|
||||
folder when generated CBZ or PDF file
|
||||
--meta generate a metadata file in doujinshi format
|
||||
--regenerate-cbz regenerate the cbz file if exists
|
||||
|
||||
# nhentai options, set cookie, user-agent, language, remove caches, histories, etc
|
||||
--regenerate regenerate the cbz or pdf file if exists
|
||||
--cookie=COOKIE set cookie of nhentai to bypass Cloudflare captcha
|
||||
--useragent=USERAGENT, --user-agent=USERAGENT
|
||||
set useragent to bypass Cloudflare captcha
|
||||
@ -231,6 +224,9 @@ For example:
|
||||
.. code-block::
|
||||
|
||||
i.h.loli.club -> i.nhentai.net
|
||||
i3.h.loli.club -> i3.nhentai.net
|
||||
i5.h.loli.club -> i5.nhentai.net
|
||||
i7.h.loli.club -> i7.nhentai.net
|
||||
h.loli.club -> nhentai.net
|
||||
|
||||
Set `NHENTAI` env var to your nhentai mirror.
|
||||
|
@ -1,3 +1,3 @@
|
||||
__version__ = '0.5.7'
|
||||
__version__ = '0.5.19'
|
||||
__author__ = 'RicterZ'
|
||||
__email__ = 'ricterzheng@gmail.com'
|
||||
|
@ -11,6 +11,7 @@ from optparse import OptionParser
|
||||
from nhentai import __version__
|
||||
from nhentai.utils import generate_html, generate_main_html, DB
|
||||
from nhentai.logger import logger
|
||||
from nhentai.constant import PATH_SEPARATOR
|
||||
|
||||
|
||||
def banner():
|
||||
@ -37,7 +38,7 @@ def write_config():
|
||||
f.write(json.dumps(constant.CONFIG))
|
||||
|
||||
|
||||
def callback(option, opt_str, value, parser):
|
||||
def callback(option, _opt_str, _value, parser):
|
||||
if option == '--id':
|
||||
pass
|
||||
value = []
|
||||
@ -64,7 +65,8 @@ def cmd_parser():
|
||||
# operation options
|
||||
parser.add_option('--download', '-D', dest='is_download', action='store_true',
|
||||
help='download doujinshi (for search results)')
|
||||
parser.add_option('--show', '-S', dest='is_show', action='store_true', help='just show the doujinshi information')
|
||||
parser.add_option('--show', '-S', dest='is_show', action='store_true',
|
||||
help='just show the doujinshi information')
|
||||
|
||||
# doujinshi options
|
||||
parser.add_option('--id', dest='id', action='callback', callback=callback,
|
||||
@ -79,14 +81,15 @@ def cmd_parser():
|
||||
# page options
|
||||
parser.add_option('--page-all', dest='page_all', action='store_true', default=False,
|
||||
help='all search results')
|
||||
parser.add_option('--page', '--page-range', type='string', dest='page', action='store', default='1',
|
||||
parser.add_option('--page', '--page-range', type='string', dest='page', action='store',
|
||||
help='page number of search results. e.g. 1,2-5,14')
|
||||
parser.add_option('--sorting', '--sort', dest='sorting', action='store', default='popular',
|
||||
help='sorting of doujinshi (recent / popular / popular-[today|week])',
|
||||
choices=['recent', 'popular', 'popular-today', 'popular-week', 'date'])
|
||||
|
||||
# download options
|
||||
parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', default='./',
|
||||
parser.add_option('--output', '-o', type='string', dest='output_dir', action='store',
|
||||
default=f'.{PATH_SEPARATOR}',
|
||||
help='output dir')
|
||||
parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
|
||||
help='thread count for downloading doujinshi')
|
||||
@ -96,7 +99,8 @@ def cmd_parser():
|
||||
help='slow down between downloading every doujinshi')
|
||||
parser.add_option('--proxy', type='string', dest='proxy', action='store',
|
||||
help='store a proxy, for example: -p "http://127.0.0.1:1080"')
|
||||
parser.add_option('--file', '-f', type='string', dest='file', action='store', help='read gallery IDs from file.')
|
||||
parser.add_option('--file', '-f', type='string', dest='file', action='store',
|
||||
help='read gallery IDs from file.')
|
||||
parser.add_option('--format', type='string', dest='name_format', action='store',
|
||||
help='format the saved folder name', default='[%i][%a][%t]')
|
||||
parser.add_option('--dry-run', action='store_true', dest='dryrun', help='Dry run, skip file download')
|
||||
@ -118,8 +122,8 @@ def cmd_parser():
|
||||
help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
|
||||
parser.add_option('--meta', dest='generate_metadata', action='store_true',
|
||||
help='generate a metadata file in doujinshi format')
|
||||
parser.add_option('--regenerate-cbz', dest='regenerate_cbz', action='store_true', default=False,
|
||||
help='regenerate the cbz file if exists')
|
||||
parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False,
|
||||
help='regenerate the cbz or pdf file if exists')
|
||||
|
||||
# nhentai options
|
||||
parser.add_option('--cookie', type='str', dest='cookie', action='store',
|
||||
|
@ -1,4 +1,6 @@
|
||||
# coding: utf-8
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import signal
|
||||
import platform
|
||||
@ -11,8 +13,8 @@ from nhentai.doujinshi import Doujinshi
|
||||
from nhentai.downloader import Downloader
|
||||
from nhentai.logger import logger
|
||||
from nhentai.constant import BASE_URL
|
||||
from nhentai.utils import generate_html, generate_cbz, generate_main_html, generate_pdf, generate_metadata_file, \
|
||||
paging, check_cookie, signal_handler, DB
|
||||
from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \
|
||||
paging, check_cookie, signal_handler, DB, move_to_folder
|
||||
|
||||
|
||||
def main():
|
||||
@ -46,7 +48,7 @@ def main():
|
||||
if not options.is_download:
|
||||
logger.warning('You do not specify --download option')
|
||||
|
||||
doujinshis = favorites_parser() if options.page_all else favorites_parser(page=page_list)
|
||||
doujinshis = favorites_parser(page=page_list) if options.page else favorites_parser()
|
||||
|
||||
elif options.keyword:
|
||||
if constant.CONFIG['language']:
|
||||
@ -75,7 +77,7 @@ def main():
|
||||
doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))
|
||||
|
||||
if not options.is_show:
|
||||
downloader = Downloader(path=options.output_dir, size=options.threads,
|
||||
downloader = Downloader(path=options.output_dir, threads=options.threads,
|
||||
timeout=options.timeout, delay=options.delay)
|
||||
|
||||
for doujinshi_id in doujinshi_ids:
|
||||
@ -87,22 +89,40 @@ def main():
|
||||
|
||||
if not options.dryrun:
|
||||
doujinshi.downloader = downloader
|
||||
doujinshi.download(regenerate_cbz=options.regenerate_cbz)
|
||||
|
||||
if doujinshi.check_if_need_download(options):
|
||||
doujinshi.download()
|
||||
else:
|
||||
logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
|
||||
continue
|
||||
|
||||
if options.generate_metadata:
|
||||
table = doujinshi.table
|
||||
generate_metadata_file(options.output_dir, table, doujinshi)
|
||||
generate_metadata_file(options.output_dir, doujinshi)
|
||||
|
||||
if options.is_save_download_history:
|
||||
with DB() as db:
|
||||
db.add_one(doujinshi.id)
|
||||
|
||||
if not options.is_nohtml and not options.is_cbz and not options.is_pdf:
|
||||
if not options.is_nohtml:
|
||||
generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template'])
|
||||
elif options.is_cbz:
|
||||
generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir, True, options.move_to_folder)
|
||||
elif options.is_pdf:
|
||||
generate_pdf(options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder)
|
||||
|
||||
if options.is_cbz:
|
||||
generate_doc('cbz', options.output_dir, doujinshi, options.regenerate)
|
||||
|
||||
if options.is_pdf:
|
||||
generate_doc('pdf', options.output_dir, doujinshi, options.regenerate)
|
||||
|
||||
if options.move_to_folder:
|
||||
if options.is_cbz:
|
||||
move_to_folder(options.output_dir, doujinshi, 'cbz')
|
||||
if options.is_pdf:
|
||||
move_to_folder(options.output_dir, doujinshi, 'pdf')
|
||||
|
||||
if options.rm_origin_dir:
|
||||
if options.move_to_folder:
|
||||
logger.critical('You specified both --move-to-folder and --rm-origin-dir options, '
|
||||
'you will not get anything :(')
|
||||
shutil.rmtree(os.path.join(options.output_dir, doujinshi.filename), ignore_errors=True)
|
||||
|
||||
if options.main_viewer:
|
||||
generate_main_html(options.output_dir)
|
||||
|
@ -35,12 +35,17 @@ LOGIN_URL = f'{BASE_URL}/login/'
|
||||
CHALLENGE_URL = f'{BASE_URL}/challenge'
|
||||
FAV_URL = f'{BASE_URL}/favorites/'
|
||||
|
||||
PATH_SEPARATOR = os.path.sep
|
||||
|
||||
IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i.{urlparse(BASE_URL).hostname}/galleries'
|
||||
|
||||
IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i1.{urlparse(BASE_URL).hostname}/galleries'
|
||||
IMAGE_URL_MIRRORS = [
|
||||
f'{urlparse(BASE_URL).scheme}://i3.{urlparse(BASE_URL).hostname}'
|
||||
f'{urlparse(BASE_URL).scheme}://i5.{urlparse(BASE_URL).hostname}'
|
||||
f'{urlparse(BASE_URL).scheme}://i7.{urlparse(BASE_URL).hostname}'
|
||||
f'{urlparse(BASE_URL).scheme}://i2.{urlparse(BASE_URL).hostname}',
|
||||
f'{urlparse(BASE_URL).scheme}://i3.{urlparse(BASE_URL).hostname}',
|
||||
f'{urlparse(BASE_URL).scheme}://i4.{urlparse(BASE_URL).hostname}',
|
||||
f'{urlparse(BASE_URL).scheme}://i5.{urlparse(BASE_URL).hostname}',
|
||||
f'{urlparse(BASE_URL).scheme}://i6.{urlparse(BASE_URL).hostname}',
|
||||
f'{urlparse(BASE_URL).scheme}://i7.{urlparse(BASE_URL).hostname}',
|
||||
]
|
||||
|
||||
NHENTAI_HOME = get_nhentai_home()
|
||||
|
@ -1,4 +1,5 @@
|
||||
# coding: utf-8
|
||||
import os
|
||||
|
||||
from tabulate import tabulate
|
||||
|
||||
@ -11,6 +12,7 @@ EXT_MAP = {
|
||||
'j': 'jpg',
|
||||
'p': 'png',
|
||||
'g': 'gif',
|
||||
'w': 'webp',
|
||||
}
|
||||
|
||||
|
||||
@ -27,11 +29,12 @@ class DoujinshiInfo(dict):
|
||||
|
||||
|
||||
class Doujinshi(object):
|
||||
def __init__(self, name=None, pretty_name=None, id=None, img_id=None,
|
||||
def __init__(self, name=None, pretty_name=None, id=None, favorite_counts=0, img_id=None,
|
||||
ext='', pages=0, name_format='[%i][%a][%t]', **kwargs):
|
||||
self.name = name
|
||||
self.pretty_name = pretty_name
|
||||
self.id = id
|
||||
self.favorite_counts = favorite_counts
|
||||
self.img_id = img_id
|
||||
self.ext = ext
|
||||
self.pages = pages
|
||||
@ -43,6 +46,7 @@ class Doujinshi(object):
|
||||
name_format = name_format.replace('%ag', format_filename(ag_value))
|
||||
|
||||
name_format = name_format.replace('%i', format_filename(str(self.id)))
|
||||
name_format = name_format.replace('%f', format_filename(str(self.favorite_counts)))
|
||||
name_format = name_format.replace('%a', format_filename(self.info.artists))
|
||||
name_format = name_format.replace('%g', format_filename(self.info.groups))
|
||||
|
||||
@ -53,13 +57,15 @@ class Doujinshi(object):
|
||||
|
||||
self.table = [
|
||||
['Parodies', self.info.parodies],
|
||||
['Doujinshi', self.name],
|
||||
['Title', self.name],
|
||||
['Subtitle', self.info.subtitle],
|
||||
['Date', self.info.date],
|
||||
['Characters', self.info.characters],
|
||||
['Authors', self.info.artists],
|
||||
['Groups', self.info.groups],
|
||||
['Languages', self.info.languages],
|
||||
['Tags', self.info.tags],
|
||||
['Favorite Counts', self.favorite_counts],
|
||||
['URL', self.url],
|
||||
['Pages', self.pages],
|
||||
]
|
||||
@ -70,7 +76,35 @@ class Doujinshi(object):
|
||||
def show(self):
|
||||
logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}')
|
||||
|
||||
def download(self, regenerate_cbz=False):
|
||||
def check_if_need_download(self, options):
|
||||
base_path = os.path.join(self.downloader.path, self.filename)
|
||||
|
||||
# regenerate, re-download
|
||||
if options.regenerate:
|
||||
return True
|
||||
|
||||
# pdf or cbz file exists, skip re-download
|
||||
# doujinshi directory may not exist b/c of --rm-origin-dir option set.
|
||||
# user should pass --regenerate option to get back origin dir.
|
||||
ret_pdf = ret_cbz = None
|
||||
if options.is_pdf:
|
||||
ret_pdf = os.path.exists(f'{base_path}.pdf') or os.path.exists(f'{base_path}/{self.filename}.pdf')
|
||||
|
||||
if options.is_cbz:
|
||||
ret_cbz = os.path.exists(f'{base_path}.cbz') or os.path.exists(f'{base_path}/{self.filename}.cbz')
|
||||
|
||||
ret = list(filter(lambda s: s is not None, [ret_cbz, ret_pdf]))
|
||||
if ret and all(ret):
|
||||
return False
|
||||
|
||||
# doujinshi directory doesn't exist, re-download
|
||||
if not (os.path.exists(base_path) and os.path.isdir(base_path)):
|
||||
return True
|
||||
|
||||
# fallback
|
||||
return True
|
||||
|
||||
def download(self):
|
||||
logger.info(f'Starting to download doujinshi: {self.name}')
|
||||
if self.downloader:
|
||||
download_queue = []
|
||||
@ -80,9 +114,10 @@ class Doujinshi(object):
|
||||
for i in range(1, min(self.pages, len(self.ext)) + 1):
|
||||
download_queue.append(f'{IMAGE_URL}/{self.img_id}/{i}.{self.ext[i-1]}')
|
||||
|
||||
self.downloader.start_download(download_queue, self.filename, regenerate_cbz=regenerate_cbz)
|
||||
return self.downloader.start_download(download_queue, self.filename)
|
||||
else:
|
||||
logger.critical('Downloader has not been loaded')
|
||||
return False
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -1,23 +1,17 @@
|
||||
# coding: utf-
|
||||
|
||||
import multiprocessing
|
||||
import signal
|
||||
|
||||
import sys
|
||||
import os
|
||||
import requests
|
||||
import time
|
||||
import asyncio
|
||||
import httpx
|
||||
import urllib3.exceptions
|
||||
|
||||
from urllib.parse import urlparse
|
||||
from nhentai import constant
|
||||
from nhentai.logger import logger
|
||||
from nhentai.parser import request
|
||||
from nhentai.utils import Singleton
|
||||
from nhentai.utils import Singleton, async_request
|
||||
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
semaphore = multiprocessing.Semaphore(1)
|
||||
|
||||
|
||||
class NHentaiImageNotExistException(Exception):
|
||||
@ -40,65 +34,75 @@ def download_callback(result):
|
||||
|
||||
|
||||
class Downloader(Singleton):
|
||||
|
||||
def __init__(self, path='', size=5, timeout=30, delay=0):
|
||||
self.size = size
|
||||
def __init__(self, path='', threads=5, timeout=30, delay=0):
|
||||
self.threads = threads
|
||||
self.path = str(path)
|
||||
self.timeout = timeout
|
||||
self.delay = delay
|
||||
self.folder = None
|
||||
self.semaphore = None
|
||||
|
||||
def download(self, url, folder='', filename='', retried=0, proxy=None):
|
||||
if self.delay:
|
||||
time.sleep(self.delay)
|
||||
async def fiber(self, tasks):
|
||||
self.semaphore = asyncio.Semaphore(self.threads)
|
||||
for completed_task in asyncio.as_completed(tasks):
|
||||
try:
|
||||
result = await completed_task
|
||||
if result[1]:
|
||||
logger.info(f'{result[1]} download completed')
|
||||
else:
|
||||
logger.warning(f'{result[1]} download failed, return value {result[0]}')
|
||||
except Exception as e:
|
||||
logger.error(f'An error occurred: {e}')
|
||||
|
||||
async def _semaphore_download(self, *args, **kwargs):
|
||||
async with self.semaphore:
|
||||
return await self.download(*args, **kwargs)
|
||||
|
||||
async def download(self, url, folder='', filename='', retried=0, proxy=None, length=0):
|
||||
logger.info(f'Starting to download {url} ...')
|
||||
|
||||
if self.delay:
|
||||
await asyncio.sleep(self.delay)
|
||||
|
||||
filename = filename if filename else os.path.basename(urlparse(url).path)
|
||||
base_filename, extension = os.path.splitext(filename)
|
||||
filename = base_filename.zfill(length) + extension
|
||||
|
||||
save_file_path = os.path.join(self.folder, filename)
|
||||
|
||||
save_file_path = os.path.join(folder, base_filename.zfill(3) + extension)
|
||||
try:
|
||||
if os.path.exists(save_file_path):
|
||||
logger.warning(f'Ignored exists file: {save_file_path}')
|
||||
logger.warning(f'Skipped download: {save_file_path} already exists')
|
||||
return 1, url
|
||||
|
||||
response = None
|
||||
with open(save_file_path, "wb") as f:
|
||||
i = 0
|
||||
while i < 10:
|
||||
try:
|
||||
response = request('get', url, stream=True, timeout=self.timeout, proxies=proxy)
|
||||
if response.status_code != 200:
|
||||
path = urlparse(url).path
|
||||
for mirror in constant.IMAGE_URL_MIRRORS:
|
||||
print(f'{mirror}{path}')
|
||||
mirror_url = f'{mirror}{path}'
|
||||
response = request('get', mirror_url, stream=True,
|
||||
timeout=self.timeout, proxies=proxy)
|
||||
if response.status_code == 200:
|
||||
break
|
||||
response = await async_request('GET', url, timeout=self.timeout, proxies=proxy)
|
||||
|
||||
except Exception as e:
|
||||
i += 1
|
||||
if not i < 10:
|
||||
logger.critical(str(e))
|
||||
return 0, None
|
||||
continue
|
||||
if response.status_code != 200:
|
||||
path = urlparse(url).path
|
||||
for mirror in constant.IMAGE_URL_MIRRORS:
|
||||
logger.info(f"Try mirror: {mirror}{path}")
|
||||
mirror_url = f'{mirror}{path}'
|
||||
response = await async_request('GET', mirror_url, timeout=self.timeout, proxies=proxy)
|
||||
if response.status_code == 200:
|
||||
break
|
||||
|
||||
break
|
||||
if not await self.save(filename, response):
|
||||
logger.error(f'Can not download image {url}')
|
||||
return 1, url
|
||||
|
||||
length = response.headers.get('content-length')
|
||||
if length is None:
|
||||
f.write(response.content)
|
||||
else:
|
||||
for chunk in response.iter_content(2048):
|
||||
f.write(chunk)
|
||||
|
||||
except (requests.HTTPError, requests.Timeout) as e:
|
||||
except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e:
|
||||
if retried < 3:
|
||||
logger.warning(f'Warning: {e}, retrying({retried}) ...')
|
||||
return 0, self.download(url=url, folder=folder, filename=filename,
|
||||
retried=retried+1, proxy=proxy)
|
||||
logger.warning(f'Download {filename} failed, retrying({retried + 1}) times...')
|
||||
return await self.download(
|
||||
url=url,
|
||||
folder=folder,
|
||||
filename=filename,
|
||||
retried=retried + 1,
|
||||
proxy=proxy,
|
||||
)
|
||||
else:
|
||||
return 0, None
|
||||
logger.warning(f'Download {filename} failed with 3 times retried, skipped')
|
||||
return 0, url
|
||||
|
||||
except NHentaiImageNotExistException as e:
|
||||
os.remove(save_file_path)
|
||||
@ -106,59 +110,59 @@ class Downloader(Singleton):
|
||||
|
||||
except Exception as e:
|
||||
import traceback
|
||||
|
||||
logger.error(f"Exception type: {type(e)}")
|
||||
traceback.print_stack()
|
||||
logger.critical(str(e))
|
||||
return 0, None
|
||||
return 0, url
|
||||
|
||||
except KeyboardInterrupt:
|
||||
return -3, None
|
||||
return -3, url
|
||||
|
||||
return 1, url
|
||||
|
||||
def start_download(self, queue, folder='', regenerate_cbz=False):
|
||||
if not isinstance(folder, (str, )):
|
||||
async def save(self, save_file_path, response) -> bool:
|
||||
if response is None:
|
||||
logger.error('Error: Response is None')
|
||||
return False
|
||||
save_file_path = os.path.join(self.folder, save_file_path)
|
||||
with open(save_file_path, 'wb') as f:
|
||||
if response is not None:
|
||||
length = response.headers.get('content-length')
|
||||
if length is None:
|
||||
f.write(response.content)
|
||||
else:
|
||||
async for chunk in response.aiter_bytes(2048):
|
||||
f.write(chunk)
|
||||
return True
|
||||
|
||||
def start_download(self, queue, folder='') -> bool:
|
||||
if not isinstance(folder, (str,)):
|
||||
folder = str(folder)
|
||||
|
||||
if self.path:
|
||||
folder = os.path.join(self.path, folder)
|
||||
|
||||
if os.path.exists(folder + '.cbz'):
|
||||
if not regenerate_cbz:
|
||||
logger.warning(f'CBZ file "{folder}.cbz" exists, ignored download request')
|
||||
return
|
||||
|
||||
logger.info(f'Doujinshi will be saved at "{folder}"')
|
||||
if not os.path.exists(folder):
|
||||
try:
|
||||
os.makedirs(folder)
|
||||
except EnvironmentError as e:
|
||||
logger.critical(str(e))
|
||||
self.folder = folder
|
||||
|
||||
else:
|
||||
logger.warning(f'Path "{folder}" already exist.')
|
||||
if os.getenv('DEBUG', None) == 'NODOWNLOAD':
|
||||
# Assuming we want to continue with rest of process.
|
||||
return True
|
||||
|
||||
queue = [(self, url, folder, constant.CONFIG['proxy']) for url in queue]
|
||||
digit_length = len(str(len(queue)))
|
||||
logger.info(f'Total download pages: {len(queue)}')
|
||||
coroutines = [
|
||||
self._semaphore_download(url, filename=os.path.basename(urlparse(url).path), length=digit_length)
|
||||
for url in queue
|
||||
]
|
||||
|
||||
pool = multiprocessing.Pool(self.size, init_worker)
|
||||
[pool.apply_async(download_wrapper, args=item) for item in queue]
|
||||
# Prevent coroutines infection
|
||||
asyncio.run(self.fiber(coroutines))
|
||||
|
||||
pool.close()
|
||||
pool.join()
|
||||
|
||||
|
||||
def download_wrapper(obj, url, folder='', proxy=None):
|
||||
if sys.platform == 'darwin' or semaphore.get_value():
|
||||
return Downloader.download(obj, url=url, folder=folder, proxy=proxy)
|
||||
else:
|
||||
return -3, None
|
||||
|
||||
|
||||
def init_worker():
|
||||
signal.signal(signal.SIGINT, subprocess_signal)
|
||||
|
||||
|
||||
def subprocess_signal(sig, frame):
|
||||
if semaphore.acquire(timeout=1):
|
||||
logger.warning('Ctrl-C pressed, exiting sub processes ...')
|
||||
|
||||
raise KeyboardInterrupt
|
||||
return True
|
||||
|
@ -135,30 +135,36 @@ def doujinshi_parser(id_, counter=0):
|
||||
logger.warning(f'Error: {e}, ignored')
|
||||
return None
|
||||
|
||||
# print(response)
|
||||
html = BeautifulSoup(response, 'html.parser')
|
||||
doujinshi_info = html.find('div', attrs={'id': 'info'})
|
||||
|
||||
title = doujinshi_info.find('h1').text
|
||||
pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text
|
||||
subtitle = doujinshi_info.find('h2')
|
||||
favorite_counts = doujinshi_info.find('span', class_='nobold').find('span', class_='count')
|
||||
|
||||
doujinshi['name'] = title
|
||||
doujinshi['pretty_name'] = pretty_name
|
||||
doujinshi['subtitle'] = subtitle.text if subtitle else ''
|
||||
doujinshi['favorite_counts'] = int(favorite_counts.text.strip()) if favorite_counts else 0
|
||||
|
||||
doujinshi_cover = html.find('div', attrs={'id': 'cover'})
|
||||
img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif)$',
|
||||
doujinshi_cover.a.img.attrs['data-src'])
|
||||
# img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
|
||||
# doujinshi_cover.a.img.attrs['data-src'])
|
||||
img_id = re.search(r'/galleries/(\d+)/cover\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
|
||||
|
||||
ext = []
|
||||
for i in html.find_all('div', attrs={'class': 'thumb-container'}):
|
||||
_, ext_name = os.path.basename(i.img.attrs['data-src']).rsplit('.', 1)
|
||||
ext.append(ext_name)
|
||||
base_name = os.path.basename(i.img.attrs['data-src'])
|
||||
ext_name = base_name.split('.')
|
||||
if len(ext_name) == 3:
|
||||
ext.append(ext_name[1])
|
||||
else:
|
||||
ext.append(ext_name[-1])
|
||||
|
||||
if not img_id:
|
||||
logger.critical('Tried yo get image id failed')
|
||||
sys.exit(1)
|
||||
logger.critical(f'Tried yo get image id failed of id: {id_}')
|
||||
return None
|
||||
|
||||
doujinshi['img_id'] = img_id.group(1)
|
||||
doujinshi['ext'] = ext
|
||||
@ -185,53 +191,6 @@ def doujinshi_parser(id_, counter=0):
|
||||
return doujinshi
|
||||
|
||||
|
||||
def legacy_doujinshi_parser(id_):
|
||||
if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()):
|
||||
raise Exception(f'Doujinshi id({id_}) is not valid')
|
||||
|
||||
id_ = int(id_)
|
||||
logger.info(f'Fetching information of doujinshi id {id_}')
|
||||
doujinshi = dict()
|
||||
doujinshi['id'] = id_
|
||||
url = f'{constant.DETAIL_URL}/{id_}'
|
||||
i = 0
|
||||
while 5 > i:
|
||||
try:
|
||||
response = request('get', url).json()
|
||||
except Exception as e:
|
||||
i += 1
|
||||
if not i < 5:
|
||||
logger.critical(str(e))
|
||||
sys.exit(1)
|
||||
continue
|
||||
break
|
||||
|
||||
doujinshi['name'] = response['title']['english']
|
||||
doujinshi['subtitle'] = response['title']['japanese']
|
||||
doujinshi['img_id'] = response['media_id']
|
||||
doujinshi['ext'] = ''.join([i['t'] for i in response['images']['pages']])
|
||||
doujinshi['pages'] = len(response['images']['pages'])
|
||||
|
||||
# gain information of the doujinshi
|
||||
needed_fields = ['character', 'artist', 'language', 'tag', 'parody', 'group', 'category']
|
||||
for tag in response['tags']:
|
||||
tag_type = tag['type']
|
||||
if tag_type in needed_fields:
|
||||
if tag_type == 'tag':
|
||||
if tag_type not in doujinshi:
|
||||
doujinshi[tag_type] = {}
|
||||
|
||||
tag['name'] = tag['name'].replace(' ', '-')
|
||||
tag['name'] = tag['name'].lower()
|
||||
doujinshi[tag_type][tag['name']] = tag['id']
|
||||
elif tag_type not in doujinshi:
|
||||
doujinshi[tag_type] = tag['name']
|
||||
else:
|
||||
doujinshi[tag_type] += ', ' + tag['name']
|
||||
|
||||
return doujinshi
|
||||
|
||||
|
||||
def print_doujinshi(doujinshi_list):
|
||||
if not doujinshi_list:
|
||||
return
|
||||
|
@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
import json
|
||||
import os
|
||||
|
||||
from nhentai.constant import PATH_SEPARATOR
|
||||
from xml.sax.saxutils import escape
|
||||
from nhentai.constant import LANGUAGE_ISO
|
||||
|
||||
@ -8,6 +10,8 @@ from nhentai.constant import LANGUAGE_ISO
|
||||
def serialize_json(doujinshi, output_dir):
|
||||
metadata = {'title': doujinshi.name,
|
||||
'subtitle': doujinshi.info.subtitle}
|
||||
if doujinshi.info.favorite_counts:
|
||||
metadata['favorite_counts'] = doujinshi.favorite_counts
|
||||
if doujinshi.info.date:
|
||||
metadata['upload_date'] = doujinshi.info.date
|
||||
if doujinshi.info.parodies:
|
||||
@ -22,7 +26,7 @@ def serialize_json(doujinshi, output_dir):
|
||||
metadata['group'] = [i.strip() for i in doujinshi.info.groups.split(',')]
|
||||
if doujinshi.info.languages:
|
||||
metadata['language'] = [i.strip() for i in doujinshi.info.languages.split(',')]
|
||||
metadata['category'] = doujinshi.info.categories
|
||||
metadata['category'] = [i.strip() for i in doujinshi.info.categories.split(',')]
|
||||
metadata['URL'] = doujinshi.url
|
||||
metadata['Pages'] = doujinshi.pages
|
||||
|
||||
@ -44,6 +48,7 @@ def serialize_comic_xml(doujinshi, output_dir):
|
||||
xml_write_simple_tag(f, 'PageCount', doujinshi.pages)
|
||||
xml_write_simple_tag(f, 'URL', doujinshi.url)
|
||||
xml_write_simple_tag(f, 'NhentaiId', doujinshi.id)
|
||||
xml_write_simple_tag(f, 'Favorites', doujinshi.favorite_counts)
|
||||
xml_write_simple_tag(f, 'Genre', doujinshi.info.categories)
|
||||
|
||||
xml_write_simple_tag(f, 'BlackAndWhite', 'No' if doujinshi.info.tags and
|
||||
@ -79,7 +84,7 @@ def xml_write_simple_tag(f, name, val, indent=1):
|
||||
|
||||
def merge_json():
|
||||
lst = []
|
||||
output_dir = "./"
|
||||
output_dir = f".{PATH_SEPARATOR}"
|
||||
os.chdir(output_dir)
|
||||
doujinshi_dirs = next(os.walk('.'))[1]
|
||||
for folder in doujinshi_dirs:
|
||||
|
226
nhentai/utils.py
226
nhentai/utils.py
@ -5,15 +5,21 @@ import re
|
||||
import os
|
||||
import zipfile
|
||||
import shutil
|
||||
|
||||
import httpx
|
||||
import requests
|
||||
import sqlite3
|
||||
import urllib.parse
|
||||
from typing import Tuple
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
|
||||
from nhentai import constant
|
||||
from nhentai.constant import PATH_SEPARATOR
|
||||
from nhentai.logger import logger
|
||||
from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database
|
||||
|
||||
|
||||
MAX_FIELD_LENGTH = 100
|
||||
EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
|
||||
|
||||
|
||||
def request(method, url, **kwargs):
|
||||
@ -30,15 +36,40 @@ def request(method, url, **kwargs):
|
||||
return getattr(session, method)(url, verify=False, **kwargs)
|
||||
|
||||
|
||||
async def async_request(method, url, proxies=None, **kwargs):
    """
    Send a single HTTP request asynchronously through a short-lived httpx client.

    The client is created with the Referer / User-Agent / Cookie headers taken
    from ``constant`` and with certificate verification disabled, and it is
    closed again before this coroutine returns.

    :param method: HTTP method name, passed to ``client.request``.
    :param url: URL to request.
    :param proxies: mapping of scheme (``'http'`` / ``'https'``) to proxy URL.
        When ``None``, ``constant.CONFIG['proxy']`` is used.
    :param kwargs: per-request options forwarded to ``client.request``
        (for example ``timeout``, ``params``, ``json`` or ``headers``).
    :return: the response object returned by ``client.request``.
    """
    headers = {
        'Referer': constant.LOGIN_URL,
        'User-Agent': constant.CONFIG['useragent'],
        'Cookie': constant.CONFIG['cookie'],
    }

    if proxies is None:
        proxies = constant.CONFIG['proxy']

    # Both entries empty means "no proxy configured".
    if proxies.get('http') == '' and proxies.get('https') == '':
        proxies = None

    if proxies:
        # Re-key the mapping as '<scheme>://' (the form httpx's proxy mapping
        # expects) and drop entries without a value.
        proxies = {f'{k}://': v for k, v in proxies.items() if v}

    # kwargs are per-request options, so they go to `client.request` only.
    # Passing them to the constructor as well made request-only options
    # (json, data, files, ...) and `headers` fail with TypeError.
    async with httpx.AsyncClient(headers=headers, verify=False, proxies=proxies) as client:
        response = await client.request(method, url, **kwargs)

    return response
|
||||
|
||||
|
||||
def check_cookie():
|
||||
response = request('get', constant.BASE_URL)
|
||||
|
||||
if response.status_code == 403 and 'Just a moment...' in response.text:
|
||||
logger.error('Blocked by Cloudflare captcha, please set your cookie and useragent')
|
||||
sys.exit(1)
|
||||
|
||||
username = re.findall('"/users/[0-9]+/(.*?)"', response.text)
|
||||
if not username:
|
||||
logger.warning('Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie')
|
||||
logger.warning(
|
||||
'Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie')
|
||||
else:
|
||||
logger.log(16, f'Login successfully! Your username: {username[0]}')
|
||||
|
||||
@ -64,13 +95,33 @@ def readfile(path):
|
||||
return file.read()
|
||||
|
||||
|
||||
def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
|
||||
image_html = ''
|
||||
def parse_doujinshi_obj(
        output_dir: str,
        doujinshi_obj=None,
        file_type: str = ''
) -> Tuple[str, str]:
    """
    Work out the image folder and the output file path for a doujinshi.

    Without a ``doujinshi_obj`` both paths point at the current directory and
    the file is called ``doujinshi.<file_type>``. With one, the folder is
    ``<output_dir>/<doujinshi_obj.filename>`` and the file is
    ``<output_dir>/<doujinshi_obj.filename>.<file_type>``.

    Side effect: for ``file_type == 'cbz'`` the comic XML metadata is written
    into the image folder via ``serialize_comic_xml``.

    :return: tuple of ``(doujinshi_dir, filename)``.
    """
    if doujinshi_obj is None:
        return f'.{PATH_SEPARATOR}', f'.{PATH_SEPARATOR}doujinshi.{file_type}'

    doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
    target_name = f'{doujinshi_obj.filename}.{file_type}'

    if file_type == 'cbz':
        serialize_comic_xml(doujinshi_obj, doujinshi_dir)
    elif file_type == 'pdf':
        # A '/' in the name would be read as a sub-directory of output_dir.
        target_name = target_name.replace('/', '-')

    return doujinshi_dir, os.path.join(output_dir, target_name)
|
||||
|
||||
|
||||
def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
|
||||
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '.html')
|
||||
image_html = ''
|
||||
|
||||
if not os.path.exists(doujinshi_dir):
|
||||
logger.warning(f'Path "{doujinshi_dir}" does not exist, creating.')
|
||||
@ -83,7 +134,7 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
|
||||
file_list.sort()
|
||||
|
||||
for image in file_list:
|
||||
if not os.path.splitext(image)[1] in ('.jpg', '.png'):
|
||||
if not os.path.splitext(image)[1] in EXTENSIONS:
|
||||
continue
|
||||
image_html += f'<img src="{image}" class="image-item"/>\n'
|
||||
|
||||
@ -107,7 +158,28 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
|
||||
logger.warning(f'Writing HTML Viewer failed ({e})')
|
||||
|
||||
|
||||
def generate_main_html(output_dir='./'):
|
||||
def move_to_folder(output_dir='.', doujinshi_obj=None, file_type=None):
    """
    Replace the downloaded images of a doujinshi with its generated document.

    Every regular file in the doujinshi folder except ``.pdf`` / ``.cbz`` files
    is deleted, then the generated ``file_type`` document is moved into that
    folder.

    :param output_dir: directory that contains the doujinshi folder.
    :param doujinshi_obj: doujinshi whose folder is cleaned; ``None`` targets
        the current directory.
    :param file_type: extension of the generated document (e.g. ``'cbz'``).
    :raises RuntimeError: if ``file_type`` is empty.
    :raises FileNotFoundError: if the generated document does not exist.
    """
    if not file_type:
        raise RuntimeError('no file_type specified')

    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)

    # Check before deleting anything: without the generated document the
    # images would be removed and the move below would fail anyway.
    if not os.path.exists(filename):
        raise FileNotFoundError(f'Generated file "{filename}" does not exist')

    for fn in os.listdir(doujinshi_dir):
        file_path = os.path.join(doujinshi_dir, fn)
        _, ext = os.path.splitext(file_path)
        if ext in ('.pdf', '.cbz'):
            continue

        if os.path.isfile(file_path):
            try:
                os.remove(file_path)
            except OSError as e:
                # Best effort: one undeletable file must not abort the move.
                logger.warning(f'Error deleting file: {e}')

    shutil.move(filename, os.path.join(doujinshi_dir, os.path.basename(filename)))
|
||||
|
||||
|
||||
def generate_main_html(output_dir=f'.{PATH_SEPARATOR}'):
|
||||
"""
|
||||
Generate a main html to show all the contains doujinshi.
|
||||
With a link to their `index.html`.
|
||||
@ -148,7 +220,7 @@ def generate_main_html(output_dir='./'):
|
||||
else:
|
||||
title = 'nHentai HTML Viewer'
|
||||
|
||||
image_html += element.format(FOLDER=folder, IMAGE=image, TITLE=title)
|
||||
image_html += element.format(FOLDER=urllib.parse.quote(folder), IMAGE=image, TITLE=title)
|
||||
if image_html == '':
|
||||
logger.warning('No index.html found, --gen-main paused.')
|
||||
return
|
||||
@ -158,94 +230,50 @@ def generate_main_html(output_dir='./'):
|
||||
f.write(data.encode('utf-8'))
|
||||
shutil.copy(os.path.dirname(__file__) + '/viewer/logo.png', './')
|
||||
set_js_database()
|
||||
logger.log(16, f'Main Viewer has been written to "{output_dir}main.html"')
|
||||
output_dir = output_dir[:-1] if output_dir.endswith('/') else output_dir
|
||||
logger.log(16, f'Main Viewer has been written to "{output_dir}/main.html"')
|
||||
except Exception as e:
|
||||
logger.warning(f'Writing Main Viewer failed ({e})')
|
||||
|
||||
|
||||
def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_comic_info=True, move_to_folder=False):
|
||||
if doujinshi_obj is not None:
|
||||
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
|
||||
if os.path.exists(doujinshi_dir+".cbz"):
|
||||
logger.warning(f'Comic Book CBZ file exists, skip "{doujinshi_dir}"')
|
||||
return
|
||||
if write_comic_info:
|
||||
serialize_comic_xml(doujinshi_obj, doujinshi_dir)
|
||||
cbz_filename = os.path.join(os.path.join(doujinshi_dir, '..'), f'{doujinshi_obj.filename}.cbz')
|
||||
else:
|
||||
cbz_filename = './doujinshi.cbz'
|
||||
doujinshi_dir = '.'
|
||||
def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
    """
    Bundle the images of a doujinshi folder into a CBZ or PDF document.

    :param file_type: ``'cbz'`` or ``'pdf'``; any other value writes nothing.
    :param output_dir: directory that contains the doujinshi folder and that
        receives the generated document.
    :param doujinshi_obj: doujinshi to bundle; ``None`` bundles the current
        directory into ``doujinshi.<file_type>``.
    :param regenerate: when true, rebuild the document even if it already exists.
    """
    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)

    # Test the path that is actually written. `<doujinshi_dir>.<file_type>`
    # differs from it when doujinshi_obj is None or a PDF name had '/' replaced.
    if os.path.exists(filename) and not regenerate:
        logger.info(f'Skipped {file_type} file generation: {filename} already exists')
        return

    if file_type == 'cbz':
        archive_path = os.path.abspath(filename)
        file_list = sorted(os.listdir(doujinshi_dir))

        logger.info(f'Writing CBZ file to path: {filename}')
        with zipfile.ZipFile(filename, 'w') as cbz_pf:
            for image in file_list:
                image_path = os.path.join(doujinshi_dir, image)
                # Never add the archive to itself (possible when it lives in
                # the folder being listed and is being regenerated).
                if os.path.abspath(image_path) == archive_path:
                    continue
                cbz_pf.write(image_path, image)

        logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
    elif file_type == 'pdf':
        # img2pdf is optional, so import it lazily and only guard the import.
        try:
            import img2pdf
        except ImportError:
            logger.error("Please install img2pdf package by using pip.")
            return

        # Write images to a PDF file using img2pdf.
        file_list = sorted(f for f in os.listdir(doujinshi_dir) if f.lower().endswith(EXTENSIONS))
        full_path_list = [os.path.join(doujinshi_dir, image) for image in file_list]

        logger.info(f'Writing PDF file to path: {filename}')
        # Convert before opening the output so a failed conversion does not
        # leave an empty PDF behind that later runs would treat as finished.
        pdf_bytes = img2pdf.convert(full_path_list, rotation=img2pdf.Rotation.ifvalid)
        with open(filename, 'wb') as pdf_f:
            pdf_f.write(pdf_bytes)

        logger.log(16, f'PDF file has been written to "{filename}"')
|
||||
|
||||
|
||||
def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
|
||||
@ -253,7 +281,7 @@ def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
|
||||
It used to be a whitelist approach allowed only alphabet and a part of symbols.
|
||||
but most doujinshi's names include Japanese 2-byte characters and these was rejected.
|
||||
so it is using blacklist approach now.
|
||||
if filename include forbidden characters (\'/:,;*?"<>|) ,it replace space character(' ').
|
||||
if filename include forbidden characters (\'/:,;*?"<>|) ,it replaces space character(" ").
|
||||
"""
|
||||
# maybe you can use `--format` to select a suitable filename
|
||||
|
||||
@ -276,7 +304,7 @@ def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
|
||||
return filename
|
||||
|
||||
|
||||
def signal_handler(signal, frame):
|
||||
def signal_handler(_signal, _frame):
|
||||
logger.error('Ctrl-C signal received. Stopping...')
|
||||
sys.exit(1)
|
||||
|
||||
@ -284,7 +312,8 @@ def signal_handler(signal, frame):
|
||||
def paging(page_string):
|
||||
# 1,3-5,14 -> [1, 3, 4, 5, 14]
|
||||
if not page_string:
|
||||
return []
|
||||
# default, the first page
|
||||
return [1]
|
||||
|
||||
page_list = []
|
||||
for i in page_string.split(','):
|
||||
@ -301,32 +330,27 @@ def paging(page_string):
|
||||
return page_list
|
||||
|
||||
|
||||
def generate_metadata_file(output_dir, table, doujinshi_obj=None):
|
||||
logger.info('Writing Metadata Info')
|
||||
def generate_metadata_file(output_dir, doujinshi_obj):
    """
    Write ``info.txt`` with one ``FIELD: value`` line per metadata field.

    The file is created at ``<output_dir>/<doujinshi_obj.filename>/info.txt``.
    For each field the value is looked up, in order, in ``doujinshi_obj.table``
    (case-insensitively, by the field name and then by its plural) and in
    ``doujinshi_obj.info`` (by the lower-cased name and then by its plural);
    a field found nowhere is written as ``Unknown``.

    :param output_dir: directory that contains the doujinshi folder.
    :param doujinshi_obj: doujinshi providing ``filename``, ``table`` and ``info``.
    """
    info_txt_path = os.path.join(output_dir, doujinshi_obj.filename, 'info.txt')

    fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
              'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
              'TAGS', 'FAVORITE COUNTS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
              'SERIES', 'PARODY', 'URL']

    temp_dict = CaseInsensitiveDict(dict(doujinshi_obj.table))

    # The context manager closes the file even if a lookup or write raises.
    with open(info_txt_path, 'w', encoding='utf-8') as f:
        for i in fields:
            v = temp_dict.get(i)
            v = temp_dict.get(f'{i}s') if v is None else v
            v = doujinshi_obj.info.get(i.lower(), None) if v is None else v
            v = doujinshi_obj.info.get(f'{i.lower()}s', "Unknown") if v is None else v
            f.write(f'{i}: {v}\n')

    logger.log(16, f'Metadata Info has been written to "{info_txt_path}"')
|
||||
|
||||
|
||||
class DB(object):
|
||||
|
119
poetry.lock
generated
119
poetry.lock
generated
@ -1,4 +1,26 @@
|
||||
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "anyio"
|
||||
version = "4.5.2"
|
||||
description = "High level compatibility layer for multiple asynchronous event loop implementations"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "anyio-4.5.2-py3-none-any.whl", hash = "sha256:c011ee36bc1e8ba40e5a81cb9df91925c218fe9b778554e0b56a21e1b5d4716f"},
|
||||
{file = "anyio-4.5.2.tar.gz", hash = "sha256:23009af4ed04ce05991845451e11ef02fc7c5ed29179ac9a420e5ad0ac7ddc5b"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
|
||||
idna = ">=2.8"
|
||||
sniffio = ">=1.1"
|
||||
typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""}
|
||||
|
||||
[package.extras]
|
||||
doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
|
||||
test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21.0b1)"]
|
||||
trio = ["trio (>=0.26.1)"]
|
||||
|
||||
[[package]]
|
||||
name = "beautifulsoup4"
|
||||
@ -126,6 +148,77 @@ files = [
|
||||
{file = "charset_normalizer-3.0.1-py3-none-any.whl", hash = "sha256:7e189e2e1d3ed2f4aebabd2d5b0f931e883676e51c7624826e0a4e5fe8a0bf24"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "exceptiongroup"
|
||||
version = "1.2.2"
|
||||
description = "Backport of PEP 654 (exception groups)"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
|
||||
{file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
test = ["pytest (>=6)"]
|
||||
|
||||
[[package]]
|
||||
name = "h11"
|
||||
version = "0.14.0"
|
||||
description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
|
||||
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "httpcore"
|
||||
version = "1.0.7"
|
||||
description = "A minimal low-level HTTP client."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"},
|
||||
{file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
certifi = "*"
|
||||
h11 = ">=0.13,<0.15"
|
||||
|
||||
[package.extras]
|
||||
asyncio = ["anyio (>=4.0,<5.0)"]
|
||||
http2 = ["h2 (>=3,<5)"]
|
||||
socks = ["socksio (==1.*)"]
|
||||
trio = ["trio (>=0.22.0,<1.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "httpx"
|
||||
version = "0.27.2"
|
||||
description = "The next generation HTTP client."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"},
|
||||
{file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
anyio = "*"
|
||||
certifi = "*"
|
||||
httpcore = "==1.*"
|
||||
idna = "*"
|
||||
sniffio = "*"
|
||||
|
||||
[package.extras]
|
||||
brotli = ["brotli", "brotlicffi"]
|
||||
cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
|
||||
http2 = ["h2 (>=3,<5)"]
|
||||
socks = ["socksio (==1.*)"]
|
||||
zstd = ["zstandard (>=0.18.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "3.7"
|
||||
@ -169,6 +262,17 @@ urllib3 = ">=1.21.1,<3"
|
||||
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
|
||||
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
|
||||
|
||||
[[package]]
|
||||
name = "sniffio"
|
||||
version = "1.3.1"
|
||||
description = "Sniff out which async library your code is running under"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
|
||||
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "soupsieve"
|
||||
version = "2.4"
|
||||
@ -194,6 +298,17 @@ files = [
|
||||
[package.extras]
|
||||
widechars = ["wcwidth"]
|
||||
|
||||
[[package]]
|
||||
name = "typing-extensions"
|
||||
version = "4.12.2"
|
||||
description = "Backported and Experimental Type Hints for Python 3.8+"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
|
||||
{file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "urllib3"
|
||||
version = "1.26.19"
|
||||
@ -213,4 +328,4 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.8"
|
||||
content-hash = "0a1d5abd47a669c7a1f2dc7b43824a449e29ba94908a4338d2ea0f2dfb4f805e"
|
||||
content-hash = "a69dbf5dcfd6dcc5afc0fd2de4ab153841f7d210d4be60c426e332e36a79d679"
|
||||
|
@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "nhentai"
|
||||
version = "0.5.7"
|
||||
version = "0.5.19"
|
||||
description = "nhentai doujinshi downloader"
|
||||
authors = ["Ricter Z <ricterzheng@gmail.com>"]
|
||||
license = "MIT"
|
||||
@ -14,8 +14,12 @@ beautifulsoup4 = "^4.11.2"
|
||||
tabulate = "^0.9.0"
|
||||
iso8601 = "^1.1.0"
|
||||
urllib3 = "^1.26.14"
|
||||
httpx = "0.27.2"
|
||||
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.poetry.scripts]
|
||||
nhentai = 'nhentai.command:main'
|
||||
|
29
qodana.yaml
Executable file
29
qodana.yaml
Executable file
@ -0,0 +1,29 @@
|
||||
#-------------------------------------------------------------------------------#
|
||||
# Qodana analysis is configured by qodana.yaml file #
|
||||
# https://www.jetbrains.com/help/qodana/qodana-yaml.html #
|
||||
#-------------------------------------------------------------------------------#
|
||||
version: "1.0"
|
||||
|
||||
#Specify inspection profile for code analysis
|
||||
profile:
|
||||
name: qodana.starter
|
||||
|
||||
#Enable inspections
|
||||
#include:
|
||||
# - name: <SomeEnabledInspectionId>
|
||||
|
||||
#Disable inspections
|
||||
#exclude:
|
||||
# - name: <SomeDisabledInspectionId>
|
||||
# paths:
|
||||
# - <path/where/not/run/inspection>
|
||||
|
||||
#Execute shell command before Qodana execution (Applied in CI/CD pipeline)
|
||||
#bootstrap: sh ./prepare-qodana.sh
|
||||
|
||||
#Install IDE plugins before Qodana execution (Applied in CI/CD pipeline)
|
||||
#plugins:
|
||||
# - id: <plugin.id> #(plugin id can be found at https://plugins.jetbrains.com)
|
||||
|
||||
#Specify Qodana linter for analysis (Applied in CI/CD pipeline)
|
||||
linter: jetbrains/qodana-python:2024.3
|
@ -1,3 +1,4 @@
|
||||
httpx==0.27.2
|
||||
requests
|
||||
soupsieve
|
||||
setuptools
|
||||
|
38
setup.py
38
setup.py
@ -1,38 +0,0 @@
|
||||
# coding: utf-8
|
||||
import codecs
|
||||
from setuptools import setup, find_packages
|
||||
from nhentai import __version__, __author__, __email__
|
||||
|
||||
|
||||
with open('requirements.txt') as f:
|
||||
requirements = [l for l in f.read().splitlines() if l]
|
||||
|
||||
|
||||
def long_description():
|
||||
with codecs.open('README.rst', 'rb') as readme:
|
||||
return readme.read().decode('utf-8')
|
||||
|
||||
|
||||
setup(
|
||||
name='nhentai',
|
||||
version=__version__,
|
||||
packages=find_packages(),
|
||||
|
||||
author=__author__,
|
||||
author_email=__email__,
|
||||
keywords=['nhentai', 'doujinshi', 'downloader'],
|
||||
description='nhentai.net doujinshis downloader',
|
||||
long_description=long_description(),
|
||||
url='https://github.com/RicterZ/nhentai',
|
||||
download_url='https://github.com/RicterZ/nhentai/tarball/master',
|
||||
include_package_data=True,
|
||||
zip_safe=False,
|
||||
|
||||
install_requires=requirements,
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
'nhentai = nhentai.command:main',
|
||||
]
|
||||
},
|
||||
license='MIT',
|
||||
)
|
@ -20,7 +20,7 @@ class TestDownload(unittest.TestCase):
|
||||
def test_download(self):
|
||||
did = 440546
|
||||
info = Doujinshi(**doujinshi_parser(did), name_format='%i')
|
||||
info.downloader = Downloader(path='/tmp', size=5)
|
||||
info.downloader = Downloader(path='/tmp', threads=5)
|
||||
info.download()
|
||||
|
||||
self.assertTrue(os.path.exists(f'/tmp/{did}/001.jpg'))
|
||||
|
Reference in New Issue
Block a user