This commit is contained in:
ricterz 2024-09-22 13:35:07 +08:00
parent 16bac45f02
commit cbf9448ed9
5 changed files with 83 additions and 112 deletions

View File

@ -118,8 +118,8 @@ def cmd_parser():
help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file') help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
parser.add_option('--meta', dest='generate_metadata', action='store_true', parser.add_option('--meta', dest='generate_metadata', action='store_true',
help='generate a metadata file in doujinshi format') help='generate a metadata file in doujinshi format')
parser.add_option('--regenerate-cbz', dest='regenerate_cbz', action='store_true', default=False, parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False,
help='regenerate the cbz file if exists') help='regenerate the cbz or pdf file if exists')
# nhentai options # nhentai options
parser.add_option('--cookie', type='str', dest='cookie', action='store', parser.add_option('--cookie', type='str', dest='cookie', action='store',

View File

@ -11,7 +11,7 @@ from nhentai.doujinshi import Doujinshi
from nhentai.downloader import Downloader from nhentai.downloader import Downloader
from nhentai.logger import logger from nhentai.logger import logger
from nhentai.constant import BASE_URL from nhentai.constant import BASE_URL
from nhentai.utils import generate_html, generate_cbz, generate_main_html, generate_pdf, generate_metadata_file, \ from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \
paging, check_cookie, signal_handler, DB paging, check_cookie, signal_handler, DB
@ -85,21 +85,22 @@ def main():
else: else:
continue continue
file_type = ''
if options.is_cbz: file_type = '.cbz'
elif options.is_pdf: file_type = '.pdf'
if not options.dryrun: if not options.dryrun:
doujinshi.downloader = downloader doujinshi.downloader = downloader
result = doujinshi.download(regenerate_cbz=options.regenerate_cbz, file_type=file_type)
result = doujinshi.download(skip_exists=not options.regenerate)
# Already downloaded; continue on with the other doujins. # Already downloaded; continue on with the other doujins.
if not result: continue if not result:
continue
if options.generate_metadata: if options.generate_metadata:
table = doujinshi.table table = doujinshi.table
result = generate_metadata_file(options.output_dir, table, doujinshi, file_type) result = generate_metadata_file(options.output_dir, table, doujinshi)
# Already downloaded; continue on with the other doujins. # Already downloaded; continue on with the other doujins.
if not result: continue # if cbz / pdf file exists, skip the download process?
# regenerate but not re-download?
if not result:
continue
if options.is_save_download_history: if options.is_save_download_history:
with DB() as db: with DB() as db:
@ -107,10 +108,14 @@ def main():
if not options.is_nohtml and not options.is_cbz and not options.is_pdf: if not options.is_nohtml and not options.is_cbz and not options.is_pdf:
generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template']) generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template'])
elif options.is_cbz:
generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir, True, options.move_to_folder) if options.is_cbz:
elif options.is_pdf: generate_doc('cbz', options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder,
generate_pdf(options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder) options.regenerate)
if options.is_pdf:
generate_doc('pdf', options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder,
options.regenerate)
if options.main_viewer: if options.main_viewer:
generate_main_html(options.output_dir) generate_main_html(options.output_dir)

View File

@ -1,5 +1,5 @@
# coding: utf-8 # coding: utf-8
import datetime import os
from tabulate import tabulate from tabulate import tabulate
@ -72,8 +72,14 @@ class Doujinshi(object):
def show(self): def show(self):
logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}') logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}')
def download(self, regenerate_cbz=False, file_type=''): def download(self, skip_exists=True):
logger.info(f'Starting to download doujinshi: {self.name}') logger.info(f'Starting to download doujinshi: {self.name}')
base_path = os.path.join(self.downloader.path, self.filename)
if (os.path.exists(base_path + '.pdf') or os.path.exists(base_path + '.cbz')) and skip_exists:
logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {self.name}')
return False
if self.downloader: if self.downloader:
download_queue = [] download_queue = []
if len(self.ext) != self.pages: if len(self.ext) != self.pages:
@ -82,7 +88,7 @@ class Doujinshi(object):
for i in range(1, min(self.pages, len(self.ext)) + 1): for i in range(1, min(self.pages, len(self.ext)) + 1):
download_queue.append(f'{IMAGE_URL}/{self.img_id}/{i}.{self.ext[i-1]}') download_queue.append(f'{IMAGE_URL}/{self.img_id}/{i}.{self.ext[i-1]}')
return self.downloader.start_download(download_queue, self.filename, regenerate_cbz=regenerate_cbz, file_type=file_type) return self.downloader.start_download(download_queue, self.filename)
else: else:
logger.critical('Downloader has not been loaded') logger.critical('Downloader has not been loaded')
return False return False

View File

@ -115,18 +115,13 @@ class Downloader(Singleton):
return 1, url return 1, url
def start_download(self, queue, folder='') -> bool:
def start_download(self, queue, folder='', regenerate_cbz=False, file_type='') -> bool:
if not isinstance(folder, (str, )): if not isinstance(folder, (str, )):
folder = str(folder) folder = str(folder)
if self.path: if self.path:
folder = os.path.join(self.path, folder) folder = os.path.join(self.path, folder)
if file_type != '' and os.path.exists(folder + file_type) and not regenerate_cbz:
logger.warning(f'Skipped download: "{folder}{file_type}" already exists')
return False
logger.info(f'Doujinshi will be saved at "{folder}"') logger.info(f'Doujinshi will be saved at "{folder}"')
if not os.path.exists(folder): if not os.path.exists(folder):
try: try:

View File

@ -15,7 +15,6 @@ from nhentai import constant
from nhentai.logger import logger from nhentai.logger import logger
from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database
MAX_FIELD_LENGTH = 100 MAX_FIELD_LENGTH = 100
@ -41,7 +40,8 @@ def check_cookie():
username = re.findall('"/users/[0-9]+/(.*?)"', response.text) username = re.findall('"/users/[0-9]+/(.*?)"', response.text)
if not username: if not username:
logger.warning('Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie') logger.warning(
'Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie')
else: else:
logger.log(16, f'Login successfully! Your username: {username[0]}') logger.log(16, f'Login successfully! Your username: {username[0]}')
@ -69,38 +69,28 @@ def readfile(path):
def parse_doujinshi_obj( def parse_doujinshi_obj(
output_dir: str, output_dir: str,
doujinshi_obj = None, doujinshi_obj=None,
file_type: str = '', file_type: str = ''
write_comic_info = False ) -> Tuple[str, str]:
) -> Tuple[str, str, bool]:
doujinshi_dir = '.'
filename = './doujinshi' + file_type filename = './doujinshi' + file_type
already_downloaded = False
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename) doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
if doujinshi_obj is not None and file_type != '.html': if doujinshi_obj is not None:
if os.path.exists(doujinshi_dir + file_type): _filename = f'{doujinshi_obj.filename}.{file_type}'
already_downloaded = True
elif file_type != '':
_filename = f'{doujinshi_obj.filename}{file_type}'
if file_type == '.cbz' and write_comic_info: if file_type == 'cbz':
serialize_comic_xml(doujinshi_obj, doujinshi_dir) serialize_comic_xml(doujinshi_obj, doujinshi_dir)
if file_type == '.pdf': if file_type == 'pdf':
_filename = _filename.replace('/', '-') _filename = _filename.replace('/', '-')
filename = os.path.join(output_dir, _filename) filename = os.path.join(output_dir, _filename)
return doujinshi_dir, filename, already_downloaded return doujinshi_dir, filename
def generate_html(output_dir='.', doujinshi_obj=None, template='default'): def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
doujinshi_dir, filename, already_downloaded = parse_doujinshi_obj(output_dir, doujinshi_obj, '.html') doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '.html')
if already_downloaded:
logger.info(f'Skipped download: {doujinshi_dir} already exists')
return
image_html = '' image_html = ''
if not os.path.exists(doujinshi_dir): if not os.path.exists(doujinshi_dir):
@ -195,12 +185,16 @@ def generate_main_html(output_dir='./'):
logger.warning(f'Writing Main Viewer failed ({e})') logger.warning(f'Writing Main Viewer failed ({e})')
def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_comic_info=True, move_to_folder=False): def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, rm_origin_dir=False,
doujinshi_dir, filename, already_downloaded = parse_doujinshi_obj(output_dir, doujinshi_obj, '.cbz', write_comic_info) move_to_folder=False, regenerate=False):
if already_downloaded:
logger.info(f'Skipped download: {doujinshi_dir} already exists') doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
if os.path.exists(f'{doujinshi_dir}.{file_type}') and not regenerate:
logger.info(f'Skipped download: {doujinshi_dir}.{file_type} already exists')
return return
if file_type == 'cbz':
file_list = os.listdir(doujinshi_dir) file_list = os.listdir(doujinshi_dir)
file_list.sort() file_list.sort()
@ -210,33 +204,12 @@ def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_
image_path = os.path.join(doujinshi_dir, image) image_path = os.path.join(doujinshi_dir, image)
cbz_pf.write(image_path, image) cbz_pf.write(image_path, image)
if rm_origin_dir:
shutil.rmtree(doujinshi_dir, ignore_errors=True)
if move_to_folder:
for filename in os.listdir(doujinshi_dir):
file_path = os.path.join(doujinshi_dir, filename)
if os.path.isfile(file_path):
try:
os.remove(file_path)
except Exception as e:
print(f"Error deleting file: {e}")
shutil.move(filename, doujinshi_dir)
logger.log(16, f'Comic Book CBZ file has been written to "{filename}"') logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
elif file_type == 'pdf':
def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, move_to_folder=False):
try: try:
import img2pdf import img2pdf
"""Write images to a PDF file using img2pdf.""" """Write images to a PDF file using img2pdf."""
doujinshi_dir, filename, already_downloaded = parse_doujinshi_obj(output_dir, doujinshi_obj, '.pdf')
if already_downloaded:
logger.info(f'Skipped download: {doujinshi_dir} already exists')
return
file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif'))] file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif'))]
file_list.sort() file_list.sort()
@ -247,6 +220,11 @@ def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, move_t
) )
pdf_f.write(img2pdf.convert(full_path_list, rotation=img2pdf.Rotation.ifvalid)) pdf_f.write(img2pdf.convert(full_path_list, rotation=img2pdf.Rotation.ifvalid))
logger.log(16, f'PDF file has been written to "{filename}"')
except ImportError:
logger.error("Please install img2pdf package by using pip.")
if rm_origin_dir: if rm_origin_dir:
shutil.rmtree(doujinshi_dir, ignore_errors=True) shutil.rmtree(doujinshi_dir, ignore_errors=True)
@ -261,11 +239,6 @@ def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, move_t
shutil.move(filename, doujinshi_dir) shutil.move(filename, doujinshi_dir)
logger.log(16, f'PDF file has been written to "{filename}"')
except ImportError:
logger.error("Please install img2pdf package by using pip.")
def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False): def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
""" """
@ -320,20 +293,12 @@ def paging(page_string):
return page_list return page_list
def generate_metadata_file(output_dir, table, doujinshi_obj=None, check_file_type=''): def generate_metadata_file(output_dir, table, doujinshi_obj=None):
logger.info('Writing Metadata Info') logger.info('Writing Metadata Info')
doujinshi_dir, filename, already_downloaded = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type=check_file_type) doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj)
info_txt_path = os.path.join(doujinshi_dir, 'info.txt') info_txt_path = os.path.join(doujinshi_dir, 'info.txt')
if already_downloaded:
# Ensure that info.txt was generated for the folder (if it exists) before exiting.
if os.path.exists(doujinshi_dir) and os.path.exists(info_txt_path):
logger.info(f'Skipped download: {info_txt_path} already exists')
return False
logger.info(doujinshi_dir)
f = open(info_txt_path, 'w', encoding='utf-8') f = open(info_txt_path, 'w', encoding='utf-8')
fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR', fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',