diff --git a/nhentai/cmdline.py b/nhentai/cmdline.py index 6a171b1..d10d7d7 100644 --- a/nhentai/cmdline.py +++ b/nhentai/cmdline.py @@ -118,8 +118,8 @@ def cmd_parser(): help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file') parser.add_option('--meta', dest='generate_metadata', action='store_true', help='generate a metadata file in doujinshi format') - parser.add_option('--regenerate-cbz', dest='regenerate_cbz', action='store_true', default=False, - help='regenerate the cbz file if exists') + parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False, + help='regenerate the cbz or pdf file if exists') # nhentai options parser.add_option('--cookie', type='str', dest='cookie', action='store', diff --git a/nhentai/command.py b/nhentai/command.py index 50abd8d..62af014 100644 --- a/nhentai/command.py +++ b/nhentai/command.py @@ -11,7 +11,7 @@ from nhentai.doujinshi import Doujinshi from nhentai.downloader import Downloader from nhentai.logger import logger from nhentai.constant import BASE_URL -from nhentai.utils import generate_html, generate_cbz, generate_main_html, generate_pdf, generate_metadata_file, \ +from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \ paging, check_cookie, signal_handler, DB @@ -85,21 +85,22 @@ def main(): else: continue - file_type = '' - if options.is_cbz: file_type = '.cbz' - elif options.is_pdf: file_type = '.pdf' - if not options.dryrun: doujinshi.downloader = downloader - result = doujinshi.download(regenerate_cbz=options.regenerate_cbz, file_type=file_type) + + result = doujinshi.download(skip_exists=not options.regenerate) # Already downloaded; continue on with the other doujins. - if not result: continue + if not result: + continue if options.generate_metadata: table = doujinshi.table - result = generate_metadata_file(options.output_dir, table, doujinshi, file_type) + result = generate_metadata_file(options.output_dir, table, doujinshi) # Already downloaded; continue on with the other doujins. - if not result: continue + # if cbz / pdf file exists, skip the download process? + # regenerate but not re-download? + if not result: + continue if options.is_save_download_history: with DB() as db: @@ -107,10 +108,14 @@ def main(): if not options.is_nohtml and not options.is_cbz and not options.is_pdf: generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template']) - elif options.is_cbz: - generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir, True, options.move_to_folder) - elif options.is_pdf: - generate_pdf(options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder) + + if options.is_cbz: + generate_doc('cbz', options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder, + options.regenerate) + + if options.is_pdf: + generate_doc('pdf', options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder, + options.regenerate) if options.main_viewer: generate_main_html(options.output_dir) diff --git a/nhentai/doujinshi.py b/nhentai/doujinshi.py index 28f6786..946f000 100644 --- a/nhentai/doujinshi.py +++ b/nhentai/doujinshi.py @@ -1,5 +1,5 @@ # coding: utf-8 -import datetime +import os from tabulate import tabulate @@ -72,8 +72,14 @@ class Doujinshi(object): def show(self): logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}') - def download(self, regenerate_cbz=False, file_type=''): + def download(self, skip_exists=True): logger.info(f'Starting to download doujinshi: {self.name}') + + base_path = os.path.join(self.downloader.path, self.filename) + if (os.path.exists(base_path + '.pdf') or os.path.exists(base_path + '.cbz')) and skip_exists: + logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {self.name}') + return False + if self.downloader: download_queue = [] if len(self.ext) != self.pages: @@ -82,7 +88,7 @@ class Doujinshi(object): for i in range(1, min(self.pages, len(self.ext)) + 1): download_queue.append(f'{IMAGE_URL}/{self.img_id}/{i}.{self.ext[i-1]}') - return self.downloader.start_download(download_queue, self.filename, regenerate_cbz=regenerate_cbz, file_type=file_type) + return self.downloader.start_download(download_queue, self.filename) else: logger.critical('Downloader has not been loaded') return False diff --git a/nhentai/downloader.py b/nhentai/downloader.py index cc1814c..a8e0e2f 100644 --- a/nhentai/downloader.py +++ b/nhentai/downloader.py @@ -115,18 +115,13 @@ class Downloader(Singleton): return 1, url - - def start_download(self, queue, folder='', regenerate_cbz=False, file_type='') -> bool: + def start_download(self, queue, folder='') -> bool: if not isinstance(folder, (str, )): folder = str(folder) if self.path: folder = os.path.join(self.path, folder) - if file_type != '' and os.path.exists(folder + file_type) and not regenerate_cbz: - logger.warning(f'Skipped download: "{folder}{file_type}" already exists') - return False - logger.info(f'Doujinshi will be saved at "{folder}"') if not os.path.exists(folder): try: diff --git a/nhentai/utils.py b/nhentai/utils.py index c95e8a7..70aaf25 100644 --- a/nhentai/utils.py +++ b/nhentai/utils.py @@ -15,7 +15,6 @@ from nhentai import constant from nhentai.logger import logger from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database - MAX_FIELD_LENGTH = 100 @@ -41,7 +40,8 @@ def check_cookie(): username = re.findall('"/users/[0-9]+/(.*?)"', response.text) if not username: - logger.warning('Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie') + logger.warning( + 'Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie') else: logger.log(16, f'Login successfully! Your username: {username[0]}') @@ -68,39 +68,29 @@ def readfile(path): def parse_doujinshi_obj( - output_dir: str, - doujinshi_obj = None, - file_type: str = '', - write_comic_info = False -) -> Tuple[str, str, bool]: - doujinshi_dir = '.' + output_dir: str, + doujinshi_obj=None, + file_type: str = '' +) -> Tuple[str, str]: filename = './doujinshi' + file_type - already_downloaded = False doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename) - if doujinshi_obj is not None and file_type != '.html': - if os.path.exists(doujinshi_dir + file_type): - already_downloaded = True - elif file_type != '': - _filename = f'{doujinshi_obj.filename}{file_type}' + if doujinshi_obj is not None: + _filename = f'{doujinshi_obj.filename}.{file_type}' - if file_type == '.cbz' and write_comic_info: - serialize_comic_xml(doujinshi_obj, doujinshi_dir) + if file_type == 'cbz': + serialize_comic_xml(doujinshi_obj, doujinshi_dir) - if file_type == '.pdf': - _filename = _filename.replace('/', '-') + if file_type == 'pdf': + _filename = _filename.replace('/', '-') - filename = os.path.join(output_dir, _filename) + filename = os.path.join(output_dir, _filename) - return doujinshi_dir, filename, already_downloaded + return doujinshi_dir, filename def generate_html(output_dir='.', doujinshi_obj=None, template='default'): - doujinshi_dir, filename, already_downloaded = parse_doujinshi_obj(output_dir, doujinshi_obj, '.html') - if already_downloaded: - logger.info(f'Skipped download: {doujinshi_dir} already exists') - return - + doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '.html') image_html = '' if not os.path.exists(doujinshi_dir): @@ -195,20 +185,45 @@ def generate_main_html(output_dir='./'): logger.warning(f'Writing Main Viewer failed ({e})') -def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_comic_info=True, move_to_folder=False): - doujinshi_dir, filename, already_downloaded = parse_doujinshi_obj(output_dir, doujinshi_obj, '.cbz', write_comic_info) - if already_downloaded: - logger.info(f'Skipped download: {doujinshi_dir} already exists') +def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, rm_origin_dir=False, + move_to_folder=False, regenerate=False): + + doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type) + + if os.path.exists(f'{doujinshi_dir}.{file_type}') and not regenerate: + logger.info(f'Skipped download: {doujinshi_dir}.{file_type} already exists') return - file_list = os.listdir(doujinshi_dir) - file_list.sort() + if file_type == 'cbz': + file_list = os.listdir(doujinshi_dir) + file_list.sort() - logger.info(f'Writing CBZ file to path: {filename}') - with zipfile.ZipFile(filename, 'w') as cbz_pf: - for image in file_list: - image_path = os.path.join(doujinshi_dir, image) - cbz_pf.write(image_path, image) + logger.info(f'Writing CBZ file to path: {filename}') + with zipfile.ZipFile(filename, 'w') as cbz_pf: + for image in file_list: + image_path = os.path.join(doujinshi_dir, image) + cbz_pf.write(image_path, image) + + logger.log(16, f'Comic Book CBZ file has been written to "{filename}"') + elif file_type == 'pdf': + try: + import img2pdf + + """Write images to a PDF file using img2pdf.""" + file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif'))] + file_list.sort() + + logger.info(f'Writing PDF file to path: {filename}') + with open(filename, 'wb') as pdf_f: + full_path_list = ( + [os.path.join(doujinshi_dir, image) for image in file_list] + ) + pdf_f.write(img2pdf.convert(full_path_list, rotation=img2pdf.Rotation.ifvalid)) + + logger.log(16, f'PDF file has been written to "{filename}"') + + except ImportError: + logger.error("Please install img2pdf package by using pip.") if rm_origin_dir: shutil.rmtree(doujinshi_dir, ignore_errors=True) @@ -224,48 +239,6 @@ def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_ shutil.move(filename, doujinshi_dir) - logger.log(16, f'Comic Book CBZ file has been written to "{filename}"') - - -def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, move_to_folder=False): - try: - import img2pdf - - """Write images to a PDF file using img2pdf.""" - doujinshi_dir, filename, already_downloaded = parse_doujinshi_obj(output_dir, doujinshi_obj, '.pdf') - if already_downloaded: - logger.info(f'Skipped download: {doujinshi_dir} already exists') - return - - file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif'))] - file_list.sort() - - logger.info(f'Writing PDF file to path: {filename}') - with open(filename, 'wb') as pdf_f: - full_path_list = ( - [os.path.join(doujinshi_dir, image) for image in file_list] - ) - pdf_f.write(img2pdf.convert(full_path_list, rotation=img2pdf.Rotation.ifvalid)) - - if rm_origin_dir: - shutil.rmtree(doujinshi_dir, ignore_errors=True) - - if move_to_folder: - for filename in os.listdir(doujinshi_dir): - file_path = os.path.join(doujinshi_dir, filename) - if os.path.isfile(file_path): - try: - os.remove(file_path) - except Exception as e: - print(f"Error deleting file: {e}") - - shutil.move(filename, doujinshi_dir) - - logger.log(16, f'PDF file has been written to "{filename}"') - - except ImportError: - logger.error("Please install img2pdf package by using pip.") - def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False): """ @@ -320,20 +293,12 @@ def paging(page_string): return page_list -def generate_metadata_file(output_dir, table, doujinshi_obj=None, check_file_type=''): +def generate_metadata_file(output_dir, table, doujinshi_obj=None): logger.info('Writing Metadata Info') - doujinshi_dir, filename, already_downloaded = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type=check_file_type) + doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj) info_txt_path = os.path.join(doujinshi_dir, 'info.txt') - if already_downloaded: - # Ensure that info.txt was generated for the folder (if it exists) before exiting. - if os.path.exists(doujinshi_dir) and os.path.exists(info_txt_path): - logger.info(f'Skipped download: {info_txt_path} already exists') - return False - - logger.info(doujinshi_dir) - f = open(info_txt_path, 'w', encoding='utf-8') fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',