mirror of
				https://github.com/RicterZ/nhentai.git
				synced 2025-11-04 02:50:55 +01:00 
			
		
		
		
	improve #342
This commit is contained in:
		@@ -118,8 +118,8 @@ def cmd_parser():
 | 
			
		||||
                      help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
 | 
			
		||||
    parser.add_option('--meta', dest='generate_metadata', action='store_true',
 | 
			
		||||
                      help='generate a metadata file in doujinshi format')
 | 
			
		||||
    parser.add_option('--regenerate-cbz', dest='regenerate_cbz', action='store_true', default=False,
 | 
			
		||||
                      help='regenerate the cbz file if exists')
 | 
			
		||||
    parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False,
 | 
			
		||||
                      help='regenerate the cbz or pdf file if exists')
 | 
			
		||||
 | 
			
		||||
    # nhentai options
 | 
			
		||||
    parser.add_option('--cookie', type='str', dest='cookie', action='store',
 | 
			
		||||
 
 | 
			
		||||
@@ -11,7 +11,7 @@ from nhentai.doujinshi import Doujinshi
 | 
			
		||||
from nhentai.downloader import Downloader
 | 
			
		||||
from nhentai.logger import logger
 | 
			
		||||
from nhentai.constant import BASE_URL
 | 
			
		||||
from nhentai.utils import generate_html, generate_cbz, generate_main_html, generate_pdf, generate_metadata_file, \
 | 
			
		||||
from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \
 | 
			
		||||
    paging, check_cookie, signal_handler, DB
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -85,21 +85,22 @@ def main():
 | 
			
		||||
            else:
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            file_type = ''
 | 
			
		||||
            if options.is_cbz: file_type = '.cbz'
 | 
			
		||||
            elif options.is_pdf: file_type = '.pdf'
 | 
			
		||||
 | 
			
		||||
            if not options.dryrun:
 | 
			
		||||
                doujinshi.downloader = downloader
 | 
			
		||||
                result = doujinshi.download(regenerate_cbz=options.regenerate_cbz, file_type=file_type)
 | 
			
		||||
 | 
			
		||||
                result = doujinshi.download(skip_exists=not options.regenerate)
 | 
			
		||||
                # Already downloaded; continue on with the other doujins.
 | 
			
		||||
                if not result: continue
 | 
			
		||||
                if not result:
 | 
			
		||||
                    continue
 | 
			
		||||
 | 
			
		||||
            if options.generate_metadata:
 | 
			
		||||
                table = doujinshi.table
 | 
			
		||||
                result = generate_metadata_file(options.output_dir, table, doujinshi, file_type)
 | 
			
		||||
                result = generate_metadata_file(options.output_dir, table, doujinshi)
 | 
			
		||||
                # Already downloaded; continue on with the other doujins.
 | 
			
		||||
                if not result: continue
 | 
			
		||||
                # if cbz / pdf file exists, skip the download process?
 | 
			
		||||
                # regenerate but not re-download?
 | 
			
		||||
                if not result:
 | 
			
		||||
                    continue
 | 
			
		||||
 | 
			
		||||
            if options.is_save_download_history:
 | 
			
		||||
                with DB() as db:
 | 
			
		||||
@@ -107,10 +108,14 @@ def main():
 | 
			
		||||
 | 
			
		||||
            if not options.is_nohtml and not options.is_cbz and not options.is_pdf:
 | 
			
		||||
                generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template'])
 | 
			
		||||
            elif options.is_cbz:
 | 
			
		||||
                generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir, True, options.move_to_folder)
 | 
			
		||||
            elif options.is_pdf:
 | 
			
		||||
                generate_pdf(options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder)
 | 
			
		||||
 | 
			
		||||
            if options.is_cbz:
 | 
			
		||||
                generate_doc('cbz', options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder,
 | 
			
		||||
                             options.regenerate)
 | 
			
		||||
 | 
			
		||||
            if options.is_pdf:
 | 
			
		||||
                generate_doc('pdf', options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder,
 | 
			
		||||
                             options.regenerate)
 | 
			
		||||
 | 
			
		||||
        if options.main_viewer:
 | 
			
		||||
            generate_main_html(options.output_dir)
 | 
			
		||||
 
 | 
			
		||||
@@ -1,5 +1,5 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
import datetime
 | 
			
		||||
import os
 | 
			
		||||
 | 
			
		||||
from tabulate import tabulate
 | 
			
		||||
 | 
			
		||||
@@ -72,8 +72,14 @@ class Doujinshi(object):
 | 
			
		||||
    def show(self):
 | 
			
		||||
        logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}')
 | 
			
		||||
 | 
			
		||||
    def download(self, regenerate_cbz=False, file_type=''):
 | 
			
		||||
    def download(self, skip_exists=True):
 | 
			
		||||
        logger.info(f'Starting to download doujinshi: {self.name}')
 | 
			
		||||
 | 
			
		||||
        base_path = os.path.join(self.downloader.path, self.filename)
 | 
			
		||||
        if (os.path.exists(base_path + '.pdf') or os.path.exists(base_path + '.cbz')) and skip_exists:
 | 
			
		||||
            logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {self.name}')
 | 
			
		||||
            return False
 | 
			
		||||
 | 
			
		||||
        if self.downloader:
 | 
			
		||||
            download_queue = []
 | 
			
		||||
            if len(self.ext) != self.pages:
 | 
			
		||||
@@ -82,7 +88,7 @@ class Doujinshi(object):
 | 
			
		||||
            for i in range(1, min(self.pages, len(self.ext)) + 1):
 | 
			
		||||
                download_queue.append(f'{IMAGE_URL}/{self.img_id}/{i}.{self.ext[i-1]}')
 | 
			
		||||
 | 
			
		||||
            return self.downloader.start_download(download_queue, self.filename, regenerate_cbz=regenerate_cbz, file_type=file_type)
 | 
			
		||||
            return self.downloader.start_download(download_queue, self.filename)
 | 
			
		||||
        else:
 | 
			
		||||
            logger.critical('Downloader has not been loaded')
 | 
			
		||||
            return False
 | 
			
		||||
 
 | 
			
		||||
@@ -115,18 +115,13 @@ class Downloader(Singleton):
 | 
			
		||||
 | 
			
		||||
        return 1, url
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def start_download(self, queue, folder='', regenerate_cbz=False, file_type='') -> bool:
 | 
			
		||||
    def start_download(self, queue, folder='') -> bool:
 | 
			
		||||
        if not isinstance(folder, (str, )):
 | 
			
		||||
            folder = str(folder)
 | 
			
		||||
 | 
			
		||||
        if self.path:
 | 
			
		||||
            folder = os.path.join(self.path, folder)
 | 
			
		||||
 | 
			
		||||
        if file_type != '' and os.path.exists(folder + file_type) and not regenerate_cbz:
 | 
			
		||||
            logger.warning(f'Skipped download: "{folder}{file_type}" already exists')
 | 
			
		||||
            return False
 | 
			
		||||
 | 
			
		||||
        logger.info(f'Doujinshi will be saved at "{folder}"')
 | 
			
		||||
        if not os.path.exists(folder):
 | 
			
		||||
            try:
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										141
									
								
								nhentai/utils.py
									
									
									
									
									
								
							
							
						
						
									
										141
									
								
								nhentai/utils.py
									
									
									
									
									
								
							@@ -15,7 +15,6 @@ from nhentai import constant
 | 
			
		||||
from nhentai.logger import logger
 | 
			
		||||
from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
MAX_FIELD_LENGTH = 100
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -41,7 +40,8 @@ def check_cookie():
 | 
			
		||||
 | 
			
		||||
    username = re.findall('"/users/[0-9]+/(.*?)"', response.text)
 | 
			
		||||
    if not username:
 | 
			
		||||
        logger.warning('Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie')
 | 
			
		||||
        logger.warning(
 | 
			
		||||
            'Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie')
 | 
			
		||||
    else:
 | 
			
		||||
        logger.log(16, f'Login successfully! Your username: {username[0]}')
 | 
			
		||||
 | 
			
		||||
@@ -68,39 +68,29 @@ def readfile(path):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def parse_doujinshi_obj(
 | 
			
		||||
    output_dir: str,
 | 
			
		||||
    doujinshi_obj = None,
 | 
			
		||||
    file_type: str = '',
 | 
			
		||||
    write_comic_info = False
 | 
			
		||||
) -> Tuple[str, str, bool]:
 | 
			
		||||
    doujinshi_dir = '.'
 | 
			
		||||
        output_dir: str,
 | 
			
		||||
        doujinshi_obj=None,
 | 
			
		||||
        file_type: str = ''
 | 
			
		||||
) -> Tuple[str, str]:
 | 
			
		||||
    filename = './doujinshi' + file_type
 | 
			
		||||
    already_downloaded = False
 | 
			
		||||
 | 
			
		||||
    doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
 | 
			
		||||
    if doujinshi_obj is not None and file_type != '.html':
 | 
			
		||||
        if os.path.exists(doujinshi_dir + file_type):
 | 
			
		||||
            already_downloaded = True
 | 
			
		||||
        elif file_type != '':
 | 
			
		||||
            _filename = f'{doujinshi_obj.filename}{file_type}'
 | 
			
		||||
    if doujinshi_obj is not None:
 | 
			
		||||
        _filename = f'{doujinshi_obj.filename}.{file_type}'
 | 
			
		||||
 | 
			
		||||
            if file_type == '.cbz' and write_comic_info:
 | 
			
		||||
                serialize_comic_xml(doujinshi_obj, doujinshi_dir)
 | 
			
		||||
        if file_type == 'cbz':
 | 
			
		||||
            serialize_comic_xml(doujinshi_obj, doujinshi_dir)
 | 
			
		||||
 | 
			
		||||
            if file_type == '.pdf':
 | 
			
		||||
                _filename = _filename.replace('/', '-')
 | 
			
		||||
        if file_type == 'pdf':
 | 
			
		||||
            _filename = _filename.replace('/', '-')
 | 
			
		||||
 | 
			
		||||
            filename = os.path.join(output_dir, _filename)
 | 
			
		||||
        filename = os.path.join(output_dir, _filename)
 | 
			
		||||
 | 
			
		||||
    return doujinshi_dir, filename, already_downloaded
 | 
			
		||||
    return doujinshi_dir, filename
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
 | 
			
		||||
    doujinshi_dir, filename, already_downloaded = parse_doujinshi_obj(output_dir, doujinshi_obj, '.html')
 | 
			
		||||
    if already_downloaded:
 | 
			
		||||
        logger.info(f'Skipped download: {doujinshi_dir} already exists')
 | 
			
		||||
        return
 | 
			
		||||
 | 
			
		||||
    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '.html')
 | 
			
		||||
    image_html = ''
 | 
			
		||||
 | 
			
		||||
    if not os.path.exists(doujinshi_dir):
 | 
			
		||||
@@ -195,20 +185,45 @@ def generate_main_html(output_dir='./'):
 | 
			
		||||
        logger.warning(f'Writing Main Viewer failed ({e})')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_comic_info=True, move_to_folder=False):
 | 
			
		||||
    doujinshi_dir, filename, already_downloaded = parse_doujinshi_obj(output_dir, doujinshi_obj, '.cbz', write_comic_info)
 | 
			
		||||
    if already_downloaded:
 | 
			
		||||
        logger.info(f'Skipped download: {doujinshi_dir} already exists')
 | 
			
		||||
def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, rm_origin_dir=False,
 | 
			
		||||
                 move_to_folder=False, regenerate=False):
 | 
			
		||||
 | 
			
		||||
    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
 | 
			
		||||
 | 
			
		||||
    if os.path.exists(f'{doujinshi_dir}.{file_type}') and not regenerate:
 | 
			
		||||
        logger.info(f'Skipped download: {doujinshi_dir}.{file_type} already exists')
 | 
			
		||||
        return
 | 
			
		||||
 | 
			
		||||
    file_list = os.listdir(doujinshi_dir)
 | 
			
		||||
    file_list.sort()
 | 
			
		||||
    if file_type == 'cbz':
 | 
			
		||||
        file_list = os.listdir(doujinshi_dir)
 | 
			
		||||
        file_list.sort()
 | 
			
		||||
 | 
			
		||||
    logger.info(f'Writing CBZ file to path: {filename}')
 | 
			
		||||
    with zipfile.ZipFile(filename, 'w') as cbz_pf:
 | 
			
		||||
        for image in file_list:
 | 
			
		||||
            image_path = os.path.join(doujinshi_dir, image)
 | 
			
		||||
            cbz_pf.write(image_path, image)
 | 
			
		||||
        logger.info(f'Writing CBZ file to path: {filename}')
 | 
			
		||||
        with zipfile.ZipFile(filename, 'w') as cbz_pf:
 | 
			
		||||
            for image in file_list:
 | 
			
		||||
                image_path = os.path.join(doujinshi_dir, image)
 | 
			
		||||
                cbz_pf.write(image_path, image)
 | 
			
		||||
 | 
			
		||||
        logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
 | 
			
		||||
    elif file_type == 'pdf':
 | 
			
		||||
        try:
 | 
			
		||||
            import img2pdf
 | 
			
		||||
 | 
			
		||||
            """Write images to a PDF file using img2pdf."""
 | 
			
		||||
            file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif'))]
 | 
			
		||||
            file_list.sort()
 | 
			
		||||
 | 
			
		||||
            logger.info(f'Writing PDF file to path: {filename}')
 | 
			
		||||
            with open(filename, 'wb') as pdf_f:
 | 
			
		||||
                full_path_list = (
 | 
			
		||||
                    [os.path.join(doujinshi_dir, image) for image in file_list]
 | 
			
		||||
                )
 | 
			
		||||
                pdf_f.write(img2pdf.convert(full_path_list, rotation=img2pdf.Rotation.ifvalid))
 | 
			
		||||
 | 
			
		||||
            logger.log(16, f'PDF file has been written to "{filename}"')
 | 
			
		||||
 | 
			
		||||
        except ImportError:
 | 
			
		||||
            logger.error("Please install img2pdf package by using pip.")
 | 
			
		||||
 | 
			
		||||
    if rm_origin_dir:
 | 
			
		||||
        shutil.rmtree(doujinshi_dir, ignore_errors=True)
 | 
			
		||||
@@ -224,48 +239,6 @@ def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_
 | 
			
		||||
 | 
			
		||||
        shutil.move(filename, doujinshi_dir)
 | 
			
		||||
 | 
			
		||||
    logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, move_to_folder=False):
 | 
			
		||||
    try:
 | 
			
		||||
        import img2pdf
 | 
			
		||||
 | 
			
		||||
        """Write images to a PDF file using img2pdf."""
 | 
			
		||||
        doujinshi_dir, filename, already_downloaded = parse_doujinshi_obj(output_dir, doujinshi_obj, '.pdf')
 | 
			
		||||
        if already_downloaded:
 | 
			
		||||
            logger.info(f'Skipped download: {doujinshi_dir} already exists')
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif'))]
 | 
			
		||||
        file_list.sort()
 | 
			
		||||
 | 
			
		||||
        logger.info(f'Writing PDF file to path: {filename}')
 | 
			
		||||
        with open(filename, 'wb') as pdf_f:
 | 
			
		||||
            full_path_list = (
 | 
			
		||||
                [os.path.join(doujinshi_dir, image) for image in file_list]
 | 
			
		||||
            )
 | 
			
		||||
            pdf_f.write(img2pdf.convert(full_path_list, rotation=img2pdf.Rotation.ifvalid))
 | 
			
		||||
 | 
			
		||||
        if rm_origin_dir:
 | 
			
		||||
            shutil.rmtree(doujinshi_dir, ignore_errors=True)
 | 
			
		||||
 | 
			
		||||
        if move_to_folder:
 | 
			
		||||
            for filename in os.listdir(doujinshi_dir):
 | 
			
		||||
                file_path = os.path.join(doujinshi_dir, filename)
 | 
			
		||||
                if os.path.isfile(file_path):
 | 
			
		||||
                    try:
 | 
			
		||||
                        os.remove(file_path)
 | 
			
		||||
                    except Exception as e:
 | 
			
		||||
                        print(f"Error deleting file: {e}")
 | 
			
		||||
 | 
			
		||||
            shutil.move(filename, doujinshi_dir)
 | 
			
		||||
 | 
			
		||||
        logger.log(16, f'PDF file has been written to "{filename}"')
 | 
			
		||||
 | 
			
		||||
    except ImportError:
 | 
			
		||||
        logger.error("Please install img2pdf package by using pip.")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
 | 
			
		||||
    """
 | 
			
		||||
@@ -320,20 +293,12 @@ def paging(page_string):
 | 
			
		||||
    return page_list
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def generate_metadata_file(output_dir, table, doujinshi_obj=None, check_file_type=''):
 | 
			
		||||
def generate_metadata_file(output_dir, table, doujinshi_obj=None):
 | 
			
		||||
    logger.info('Writing Metadata Info')
 | 
			
		||||
 | 
			
		||||
    doujinshi_dir, filename, already_downloaded = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type=check_file_type)
 | 
			
		||||
    doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj)
 | 
			
		||||
    info_txt_path = os.path.join(doujinshi_dir, 'info.txt')
 | 
			
		||||
 | 
			
		||||
    if already_downloaded:
 | 
			
		||||
        # Ensure that info.txt was generated for the folder (if it exists) before exiting.
 | 
			
		||||
        if os.path.exists(doujinshi_dir) and os.path.exists(info_txt_path):
 | 
			
		||||
            logger.info(f'Skipped download: {info_txt_path} already exists')
 | 
			
		||||
            return False
 | 
			
		||||
 | 
			
		||||
    logger.info(doujinshi_dir)
 | 
			
		||||
 | 
			
		||||
    f = open(info_txt_path, 'w', encoding='utf-8')
 | 
			
		||||
 | 
			
		||||
    fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user