Compare commits

..

15 Commits

Author SHA1 Message Date
f6e9d08fc7 0.5.8 #343 2024-09-22 14:42:02 +08:00
9c1c2ea069 improve download logic #343 2024-09-22 14:39:32 +08:00
984ae4262c generate_metadata_file no need to use parse_doujinshi_obj 2024-09-22 14:11:55 +08:00
cbf9448ed9 improve #342 2024-09-22 13:35:07 +08:00
16bac45f02 generate html viewer automatically after download #342 2024-09-22 12:30:55 +08:00
7fa9193112 fix: non-image files in pdf conversion causing crash 2024-09-22 02:05:32 +00:00
a05a308e71 fix: check if metadata file is downloaded before skipping 2024-09-22 01:39:40 +00:00
5a29eaf775 fix: add file_type check to downloader
If you wanted to generate both .cbz and .pdf, the .pdf will be skipped if .cbz was generated first.
2024-09-22 01:38:54 +00:00
497eb6fe50 fix: remove warning for folder already exists in downloader
Nothing is wrong with the folder already existing -- silently ignore and move on. Might still have other files inside that haven't been downloaded yet.
2024-09-22 01:00:06 +00:00
4bfe104714 refactor: de-dupe doujinshi_obj parsers 2024-09-22 00:44:06 +00:00
12364e980c fix process continuing despite cbz download request skipped 2024-09-22 00:43:10 +00:00
b51e812449 fix #330 2024-09-21 11:49:22 +08:00
0ed5fa1931 fix #320 2024-09-21 00:43:14 +08:00
7f655b0f10 fix #295 2024-09-21 00:32:10 +08:00
dec3f44542 add some debug hack 2024-09-21 00:21:01 +08:00
8 changed files with 140 additions and 120 deletions

View File

@ -1,3 +1,3 @@
__version__ = '0.5.7' __version__ = '0.5.8'
__author__ = 'RicterZ' __author__ = 'RicterZ'
__email__ = 'ricterzheng@gmail.com' __email__ = 'ricterzheng@gmail.com'

View File

@ -118,8 +118,8 @@ def cmd_parser():
help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file') help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
parser.add_option('--meta', dest='generate_metadata', action='store_true', parser.add_option('--meta', dest='generate_metadata', action='store_true',
help='generate a metadata file in doujinshi format') help='generate a metadata file in doujinshi format')
parser.add_option('--regenerate-cbz', dest='regenerate_cbz', action='store_true', default=False, parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False,
help='regenerate the cbz file if exists') help='regenerate the cbz or pdf file if exists')
# nhentai options # nhentai options
parser.add_option('--cookie', type='str', dest='cookie', action='store', parser.add_option('--cookie', type='str', dest='cookie', action='store',

View File

@ -11,7 +11,7 @@ from nhentai.doujinshi import Doujinshi
from nhentai.downloader import Downloader from nhentai.downloader import Downloader
from nhentai.logger import logger from nhentai.logger import logger
from nhentai.constant import BASE_URL from nhentai.constant import BASE_URL
from nhentai.utils import generate_html, generate_cbz, generate_main_html, generate_pdf, generate_metadata_file, \ from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \
paging, check_cookie, signal_handler, DB paging, check_cookie, signal_handler, DB
@ -87,22 +87,29 @@ def main():
if not options.dryrun: if not options.dryrun:
doujinshi.downloader = downloader doujinshi.downloader = downloader
doujinshi.download(regenerate_cbz=options.regenerate_cbz)
if doujinshi.check_if_need_download(options):
doujinshi.download()
else:
logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
if options.generate_metadata: if options.generate_metadata:
table = doujinshi.table generate_metadata_file(options.output_dir, doujinshi)
generate_metadata_file(options.output_dir, table, doujinshi)
if options.is_save_download_history: if options.is_save_download_history:
with DB() as db: with DB() as db:
db.add_one(doujinshi.id) db.add_one(doujinshi.id)
if not options.is_nohtml and not options.is_cbz and not options.is_pdf: if not options.is_nohtml:
generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template']) generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template'])
elif options.is_cbz:
generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir, True, options.move_to_folder) if options.is_cbz:
elif options.is_pdf: generate_doc('cbz', options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder,
generate_pdf(options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder) options.regenerate)
if options.is_pdf:
generate_doc('pdf', options.output_dir, doujinshi, options.rm_origin_dir, options.move_to_folder,
options.regenerate)
if options.main_viewer: if options.main_viewer:
generate_main_html(options.output_dir) generate_main_html(options.output_dir)

View File

@ -1,4 +1,5 @@
# coding: utf-8 # coding: utf-8
import os
from tabulate import tabulate from tabulate import tabulate
@ -55,6 +56,7 @@ class Doujinshi(object):
['Parodies', self.info.parodies], ['Parodies', self.info.parodies],
['Doujinshi', self.name], ['Doujinshi', self.name],
['Subtitle', self.info.subtitle], ['Subtitle', self.info.subtitle],
['Date', self.info.date],
['Characters', self.info.characters], ['Characters', self.info.characters],
['Authors', self.info.artists], ['Authors', self.info.artists],
['Groups', self.info.groups], ['Groups', self.info.groups],
@ -70,7 +72,33 @@ class Doujinshi(object):
def show(self): def show(self):
logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}') logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}')
def download(self, regenerate_cbz=False): def check_if_need_download(self, options):
base_path = os.path.join(self.downloader.path, self.filename)
# doujinshi directory is not exist, we need to download definitely
if not (os.path.exists(base_path) and os.path.isdir(base_path)):
return True
# regenerate, we need to re-download from nhentai
if options.regenerate:
return True
if options.is_pdf:
file_ext = 'pdf'
elif options.is_cbz:
file_ext = 'cbz'
else:
# re-download
return True
# pdf or cbz file exists, we needn't to re-download it
if os.path.exists(f'{base_path}.{file_ext}') or os.path.exists(f'{base_path}/{self.filename}.{file_ext}'):
return False
# fallback
return True
def download(self):
logger.info(f'Starting to download doujinshi: {self.name}') logger.info(f'Starting to download doujinshi: {self.name}')
if self.downloader: if self.downloader:
download_queue = [] download_queue = []
@ -80,9 +108,10 @@ class Doujinshi(object):
for i in range(1, min(self.pages, len(self.ext)) + 1): for i in range(1, min(self.pages, len(self.ext)) + 1):
download_queue.append(f'{IMAGE_URL}/{self.img_id}/{i}.{self.ext[i-1]}') download_queue.append(f'{IMAGE_URL}/{self.img_id}/{i}.{self.ext[i-1]}')
self.downloader.start_download(download_queue, self.filename, regenerate_cbz=regenerate_cbz) return self.downloader.start_download(download_queue, self.filename)
else: else:
logger.critical('Downloader has not been loaded') logger.critical('Downloader has not been loaded')
return False
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -57,7 +57,7 @@ class Downloader(Singleton):
save_file_path = os.path.join(folder, base_filename.zfill(3) + extension) save_file_path = os.path.join(folder, base_filename.zfill(3) + extension)
try: try:
if os.path.exists(save_file_path): if os.path.exists(save_file_path):
logger.warning(f'Ignored exists file: {save_file_path}') logger.warning(f'Skipped download: {save_file_path} already exists')
return 1, url return 1, url
response = None response = None
@ -115,18 +115,13 @@ class Downloader(Singleton):
return 1, url return 1, url
def start_download(self, queue, folder='', regenerate_cbz=False): def start_download(self, queue, folder='') -> bool:
if not isinstance(folder, (str, )): if not isinstance(folder, (str, )):
folder = str(folder) folder = str(folder)
if self.path: if self.path:
folder = os.path.join(self.path, folder) folder = os.path.join(self.path, folder)
if os.path.exists(folder + '.cbz'):
if not regenerate_cbz:
logger.warning(f'CBZ file "{folder}.cbz" exists, ignored download request')
return
logger.info(f'Doujinshi will be saved at "{folder}"') logger.info(f'Doujinshi will be saved at "{folder}"')
if not os.path.exists(folder): if not os.path.exists(folder):
try: try:
@ -134,9 +129,9 @@ class Downloader(Singleton):
except EnvironmentError as e: except EnvironmentError as e:
logger.critical(str(e)) logger.critical(str(e))
else: if os.getenv('DEBUG', None) == 'NODOWNLOAD':
logger.warning(f'Path "{folder}" already exist.') # Assuming we want to continue with rest of process.
return True
queue = [(self, url, folder, constant.CONFIG['proxy']) for url in queue] queue = [(self, url, folder, constant.CONFIG['proxy']) for url in queue]
pool = multiprocessing.Pool(self.size, init_worker) pool = multiprocessing.Pool(self.size, init_worker)
@ -145,6 +140,8 @@ class Downloader(Singleton):
pool.close() pool.close()
pool.join() pool.join()
return True
def download_wrapper(obj, url, folder='', proxy=None): def download_wrapper(obj, url, folder='', proxy=None):
if sys.platform == 'darwin' or semaphore.get_value(): if sys.platform == 'darwin' or semaphore.get_value():

View File

@ -22,7 +22,7 @@ def serialize_json(doujinshi, output_dir):
metadata['group'] = [i.strip() for i in doujinshi.info.groups.split(',')] metadata['group'] = [i.strip() for i in doujinshi.info.groups.split(',')]
if doujinshi.info.languages: if doujinshi.info.languages:
metadata['language'] = [i.strip() for i in doujinshi.info.languages.split(',')] metadata['language'] = [i.strip() for i in doujinshi.info.languages.split(',')]
metadata['category'] = doujinshi.info.categories metadata['category'] = [i.strip() for i in doujinshi.info.categories.split(',')]
metadata['URL'] = doujinshi.url metadata['URL'] = doujinshi.url
metadata['Pages'] = doujinshi.pages metadata['Pages'] = doujinshi.pages

View File

@ -5,14 +5,16 @@ import re
import os import os
import zipfile import zipfile
import shutil import shutil
import requests import requests
import sqlite3 import sqlite3
import urllib.parse
from typing import Optional, Tuple
from nhentai import constant from nhentai import constant
from nhentai.logger import logger from nhentai.logger import logger
from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database
MAX_FIELD_LENGTH = 100 MAX_FIELD_LENGTH = 100
@ -38,7 +40,8 @@ def check_cookie():
username = re.findall('"/users/[0-9]+/(.*?)"', response.text) username = re.findall('"/users/[0-9]+/(.*?)"', response.text)
if not username: if not username:
logger.warning('Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie') logger.warning(
'Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie')
else: else:
logger.log(16, f'Login successfully! Your username: {username[0]}') logger.log(16, f'Login successfully! Your username: {username[0]}')
@ -64,13 +67,31 @@ def readfile(path):
return file.read() return file.read()
def generate_html(output_dir='.', doujinshi_obj=None, template='default'): def parse_doujinshi_obj(
image_html = '' output_dir: str,
doujinshi_obj=None,
file_type: str = ''
) -> Tuple[str, str]:
filename = './doujinshi' + file_type
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
if doujinshi_obj is not None: if doujinshi_obj is not None:
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename) _filename = f'{doujinshi_obj.filename}.{file_type}'
else:
doujinshi_dir = '.' if file_type == 'cbz':
serialize_comic_xml(doujinshi_obj, doujinshi_dir)
if file_type == 'pdf':
_filename = _filename.replace('/', '-')
filename = os.path.join(output_dir, _filename)
return doujinshi_dir, filename
def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '.html')
image_html = ''
if not os.path.exists(doujinshi_dir): if not os.path.exists(doujinshi_dir):
logger.warning(f'Path "{doujinshi_dir}" does not exist, creating.') logger.warning(f'Path "{doujinshi_dir}" does not exist, creating.')
@ -148,7 +169,7 @@ def generate_main_html(output_dir='./'):
else: else:
title = 'nHentai HTML Viewer' title = 'nHentai HTML Viewer'
image_html += element.format(FOLDER=folder, IMAGE=image, TITLE=title) image_html += element.format(FOLDER=urllib.parse.quote(folder), IMAGE=image, TITLE=title)
if image_html == '': if image_html == '':
logger.warning('No index.html found, --gen-main paused.') logger.warning('No index.html found, --gen-main paused.')
return return
@ -158,94 +179,65 @@ def generate_main_html(output_dir='./'):
f.write(data.encode('utf-8')) f.write(data.encode('utf-8'))
shutil.copy(os.path.dirname(__file__) + '/viewer/logo.png', './') shutil.copy(os.path.dirname(__file__) + '/viewer/logo.png', './')
set_js_database() set_js_database()
logger.log(16, f'Main Viewer has been written to "{output_dir}main.html"') output_dir = output_dir[:-1] if output_dir.endswith('/') else output_dir
logger.log(16, f'Main Viewer has been written to "{output_dir}/main.html"')
except Exception as e: except Exception as e:
logger.warning(f'Writing Main Viewer failed ({e})') logger.warning(f'Writing Main Viewer failed ({e})')
def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_comic_info=True, move_to_folder=False): def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, rm_origin_dir=False,
if doujinshi_obj is not None: move_to_folder=False, regenerate=False):
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
if os.path.exists(doujinshi_dir+".cbz"):
logger.warning(f'Comic Book CBZ file exists, skip "{doujinshi_dir}"')
return
if write_comic_info:
serialize_comic_xml(doujinshi_obj, doujinshi_dir)
cbz_filename = os.path.join(os.path.join(doujinshi_dir, '..'), f'{doujinshi_obj.filename}.cbz')
else:
cbz_filename = './doujinshi.cbz'
doujinshi_dir = '.'
file_list = os.listdir(doujinshi_dir) doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
file_list.sort()
logger.info(f'Writing CBZ file to path: {cbz_filename}') if os.path.exists(f'{doujinshi_dir}.{file_type}') and not regenerate:
with zipfile.ZipFile(cbz_filename, 'w') as cbz_pf: logger.info(f'Skipped {file_type} file generation: {doujinshi_dir}.{file_type} already exists')
for image in file_list: return
image_path = os.path.join(doujinshi_dir, image)
cbz_pf.write(image_path, image) if file_type == 'cbz':
file_list = os.listdir(doujinshi_dir)
file_list.sort()
logger.info(f'Writing CBZ file to path: {filename}')
with zipfile.ZipFile(filename, 'w') as cbz_pf:
for image in file_list:
image_path = os.path.join(doujinshi_dir, image)
cbz_pf.write(image_path, image)
logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
elif file_type == 'pdf':
try:
import img2pdf
"""Write images to a PDF file using img2pdf."""
file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif'))]
file_list.sort()
logger.info(f'Writing PDF file to path: {filename}')
with open(filename, 'wb') as pdf_f:
full_path_list = (
[os.path.join(doujinshi_dir, image) for image in file_list]
)
pdf_f.write(img2pdf.convert(full_path_list, rotation=img2pdf.Rotation.ifvalid))
logger.log(16, f'PDF file has been written to "{filename}"')
except ImportError:
logger.error("Please install img2pdf package by using pip.")
if rm_origin_dir: if rm_origin_dir:
shutil.rmtree(doujinshi_dir, ignore_errors=True) shutil.rmtree(doujinshi_dir, ignore_errors=True)
if move_to_folder: if move_to_folder:
for filename in os.listdir(doujinshi_dir): for filename in os.listdir(doujinshi_dir):
file_path = os.path.join(doujinshi_dir, filename) file_path = os.path.join(doujinshi_dir, filename)
if os.path.isfile(file_path): if os.path.isfile(file_path):
try: try:
os.remove(file_path) os.remove(file_path)
except Exception as e: except Exception as e:
print(f"Error deleting file: {e}") print(f"Error deleting file: {e}")
shutil.move(cbz_filename, doujinshi_dir) shutil.move(filename, doujinshi_dir)
logger.log(16, f'Comic Book CBZ file has been written to "{doujinshi_dir}"')
def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, move_to_folder=False):
try:
import img2pdf
"""Write images to a PDF file using img2pdf."""
if doujinshi_obj is not None:
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
filename = doujinshi_obj.filename.replace('/', '-')
pdf_filename = os.path.join(
os.path.join(doujinshi_dir, '..'),
f'{filename}.pdf'
)
else:
pdf_filename = './doujinshi.pdf'
doujinshi_dir = '.'
file_list = os.listdir(doujinshi_dir)
file_list.sort()
logger.info(f'Writing PDF file to path: {pdf_filename}')
with open(pdf_filename, 'wb') as pdf_f:
full_path_list = (
[os.path.join(doujinshi_dir, image) for image in file_list]
)
pdf_f.write(img2pdf.convert(full_path_list, rotation=img2pdf.Rotation.ifvalid))
if rm_origin_dir:
shutil.rmtree(doujinshi_dir, ignore_errors=True)
if move_to_folder:
for filename in os.listdir(doujinshi_dir):
file_path = os.path.join(doujinshi_dir, filename)
if os.path.isfile(file_path):
try:
os.remove(file_path)
except Exception as e:
print(f"Error deleting file: {e}")
shutil.move(pdf_filename, doujinshi_dir)
logger.log(16, f'PDF file has been written to "{doujinshi_dir}"')
except ImportError:
logger.error("Please install img2pdf package by using pip.")
def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False): def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
@ -301,17 +293,11 @@ def paging(page_string):
return page_list return page_list
def generate_metadata_file(output_dir, table, doujinshi_obj=None): def generate_metadata_file(output_dir, doujinshi_obj):
logger.info('Writing Metadata Info')
if doujinshi_obj is not None: info_txt_path = os.path.join(output_dir, doujinshi_obj.filename, 'info.txt')
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
else:
doujinshi_dir = '.'
logger.info(doujinshi_dir) f = open(info_txt_path, 'w', encoding='utf-8')
f = open(os.path.join(doujinshi_dir, 'info.txt'), 'w', encoding='utf-8')
fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR', fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES', 'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
@ -323,10 +309,11 @@ def generate_metadata_file(output_dir, table, doujinshi_obj=None):
for i in range(len(fields)): for i in range(len(fields)):
f.write(f'{fields[i]}: ') f.write(f'{fields[i]}: ')
if fields[i] in special_fields: if fields[i] in special_fields:
f.write(str(table[special_fields.index(fields[i])][1])) f.write(str(doujinshi_obj.table[special_fields.index(fields[i])][1]))
f.write('\n') f.write('\n')
f.close() f.close()
logger.log(16, f'Metadata Info has been written to "{info_txt_path}"')
class DB(object): class DB(object):

View File

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "nhentai" name = "nhentai"
version = "0.5.7" version = "0.5.8"
description = "nhentai doujinshi downloader" description = "nhentai doujinshi downloader"
authors = ["Ricter Z <ricterzheng@gmail.com>"] authors = ["Ricter Z <ricterzheng@gmail.com>"]
license = "MIT" license = "MIT"