This commit is contained in:
Ricter Z 2023-02-04 20:22:57 +08:00
parent c8c63cbc11
commit bc6ef0cf5d
2 changed files with 23 additions and 23 deletions

View File

@ -6,7 +6,7 @@ from nhentai.constant import DETAIL_URL, IMAGE_URL
from nhentai.logger import logger
from nhentai.utils import format_filename
MAX_FIELD_LENGTH = 100
EXT_MAP = {
'j': 'jpg',
'p': 'png',
@ -25,13 +25,6 @@ class DoujinshiInfo(dict):
return ''
def trunk_string(string):
    """
    Cap `string` at MAX_FIELD_LENGTH characters.

    A value shorter than MAX_FIELD_LENGTH is handed back untouched. Anything
    else is cut down to its first MAX_FIELD_LENGTH characters, after which the
    suffix literal is appended.
    """
    if len(string) < MAX_FIELD_LENGTH:
        return string

    # NOTE(review): the suffix literal is empty as written, so nothing visible
    # is appended here; possibly a truncation marker was intended -- confirm.
    clipped = string[:MAX_FIELD_LENGTH]
    return clipped + u''
class Doujinshi(object):
def __init__(self, name=None, pretty_name=None, id=None, img_id=None,
ext='', pages=0, name_format='[%i][%a][%t]', **kwargs):
@ -45,13 +38,13 @@ class Doujinshi(object):
self.url = '%s/%d' % (DETAIL_URL, self.id)
self.info = DoujinshiInfo(**kwargs)
name_format = name_format.replace('%i', str(self.id))
name_format = name_format.replace('%a', self.info.artists)
name_format = name_format.replace('%i', format_filename(str(self.id)))
name_format = name_format.replace('%a', format_filename(self.info.artists))
name_format = name_format.replace('%t', trunk_string(self.name))
name_format = name_format.replace('%p', trunk_string(self.pretty_name))
name_format = name_format.replace('%s', trunk_string(self.info.subtitle))
self.filename = format_filename(name_format)
name_format = name_format.replace('%t', format_filename(self.name))
name_format = name_format.replace('%p', format_filename(self.pretty_name))
name_format = name_format.replace('%s', format_filename(self.info.subtitle))
self.filename = format_filename(name_format, 255, True)
self.table = [
["Parodies", self.info.parodies],

View File

@ -13,6 +13,9 @@ from nhentai.logger import logger
from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database
MAX_FIELD_LENGTH = 100
def request(method, url, **kwargs):
session = requests.Session()
session.headers.update({
@ -247,7 +250,7 @@ def unicode_truncate(s, length, encoding='utf-8'):
return encoded.decode(encoding, 'ignore')
def format_filename(s):
def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
"""
It used to be a whitelist approach that allowed only alphabetic characters and some symbols,
but most doujinshi names include Japanese 2-byte characters, and these were rejected.
@ -255,16 +258,20 @@ def format_filename(s):
If the filename includes forbidden characters (\'/:,;*?"<>|), they are replaced with a space character (' ').
"""
# maybe you can use `--format` to select a suitable filename
if not _truncate_only:
ban_chars = '\\\'/:,;*?"<>|\t'
filename = s.translate(str.maketrans(ban_chars, ' ' * len(ban_chars))).strip()
filename = ' '.join(filename.split())
while filename.endswith('.'):
filename = filename[:-1]
else:
filename = s
# limit 254 chars
if len(filename) >= 255:
filename = filename[:254] + u''
# limit `length` chars
if len(filename) >= length:
filename = filename[:length - 1] + u''
# Remove [] from filename
filename = filename.replace('[]', '').strip()