From bc6ef0cf5d590f09368b2887f1b57d7c58fd110c Mon Sep 17 00:00:00 2001 From: Ricter Z Date: Sat, 4 Feb 2023 20:22:57 +0800 Subject: [PATCH] solve #251 --- nhentai/doujinshi.py | 21 +++++++-------------- nhentai/utils.py | 25 ++++++++++++++++--------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/nhentai/doujinshi.py b/nhentai/doujinshi.py index 271ec74..66fa45b 100644 --- a/nhentai/doujinshi.py +++ b/nhentai/doujinshi.py @@ -6,7 +6,7 @@ from nhentai.constant import DETAIL_URL, IMAGE_URL from nhentai.logger import logger from nhentai.utils import format_filename -MAX_FIELD_LENGTH = 100 + EXT_MAP = { 'j': 'jpg', 'p': 'png', @@ -25,13 +25,6 @@ class DoujinshiInfo(dict): return '' -def trunk_string(string): - if len(string) >= MAX_FIELD_LENGTH: - string = string[:MAX_FIELD_LENGTH] + u'…' - - return string - - class Doujinshi(object): def __init__(self, name=None, pretty_name=None, id=None, img_id=None, ext='', pages=0, name_format='[%i][%a][%t]', **kwargs): @@ -45,13 +38,13 @@ class Doujinshi(object): self.url = '%s/%d' % (DETAIL_URL, self.id) self.info = DoujinshiInfo(**kwargs) - name_format = name_format.replace('%i', str(self.id)) - name_format = name_format.replace('%a', self.info.artists) + name_format = name_format.replace('%i', format_filename(str(self.id))) + name_format = name_format.replace('%a', format_filename(self.info.artists)) - name_format = name_format.replace('%t', trunk_string(self.name)) - name_format = name_format.replace('%p', trunk_string(self.pretty_name)) - name_format = name_format.replace('%s', trunk_string(self.info.subtitle)) - self.filename = format_filename(name_format) + name_format = name_format.replace('%t', format_filename(self.name)) + name_format = name_format.replace('%p', format_filename(self.pretty_name)) + name_format = name_format.replace('%s', format_filename(self.info.subtitle)) + self.filename = format_filename(name_format, 255, True) self.table = [ ["Parodies", self.info.parodies], diff --git a/nhentai/utils.py b/nhentai/utils.py index 936ba2f..f90f42f 100644 --- a/nhentai/utils.py +++ b/nhentai/utils.py @@ -13,6 +13,9 @@ from nhentai.logger import logger from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database +MAX_FIELD_LENGTH = 100 + + def request(method, url, **kwargs): session = requests.Session() session.headers.update({ @@ -247,7 +250,7 @@ def unicode_truncate(s, length, encoding='utf-8'): return encoded.decode(encoding, 'ignore') -def format_filename(s): +def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False): """ It used to be a whitelist approach allowed only alphabet and a part of symbols. but most doujinshi's names include Japanese 2-byte characters and these was rejected. @@ -255,16 +258,20 @@ def format_filename(s): if filename include forbidden characters (\'/:,;*?"<>|) ,it replace space character(' '). """ # maybe you can use `--format` to select a suitable filename - ban_chars = '\\\'/:,;*?"<>|\t' - filename = s.translate(str.maketrans(ban_chars, ' ' * len(ban_chars))).strip() - filename = ' '.join(filename.split()) - while filename.endswith('.'): - filename = filename[:-1] + if not _truncate_only: + ban_chars = '\\\'/:,;*?"<>|\t' + filename = s.translate(str.maketrans(ban_chars, ' ' * len(ban_chars))).strip() + filename = ' '.join(filename.split()) - # limit 254 chars - if len(filename) >= 255: - filename = filename[:254] + u'…' + while filename.endswith('.'): + filename = filename[:-1] + else: + filename = s + + # limit `length` chars + if len(filename) >= length: + filename = filename[:length - 1] + u'…' # Remove [] from filename filename = filename.replace('[]', '').strip()