mirror of
https://github.com/RicterZ/nhentai.git
synced 2025-07-01 16:09:28 +02:00
Compare commits
48 Commits
Author | SHA1 | Date | |
---|---|---|---|
d9d2a6fb91 | |||
8cd4b948e7 | |||
f884384eb3 | |||
87afab46c4 | |||
c7b1d7e6a8 | |||
ad02371158 | |||
7c9d55e0ee | |||
00aad774ae | |||
373086b459 | |||
3a83f99771 | |||
00627ab36a | |||
592e163891 | |||
84523475b0 | |||
5f5461c902 | |||
05e6ceb3cd | |||
db59426503 | |||
74197f8f90 | |||
6d91a39533 | |||
e181e0b9dd | |||
6fed1f94cb | |||
9cfb23c8ec | |||
fc347cdadf | |||
1cdebaab61 | |||
9513141ccf | |||
bdc9fa113e | |||
36946111db | |||
ce8ae54536 | |||
7aedb905d6 | |||
8b8b5f193e | |||
fc99d91ac1 | |||
ba141efba7 | |||
f78d8750f3 | |||
08bb8ffda4 | |||
af379c825c | |||
2f9386f22c | |||
3667bc34b7 | |||
84749c56bd | |||
24f79e0945 | |||
edc46a9531 | |||
72035a14e6 | |||
472528e464 | |||
3f5915fd2a | |||
0cd2576dab | |||
445a8c052e | |||
7a75afef0a | |||
a5813e19b1 | |||
8462d2f2aa | |||
51074ee948 |
@ -1,9 +1,4 @@
|
||||
include README.md
|
||||
include requirements.txt
|
||||
include nhentai/viewer/index.html
|
||||
include nhentai/viewer/styles.css
|
||||
include nhentai/viewer/scripts.js
|
||||
include nhentai/viewer/main.html
|
||||
include nhentai/viewer/main.css
|
||||
include nhentai/viewer/main.js
|
||||
include nhentai/viewer/logo.png
|
||||
include nhentai/viewer/*
|
||||
include nhentai/viewer/default/*
|
@ -67,6 +67,15 @@ Set your nhentai cookie against captcha:
|
||||
|
||||
**NOTE**: The format of the cookie is `"csrftoken=TOKEN; sessionid=ID"`
|
||||
|
||||
| To get csrftoken and sessionid, first log in to your nhentai account in a web browser, then:
|
||||
| (Chrome) |ve| |ld| More tools |ld| Developer tools |ld| Application |ld| Storage |ld| Cookies |ld| https://nhentai.net
|
||||
| (Firefox) |hv| |ld| Web Developer |ld| Web Developer Tools |ld| Storage |ld| Cookies |ld| https://nhentai.net
|
||||
|
|
||||
|
||||
.. |hv| unicode:: U+2630 .. https://www.compart.com/en/unicode/U+2630
|
||||
.. |ve| unicode:: U+22EE .. https://www.compart.com/en/unicode/U+22EE
|
||||
.. |ld| unicode:: U+2014 .. https://www.compart.com/en/unicode/U+2014
|
||||
|
||||
Download specified doujinshi:
|
||||
|
||||
.. code-block:: bash
|
||||
|
@ -1,3 +1,3 @@
|
||||
__version__ = '0.4.6'
|
||||
__version__ = '0.4.15'
|
||||
__author__ = 'RicterZ'
|
||||
__email__ = 'ricterzheng@gmail.com'
|
||||
|
@ -1,5 +1,5 @@
|
||||
# coding: utf-8
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
@ -15,17 +15,6 @@ from nhentai import __version__
|
||||
from nhentai.utils import urlparse, generate_html, generate_main_html, DB
|
||||
from nhentai.logger import logger
|
||||
|
||||
try:
|
||||
if sys.version_info < (3, 0, 0):
|
||||
import codecs
|
||||
import locale
|
||||
sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)
|
||||
sys.stderr = codecs.getwriter(locale.getpreferredencoding())(sys.stderr)
|
||||
|
||||
except NameError:
|
||||
# python3
|
||||
pass
|
||||
|
||||
|
||||
def banner():
|
||||
logger.info(u'''nHentai ver %s: あなたも変態。 いいね?
|
||||
@ -43,7 +32,7 @@ def load_config():
|
||||
|
||||
try:
|
||||
with open(constant.NHENTAI_CONFIG_FILE, 'r') as f:
|
||||
constant.CONFIG = json.load(f)
|
||||
constant.CONFIG.update(json.load(f))
|
||||
except json.JSONDecodeError:
|
||||
logger.error('Failed to load config file.')
|
||||
write_config()
|
||||
@ -95,7 +84,7 @@ def cmd_parser():
|
||||
help='timeout for downloading doujinshi')
|
||||
parser.add_option('--delay', '-d', type='int', dest='delay', action='store', default=0,
|
||||
help='slow down between downloading every doujinshi')
|
||||
parser.add_option('--proxy', type='string', dest='proxy', action='store', default='',
|
||||
parser.add_option('--proxy', type='string', dest='proxy', action='store',
|
||||
help='store a proxy, for example: -p \'http://127.0.0.1:1080\'')
|
||||
parser.add_option('--file', '-f', type='string', dest='file', action='store', help='read gallery IDs from file.')
|
||||
parser.add_option('--format', type='string', dest='name_format', action='store',
|
||||
@ -126,10 +115,11 @@ def cmd_parser():
|
||||
default=False, help='save downloaded doujinshis, whose will be skipped if you re-download them')
|
||||
parser.add_option('--clean-download-history', action='store_true', default=False, dest='clean_download_history',
|
||||
help='clean download history')
|
||||
parser.add_option('--template', dest='viewer_template', action='store',
|
||||
help='set viewer template', default='')
|
||||
|
||||
try:
|
||||
sys.argv = [unicode(i.decode(sys.stdin.encoding)) for i in sys.argv]
|
||||
print()
|
||||
except (NameError, TypeError):
|
||||
pass
|
||||
except UnicodeDecodeError:
|
||||
@ -166,7 +156,7 @@ def cmd_parser():
|
||||
exit(0)
|
||||
# TODO: search without language
|
||||
|
||||
if args.proxy:
|
||||
if args.proxy is not None:
|
||||
proxy_url = urlparse(args.proxy)
|
||||
if not args.proxy == '' and proxy_url.scheme not in ('http', 'https'):
|
||||
logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(proxy_url.scheme))
|
||||
@ -179,6 +169,19 @@ def cmd_parser():
|
||||
logger.info('Proxy now set to \'{0}\'.'.format(args.proxy))
|
||||
write_config()
|
||||
exit(0)
|
||||
|
||||
if args.viewer_template is not None:
|
||||
if not args.viewer_template:
|
||||
args.viewer_template = 'default'
|
||||
|
||||
if not os.path.exists(os.path.join(os.path.dirname(__file__),
|
||||
'viewer/{}/index.html'.format(args.viewer_template))):
|
||||
logger.error('Template \'{}\' does not exists'.format(args.viewer_template))
|
||||
exit(1)
|
||||
else:
|
||||
constant.CONFIG['template'] = args.viewer_template
|
||||
write_config()
|
||||
|
||||
# --- end set config ---
|
||||
|
||||
if args.favorites:
|
||||
|
@ -1,8 +1,7 @@
|
||||
#!/usr/bin/env python2.7
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals, print_function
|
||||
import json
|
||||
import os
|
||||
|
||||
import sys
|
||||
import signal
|
||||
import platform
|
||||
import time
|
||||
@ -13,19 +12,29 @@ from nhentai.parser import doujinshi_parser, search_parser, print_doujinshi, fav
|
||||
from nhentai.doujinshi import Doujinshi
|
||||
from nhentai.downloader import Downloader
|
||||
from nhentai.logger import logger
|
||||
from nhentai.constant import NHENTAI_CONFIG_FILE, BASE_URL
|
||||
from nhentai.constant import BASE_URL
|
||||
from nhentai.utils import generate_html, generate_cbz, generate_main_html, generate_pdf, \
|
||||
paging, check_cookie, signal_handler, DB
|
||||
|
||||
|
||||
def main():
|
||||
banner()
|
||||
|
||||
if sys.version_info < (3, 0, 0):
|
||||
logger.error('nhentai now only support Python 3.x')
|
||||
exit(1)
|
||||
|
||||
options = cmd_parser()
|
||||
logger.info('Using mirror: {0}'.format(BASE_URL))
|
||||
|
||||
# CONFIG['proxy'] will be changed after cmd_parser()
|
||||
if constant.CONFIG['proxy']:
|
||||
logger.info('Using proxy: {0}'.format(constant.CONFIG['proxy']))
|
||||
if constant.CONFIG['proxy']['http']:
|
||||
logger.info('Using proxy: {0}'.format(constant.CONFIG['proxy']['http']))
|
||||
|
||||
if not constant.CONFIG['template']:
|
||||
constant.CONFIG['template'] = 'default'
|
||||
|
||||
logger.info('Using viewer template "{}"'.format(constant.CONFIG['template']))
|
||||
|
||||
# check your cookie
|
||||
check_cookie()
|
||||
@ -56,11 +65,11 @@ def main():
|
||||
if options.is_download and doujinshis:
|
||||
doujinshi_ids = [i['id'] for i in doujinshis]
|
||||
|
||||
if options.is_save_download_history:
|
||||
with DB() as db:
|
||||
data = map(int, db.get_all())
|
||||
if options.is_save_download_history:
|
||||
with DB() as db:
|
||||
data = map(int, db.get_all())
|
||||
|
||||
doujinshi_ids = list(set(doujinshi_ids) - set(data))
|
||||
doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))
|
||||
|
||||
if doujinshi_ids:
|
||||
for i, id_ in enumerate(doujinshi_ids):
|
||||
@ -88,7 +97,7 @@ def main():
|
||||
db.add_one(doujinshi.id)
|
||||
|
||||
if not options.is_nohtml and not options.is_cbz and not options.is_pdf:
|
||||
generate_html(options.output_dir, doujinshi)
|
||||
generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template'])
|
||||
elif options.is_cbz:
|
||||
generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir)
|
||||
elif options.is_pdf:
|
||||
@ -108,5 +117,6 @@ def main():
|
||||
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@ -1,5 +1,5 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals, print_function
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
@ -30,8 +30,15 @@ NHENTAI_HISTORY = os.path.join(NHENTAI_HOME, 'history.sqlite3')
|
||||
NHENTAI_CONFIG_FILE = os.path.join(NHENTAI_HOME, 'config.json')
|
||||
|
||||
CONFIG = {
|
||||
'proxy': {},
|
||||
'proxy': {'http': '', 'https': ''},
|
||||
'cookie': '',
|
||||
'language': '',
|
||||
'template': '',
|
||||
}
|
||||
|
||||
LANGUAGEISO ={
|
||||
'english' : 'en',
|
||||
'chinese' : 'zh',
|
||||
'japanese' : 'ja',
|
||||
'translated' : 'translated'
|
||||
}
|
||||
|
@ -1,7 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import print_function, unicode_literals
|
||||
|
||||
from tabulate import tabulate
|
||||
from future.builtins import range
|
||||
|
||||
from nhentai.constant import DETAIL_URL, IMAGE_URL
|
||||
from nhentai.logger import logger
|
||||
|
@ -1,5 +1,4 @@
|
||||
# coding: utf-
|
||||
from __future__ import unicode_literals, print_function
|
||||
|
||||
import multiprocessing
|
||||
import signal
|
||||
@ -15,6 +14,7 @@ try:
|
||||
except ImportError:
|
||||
from urlparse import urlparse
|
||||
|
||||
from nhentai import constant
|
||||
from nhentai.logger import logger
|
||||
from nhentai.parser import request
|
||||
from nhentai.utils import Singleton
|
||||
@ -35,7 +35,7 @@ class Downloader(Singleton):
|
||||
self.timeout = timeout
|
||||
self.delay = delay
|
||||
|
||||
def download_(self, url, folder='', filename='', retried=0):
|
||||
def download_(self, url, folder='', filename='', retried=0, proxy=None):
|
||||
if self.delay:
|
||||
time.sleep(self.delay)
|
||||
logger.info('Starting to download {0} ...'.format(url))
|
||||
@ -52,7 +52,7 @@ class Downloader(Singleton):
|
||||
i = 0
|
||||
while i < 10:
|
||||
try:
|
||||
response = request('get', url, stream=True, timeout=self.timeout)
|
||||
response = request('get', url, stream=True, timeout=self.timeout, proxies=proxy)
|
||||
if response.status_code != 200:
|
||||
raise NHentaiImageNotExistException
|
||||
|
||||
@ -78,7 +78,8 @@ class Downloader(Singleton):
|
||||
except (requests.HTTPError, requests.Timeout) as e:
|
||||
if retried < 3:
|
||||
logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
|
||||
return 0, self.download_(url=url, folder=folder, filename=filename, retried=retried+1)
|
||||
return 0, self.download_(url=url, folder=folder, filename=filename,
|
||||
retried=retried+1, proxy=proxy)
|
||||
else:
|
||||
return 0, None
|
||||
|
||||
@ -120,16 +121,16 @@ class Downloader(Singleton):
|
||||
folder = os.path.join(self.path, folder)
|
||||
|
||||
if not os.path.exists(folder):
|
||||
logger.warn('Path \'{0}\' does not exist, creating.'.format(folder))
|
||||
logger.warning('Path \'{0}\' does not exist, creating.'.format(folder))
|
||||
try:
|
||||
os.makedirs(folder)
|
||||
except EnvironmentError as e:
|
||||
logger.critical('{0}'.format(str(e)))
|
||||
|
||||
else:
|
||||
logger.warn('Path \'{0}\' already exist.'.format(folder))
|
||||
logger.warning('Path \'{0}\' already exist.'.format(folder))
|
||||
|
||||
queue = [(self, url, folder) for url in queue]
|
||||
queue = [(self, url, folder, constant.CONFIG['proxy']) for url in queue]
|
||||
|
||||
pool = multiprocessing.Pool(self.size, init_worker)
|
||||
[pool.apply_async(download_wrapper, args=item) for item in queue]
|
||||
@ -138,9 +139,9 @@ class Downloader(Singleton):
|
||||
pool.join()
|
||||
|
||||
|
||||
def download_wrapper(obj, url, folder=''):
|
||||
def download_wrapper(obj, url, folder='', proxy=None):
|
||||
if sys.platform == 'darwin' or semaphore.get_value():
|
||||
return Downloader.download_(obj, url=url, folder=folder)
|
||||
return Downloader.download_(obj, url=url, folder=folder, proxy=proxy)
|
||||
else:
|
||||
return -3, None
|
||||
|
||||
|
@ -1,7 +1,6 @@
|
||||
#
|
||||
# Copyright (C) 2010-2012 Vinay Sajip. All rights reserved. Licensed under the new BSD license.
|
||||
#
|
||||
from __future__ import print_function, unicode_literals
|
||||
import logging
|
||||
import re
|
||||
import platform
|
||||
@ -174,7 +173,7 @@ logger.setLevel(logging.DEBUG)
|
||||
if __name__ == '__main__':
|
||||
logger.log(15, 'nhentai')
|
||||
logger.info('info')
|
||||
logger.warn('warn')
|
||||
logger.warning('warning')
|
||||
logger.debug('debug')
|
||||
logger.error('error')
|
||||
logger.critical('critical')
|
||||
|
@ -1,5 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals, print_function
|
||||
|
||||
import os
|
||||
import re
|
||||
@ -116,15 +115,18 @@ def doujinshi_parser(id_):
|
||||
|
||||
try:
|
||||
response = request('get', url)
|
||||
if response.status_code in (200,):
|
||||
if response.status_code in (200, ):
|
||||
response = response.content
|
||||
elif response.status_code in (404,):
|
||||
logger.error("Doujinshi with id {0} cannot be found".format(id_))
|
||||
return []
|
||||
else:
|
||||
logger.debug('Slow down and retry ({}) ...'.format(id_))
|
||||
time.sleep(1)
|
||||
return doujinshi_parser(str(id_))
|
||||
|
||||
except Exception as e:
|
||||
logger.warn('Error: {}, ignored'.format(str(e)))
|
||||
logger.warning('Error: {}, ignored'.format(str(e)))
|
||||
return None
|
||||
|
||||
html = BeautifulSoup(response, 'html.parser')
|
||||
@ -178,7 +180,7 @@ def old_search_parser(keyword, sorting='date', page=1):
|
||||
|
||||
result = _get_title_and_id(response)
|
||||
if not result:
|
||||
logger.warn('Not found anything of keyword {}'.format(keyword))
|
||||
logger.warning('Not found anything of keyword {}'.format(keyword))
|
||||
|
||||
return result
|
||||
|
||||
@ -219,7 +221,7 @@ def search_parser(keyword, sorting, page, is_page_all=False):
|
||||
break
|
||||
|
||||
if 'result' not in response:
|
||||
logger.warn('No result in response in page {}'.format(p))
|
||||
logger.warning('No result in response in page {}'.format(p))
|
||||
break
|
||||
|
||||
for row in response['result']:
|
||||
@ -228,7 +230,7 @@ def search_parser(keyword, sorting, page, is_page_all=False):
|
||||
result.append({'id': row['id'], 'title': title})
|
||||
|
||||
if not result:
|
||||
logger.warn('No results for keywords {}'.format(keyword))
|
||||
logger.warning('No results for keywords {}'.format(keyword))
|
||||
|
||||
return result
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
import json
|
||||
import os
|
||||
from xml.sax.saxutils import escape
|
||||
|
||||
from nhentai.constant import LANGUAGEISO
|
||||
|
||||
def serialize_json(doujinshi, dir):
|
||||
metadata = {'title': doujinshi.name,
|
||||
@ -65,7 +65,8 @@ def serialize_comicxml(doujinshi, dir):
|
||||
if doujinshi.info.languages:
|
||||
languages = [i.strip() for i in doujinshi.info.languages.split(',')]
|
||||
xml_write_simple_tag(f, 'Translated', 'Yes' if 'translated' in languages else 'No')
|
||||
[xml_write_simple_tag(f, 'Language', i) for i in languages if i != 'translated']
|
||||
[xml_write_simple_tag(f, 'LanguageISO', LANGUAGEISO[i]) for i in languages \
|
||||
if (i != 'translated' and i in LANGUAGEISO)]
|
||||
|
||||
f.write('</ComicInfo>')
|
||||
|
||||
|
103
nhentai/utils.py
103
nhentai/utils.py
@ -1,10 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals, print_function
|
||||
|
||||
import sys
|
||||
import re
|
||||
import os
|
||||
import string
|
||||
import zipfile
|
||||
import shutil
|
||||
import requests
|
||||
@ -22,7 +20,11 @@ def request(method, url, **kwargs):
|
||||
'User-Agent': 'nhentai command line client (https://github.com/RicterZ/nhentai)',
|
||||
'Cookie': constant.CONFIG['cookie']
|
||||
})
|
||||
return getattr(session, method)(url, proxies=constant.CONFIG['proxy'], verify=False, **kwargs)
|
||||
|
||||
if not kwargs.get('proxies', None):
|
||||
kwargs['proxies'] = constant.CONFIG['proxy']
|
||||
|
||||
return getattr(session, method)(url, verify=False, **kwargs)
|
||||
|
||||
|
||||
def check_cookie():
|
||||
@ -64,7 +66,7 @@ def readfile(path):
|
||||
return file.read()
|
||||
|
||||
|
||||
def generate_html(output_dir='.', doujinshi_obj=None):
|
||||
def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
|
||||
image_html = ''
|
||||
|
||||
if doujinshi_obj is not None:
|
||||
@ -81,9 +83,9 @@ def generate_html(output_dir='.', doujinshi_obj=None):
|
||||
|
||||
image_html += '<img src="{0}" class="image-item"/>\n'\
|
||||
.format(image)
|
||||
html = readfile('viewer/index.html')
|
||||
css = readfile('viewer/styles.css')
|
||||
js = readfile('viewer/scripts.js')
|
||||
html = readfile('viewer/{}/index.html'.format(template))
|
||||
css = readfile('viewer/{}/styles.css'.format(template))
|
||||
js = readfile('viewer/{}/scripts.js'.format(template))
|
||||
|
||||
if doujinshi_obj is not None:
|
||||
serialize_json(doujinshi_obj, doujinshi_dir)
|
||||
@ -168,7 +170,7 @@ def generate_main_html(output_dir='./'):
|
||||
logger.warning('Writing Main Viewer failed ({})'.format(str(e)))
|
||||
|
||||
|
||||
def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_comic_info=False):
|
||||
def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_comic_info=True):
|
||||
if doujinshi_obj is not None:
|
||||
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
|
||||
if write_comic_info:
|
||||
@ -196,52 +198,61 @@ def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_
|
||||
def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False):
|
||||
try:
|
||||
import img2pdf
|
||||
|
||||
"""Write images to a PDF file using img2pdf."""
|
||||
if doujinshi_obj is not None:
|
||||
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
|
||||
pdf_filename = os.path.join(
|
||||
os.path.join(doujinshi_dir, '..'),
|
||||
'{}.pdf'.format(doujinshi_obj.filename)
|
||||
)
|
||||
else:
|
||||
pdf_filename = './doujinshi.pdf'
|
||||
doujinshi_dir = '.'
|
||||
|
||||
file_list = os.listdir(doujinshi_dir)
|
||||
file_list.sort()
|
||||
|
||||
logger.info('Writing PDF file to path: {}'.format(pdf_filename))
|
||||
with open(pdf_filename, 'wb') as pdf_f:
|
||||
full_path_list = (
|
||||
[os.path.join(doujinshi_dir, image) for image in file_list]
|
||||
)
|
||||
pdf_f.write(img2pdf.convert(full_path_list))
|
||||
|
||||
if rm_origin_dir:
|
||||
shutil.rmtree(doujinshi_dir, ignore_errors=True)
|
||||
|
||||
logger.log(15, 'PDF file has been written to \'{0}\''.format(doujinshi_dir))
|
||||
|
||||
except ImportError:
|
||||
logger.error("Please install img2pdf package by using pip.")
|
||||
|
||||
"""Write images to a PDF file using img2pdf."""
|
||||
if doujinshi_obj is not None:
|
||||
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
|
||||
pdf_filename = os.path.join(
|
||||
os.path.join(doujinshi_dir, '..'),
|
||||
'{}.pdf'.format(doujinshi_obj.filename)
|
||||
)
|
||||
else:
|
||||
pdf_filename = './doujinshi.pdf'
|
||||
doujinshi_dir = '.'
|
||||
|
||||
file_list = os.listdir(doujinshi_dir)
|
||||
file_list.sort()
|
||||
|
||||
logger.info('Writing PDF file to path: {}'.format(pdf_filename))
|
||||
with open(pdf_filename, 'wb') as pdf_f:
|
||||
full_path_list = (
|
||||
[os.path.join(doujinshi_dir, image) for image in file_list]
|
||||
)
|
||||
pdf_f.write(img2pdf.convert(full_path_list))
|
||||
|
||||
if rm_origin_dir:
|
||||
shutil.rmtree(doujinshi_dir, ignore_errors=True)
|
||||
|
||||
logger.log(15, 'PDF file has been written to \'{0}\''.format(doujinshi_dir))
|
||||
def unicode_truncate(s, length, encoding='utf-8'):
|
||||
"""https://stackoverflow.com/questions/1809531/truncating-unicode-so-it-fits-a-maximum-size-when-encoded-for-wire-transfer
|
||||
"""
|
||||
encoded = s.encode(encoding)[:length]
|
||||
return encoded.decode(encoding, 'ignore')
|
||||
|
||||
|
||||
def format_filename(s):
|
||||
"""Take a string and return a valid filename constructed from the string.
|
||||
Uses a whitelist approach: any characters not present in valid_chars are
|
||||
removed. Also spaces are replaced with underscores.
|
||||
|
||||
Note: this method may produce invalid filenames such as ``, `.` or `..`
|
||||
When I use this method I prepend a date string like '2009_01_15_19_46_32_'
|
||||
and append a file extension like '.txt', so I avoid the potential of using
|
||||
an invalid filename.
|
||||
|
||||
"""
|
||||
"""
|
||||
It used to be a whitelist approach allowed only alphabet and a part of symbols.
|
||||
but most doujinshi's names include Japanese 2-byte characters and these was rejected.
|
||||
so it is using blacklist approach now.
|
||||
if filename include forbidden characters (\'/:,;*?"<>|) ,it replace space character(' ').
|
||||
"""
|
||||
# maybe you can use `--format` to select a suitable filename
|
||||
valid_chars = "-_.()[] %s%s" % (string.ascii_letters, string.digits)
|
||||
filename = ''.join(c for c in s if c in valid_chars)
|
||||
ban_chars = '\\\'/:,;*?"<>|\t'
|
||||
filename = s.translate(str.maketrans(ban_chars, ' '*len(ban_chars))).strip()
|
||||
filename = ' '.join(filename.split())
|
||||
print(repr(filename))
|
||||
|
||||
while filename.endswith('.'):
|
||||
filename = filename[:-1]
|
||||
|
||||
if len(filename) > 100:
|
||||
filename = filename[:100] + '...]'
|
||||
filename = filename[:100] + u'…'
|
||||
|
||||
# Remove [] from filename
|
||||
filename = filename.replace('[]', '').strip()
|
||||
|
@ -1,7 +1,6 @@
|
||||
requests>=2.5.0
|
||||
soupsieve<2.0
|
||||
soupsieve
|
||||
BeautifulSoup4>=4.0.0
|
||||
threadpool>=1.2.7
|
||||
tabulate>=0.7.5
|
||||
future>=0.15.2
|
||||
iso8601 >= 0.1
|
||||
|
Reference in New Issue
Block a user