Compare commits

..

18 Commits

Author SHA1 Message Date
cac07a517e remove debug print 2024-11-09 11:51:03 +08:00
f30ff59b2b Merge pull request #348 from JustAHumanBean/webp
add webp support
2024-11-08 16:33:21 +08:00
1504ee779f Update utils.py 2024-11-08 07:49:20 +00:00
98d9eecf6d Update parser.py 2024-11-08 07:47:50 +00:00
e16e623b9d Update doujinshi.py 2024-11-08 07:46:53 +00:00
c3f3182df3 0.5.12 2024-10-01 22:55:01 +09:00
12aad842f8 fix #347 2024-10-01 22:42:26 +09:00
f9f76ab0f5 0.5.11 2024-10-01 12:48:28 +09:00
744a9e4418 Merge branch 'master' of github.com:RicterZ/nhentai 2024-10-01 12:47:48 +09:00
c3e9fff491 fix bug #345 2024-10-01 12:47:13 +09:00
a84e2c5714 fix bug #341 2024-10-01 12:47:10 +09:00
c814c35c50 fix bug #341 2024-10-01 12:39:28 +09:00
e2f71437e2 fix setuptools warning 2024-09-22 16:37:49 +08:00
2fa45ae4df 0.5.10 2024-09-22 16:36:50 +08:00
17bc33c6cb fix arguments pass issue #344 2024-09-22 16:34:53 +08:00
09bb8460f6 fix overwrite issue #344 2024-09-22 16:32:01 +08:00
eb5b93d654 fix: pdf/cbz file already exists, but download process continues 2024-09-22 07:33:52 +00:00
cb6cf6df1a regression: pdf/cbz file already exists, but origin files are downloaded anyways.
- call download with `--cbz --rm-origin-dir`, and run command twice.
- user should pass `--regenerate` option to get back origin dir.
2024-09-22 07:24:16 +00:00
10 changed files with 31 additions and 26 deletions

1
.gitignore vendored
View File

@@ -8,3 +8,4 @@ dist/
output/
venv/
.vscode/
test-output

View File

@@ -1,3 +1,3 @@
__version__ = '0.5.9'
__version__ = '0.5.12'
__author__ = 'RicterZ'
__email__ = 'ricterzheng@gmail.com'

View File

@@ -94,6 +94,7 @@ def main():
doujinshi.download()
else:
logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
continue
if options.generate_metadata:
generate_metadata_file(options.output_dir, doujinshi)
@@ -106,10 +107,10 @@ def main():
generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template'])
if options.is_cbz:
generate_doc('cbz', options.output_dir, doujinshi, options.rm_origin_dir)
generate_doc('cbz', options.output_dir, doujinshi, options.regenerate)
if options.is_pdf:
generate_doc('pdf', options.output_dir, doujinshi, options.rm_origin_dir)
generate_doc('pdf', options.output_dir, doujinshi, options.regenerate)
if options.move_to_folder:
if options.is_cbz:

View File

@@ -38,9 +38,9 @@ FAV_URL = f'{BASE_URL}/favorites/'
IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i.{urlparse(BASE_URL).hostname}/galleries'
IMAGE_URL_MIRRORS = [
f'{urlparse(BASE_URL).scheme}://i3.{urlparse(BASE_URL).hostname}'
f'{urlparse(BASE_URL).scheme}://i5.{urlparse(BASE_URL).hostname}'
f'{urlparse(BASE_URL).scheme}://i7.{urlparse(BASE_URL).hostname}'
f'{urlparse(BASE_URL).scheme}://i3.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i5.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i7.{urlparse(BASE_URL).hostname}',
]
NHENTAI_HOME = get_nhentai_home()

View File

@@ -12,6 +12,7 @@ EXT_MAP = {
'j': 'jpg',
'p': 'png',
'g': 'gif',
'w': 'webp',
}
@@ -75,26 +76,28 @@ class Doujinshi(object):
def check_if_need_download(self, options):
base_path = os.path.join(self.downloader.path, self.filename)
# doujinshi directory is not exist, we need to download definitely
if not (os.path.exists(base_path) and os.path.isdir(base_path)):
return True
# regenerate, we need to re-download from nhentai
# regenerate, re-download
if options.regenerate:
return True
# pdf or cbz file exists, skip re-download
# doujinshi directory may not exist b/c of --rm-origin-dir option set.
# user should pass --regenerate option to get back origin dir.
ret_pdf = ret_cbz = None
if options.is_pdf:
file_ext = 'pdf'
elif options.is_cbz:
file_ext = 'cbz'
else:
# re-download
return True
ret_pdf = os.path.exists(f'{base_path}.pdf') or os.path.exists(f'{base_path}/{self.filename}.pdf')
# pdf or cbz file exists, we needn't to re-download it
if os.path.exists(f'{base_path}.{file_ext}') or os.path.exists(f'{base_path}/{self.filename}.{file_ext}'):
if options.is_cbz:
ret_cbz = os.path.exists(f'{base_path}.cbz') or os.path.exists(f'{base_path}/{self.filename}.cbz')
ret = list(filter(lambda s: s is not None, [ret_cbz, ret_pdf]))
if ret and all(ret):
return False
# doujinshi directory doesn't exist, re-download
if not (os.path.exists(base_path) and os.path.isdir(base_path)):
return True
# fallback
return True

View File

@@ -69,7 +69,7 @@ class Downloader(Singleton):
if response.status_code != 200:
path = urlparse(url).path
for mirror in constant.IMAGE_URL_MIRRORS:
print(f'{mirror}{path}')
# print(f'{mirror}{path}')
mirror_url = f'{mirror}{path}'
response = request('get', mirror_url, stream=True,
timeout=self.timeout, proxies=proxy)

View File

@@ -148,7 +148,7 @@ def doujinshi_parser(id_, counter=0):
doujinshi['subtitle'] = subtitle.text if subtitle else ''
doujinshi_cover = html.find('div', attrs={'id': 'cover'})
img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif)$',
img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
doujinshi_cover.a.img.attrs['data-src'])
ext = []

View File

@@ -104,7 +104,7 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
file_list.sort()
for image in file_list:
if not os.path.splitext(image)[1] in ('.jpg', '.png'):
if not os.path.splitext(image)[1] in ('.jpg', '.png', '.webp'):
continue
image_html += f'<img src="{image}" class="image-item"/>\n'
@@ -230,7 +230,7 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa
import img2pdf
"""Write images to a PDF file using img2pdf."""
file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif'))]
file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp'))]
file_list.sort()
logger.info(f'Writing PDF file to path: {filename}')
@@ -309,7 +309,7 @@ def generate_metadata_file(output_dir, doujinshi_obj):
'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
'TAGS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
'SERIES', 'PARODY', 'URL']
special_fields = ['PARODY', 'TITLE', 'ORIGINAL TITLE', 'CHARACTERS', 'AUTHOR', 'GROUPS',
special_fields = ['PARODY', 'TITLE', 'ORIGINAL TITLE', 'DATE', 'CHARACTERS', 'AUTHOR', 'GROUPS',
'LANGUAGE', 'TAGS', 'URL', 'PAGES']
for i in range(len(fields)):

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "nhentai"
version = "0.5.9"
version = "0.5.12"
description = "nhentai doujinshi downloader"
authors = ["Ricter Z <ricterzheng@gmail.com>"]
license = "MIT"

View File

@@ -1,3 +1,3 @@
[metadata]
description-file = README.rst
description_file = README.rst