Compare commits

..

107 Commits
dev ... 0.5.17

Author SHA1 Message Date
7089144ac6 fix #360 #359 2024-12-09 09:25:40 +08:00
0a9f7c3d3e 0.5.15 fix some bugs 2024-12-04 11:04:04 +08:00
40536ad456 Merge branch 'master' of github.com:RicterZ/nhentai 2024-12-04 11:03:48 +08:00
edb571c9dd fix #358 2024-12-04 11:00:50 +08:00
b2befd3473 Merge pull request #357 from FelixJS123/favorite_metadata
add favorites count metadata
2024-12-04 10:47:32 +08:00
c2e880f172 fix asyncio proxies settings and update httpx version 2024-12-04 10:46:45 +08:00
841988bc29 Updated README 2024-11-30 22:58:54 -08:00
390948e252 add favorites count metadata 2024-11-30 22:53:45 -08:00
b9b8468bfe 0.5.14 2024-12-01 10:37:59 +08:00
3d6263cf11 Merge pull request #354 from normalizedwater546/master
asyncio: fix downloader being run sequentially + httpx: fix proxy and missing headers
2024-11-24 13:50:22 +08:00
e3410f5a9a fix: add headers, proxy to async_request 2024-11-23 13:11:25 +00:00
feb7f45533 fix: semaphore bound to different event loop 2024-11-23 12:19:36 +00:00
0754caaeb7 fix: update threads argument 2024-11-23 11:20:58 +00:00
49e5a3094a fix: recent asyncio change resulting in sequential downloads
This was due to AsyncIO completely ignoring the thread (size) argument, and not updating sleep to be non-blocking.
2024-11-23 11:17:09 +00:00
c044b64beb Merge pull request #353 from hzxjy1/master
Fix issue #7
2024-11-19 02:10:34 +08:00
f8334c09b5 Add dependence httpx 2024-11-19 01:16:51 +08:00
c90c486fb4 Add a fix fatch for downloader 2024-11-19 01:13:16 +08:00
90b17832cc Merge pull request #351 from hzxjy1/master
Use coroutine in url download
2024-11-17 10:10:54 +08:00
14c6db9cc3 Use coroutine in url download and improve the extensibility of class Downloader 2024-11-16 15:57:59 +08:00
f30ff59b2b Merge pull request #348 from JustAHumanBean/webp
add webp support
2024-11-08 16:33:21 +08:00
1504ee779f Update utils.py 2024-11-08 07:49:20 +00:00
98d9eecf6d Update parser.py 2024-11-08 07:47:50 +00:00
e16e623b9d Update doujinshi.py 2024-11-08 07:46:53 +00:00
c3f3182df3 0.5.12 2024-10-01 22:55:01 +09:00
12aad842f8 fix #347 2024-10-01 22:42:26 +09:00
f9f76ab0f5 0.5.11 2024-10-01 12:48:28 +09:00
744a9e4418 Merge branch 'master' of github.com:RicterZ/nhentai 2024-10-01 12:47:48 +09:00
c3e9fff491 fix bug #345 2024-10-01 12:47:13 +09:00
a84e2c5714 fix bug #341 2024-10-01 12:47:10 +09:00
c814c35c50 fix bug #341 2024-10-01 12:39:28 +09:00
e2f71437e2 fix setuptools warning 2024-09-22 16:37:49 +08:00
2fa45ae4df 0.5.10 2024-09-22 16:36:50 +08:00
17bc33c6cb fix arguments pass issue #344 2024-09-22 16:34:53 +08:00
09bb8460f6 fix overwrite issue #344 2024-09-22 16:32:01 +08:00
eb5b93d654 fix: pdf/cbz file already exists, but download process continues 2024-09-22 07:33:52 +00:00
cb6cf6df1a regression: pdf/cbz file already exists, but origin files are downloaded anyways.
- call download with `--cbz --rm-origin-dir`, and run command twice.
- user should pass `--regenerate` option to get back origin dir.
2024-09-22 07:24:16 +00:00
98a66a3cb0 0.5.9 2024-09-22 15:09:36 +08:00
02d47632cf fix bug of move-to-dir 2024-09-22 15:07:53 +08:00
f932b1fbbe update README: mirror setup 2024-09-22 14:45:07 +08:00
fd9e92f9d4 update README 2024-09-22 14:44:42 +08:00
a8a48c6ce7 Merge pull request #343 from RicterZ/pull-342
improve #342
2024-09-22 14:42:32 +08:00
f6e9d08fc7 0.5.8 #343 2024-09-22 14:42:02 +08:00
9c1c2ea069 improve download logic #343 2024-09-22 14:39:32 +08:00
984ae4262c generate_metadata_file no need to use parse_doujinshi_obj 2024-09-22 14:11:55 +08:00
cbf9448ed9 improve #342 2024-09-22 13:35:07 +08:00
16bac45f02 generate html viewer automatically after download #342 2024-09-22 12:30:55 +08:00
7fa9193112 fix: non-image files in pdf conversion causing crash 2024-09-22 02:05:32 +00:00
a05a308e71 fix: check if metadata file is downloaded before skipping 2024-09-22 01:39:40 +00:00
5a29eaf775 fix: add file_type check to downloader
If you wanted to generate both .cbz and .pdf, the .pdf will be skipped if .cbz was generated first.
2024-09-22 01:38:54 +00:00
497eb6fe50 fix: remove warning for folder already exists in downloader
Nothing is wrong with the folder already existing -- silently ignore and move on. Might still have other files inside that haven't been downloaded yet.
2024-09-22 01:00:06 +00:00
4bfe104714 refactor: de-dupe doujinshi_obj parsers 2024-09-22 00:44:06 +00:00
12364e980c fix process continuing despite cbz download request skipped 2024-09-22 00:43:10 +00:00
b51e812449 fix #330 2024-09-21 11:49:22 +08:00
0ed5fa1931 fix #320 2024-09-21 00:43:14 +08:00
7f655b0f10 fix #295 2024-09-21 00:32:10 +08:00
dec3f44542 add some debug hack 2024-09-21 00:21:01 +08:00
40072a8483 0.5.7 2024-09-21 00:00:04 +08:00
f97469259d fix #331 2024-09-20 23:59:34 +08:00
ec608cc741 fix workflow docker issue 2024-09-20 23:58:25 +08:00
30e2814fe2 update version number in pyproject.toml 2024-09-20 23:57:10 +08:00
da298e1fe7 Merge pull request #312 from RicterZ/dependabot/pip/idna-3.7
Bump idna from 3.4 to 3.7
2024-09-20 23:56:25 +08:00
51d43ddde0 Merge branch 'master' into dependabot/pip/idna-3.7 2024-09-20 23:56:18 +08:00
c734881fc7 Merge pull request #316 from RicterZ/dependabot/pip/requests-2.32.0
Bump requests from 2.31.0 to 2.32.0
2024-09-20 23:55:33 +08:00
8d5803a45e Merge branch 'master' into dependabot/pip/requests-2.32.0 2024-09-20 23:55:28 +08:00
b441085b45 Merge pull request #318 from RicterZ/dependabot/pip/urllib3-1.26.19
Bump urllib3 from 1.26.18 to 1.26.19
2024-09-20 23:55:08 +08:00
132b26f8c4 Merge branch 'master' into dependabot/pip/urllib3-1.26.19 2024-09-20 23:54:57 +08:00
a0dc952fd3 Merge pull request #319 from RicterZ/dependabot/pip/certifi-2024.7.4
Bump certifi from 2022.12.7 to 2024.7.4
2024-09-20 23:54:18 +08:00
2bd862777b fix #333 2024-09-20 23:53:26 +08:00
35c55503fa 0.5.6 2024-09-20 23:39:38 +08:00
29aac84d53 fix #336 2024-09-20 23:34:26 +08:00
4ed4523782 fix #341 2024-09-20 23:27:37 +08:00
4223326c13 Merge pull request #340 from vglint/patch-3
Fix gallery search for folders with underscore
2024-09-14 10:17:57 +08:00
a248ff98c4 Fix gallery search for folders with underscore
Gallery title names replace '_' in the folder name with ' ' (generate_main_html()). To match against these title names when searching, we must also replace '_' with ' ' for each folder name we add to the list of titles to unhide.
2024-09-13 15:56:01 -07:00
021f17d229 Merge pull request #321 from PenitentMonke/xdg-base-dir
Adhere to XDG base dir spec on Linux
2024-07-08 22:03:38 +08:00
4162eabe93 Adhere to XDG base dir spec on Linux
Change how NHENTAI_HOME is set to follow the XDG Base Directory
Specification where possible, when running on Linux.

ISSUE: 299
2024-07-07 02:40:33 -03:00
c75e9efb21 Bump certifi from 2022.12.7 to 2024.7.4
Bumps [certifi](https://github.com/certifi/python-certifi) from 2022.12.7 to 2024.7.4.
- [Commits](https://github.com/certifi/python-certifi/compare/2022.12.07...2024.07.04)

---
updated-dependencies:
- dependency-name: certifi
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-07-05 21:52:23 +00:00
f2dec5c2a3 Bump urllib3 from 1.26.18 to 1.26.19
Bumps [urllib3](https://github.com/urllib3/urllib3) from 1.26.18 to 1.26.19.
- [Release notes](https://github.com/urllib3/urllib3/releases)
- [Changelog](https://github.com/urllib3/urllib3/blob/1.26.19/CHANGES.rst)
- [Commits](https://github.com/urllib3/urllib3/compare/1.26.18...1.26.19)

---
updated-dependencies:
- dependency-name: urllib3
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-06-18 01:35:13 +00:00
845a0d5659 ---
updated-dependencies:
- dependency-name: requests
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-05-21 05:39:26 +00:00
03d85c4e5d Bump idna from 3.4 to 3.7
Bumps [idna](https://github.com/kjd/idna) from 3.4 to 3.7.
- [Release notes](https://github.com/kjd/idna/releases)
- [Changelog](https://github.com/kjd/idna/blob/master/HISTORY.rst)
- [Commits](https://github.com/kjd/idna/compare/v3.4...v3.7)

---
updated-dependencies:
- dependency-name: idna
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-04-12 02:06:40 +00:00
dc54a43610 Merge pull request #311 from RicterZ/dev
Dev merge to master
2024-03-28 17:56:28 +08:00
473f948565 update 2024-02-20 10:28:54 +08:00
f701485840 remove print 2024-02-20 10:27:34 +08:00
d8e4f50609 support #291 2024-02-20 10:25:44 +08:00
a893f54da1 0.5.4 2023-12-28 17:46:40 +08:00
4e307911ce Merge pull request #297 from RicterZ/dependabot/pip/urllib3-1.26.18
Bump urllib3 from 1.26.14 to 1.26.18
2023-12-28 17:46:07 +08:00
f9b7f828a5 fix #298 2023-12-28 17:45:37 +08:00
092df9e539 Bump urllib3 from 1.26.14 to 1.26.18
Bumps [urllib3](https://github.com/urllib3/urllib3) from 1.26.14 to 1.26.18.
- [Release notes](https://github.com/urllib3/urllib3/releases)
- [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst)
- [Commits](https://github.com/urllib3/urllib3/compare/1.26.14...1.26.18)

---
updated-dependencies:
- dependency-name: urllib3
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-10-17 23:59:22 +00:00
8d74866abf Update README.rst 2023-08-21 21:47:07 +08:00
bc5b7f982d Merge pull request #294 from edgar1016/master
Added --move-to-folder
2023-08-19 19:13:38 +08:00
e54f3cbd06 Added --move-to-folder 2023-08-18 18:30:14 -07:00
a31c615259 Merge pull request #284 from RicterZ/dependabot/pip/requests-2.31.0
Bump requests from 2.28.2 to 2.31.0
2023-05-25 20:40:59 +08:00
cf0b76204d Bump requests from 2.28.2 to 2.31.0
Bumps [requests](https://github.com/psf/requests) from 2.28.2 to 2.31.0.
- [Release notes](https://github.com/psf/requests/releases)
- [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md)
- [Commits](https://github.com/psf/requests/compare/v2.28.2...v2.31.0)

---
updated-dependencies:
- dependency-name: requests
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-05-23 06:19:34 +00:00
17402623c4 Merge pull request #282 from edgar1016/master
--page-all works with favorites
2023-04-22 13:06:40 +08:00
a1a310f06b --page-all works with favorites 2023-04-21 22:00:00 -07:00
57673da762 update version 2023-03-28 21:02:47 +08:00
dab61291cb Merge pull request #280 from RicterZ/dev
0.5.3
2023-03-28 20:58:08 +08:00
9ed4e04241 Merge pull request #279 from RicterZ/dev
update setup informations
2023-03-28 20:56:53 +08:00
f1cc63a591 Merge pull request #278 from RicterZ/dev
fix #277
2023-03-28 20:54:49 +08:00
f534b0b47f Merge pull request #275 from RicterZ/dev
remove tests
2023-03-04 18:40:45 +08:00
458c68d5e6 Merge pull request #274 from RicterZ/dev
Dev
2023-03-04 18:39:07 +08:00
fc507d246a Merge pull request #271 from edgar1016/master
Fixed info.txt
2023-02-20 23:58:26 +08:00
3ed84c5a67 Fixed info.txt 2023-02-20 01:54:32 -07:00
61f4a43081 remove test 2023-02-20 12:58:28 +08:00
4179947f16 add %ag %g formatter #269 2023-02-20 12:55:18 +08:00
9f55223e28 use Unknown as field value if it is null #269 2023-02-20 12:47:00 +08:00
b56e5b63a9 Merge pull request #268 from RicterZ/dev
enhancement of legacy search parser
2023-02-07 19:46:09 +08:00
179852a343 Merge pull request #267 from RicterZ/dev
add counter
2023-02-06 17:51:54 +08:00
17 changed files with 514 additions and 245 deletions

1
.gitignore vendored
View File

@ -8,3 +8,4 @@ dist/
output/
venv/
.vscode/
test-output

View File

@ -11,6 +11,8 @@ nhentai
nhentai is a CLI tool for downloading doujinshi from `nhentai.net <https://nhentai.net>`_
GUI version: `https://github.com/edgar1016/nhentai-GUI <https://github.com/edgar1016/nhentai-GUI>`_
===================
Manual Installation
===================
@ -138,10 +140,13 @@ Format output doujinshi folder name:
Supported doujinshi folder formatter:
- %i: Doujinshi id
- %f: Doujinshi favorite count
- %t: Doujinshi name
- %s: Doujinshi subtitle (translated name)
- %a: Doujinshi authors' name
- %g: Doujinshi groups name
- %p: Doujinshi pretty name
- %ag: Doujinshi authors name or groups name
Other options:
@ -157,25 +162,21 @@ Other options:
NHENTAI nhentai mirror url
Options:
# Operation options, control the program behaviors
-h, --help show this help message and exit
-D, --download download doujinshi (for search results)
-S, --show just show the doujinshi information
# Doujinshi options, specify id, keyword, etc.
--id doujinshi ids set, e.g. 167680 167681 167682
-s KEYWORD, --search=KEYWORD
search doujinshi by keyword
-F, --favorites list or download your favorites
# Page options, control the page to fetch / download
-a ARTIST, --artist=ARTIST
list doujinshi by artist name
--page-all all search results
--page=PAGE, --page-range=PAGE
page number of search results. e.g. 1,2-5,14
--sorting=SORTING sorting of doujinshi (recent / popular /
--sorting=SORTING, --sort=SORTING
sorting of doujinshi (recent / popular /
popular-[today|week])
# Download options, the output directory, threads, timeout, delay, etc.
-o OUTPUT_DIR, --output=OUTPUT_DIR
output dir
-t THREADS, --threads=THREADS
@ -188,8 +189,6 @@ Other options:
-f FILE, --file=FILE read gallery IDs from file.
--format=NAME_FORMAT format the saved folder name
--dry-run Dry run, skip file download
# Generate options, for generate html viewer, cbz file, pdf file, etc
--html generate a html viewer at current directory
--no-html don't generate HTML after downloading
--gen-main generate a main viewer contain all the doujin in the
@ -198,10 +197,10 @@ Other options:
-P, --pdf generate PDF file
--rm-origin-dir remove downloaded doujinshi dir when generated CBZ or
PDF file
--move-to-folder remove files in doujinshi dir then move new file to
folder when generated CBZ or PDF file
--meta generate a metadata file in doujinshi format
--regenerate-cbz regenerate the cbz file if exists
# nhentai options, set cookie, user-agent, language, remove caches, histories, etc
--regenerate regenerate the cbz or pdf file if exists
--cookie=COOKIE set cookie of nhentai to bypass Cloudflare captcha
--useragent=USERAGENT, --user-agent=USERAGENT
set useragent to bypass Cloudflare captcha
@ -225,6 +224,9 @@ For example:
.. code-block::
i.h.loli.club -> i.nhentai.net
i3.h.loli.club -> i3.nhentai.net
i5.h.loli.club -> i5.nhentai.net
i7.h.loli.club -> i7.nhentai.net
h.loli.club -> nhentai.net
Set `NHENTAI` env var to your nhentai mirror.

View File

@ -1,3 +1,3 @@
__version__ = '0.5.3'
__version__ = '0.5.15'
__author__ = 'RicterZ'
__email__ = 'ricterzheng@gmail.com'

View File

@ -81,9 +81,9 @@ def cmd_parser():
help='all search results')
parser.add_option('--page', '--page-range', type='string', dest='page', action='store', default='1',
help='page number of search results. e.g. 1,2-5,14')
parser.add_option('--sorting', '--sort', dest='sorting', action='store', default='popular-all',
help='sorting of doujinshi (recent / popular-all / popular-[today|week])',
choices=['recent', 'popular-all', 'popular-today', 'popular-week', 'date'])
parser.add_option('--sorting', '--sort', dest='sorting', action='store', default='popular',
help='sorting of doujinshi (recent / popular / popular-[today|week])',
choices=['recent', 'popular', 'popular-today', 'popular-week', 'date'])
# download options
parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', default='./',
@ -118,8 +118,8 @@ def cmd_parser():
help='remove files in doujinshi dir then move new file to folder when generated CBZ or PDF file')
parser.add_option('--meta', dest='generate_metadata', action='store_true',
help='generate a metadata file in doujinshi format')
parser.add_option('--regenerate-cbz', dest='regenerate_cbz', action='store_true', default=False,
help='regenerate the cbz file if exists')
parser.add_option('--regenerate', dest='regenerate', action='store_true', default=False,
help='regenerate the cbz or pdf file if exists')
# nhentai options
parser.add_option('--cookie', type='str', dest='cookie', action='store',

View File

@ -1,4 +1,6 @@
# coding: utf-8
import os
import shutil
import sys
import signal
import platform
@ -11,8 +13,8 @@ from nhentai.doujinshi import Doujinshi
from nhentai.downloader import Downloader
from nhentai.logger import logger
from nhentai.constant import BASE_URL
from nhentai.utils import generate_html, generate_cbz, generate_main_html, generate_pdf, generate_metadata_file, \
paging, check_cookie, signal_handler, DB
from nhentai.utils import generate_html, generate_doc, generate_main_html, generate_metadata_file, \
paging, check_cookie, signal_handler, DB, move_to_folder
def main():
@ -46,7 +48,7 @@ def main():
if not options.is_download:
logger.warning('You do not specify --download option')
doujinshis = favorites_parser(page=page_list)
doujinshis = favorites_parser() if options.page_all else favorites_parser(page=page_list)
elif options.keyword:
if constant.CONFIG['language']:
@ -57,6 +59,10 @@ def main():
doujinshis = _search_parser(options.keyword, sorting=options.sorting, page=page_list,
is_page_all=options.page_all)
elif options.artist:
doujinshis = legacy_search_parser(options.artist, sorting=options.sorting, page=page_list,
is_page_all=options.page_all, type_='ARTIST')
elif not doujinshi_ids:
doujinshi_ids = options.id
@ -71,7 +77,7 @@ def main():
doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))
if not options.is_show:
downloader = Downloader(path=options.output_dir, size=options.threads,
downloader = Downloader(path=options.output_dir, threads=options.threads,
timeout=options.timeout, delay=options.delay)
for doujinshi_id in doujinshi_ids:
@ -83,22 +89,40 @@ def main():
if not options.dryrun:
doujinshi.downloader = downloader
doujinshi.download(regenerate_cbz=options.regenerate_cbz)
if doujinshi.check_if_need_download(options):
doujinshi.download()
else:
logger.info(f'Skip download doujinshi because a PDF/CBZ file exists of doujinshi {doujinshi.name}')
continue
if options.generate_metadata:
table = doujinshi.table
generate_metadata_file(options.output_dir, table, doujinshi)
generate_metadata_file(options.output_dir, doujinshi)
if options.is_save_download_history:
with DB() as db:
db.add_one(doujinshi.id)
if not options.is_nohtml and not options.is_cbz and not options.is_pdf:
if not options.is_nohtml:
generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template'])
elif options.is_cbz:
generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir)
elif options.is_pdf:
generate_pdf(options.output_dir, doujinshi, options.rm_origin_dir)
if options.is_cbz:
generate_doc('cbz', options.output_dir, doujinshi, options.regenerate)
if options.is_pdf:
generate_doc('pdf', options.output_dir, doujinshi, options.regenerate)
if options.move_to_folder:
if options.is_cbz:
move_to_folder(options.output_dir, doujinshi, 'cbz')
if options.is_pdf:
move_to_folder(options.output_dir, doujinshi, 'pdf')
if options.rm_origin_dir:
if options.move_to_folder:
logger.critical('You specified both --move-to-folder and --rm-origin-dir options, '
'you will not get anything :(')
shutil.rmtree(os.path.join(options.output_dir, doujinshi.filename), ignore_errors=True)
if options.main_viewer:
generate_main_html(options.output_dir)

View File

@ -3,6 +3,23 @@ import os
import tempfile
from urllib.parse import urlparse
from platform import system
def get_nhentai_home() -> str:
home = os.getenv('HOME', tempfile.gettempdir())
if system() == 'Linux':
xdgdat = os.getenv('XDG_DATA_HOME')
if xdgdat and os.path.exists(os.path.join(xdgdat, 'nhentai')):
return os.path.join(xdgdat, 'nhentai')
if home and os.path.exists(os.path.join(home, '.nhentai')):
return os.path.join(home, '.nhentai')
if xdgdat:
return os.path.join(xdgdat, 'nhentai')
# Use old default path in other systems
return os.path.join(home, '.nhentai')
DEBUG = os.getenv('DEBUG', False)
@ -11,15 +28,22 @@ BASE_URL = os.getenv('NHENTAI', 'https://nhentai.net')
DETAIL_URL = f'{BASE_URL}/g'
LEGACY_SEARCH_URL = f'{BASE_URL}/search/'
SEARCH_URL = f'{BASE_URL}/api/galleries/search'
ARTIST_URL = f'{BASE_URL}/artist/'
TAG_API_URL = f'{BASE_URL}/api/galleries/tagged'
LOGIN_URL = f'{BASE_URL}/login/'
CHALLENGE_URL = f'{BASE_URL}/challenge'
FAV_URL = f'{BASE_URL}/favorites/'
IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i.{urlparse(BASE_URL).hostname}/galleries'
NHENTAI_HOME = os.path.join(os.getenv('HOME', tempfile.gettempdir()), '.nhentai')
IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i.{urlparse(BASE_URL).hostname}/galleries'
IMAGE_URL_MIRRORS = [
f'{urlparse(BASE_URL).scheme}://i3.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i5.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i7.{urlparse(BASE_URL).hostname}',
]
NHENTAI_HOME = get_nhentai_home()
NHENTAI_HISTORY = os.path.join(NHENTAI_HOME, 'history.sqlite3')
NHENTAI_CONFIG_FILE = os.path.join(NHENTAI_HOME, 'config.json')
@ -30,7 +54,8 @@ CONFIG = {
'cookie': '',
'language': '',
'template': '',
'useragent': 'nhentai command line client (https://github.com/RicterZ/nhentai)'
'useragent': 'nhentai command line client (https://github.com/RicterZ/nhentai)',
'max_filename': 85
}
LANGUAGE_ISO = {

View File

@ -1,4 +1,5 @@
# coding: utf-8
import os
from tabulate import tabulate
@ -11,6 +12,7 @@ EXT_MAP = {
'j': 'jpg',
'p': 'png',
'g': 'gif',
'w': 'webp',
}
@ -20,17 +22,19 @@ class DoujinshiInfo(dict):
def __getattr__(self, item):
try:
return dict.__getitem__(self, item)
ret = dict.__getitem__(self, item)
return ret if ret else 'Unknown'
except KeyError:
return ''
return 'Unknown'
class Doujinshi(object):
def __init__(self, name=None, pretty_name=None, id=None, img_id=None,
def __init__(self, name=None, pretty_name=None, id=None, favorite_counts=0, img_id=None,
ext='', pages=0, name_format='[%i][%a][%t]', **kwargs):
self.name = name
self.pretty_name = pretty_name
self.id = id
self.favorite_counts = favorite_counts
self.img_id = img_id
self.ext = ext
self.pages = pages
@ -38,8 +42,13 @@ class Doujinshi(object):
self.url = f'{DETAIL_URL}/{self.id}'
self.info = DoujinshiInfo(**kwargs)
ag_value = self.info.groups if self.info.artists == 'Unknown' else self.info.artists
name_format = name_format.replace('%ag', format_filename(ag_value))
name_format = name_format.replace('%i', format_filename(str(self.id)))
name_format = name_format.replace('%f', format_filename(str(self.favorite_counts)))
name_format = name_format.replace('%a', format_filename(self.info.artists))
name_format = name_format.replace('%g', format_filename(self.info.groups))
name_format = name_format.replace('%t', format_filename(self.name))
name_format = name_format.replace('%p', format_filename(self.pretty_name))
@ -47,15 +56,18 @@ class Doujinshi(object):
self.filename = format_filename(name_format, 255, True)
self.table = [
["Parodies", self.info.parodies],
["Doujinshi", self.name],
["Subtitle", self.info.subtitle],
["Characters", self.info.characters],
["Authors", self.info.artists],
["Languages", self.info.languages],
["Tags", self.info.tags],
["URL", self.url],
["Pages", self.pages],
['Parodies', self.info.parodies],
['Doujinshi', self.name],
['Subtitle', self.info.subtitle],
['Date', self.info.date],
['Characters', self.info.characters],
['Authors', self.info.artists],
['Groups', self.info.groups],
['Languages', self.info.languages],
['Tags', self.info.tags],
['Favorite Counts', self.info.favorite_counts],
['URL', self.url],
['Pages', self.pages],
]
def __repr__(self):
@ -64,7 +76,35 @@ class Doujinshi(object):
def show(self):
logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}')
def download(self, regenerate_cbz=False):
def check_if_need_download(self, options):
base_path = os.path.join(self.downloader.path, self.filename)
# regenerate, re-download
if options.regenerate:
return True
# pdf or cbz file exists, skip re-download
# doujinshi directory may not exist b/c of --rm-origin-dir option set.
# user should pass --regenerate option to get back origin dir.
ret_pdf = ret_cbz = None
if options.is_pdf:
ret_pdf = os.path.exists(f'{base_path}.pdf') or os.path.exists(f'{base_path}/{self.filename}.pdf')
if options.is_cbz:
ret_cbz = os.path.exists(f'{base_path}.cbz') or os.path.exists(f'{base_path}/{self.filename}.cbz')
ret = list(filter(lambda s: s is not None, [ret_cbz, ret_pdf]))
if ret and all(ret):
return False
# doujinshi directory doesn't exist, re-download
if not (os.path.exists(base_path) and os.path.isdir(base_path)):
return True
# fallback
return True
def download(self):
logger.info(f'Starting to download doujinshi: {self.name}')
if self.downloader:
download_queue = []
@ -74,9 +114,10 @@ class Doujinshi(object):
for i in range(1, min(self.pages, len(self.ext)) + 1):
download_queue.append(f'{IMAGE_URL}/{self.img_id}/{i}.{self.ext[i-1]}')
self.downloader.start_download(download_queue, self.filename, regenerate_cbz=regenerate_cbz)
return self.downloader.start_download(download_queue, self.filename)
else:
logger.critical('Downloader has not been loaded')
return False
if __name__ == '__main__':

View File

@ -1,24 +1,17 @@
# coding: utf-
import multiprocessing
import signal
import sys
import os
import requests
import time
import asyncio
import httpx
import urllib3.exceptions
from urllib.parse import urlparse
from nhentai import constant
from nhentai.logger import logger
from nhentai.parser import request
from nhentai.utils import Singleton
from nhentai.utils import Singleton, async_request
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
semaphore = multiprocessing.Semaphore(1)
class NHentaiImageNotExistException(Exception):
pass
@ -39,60 +32,68 @@ def download_callback(result):
logger.log(16, f'{data} downloaded successfully')
class Downloader(Singleton):
def __init__(self, path='', size=5, timeout=30, delay=0):
self.size = size
class Downloader(Singleton):
def __init__(self, path='', threads=5, timeout=30, delay=0):
self.threads = threads
self.path = str(path)
self.timeout = timeout
self.delay = delay
def download(self, url, folder='', filename='', retried=0, proxy=None):
if self.delay:
time.sleep(self.delay)
logger.info(f'Starting to download {url} ...')
filename = filename if filename else os.path.basename(urlparse(url).path)
base_filename, extension = os.path.splitext(filename)
async def fiber(self, tasks):
self.semaphore = asyncio.Semaphore(self.threads)
for completed_task in asyncio.as_completed(tasks):
try:
result = await completed_task
logger.info(f'{result[1]} download completed')
except Exception as e:
logger.error(f'An error occurred: {e}')
async def _semaphore_download(self, *args, **kwargs):
async with self.semaphore:
return await self.download(*args, **kwargs)
async def download(self, url, folder='', filename='', retried=0, proxy=None):
logger.info(f'Starting to download {url} ...')
if self.delay:
await asyncio.sleep(self.delay)
filename = filename if filename else os.path.basename(urlparse(url).path)
save_file_path = os.path.join(self.folder, filename)
save_file_path = os.path.join(folder, base_filename.zfill(3) + extension)
try:
if os.path.exists(save_file_path):
logger.warning(f'Ignored exists file: {save_file_path}')
logger.warning(f'Skipped download: {save_file_path} already exists')
return 1, url
response = None
with open(save_file_path, "wb") as f:
i = 0
while i < 10:
try:
response = request('get', url, stream=True, timeout=self.timeout, proxies=proxy)
if response.status_code != 200:
raise NHentaiImageNotExistException
response = await async_request('GET', url, timeout=self.timeout, proxies=proxy)
except NHentaiImageNotExistException as e:
raise e
if response.status_code != 200:
path = urlparse(url).path
for mirror in constant.IMAGE_URL_MIRRORS:
logger.info(f"Try mirror: {mirror}{path}")
mirror_url = f'{mirror}{path}'
response = await async_request('GET', mirror_url, timeout=self.timeout, proxies=proxy)
if response.status_code == 200:
break
except Exception as e:
i += 1
if not i < 10:
logger.critical(str(e))
return 0, None
continue
if not await self.save(filename, response):
logger.error(f'Can not download image {url}')
return 1, None
break
length = response.headers.get('content-length')
if length is None:
f.write(response.content)
else:
for chunk in response.iter_content(2048):
f.write(chunk)
except (requests.HTTPError, requests.Timeout) as e:
except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e:
if retried < 3:
logger.warning(f'Warning: {e}, retrying({retried}) ...')
return 0, self.download(url=url, folder=folder, filename=filename,
retried=retried+1, proxy=proxy)
logger.info(f'Download {filename} failed, retrying({retried + 1}) times...')
return await self.download(
url=url,
folder=folder,
filename=filename,
retried=retried + 1,
proxy=proxy,
)
else:
return 0, None
@ -102,6 +103,8 @@ class Downloader(Singleton):
except Exception as e:
import traceback
logger.error(f"Exception type: {type(e)}")
traceback.print_stack()
logger.critical(str(e))
return 0, None
@ -111,49 +114,48 @@ class Downloader(Singleton):
return 1, url
def start_download(self, queue, folder='', regenerate_cbz=False):
if not isinstance(folder, (str, )):
async def save(self, save_file_path, response) -> bool:
if response is None:
logger.error('Error: Response is None')
return False
save_file_path = os.path.join(self.folder, save_file_path)
with open(save_file_path, 'wb') as f:
if response is not None:
length = response.headers.get('content-length')
if length is None:
f.write(response.content)
else:
async for chunk in response.aiter_bytes(2048):
f.write(chunk)
return True
def start_download(self, queue, folder='') -> bool:
if not isinstance(folder, (str,)):
folder = str(folder)
if self.path:
folder = os.path.join(self.path, folder)
if os.path.exists(folder + '.cbz'):
if not regenerate_cbz:
logger.warning(f'CBZ file "{folder}.cbz" exists, ignored download request')
return
logger.info(f'Doujinshi will be saved at "{folder}"')
if not os.path.exists(folder):
try:
os.makedirs(folder)
except EnvironmentError as e:
logger.critical(str(e))
self.folder = folder
else:
logger.warning(f'Path "{folder}" already exist.')
queue = [(self, url, folder, constant.CONFIG['proxy']) for url in queue]
pool = multiprocessing.Pool(self.size, init_worker)
[pool.apply_async(download_wrapper, args=item) for item in queue]
pool.close()
pool.join()
if os.getenv('DEBUG', None) == 'NODOWNLOAD':
# Assuming we want to continue with rest of process.
return True
def download_wrapper(obj, url, folder='', proxy=None):
if sys.platform == 'darwin' or semaphore.get_value():
return Downloader.download(obj, url=url, folder=folder, proxy=proxy)
else:
return -3, None
coroutines = [
self._semaphore_download(url, filename=os.path.basename(urlparse(url).path))
for url in queue
]
# Prevent coroutines infection
asyncio.run(self.fiber(coroutines))
def init_worker():
signal.signal(signal.SIGINT, subprocess_signal)
def subprocess_signal(sig, frame):
if semaphore.acquire(timeout=1):
logger.warning('Ctrl-C pressed, exiting sub processes ...')
raise KeyboardInterrupt
return True

View File

@ -135,19 +135,24 @@ def doujinshi_parser(id_, counter=0):
logger.warning(f'Error: {e}, ignored')
return None
# print(response)
html = BeautifulSoup(response, 'html.parser')
doujinshi_info = html.find('div', attrs={'id': 'info'})
title = doujinshi_info.find('h1').text
pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text
subtitle = doujinshi_info.find('h2')
favorite_counts = doujinshi_info.find('span', class_='nobold').find('span', class_='count')
if favorite_counts is None:
favorite_counts = '0'
doujinshi['name'] = title
doujinshi['pretty_name'] = pretty_name
doujinshi['subtitle'] = subtitle.text if subtitle else ''
doujinshi['favorite_counts'] = favorite_counts.strip()
doujinshi_cover = html.find('div', attrs={'id': 'cover'})
img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif)$',
img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
doujinshi_cover.a.img.attrs['data-src'])
ext = []
@ -240,13 +245,21 @@ def print_doujinshi(doujinshi_list):
print(tabulate(tabular_data=doujinshi_list, headers=headers, tablefmt='rst'))
def legacy_search_parser(keyword, sorting, page, is_page_all=False):
def legacy_search_parser(keyword, sorting, page, is_page_all=False, type_='SEARCH'):
logger.info(f'Searching doujinshis of keyword {keyword}')
result = []
if type_ not in ('SEARCH', 'ARTIST', ):
raise ValueError('Invalid type')
if is_page_all:
response = request('get', url=constant.LEGACY_SEARCH_URL,
params={'q': keyword, 'page': 1, 'sort': sorting}).content
if type_ == 'SEARCH':
response = request('get', url=constant.LEGACY_SEARCH_URL,
params={'q': keyword, 'page': 1, 'sort': sorting}).content
else:
url = constant.ARTIST_URL + keyword + '/' + ('' if sorting == 'recent' else sorting)
response = request('get', url=url, params={'page': 1}).content
html = BeautifulSoup(response, 'lxml')
pagination = html.find(attrs={'class': 'pagination'})
last_page = pagination.find(attrs={'class': 'last'})
@ -258,8 +271,13 @@ def legacy_search_parser(keyword, sorting, page, is_page_all=False):
for p in pages:
logger.info(f'Fetching page {p} ...')
response = request('get', url=constant.LEGACY_SEARCH_URL,
params={'q': keyword, 'page': p, 'sort': sorting}).content
if type_ == 'SEARCH':
response = request('get', url=constant.LEGACY_SEARCH_URL,
params={'q': keyword, 'page': p, 'sort': sorting}).content
else:
url = constant.ARTIST_URL + keyword + '/' + ('' if sorting == 'recent' else sorting)
response = request('get', url=url, params={'page': p}).content
if response is None:
logger.warning(f'No result in response in page {p}')
continue
@ -313,7 +331,9 @@ def search_parser(keyword, sorting, page, is_page_all=False):
for row in response['result']:
title = row['title']['english']
title = title[:85] + '..' if len(title) > 85 else title
title = title[:constant.CONFIG['max_filename']] + '..' if \
len(title) > constant.CONFIG['max_filename'] else title
result.append({'id': row['id'], 'title': title})
not_exists_persist = False

View File

@ -8,6 +8,8 @@ from nhentai.constant import LANGUAGE_ISO
def serialize_json(doujinshi, output_dir):
metadata = {'title': doujinshi.name,
'subtitle': doujinshi.info.subtitle}
if doujinshi.info.favorite_counts:
metadata['favorite_counts'] = doujinshi.favorite_counts
if doujinshi.info.date:
metadata['upload_date'] = doujinshi.info.date
if doujinshi.info.parodies:
@ -22,7 +24,7 @@ def serialize_json(doujinshi, output_dir):
metadata['group'] = [i.strip() for i in doujinshi.info.groups.split(',')]
if doujinshi.info.languages:
metadata['language'] = [i.strip() for i in doujinshi.info.languages.split(',')]
metadata['category'] = doujinshi.info.categories
metadata['category'] = [i.strip() for i in doujinshi.info.categories.split(',')]
metadata['URL'] = doujinshi.url
metadata['Pages'] = doujinshi.pages
@ -44,6 +46,7 @@ def serialize_comic_xml(doujinshi, output_dir):
xml_write_simple_tag(f, 'PageCount', doujinshi.pages)
xml_write_simple_tag(f, 'URL', doujinshi.url)
xml_write_simple_tag(f, 'NhentaiId', doujinshi.id)
xml_write_simple_tag(f, 'Favorites', doujinshi.favorite_counts)
xml_write_simple_tag(f, 'Genre', doujinshi.info.categories)
xml_write_simple_tag(f, 'BlackAndWhite', 'No' if doujinshi.info.tags and

View File

@ -5,14 +5,18 @@ import re
import os
import zipfile
import shutil
import copy
import httpx
import requests
import sqlite3
import urllib.parse
from typing import Optional, Tuple
from nhentai import constant
from nhentai.logger import logger
from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database
MAX_FIELD_LENGTH = 100
@ -30,15 +34,40 @@ def request(method, url, **kwargs):
return getattr(session, method)(url, verify=False, **kwargs)
async def async_request(method, url, proxies = None, **kwargs):
headers = {
'Referer': constant.LOGIN_URL,
'User-Agent': constant.CONFIG['useragent'],
'Cookie': constant.CONFIG['cookie'],
}
if proxies is None:
proxies = constant.CONFIG['proxy']
if proxies.get('http') == '' and proxies.get('https') == '':
proxies = None
if proxies:
_proxies = {f'{k}://': v for k, v in proxies.items() if v}
proxies = _proxies
async with httpx.AsyncClient(headers=headers, verify=False, proxies=proxies, **kwargs) as client:
response = await client.request(method, url, **kwargs)
return response
def check_cookie():
response = request('get', constant.BASE_URL)
if response.status_code == 403 and 'Just a moment...' in response.text:
logger.error('Blocked by Cloudflare captcha, please set your cookie and useragent')
sys.exit(1)
username = re.findall('"/users/[0-9]+/(.*?)"', response.text)
if not username:
logger.warning('Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie')
logger.warning(
'Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie')
else:
logger.log(16, f'Login successfully! Your username: {username[0]}')
@ -64,13 +93,31 @@ def readfile(path):
return file.read()
def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
image_html = ''
def parse_doujinshi_obj(
output_dir: str,
doujinshi_obj=None,
file_type: str = ''
) -> Tuple[str, str]:
filename = f'./doujinshi.{file_type}'
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
if doujinshi_obj is not None:
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
else:
doujinshi_dir = '.'
_filename = f'{doujinshi_obj.filename}.{file_type}'
if file_type == 'cbz':
serialize_comic_xml(doujinshi_obj, doujinshi_dir)
if file_type == 'pdf':
_filename = _filename.replace('/', '-')
filename = os.path.join(output_dir, _filename)
return doujinshi_dir, filename
def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, '.html')
image_html = ''
if not os.path.exists(doujinshi_dir):
logger.warning(f'Path "{doujinshi_dir}" does not exist, creating.')
@ -83,7 +130,7 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
file_list.sort()
for image in file_list:
if not os.path.splitext(image)[1] in ('.jpg', '.png'):
if not os.path.splitext(image)[1] in ('.jpg', '.png', '.webp'):
continue
image_html += f'<img src="{image}" class="image-item"/>\n'
@ -107,6 +154,27 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
logger.warning(f'Writing HTML Viewer failed ({e})')
def move_to_folder(output_dir='.', doujinshi_obj=None, file_type=None):
if not file_type:
raise RuntimeError('no file_type specified')
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
for fn in os.listdir(doujinshi_dir):
file_path = os.path.join(doujinshi_dir, fn)
_, ext = os.path.splitext(file_path)
if ext in ['.pdf', '.cbz']:
continue
if os.path.isfile(file_path):
try:
os.remove(file_path)
except Exception as e:
print(f"Error deleting file: {e}")
shutil.move(filename, os.path.join(doujinshi_dir, os.path.basename(filename)))
def generate_main_html(output_dir='./'):
"""
Generate a main html to show all the contains doujinshi.
@ -148,7 +216,7 @@ def generate_main_html(output_dir='./'):
else:
title = 'nHentai HTML Viewer'
image_html += element.format(FOLDER=folder, IMAGE=image, TITLE=title)
image_html += element.format(FOLDER=urllib.parse.quote(folder), IMAGE=image, TITLE=title)
if image_html == '':
logger.warning('No index.html found, --gen-main paused.')
return
@ -158,71 +226,50 @@ def generate_main_html(output_dir='./'):
f.write(data.encode('utf-8'))
shutil.copy(os.path.dirname(__file__) + '/viewer/logo.png', './')
set_js_database()
logger.log(16, f'Main Viewer has been written to "{output_dir}main.html"')
output_dir = output_dir[:-1] if output_dir.endswith('/') else output_dir
logger.log(16, f'Main Viewer has been written to "{output_dir}/main.html"')
except Exception as e:
logger.warning(f'Writing Main Viewer failed ({e})')
def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_comic_info=True):
if doujinshi_obj is not None:
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
if os.path.exists(doujinshi_dir+".cbz"):
logger.warning(f'Comic Book CBZ file exists, skip "{doujinshi_dir}"')
return
if write_comic_info:
serialize_comic_xml(doujinshi_obj, doujinshi_dir)
cbz_filename = os.path.join(os.path.join(doujinshi_dir, '..'), f'{doujinshi_obj.filename}.cbz')
else:
cbz_filename = './doujinshi.cbz'
doujinshi_dir = '.'
def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=False):
file_list = os.listdir(doujinshi_dir)
file_list.sort()
doujinshi_dir, filename = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type)
logger.info(f'Writing CBZ file to path: {cbz_filename}')
with zipfile.ZipFile(cbz_filename, 'w') as cbz_pf:
for image in file_list:
image_path = os.path.join(doujinshi_dir, image)
cbz_pf.write(image_path, image)
if rm_origin_dir:
shutil.rmtree(doujinshi_dir, ignore_errors=True)
logger.log(16, f'Comic Book CBZ file has been written to "{doujinshi_dir}"')
def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False):
try:
import img2pdf
"""Write images to a PDF file using img2pdf."""
if doujinshi_obj is not None:
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
pdf_filename = os.path.join(
os.path.join(doujinshi_dir, '..'),
f'{doujinshi_obj.filename}.pdf'
)
else:
pdf_filename = './doujinshi.pdf'
doujinshi_dir = '.'
if os.path.exists(f'{doujinshi_dir}.{file_type}') and not regenerate:
logger.info(f'Skipped {file_type} file generation: {doujinshi_dir}.{file_type} already exists')
return
if file_type == 'cbz':
file_list = os.listdir(doujinshi_dir)
file_list.sort()
logger.info(f'Writing PDF file to path: {pdf_filename}')
with open(pdf_filename, 'wb') as pdf_f:
full_path_list = (
[os.path.join(doujinshi_dir, image) for image in file_list]
)
pdf_f.write(img2pdf.convert(full_path_list))
logger.info(f'Writing CBZ file to path: {filename}')
with zipfile.ZipFile(filename, 'w') as cbz_pf:
for image in file_list:
image_path = os.path.join(doujinshi_dir, image)
cbz_pf.write(image_path, image)
if rm_origin_dir:
shutil.rmtree(doujinshi_dir, ignore_errors=True)
logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')
elif file_type == 'pdf':
try:
import img2pdf
logger.log(16, f'PDF file has been written to "{doujinshi_dir}"')
"""Write images to a PDF file using img2pdf."""
file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp'))]
file_list.sort()
except ImportError:
logger.error("Please install img2pdf package by using pip.")
logger.info(f'Writing PDF file to path: {filename}')
with open(filename, 'wb') as pdf_f:
full_path_list = (
[os.path.join(doujinshi_dir, image) for image in file_list]
)
pdf_f.write(img2pdf.convert(full_path_list, rotation=img2pdf.Rotation.ifvalid))
logger.log(16, f'PDF file has been written to "{filename}"')
except ImportError:
logger.error("Please install img2pdf package by using pip.")
def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
@ -235,7 +282,7 @@ def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
# maybe you can use `--format` to select a suitable filename
if not _truncate_only:
ban_chars = '\\\'/:,;*?"<>|\t'
ban_chars = '\\\'/:,;*?"<>|\t\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b'
filename = s.translate(str.maketrans(ban_chars, ' ' * len(ban_chars))).strip()
filename = ' '.join(filename.split())
@ -278,32 +325,27 @@ def paging(page_string):
return page_list
def generate_metadata_file(output_dir, table, doujinshi_obj=None):
logger.info('Writing Metadata Info')
def generate_metadata_file(output_dir, doujinshi_obj):
if doujinshi_obj is not None:
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
else:
doujinshi_dir = '.'
info_txt_path = os.path.join(output_dir, doujinshi_obj.filename, 'info.txt')
logger.info(doujinshi_dir)
f = open(info_txt_path, 'w', encoding='utf-8')
f = open(os.path.join(doujinshi_dir, 'info.txt'), 'w', encoding='utf-8')
fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'CIRCLE', 'SCANLATOR',
fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
'TAGS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
'SERIES', 'PARODY', 'URL']
special_fields = ['PARODY', 'TITLE', 'ORIGINAL TITLE', 'CHARACTERS', 'AUTHOR',
special_fields = ['PARODY', 'TITLE', 'ORIGINAL TITLE', 'DATE', 'CHARACTERS', 'AUTHOR', 'GROUPS',
'LANGUAGE', 'TAGS', 'URL', 'PAGES']
for i in range(len(fields)):
f.write(f'{fields[i]}: ')
if fields[i] in special_fields:
f.write(str(table[special_fields.index(fields[i])][1]))
f.write(str(doujinshi_obj.table[special_fields.index(fields[i])][1]))
f.write('\n')
f.close()
logger.log(16, f'Metadata Info has been written to "{info_txt_path}"')
class DB(object):

View File

@ -139,7 +139,7 @@ function filter_searcher(){
break
}
}
if (verifier){doujinshi_id.push(data[i].Folder);}
if (verifier){doujinshi_id.push(data[i].Folder.replace("_", " "));}
}
var gallery = document.getElementsByClassName("gallery-favorite");
for (var i = 0; i < gallery.length; i++){

164
poetry.lock generated
View File

@ -1,10 +1,31 @@
# This file is automatically @generated by Poetry 1.4.0 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
[[package]]
name = "anyio"
version = "4.5.2"
description = "High level compatibility layer for multiple asynchronous event loop implementations"
optional = false
python-versions = ">=3.8"
files = [
{file = "anyio-4.5.2-py3-none-any.whl", hash = "sha256:c011ee36bc1e8ba40e5a81cb9df91925c218fe9b778554e0b56a21e1b5d4716f"},
{file = "anyio-4.5.2.tar.gz", hash = "sha256:23009af4ed04ce05991845451e11ef02fc7c5ed29179ac9a420e5ad0ac7ddc5b"},
]
[package.dependencies]
exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
idna = ">=2.8"
sniffio = ">=1.1"
typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""}
[package.extras]
doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21.0b1)"]
trio = ["trio (>=0.26.1)"]
[[package]]
name = "beautifulsoup4"
version = "4.11.2"
description = "Screen-scraping library"
category = "main"
optional = false
python-versions = ">=3.6.0"
files = [
@ -21,21 +42,19 @@ lxml = ["lxml"]
[[package]]
name = "certifi"
version = "2022.12.7"
version = "2024.7.4"
description = "Python package for providing Mozilla's CA Bundle."
category = "main"
optional = false
python-versions = ">=3.6"
files = [
{file = "certifi-2022.12.7-py3-none-any.whl", hash = "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"},
{file = "certifi-2022.12.7.tar.gz", hash = "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3"},
{file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"},
{file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"},
]
[[package]]
name = "charset-normalizer"
version = "3.0.1"
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
category = "main"
optional = false
python-versions = "*"
files = [
@ -129,23 +148,92 @@ files = [
{file = "charset_normalizer-3.0.1-py3-none-any.whl", hash = "sha256:7e189e2e1d3ed2f4aebabd2d5b0f931e883676e51c7624826e0a4e5fe8a0bf24"},
]
[[package]]
name = "exceptiongroup"
version = "1.2.2"
description = "Backport of PEP 654 (exception groups)"
optional = false
python-versions = ">=3.7"
files = [
{file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
{file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
]
[package.extras]
test = ["pytest (>=6)"]
[[package]]
name = "h11"
version = "0.14.0"
description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
optional = false
python-versions = ">=3.7"
files = [
{file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
]
[[package]]
name = "httpcore"
version = "1.0.7"
description = "A minimal low-level HTTP client."
optional = false
python-versions = ">=3.8"
files = [
{file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"},
{file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"},
]
[package.dependencies]
certifi = "*"
h11 = ">=0.13,<0.15"
[package.extras]
asyncio = ["anyio (>=4.0,<5.0)"]
http2 = ["h2 (>=3,<5)"]
socks = ["socksio (==1.*)"]
trio = ["trio (>=0.22.0,<1.0)"]
[[package]]
name = "httpx"
version = "0.27.2"
description = "The next generation HTTP client."
optional = false
python-versions = ">=3.8"
files = [
{file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"},
{file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"},
]
[package.dependencies]
anyio = "*"
certifi = "*"
httpcore = "==1.*"
idna = "*"
sniffio = "*"
[package.extras]
brotli = ["brotli", "brotlicffi"]
cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
http2 = ["h2 (>=3,<5)"]
socks = ["socksio (==1.*)"]
zstd = ["zstandard (>=0.18.0)"]
[[package]]
name = "idna"
version = "3.4"
version = "3.7"
description = "Internationalized Domain Names in Applications (IDNA)"
category = "main"
optional = false
python-versions = ">=3.5"
files = [
{file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"},
{file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"},
{file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"},
{file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"},
]
[[package]]
name = "iso8601"
version = "1.1.0"
description = "Simple module to parse ISO 8601 dates"
category = "main"
optional = false
python-versions = ">=3.6.2,<4.0"
files = [
@ -155,31 +243,40 @@ files = [
[[package]]
name = "requests"
version = "2.28.2"
version = "2.32.0"
description = "Python HTTP for Humans."
category = "main"
optional = false
python-versions = ">=3.7, <4"
python-versions = ">=3.8"
files = [
{file = "requests-2.28.2-py3-none-any.whl", hash = "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa"},
{file = "requests-2.28.2.tar.gz", hash = "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf"},
{file = "requests-2.32.0-py3-none-any.whl", hash = "sha256:f2c3881dddb70d056c5bd7600a4fae312b2a300e39be6a118d30b90bd27262b5"},
{file = "requests-2.32.0.tar.gz", hash = "sha256:fa5490319474c82ef1d2c9bc459d3652e3ae4ef4c4ebdd18a21145a47ca4b6b8"},
]
[package.dependencies]
certifi = ">=2017.4.17"
charset-normalizer = ">=2,<4"
idna = ">=2.5,<4"
urllib3 = ">=1.21.1,<1.27"
urllib3 = ">=1.21.1,<3"
[package.extras]
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
[[package]]
name = "sniffio"
version = "1.3.1"
description = "Sniff out which async library your code is running under"
optional = false
python-versions = ">=3.7"
files = [
{file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
{file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
]
[[package]]
name = "soupsieve"
version = "2.4"
description = "A modern CSS selector implementation for Beautiful Soup."
category = "main"
optional = false
python-versions = ">=3.7"
files = [
@ -191,7 +288,6 @@ files = [
name = "tabulate"
version = "0.9.0"
description = "Pretty-print tabular data"
category = "main"
optional = false
python-versions = ">=3.7"
files = [
@ -203,23 +299,33 @@ files = [
widechars = ["wcwidth"]
[[package]]
name = "urllib3"
version = "1.26.14"
description = "HTTP library with thread-safe connection pooling, file post, and more."
category = "main"
name = "typing-extensions"
version = "4.12.2"
description = "Backported and Experimental Type Hints for Python 3.8+"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
python-versions = ">=3.8"
files = [
{file = "urllib3-1.26.14-py2.py3-none-any.whl", hash = "sha256:75edcdc2f7d85b137124a6c3c9fc3933cdeaa12ecb9a6a959f22797a0feca7e1"},
{file = "urllib3-1.26.14.tar.gz", hash = "sha256:076907bf8fd355cde77728471316625a4d2f7e713c125f51953bb5b3eecf4f72"},
{file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
{file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
]
[[package]]
name = "urllib3"
version = "1.26.19"
description = "HTTP library with thread-safe connection pooling, file post, and more."
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
files = [
{file = "urllib3-1.26.19-py2.py3-none-any.whl", hash = "sha256:37a0344459b199fce0e80b0d3569837ec6b6937435c5244e7fd73fa6006830f3"},
{file = "urllib3-1.26.19.tar.gz", hash = "sha256:3e3d753a8618b86d7de333b4223005f68720bcd6a7d2bcb9fbd2229ec7c1e429"},
]
[package.extras]
brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"]
brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"]
secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"]
socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.8"
content-hash = "0a1d5abd47a669c7a1f2dc7b43824a449e29ba94908a4338d2ea0f2dfb4f805e"
content-hash = "a69dbf5dcfd6dcc5afc0fd2de4ab153841f7d210d4be60c426e332e36a79d679"

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "nhentai"
version = "0.5.2"
version = "0.5.15"
description = "nhentai doujinshi downloader"
authors = ["Ricter Z <ricterzheng@gmail.com>"]
license = "MIT"
@ -14,6 +14,7 @@ beautifulsoup4 = "^4.11.2"
tabulate = "^0.9.0"
iso8601 = "^1.1.0"
urllib3 = "^1.26.14"
httpx = "0.27.2"
[build-system]

View File

@ -1,5 +1,7 @@
httpx==0.27.2
requests
soupsieve
setuptools
BeautifulSoup4
tabulate
iso8601

View File

@ -1,3 +1,3 @@
[metadata]
description-file = README.rst
description_file = README.rst

View File

@ -20,7 +20,7 @@ class TestDownload(unittest.TestCase):
def test_download(self):
did = 440546
info = Doujinshi(**doujinshi_parser(did), name_format='%i')
info.downloader = Downloader(path='/tmp', size=5)
info.downloader = Downloader(path='/tmp', threads=5)
info.download()
self.assertTrue(os.path.exists(f'/tmp/{did}/001.jpg'))