Compare commits


128 Commits
0.4.3 ... 0.5.0

SHA1 Message Date
927d5b1b39 update requirements 2023-02-05 23:45:33 +08:00
a8566482aa change log color and update images 2023-02-05 23:44:15 +08:00
8c900a833d update README 2023-02-05 23:25:41 +08:00
466fa4c094 rename some constants 2023-02-05 23:17:23 +08:00
2adf8ccc9d reformat files #266 2023-02-05 23:13:47 +08:00
06fdf0dade reformat files #266 2023-02-05 22:44:37 +08:00
a609243794 change logger 2023-02-05 07:07:19 +08:00
e89c2c0860 fix bug #265 2023-02-05 07:02:45 +08:00
e08b0659e5 improve #265 2023-02-05 06:55:03 +08:00
221ff6b32c 0.4.18 bugs fix 2023-02-04 20:24:53 +08:00
bc6ef0cf5d solve #251 2023-02-04 20:22:57 +08:00
c8c63cbc11 add usage images 2023-02-04 20:09:51 +08:00
a63856d076 update usage 2023-02-04 20:09:46 +08:00
aa4986189f resolve issue #264 2023-02-04 19:55:51 +08:00
0fb81599dc resolve #265 2023-02-04 19:47:24 +08:00
e9f9651d07 change the default sort method 2023-02-04 19:38:29 +08:00
1860b5f0cf resoved issue #249 2022-05-03 16:54:38 +08:00
eff4f3bf9b remove debug print 2022-05-03 16:51:49 +08:00
501840172e change sorting from recent to date 2022-05-03 16:49:26 +08:00
e5ed6d098a update README 2022-05-02 18:53:40 +08:00
98606202fb remove some unused images 2022-05-02 18:49:34 +08:00
5a3f1009c9 update README for issue #237 2022-05-02 18:48:02 +08:00
61945a6e97 fix for issue #236 2022-05-02 17:01:30 +08:00
443fcdc7da fix for issue #232 2022-05-02 16:53:23 +08:00
31b95fe2dd 0.4.17 releases, for #246 2022-05-02 16:24:04 +08:00
be8c97f8d4 Merge pull request #247 from krrr/master 2022-05-02 13:21:53 +08:00
348e51676e Update README.rst 2022-05-02 12:13:19 +08:00
ea356a1ca2 Merge pull request #244 from krrr/master 2022-04-30 13:47:57 +08:00
5a4dfb8a76 Add new option to avoid cloudflare captcha 2022-04-30 11:22:41 +08:00
4b15744ceb Merge pull request #235 from TravisDavis-ops/nixpkg 2021-12-24 03:27:07 +08:00
b05fa16286 Update README.rst 2021-12-23 12:43:20 -06:00
0879486881 Merge pull request #228 from culturecloud/master 2021-08-23 20:27:38 +08:00
c66ba730d3 Fix UnicodeEncodeError 2021-07-28 18:43:45 +06:00
606c5e0ffd Merge pull request #226 from nanaih/minimal_viewer 2021-06-23 18:14:47 +08:00
ba04f81a6f add minimal viewer, fix not using config's template on --html only option 2021-06-22 23:17:03 -04:00
6519e6f221 Merge pull request #224 from RicterZ/pull/221: Pull/221 2021-06-07 17:21:00 +08:00
7594625d72 fix format 2021-06-07 17:17:54 +08:00
4948c8f0c5 update README 2021-06-07 16:50:03 +08:00
e22a99fa8c Merge branch 'master' of github.com:RicterZ/nhentai 2021-06-07 16:48:36 +08:00
19a1d5c404 fix #220 add pretty name of doujinshi format 2021-06-07 16:47:54 +08:00
ad1e876611 Merge pull request #221 from SomeRandomDude870/master: HDoujin-format Metadata file 2021-06-07 16:02:43 +08:00
1de7e1f998 Merge branch 'pull/221' into master 2021-06-07 16:01:54 +08:00
b97e707817 HDoujin-format Metadata file 2021-06-05 17:13:18 +02:00
6ef2189bfe Merge pull request #214 from lleene/master: Add dryrun option to command line interface 2021-06-03 08:00:18 +08:00
24be2d37d4 0.4.16 2021-06-02 23:22:23 +08:00
d9d2a6fb91 fix bug of proxy while downloading doujinshi 2021-06-02 23:20:56 +08:00
bd38294bb7 undo whitespace edits 2021-05-16 19:49:26 +02:00
2cf4e6718e Add the option to perform a dry-run and only download meta-data / generate file structure 2021-05-16 19:44:01 +02:00
8cd4b948e7 0.4.15 2021-05-08 15:36:49 +08:00
f884384eb3 fix bug 2021-05-08 15:36:36 +08:00
87afab46c4 Merge pull request #211 from jwfiredragon/master 2021-04-25 09:56:49 +08:00
c7b1d7e6a8 Fix broken constant import 2021-04-24 16:39:54 -07:00
ad02371158 Update constant.py 2021-04-21 15:37:13 +08:00
7c9d55e0ee Merge pull request #208 from karamori77/master: Changed write_comic_info from False to True 2021-04-21 15:30:51 +08:00
00aad774ae Fixed potential re-download: Moved forward save-history check 1 indent so it works with download by id too; Mapped all ids to int since there are cases where its a string in the API 2021-04-20 11:04:52 +08:00
373086b459 Update serializer.py: changed Language to LanguageISO for ComicInfo.xml; Language will be displayed by the LanguageISO code, it also forgoes rare language tags like rewrite and speechless 2021-04-18 21:45:15 +08:00
3a83f99771 Update constant.py 2021-04-18 21:40:47 +08:00
00627ab36a Update utils.py 2021-04-03 23:11:33 +08:00
592e163891 Update requirements.txt 2021-03-26 22:25:49 +08:00
84523475b0 Merge pull request #206 from Un1Gfn/patch-1 2021-03-25 19:01:39 +08:00
5f5461c902 Instuctions on getting csrftoken & sessionid 2021-03-25 18:57:20 +08:00
05e6ceb3cd Merge pull request #205 from Nontre12/master 2021-03-25 09:22:13 +08:00
db59426503 FIX: Use of img2lib even if it is not installed 2021-03-24 21:49:45 +01:00
74197f8f90 0.4.14 released for fix issue #204 2021-02-11 15:42:53 +08:00
6d91a39533 Merge pull request #203 from jwfiredragon/master: Switching 'logger.warn' to 'logger.warning' 2021-02-11 15:41:15 +08:00
e181e0b9dd Switching 'logger.warn' to 'logger.warning' 2021-02-10 22:45:22 -08:00
6fed1f94cb 0.4.13 2021-01-18 16:26:39 +08:00
9cfb23c8ec Merge pull request #201 from mobrine1/patch-1: Fix #200 2021-01-18 16:25:42 +08:00
fc347cdadf Fix #200 2021-01-17 15:02:43 -05:00
1cdebaab61 Merge pull request #199 from RicterZ/dev: 0.4.12 2021-01-17 12:16:56 +08:00
9513141ccf 0.4.12 2021-01-17 11:51:22 +08:00
bdc9fa113e fix #197 set proxy to null 2021-01-17 11:50:22 +08:00
36946111db fix #198 add notice 2021-01-17 11:42:06 +08:00
ce8ae54536 Merge pull request #195 from RicterZ/dev: 0.4.11 2021-01-11 11:19:58 +08:00
7aedb905d6 Merge pull request #194 from RicterZ/dev: 0.4.11 2021-01-11 11:16:09 +08:00
8b8b5f193e 0.4.11 2021-01-11 11:15:21 +08:00
fc99d91ac1 fix #193 2021-01-11 11:14:35 +08:00
ba141efba7 remove repeated spaces 2021-01-11 11:04:29 +08:00
f78d8750f3 remove __future__ 2021-01-11 11:03:45 +08:00
08bb8ffda4 Merge pull request #192 from RicterZ/dev: Dev 2021-01-10 14:41:02 +08:00
af379c825c Merge branch 'master' into dev 2021-01-10 14:40:09 +08:00
2f9386f22c fix #188 2021-01-10 11:44:04 +08:00
3667bc34b7 0.4.10 2021-01-10 11:41:38 +08:00
84749c56bd fix #191 2021-01-10 11:40:46 +08:00
24f79e0945 Merge pull request #190 from RicterZ/dev: fix bugs 2021-01-07 20:42:26 +08:00
edc46a9531 Merge pull request #189 from mobrine1/mobrine1-patch-1: Fixing loop when id not found, issue #188 2021-01-07 20:39:44 +08:00
72035a14e6 Fixing loop when id not found, issue #188 2021-01-07 07:32:29 -05:00
472528e464 Merge pull request #187 from atsushi-hirako/patch-1: fix issue #186 2021-01-02 02:16:50 +08:00
3f5915fd2a fix issue #186: change to blacklist approach (allow 2-bytes character) 2021-01-01 20:11:09 +09:00
0cd2576dab 0.4.9 2020-12-02 07:45:31 +08:00
445a8c052e Merge pull request #180 from RicterZ/dev: 0.4.8 2020-12-01 21:01:00 +08:00
7a75afef0a 0.4.8 2020-12-01 20:58:28 +08:00
a5813e19b1 fix bug on first start 2020-12-01 20:56:27 +08:00
8462d2f2aa use dict.update to update config values 2020-11-26 17:52:10 +08:00
51074ee948 support multi viewers 2020-11-26 17:22:23 +08:00
9c7354be32 0.4.6 2020-11-07 12:04:42 +08:00
7f48b3edd1 Merge pull request #175 from RicterZ/dev: add default value of output dir 2020-10-15 02:10:06 +08:00
d84b827241 add default value of output dir 2020-10-15 02:09:09 +08:00
4ac161a38c Merge pull request #174 from Nontre12/fix-gen-main: Fix change directory output_dir option on gen-main 2020-10-15 01:47:51 +08:00
648b6f87bf Added logo.png to the installation 2020-10-14 12:09:39 +02:00
2ec1283ba8 Fix change directory output_dir option on gen-main 2020-10-14 12:02:57 +02:00
a9bd46b426 Merge pull request #173 from Nontre12/db-ignored: Fix db ignored 2020-10-14 02:44:03 +08:00
c52bc271fc Fix db ignored 2020-10-13 13:39:24 +02:00
f2d22f8e7d Merge pull request #169 from Nontre12/master: Fix running without parameters 2020-10-11 03:48:39 +08:00
ea6089ff31 Fix 2020-10-10 21:15:20 +02:00
670d14c3f3 Merge pull request #4 from RicterZ/master: Update master branch 2020-10-10 20:50:01 +02:00
b46106a5bc Merge pull request #167 from RicterZ/0.4.5: 0.4.5 2020-10-11 02:00:02 +08:00
f04359e486 0.4.5 2020-10-11 01:57:37 +08:00
6861cbcbc1 Merge pull request #166 from RicterZ/dev: 0.4.4 2020-10-11 01:45:53 +08:00
e0938c5a0e Merge pull request #165 from RicterZ/dev: 0.4.4 2020-10-11 01:43:41 +08:00
641f8e4c51 0.4.4 2020-10-11 01:42:02 +08:00
b2fae226f9 use config.json 2020-10-11 01:38:08 +08:00
4aa34c668a Merge pull request #3 from RicterZ/master: Update master branch from origin 2020-10-10 19:11:56 +02:00
f157ac3246 merge to functions 2020-10-11 01:09:13 +08:00
139e01d3ca Merge pull request #163 from Nontre12/dev-page-range: Added --page-all option to download all search results 2020-10-11 00:58:57 +08:00
4d870e36a1 Merge branch 'master' into dev-page-range 2020-10-11 00:53:27 +08:00
74b0df26a9 Merge pull request #164 from RicterZ/fix-page-range: fix page range issue #158 2020-10-11 00:51:58 +08:00
1746e731ec fix page range issue #158 2020-10-11 00:48:36 +08:00
8ad60d9838 Merge pull request #1 from RicterZ/master: Merge pull request #162 from Nontre12/master 2020-10-10 18:31:47 +02:00
be05b9c0eb Added --page-all option to download all search results 2020-10-10 18:29:00 +02:00
9054b98934 Merge pull request #162 from Nontre12/master: Added 'Parodies' output and Updated package version 2020-10-11 00:10:27 +08:00
b82201ff27 Added to -S --show option the "Parodies" output 2020-10-10 12:33:14 +02:00
532c74e075 Update __version__ 2020-10-10 12:31:54 +02:00
5a50a5b1ba Merge pull request #159 from Nontre12/dev: Added --clean-language option 2020-10-10 04:56:51 +08:00
b5fe48746e Added --clean-language option 2020-10-09 17:34:03 +02:00
94d8da655a Fix misspelling 2020-10-09 17:30:11 +02:00
6ff2816d95 Merge pull request #157 from RicterZ/dev: 0.4.3 2020-10-02 01:59:50 +08:00
0a94ef9cf1 Merge pull request #156 from RicterZ/dev: 0.4.2 2020-10-02 01:56:04 +08:00
26 changed files with 797 additions and 455 deletions

.gitignore (vendored)

@@ -7,3 +7,4 @@ dist/
 .DS_Store
 output/
 venv/
+.vscode/

MANIFEST.in

@@ -1,8 +1,5 @@
 include README.md
 include requirements.txt
-include nhentai/viewer/index.html
-include nhentai/viewer/styles.css
-include nhentai/viewer/scripts.js
-include nhentai/viewer/main.html
-include nhentai/viewer/main.css
-include nhentai/viewer/main.js
+include nhentai/viewer/*
+include nhentai/viewer/default/*
+include nhentai/viewer/minimal/*

README.rst

@@ -51,10 +51,17 @@ Installation (Gentoo)
     layman -fa glicOne
     sudo emerge net-misc/nhentai

+=====================
+Installation (NixOs)
+=====================
+.. code-block::
+
+    nix-env -iA nixos.nhentai
+
 =====
 Usage
 =====
-**IMPORTANT**: To bypass the nhentai frequency limit, you should use `--cookie` option to store your cookie.
+**⚠️IMPORTANT⚠️**: To bypass the nhentai frequency limit, you should use `--cookie` and `--useragent` options to store your cookie and your user-agent.

 *The default download folder will be the path where you run the command (CLI path).*

@@ -63,15 +70,32 @@ Set your nhentai cookie against captcha:

 .. code-block:: bash

+    nhentai --useragent "USER AGENT of YOUR BROWSER"
     nhentai --cookie "YOUR COOKIE FROM nhentai.net"

-**NOTE**: The format of the cookie is `"csrftoken=TOKEN; sessionid=ID"`
+**NOTE**
+
+- The format of the cookie is `"csrftoken=TOKEN; sessionid=ID; cf_clearance=CLOUDFLARE"`
+- `cf_clearance` cookie and useragent must be set if you encounter "blocked by cloudflare captcha" error. Make sure you use the same IP and useragent as when you got it
+
+| To get csrftoken and sessionid, first login to your nhentai account in web browser, then:
+| (Chrome) |ve| |ld| More tools |ld| Developer tools |ld| Application |ld| Storage |ld| Cookies |ld| https://nhentai.net
+| (Firefox) |hv| |ld| Web Developer |ld| Web Developer Tools |ld| Storage |ld| Cookies |ld| https://nhentai.net
+|
+
+.. |hv| unicode:: U+2630 .. https://www.compart.com/en/unicode/U+2630
+.. |ve| unicode:: U+22EE .. https://www.compart.com/en/unicode/U+22EE
+.. |ld| unicode:: U+2014 .. https://www.compart.com/en/unicode/U+2014
+
+.. image:: ./images/usage.png?raw=true
+    :alt: nhentai
+    :align: center
+
 Download specified doujinshi:

 .. code-block:: bash

-    nhentai --id=123855,123866
+    nhentai --id 123855 123866 123877

 Download doujinshi with ids specified in a file (doujinshi ids split by line):

@@ -112,30 +136,41 @@ Supported doujinshi folder formatter:
 - %t: Doujinshi name
 - %s: Doujinshi subtitle (translated name)
 - %a: Doujinshi authors' name
+- %p: Doujinshi pretty name

 Other options:

 .. code-block::

+    Usage:
+        nhentai --search [keyword] --download
+        NHENTAI=https://nhentai-mirror-url/ nhentai --id [ID ...]
+        nhentai --file [filename]
+
+    Environment Variable:
+        NHENTAI                 nhentai mirror url
+
     Options:
-      # Operation options
+      # Operation options, control the program behaviors
       -h, --help            show this help message and exit
       -D, --download        download doujinshi (for search results)
       -S, --show            just show the doujinshi information

-      # Doujinshi options
-      --id=ID               doujinshi ids set, e.g. 1,2,3
+      # Doujinshi options, specify id, keyword, etc.
+      --id                  doujinshi ids set, e.g. 167680 167681 167682
       -s KEYWORD, --search=KEYWORD
                             search doujinshi by keyword
-      --tag=TAG             download doujinshi by tag
-      -F, --favorites       list or download your favorites.
+      -F, --favorites       list or download your favorites

-      # Multi-page options
-      --page=PAGE           page number of search results
-      --max-page=MAX_PAGE   The max page when recursive download tagged doujinshi
+      # Page options, control the page to fetch / download
+      --page-all            all search results
+      --page=PAGE, --page-range=PAGE
+                            page number of search results. e.g. 1,2-5,14
+      --sorting=SORTING     sorting of doujinshi (recent / popular /
+                            popular-[today|week])

-      # Download options
+      # Download options, the output directory, threads, timeout, delay, etc.
       -o OUTPUT_DIR, --output=OUTPUT_DIR
                             output dir
       -t THREADS, --threads=THREADS
@@ -144,23 +179,37 @@ Other options:
                             timeout for downloading doujinshi
       -d DELAY, --delay=DELAY
                             slow down between downloading every doujinshi
-      -p PROXY, --proxy=PROXY
-                            uses a proxy, for example: http://127.0.0.1:1080
+      --proxy=PROXY         store a proxy, for example: -p "http://127.0.0.1:1080"
       -f FILE, --file=FILE  read gallery IDs from file.
       --format=NAME_FORMAT  format the saved folder name
+      --dry-run             Dry run, skip file download

-      # Generating options
+      # Generate options, for generate html viewer, cbz file, pdf file, etc
       --html                generate a html viewer at current directory
       --no-html             don't generate HTML after downloading
-      --gen-main            generate a main viewer contain all the doujin in the folder
+      --gen-main            generate a main viewer contain all the doujin in the
+                            folder
       -C, --cbz             generate Comic Book CBZ File
-      -P --pdf              generate PDF file
-      --rm-origin-dir       remove downloaded doujinshi dir when generated CBZ
-                            or PDF file.
+      -P, --pdf             generate PDF file
+      --rm-origin-dir       remove downloaded doujinshi dir when generated CBZ or
+                            PDF file
+      --meta                generate a metadata file in doujinshi format
+      --regenerate-cbz      regenerate the cbz file if exists

-      # nHentai options
-      --cookie=COOKIE       set cookie of nhentai to bypass Google recaptcha
+      # nhentai options, set cookie, user-agent, language, remove caches, histories, etc
+      --cookie=COOKIE       set cookie of nhentai to bypass Cloudflare captcha
+      --useragent=USERAGENT, --user-agent=USERAGENT
+                            set useragent to bypass Cloudflare captcha
+      --language=LANGUAGE   set default language to parse doujinshis
+      --clean-language      set DEFAULT as language to parse doujinshis
+      --save-download-history
+                            save downloaded doujinshis, whose will be skipped if
+                            you re-download them
+      --clean-download-history
+                            clean download history
+      --template=VIEWER_TEMPLATE
+                            set viewer template
+      --legacy              use legacy searching method

 ==============
 nHentai Mirror
@@ -177,7 +226,7 @@ Set `NHENTAI` env var to your nhentai mirror.

 .. code-block:: bash

-    NHENTAI=http://h.loli.club nhentai --id 123456
+    NHENTAI=https://h.loli.club nhentai --id 123456

 .. image:: ./images/search.png?raw=true
@@ -190,14 +239,6 @@ Set `NHENTAI` env var to your nhentai mirror.
     :alt: nhentai
     :align: center

-============
-あなたも変態
-============
-.. image:: ./images/image.jpg?raw=true
-    :alt: nhentai
-    :align: center
-
 .. |travis| image:: https://travis-ci.org/RicterZ/nhentai.svg?branch=master
     :target: https://travis-ci.org/RicterZ/nhentai
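
The cookie format documented above is a plain "key=value; key=value" string. A minimal sketch of how such a string breaks down (illustrative only, with placeholder values standing in for the real tokens copied from the browser; this helper is not part of the repo):

.. code-block:: python

    # Placeholder values stand in for the real csrftoken / sessionid / cf_clearance.
    raw = 'csrftoken=TOKEN; sessionid=ID; cf_clearance=CLOUDFLARE'
    cookie = dict(pair.split('=', 1) for pair in raw.split('; '))
    print(cookie['sessionid'])  # -> ID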

Binary files (images) changed:

- image modified: 189 KiB -> 1.0 MiB
- image deleted: 34 KiB
- image modified: 173 KiB -> 991 KiB
- images/usage.png added: 679 KiB
- image modified: 311 KiB -> 1.9 MiB

nhentai/__init__.py

@@ -1,3 +1,3 @@
-__version__ = '0.4.2'
+__version__ = '0.5.0'
 __author__ = 'RicterZ'
 __email__ = 'ricterzheng@gmail.com'

nhentai/cmdline.py

@@ -1,8 +1,10 @@
 # coding: utf-8
-from __future__ import print_function
 import os
 import sys
+import json
 from optparse import OptionParser

 try:
     from itertools import ifilter as filter
 except ImportError:
@@ -13,31 +15,52 @@ from nhentai import __version__
 from nhentai.utils import urlparse, generate_html, generate_main_html, DB
 from nhentai.logger import logger

-try:
-    if sys.version_info < (3, 0, 0):
-        import codecs
-        import locale
-        sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)
-        sys.stderr = codecs.getwriter(locale.getpreferredencoding())(sys.stderr)
-except NameError:
-    # python3
-    pass
-

 def banner():
-    logger.info(u'''nHentai ver %s: あなたも変態。 いいね?
-       _   _            _        _
- _ __ | | | | ___ _ __ | |_ __ _(_)
-| '_ \| |_| |/ _ \ '_ \| __/ _` | |
-| | | |  _  |  __/ | | | || (_| | |
-|_| |_|_| |_|\___|_| |_|\__\__,_|_|
-''' % __version__)
+    logger.debug(f'nHentai ver {__version__}: あなたも変態。 いいね?')
+
+
+def load_config():
+    if not os.path.exists(constant.NHENTAI_CONFIG_FILE):
+        return
+
+    try:
+        with open(constant.NHENTAI_CONFIG_FILE, 'r') as f:
+            constant.CONFIG.update(json.load(f))
+    except json.JSONDecodeError:
+        logger.error('Failed to load config file.')
+        write_config()
+
+
+def write_config():
+    if not os.path.exists(constant.NHENTAI_HOME):
+        os.mkdir(constant.NHENTAI_HOME)
+
+    with open(constant.NHENTAI_CONFIG_FILE, 'w') as f:
+        f.write(json.dumps(constant.CONFIG))
+
+
+def callback(option, opt_str, value, parser):
+    if option == '--id':
+        pass
+    value = []
+
+    for arg in parser.rargs:
+        if arg.isdigit():
+            value.append(int(arg))
+        elif arg.startswith('-'):
+            break
+        else:
+            logger.warning(f'Ignore invalid id {arg}')
+
+    setattr(parser.values, option.dest, value)


 def cmd_parser():
+    load_config()
+
     parser = OptionParser('\n nhentai --search [keyword] --download'
-                          '\n NHENTAI=http://h.loli.club nhentai --id [ID ...]'
+                          '\n NHENTAI=https://nhentai-mirror-url/ nhentai --id [ID ...]'
                           '\n nhentai --file [filename]'
                           '\n\nEnvironment Variable:\n'
                           ' NHENTAI nhentai mirror url')
@@ -47,23 +70,24 @@ def cmd_parser():
     parser.add_option('--show', '-S', dest='is_show', action='store_true', help='just show the doujinshi information')

     # doujinshi options
-    parser.add_option('--id', type='string', dest='id', action='store', help='doujinshi ids set, e.g. 1,2,3')
+    parser.add_option('--id', dest='id', action='callback', callback=callback,
+                      help='doujinshi ids set, e.g. 167680 167681 167682')
     parser.add_option('--search', '-s', type='string', dest='keyword', action='store',
                       help='search doujinshi by keyword')
     parser.add_option('--favorites', '-F', action='store_true', dest='favorites',
-                      help='list or download your favorites.')
+                      help='list or download your favorites')

     # page options
-    parser.add_option('--page', type='int', dest='page', action='store', default=1,
-                      help='page number of search results')
-    parser.add_option('--page-range', type='string', dest='page_range', action='store',
-                      help='page range of favorites. e.g. 1,2-5,14')
+    parser.add_option('--page-all', dest='page_all', action='store_true', default=False,
+                      help='all search results')
+    parser.add_option('--page', '--page-range', type='string', dest='page', action='store', default='',
+                      help='page number of search results. e.g. 1,2-5,14')
-    parser.add_option('--sorting', dest='sorting', action='store', default='recent',
+    parser.add_option('--sorting', dest='sorting', action='store', default='popular',
                       help='sorting of doujinshi (recent / popular / popular-[today|week])',
-                      choices=['recent', 'popular', 'popular-today', 'popular-week'])
+                      choices=['recent', 'popular', 'popular-today', 'popular-week', 'date'])

     # download options
-    parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', default='',
+    parser.add_option('--output', '-o', type='string', dest='output_dir', action='store', default='./',
                       help='output dir')
     parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
                       help='thread count for downloading doujinshi')
@@ -71,11 +95,12 @@ def cmd_parser():
                       help='timeout for downloading doujinshi')
     parser.add_option('--delay', '-d', type='int', dest='delay', action='store', default=0,
                       help='slow down between downloading every doujinshi')
-    parser.add_option('--proxy', '-p', type='string', dest='proxy', action='store', default='',
-                      help='store a proxy, for example: -p \'http://127.0.0.1:1080\'')
+    parser.add_option('--proxy', type='string', dest='proxy', action='store',
+                      help='store a proxy, for example: -p "http://127.0.0.1:1080"')
     parser.add_option('--file', '-f', type='string', dest='file', action='store', help='read gallery IDs from file.')
     parser.add_option('--format', type='string', dest='name_format', action='store',
                       help='format the saved folder name', default='[%i][%a][%t]')
+    parser.add_option('--dry-run', action='store_true', dest='dryrun', help='Dry run, skip file download')

     # generate options
     parser.add_option('--html', dest='html_viewer', action='store_true',
@@ -89,30 +114,34 @@ def cmd_parser():
     parser.add_option('--pdf', '-P', dest='is_pdf', action='store_true',
                       help='generate PDF file')
     parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False,
-                      help='remove downloaded doujinshi dir when generated CBZ or PDF file.')
+                      help='remove downloaded doujinshi dir when generated CBZ or PDF file')
+    parser.add_option('--meta', dest='generate_metadata', action='store_true',
+                      help='generate a metadata file in doujinshi format')
+    parser.add_option('--regenerate-cbz', dest='regenerate_cbz', action='store_true', default=False,
+                      help='regenerate the cbz file if exists')

     # nhentai options
     parser.add_option('--cookie', type='str', dest='cookie', action='store',
-                      help='set cookie of nhentai to bypass Google recaptcha')
+                      help='set cookie of nhentai to bypass Cloudflare captcha')
+    parser.add_option('--useragent', '--user-agent', type='str', dest='useragent', action='store',
+                      help='set useragent to bypass Cloudflare captcha')
     parser.add_option('--language', type='str', dest='language', action='store',
                       help='set default language to parse doujinshis')
+    parser.add_option('--clean-language', dest='clean_language', action='store_true', default=False,
+                      help='set DEFAULT as language to parse doujinshis')
     parser.add_option('--save-download-history', dest='is_save_download_history', action='store_true',
                       default=False, help='save downloaded doujinshis, whose will be skipped if you re-download them')
     parser.add_option('--clean-download-history', action='store_true', default=False, dest='clean_download_history',
                       help='clean download history')
+    parser.add_option('--template', dest='viewer_template', action='store',
+                      help='set viewer template', default='')
+    parser.add_option('--legacy', dest='legacy', action='store_true', default=False,
+                      help='use legacy searching method')

-    try:
-        sys.argv = [unicode(i.decode(sys.stdin.encoding)) for i in sys.argv]
-        print()
-    except (NameError, TypeError):
-        pass
-    except UnicodeDecodeError:
-        exit(0)
-
     args, _ = parser.parse_args(sys.argv[1:])

     if args.html_viewer:
-        generate_html()
+        generate_html(template=constant.CONFIG['template'])
         exit(0)

     if args.main_viewer and not args.id and not args.keyword and not args.favorites:
@@ -126,76 +155,58 @@ def cmd_parser():
         logger.info('Download history cleaned.')
         exit(0)

-    if os.path.exists(constant.NHENTAI_COOKIE):
-        with open(constant.NHENTAI_COOKIE, 'r') as f:
-            constant.COOKIE = f.read()
-
-    if args.cookie:
-        try:
-            if not os.path.exists(constant.NHENTAI_HOME):
-                os.mkdir(constant.NHENTAI_HOME)
-
-            with open(constant.NHENTAI_COOKIE, 'w') as f:
-                f.write(args.cookie)
-        except Exception as e:
-            logger.error('Cannot create NHENTAI_HOME: {}'.format(str(e)))
-            exit(1)
-
+    # --- set config ---
+    if args.cookie is not None:
+        constant.CONFIG['cookie'] = args.cookie
+        write_config()
         logger.info('Cookie saved.')
         exit(0)
-
-    if os.path.exists(constant.NHENTAI_LANGUAGE) and not args.language:
-        with open(constant.NHENTAI_LANGUAGE, 'r') as f:
-            constant.LANGUAGE = f.read()
-            args.language = f.read()
-
-    if args.language:
-        try:
-            if not os.path.exists(constant.NHENTAI_HOME):
-                os.mkdir(constant.NHENTAI_HOME)
-
-            with open(constant.NHENTAI_LANGUAGE, 'w') as f:
-                f.write(args.language)
-        except Exception as e:
-            logger.error('Cannot create NHENTAI_HOME: {}'.format(str(e)))
-            exit(1)
-
-        logger.info('Default language now is {}.'.format(args.language))
-        exit(0)
+    elif args.useragent is not None:
+        constant.CONFIG['useragent'] = args.useragent
+        write_config()
+        logger.info('User-Agent saved.')
+        exit(0)
+    elif args.language is not None:
+        constant.CONFIG['language'] = args.language
+        write_config()
+        logger.info(f'Default language now set to "{args.language}"')
+        exit(0)
+        # TODO: search without language

-    if os.path.exists(constant.NHENTAI_PROXY):
-        with open(constant.NHENTAI_PROXY, 'r') as f:
-            link = f.read()
-            constant.PROXY = {'http': link, 'https': link}
-
-    if args.proxy:
-        try:
-            if not os.path.exists(constant.NHENTAI_HOME):
-                os.mkdir(constant.NHENTAI_HOME)
-
-            proxy_url = urlparse(args.proxy)
-            if proxy_url.scheme not in ('http', 'https'):
-                logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(proxy_url.scheme))
-            else:
-                with open(constant.NHENTAI_PROXY, 'w') as f:
-                    f.write(args.proxy)
-        except Exception as e:
-            logger.error('Cannot create NHENTAI_HOME: {}'.format(str(e)))
-            exit(1)
-
-        logger.info('Proxy \'{0}\' saved.'.format(args.proxy))
-        exit(0)
+    if args.proxy is not None:
+        proxy_url = urlparse(args.proxy)
+        if not args.proxy == '' and proxy_url.scheme not in ('http', 'https', 'socks5', 'socks5h',
+                                                             'socks4', 'socks4a'):
+            logger.error(f'Invalid protocol "{proxy_url.scheme}" of proxy, ignored')
+            exit(0)
+        else:
+            constant.CONFIG['proxy'] = {
+                'http': args.proxy,
+                'https': args.proxy,
+            }
+            logger.info(f'Proxy now set to "{args.proxy}"')
+            write_config()
+        exit(0)
+
+    if args.viewer_template is not None:
+        if not args.viewer_template:
+            args.viewer_template = 'default'
+
+        if not os.path.exists(os.path.join(os.path.dirname(__file__),
+                                           f'viewer/{args.viewer_template}/index.html')):
+            logger.error(f'Template "{args.viewer_template}" does not exists')
+            exit(1)
+        else:
+            constant.CONFIG['template'] = args.viewer_template
+            write_config()
+    # --- end set config ---

     if args.favorites:
-        if not constant.COOKIE:
+        if not constant.CONFIG['cookie']:
             logger.warning('Cookie has not been set, please use `nhentai --cookie \'COOKIE\'` to set it.')
             exit(1)

-    if args.id:
-        _ = [i.strip() for i in args.id.split(',')]
-        args.id = set(int(i) for i in _ if i.isdigit())
-
     if args.file:
         with open(args.file, 'r') as f:
             _ = [i.strip() for i in f.readlines()]
@@ -217,4 +228,8 @@ def cmd_parser():
         logger.critical('Maximum number of used threads is 15')
         exit(1)

+    if args.dryrun and (args.is_cbz or args.is_pdf):
+        logger.critical('Cannot generate PDF or CBZ during dry-run')
+        exit(1)
+
     return args
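
The reworked `--id` option above replaces a comma-split string with an optparse callback that greedily consumes the bare arguments that follow. A standalone sketch of that pattern, distilled from the diff (the repo's version also warns about non-numeric values):

.. code-block:: python

    from optparse import OptionParser

    def collect_ids(option, opt_str, value, parser):
        # gather the bare numeric arguments following --id, stop at the next flag
        ids = []
        for arg in parser.rargs:
            if arg.isdigit():
                ids.append(int(arg))
            elif arg.startswith('-'):
                break
        setattr(parser.values, option.dest, ids)

    parser = OptionParser()
    parser.add_option('--id', dest='id', action='callback', callback=collect_ids)
    opts, leftover = parser.parse_args(['--id', '167680', '167681'])
    print(opts.id)    # [167680, 167681]
    # the consumed digits also remain as positional leftovers, which
    # cmd_parser() discards via `args, _ = parser.parse_args(...)`
    print(leftover)   # ['167680', '167681']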

nhentai/command.py

@@ -1,28 +1,38 @@
-#!/usr/bin/env python2.7
 # coding: utf-8
-from __future__ import unicode_literals, print_function
+import sys
 import signal
 import platform
-import time
+import urllib3.exceptions

+from nhentai import constant
 from nhentai.cmdline import cmd_parser, banner
-from nhentai.parser import doujinshi_parser, search_parser, print_doujinshi, favorites_parser
+from nhentai.parser import doujinshi_parser, search_parser, legacy_search_parser, print_doujinshi, favorites_parser
 from nhentai.doujinshi import Doujinshi
 from nhentai.downloader import Downloader
 from nhentai.logger import logger
 from nhentai.constant import BASE_URL
-from nhentai.utils import generate_html, generate_cbz, generate_main_html, generate_pdf, check_cookie, signal_handler, DB
+from nhentai.utils import generate_html, generate_cbz, generate_main_html, generate_pdf, generate_metadata_file, \
+    paging, check_cookie, signal_handler, DB


 def main():
     banner()
-    options = cmd_parser()
-    logger.info('Using mirror: {0}'.format(BASE_URL))

-    from nhentai.constant import PROXY
-    # constant.PROXY will be changed after cmd_parser()
-    if PROXY:
-        logger.info('Using proxy: {0}'.format(PROXY))
+    if sys.version_info < (3, 0, 0):
+        logger.error('nhentai now only support Python 3.x')
+        exit(1)
+
+    options = cmd_parser()
+    logger.info(f'Using mirror: {BASE_URL}')
+
+    # CONFIG['proxy'] will be changed after cmd_parser()
+    if constant.CONFIG['proxy']['http']:
+        logger.info(f'Using proxy: {constant.CONFIG["proxy"]["http"]}')
+
+    if not constant.CONFIG['template']:
+        constant.CONFIG['template'] = 'default'
+
+    logger.info(f'Using viewer template "{constant.CONFIG["template"]}"')

     # check your cookie
     check_cookie()
@@ -31,18 +41,22 @@ def main():
     doujinshi_ids = []
     doujinshi_list = []

+    page_list = paging(options.page)
+
     if options.favorites:
         if not options.is_download:
             logger.warning('You do not specify --download option')

-        doujinshis = favorites_parser(options.page_range)
+        doujinshis = favorites_parser(page=page_list)

     elif options.keyword:
-        from nhentai.constant import LANGUAGE
-        if LANGUAGE:
-            logger.info('Using deafult language: {0}'.format(LANGUAGE))
-            options.keyword += ', language:{}'.format(LANGUAGE)
+        if constant.CONFIG['language']:
+            logger.info(f'Using default language: {constant.CONFIG["language"]}')
+            options.keyword += f' language:{constant.CONFIG["language"]}'

-        doujinshis = search_parser(options.keyword, sorting=options.sorting, page=options.page)
+        _search_parser = legacy_search_parser if options.legacy else search_parser
+        doujinshis = _search_parser(options.keyword, sorting=options.sorting, page=page_list,
+                                    is_page_all=options.page_all)

     elif not doujinshi_ids:
         doujinshi_ids = options.id
@@ -53,37 +67,35 @@ def main():

     if options.is_save_download_history:
         with DB() as db:
-            data = set(db.get_all())
+            data = map(int, db.get_all())

-        doujinshi_ids = list(set(doujinshi_ids) - data)
+        doujinshi_ids = list(set(map(int, doujinshi_ids)) - set(data))

-    if doujinshi_ids:
-        for i, id_ in enumerate(doujinshi_ids):
-            if options.delay:
-                time.sleep(options.delay)
-
-            doujinshi_info = doujinshi_parser(id_)
-
-            if doujinshi_info:
-                doujinshi_list.append(Doujinshi(name_format=options.name_format, **doujinshi_info))
-
-            if (i + 1) % 10 == 0:
-                logger.info('Progress: %d / %d' % (i + 1, len(doujinshi_ids)))
-
     if not options.is_show:
         downloader = Downloader(path=options.output_dir, size=options.threads,
                                 timeout=options.timeout, delay=options.delay)

-        for doujinshi in doujinshi_list:
+        for doujinshi_id in doujinshi_ids:
+            doujinshi_info = doujinshi_parser(doujinshi_id)
+            if doujinshi_info:
+                doujinshi = Doujinshi(name_format=options.name_format, **doujinshi_info)
+            else:
+                continue
+
+            if not options.dryrun:
                 doujinshi.downloader = downloader
-            doujinshi.download()
+                doujinshi.download(regenerate_cbz=options.regenerate_cbz)
+
+            if options.generate_metadata:
+                table = doujinshi.table
+                generate_metadata_file(options.output_dir, table, doujinshi)

             if options.is_save_download_history:
                 with DB() as db:
                     db.add_one(doujinshi.id)

             if not options.is_nohtml and not options.is_cbz and not options.is_pdf:
-                generate_html(options.output_dir, doujinshi)
+                generate_html(options.output_dir, doujinshi, template=constant.CONFIG['template'])
             elif options.is_cbz:
                 generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir)
             elif options.is_pdf:
@@ -93,14 +105,21 @@ def main():
             generate_main_html(options.output_dir)

         if not platform.system() == 'Windows':
-            logger.log(15, '🍻 All done.')
+            logger.log(16, '🍻 All done.')
         else:
-            logger.log(15, 'All done.')
+            logger.log(16, 'All done.')

     else:
-        [doujinshi.show() for doujinshi in doujinshi_list]
+        for doujinshi_id in doujinshi_ids:
+            doujinshi_info = doujinshi_parser(doujinshi_id)
+            if doujinshi_info:
+                doujinshi = Doujinshi(name_format=options.name_format, **doujinshi_info)
+            else:
+                continue
+
+            doujinshi.show()


+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 signal.signal(signal.SIGINT, signal_handler)

 if __name__ == '__main__':
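
main() now funnels options.page through paging(), imported from nhentai.utils; its implementation is not part of this diff. A hypothetical sketch of a parser for the documented "1,2-5,14" page syntax (the page_range_parser removed from nhentai/parser.py further below had similar semantics):

.. code-block:: python

    def paging(page_string):
        # hypothetical stand-in for nhentai.utils.paging, which this diff does not show
        page_list = []
        for part in page_string.split(','):
            if '-' in part:
                start, end = part.split('-')
                page_list.extend(range(int(start), int(end) + 1))
            elif part.isdigit():
                page_list.append(int(part))
        return page_list

    print(paging('1,2-5,14'))  # [1, 2, 3, 4, 5, 14]
    print(paging(''))          # [] -- the --page option's default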

nhentai/constant.py

@@ -1,37 +1,40 @@
 # coding: utf-8
-from __future__ import unicode_literals, print_function
 import os
 import tempfile

-try:
-    from urlparse import urlparse
-except ImportError:
-    from urllib.parse import urlparse
+from urllib.parse import urlparse

 BASE_URL = os.getenv('NHENTAI', 'https://nhentai.net')

-__api_suspended_DETAIL_URL = '%s/api/gallery' % BASE_URL
-
-DETAIL_URL = '%s/g' % BASE_URL
-SEARCH_URL = '%s/api/galleries/search' % BASE_URL
-
-TAG_API_URL = '%s/api/galleries/tagged' % BASE_URL
-LOGIN_URL = '%s/login/' % BASE_URL
-CHALLENGE_URL = '%s/challenge' % BASE_URL
-FAV_URL = '%s/favorites/' % BASE_URL
-
-u = urlparse(BASE_URL)
-IMAGE_URL = '%s://i.%s/galleries' % (u.scheme, u.hostname)
+DETAIL_URL = f'{BASE_URL}/g'
+LEGACY_SEARCH_URL = f'{BASE_URL}/search/'
+SEARCH_URL = f'{BASE_URL}/api/galleries/search'
+
+TAG_API_URL = f'{BASE_URL}/api/galleries/tagged'
+LOGIN_URL = f'{BASE_URL}/login/'
+CHALLENGE_URL = f'{BASE_URL}/challenge'
+FAV_URL = f'{BASE_URL}/favorites/'
+
+IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i.{urlparse(BASE_URL).hostname}/galleries'

 NHENTAI_HOME = os.path.join(os.getenv('HOME', tempfile.gettempdir()), '.nhentai')
-NHENTAI_PROXY = os.path.join(NHENTAI_HOME, 'proxy')
-NHENTAI_COOKIE = os.path.join(NHENTAI_HOME, 'cookie')
-NHENTAI_LANGUAGE = os.path.join(NHENTAI_HOME, 'language')
 NHENTAI_HISTORY = os.path.join(NHENTAI_HOME, 'history.sqlite3')
+NHENTAI_CONFIG_FILE = os.path.join(NHENTAI_HOME, 'config.json')

-PROXY = {}
-COOKIE = ''
-LANGUAGE = ''
+__api_suspended_DETAIL_URL = f'{BASE_URL}/api/gallery'
+
+CONFIG = {
+    'proxy': {'http': '', 'https': ''},
+    'cookie': '',
+    'language': '',
+    'template': '',
+    'useragent': 'nhentai command line client (https://github.com/RicterZ/nhentai)'
+}
+
+LANGUAGE_ISO = {
+    'english': 'en',
+    'chinese': 'zh',
+    'japanese': 'ja',
+    'translated': 'translated'
+}
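
Given these defaults, the first write_config() call in cmdline.py stores a single-line JSON file (json.dumps is used without indent) at ~/.nhentai/config.json, along the lines of:

.. code-block:: python

    import json

    # Reproduces the default CONFIG dict above; the printed line matches what
    # write_config() would store before any --cookie/--proxy/--language overrides.
    print(json.dumps({
        'proxy': {'http': '', 'https': ''},
        'cookie': '',
        'language': '',
        'template': '',
        'useragent': 'nhentai command line client (https://github.com/RicterZ/nhentai)'
    }))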

nhentai/doujinshi.py

@@ -1,7 +1,6 @@
 # coding: utf-8
-from __future__ import print_function, unicode_literals
 from tabulate import tabulate
-from future.builtins import range

 from nhentai.constant import DETAIL_URL, IMAGE_URL
 from nhentai.logger import logger
@@ -27,27 +26,28 @@ class DoujinshiInfo(dict):

 class Doujinshi(object):
-    def __init__(self, name=None, id=None, img_id=None, ext='', pages=0, name_format='[%i][%a][%t]', **kwargs):
+    def __init__(self, name=None, pretty_name=None, id=None, img_id=None,
+                 ext='', pages=0, name_format='[%i][%a][%t]', **kwargs):
         self.name = name
+        self.pretty_name = pretty_name
         self.id = id
         self.img_id = img_id
         self.ext = ext
         self.pages = pages
         self.downloader = None
-        self.url = '%s/%d' % (DETAIL_URL, self.id)
+        self.url = f'{DETAIL_URL}/{self.id}'
         self.info = DoujinshiInfo(**kwargs)

-        name_format = name_format.replace('%i', str(self.id))
-        name_format = name_format.replace('%a', self.info.artists)
-        name_format = name_format.replace('%t', self.name)
-        name_format = name_format.replace('%s', self.info.subtitle)
-        self.filename = format_filename(name_format)
-
-    def __repr__(self):
-        return '<Doujinshi: {0}>'.format(self.name)
+        name_format = name_format.replace('%i', format_filename(str(self.id)))
+        name_format = name_format.replace('%a', format_filename(self.info.artists))
+        name_format = name_format.replace('%t', format_filename(self.name))
+        name_format = name_format.replace('%p', format_filename(self.pretty_name))
+        name_format = name_format.replace('%s', format_filename(self.info.subtitle))
+        self.filename = format_filename(name_format, 255, True)

-    def show(self):
-        table = [
+        self.table = [
+            ["Parodies", self.info.parodies],
             ["Doujinshi", self.name],
             ["Subtitle", self.info.subtitle],
             ["Characters", self.info.characters],
@@ -57,26 +57,24 @@ class Doujinshi(object):
             ["URL", self.url],
             ["Pages", self.pages],
         ]
-        logger.info(u'Print doujinshi information of {0}\n{1}'.format(self.id, tabulate(table)))

-    def download(self):
-        logger.info('Starting to download doujinshi: %s' % self.name)
+    def __repr__(self):
+        return f'<Doujinshi: {self.name}>'
+
+    def show(self):
+        logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}')
+
+    def download(self, regenerate_cbz=False):
+        logger.info(f'Starting to download doujinshi: {self.name}')
         if self.downloader:
             download_queue = []
             if len(self.ext) != self.pages:
                 logger.warning('Page count and ext count do not equal')

             for i in range(1, min(self.pages, len(self.ext)) + 1):
-                download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i, self.ext[i-1]))
+                download_queue.append(f'{IMAGE_URL}/{self.img_id}/{i}.{self.ext[i-1]}')

-            self.downloader.download(download_queue, self.filename)
-
-            '''
-            for i in range(len(self.ext)):
-                download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i+1, EXT_MAP[self.ext[i]]))
-            '''
-
+            self.downloader.start_download(download_queue, self.filename, regenerate_cbz=regenerate_cbz)
         else:
             logger.critical('Downloader has not been loaded')
@@ -88,4 +86,4 @@ if __name__ == '__main__':
     try:
         test.download()
     except Exception as e:
-        print('Exception: %s' % str(e))
+        print(f'Exception: {e}')
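
The constructor now runs every placeholder through format_filename() before substitution, and %p (pretty name) joins the set. A rough illustration of the replace chain, with a simplified stand-in for the repo's real format_filename helper (which sanitizes and truncates more carefully; the signature is inferred from the format_filename(name_format, 255, True) call above):

.. code-block:: python

    def format_filename(s, length=255, ascii_only=False):
        # simplified stand-in: strip path separators, cap the length
        return s.replace('/', '-')[:length]

    name_format = '[%i][%a][%t]'
    name_format = name_format.replace('%i', format_filename('167680'))
    name_format = name_format.replace('%a', format_filename('ARTIST'))
    name_format = name_format.replace('%t', format_filename('TITLE'))
    print(format_filename(name_format, 255, True))  # -> [167680][ARTIST][TITLE]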

nhentai/downloader.py

@@ -1,25 +1,22 @@
 # coding: utf-
-from __future__ import unicode_literals, print_function
 import multiprocessing
 import signal

-from future.builtins import str as text
 import sys
 import os
 import requests
 import time
+import urllib3.exceptions

-try:
-    from urllib.parse import urlparse
-except ImportError:
-    from urlparse import urlparse
+from urllib.parse import urlparse
+from nhentai import constant
 from nhentai.logger import logger
 from nhentai.parser import request
 from nhentai.utils import Singleton

-requests.packages.urllib3.disable_warnings()
+
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 semaphore = multiprocessing.Semaphore(1)
@@ -27,6 +24,21 @@ class NHentaiImageNotExistException(Exception):
     pass


+def download_callback(result):
+    result, data = result
+    if result == 0:
+        logger.warning('fatal errors occurred, ignored')
+    elif result == -1:
+        logger.warning(f'url {data} return status code 404')
+    elif result == -2:
+        logger.warning('Ctrl-C pressed, exiting sub processes ...')
+    elif result == -3:
+        # workers won't be run, just pass
+        pass
+    else:
+        logger.log(16, f'{data} downloaded successfully')
+
+
 class Downloader(Singleton):

     def __init__(self, path='', size=5, timeout=30, delay=0):
@@ -35,24 +47,25 @@ class Downloader(Singleton):
         self.timeout = timeout
         self.delay = delay

-    def download_(self, url, folder='', filename='', retried=0):
+    def download(self, url, folder='', filename='', retried=0, proxy=None):
         if self.delay:
             time.sleep(self.delay)
-        logger.info('Starting to download {0} ...'.format(url))
+        logger.info(f'Starting to download {url} ...')
         filename = filename if filename else os.path.basename(urlparse(url).path)
         base_filename, extension = os.path.splitext(filename)

+        save_file_path = os.path.join(folder, base_filename.zfill(3) + extension)
         try:
-            if os.path.exists(os.path.join(folder, base_filename.zfill(3) + extension)):
-                logger.warning('File: {0} exists, ignoring'.format(os.path.join(folder, base_filename.zfill(3) +
-                               extension)))
+            if os.path.exists(save_file_path):
+                logger.warning(f'Ignored exists file: {save_file_path}')
                 return 1, url

             response = None
-            with open(os.path.join(folder, base_filename.zfill(3) + extension), "wb") as f:
+            with open(save_file_path, "wb") as f:
                 i = 0
                 while i < 10:
                     try:
-                        response = request('get', url, stream=True, timeout=self.timeout)
+                        response = request('get', url, stream=True, timeout=self.timeout, proxies=proxy)
                         if response.status_code != 200:
                             raise NHentaiImageNotExistException
@@ -77,13 +90,14 @@ class Downloader(Singleton):
         except (requests.HTTPError, requests.Timeout) as e:
             if retried < 3:
-                logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
-                return 0, self.download_(url=url, folder=folder, filename=filename, retried=retried+1)
+                logger.warning(f'Warning: {e}, retrying({retried}) ...')
+                return 0, self.download(url=url, folder=folder, filename=filename,
+                                        retried=retried+1, proxy=proxy)
             else:
                 return 0, None

         except NHentaiImageNotExistException as e:
-            os.remove(os.path.join(folder, base_filename.zfill(3) + extension))
+            os.remove(save_file_path)
             return -1, url

         except Exception as e:
@@ -97,39 +111,28 @@ class Downloader(Singleton):
         return 1, url

-    def _download_callback(self, result):
-        result, data = result
-        if result == 0:
-            logger.warning('fatal errors occurred, ignored')
-            # exit(1)
-        elif result == -1:
-            logger.warning('url {} return status code 404'.format(data))
-        elif result == -2:
-            logger.warning('Ctrl-C pressed, exiting sub processes ...')
-        elif result == -3:
-            # workers wont be run, just pass
-            pass
-        else:
-            logger.log(15, '{0} downloaded successfully'.format(data))
-
-    def download(self, queue, folder=''):
-        if not isinstance(folder, text):
+    def start_download(self, queue, folder='', regenerate_cbz=False):
+        if not isinstance(folder, (str, )):
             folder = str(folder)

         if self.path:
             folder = os.path.join(self.path, folder)

+        if os.path.exists(folder + '.cbz'):
+            if not regenerate_cbz:
+                logger.warning(f'CBZ file "{folder}.cbz" exists, ignored download request')
+                return
+
         if not os.path.exists(folder):
-            logger.warn('Path \'{0}\' does not exist, creating.'.format(folder))
             try:
                 os.makedirs(folder)
             except EnvironmentError as e:
-                logger.critical('{0}'.format(str(e)))
+                logger.critical(str(e))
         else:
-            logger.warn('Path \'{0}\' already exist.'.format(folder))
+            logger.warning(f'Path "{folder}" already exist.')

-        queue = [(self, url, folder) for url in queue]
+        queue = [(self, url, folder, constant.CONFIG['proxy']) for url in queue]

         pool = multiprocessing.Pool(self.size, init_worker)
         [pool.apply_async(download_wrapper, args=item) for item in queue]
@@ -138,9 +141,9 @@ class Downloader(Singleton):
     pool.join()

-def download_wrapper(obj, url, folder=''):
+def download_wrapper(obj, url, folder='', proxy=None):
     if sys.platform == 'darwin' or semaphore.get_value():
-        return Downloader.download_(obj, url=url, folder=folder)
+        return Downloader.download(obj, url=url, folder=folder, proxy=proxy)
     else:
         return -3, None
@@ -149,7 +152,7 @@ def init_worker():
     signal.signal(signal.SIGINT, subprocess_signal)

-def subprocess_signal(signal, frame):
+def subprocess_signal(sig, frame):
     if semaphore.acquire(timeout=1):
         logger.warning('Ctrl-C pressed, exiting sub processes ...')
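
One detail worth noting in download(): the new save_file_path line zero-pads page filenames to three digits, presumably so downloaded pages sort correctly on disk. A small illustration (POSIX path shown; the URL is a placeholder):

.. code-block:: python

    import os
    from urllib.parse import urlparse

    url = 'https://i.nhentai.net/galleries/12345/7.jpg'
    filename = os.path.basename(urlparse(url).path)   # '7.jpg'
    base, ext = os.path.splitext(filename)
    print(os.path.join('DOUJINSHI_DIR', base.zfill(3) + ext))  # DOUJINSHI_DIR/007.jpg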

nhentai/logger.py

@@ -1,7 +1,6 @@
 #
 # Copyright (C) 2010-2012 Vinay Sajip. All rights reserved. Licensed under the new BSD license.
 #
-from __future__ import print_function, unicode_literals
 import logging
 import re
 import platform
@@ -35,7 +34,7 @@ class ColorizingStreamHandler(logging.StreamHandler):
     # levels to (background, foreground, bold/intense)
     level_map = {
         logging.DEBUG: (None, 'blue', False),
-        logging.INFO: (None, 'green', False),
+        logging.INFO: (None, 'white', False),
         logging.WARNING: (None, 'yellow', False),
         logging.ERROR: (None, 'red', False),
         logging.CRITICAL: ('red', 'white', False)
@@ -161,20 +160,20 @@ class ColorizingStreamHandler(logging.StreamHandler):
         return self.colorize(message, record)

-logging.addLevelName(15, "INFO")
+logging.addLevelName(16, "SUCCESS")
 logger = logging.getLogger('nhentai')
 LOGGER_HANDLER = ColorizingStreamHandler(sys.stdout)
-FORMATTER = logging.Formatter("\r[%(asctime)s] [%(levelname)s] %(message)s", "%H:%M:%S")
+FORMATTER = logging.Formatter("\r[%(asctime)s] %(funcName)s: %(message)s", "%H:%M:%S")
 LOGGER_HANDLER.setFormatter(FORMATTER)

-LOGGER_HANDLER.level_map[logging.getLevelName("INFO")] = (None, "cyan", False)
+LOGGER_HANDLER.level_map[logging.getLevelName("SUCCESS")] = (None, "green", False)
 logger.addHandler(LOGGER_HANDLER)
 logger.setLevel(logging.DEBUG)

 if __name__ == '__main__':
-    logger.log(15, 'nhentai')
+    logger.log(16, 'nhentai')
     logger.info('info')
-    logger.warn('warn')
+    logger.warning('warning')
     logger.debug('debug')
     logger.error('error')
     logger.critical('critical')
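
The custom log level moves from 15 (previously registered under the confusing name "INFO") to 16 as "SUCCESS", which is why calls elsewhere in this compare change from logger.log(15, ...) to logger.log(16, ...). A minimal reproduction of the wiring:

.. code-block:: python

    import logging

    logging.addLevelName(16, 'SUCCESS')  # sits between DEBUG (10) and INFO (20)
    logging.basicConfig(format='[%(levelname)s] %(message)s', level=logging.DEBUG)
    log = logging.getLogger('demo')
    log.log(16, 'downloaded successfully')  # -> [SUCCESS] downloaded successfully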

View File

@ -1,7 +1,5 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals, print_function
import sys
import os import os
import re import re
import time import time
@ -28,7 +26,7 @@ def login(username, password):
logger.info('Getting CSRF token ...') logger.info('Getting CSRF token ...')
if os.getenv('DEBUG'): if os.getenv('DEBUG'):
logger.info('CSRF token is {}'.format(csrf_token)) logger.info(f'CSRF token is {csrf_token}')
login_dict = { login_dict = {
'csrfmiddlewaretoken': csrf_token, 'csrfmiddlewaretoken': csrf_token,
@ -58,18 +56,18 @@ def _get_title_and_id(response):
doujinshi_container = doujinshi.find('div', attrs={'class': 'caption'}) doujinshi_container = doujinshi.find('div', attrs={'class': 'caption'})
title = doujinshi_container.text.strip() title = doujinshi_container.text.strip()
title = title if len(title) < 85 else title[:82] + '...' title = title if len(title) < 85 else title[:82] + '...'
id_ = re.search('/g/(\d+)/', doujinshi.a['href']).group(1) id_ = re.search('/g/([0-9]+)/', doujinshi.a['href']).group(1)
result.append({'id': id_, 'title': title}) result.append({'id': id_, 'title': title})
return result return result
def favorites_parser(page_range=''): def favorites_parser(page=None):
result = [] result = []
html = BeautifulSoup(request('get', constant.FAV_URL).content, 'html.parser') html = BeautifulSoup(request('get', constant.FAV_URL).content, 'html.parser')
count = html.find('span', attrs={'class': 'count'}) count = html.find('span', attrs={'class': 'count'})
if not count: if not count:
logger.error("Can't get your number of favorited doujins. Did the login failed?") logger.error("Can't get your number of favorite doujinshis. Did the login failed?")
return [] return []
count = int(count.text.strip('(').strip(')').replace(',', '')) count = int(count.text.strip('(').strip(')').replace(',', ''))
@ -78,93 +76,73 @@ def favorites_parser(page_range=''):
return [] return []
pages = int(count / 25) pages = int(count / 25)
if page:
page_range_list = page
else:
if pages: if pages:
pages += 1 if count % (25 * pages) else 0 pages += 1 if count % (25 * pages) else 0
else: else:
pages = 1 pages = 1
logger.info('You have %d favorites in %d pages.' % (count, pages)) logger.info(f'You have {count} favorites in {pages} pages.')
if os.getenv('DEBUG'): if os.getenv('DEBUG'):
pages = 1 pages = 1
page_range_list = range(1, pages + 1) page_range_list = range(1, pages + 1)
if page_range:
logger.info('page range is {0}'.format(page_range))
page_range_list = page_range_parser(page_range, pages)
for page in page_range_list: for page in page_range_list:
try: try:
logger.info('Getting doujinshi ids of page %d' % page) logger.info(f'Getting doujinshi ids of page {page}')
resp = request('get', constant.FAV_URL + '?page=%d' % page).content resp = request('get', f'{constant.FAV_URL}?page={page}').content
result.extend(_get_title_and_id(resp)) result.extend(_get_title_and_id(resp))
except Exception as e: except Exception as e:
logger.error('Error: %s, continue', str(e)) logger.error(f'Error: {e}, continue')
return result return result
def page_range_parser(page_range, max_page_num):
pages = set()
ranges = str.split(page_range, ',')
for range_str in ranges:
idx = range_str.find('-')
if idx == -1:
try:
page = int(range_str)
if page <= max_page_num:
pages.add(page)
except ValueError:
logger.error('page range({0}) is not valid'.format(page_range))
else:
try:
left = int(range_str[:idx])
right = int(range_str[idx + 1:])
if right > max_page_num:
right = max_page_num
for page in range(left, right + 1):
pages.add(page)
except ValueError:
logger.error('page range({0}) is not valid'.format(page_range))
return list(pages)
 def doujinshi_parser(id_):
     if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()):
-        raise Exception('Doujinshi id({0}) is not valid'.format(id_))
+        raise Exception(f'Doujinshi id({id_}) is not valid')

     id_ = int(id_)
-    logger.log(15, 'Fetching doujinshi information of id {0}'.format(id_))
+    logger.info(f'Fetching doujinshi information of id {id_}')
     doujinshi = dict()
     doujinshi['id'] = id_
-    url = '{0}/{1}/'.format(constant.DETAIL_URL, id_)
+    url = f'{constant.DETAIL_URL}/{id_}/'

     try:
         response = request('get', url)
-        if response.status_code in (200,):
+        if response.status_code in (200, ):
             response = response.content
+        elif response.status_code in (404,):
+            logger.error(f'Doujinshi with id {id_} cannot be found')
+            return []
         else:
-            logger.debug('Slow down and retry ({}) ...'.format(id_))
+            logger.debug(f'Slow down and retry ({id_}) ...')
             time.sleep(1)
             return doujinshi_parser(str(id_))
     except Exception as e:
-        logger.warn('Error: {}, ignored'.format(str(e)))
+        logger.warning(f'Error: {e}, ignored')
         return None

     html = BeautifulSoup(response, 'html.parser')
     doujinshi_info = html.find('div', attrs={'id': 'info'})

     title = doujinshi_info.find('h1').text
+    pretty_name = doujinshi_info.find('h1').find('span', attrs={'class': 'pretty'}).text
     subtitle = doujinshi_info.find('h2')

     doujinshi['name'] = title
+    doujinshi['pretty_name'] = pretty_name
     doujinshi['subtitle'] = subtitle.text if subtitle else ''

     doujinshi_cover = html.find('div', attrs={'id': 'cover'})
-    img_id = re.search('/galleries/([\d]+)/cover\.(jpg|png|gif)$', doujinshi_cover.a.img.attrs['data-src'])
+    img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif)$',
+                       doujinshi_cover.a.img.attrs['data-src'])

     ext = []
     for i in html.find_all('div', attrs={'class': 'thumb-container'}):
@@ -178,6 +156,7 @@ def doujinshi_parser(id_):
     doujinshi['img_id'] = img_id.group(1)
     doujinshi['ext'] = ext

+    pages = 0
     for _ in doujinshi_info.find_all('div', class_='tag-container field-name'):
         if re.search('Pages:', _.text):
             pages = _.find('span', class_='name').string
@@ -199,13 +178,38 @@ def doujinshi_parser(id_):
     return doujinshi
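One nuance in the rewritten cover regex: the new pattern drops the escaping on the dot before the extension, so that '.' now matches any character. Harmless for real gallery URLs, but a small loss of strictness. A standalone check against an assumed data-src value:

import re

# Assumed shape of a cover thumbnail URL; only the path suffix matters here.
data_src = 'https://t.nhentai.net/galleries/1137677/cover.jpg'
img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif)$', data_src)
assert img_id.group(1) == '1137677'
# Note: unlike the old '\.' form, the unescaped '.' matches any character
# in that position.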
-def old_search_parser(keyword, sorting='date', page=1):
-    logger.debug('Searching doujinshis of keyword {0}'.format(keyword))
-    response = request('get', url=constant.SEARCH_URL, params={'q': keyword, 'page': page, 'sort': sorting}).content
-    result = _get_title_and_id(response)
+def legacy_search_parser(keyword, sorting, page, is_page_all=False):
+    logger.debug(f'Searching doujinshis of keyword {keyword}')
+    response = None
+    result = []
+
+    if is_page_all and len(page) != 1:
+        # `--page-all` option will override the `--page` option
+        page = [1]
+
+    for p in page:
+        logger.debug(f'Fetching page {p} ...')
+        response = request('get', url=constant.LEGACY_SEARCH_URL,
+                           params={'q': keyword, 'page': p, 'sort': sorting}).content
+        result.extend(_get_title_and_id(response))
+
     if not result:
-        logger.warn('Not found anything of keyword {}'.format(keyword))
+        logger.warning(f'Not found anything of keyword {keyword} on page {page[0]}')
+        return result
+
+    if is_page_all:
+        html = BeautifulSoup(response, 'lxml')
+        pagination = html.find(attrs={'class': 'pagination'})
+        next_page = pagination.find(attrs={'class': 'next'})
+
+        if next_page is None:
+            logger.warning('Reached the last page')
+            return result
+        else:
+            next_page = re.findall('page=([0-9]+)', next_page.attrs['href'])[0]
+            result.extend(legacy_search_parser(keyword, sorting, [next_page], is_page_all))
+            return result
+
     return result
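The legacy parser now pages itself: with is_page_all it follows the pagination block's "next" anchor recursively until no such anchor remains. A standalone sketch of that next-page extraction, using html.parser instead of lxml so it runs without extra dependencies (the markup fragment is invented for illustration):

import re
from bs4 import BeautifulSoup

html = BeautifulSoup('<section class="pagination">'
                     '<a href="?q=full+color&page=3" class="next">&raquo;</a>'
                     '</section>', 'html.parser')
next_link = html.find(attrs={'class': 'pagination'}).find(attrs={'class': 'next'})
assert re.findall('page=([0-9]+)', next_link.attrs['href'])[0] == '3'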
@@ -215,47 +219,64 @@ def print_doujinshi(doujinshi_list):
         return

     doujinshi_list = [(i['id'], i['title']) for i in doujinshi_list]
     headers = ['id', 'doujinshi']
-    logger.info('Search Result\n' +
-                tabulate(tabular_data=doujinshi_list, headers=headers, tablefmt='rst'))
+    logger.info(f'Search Result || Found {doujinshi_list.__len__()} doujinshis')
+    print(tabulate(tabular_data=doujinshi_list, headers=headers, tablefmt='rst'))
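print_doujinshi() now logs only a one-line count (doujinshi_list.__len__() would read more idiomatically as len(doujinshi_list)) and sends the table straight to stdout instead of through the logger. Roughly what that print emits, with two invented rows:

from tabulate import tabulate

rows = [(177013, 'Example Title A'), (177014, 'Example Title B')]
print(tabulate(tabular_data=rows, headers=['id', 'doujinshi'], tablefmt='rst'))
# output (roughly):
# ======  ===============
#     id  doujinshi
# ======  ===============
# 177013  Example Title A
# 177014  Example Title B
# ======  ===============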
-def search_parser(keyword, sorting, page):
-    logger.debug('Searching doujinshis using keywords {0}'.format(keyword))
-    # keyword = '+'.join([i.strip().replace(' ', '-').lower() for i in keyword.split(',')])
+def search_parser(keyword, sorting, page, is_page_all=False):
     result = []
+    response = None
+    if not page:
+        page = [1]
+
+    if is_page_all:
+        url = request('get', url=constant.SEARCH_URL, params={'query': keyword}).url
+        init_response = request('get', url.replace('%2B', '+')).json()
+        page = range(1, init_response['num_pages']+1)
+
+    total = f'/{page[-1]}' if is_page_all else ''
+    not_exists_persist = False
+    for p in page:
         i = 0
-    while i < 5:
+
+        logger.info(f'Searching doujinshis using keywords "{keyword}" on page {p}{total}')
+        while i < 3:
             try:
-                url = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page, 'sort': sorting}).url
+                url = request('get', url=constant.SEARCH_URL, params={'query': keyword,
+                                                                      'page': p, 'sort': sorting}).url
                 response = request('get', url.replace('%2B', '+')).json()
             except Exception as e:
                 logger.critical(str(e))
+                response = None
                 break

-        if 'result' not in response:
-            raise Exception('No result in response')
+        if response is None or 'result' not in response:
+            logger.warning(f'No result in response in page {p}')
+            if not_exists_persist is True:
+                break
+            continue
+
         for row in response['result']:
             title = row['title']['english']
             title = title[:85] + '..' if len(title) > 85 else title
             result.append({'id': row['id'], 'title': title})
+
+        not_exists_persist = False
     if not result:
-        logger.warn('No results for keywords {}'.format(keyword))
+        logger.warning(f'No results for keywords {keyword}')

     return result
 def __api_suspended_doujinshi_parser(id_):
     if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()):
-        raise Exception('Doujinshi id({0}) is not valid'.format(id_))
+        raise Exception(f'Doujinshi id({id_}) is not valid')

     id_ = int(id_)
-    logger.log(15, 'Fetching information of doujinshi id {0}'.format(id_))
+    logger.info(f'Fetching information of doujinshi id {id_}')
     doujinshi = dict()
     doujinshi['id'] = id_
-    url = '{0}/{1}'.format(constant.DETAIL_URL, id_)
+    url = f'{constant.DETAIL_URL}/{id_}'
     i = 0
     while 5 > i:
         try:
nhentai/serializer.py
@@ -2,9 +2,10 @@
 import json
 import os
 from xml.sax.saxutils import escape
+from nhentai.constant import LANGUAGE_ISO


-def serialize_json(doujinshi, dir):
+def serialize_json(doujinshi, output_dir):
     metadata = {'title': doujinshi.name,
                 'subtitle': doujinshi.info.subtitle}
     if doujinshi.info.date:
@@ -25,13 +26,13 @@ def serialize_json(doujinshi, dir):
     metadata['URL'] = doujinshi.url
     metadata['Pages'] = doujinshi.pages

-    with open(os.path.join(dir, 'metadata.json'), 'w') as f:
-        json.dump(metadata, f, separators=','':')
+    with open(os.path.join(output_dir, 'metadata.json'), 'w') as f:
+        json.dump(metadata, f, separators=(',', ':'))


-def serialize_comicxml(doujinshi, dir):
+def serialize_comic_xml(doujinshi, output_dir):
     from iso8601 import parse_date
-    with open(os.path.join(dir, 'ComicInfo.xml'), 'w') as f:
+    with open(os.path.join(output_dir, 'ComicInfo.xml'), 'w', encoding="utf-8") as f:
         f.write('<?xml version="1.0" encoding="utf-8"?>\n')
         f.write('<ComicInfo xmlns:xsd="http://www.w3.org/2001/XMLSchema" '
                 'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">\n')
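One detail on the separators change: the old spelling `separators=','':'` is two adjacent string literals, i.e. the single string ',:'. Since json unpacks the argument into an (item_separator, key_separator) pair, a two-character string happens to unpack exactly like the tuple, so this is a readability fix rather than a behaviour change:

import json

compact_tuple = json.dumps({'title': 'x', 'Pages': 2}, separators=(',', ':'))
compact_str = json.dumps({'title': 'x', 'Pages': 2}, separators=','':')
# both spell the same (item, key) separator pair
assert compact_tuple == compact_str == '{"title":"x","Pages":2}'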
@@ -45,7 +46,8 @@ def serialize_comicxml(doujinshi, dir):
     xml_write_simple_tag(f, 'NhentaiId', doujinshi.id)
     xml_write_simple_tag(f, 'Genre', doujinshi.info.categories)

-    xml_write_simple_tag(f, 'BlackAndWhite', 'No' if doujinshi.info.tags and 'full color' in doujinshi.info.tags else 'Yes')
+    xml_write_simple_tag(f, 'BlackAndWhite', 'No' if doujinshi.info.tags and
+                         'full color' in doujinshi.info.tags else 'Yes')

     if doujinshi.info.date:
         dt = parse_date(doujinshi.info.date)
@@ -59,19 +61,20 @@ def serialize_comicxml(doujinshi, dir):
     if doujinshi.info.tags:
         xml_write_simple_tag(f, 'Tags', doujinshi.info.tags)
     if doujinshi.info.artists:
-        xml_write_simple_tag(f, 'Writer', ' & '.join([i.strip() for i in doujinshi.info.artists.split(',')]))
-    # if doujinshi.info.groups:
-    #     metadata['group'] = [i.strip() for i in doujinshi.info.groups.split(',')]
+        xml_write_simple_tag(f, 'Writer', ' & '.join([i.strip() for i in
+                                                      doujinshi.info.artists.split(',')]))

     if doujinshi.info.languages:
         languages = [i.strip() for i in doujinshi.info.languages.split(',')]
         xml_write_simple_tag(f, 'Translated', 'Yes' if 'translated' in languages else 'No')
-        [xml_write_simple_tag(f, 'Language', i) for i in languages if i != 'translated']
+        [xml_write_simple_tag(f, 'LanguageISO', LANGUAGE_ISO[i]) for i in languages
+         if (i != 'translated' and i in LANGUAGE_ISO)]
     f.write('</ComicInfo>')


 def xml_write_simple_tag(f, name, val, indent=1):
-    f.write('{}<{}>{}</{}>\n'.format(' ' * indent, name, escape(str(val)), name))
+    f.write(f'{" "*indent}<{name}>{escape(str(val))}</{name}>\n')


 def merge_json():
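LANGUAGE_ISO is a new lookup table in nhentai.constant (its contents are not part of this diff), and the `i in LANGUAGE_ISO` guard means languages missing from the table are silently dropped rather than raising KeyError. With an assumed subset of the mapping:

# Assumed subset of nhentai.constant.LANGUAGE_ISO, for illustration only.
LANGUAGE_ISO = {'english': 'en', 'japanese': 'ja', 'chinese': 'zh'}

languages = ['translated', 'english']
iso = [LANGUAGE_ISO[i] for i in languages if (i != 'translated' and i in LANGUAGE_ISO)]
assert iso == ['en']  # 'translated' is a flag, not a language, so it is skipped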
@@ -120,7 +123,7 @@ def serialize_unique(lst):
 def set_js_database():
     with open('data.js', 'w') as f:
         indexed_json = merge_json()
-        unique_json = json.dumps(serialize_unique(indexed_json), separators=','':')
-        indexed_json = json.dumps(indexed_json, separators=','':')
+        unique_json = json.dumps(serialize_unique(indexed_json), separators=(',', ':'))
+        indexed_json = json.dumps(indexed_json, separators=(',', ':'))
         f.write('var data = ' + indexed_json)
         f.write(';\nvar tags = ' + unique_json)
nhentai/utils.py
@@ -1,10 +1,8 @@
 # coding: utf-8
-from __future__ import unicode_literals, print_function

 import sys
 import re
 import os
-import string
 import zipfile
 import shutil
 import requests
@@ -12,26 +10,37 @@ import sqlite3
 from nhentai import constant
 from nhentai.logger import logger
-from nhentai.serializer import serialize_json, serialize_comicxml, set_js_database
+from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database

+MAX_FIELD_LENGTH = 100


 def request(method, url, **kwargs):
     session = requests.Session()
     session.headers.update({
         'Referer': constant.LOGIN_URL,
-        'User-Agent': 'nhentai command line client (https://github.com/RicterZ/nhentai)',
-        'Cookie': constant.COOKIE
+        'User-Agent': constant.CONFIG['useragent'],
+        'Cookie': constant.CONFIG['cookie']
     })
-    return getattr(session, method)(url, proxies=constant.PROXY, verify=False, **kwargs)
+
+    if not kwargs.get('proxies', None):
+        kwargs['proxies'] = constant.CONFIG['proxy']
+
+    return getattr(session, method)(url, verify=False, **kwargs)
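The request() change makes the configured proxy a default instead of a hard override: an explicit proxies= keyword argument now wins over constant.CONFIG['proxy']. The pattern in isolation, with an invented CONFIG dict standing in for nhentai's:

CONFIG = {'proxy': {'https': 'socks5h://127.0.0.1:1080'}}  # invented for the example

def apply_proxy_default(**kwargs):
    # mirror of the kwargs-default logic above
    if not kwargs.get('proxies', None):
        kwargs['proxies'] = CONFIG['proxy']
    return kwargs

assert apply_proxy_default()['proxies'] == CONFIG['proxy']
assert apply_proxy_default(proxies={'https': 'http://10.0.0.1:8080'})['proxies'] == {'https': 'http://10.0.0.1:8080'}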
 def check_cookie():
-    response = request('get', constant.BASE_URL).text
-    username = re.findall('"/users/\d+/(.*?)"', response)
+    response = request('get', constant.BASE_URL)
+    if response.status_code == 503 and 'cf-browser-verification' in response.text:
+        logger.error('Blocked by Cloudflare captcha, please set your cookie and useragent')
+        exit(-1)
+
+    username = re.findall('"/users/[0-9]+/(.*?)"', response.text)
     if not username:
-        logger.error('Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie')
+        logger.warning('Cannot get your username, please check your cookie or use `nhentai --cookie` to set your cookie')
     else:
-        logger.info('Login successfully! Your username: {}'.format(username[0]))
+        logger.log(16, f'Login successfully! Your username: {username[0]}')


 class _Singleton(type):
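check_cookie() now bails out on Cloudflare's 503 challenge before scraping, and the username scrape itself is unchanged apart from [0-9] replacing \d. How that scrape behaves on a representative profile link (the markup shape is an assumption):

import re

page = '<a href="/users/42/ricterz" class="username">ricterz</a>'
# the quotes in the pattern anchor it to the href attribute value
assert re.findall('"/users/[0-9]+/(.*?)"', page) == ['ricterz']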
@@ -64,7 +73,7 @@ def readfile(path):
     return file.read()


-def generate_html(output_dir='.', doujinshi_obj=None):
+def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
     image_html = ''

     if doujinshi_obj is not None:
@@ -72,24 +81,28 @@ def generate_html(output_dir='.', doujinshi_obj=None):
     else:
         doujinshi_dir = '.'

+    if not os.path.exists(doujinshi_dir):
+        logger.warning(f'Path "{doujinshi_dir}" does not exist, creating.')
+        try:
+            os.makedirs(doujinshi_dir)
+        except EnvironmentError as e:
+            logger.critical(e)
+
     file_list = os.listdir(doujinshi_dir)
     file_list.sort()

     for image in file_list:
         if not os.path.splitext(image)[1] in ('.jpg', '.png'):
             continue
+        image_html += f'<img src="{image}" class="image-item"/>\n'

-        image_html += '<img src="{0}" class="image-item"/>\n'\
-            .format(image)
-    html = readfile('viewer/index.html')
-    css = readfile('viewer/styles.css')
-    js = readfile('viewer/scripts.js')
+    html = readfile(f'viewer/{template}/index.html')
+    css = readfile(f'viewer/{template}/styles.css')
+    js = readfile(f'viewer/{template}/scripts.js')

     if doujinshi_obj is not None:
         serialize_json(doujinshi_obj, doujinshi_dir)
         name = doujinshi_obj.name
-        if sys.version_info < (3, 0):
-            name = doujinshi_obj.name.encode('utf-8')
     else:
         name = {'title': 'nHentai HTML Viewer'}
@@ -102,14 +115,14 @@ def generate_html(output_dir='.', doujinshi_obj=None):
         with open(os.path.join(doujinshi_dir, 'index.html'), 'wb') as f:
             f.write(data.encode('utf-8'))

-        logger.log(15, 'HTML Viewer has been written to \'{0}\''.format(os.path.join(doujinshi_dir, 'index.html')))
+        logger.log(16, f'HTML Viewer has been written to "{os.path.join(doujinshi_dir, "index.html")}"')
     except Exception as e:
-        logger.warning('Writing HTML Viewer failed ({})'.format(str(e)))
+        logger.warning(f'Writing HTML Viewer failed ({e})')
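generate_html() now resolves its three viewer assets per template name, which is what makes the minimal viewer added below selectable (the CLI flag that carries the template name is outside this diff). With template='minimal' the lookups become:

template = 'minimal'
assets = [f'viewer/{template}/{name}' for name in ('index.html', 'styles.css', 'scripts.js')]
assert assets == ['viewer/minimal/index.html',
                  'viewer/minimal/styles.css',
                  'viewer/minimal/scripts.js']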
 def generate_main_html(output_dir='./'):
     """
-    Generate a main html to show all the contain doujinshi.
+    Generate a main html to show all the contains doujinshi.
     With a link to their `index.html`.
     Default output folder will be the CLI path.
     """
@@ -138,7 +151,7 @@ def generate_main_html(output_dir='./'):
             files.sort()

             if 'index.html' in files:
-                logger.info('Add doujinshi \'{}\''.format(folder))
+                logger.info(f'Add doujinshi "{folder}"')
             else:
                 continue
@@ -160,20 +173,19 @@ def generate_main_html(output_dir='./'):
         else:
             with open('./main.html', 'wb') as f:
                 f.write(data.encode('utf-8'))
-        shutil.copy(os.path.dirname(__file__)+'/viewer/logo.png', './')
+        shutil.copy(os.path.dirname(__file__) + '/viewer/logo.png', './')
         set_js_database()
-        logger.log(
-            15, 'Main Viewer has been written to \'{0}main.html\''.format(output_dir))
+        logger.log(16, f'Main Viewer has been written to "{output_dir}main.html"')
     except Exception as e:
-        logger.warning('Writing Main Viewer failed ({})'.format(str(e)))
+        logger.warning(f'Writing Main Viewer failed ({e})')
-def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_comic_info=False):
+def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_comic_info=True):
     if doujinshi_obj is not None:
         doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
         if write_comic_info:
-            serialize_comicxml(doujinshi_obj, doujinshi_dir)
-        cbz_filename = os.path.join(os.path.join(doujinshi_dir, '..'), '{}.cbz'.format(doujinshi_obj.filename))
+            serialize_comic_xml(doujinshi_obj, doujinshi_dir)
+        cbz_filename = os.path.join(os.path.join(doujinshi_dir, '..'), f'{doujinshi_obj.filename}.cbz')
     else:
         cbz_filename = './doujinshi.cbz'
         doujinshi_dir = '.'
@@ -181,7 +193,7 @@ def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_
     file_list = os.listdir(doujinshi_dir)
     file_list.sort()

-    logger.info('Writing CBZ file to path: {}'.format(cbz_filename))
+    logger.info(f'Writing CBZ file to path: {cbz_filename}')
     with zipfile.ZipFile(cbz_filename, 'w') as cbz_pf:
         for image in file_list:
             image_path = os.path.join(doujinshi_dir, image)
@@ -190,21 +202,19 @@ def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_
     if rm_origin_dir:
         shutil.rmtree(doujinshi_dir, ignore_errors=True)

-    logger.log(15, 'Comic Book CBZ file has been written to \'{0}\''.format(doujinshi_dir))
+    logger.log(16, f'Comic Book CBZ file has been written to "{doujinshi_dir}"')


 def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False):
     try:
         import img2pdf
-    except ImportError:
-        logger.error("Please install img2pdf package by using pip.")

         """Write images to a PDF file using img2pdf."""
         if doujinshi_obj is not None:
             doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
             pdf_filename = os.path.join(
                 os.path.join(doujinshi_dir, '..'),
-                '{}.pdf'.format(doujinshi_obj.filename)
+                f'{doujinshi_obj.filename}.pdf'
             )
         else:
             pdf_filename = './doujinshi.pdf'
@@ -213,7 +223,7 @@ def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False):
         file_list = os.listdir(doujinshi_dir)
         file_list.sort()

-        logger.info('Writing PDF file to path: {}'.format(pdf_filename))
+        logger.info(f'Writing PDF file to path: {pdf_filename}')
         with open(pdf_filename, 'wb') as pdf_f:
             full_path_list = (
                 [os.path.join(doujinshi_dir, image) for image in file_list]
@@ -223,25 +233,34 @@ def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False):
         if rm_origin_dir:
             shutil.rmtree(doujinshi_dir, ignore_errors=True)

-        logger.log(15, 'PDF file has been written to \'{0}\''.format(doujinshi_dir))
+        logger.log(16, f'PDF file has been written to "{doujinshi_dir}"')
+
+    except ImportError:
+        logger.error("Please install img2pdf package by using pip.")


-def format_filename(s):
-    """Take a string and return a valid filename constructed from the string.
-    Uses a whitelist approach: any characters not present in valid_chars are
-    removed. Also spaces are replaced with underscores.
-
-    Note: this method may produce invalid filenames such as ``, `.` or `..`
-    When I use this method I prepend a date string like '2009_01_15_19_46_32_'
-    and append a file extension like '.txt', so I avoid the potential of using
-    an invalid filename.
-    """
+def format_filename(s, length=MAX_FIELD_LENGTH, _truncate_only=False):
+    """
+    It used to be a whitelist approach allowed only alphabet and a part of symbols.
+    but most doujinshi's names include Japanese 2-byte characters and these was rejected.
+    so it is using blacklist approach now.
+    if filename include forbidden characters (\'/:,;*?"<>|) ,it replace space character(' ').
+    """
     # maybe you can use `--format` to select a suitable filename
+
+    if not _truncate_only:
+        ban_chars = '\\\'/:,;*?"<>|\t'
+        filename = s.translate(str.maketrans(ban_chars, ' ' * len(ban_chars))).strip()
+        filename = ' '.join(filename.split())
+
+        while filename.endswith('.'):
+            filename = filename[:-1]
+    else:
+        filename = s
-    valid_chars = "-_.()[] %s%s" % (string.ascii_letters, string.digits)
-    filename = ''.join(c for c in s if c in valid_chars)
-    if len(filename) > 100:
-        filename = filename[:100] + '...]'
+
+    # limit `length` chars
+    if len(filename) >= length:
+        filename = filename[:length - 1] + u'…'

     # Remove [] from filename
     filename = filename.replace('[]', '').strip()
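The new blacklist approach in one self-contained pass: banned characters become spaces, whitespace runs collapse, and trailing dots are stripped before the length cap applies. A worked run with an invented title:

ban_chars = '\\\'/:,;*?"<>|\t'
name = 'Artist: some/name?? [group]   title...'
filename = name.translate(str.maketrans(ban_chars, ' ' * len(ban_chars))).strip()
filename = ' '.join(filename.split())   # collapse the runs the translation left behind
while filename.endswith('.'):
    filename = filename[:-1]
assert filename == 'Artist some name [group] title'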
@@ -253,6 +272,54 @@ def signal_handler(signal, frame):
     exit(1)

+def paging(page_string):
+    # 1,3-5,14 -> [1, 3, 4, 5, 14]
+    if not page_string:
+        return []
+
+    page_list = []
+    for i in page_string.split(','):
+        if '-' in i:
+            start, end = i.split('-')
+            if not (start.isdigit() and end.isdigit()):
+                raise Exception('Invalid page number')
+            page_list.extend(list(range(int(start), int(end) + 1)))
+        else:
+            if not i.isdigit():
+                raise Exception('Invalid page number')
+            page_list.append(int(i))
+
+    return page_list
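This helper is what backs the new page semantics in the parsers; its comment doubles as a spec. Quick checks, assuming the definition above is importable from nhentai.utils:

assert paging('1,3-5,14') == [1, 3, 4, 5, 14]
assert paging('') == []      # falsy input means "no explicit selection"
assert paging(None) == []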

+def generate_metadata_file(output_dir, table, doujinshi_obj=None):
+    logger.info('Writing Metadata Info')
+
+    if doujinshi_obj is not None:
+        doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
+    else:
+        doujinshi_dir = '.'
+
+    logger.info(doujinshi_dir)
+
+    f = open(os.path.join(doujinshi_dir, 'info.txt'), 'w', encoding='utf-8')
+
+    fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'CIRCLE', 'SCANLATOR',
+              'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
+              'TAGS', 'TYPE', 'LANGUAGE', 'RELEASED', 'READING DIRECTION', 'CHARACTERS',
+              'SERIES', 'PARODY', 'URL']
+    special_fields = ['PARODY', 'TITLE', 'ORIGINAL TITLE', 'CHARACTERS', 'AUTHOR',
+                      'LANGUAGE', 'TAGS', 'URL', 'PAGES']
+
+    for i in range(len(fields)):
+        f.write(f'{fields[i]}: ')
+        if fields[i] in special_fields:
+            f.write(str(table[special_fields.index(fields[i])][1]))
+        f.write('\n')
+
+    f.close()
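Note that generate_metadata_file() writes every field label but only fills the ones listed in special_fields, and it indexes `table` by each field's position within special_fields, so the caller must supply rows in exactly that order. Illustrated with a truncated stand-in table (values invented):

special_fields = ['PARODY', 'TITLE']  # truncated for the example
table = [('Parody', 'original work'), ('Title', 'Example Title')]
# row 1 must be the PARODY row, row 2 the TITLE row, or the values land
# under the wrong labels
assert str(table[special_fields.index('TITLE')][1]) == 'Example Title'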

 class DB(object):
     conn = None
     cur = None
nhentai/viewer/minimal/index.html (new file)
@@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=yes, viewport-fit=cover" />
<title>{TITLE}</title>
<style>
{STYLES}
</style>
</head>
<body>
<nav id="list" hidden=true>
{IMAGES}</nav>
<div id="image-container">
<div id="dest"></div>
<span id="page-num"></span>
</div>
<script>
{SCRIPTS}
</script>
</body>
</html>
nhentai/viewer/minimal/scripts.js (new file)
@@ -0,0 +1,79 @@
const pages = Array.from(document.querySelectorAll('img.image-item'));
let currentPage = 0;
function changePage(pageNum) {
const previous = pages[currentPage];
const current = pages[pageNum];
if (current == null) {
return;
}
previous.classList.remove('current');
current.classList.add('current');
currentPage = pageNum;
const display = document.getElementById('dest');
display.style.backgroundImage = `url("${current.src}")`;
scroll(0,0)
document.getElementById('page-num')
.innerText = [
(pageNum + 1).toLocaleString(),
pages.length.toLocaleString()
].join('\u200a/\u200a');
}
changePage(0);
document.getElementById('image-container').onclick = event => {
const width = document.getElementById('image-container').clientWidth;
const clickPos = event.clientX / width;
if (clickPos < 0.5) {
changePage(currentPage - 1);
} else {
changePage(currentPage + 1);
}
};
document.onkeypress = event => {
switch (event.key.toLowerCase()) {
// Previous Image
case 'w':
scrollBy(0, -40);
break;
case 'a':
changePage(currentPage - 1);
break;
// Return to previous page
case 'q':
window.history.go(-1);
break;
// Next Image
case ' ':
case 's':
scrollBy(0, 40);
break;
case 'd':
changePage(currentPage + 1);
break;
}// remove arrow cause it won't work
};
document.onkeydown = event =>{
switch (event.keyCode) {
case 37: //left
changePage(currentPage - 1);
break;
case 38: //up
break;
case 39: //right
changePage(currentPage + 1);
break;
case 40: //down
break;
}
};
nhentai/viewer/minimal/styles.css (new file)
@@ -0,0 +1,75 @@
*, *::after, *::before {
box-sizing: border-box;
}
img {
vertical-align: middle;
}
html, body {
display: flex;
background-color: #e8e6e6;
height: 100%;
width: 100%;
padding: 0;
margin: 0;
font-family: sans-serif;
}
#list {
height: 2000px;
overflow: scroll;
width: 260px;
text-align: center;
}
#list img {
width: 200px;
padding: 10px;
border-radius: 10px;
margin: 15px 0;
cursor: pointer;
}
#list img.current {
background: #0003;
}
#image-container {
flex: auto;
height: 100%;
background: rgb(0, 0, 0);
color: rgb(100, 100, 100);
text-align: center;
cursor: pointer;
-webkit-user-select: none;
user-select: none;
position: relative;
}
#image-container #dest {
height: 2000px;
width: 100%;
background-size: contain;
background-repeat: no-repeat;
background-position: top;
margin-left: auto;
margin-right: auto;
max-width: 100%;
max-height: 100vh;
margin: auto;
}
#image-container #page-num {
position: static;
font-size: 9pt;
left: 10px;
bottom: 5px;
font-weight: bold;
opacity: 0.9;
text-shadow: /* Duplicate the same shadow to make it very strong */
0 0 2px #222,
0 0 2px #222,
0 0 2px #222;
}
requirements.txt
@@ -1,7 +1,6 @@
 requests>=2.5.0
-soupsieve<2.0
+soupsieve
 BeautifulSoup4>=4.0.0
-threadpool>=1.2.7
 tabulate>=0.7.5
-future>=0.15.2
 iso8601 >= 0.1
+urllib3
setup.py
@@ -1,6 +1,4 @@
 # coding: utf-8
-from __future__ import print_function, unicode_literals
-import sys
 import codecs
 from setuptools import setup, find_packages
 from nhentai import __version__, __author__, __email__
@@ -11,9 +9,8 @@ with open('requirements.txt') as f:

 def long_description():
-    with codecs.open('README.rst', 'rb') as readme:
-        if not sys.version_info < (3, 0, 0):
-            return readme.read().decode('utf-8')
+    with codecs.open('README.rst', 'r') as readme:
+        return readme.read()


 setup(