0.2.15

download by tagname #15
fix unicodeerror on python3
2025-07-01 16:09:28 +02:00 · 2018-08-12 22:48:26 +08:00 · 2018-08-12 22:43:36 +08:00 · 2018-08-12 18:04:36 +08:00 · 2018-08-11 09:47:32 +08:00 · 2018-08-11 09:46:46 +08:00
8 changed files with 107 additions and 34 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,7 +6,6 @@ python:
    - 2.7
    - 2.6
    - 3.6
-    - 3.7

 install:
    - python setup.py install
@@ -15,3 +14,4 @@ script:
    - NHENTAI=https://nhentai.net nhentai --search umaru
    - NHENTAI=https://nhentai.net nhentai --id=152503,146134 -t 10 --output=/tmp/
    - NHENTAI=https://nhentai.net nhentai -l nhentai_test:nhentai --output=/tmp/
+    - NHENTAI=https://nhentai.net nhentai --tag lolicon
--- a/README.md
+++ b/README.md
@@ -7,11 +7,10 @@ nhentai
    |_| |_|_| |_|\___|_| |_|\__\__,_|_|

 あなたも変態。 いいね?  
-[![Build Status](https://travis-ci.org/RicterZ/nhentai.svg?branch=master)](https://travis-ci.org/RicterZ/nhentai)  
+[![Build Status](https://travis-ci.org/RicterZ/nhentai.svg?branch=master)](https://travis-ci.org/RicterZ/nhentai) ![nhentai PyPI Downloads](https://pypistats.com/badge/nhentai.svg)  

-🎉🎉 nhentai 现在支持 Windows 啦！

-由于 [http://nhentai.net](http://nhentai.net) 下载下来的种子速度很慢，而且官方也提供在线观看本子的功能，所以可以利用本脚本下载本子。
+nHentai is a CLI tool for downloading doujinshi from [nhentai.net](http://nhentai.net).

 ### Installation

@@ -25,36 +24,45 @@ nhentai
    sudo emerge net-misc/nhentai

 ### Usage
-下载指定 id 列表的本子：
+Download specified doujinshi:
 ```bash
 nhentai --id=123855,123866
 ```

-下载某关键词第一页的本子：
+Search a keyword and download the first page:
 ```bash
 nhentai --search="tomori" --page=1 --download
 ```

-下载用户 favorites 内容：
+Download your favourite doujinshi (login required):
 ```bash
 nhentai --login "username:password" --download
 ```

+Download by tag name:
+```bash
+nhentai --tag lolicon --download
+```
+
 ### Options

-`-t, --thread`：指定下载的线程数，最多为 10 线程。  
-`--path`：指定下载文件的输出路径，默认为当前目录。  
-`--timeout`：指定下载图片的超时时间，默认为 30 秒。  
-`--proxy`：指定下载的代理，例如: http://127.0.0.1:8080/
-`--login`：nhentai 账号的“用户名:密码”组合
-`--nohtml`：nhentai Don't generate HTML
-`--cbz`：nhentai Generate Comic Book CBZ file
+ `-t, --thread`: Download threads, max: 10  
+ `--output`:Output dir of saving doujinshi  
+ `--tag`:Download by tag name  
+ `--timeout`: Timeout of downloading each image   
+ `--proxy`: Use proxy, example: http://127.0.0.1:8080/  
+ `--login`: username:password pair of your nhentai account  
+ `--nohtml`: Do not generate HTML  
+ `--cbz`: Generate Comic Book CBZ File  

 ### nHentai Mirror
-如果想用自建镜像下载 nhentai 的本子，需要搭建 nhentai.net 和 i.nhentai.net 的反向代理。  
-例如用 h.loli.club 来做反向代理的话，需要 h.loli.club 反代 nhentai.net，i.h.loli.club 反带 i.nhentai.net。  
-然后利用环境变量来下载：  
+If you want to use a mirror, you should set up a reverse proxy of `nhentai.net` and `i.nhentai.net`.
+For example:

+    i.h.loli.club -> i.nhentai.net
+    h.loli.club -> nhentai.net
+
+Set `NHENTAI` env var to your nhentai mirror.
 ```bash
 NHENTAI=http://h.loli.club nhentai --id 123456
 ```
@@ -67,4 +75,4 @@ NHENTAI=http://h.loli.club nhentai --id 123456
 MIT

 ### あなたも変態
-![](./images/image.jpg)
+![](./images/image.jpg)
--- a/nhentai/__init__.py
+++ b/nhentai/__init__.py
@@ -1,3 +1,3 @@
-__version__ = '0.2.14'
+__version__ = '0.2.15'
 __author__ = 'RicterZ'
 __email__ = 'ricterzheng@gmail.com'
--- a/nhentai/cmdline.py
+++ b/nhentai/cmdline.py
@@ -42,7 +42,7 @@ def cmd_parser():
    parser.add_option('--search', type='string', dest='keyword', action='store', help='search doujinshi by keyword')
    parser.add_option('--page', type='int', dest='page', action='store', default=1,
                      help='page number of search result')
-    parser.add_option('--tags', type='string', dest='tags', action='store', help='download doujinshi by tags')
+    parser.add_option('--tag', type='string', dest='tag', action='store', help='download doujinshi by tag')
    parser.add_option('--output', type='string', dest='output_dir', action='store', default='',
                      help='output dir')
    parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
@@ -86,20 +86,17 @@ def cmd_parser():
        if not args.is_download:
            logger.warning('YOU DO NOT SPECIFY `--download` OPTION !!!')

-    if args.tags:
-        logger.warning('`--tags` is under construction')
-        exit(1)
-
    if args.id:
        _ = map(lambda id: id.strip(), args.id.split(','))
        args.id = set(map(int, filter(lambda id_: id_.isdigit(), _)))

-    if (args.is_download or args.is_show) and not args.id and not args.keyword and not args.login:
+    if (args.is_download or args.is_show) and not args.id and not args.keyword and \
+            not args.login and not args.tag:
        logger.critical('Doujinshi id(s) are required for downloading')
        parser.print_help()
        exit(1)

-    if not args.keyword and not args.id and not args.login:
+    if not args.keyword and not args.id and not args.login and not args.tag:
        parser.print_help()
        exit(1)

--- a/nhentai/command.py
+++ b/nhentai/command.py
@@ -5,7 +5,7 @@ import signal
 import platform

 from nhentai.cmdline import cmd_parser, banner
-from nhentai.parser import doujinshi_parser, search_parser, print_doujinshi, login_parser
+from nhentai.parser import doujinshi_parser, search_parser, print_doujinshi, login_parser, tag_guessing, tag_parser
 from nhentai.doujinshi import Doujinshi
 from nhentai.downloader import Downloader
 from nhentai.logger import logger
@@ -27,13 +27,19 @@ def main():
        for doujinshi_info in login_parser(username=username, password=password):
            doujinshi_list.append(Doujinshi(**doujinshi_info))

+    if options.tag:
+        tag_id = tag_guessing(options.tag)
+        if tag_id:
+            doujinshis = tag_parser(tag_id)
+            print_doujinshi(doujinshis)
+            if options.is_download:
+                doujinshi_ids = map(lambda d: d['id'], doujinshis)
+
    if options.keyword:
        doujinshis = search_parser(options.keyword, options.page)
        print_doujinshi(doujinshis)
        if options.is_download:
            doujinshi_ids = map(lambda d: d['id'], doujinshis)
-    else:
-        doujinshi_ids = options.id

    if doujinshi_ids:
        for id_ in doujinshi_ids:
--- a/nhentai/constant.py
+++ b/nhentai/constant.py
@@ -7,6 +7,8 @@ BASE_URL = os.getenv('NHENTAI', 'https://nhentai.net')

 DETAIL_URL = '%s/api/gallery' % BASE_URL
 SEARCH_URL = '%s/api/galleries/search' % BASE_URL
+TAG_URL = '%s/tag' % BASE_URL
+TAG_API_URL = '%s/api/galleries/tagged' % BASE_URL
 LOGIN_URL = '%s/login/' % BASE_URL
 FAV_URL = '%s/favorites/' % BASE_URL

--- a/nhentai/parser.py
+++ b/nhentai/parser.py
@@ -110,11 +110,18 @@ def doujinshi_parser(id_):
    doujinshi['pages'] = len(response['images']['pages'])

    # gain information of the doujinshi
-    needed_fields = ['character', 'artist', 'language']
+    needed_fields = ['character', 'artist', 'language', 'tag']
    for tag in response['tags']:
        tag_type = tag['type']
        if tag_type in needed_fields:
-            if tag_type not in doujinshi:
+            if tag_type == 'tag':
+                if tag_type not in doujinshi:
+                    doujinshi[tag_type] = {}
+
+                tag['name'] = tag['name'].replace(' ', '-')
+                tag['name'] = tag['name'].lower()
+                doujinshi[tag_type][tag['name']] = tag['id']
+            elif tag_type not in doujinshi:
                doujinshi[tag_type] = tag['name']
            else:
                doujinshi[tag_type] += tag['name']
@@ -154,5 +161,48 @@ def print_doujinshi(doujinshi_list):
                tabulate(tabular_data=doujinshi_list, headers=headers, tablefmt='rst'))


+def tag_parser(tag_id):
+    logger.info('Get doujinshi of tag id: {0}'.format(tag_id))
+    result = []
+    response = request('get', url=constant.TAG_API_URL, params={'sort': 'popular', 'tag_id': tag_id}).json()
+
+    for row in response['result']:
+        title = row['title']['english']
+        title = title[:85] + '..' if len(title) > 85 else title
+        result.append({'id': row['id'], 'title': title})
+
+    if not result:
+        logger.warn('Not found anything of tag id {}'.format(tag_id))
+
+    return result
+
+
+def tag_guessing(tag_name):
+    tag_name = tag_name.lower()
+    tag_name = tag_name.replace(' ', '-')
+    logger.info('Trying to get tag_id of tag \'{0}\''.format(tag_name))
+    response = request('get', url='%s/%s' % (constant.TAG_URL, tag_name)).content
+    html = BeautifulSoup(response, 'html.parser')
+    first_item = html.find('div', attrs={'class': 'gallery'})
+    if not first_item:
+        logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name))
+        return
+
+    doujinshi_id = re.findall('(\d+)', first_item.a.attrs['href'])
+    if not doujinshi_id:
+        logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name))
+        return
+
+    ret = doujinshi_parser(doujinshi_id[0])
+    if 'tag' in ret and tag_name in ret['tag']:
+        tag_id = ret['tag'][tag_name]
+        logger.info('Tag id of tag \'{0}\' is {1}'.format(tag_name, tag_id))
+    else:
+        logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name))
+        return
+
+    return tag_id
+
+
 if __name__ == '__main__':
    print(doujinshi_parser("32271"))
--- a/nhentai/utils.py
+++ b/nhentai/utils.py
@@ -1,6 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals, print_function

+import sys
 import os
 import string
 import zipfile
@@ -30,12 +31,14 @@ def urlparse(url):

    return urlparse(url)

+
 def readfile(path):
    loc = os.path.dirname(__file__)

    with open(os.path.join(loc, path), 'r') as file:
        return file.read()

+
 def generate_html(output_dir='.', doujinshi_obj=None):
    image_html = ''

@@ -65,10 +68,17 @@ def generate_html(output_dir='.', doujinshi_obj=None):
        title = 'nHentai HTML Viewer'

    data = html.format(TITLE=title, IMAGES=image_html, SCRIPTS=js, STYLES=css)
-    with open(os.path.join(doujinshi_dir, 'index.html'), 'w') as f:
-        f.write(data)
+    try:
+        if sys.version_info < (3, 0):
+            with open(os.path.join(doujinshi_dir, 'index.html'), 'w') as f:
+                f.write(data)
+        else:
+            with open(os.path.join(doujinshi_dir, 'index.html'), 'wb') as f:
+                f.write(data.encode('utf-8'))

-    logger.log(15, 'HTML Viewer has been write to \'{0}\''.format(os.path.join(doujinshi_dir, 'index.html')))
+        logger.log(15, 'HTML Viewer has been write to \'{0}\''.format(os.path.join(doujinshi_dir, 'index.html')))
+    except Exception as e:
+        logger.warning('Writen HTML Viewer failed ({})'.format(str(e)))


 def generate_cbz(output_dir='.', doujinshi_obj=None):
Author	SHA1	Message	Date
Ricter Z	16e8ce6f45	0.2.15	2018-08-12 22:48:26 +08:00
Ricter Z	0632826827	download by tagname #15	2018-08-12 22:43:36 +08:00
Ricter Z	8d2cd1974b	fix unicodeerror on python3	2018-08-12 18:04:36 +08:00
Ricter Zheng	8c176cd2ad	Update README.md	2018-08-11 09:47:32 +08:00
Ricter Zheng	f2c88e8ade	Update README.md	2018-08-11 09:46:46 +08:00
Ricter Zheng	2300744c5c	Update README.md	2018-08-11 09:46:04 +08:00
Ricter Zheng	7f30c84eff	Update README.md	2018-08-11 09:45:04 +08:00
Ricter Z	dda849b770	remove python3.7	2018-08-11 09:32:35 +08:00