Merge pull request #93 from Alocks/dev

Added language option and metadata serializer
2026-05-02 04:48:10 +02:00 · 2019-12-02 11:38:09 +08:00
parent c0c7b33909 88c0c1e021
commit 411d6c2f30
6 changed files with 54 additions and 9 deletions
--- a/README.rst
+++ b/README.rst
@@ -74,6 +74,12 @@ Download by tag name:

    nhentai --tag lolicon --download --page=2

+Download by language:
+
+.. code-block:: bash
+
+    nhentai --language english --download --page=2
+
 Download by artist name:

 .. code-block:: bash
--- a/nhentai/cmdline.py
+++ b/nhentai/cmdline.py
@@ -55,6 +55,7 @@ def cmd_parser():
    parser.add_option('--character', type='string', dest='character', action='store', help='download doujinshi by character')
    parser.add_option('--parody', type='string', dest='parody', action='store', help='download doujinshi by parody')
    parser.add_option('--group', type='string', dest='group', action='store', help='download doujinshi by group')
+    parser.add_option('--language', type='string', dest='language', action='store', help='download doujinshi by language')
    parser.add_option('--favorites', '-F', action='store_true', dest='favorites',
                      help='list or download your favorites.')

@@ -113,7 +114,7 @@ def cmd_parser():

    if args.main_viewer and not args.id and not args.keyword and \
            not args.tag and not args.artist and not args.character and \
-            not args.parody and not args.group and not args.favorites:
+            not args.parody and not args.group and not args.language and not args.favorites:
        generate_main_html()
        exit(0)

@@ -174,13 +175,13 @@ def cmd_parser():

    if (args.is_download or args.is_show) and not args.id and not args.keyword and \
            not args.tag and not args.artist and not args.character and \
-            not args.parody and not args.group and not args.favorites:
+            not args.parody and not args.group and not args.language and not args.favorites:
        logger.critical('Doujinshi id(s) are required for downloading')
        parser.print_help()
        exit(1)

    if not args.keyword and not args.id and not args.tag and not args.artist and \
-            not args.character and not args.parody and not args.group and not args.favorites:
+            not args.character and not args.parody and not args.group and not args.language and not args.favorites:
        parser.print_help()
        exit(1)

--- a/nhentai/command.py
+++ b/nhentai/command.py
@@ -70,6 +70,12 @@ def main():
        if options.is_download and doujinshis:
            doujinshi_ids = [i['id'] for i in doujinshis]

+    elif options.language:
+        doujinshis = tag_parser(options.language, max_page=options.max_page, index=5)
+        print_doujinshi(doujinshis)
+        if options.is_download and doujinshis:
+            doujinshi_ids = [i['id'] for i in doujinshis]
+
    elif options.keyword:
        doujinshis = search_parser(options.keyword, sorting=options.sorting, page=options.page)
        print_doujinshi(doujinshis)
--- a/nhentai/parser.py
+++ b/nhentai/parser.py
@@ -158,7 +158,7 @@ def doujinshi_parser(id_):

    # gain information of the doujinshi
    information_fields = doujinshi_info.find_all('div', attrs={'class': 'field-name'})
-    needed_fields = ['Characters', 'Artists', 'Languages', 'Tags']
+    needed_fields = ['Characters', 'Artists', 'Languages', 'Tags', 'Parodies', 'Groups', 'Categories']
    for field in information_fields:
        field_name = field.contents[0].strip().strip(':')
        if field_name in needed_fields:
@@ -166,6 +166,9 @@ def doujinshi_parser(id_):
                    field.find_all('a', attrs={'class': 'tag'})]
            doujinshi[field_name.lower()] = ', '.join(data)

+    time_field = doujinshi_info.find('time')
+    if time_field.has_attr('datetime'):
+        doujinshi['date'] = time_field['datetime']
    return doujinshi


@@ -302,7 +305,7 @@ def __api_suspended_doujinshi_parser(id_):
    doujinshi['pages'] = len(response['images']['pages'])

    # gain information of the doujinshi
-    needed_fields = ['character', 'artist', 'language', 'tag']
+    needed_fields = ['character', 'artist', 'language', 'tag', 'parody', 'group', 'category']
    for tag in response['tags']:
        tag_type = tag['type']
        if tag_type in needed_fields:
--- a/nhentai/serializer.py
+++ b/nhentai/serializer.py
@@ -0,0 +1,24 @@
+# coding: utf-8
+
+
+def serialize(doujinshi):
+    """Return a metadata dict describing ``doujinshi``.
+
+    Comma-separated info fields become lists of stripped names, and empty
+    optional fields are omitted; categories, URL and pages are always set.
+    """
+    info = doujinshi.info
+    metadata = {'Title': doujinshi.name, 'Subtitle': info.subtitle}
+    if info.date:
+        metadata['Upload_Date'] = info.date
+    # Optional multi-valued fields, listed in the order their keys are added.
+    list_fields = ('Parodies', 'Characters', 'Tags', 'Artists', 'Groups', 'Languages')
+    for key in list_fields:
+        raw = getattr(info, key.lower())
+        if raw:
+            metadata[key] = [name.strip() for name in raw.split(',')]
+    # Categories are passed through as stored, without splitting.
+    metadata['Categories'] = info.categories
+    metadata['URL'] = doujinshi.url
+    metadata['Pages'] = doujinshi.pages
+    return metadata
--- a/nhentai/utils.py
+++ b/nhentai/utils.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals, print_function
 import sys
 import re
 import os
+import json
 import string
 import zipfile
 import shutil
@@ -11,6 +12,7 @@ import requests

 from nhentai import constant
 from nhentai.logger import logger
+from nhentai.serializer import serialize


 def request(method, url, **kwargs):
@@ -85,13 +87,16 @@ def generate_html(output_dir='.', doujinshi_obj=None):
    js = readfile('viewer/scripts.js')

    if doujinshi_obj is not None:
-        title = doujinshi_obj.name
+        metadata = serialize(doujinshi_obj)
        if sys.version_info < (3, 0):
-            title = title.encode('utf-8')
+            metadata['Title'] = doujinshi_obj.name.encode('utf-8')
+            metadata['Subtitle'] = doujinshi_obj.info.subtitle.encode('utf-8')
+        with open(os.path.join(doujinshi_dir, 'metadata.json'), 'w') as f:
+            json.dump(metadata, f, separators=(',', ':'))  # compact: no spaces after separators
    else:
-        title = 'nHentai HTML Viewer'
+        metadata= {'Title': 'nHentai HTML Viewer'}

-    data = html.format(TITLE=title, IMAGES=image_html, SCRIPTS=js, STYLES=css)
+    data = html.format(TITLE=metadata['Title'], IMAGES=image_html, SCRIPTS=js, STYLES=css)
    try:
        if sys.version_info < (3, 0):
            with open(os.path.join(doujinshi_dir, 'index.html'), 'w') as f: