Merge pull request #93 from Alocks/dev

Added language option and metadata serializer
This commit is contained in:
Ricter Zheng 2019-12-02 11:38:09 +08:00 committed by GitHub
commit 411d6c2f30
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 54 additions and 9 deletions

View File

@ -74,6 +74,12 @@ Download by tag name:
nhentai --tag lolicon --download --page=2
Download by language:
.. code-block:: bash
nhentai --language english --download --page=2
Download by artist name:
.. code-block:: bash

View File

@ -55,6 +55,7 @@ def cmd_parser():
parser.add_option('--character', type='string', dest='character', action='store', help='download doujinshi by character')
parser.add_option('--parody', type='string', dest='parody', action='store', help='download doujinshi by parody')
parser.add_option('--group', type='string', dest='group', action='store', help='download doujinshi by group')
parser.add_option('--language', type='string', dest='language', action='store', help='download doujinshi by language')
parser.add_option('--favorites', '-F', action='store_true', dest='favorites',
help='list or download your favorites.')
@ -113,7 +114,7 @@ def cmd_parser():
if args.main_viewer and not args.id and not args.keyword and \
not args.tag and not args.artist and not args.character and \
not args.parody and not args.group and not args.favorites:
not args.parody and not args.group and not args.language and not args.favorites:
generate_main_html()
exit(0)
@ -174,13 +175,13 @@ def cmd_parser():
if (args.is_download or args.is_show) and not args.id and not args.keyword and \
not args.tag and not args.artist and not args.character and \
not args.parody and not args.group and not args.favorites:
not args.parody and not args.group and not args.language and not args.favorites:
logger.critical('Doujinshi id(s) are required for downloading')
parser.print_help()
exit(1)
if not args.keyword and not args.id and not args.tag and not args.artist and \
not args.character and not args.parody and not args.group and not args.favorites:
not args.character and not args.parody and not args.group and not args.language and not args.favorites:
parser.print_help()
exit(1)

View File

@ -70,6 +70,12 @@ def main():
if options.is_download and doujinshis:
doujinshi_ids = [i['id'] for i in doujinshis]
elif options.language:
doujinshis = tag_parser(options.group, max_page=options.max_page, index=5)
print_doujinshi(doujinshis)
if options.is_download and doujinshis:
doujinshi_ids = [i['id'] for i in doujinshis]
elif options.keyword:
doujinshis = search_parser(options.keyword, sorting=options.sorting, page=options.page)
print_doujinshi(doujinshis)

View File

@ -158,7 +158,7 @@ def doujinshi_parser(id_):
# gain information of the doujinshi
information_fields = doujinshi_info.find_all('div', attrs={'class': 'field-name'})
needed_fields = ['Characters', 'Artists', 'Languages', 'Tags']
needed_fields = ['Characters', 'Artists', 'Languages', 'Tags', 'Parodies', 'Groups', 'Categories']
for field in information_fields:
field_name = field.contents[0].strip().strip(':')
if field_name in needed_fields:
@ -166,6 +166,9 @@ def doujinshi_parser(id_):
field.find_all('a', attrs={'class': 'tag'})]
doujinshi[field_name.lower()] = ', '.join(data)
time_field = doujinshi_info.find('time')
# find() returns None when the page has no <time> element; check for that
# before calling has_attr so a missing upload date does not raise
# AttributeError and abort parsing of the whole doujinshi.
if time_field is not None and time_field.has_attr('datetime'):
    doujinshi['date'] = time_field['datetime']
return doujinshi
@ -302,7 +305,7 @@ def __api_suspended_doujinshi_parser(id_):
doujinshi['pages'] = len(response['images']['pages'])
# gain information of the doujinshi
needed_fields = ['character', 'artist', 'language', 'tag']
needed_fields = ['character', 'artist', 'language', 'tag', 'parody', 'group', 'category']
for tag in response['tags']:
tag_type = tag['type']
if tag_type in needed_fields:

24
nhentai/serializer.py Normal file
View File

@ -0,0 +1,24 @@
# coding: utf-8
def serialize(doujinshi):
    """Build the metadata dictionary describing a doujinshi.

    Reads ``name``, ``url``, ``pages`` and ``info`` from ``doujinshi``.
    ``Title``, ``Subtitle``, ``Categories``, ``URL`` and ``Pages`` are always
    present. ``Upload_Date`` is added only when ``info.date`` is truthy.
    The comma-separated fields (parodies, characters, tags, artists, groups,
    languages) are added only when truthy, each split on ``','`` into a list
    of whitespace-stripped strings.

    :param doujinshi: object exposing ``name``, ``url``, ``pages`` and ``info``
    :return: dict of metadata, keys inserted in a fixed order
    """
    info = doujinshi.info
    metadata = {'Title': doujinshi.name, 'Subtitle': info.subtitle}

    if info.date:
        metadata['Upload_Date'] = info.date

    # (output key, attribute on info) pairs, listed in output order.
    list_fields = (
        ('Parodies', 'parodies'),
        ('Characters', 'characters'),
        ('Tags', 'tags'),
        ('Artists', 'artists'),
        ('Groups', 'groups'),
        ('Languages', 'languages'),
    )
    for key, attr in list_fields:
        raw = getattr(info, attr)
        if raw:
            metadata[key] = [part.strip() for part in raw.split(',')]

    # Categories is passed through unchanged (not split into a list).
    metadata['Categories'] = info.categories
    metadata['URL'] = doujinshi.url
    metadata['Pages'] = doujinshi.pages
    return metadata

View File

@ -4,6 +4,7 @@ from __future__ import unicode_literals, print_function
import sys
import re
import os
import json
import string
import zipfile
import shutil
@ -11,6 +12,7 @@ import requests
from nhentai import constant
from nhentai.logger import logger
from nhentai.serializer import serialize
def request(method, url, **kwargs):
@ -85,13 +87,16 @@ def generate_html(output_dir='.', doujinshi_obj=None):
js = readfile('viewer/scripts.js')
if doujinshi_obj is not None:
title = doujinshi_obj.name
metadata = serialize(doujinshi_obj)
if sys.version_info < (3, 0):
title = title.encode('utf-8')
metadata['Title'] = doujinshi_obj.name.encode('utf-8')
metadata['Subtitle'] = doujinshi_obj.info.subtitle.encode('utf-8')
with open(os.path.join(doujinshi_dir, 'metadata.json'), 'w') as f:
    # separators must be an (item_separator, key_separator) tuple. The
    # previous `','':'` was two adjacent literals concatenated into ',:',
    # which only worked because json unpacks the two-character string.
    json.dump(metadata, f, separators=(',', ':'))
else:
title = 'nHentai HTML Viewer'
metadata= {'Title': 'nHentai HTML Viewer'}
data = html.format(TITLE=title, IMAGES=image_html, SCRIPTS=js, STYLES=css)
data = html.format(TITLE=metadata['Title'], IMAGES=image_html, SCRIPTS=js, STYLES=css)
try:
if sys.version_info < (3, 0):
with open(os.path.join(doujinshi_dir, 'index.html'), 'w') as f: