diff --git a/README.rst b/README.rst
index acca1d4..83f8cb8 100644
--- a/README.rst
+++ b/README.rst
@@ -74,6 +74,12 @@ Download by tag name:
 
     nhentai --tag lolicon --download --page=2
 
+Download by language:
+
+.. code-block:: bash
+
+    nhentai --language english --download --page=2
+
 Download by artist name:
 
 .. code-block:: bash
@@ -84,13 +90,13 @@ Download by character name:
 
 .. code-block:: bash
 
-    nhentai --character kuro von einsbern --download
+    nhentai --character "kuro von einsbern" --download
 
 Download by parody name:
 
 .. code-block:: bash
 
-    nhentai --parody the idolmaster --download
+    nhentai --parody "the idolmaster" --download
 
 Download by group name:
 
@@ -102,7 +108,7 @@ Download using multiple tags (--tag, --character, --paordy and --group supported
 
 .. code-block:: bash
 
-    nhentai --tag lolicon, teasing --artist tamano kedama, atte nanakusa
+    nhentai --tag "lolicon, teasing" --artist "tamano kedama, atte nanakusa"
 
 Download your favorites with delay:
 
diff --git a/nhentai/cmdline.py b/nhentai/cmdline.py
index f8e7679..f74a0e4 100644
--- a/nhentai/cmdline.py
+++ b/nhentai/cmdline.py
@@ -55,6 +55,7 @@ def cmd_parser():
     parser.add_option('--character', type='string', dest='character', action='store', help='download doujinshi by character')
     parser.add_option('--parody', type='string', dest='parody', action='store', help='download doujinshi by parody')
     parser.add_option('--group', type='string', dest='group', action='store', help='download doujinshi by group')
+    parser.add_option('--language', type='string', dest='language', action='store', help='download doujinshi by language')
     parser.add_option('--favorites', '-F', action='store_true', dest='favorites',
                       help='list or download your favorites.')
 
@@ -115,7 +116,7 @@ def cmd_parser():
 
     if args.main_viewer and not args.id and not args.keyword and \
             not args.tag and not args.artist and not args.character and \
-            not args.parody and not args.group and not args.favorites:
+            not args.parody and not args.group and not args.language and not args.favorites:
         generate_main_html()
         exit(0)
 
@@ -176,13 +177,13 @@ def cmd_parser():
 
     if (args.is_download or args.is_show) and not args.id and not args.keyword and \
             not args.tag and not args.artist and not args.character and \
-            not args.parody and not args.group and not args.favorites:
+            not args.parody and not args.group and not args.language and not args.favorites:
         logger.critical('Doujinshi id(s) are required for downloading')
         parser.print_help()
         exit(1)
 
     if not args.keyword and not args.id and not args.tag and not args.artist and \
-            not args.character and not args.parody and not args.group and not args.favorites:
+            not args.character and not args.parody and not args.group and not args.language and not args.favorites:
         parser.print_help()
         exit(1)
 
diff --git a/nhentai/command.py b/nhentai/command.py
index deb9071..40f924f 100644
--- a/nhentai/command.py
+++ b/nhentai/command.py
@@ -70,6 +70,12 @@ def main():
         if options.is_download and doujinshis:
             doujinshi_ids = [i['id'] for i in doujinshis]
 
+    elif options.language:
+        doujinshis = tag_parser(options.language, max_page=options.max_page, index=5)
+        print_doujinshi(doujinshis)
+        if options.is_download and doujinshis:
+            doujinshi_ids = [i['id'] for i in doujinshis]
+
     elif options.keyword:
         doujinshis = search_parser(options.keyword, sorting=options.sorting, page=options.page)
         print_doujinshi(doujinshis)
diff --git a/nhentai/constant.py b/nhentai/constant.py
index ecbee17..5641618 100644
--- a/nhentai/constant.py
+++ b/nhentai/constant.py
@@ -21,7 +21,8 @@ TAG_URL = ['%s/tag' % BASE_URL,
            '%s/artist' % BASE_URL,
            '%s/character' % BASE_URL,
            '%s/parody' % BASE_URL,
-           '%s/group' % BASE_URL]
+           '%s/group' % BASE_URL,
+           '%s/language' % BASE_URL]
 
 TAG_API_URL = '%s/api/galleries/tagged' % BASE_URL
 LOGIN_URL = '%s/login/' % BASE_URL
diff --git a/nhentai/parser.py b/nhentai/parser.py
index 212461f..0c43882 100644
--- a/nhentai/parser.py
+++ b/nhentai/parser.py
@@ -163,7 +163,7 @@ def doujinshi_parser(id_):
     doujinshi['subtitle'] = subtitle.text if subtitle else ''
 
     doujinshi_cover = html.find('div', attrs={'id': 'cover'})
-    img_id = re.search('/galleries/([\d]+)/cover\.(jpg|png)$', doujinshi_cover.a.img.attrs['data-src'])
+    img_id = re.search('/galleries/([\d]+)/cover\.(jpg|png|gif)$', doujinshi_cover.a.img.attrs['data-src'])
 
     ext = []
     for i in html.find_all('div', attrs={'class': 'thumb-container'}):
@@ -187,7 +187,7 @@ def doujinshi_parser(id_):
 
     # gain information of the doujinshi
     information_fields = doujinshi_info.find_all('div', attrs={'class': 'field-name'})
-    needed_fields = ['Characters', 'Artists', 'Languages', 'Tags']
+    needed_fields = ['Characters', 'Artists', 'Languages', 'Tags', 'Parodies', 'Groups', 'Categories']
     for field in information_fields:
         field_name = field.contents[0].strip().strip(':')
         if field_name in needed_fields:
@@ -195,6 +195,9 @@ def doujinshi_parser(id_):
                     field.find_all('a', attrs={'class': 'tag'})]
             doujinshi[field_name.lower()] = ', '.join(data)
 
+    time_field = doujinshi_info.find('time')
+    if time_field.has_attr('datetime'):
+        doujinshi['date'] = time_field['datetime']
     return doujinshi
 
 
@@ -224,7 +227,7 @@ def tag_parser(tag_name, sorting='date', max_page=1, index=0):
     if ',' in tag_name:
         tag_name = [i.strip().replace(' ', '-') for i in tag_name.split(',')]
     else:
-        tag_name = tag_name.replace(' ', '-')
+        tag_name = tag_name.strip().replace(' ', '-')
 
     if sorting == 'date':
         sorting = ''
@@ -331,7 +334,7 @@ def __api_suspended_doujinshi_parser(id_):
     doujinshi['pages'] = len(response['images']['pages'])
 
     # gain information of the doujinshi
-    needed_fields = ['character', 'artist', 'language', 'tag']
+    needed_fields = ['character', 'artist', 'language', 'tag', 'parody', 'group', 'category']
     for tag in response['tags']:
         tag_type = tag['type']
         if tag_type in needed_fields:
diff --git a/nhentai/serializer.py b/nhentai/serializer.py
new file mode 100644
index 0000000..357da6c
--- /dev/null
+++ b/nhentai/serializer.py
@@ -0,0 +1,79 @@
+# coding: utf-8
+import json
+import os
+
+def serialize(doujinshi, dir):
+    metadata = {'title' : doujinshi.name,
+                'subtitle' : doujinshi.info.subtitle}
+    if doujinshi.info.date:
+        metadata['upload_date'] = doujinshi.info.date
+    if doujinshi.info.parodies:
+        metadata['parody'] = [i.strip() for i in doujinshi.info.parodies.split(',')]
+    if doujinshi.info.characters:
+        metadata['character'] = [i.strip() for i in doujinshi.info.characters.split(',')]
+    if doujinshi.info.tags:
+        metadata['tag'] = [i.strip() for i in doujinshi.info.tags.split(',')]
+    if doujinshi.info.artists:
+        metadata['artist'] = [i.strip() for i in doujinshi.info.artists.split(',')]
+    if doujinshi.info.groups:
+        metadata['group'] = [i.strip() for i in doujinshi.info.groups.split(',')]
+    if doujinshi.info.languages:
+        metadata['language'] = [i.strip() for i in doujinshi.info.languages.split(',')]
+    metadata['category'] = doujinshi.info.categories
+    metadata['URL'] = doujinshi.url
+    metadata['Pages'] = doujinshi.pages
+    with open(os.path.join(dir, 'metadata.json'), 'w', encoding="raw_unicode_escape") as f:
+        json.dump(metadata, f, separators=','':')
+
+
+def merge_json():
+    lst = []
+    output_dir = "./"
+    os.chdir(output_dir)
+    doujinshi_dirs = next(os.walk('.'))[1]
+    for folder in doujinshi_dirs:
+        files = os.listdir(folder)
+        if 'metadata.json' not in files:
+            continue
+        data_folder = output_dir + folder + '/' + 'metadata.json'
+        json_file = open(data_folder, 'r')
+        json_dict = {}
+        json_dict = json.load(json_file)
+        json_dict['Folder'] = folder
+        lst.append(json_dict)
+    return lst
+
+
+def serialize_unique(lst):
+    dictionary = {}
+    parody = []
+    character = []
+    tag = []
+    artist = []
+    group = []
+    for dic in lst:
+        if 'parody' in dic:
+            parody.extend([i for i in dic['parody']])
+        if 'character' in dic:
+            character.extend([i for i in dic['character']])
+        if 'tag' in dic:
+            tag.extend([i for i in dic['tag']])
+        if 'artist' in dic:
+            artist.extend([i for i in dic['artist']])
+        if 'group' in dic:
+            group.extend([i for i in dic['group']])
+    dictionary['parody'] = list(set(parody))
+    dictionary['character'] = list(set(character))
+    dictionary['tag'] = list(set(tag))
+    dictionary['artist'] = list(set(artist))
+    dictionary['group'] = list(set(group))
+    return dictionary
+
+
+def set_js_database():
+    with open('data.js', 'w') as f:
+        indexed_json = merge_json()
+        unique_json = json.dumps(serialize_unique(indexed_json), separators=','':')
+        indexed_json = json.dumps(indexed_json, separators=','':')
+        f.write('var data = '+indexed_json)
+        f.write(';\nvar tags = '+unique_json)
diff --git a/nhentai/utils.py b/nhentai/utils.py
index 815c690..16f8bdf 100644
--- a/nhentai/utils.py
+++ b/nhentai/utils.py
@@ -11,6 +11,7 @@ import requests
 
 from nhentai import constant
 from nhentai.logger import logger
+from nhentai.serializer import serialize, set_js_database
 
 
 def request(method, url, **kwargs):
@@ -79,19 +80,19 @@ def generate_html(output_dir='.', doujinshi_obj=None):
 
         image_html += '<img src="{0}" class="image-item"/>\n'\
             .format(image)
-
     html = readfile('viewer/index.html')
     css = readfile('viewer/styles.css')
     js = readfile('viewer/scripts.js')
 
     if doujinshi_obj is not None:
-        title = doujinshi_obj.name
+        serialize(doujinshi_obj, doujinshi_dir)
+        name = doujinshi_obj.name
         if sys.version_info < (3, 0):
-            title = title.encode('utf-8')
+            name = doujinshi_obj.name.encode('utf-8')
     else:
-        title = 'nHentai HTML Viewer'
+        name = {'title': 'nHentai HTML Viewer'}
 
-    data = html.format(TITLE=title, IMAGES=image_html, SCRIPTS=js, STYLES=css)
+    data = html.format(TITLE=name, IMAGES=image_html, SCRIPTS=js, STYLES=css)
     try:
         if sys.version_info < (3, 0):
             with open(os.path.join(doujinshi_dir, 'index.html'), 'w') as f:
@@ -112,10 +113,12 @@ def generate_main_html(output_dir='./'):
     Default output folder will be the CLI path.
     """
 
-    count = 0
     image_html = ''
+    main = readfile('viewer/main.html')
     css = readfile('viewer/main.css')
+    js = readfile('viewer/main.js')
+    element = '\n\
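
Usage sketch for the new nhentai/serializer.py helpers: the patch has generate_html() call serialize() to drop a metadata.json next to each downloaded doujinshi, and set_js_database() merges those files into a single data.js. The snippet below only illustrates regenerating data.js by hand after the fact; the ./downloads path is an assumption for illustration, not something the patch defines.

    import os

    from nhentai.serializer import set_js_database

    # merge_json() walks the current working directory, so switch to the
    # download root first (one sub-folder per doujinshi, each holding the
    # metadata.json written by serialize(); folders without it are skipped).
    os.chdir('./downloads')  # hypothetical download root

    # Writes ./data.js containing "var data = [...];" followed by "var tags = {...}".
    set_js_database()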