From d35190f9d03c141aec4e16c4ea87caf9b36dd859 Mon Sep 17 00:00:00 2001 From: Anh Nhan Nguyen Date: Tue, 21 Apr 2020 13:23:50 +0200 Subject: [PATCH 01/18] write ComicInfo.xml for CBZ files --- nhentai/serializer.py | 46 ++++++++++++++++++++++++++++++++++++++++++- nhentai/utils.py | 17 ++++++++-------- requirements.txt | 1 + 3 files changed, 55 insertions(+), 9 deletions(-) diff --git a/nhentai/serializer.py b/nhentai/serializer.py index 8e7db1d..fc33cda 100644 --- a/nhentai/serializer.py +++ b/nhentai/serializer.py @@ -1,9 +1,11 @@ # coding: utf-8 import json import os +from iso8601 import parse_date +from xml.sax.saxutils import escape -def serialize(doujinshi, dir): +def serialize_json(doujinshi, dir): metadata = {'title': doujinshi.name, 'subtitle': doujinshi.info.subtitle} if doujinshi.info.date: @@ -28,6 +30,48 @@ def serialize(doujinshi, dir): json.dump(metadata, f, separators=','':') +def serialize_comicxml(doujinshi, dir): + with open(os.path.join(dir, 'ComicInfo.xml'), 'w') as f: + f.write('\n') + f.write('\n') + + xml_write_simple_tag(f, 'Manga', 'Yes') + + xml_write_simple_tag(f, 'Title', doujinshi.name) + xml_write_simple_tag(f, 'Summary', doujinshi.info.subtitle) + xml_write_simple_tag(f, 'PageCount', doujinshi.pages) + xml_write_simple_tag(f, 'URL', doujinshi.url) + xml_write_simple_tag(f, 'NhentaiId', doujinshi.id) + xml_write_simple_tag(f, 'Genre', doujinshi.info.categories) + + xml_write_simple_tag(f, 'BlackAndWhite', 'No' if doujinshi.info.tags and 'full color' in doujinshi.info.tags else 'Yes') + + if doujinshi.info.date: + dt = parse_date(doujinshi.info.date) + xml_write_simple_tag(f, 'Year', dt.year) + xml_write_simple_tag(f, 'Month', dt.month) + xml_write_simple_tag(f, 'Day', dt.day) + if doujinshi.info.parodies: + xml_write_simple_tag(f, 'Series', doujinshi.info.parodies) + if doujinshi.info.characters: + xml_write_simple_tag(f, 'Characters', doujinshi.info.characters) + if doujinshi.info.tags: + xml_write_simple_tag(f, 'Tags', 
doujinshi.info.tags) + if doujinshi.info.artists: + xml_write_simple_tag(f, 'Writer', ' & '.join([i.strip() for i in doujinshi.info.artists.split(',')])) + # if doujinshi.info.groups: + # metadata['group'] = [i.strip() for i in doujinshi.info.groups.split(',')] + if doujinshi.info.languages: + languages = [i.strip() for i in doujinshi.info.languages.split(',')] + xml_write_simple_tag(f, 'Translated', 'Yes' if 'translated' in languages else 'No') + [xml_write_simple_tag(f, 'Language', i) for i in languages if i != 'translated'] + + f.write('') + + +def xml_write_simple_tag(f, name, val, indent=1): + f.write(f'{" " * indent}<{name}>{escape(str(val))}\n') + def merge_json(): lst = [] output_dir = "./" diff --git a/nhentai/utils.py b/nhentai/utils.py index f9f5946..421abb8 100644 --- a/nhentai/utils.py +++ b/nhentai/utils.py @@ -12,7 +12,7 @@ import sqlite3 from nhentai import constant from nhentai.logger import logger -from nhentai.serializer import serialize, set_js_database +from nhentai.serializer import serialize_json, serialize_comicxml, set_js_database def request(method, url, **kwargs): @@ -86,7 +86,7 @@ def generate_html(output_dir='.', doujinshi_obj=None): js = readfile('viewer/scripts.js') if doujinshi_obj is not None: - serialize(doujinshi_obj, doujinshi_dir) + serialize_json(doujinshi_obj, doujinshi_dir) name = doujinshi_obj.name if sys.version_info < (3, 0): name = doujinshi_obj.name.encode('utf-8') @@ -102,9 +102,9 @@ def generate_html(output_dir='.', doujinshi_obj=None): with open(os.path.join(doujinshi_dir, 'index.html'), 'wb') as f: f.write(data.encode('utf-8')) - logger.log(15, 'HTML Viewer has been write to \'{0}\''.format(os.path.join(doujinshi_dir, 'index.html'))) + logger.log(15, 'HTML Viewer has been written to \'{0}\''.format(os.path.join(doujinshi_dir, 'index.html'))) except Exception as e: - logger.warning('Writen HTML Viewer failed ({})'.format(str(e))) + logger.warning('Writing HTML Viewer failed ({})'.format(str(e))) def 
generate_main_html(output_dir='./'): @@ -150,7 +150,7 @@ def generate_main_html(output_dir='./'): image_html += element.format(FOLDER=folder, IMAGE=image, TITLE=title) if image_html == '': - logger.warning('None index.html found, --gen-main paused.') + logger.warning('No index.html found, --gen-main paused.') return try: data = main.format(STYLES=css, SCRIPTS=js, PICTURE=image_html) @@ -163,14 +163,15 @@ def generate_main_html(output_dir='./'): shutil.copy(os.path.dirname(__file__)+'/viewer/logo.png', './') set_js_database() logger.log( - 15, 'Main Viewer has been write to \'{0}main.html\''.format(output_dir)) + 15, 'Main Viewer has been written to \'{0}main.html\''.format(output_dir)) except Exception as e: - logger.warning('Writen Main Viewer failed ({})'.format(str(e))) + logger.warning('Writing Main Viewer failed ({})'.format(str(e))) def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False): if doujinshi_obj is not None: doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename) + serialize_comicxml(doujinshi_obj, doujinshi_dir) cbz_filename = os.path.join(os.path.join(doujinshi_dir, '..'), '{}.cbz'.format(doujinshi_obj.filename)) else: cbz_filename = './doujinshi.cbz' @@ -188,7 +189,7 @@ def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False): if rm_origin_dir: shutil.rmtree(doujinshi_dir, ignore_errors=True) - logger.log(15, 'Comic Book CBZ file has been write to \'{0}\''.format(doujinshi_dir)) + logger.log(15, 'Comic Book CBZ file has been written to \'{0}\''.format(doujinshi_dir)) def format_filename(s): diff --git a/requirements.txt b/requirements.txt index 2b55b04..3fbad9d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ BeautifulSoup4>=4.0.0 threadpool>=1.2.7 tabulate>=0.7.5 future>=0.15.2 +iso8601 >= 0.1 From 5552d39337197ba5f0e4137c54dc52dff8e178d4 Mon Sep 17 00:00:00 2001 From: Anh Nhan Nguyen Date: Tue, 21 Apr 2020 14:54:04 +0200 Subject: [PATCH 02/18] fix --artist, --character, --parody, 
--group --- nhentai/command.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/nhentai/command.py b/nhentai/command.py index 4afabef..ff962d7 100644 --- a/nhentai/command.py +++ b/nhentai/command.py @@ -28,6 +28,7 @@ def main(): check_cookie() index = 0 + index_value = None doujinshis = [] doujinshi_ids = [] doujinshi_list = [] @@ -43,18 +44,23 @@ def main(): elif options.artist: index = 1 + index_value = options.artist elif options.character: index = 2 + index_value = options.character elif options.parody: index = 3 + index_value = options.parody elif options.group: index = 4 + index_value = options.group elif options.language: index = 5 + index_value = options.language elif options.keyword: doujinshis = search_parser(options.keyword, sorting=options.sorting, page=options.page) @@ -63,7 +69,7 @@ def main(): doujinshi_ids = options.id if index: - doujinshis = tag_parser(options.language, max_page=options.max_page, index=index) + doujinshis = tag_parser(index_value, max_page=options.max_page, index=index) print_doujinshi(doujinshis) if options.is_download and doujinshis: From 07d804b047b396063680c3fd67dca431b245778e Mon Sep 17 00:00:00 2001 From: Anh Nhan Nguyen Date: Wed, 22 Apr 2020 06:19:12 +0200 Subject: [PATCH 03/18] move ComicInfo.xml behind the --comic-info flag --- nhentai/cmdline.py | 4 +++- nhentai/command.py | 2 +- nhentai/utils.py | 5 +++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/nhentai/cmdline.py b/nhentai/cmdline.py index 9decc88..13a8692 100644 --- a/nhentai/cmdline.py +++ b/nhentai/cmdline.py @@ -38,7 +38,7 @@ def banner(): def cmd_parser(): parser = OptionParser('\n nhentai --search [keyword] --download' '\n NHENTAI=http://h.loli.club nhentai --id [ID ...]' - '\n nhentai --file [filename]' + '\n nhentai --file [filename]' '\n\nEnvironment Variable:\n' ' NHENTAI nhentai mirror url') # operation options @@ -95,6 +95,8 @@ def cmd_parser(): help='generate a main viewer contain all the doujin in the 
folder') parser.add_option('--cbz', '-C', dest='is_cbz', action='store_true', help='generate Comic Book CBZ File') + parser.add_option('--comic-info', dest='write_comic_info', action='store_true', + help='when generating Comic Book CBZ File, also write ComicInfo.xml') parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False, help='remove downloaded doujinshi dir when generated CBZ file.') diff --git a/nhentai/command.py b/nhentai/command.py index ff962d7..6f821d3 100644 --- a/nhentai/command.py +++ b/nhentai/command.py @@ -109,7 +109,7 @@ def main(): if not options.is_nohtml and not options.is_cbz: generate_html(options.output_dir, doujinshi) elif options.is_cbz: - generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir) + generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir, options.write_comic_info) if options.main_viewer: generate_main_html(options.output_dir) diff --git a/nhentai/utils.py b/nhentai/utils.py index 421abb8..6a22d65 100644 --- a/nhentai/utils.py +++ b/nhentai/utils.py @@ -168,10 +168,11 @@ def generate_main_html(output_dir='./'): logger.warning('Writing Main Viewer failed ({})'.format(str(e))) -def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False): +def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_comic_info=False): if doujinshi_obj is not None: doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename) - serialize_comicxml(doujinshi_obj, doujinshi_dir) + if write_comic_info: + serialize_comicxml(doujinshi_obj, doujinshi_dir) cbz_filename = os.path.join(os.path.join(doujinshi_dir, '..'), '{}.cbz'.format(doujinshi_obj.filename)) else: cbz_filename = './doujinshi.cbz' From 2f7bb59e586d69fdf0e9f44475481b9170b1ef41 Mon Sep 17 00:00:00 2001 From: Francesco <55460785+BachoSeven@users.noreply.github.com> Date: Sat, 25 Apr 2020 18:04:50 +0200 Subject: [PATCH 04/18] Update README.rst --- README.rst | 22 ++++++++++++++++++---- 1 file changed, 
18 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 83f8cb8..0927716 100644 --- a/README.rst +++ b/README.rst @@ -19,15 +19,28 @@ nhentai nHentai is a CLI tool for downloading doujinshi from -============ -Installation -============ +=================== +Manual Installation +=================== .. code-block:: git clone https://github.com/RicterZ/nhentai cd nhentai python setup.py install - + +================== +Installation (pip) +================== +Alternatively, install from PyPI with pip +.. code-block:: + + pip install nhentai + +For a self-contained installation, use _Pipx: +.. code-block:: + + pipx install nhentai + ===================== Installation (Gentoo) ===================== @@ -222,3 +235,4 @@ Set `NHENTAI` env var to your nhentai mirror. .. |license| image:: https://img.shields.io/github/license/ricterz/nhentai.svg :target: https://github.com/RicterZ/nhentai/blob/master/LICENSE +.. _Pipx: https://github.com/pipxproject/pipx/ From a2cd0250278cc43ce76d6e248d75ff75c837f397 Mon Sep 17 00:00:00 2001 From: Francesco Minnocci Date: Sat, 25 Apr 2020 18:18:48 +0200 Subject: [PATCH 05/18] updated README.rst --- README.rst | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 0927716..2b9a800 100644 --- a/README.rst +++ b/README.rst @@ -31,16 +31,18 @@ Manual Installation ================== Installation (pip) ================== -Alternatively, install from PyPI with pip +Alternatively, install from PyPI with pip: + .. code-block:: pip install nhentai -For a self-contained installation, use _Pipx: +For a self-contained installation, use `Pipx `_: + .. code-block:: pipx install nhentai - + ===================== Installation (Gentoo) ===================== @@ -63,6 +65,8 @@ Set your nhentai cookie against captcha: nhentai --cookie "YOUR COOKIE FROM nhentai.net" +**NOTE**: The format of the cookie is `"csrftoken=CRSFTOKEN; sessionid=SESSIONID"` + Download specified doujinshi: .. 
code-block:: bash @@ -196,7 +200,7 @@ nHentai Mirror If you want to use a mirror, you should set up a reverse proxy of `nhentai.net` and `i.nhentai.net`. For example: -.. code-block:: +.. code-block:: i.h.loli.club -> i.nhentai.net h.loli.club -> nhentai.net @@ -235,4 +239,3 @@ Set `NHENTAI` env var to your nhentai mirror. .. |license| image:: https://img.shields.io/github/license/ricterz/nhentai.svg :target: https://github.com/RicterZ/nhentai/blob/master/LICENSE -.. _Pipx: https://github.com/pipxproject/pipx/ From 80713d2e00a916aea56560eb4e11dfdc22235da1 Mon Sep 17 00:00:00 2001 From: Francesco Minnocci Date: Sat, 25 Apr 2020 18:19:44 +0200 Subject: [PATCH 06/18] updated README.rst --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 2b9a800..dc167f2 100644 --- a/README.rst +++ b/README.rst @@ -65,7 +65,7 @@ Set your nhentai cookie against captcha: nhentai --cookie "YOUR COOKIE FROM nhentai.net" -**NOTE**: The format of the cookie is `"csrftoken=CRSFTOKEN; sessionid=SESSIONID"` +**NOTE**: The format of the cookie is `"csrftoken=TOKEN; sessionid=ID"` Download specified doujinshi: From e2a1d79b1b01cd7dbad6fb571f3559aa9db7fb75 Mon Sep 17 00:00:00 2001 From: Ricter Z Date: Fri, 1 May 2020 22:18:03 +0800 Subject: [PATCH 07/18] fix #117 --- nhentai/serializer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/nhentai/serializer.py b/nhentai/serializer.py index fc33cda..10e8ccd 100644 --- a/nhentai/serializer.py +++ b/nhentai/serializer.py @@ -33,7 +33,8 @@ def serialize_json(doujinshi, dir): def serialize_comicxml(doujinshi, dir): with open(os.path.join(dir, 'ComicInfo.xml'), 'w') as f: f.write('\n') - f.write('\n') + f.write('\n') xml_write_simple_tag(f, 'Manga', 'Yes') @@ -70,7 +71,8 @@ def serialize_comicxml(doujinshi, dir): def xml_write_simple_tag(f, name, val, indent=1): - f.write(f'{" " * indent}<{name}>{escape(str(val))}\n') + f.write('{}<{}>{}\n'.format(' ' * indent, name, 
escape(str(val)), name)) + def merge_json(): lst = [] From 7e4dff8fec6e0ad45881b6e7ab2f8c7821375e68 Mon Sep 17 00:00:00 2001 From: Ricter Z Date: Fri, 1 May 2020 22:20:55 +0800 Subject: [PATCH 08/18] move import statement to function --- nhentai/serializer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nhentai/serializer.py b/nhentai/serializer.py index 10e8ccd..d8839a5 100644 --- a/nhentai/serializer.py +++ b/nhentai/serializer.py @@ -1,7 +1,6 @@ # coding: utf-8 import json import os -from iso8601 import parse_date from xml.sax.saxutils import escape @@ -31,6 +30,7 @@ def serialize_json(doujinshi, dir): def serialize_comicxml(doujinshi, dir): + from iso8601 import parse_date with open(os.path.join(dir, 'ComicInfo.xml'), 'w') as f: f.write('\n') f.write(' Date: Sat, 2 May 2020 14:23:31 -0300 Subject: [PATCH 09/18] removed all --tag commands since --search API is working again, now --language is a setting, cleaned some code --- nhentai/cmdline.py | 42 ++++++++++++++---------- nhentai/command.py | 39 ++++------------------- nhentai/constant.py | 11 ++----- nhentai/parser.py | 78 ++++++--------------------------------------- 4 files changed, 45 insertions(+), 125 deletions(-) diff --git a/nhentai/cmdline.py b/nhentai/cmdline.py index 13a8692..8e06bfe 100644 --- a/nhentai/cmdline.py +++ b/nhentai/cmdline.py @@ -38,7 +38,7 @@ def banner(): def cmd_parser(): parser = OptionParser('\n nhentai --search [keyword] --download' '\n NHENTAI=http://h.loli.club nhentai --id [ID ...]' - '\n nhentai --file [filename]' + '\n nhentai --file [filename]' '\n\nEnvironment Variable:\n' ' NHENTAI nhentai mirror url') # operation options @@ -50,14 +50,6 @@ def cmd_parser(): parser.add_option('--id', type='string', dest='id', action='store', help='doujinshi ids set, e.g. 
1,2,3') parser.add_option('--search', '-s', type='string', dest='keyword', action='store', help='search doujinshi by keyword') - parser.add_option('--tag', type='string', dest='tag', action='store', help='download doujinshi by tag') - parser.add_option('--artist', type='string', dest='artist', action='store', help='download doujinshi by artist') - parser.add_option('--character', type='string', dest='character', action='store', - help='download doujinshi by character') - parser.add_option('--parody', type='string', dest='parody', action='store', help='download doujinshi by parody') - parser.add_option('--group', type='string', dest='group', action='store', help='download doujinshi by group') - parser.add_option('--language', type='string', dest='language', action='store', - help='download doujinshi by language') parser.add_option('--favorites', '-F', action='store_true', dest='favorites', help='list or download your favorites.') @@ -95,14 +87,14 @@ def cmd_parser(): help='generate a main viewer contain all the doujin in the folder') parser.add_option('--cbz', '-C', dest='is_cbz', action='store_true', help='generate Comic Book CBZ File') - parser.add_option('--comic-info', dest='write_comic_info', action='store_true', - help='when generating Comic Book CBZ File, also write ComicInfo.xml') parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False, help='remove downloaded doujinshi dir when generated CBZ file.') # nhentai options parser.add_option('--cookie', type='str', dest='cookie', action='store', help='set cookie of nhentai to bypass Google recaptcha') + parser.add_option('--language', type='str', dest='language', action='store', + help='set default language to parse doujinshis') parser.add_option('--save-download-history', dest='is_save_download_history', action='store_true', default=False, help='save downloaded doujinshis, whose will be skipped if you re-download them') parser.add_option('--clean-download-history', 
action='store_true', default=False, dest='clean_download_history', @@ -123,8 +115,7 @@ def cmd_parser(): exit(0) if args.main_viewer and not args.id and not args.keyword and \ - not args.tag and not args.artist and not args.character and \ - not args.parody and not args.group and not args.language and not args.favorites: + not args.tag and not args.favorites: generate_main_html() exit(0) @@ -153,6 +144,25 @@ def cmd_parser(): logger.info('Cookie saved.') exit(0) + if os.path.exists(constant.NHENTAI_LANGUAGE) and not args.language: + with open(constant.NHENTAI_LANGUAGE, 'r') as f: + constant.LANGUAGE = f.read() + args.language = f.read() + + if args.language: + try: + if not os.path.exists(constant.NHENTAI_HOME): + os.mkdir(constant.NHENTAI_HOME) + + with open(constant.NHENTAI_LANGUAGE, 'w') as f: + f.write(args.language) + except Exception as e: + logger.error('Cannot create NHENTAI_HOME: {}'.format(str(e))) + exit(1) + + logger.info('Default language now is {}.'.format(args.language)) + exit(0) + if os.path.exists(constant.NHENTAI_PROXY): with open(constant.NHENTAI_PROXY, 'r') as f: link = f.read() @@ -192,14 +202,12 @@ def cmd_parser(): args.id = set(int(i) for i in _ if i.isdigit()) if (args.is_download or args.is_show) and not args.id and not args.keyword and \ - not args.tag and not args.artist and not args.character and \ - not args.parody and not args.group and not args.language and not args.favorites: + not args.tag and not args.favorites: logger.critical('Doujinshi id(s) are required for downloading') parser.print_help() exit(1) - if not args.keyword and not args.id and not args.tag and not args.artist and \ - not args.character and not args.parody and not args.group and not args.language and not args.favorites: + if not args.keyword and not args.id and not args.favorites: parser.print_help() exit(1) diff --git a/nhentai/command.py b/nhentai/command.py index 6f821d3..f1ecf9f 100644 --- a/nhentai/command.py +++ b/nhentai/command.py @@ -21,14 +21,12 @@ def 
main(): from nhentai.constant import PROXY # constant.PROXY will be changed after cmd_parser() - if PROXY != {}: + if PROXY: logger.info('Using proxy: {0}'.format(PROXY)) # check your cookie check_cookie() - index = 0 - index_value = None doujinshis = [] doujinshi_ids = [] doujinshi_list = [] @@ -39,38 +37,16 @@ def main(): doujinshis = favorites_parser(options.page_range) - elif options.tag: - doujinshis = tag_parser(options.tag, sorting=options.sorting, max_page=options.max_page) - - elif options.artist: - index = 1 - index_value = options.artist - - elif options.character: - index = 2 - index_value = options.character - - elif options.parody: - index = 3 - index_value = options.parody - - elif options.group: - index = 4 - index_value = options.group - - elif options.language: - index = 5 - index_value = options.language - elif options.keyword: + from nhentai.constant import LANGUAGE + if LANGUAGE: + logger.info('Using deafult language: {0}'.format(LANGUAGE)) + options.keyword += ', language:{}'.format(LANGUAGE) doujinshis = search_parser(options.keyword, sorting=options.sorting, page=options.page) elif not doujinshi_ids: doujinshi_ids = options.id - if index: - doujinshis = tag_parser(index_value, max_page=options.max_page, index=index) - print_doujinshi(doujinshis) if options.is_download and doujinshis: doujinshi_ids = [i['id'] for i in doujinshis] @@ -109,7 +85,7 @@ def main(): if not options.is_nohtml and not options.is_cbz: generate_html(options.output_dir, doujinshi) elif options.is_cbz: - generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir, options.write_comic_info) + generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir) if options.main_viewer: generate_main_html(options.output_dir) @@ -125,6 +101,5 @@ def main(): signal.signal(signal.SIGINT, signal_handler) - if __name__ == '__main__': - main() + main() \ No newline at end of file diff --git a/nhentai/constant.py b/nhentai/constant.py index c5c7c53..c6b40c4 100644 --- 
a/nhentai/constant.py +++ b/nhentai/constant.py @@ -12,17 +12,10 @@ except ImportError: BASE_URL = os.getenv('NHENTAI', 'https://nhentai.net') __api_suspended_DETAIL_URL = '%s/api/gallery' % BASE_URL -__api_suspended_SEARCH_URL = '%s/api/galleries/search' % BASE_URL DETAIL_URL = '%s/g' % BASE_URL -SEARCH_URL = '%s/search/' % BASE_URL +SEARCH_URL = '%s/api/galleries/search' % BASE_URL -TAG_URL = ['%s/tag' % BASE_URL, - '%s/artist' % BASE_URL, - '%s/character' % BASE_URL, - '%s/parody' % BASE_URL, - '%s/group' % BASE_URL, - '%s/language' % BASE_URL] TAG_API_URL = '%s/api/galleries/tagged' % BASE_URL LOGIN_URL = '%s/login/' % BASE_URL @@ -35,8 +28,10 @@ IMAGE_URL = '%s://i.%s/galleries' % (u.scheme, u.hostname) NHENTAI_HOME = os.path.join(os.getenv('HOME', tempfile.gettempdir()), '.nhentai') NHENTAI_PROXY = os.path.join(NHENTAI_HOME, 'proxy') NHENTAI_COOKIE = os.path.join(NHENTAI_HOME, 'cookie') +NHENTAI_LANGUAGE = os.path.join(NHENTAI_HOME, 'language') NHENTAI_HISTORY = os.path.join(NHENTAI_HOME, 'history.sqlite3') PROXY = {} COOKIE = '' +LANGUAGE = '' diff --git a/nhentai/parser.py b/nhentai/parser.py index 97ea354..62224d5 100644 --- a/nhentai/parser.py +++ b/nhentai/parser.py @@ -120,15 +120,15 @@ def page_range_parser(page_range, max_page_num): else: try: left = int(range_str[:idx]) - right = int(range_str[idx+1:]) + right = int(range_str[idx + 1:]) if right > max_page_num: right = max_page_num - for page in range(left, right+1): + for page in range(left, right + 1): pages.add(page) except ValueError: logger.error('page range({0}) is not valid'.format(page_range)) - - return list(pages) + + return list(pages) def doujinshi_parser(id_): @@ -143,7 +143,7 @@ def doujinshi_parser(id_): try: response = request('get', url) - if response.status_code in (200, ): + if response.status_code in (200,): response = response.content else: logger.debug('Slow down and retry ({}) ...'.format(id_)) @@ -202,7 +202,7 @@ def doujinshi_parser(id_): return doujinshi -def 
search_parser(keyword, sorting='date', page=1): +def old_search_parser(keyword, sorting='date', page=1): logger.debug('Searching doujinshis of keyword {0}'.format(keyword)) response = request('get', url=constant.SEARCH_URL, params={'q': keyword, 'page': page, 'sort': sorting}).content @@ -222,50 +222,15 @@ def print_doujinshi(doujinshi_list): tabulate(tabular_data=doujinshi_list, headers=headers, tablefmt='rst')) -def tag_parser(tag_name, sorting='date', max_page=1, index=0): - result = [] - tag_name = tag_name.lower() - if ',' in tag_name: - tag_name = [i.strip().replace(' ', '-') for i in tag_name.split(',')] - else: - tag_name = tag_name.strip().replace(' ', '-') - if sorting == 'date': - sorting = '' - - for p in range(1, max_page + 1): - if sys.version_info >= (3, 0, 0): - unicode_ = str - else: - unicode_ = unicode - - if isinstance(tag_name, (str, unicode_)): - logger.debug('Fetching page {0} for doujinshi with tag \'{1}\''.format(p, tag_name)) - response = request('get', url='%s/%s/%s?page=%d' % (constant.TAG_URL[index], tag_name, sorting, p)).content - result += _get_title_and_id(response) - else: - for i in tag_name: - logger.debug('Fetching page {0} for doujinshi with tag \'{1}\''.format(p, i)) - response = request('get', - url='%s/%s/%s?page=%d' % (constant.TAG_URL[index], i, sorting, p)).content - result += _get_title_and_id(response) - - if not result: - logger.error('Cannot find doujinshi id of tag \'{0}\''.format(tag_name)) - return - - if not result: - logger.warn('No results for tag \'{}\''.format(tag_name)) - - return result - - -def __api_suspended_search_parser(keyword, sorting, page): +def search_parser(keyword, sorting, page): logger.debug('Searching doujinshis using keywords {0}'.format(keyword)) + keyword = '+'.join([i.strip().replace(' ', '-').lower() for i in keyword.split(',')]) result = [] i = 0 while i < 5: try: - response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page, 'sort': sorting}).json() + url = 
request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page, 'sort': sorting}).url + response = request('get', url.replace('%2B', '+')).json() except Exception as e: i += 1 if not i < 5: @@ -289,29 +254,6 @@ def __api_suspended_search_parser(keyword, sorting, page): return result -def __api_suspended_tag_parser(tag_id, sorting, max_page=1): - logger.info('Searching for doujinshi with tag id {0}'.format(tag_id)) - result = [] - response = request('get', url=constant.TAG_API_URL, params={'sort': sorting, 'tag_id': tag_id}).json() - page = max_page if max_page <= response['num_pages'] else int(response['num_pages']) - - for i in range(1, page + 1): - logger.info('Getting page {} ...'.format(i)) - - if page != 1: - response = request('get', url=constant.TAG_API_URL, - params={'sort': sorting, 'tag_id': tag_id}).json() - for row in response['result']: - title = row['title']['english'] - title = title[:85] + '..' if len(title) > 85 else title - result.append({'id': row['id'], 'title': title}) - - if not result: - logger.warn('No results for tag id {}'.format(tag_id)) - - return result - - def __api_suspended_doujinshi_parser(id_): if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()): raise Exception('Doujinshi id({0}) is not valid'.format(id_)) From be2ec3f452515a8570617d7bfb8c2f8f03ff7d2b Mon Sep 17 00:00:00 2001 From: Alocks Date: Sat, 2 May 2020 14:35:22 -0300 Subject: [PATCH 10/18] updated documentation --- README.rst | 51 +++++++++------------------------------------------ 1 file changed, 9 insertions(+), 42 deletions(-) diff --git a/README.rst b/README.rst index dc167f2..7dfc26b 100644 --- a/README.rst +++ b/README.rst @@ -79,53 +79,20 @@ Download doujinshi with ids specified in a file (doujinshi ids split by line): nhentai --file=doujinshi.txt +Set search default language + +.. code-block:: bash + + nhentai --language=english + Search a keyword and download the first page: .. 
code-block:: bash nhentai --search="tomori" --page=1 --download - -Download by tag name: - -.. code-block:: bash - - nhentai --tag lolicon --download --page=2 - -Download by language: - -.. code-block:: bash - - nhentai --language english --download --page=2 - -Download by artist name: - -.. code-block:: bash - - nhentai --artist henreader --download - -Download by character name: - -.. code-block:: bash - - nhentai --character "kuro von einsbern" --download - -Download by parody name: - -.. code-block:: bash - - nhentai --parody "the idolmaster" --download - -Download by group name: - -.. code-block:: bash - - nhentai --group clesta --download - -Download using multiple tags (--tag, --character, --paordy and --group supported): - -.. code-block:: bash - - nhentai --tag "lolicon, teasing" --artist "tamano kedama, atte nanakusa" + # you also can download by tags and multiple keywords + nhentai --search="tag:lolicon, artist:henreader, tag:full color" + nhentai --search="lolicon, henreader, full color" Download your favorites with delay: From fc39aeb49ea411c22ae658b5d23b8d5809b74d6c Mon Sep 17 00:00:00 2001 From: Alocks Date: Sat, 2 May 2020 14:52:24 -0300 Subject: [PATCH 11/18] stupid fix --- nhentai/command.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nhentai/command.py b/nhentai/command.py index f1ecf9f..40e412c 100644 --- a/nhentai/command.py +++ b/nhentai/command.py @@ -6,7 +6,7 @@ import platform import time from nhentai.cmdline import cmd_parser, banner -from nhentai.parser import doujinshi_parser, search_parser, print_doujinshi, favorites_parser, tag_parser +from nhentai.parser import doujinshi_parser, search_parser, print_doujinshi, favorites_parser from nhentai.doujinshi import Doujinshi from nhentai.downloader import Downloader from nhentai.logger import logger @@ -102,4 +102,4 @@ def main(): signal.signal(signal.SIGINT, signal_handler) if __name__ == '__main__': - main() \ No newline at end of file + main() From 
22dbb4dd0dd2d647a86ecad2d31f23fb293776f5 Mon Sep 17 00:00:00 2001 From: user Date: Sun, 7 Jun 2020 19:07:40 +0300 Subject: [PATCH 12/18] Add PDF support --- README.rst | 3 ++- nhentai/cmdline.py | 6 ++++-- nhentai/command.py | 6 ++++-- nhentai/utils.py | 29 +++++++++++++++++++++++++++++ requirements.txt | 1 + 5 files changed, 40 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 7dfc26b..cf0497a 100644 --- a/README.rst +++ b/README.rst @@ -154,8 +154,9 @@ Other options: --no-html don't generate HTML after downloading --gen-main generate a main viewer contain all the doujin in the folder -C, --cbz generate Comic Book CBZ File + -P --pdf generate PDF file --rm-origin-dir remove downloaded doujinshi dir when generated CBZ - file. + or PDF file. # nHentai options --cookie=COOKIE set cookie of nhentai to bypass Google recaptcha diff --git a/nhentai/cmdline.py b/nhentai/cmdline.py index 8e06bfe..849ebc9 100644 --- a/nhentai/cmdline.py +++ b/nhentai/cmdline.py @@ -38,7 +38,7 @@ def banner(): def cmd_parser(): parser = OptionParser('\n nhentai --search [keyword] --download' '\n NHENTAI=http://h.loli.club nhentai --id [ID ...]' - '\n nhentai --file [filename]' + '\n nhentai --file [filename]' '\n\nEnvironment Variable:\n' ' NHENTAI nhentai mirror url') # operation options @@ -87,8 +87,10 @@ def cmd_parser(): help='generate a main viewer contain all the doujin in the folder') parser.add_option('--cbz', '-C', dest='is_cbz', action='store_true', help='generate Comic Book CBZ File') + parser.add_option('--pdf', '-P', dest='is_pdf', action='store_true', + help='generate PDF file') parser.add_option('--rm-origin-dir', dest='rm_origin_dir', action='store_true', default=False, - help='remove downloaded doujinshi dir when generated CBZ file.') + help='remove downloaded doujinshi dir when generated CBZ or PDF file.') # nhentai options parser.add_option('--cookie', type='str', dest='cookie', action='store', diff --git a/nhentai/command.py b/nhentai/command.py 
index 40e412c..f6c4855 100644 --- a/nhentai/command.py +++ b/nhentai/command.py @@ -11,7 +11,7 @@ from nhentai.doujinshi import Doujinshi from nhentai.downloader import Downloader from nhentai.logger import logger from nhentai.constant import BASE_URL -from nhentai.utils import generate_html, generate_cbz, generate_main_html, check_cookie, signal_handler, DB +from nhentai.utils import generate_html, generate_cbz, generate_main_html, generate_pdf, check_cookie, signal_handler, DB def main(): @@ -82,10 +82,12 @@ def main(): with DB() as db: db.add_one(doujinshi.id) - if not options.is_nohtml and not options.is_cbz: + if not options.is_nohtml and not options.is_cbz and not options.is_pdf: generate_html(options.output_dir, doujinshi) elif options.is_cbz: generate_cbz(options.output_dir, doujinshi, options.rm_origin_dir) + elif options.is_pdf: + generate_pdf(options.output_dir, doujinshi, options.rm_origin_dir) if options.main_viewer: generate_main_html(options.output_dir) diff --git a/nhentai/utils.py b/nhentai/utils.py index 6a22d65..779ce2d 100644 --- a/nhentai/utils.py +++ b/nhentai/utils.py @@ -9,6 +9,7 @@ import zipfile import shutil import requests import sqlite3 +import img2pdf from nhentai import constant from nhentai.logger import logger @@ -193,6 +194,34 @@ def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_ logger.log(15, 'Comic Book CBZ file has been written to \'{0}\''.format(doujinshi_dir)) +def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False): + """Write every file in the doujinshi directory, sorted by name, to one PDF file using img2pdf.""" + if doujinshi_obj is not None: + doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename) + pdf_filename = os.path.join( + os.path.join(doujinshi_dir, '..'), + '{}.pdf'.format(doujinshi_obj.filename) + ) + else: + pdf_filename = './doujinshi.pdf' + doujinshi_dir = '.' 
+ + file_list = os.listdir(doujinshi_dir) + file_list.sort() + + logger.info('Writing PDF file to path: {}'.format(pdf_filename)) + with open(pdf_filename, 'wb') as pdf_f: + full_path_list = ( + [os.path.join(doujinshi_dir, image) for image in file_list] + ) + pdf_f.write(img2pdf.convert(full_path_list)) + + if rm_origin_dir: + shutil.rmtree(doujinshi_dir, ignore_errors=True) + + logger.log(15, 'PDF file has been written to \'{0}\''.format(pdf_filename)) + + def format_filename(s): """Take a string and return a valid filename constructed from the string. Uses a whitelist approach: any characters not present in valid_chars are diff --git a/requirements.txt b/requirements.txt index 3fbad9d..e34829d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,5 @@ BeautifulSoup4>=4.0.0 threadpool>=1.2.7 tabulate>=0.7.5 future>=0.15.2 +img2pdf>=0.3.6 iso8601 >= 0.1 From 44c424a3215e6c9e2ae1b6ef1b4e290763c190cd Mon Sep 17 00:00:00 2001 From: jwfiredragon Date: Wed, 10 Jun 2020 22:39:35 -0700 Subject: [PATCH 13/18] Fixing parser for nhentai site update nhentai's recent site update broke the parser, this fixes it. Based off the work on [my fork here](https://github.com/jwfiredragon/nhentai/commit/8c4a4f02bc0d551b13fd5c9e7da5afe3381c47d3). 
--- nhentai/parser.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/nhentai/parser.py b/nhentai/parser.py index 62224d5..97c1b15 100644 --- a/nhentai/parser.py +++ b/nhentai/parser.py @@ -179,11 +179,9 @@ def doujinshi_parser(id_): doujinshi['ext'] = ext pages = 0 - for _ in doujinshi_info.find_all('div', class_=''): - pages = re.search('([\d]+) pages', _.text) - if pages: - pages = pages.group(1) - break + for _ in doujinshi_info.find_all('div', class_='tag-container field-name'): + if re.search('Pages:', _.text): + pages = _.find('span', class_='name').string doujinshi['pages'] = int(pages) # gain information of the doujinshi @@ -192,7 +190,7 @@ def doujinshi_parser(id_): for field in information_fields: field_name = field.contents[0].strip().strip(':') if field_name in needed_fields: - data = [sub_field.contents[0].strip() for sub_field in + data = [sub_field.find('span', attrs={'class': 'name'}).contents[0].strip() for sub_field in field.find_all('a', attrs={'class': 'tag'})] doujinshi[field_name.lower()] = ', '.join(data) From 7323eae99b8330574fb0ca762af7508b5d8dda23 Mon Sep 17 00:00:00 2001 From: Ma Yunzhe Date: Mon, 15 Jun 2020 10:00:23 +0800 Subject: [PATCH 14/18] remove args.tag since no tag option in parser --- nhentai/cmdline.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/nhentai/cmdline.py b/nhentai/cmdline.py index 849ebc9..7153991 100644 --- a/nhentai/cmdline.py +++ b/nhentai/cmdline.py @@ -116,8 +116,7 @@ def cmd_parser(): generate_html() exit(0) - if args.main_viewer and not args.id and not args.keyword and \ - not args.tag and not args.favorites: + if args.main_viewer and not args.id and not args.keyword and not args.favorites: generate_main_html() exit(0) @@ -203,8 +202,7 @@ def cmd_parser(): _ = [i.strip() for i in f.readlines()] args.id = set(int(i) for i in _ if i.isdigit()) - if (args.is_download or args.is_show) and not args.id and not args.keyword and \ - not args.tag and not 
args.favorites: + if (args.is_download or args.is_show) and not args.id and not args.keyword and not args.favorites: logger.critical('Doujinshi id(s) are required for downloading') parser.print_help() exit(1) From 37b4ee7d002507e5ed45f33fbd481e8178c032d0 Mon Sep 17 00:00:00 2001 From: jwfiredragon Date: Tue, 23 Jun 2020 23:04:09 -0700 Subject: [PATCH 15/18] Fixing typos ms-user-select should be -ms-user-select. #0d0d0d9 isn't a valid hex code - I assume it's supposed to be #0d0d0d? --- nhentai/viewer/main.css | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nhentai/viewer/main.css b/nhentai/viewer/main.css index b621234..01a58ba 100644 --- a/nhentai/viewer/main.css +++ b/nhentai/viewer/main.css @@ -148,7 +148,7 @@ blockquote { -webkit-user-select: none; /* Safari */ -khtml-user-select: none; /* Konqueror HTML */ -moz-user-select: none; /* Old versions of Firefox */ - ms-user-select: none; /* Internet Explorer/Edge */ + -ms-user-select: none; /* Internet Explorer/Edge */ user-select: none; } @@ -157,7 +157,7 @@ blockquote { padding: 5px 0px 5px 15px; text-decoration: none; font-size: 15px; - color: #0d0d0d9; + color: #0d0d0d; display: block; text-align: left; } @@ -329,4 +329,4 @@ html.theme-black .gallery:hover .caption { html.theme-black .caption { background-color: #404040; color: #d9d9d9 -} \ No newline at end of file +} From 7508a2010d94774e4adf70f490ab634ddf4cd39c Mon Sep 17 00:00:00 2001 From: RicterZ Date: Fri, 26 Jun 2020 13:12:37 +0800 Subject: [PATCH 16/18] 0.4.0 --- nhentai/__init__.py | 2 +- nhentai/utils.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/nhentai/__init__.py b/nhentai/__init__.py index 14b488f..e622c47 100644 --- a/nhentai/__init__.py +++ b/nhentai/__init__.py @@ -1,3 +1,3 @@ -__version__ = '0.3.9' +__version__ = '0.4.0' __author__ = 'RicterZ' __email__ = 'ricterzheng@gmail.com' diff --git a/nhentai/utils.py b/nhentai/utils.py index 779ce2d..caa925b 100644 --- a/nhentai/utils.py +++ 
b/nhentai/utils.py @@ -233,6 +233,9 @@ and append a file extension like '.txt', so I avoid the potential of using an invalid filename. """ + return s + + # maybe you can use `--format` to select a suitable filename valid_chars = "-_.()[] %s%s" % (string.ascii_letters, string.digits) filename = ''.join(c for c in s if c in valid_chars) if len(filename) > 100: From 43013badd459de93f8359497282f3cec718fdea1 Mon Sep 17 00:00:00 2001 From: RicterZ Date: Fri, 26 Jun 2020 13:12:49 +0800 Subject: [PATCH 17/18] update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 0905537..1d6ba36 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ dist/ .python-version .DS_Store output/ +venv/ From e9864d158f15eae5d1d87402948e08d132eb4b26 Mon Sep 17 00:00:00 2001 From: RicterZ Date: Fri, 26 Jun 2020 13:15:57 +0800 Subject: [PATCH 18/18] update tests --- .travis.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 14d9f97..d7edc15 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,10 +11,9 @@ install: script: - echo 268642 > /tmp/test.txt - - nhentai --cookie "csrftoken=xIh7s9d4NB8qSLN7eJZG9064zsV84aHEYFoAU49Ib9anqmoT0pZRw6TIdayLzQuT; sessionid=un101zfgpglsyffdnsm72le4euuisp7t" + - nhentai --cookie "_ga=GA1.2.2000087053.1558179358; __cfduid=d8930f7b43d04e1b2117719e28386b2e31593148489; csrftoken=3914GQGSmmqQyfQTBswNgfXuhFiefu8sAgOnsfZWiiqS4PJpKivuTp34p2USV6xu; sessionid=be0w2lwlprlmld3ahg9i592ipsuaw840" - nhentai --search umaru - nhentai --id=152503,146134 -t 10 --output=/tmp/ --cbz - - nhentai --tag lolicon --sorting popular - nhentai -F - nhentai --file /tmp/test.txt - nhentai --id=152503,146134 --gen-main --output=/tmp/