diff --git a/MANIFEST.in b/MANIFEST.in index edb9348..60ce4b8 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,5 @@ -include README.md -include requirements.txt -include nhentai/doujinshi.html +include README.md +include requirements.txt +include nhentai/viewer/index.html +include nhentai/viewer/styles.css +include nhentai/viewer/scripts.js diff --git a/README.md b/README.md index c7f7d8e..6b348f7 100644 --- a/README.md +++ b/README.md @@ -1,68 +1,70 @@ -nhentai -======= - _ _ _ _ - _ __ | | | | ___ _ __ | |_ __ _(_) - | '_ \| |_| |/ _ \ '_ \| __/ _` | | - | | | | _ | __/ | | | || (_| | | - |_| |_|_| |_|\___|_| |_|\__\__,_|_| - -あなたも変態。 いいね? -[![Build Status](https://travis-ci.org/RicterZ/nhentai.svg?branch=master)](https://travis-ci.org/RicterZ/nhentai) - -🎉🎉 nhentai 现在支持 Windows 啦! - -由于 [http://nhentai.net](http://nhentai.net) 下载下来的种子速度很慢,而且官方也提供在线观看本子的功能,所以可以利用本脚本下载本子。 - -### Installation - - git clone https://github.com/RicterZ/nhentai - cd nhentai - python setup.py install - -### Gentoo - - layman -fa glicOne - sudo emerge net-misc/nhentai - -### Usage -下载指定 id 列表的本子: -```bash -nhentai --id=123855,123866 -``` - -下载某关键词第一页的本子: -```bash -nhentai --search="tomori" --page=1 --download -``` - -下载用户 favorites 内容: -```bash -nhentai --login "username:password" --download -``` - -### Options - -`-t, --thread`:指定下载的线程数,最多为 10 线程。 -`--path`:指定下载文件的输出路径,默认为当前目录。 -`--timeout`:指定下载图片的超时时间,默认为 30 秒。 -`--proxy`:指定下载的代理,例如: http://127.0.0.1:8080/ -`--login`:nhentai 账号的“用户名:密码”组合 - -### nHentai Mirror -如果想用自建镜像下载 nhentai 的本子,需要搭建 nhentai.net 和 i.nhentai.net 的反向代理。 -例如用 h.loli.club 来做反向代理的话,需要 h.loli.club 反代 nhentai.net,i.h.loli.club 反带 i.nhentai.net。 -然后利用环境变量来下载: - -```bash -NHENTAI=http://h.loli.club nhentai --id 123456 -``` - -![](./images/search.png) -![](./images/download.png) -![](./images/viewer.png) - -### License -MIT - -### あなたも変態 +nhentai +======= + _ _ _ _ + _ __ | | | | ___ _ __ | |_ __ _(_) + | '_ \| |_| |/ _ \ '_ \| __/ _` | | + | | | | _ | __/ | | | || (_| 
| | + |_| |_|_| |_|\___|_| |_|\__\__,_|_| + +あなたも変態。 いいね? +[![Build Status](https://travis-ci.org/RicterZ/nhentai.svg?branch=master)](https://travis-ci.org/RicterZ/nhentai) + +🎉🎉 nhentai 现在支持 Windows 啦! + +由于 [http://nhentai.net](http://nhentai.net) 下载下来的种子速度很慢,而且官方也提供在线观看本子的功能,所以可以利用本脚本下载本子。 + +### Installation + + git clone https://github.com/RicterZ/nhentai + cd nhentai + python setup.py install + +### Gentoo + + layman -fa glicOne + sudo emerge net-misc/nhentai + +### Usage +下载指定 id 列表的本子: +```bash +nhentai --id=123855,123866 +``` + +下载某关键词第一页的本子: +```bash +nhentai --search="tomori" --page=1 --download +``` + +下载用户 favorites 内容: +```bash +nhentai --login "username:password" --download +``` + +### Options + +`-t, --threads`:指定下载的线程数,最多为 15 线程。 +`--output`:指定下载文件的输出路径,默认为当前目录。 +`--timeout`:指定下载图片的超时时间,默认为 30 秒。 +`--proxy`:指定下载的代理,例如: http://127.0.0.1:8080/ +`--login`:nhentai 账号的“用户名:密码”组合 +`--nohtml`:下载后不生成 HTML 查看器 +`--cbz`:生成 Comic Book CBZ 文件 + +### nHentai Mirror +如果想用自建镜像下载 nhentai 的本子,需要搭建 nhentai.net 和 i.nhentai.net 的反向代理。 +例如用 h.loli.club 来做反向代理的话,需要 h.loli.club 反代 nhentai.net,i.h.loli.club 反代 i.nhentai.net。 +然后利用环境变量来下载: + +```bash +NHENTAI=http://h.loli.club nhentai --id 123456 +``` + +![](./images/search.png) +![](./images/download.png) +![](./images/viewer.png) + +### License +MIT + +### あなたも変態 ![](./images/image.jpg) \ No newline at end of file diff --git a/nhentai/__init__.py b/nhentai/__init__.py index 9acc19b..51111f2 100644 --- a/nhentai/__init__.py +++ b/nhentai/__init__.py @@ -1,3 +1,3 @@ -__version__ = '0.2.12' -__author__ = 'Ricter' -__email__ = 'ricterzheng@gmail.com' +__version__ = '0.2.14' +__author__ = 'RicterZ' +__email__ = 'ricterzheng@gmail.com' diff --git a/nhentai/cmdline.py b/nhentai/cmdline.py index 7995e4f..deea5b2 100644 --- a/nhentai/cmdline.py +++ b/nhentai/cmdline.py @@ -1,113 +1,120 @@ -# coding: utf-8 -from __future__ import print_function -import sys -from optparse import OptionParser -try: - from 
itertools import ifilter as filter -except ImportError: - pass - -import nhentai.constant as constant -from nhentai.utils import urlparse, generate_html -from nhentai.logger import logger - -try: - reload(sys) - sys.setdefaultencoding(sys.stdin.encoding) -except NameError: - # python3 - pass - - -def banner(): - logger.info(u'''nHentai: あなたも変態。 いいね? - _ _ _ _ - _ __ | | | | ___ _ __ | |_ __ _(_) -| '_ \| |_| |/ _ \ '_ \| __/ _` | | -| | | | _ | __/ | | | || (_| | | -|_| |_|_| |_|\___|_| |_|\__\__,_|_| -''') - - -def cmd_parser(): - parser = OptionParser('\n nhentai --search [keyword] --download' - '\n NHENTAI=http://h.loli.club nhentai --id [ID ...]' - '\n\nEnvironment Variable:\n' - ' NHENTAI nhentai mirror url') - parser.add_option('--download', dest='is_download', action='store_true', - help='download doujinshi (for search result)') - parser.add_option('--show-info', dest='is_show', action='store_true', help='just show the doujinshi information') - parser.add_option('--id', type='string', dest='id', action='store', help='doujinshi ids set, e.g. 
1,2,3') - parser.add_option('--search', type='string', dest='keyword', action='store', help='search doujinshi by keyword') - parser.add_option('--page', type='int', dest='page', action='store', default=1, - help='page number of search result') - parser.add_option('--tags', type='string', dest='tags', action='store', help='download doujinshi by tags') - parser.add_option('--output', type='string', dest='output_dir', action='store', default='', - help='output dir') - parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5, - help='thread count of download doujinshi') - parser.add_option('--timeout', type='int', dest='timeout', action='store', default=30, - help='timeout of download doujinshi') - parser.add_option('--proxy', type='string', dest='proxy', action='store', default='', - help='use proxy, example: http://127.0.0.1:1080') - parser.add_option('--html', dest='html_viewer', action='store_true', - help='generate a html viewer at current directory') - - parser.add_option('--login', '-l', type='str', dest='login', action='store', - help='username:password pair of nhentai account') - - try: - sys.argv = list(map(lambda x: unicode(x.decode(sys.stdin.encoding)), sys.argv)) - except (NameError, TypeError): - pass - except UnicodeDecodeError: - exit(0) - - args, _ = parser.parse_args(sys.argv[1:]) - - if args.html_viewer: - generate_html() - exit(0) - - if args.login: - try: - _, _ = args.login.split(':', 1) - except ValueError: - logger.error('Invalid `username:password` pair.') - exit(1) - - if not args.is_download: - logger.warning('YOU DO NOT SPECIFY `--download` OPTION !!!') - - if args.tags: - logger.warning('`--tags` is under construction') - exit(1) - - if args.id: - _ = map(lambda id: id.strip(), args.id.split(',')) - args.id = set(map(int, filter(lambda id_: id_.isdigit(), _))) - - if (args.is_download or args.is_show) and not args.id and not args.keyword and not args.login: - logger.critical('Doujinshi id(s) are required 
for downloading') - parser.print_help() - exit(1) - - if not args.keyword and not args.id and not args.login: - parser.print_help() - exit(1) - - if args.threads <= 0: - args.threads = 1 - - elif args.threads > 15: - logger.critical('Maximum number of used threads is 15') - exit(1) - - if args.proxy: - proxy_url = urlparse(args.proxy) - if proxy_url.scheme not in ('http', 'https'): - logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(proxy_url.scheme)) - else: - constant.PROXY = {'http': args.proxy, 'https': args.proxy} - - return args +# coding: utf-8 +from __future__ import print_function +import sys +from optparse import OptionParser +from nhentai import __version__ +try: + from itertools import ifilter as filter +except ImportError: + pass + +import nhentai.constant as constant +from nhentai.utils import urlparse, generate_html +from nhentai.logger import logger + +try: + reload(sys) + sys.setdefaultencoding(sys.stdin.encoding) +except NameError: + # python3 + pass + + +def banner(): + logger.info(u'''nHentai ver %s: あなたも変態。 いいね? + _ _ _ _ + _ __ | | | | ___ _ __ | |_ __ _(_) +| '_ \| |_| |/ _ \ '_ \| __/ _` | | +| | | | _ | __/ | | | || (_| | | +|_| |_|_| |_|\___|_| |_|\__\__,_|_| +''' % __version__) + + +def cmd_parser(): + parser = OptionParser('\n nhentai --search [keyword] --download' + '\n NHENTAI=http://h.loli.club nhentai --id [ID ...]' + '\n\nEnvironment Variable:\n' + ' NHENTAI nhentai mirror url') + parser.add_option('--download', dest='is_download', action='store_true', + help='download doujinshi (for search result)') + parser.add_option('--show-info', dest='is_show', action='store_true', help='just show the doujinshi information') + parser.add_option('--id', type='string', dest='id', action='store', help='doujinshi ids set, e.g. 
1,2,3') + parser.add_option('--search', type='string', dest='keyword', action='store', help='search doujinshi by keyword') + parser.add_option('--page', type='int', dest='page', action='store', default=1, + help='page number of search result') + parser.add_option('--tags', type='string', dest='tags', action='store', help='download doujinshi by tags') + parser.add_option('--output', type='string', dest='output_dir', action='store', default='', + help='output dir') + parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5, + help='thread count of download doujinshi') + parser.add_option('--timeout', type='int', dest='timeout', action='store', default=30, + help='timeout of download doujinshi') + parser.add_option('--proxy', type='string', dest='proxy', action='store', default='', + help='use proxy, example: http://127.0.0.1:1080') + parser.add_option('--html', dest='html_viewer', action='store_true', + help='generate a html viewer at current directory') + + parser.add_option('--login', '-l', type='str', dest='login', action='store', + help='username:password pair of nhentai account') + + parser.add_option('--nohtml', dest='is_nohtml', action='store_true', + help='Don\'t generate HTML') + + parser.add_option('--cbz', dest='is_cbz', action='store_true', + help='Generate Comic Book CBZ File') + + try: + sys.argv = list(map(lambda x: unicode(x.decode(sys.stdin.encoding)), sys.argv)) + except (NameError, TypeError): + pass + except UnicodeDecodeError: + exit(0) + + args, _ = parser.parse_args(sys.argv[1:]) + + if args.html_viewer: + generate_html() + exit(0) + + if args.login: + try: + _, _ = args.login.split(':', 1) + except ValueError: + logger.error('Invalid `username:password` pair.') + exit(1) + + if not args.is_download: + logger.warning('YOU DO NOT SPECIFY `--download` OPTION !!!') + + if args.tags: + logger.warning('`--tags` is under construction') + exit(1) + + if args.id: + _ = map(lambda id: id.strip(), args.id.split(',')) + 
args.id = set(map(int, filter(lambda id_: id_.isdigit(), _))) + + if (args.is_download or args.is_show) and not args.id and not args.keyword and not args.login: + logger.critical('Doujinshi id(s) are required for downloading') + parser.print_help() + exit(1) + + if not args.keyword and not args.id and not args.login: + parser.print_help() + exit(1) + + if args.threads <= 0: + args.threads = 1 + + elif args.threads > 15: + logger.critical('Maximum number of used threads is 15') + exit(1) + + if args.proxy: + proxy_url = urlparse(args.proxy) + if proxy_url.scheme not in ('http', 'https'): + logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(proxy_url.scheme)) + else: + constant.PROXY = {'http': args.proxy, 'https': args.proxy} + + return args diff --git a/nhentai/command.py b/nhentai/command.py index 840b076..cfbf2ad 100644 --- a/nhentai/command.py +++ b/nhentai/command.py @@ -1,69 +1,72 @@ -#!/usr/bin/env python2.7 -# coding: utf-8 -from __future__ import unicode_literals, print_function -import signal -import platform - -from nhentai.cmdline import cmd_parser, banner -from nhentai.parser import doujinshi_parser, search_parser, print_doujinshi, login_parser -from nhentai.doujinshi import Doujinshi -from nhentai.downloader import Downloader -from nhentai.logger import logger -from nhentai.constant import BASE_URL -from nhentai.utils import generate_html - - -def main(): - banner() - logger.info('Using mirror: {0}'.format(BASE_URL)) - options = cmd_parser() - - doujinshi_ids = [] - doujinshi_list = [] - - if options.login: - username, password = options.login.split(':', 1) - logger.info('Login to nhentai use credential \'%s:%s\'' % (username, '*' * len(password))) - for doujinshi_info in login_parser(username=username, password=password): - doujinshi_list.append(Doujinshi(**doujinshi_info)) - - if options.keyword: - doujinshis = search_parser(options.keyword, options.page) - print_doujinshi(doujinshis) - if options.is_download: - doujinshi_ids = 
map(lambda d: d['id'], doujinshis) - else: - doujinshi_ids = options.id - - if doujinshi_ids: - for id_ in doujinshi_ids: - doujinshi_info = doujinshi_parser(id_) - doujinshi_list.append(Doujinshi(**doujinshi_info)) - - if not options.is_show: - downloader = Downloader(path=options.output_dir, - thread=options.threads, timeout=options.timeout) - - for doujinshi in doujinshi_list: - doujinshi.downloader = downloader - doujinshi.download() - generate_html(options.output_dir, doujinshi) - - if not platform.system() == 'Windows': - logger.log(15, '🍺 All done.') - else: - logger.log(15, 'All done.') - - else: - [doujinshi.show() for doujinshi in doujinshi_list] - - -def signal_handler(signal, frame): - logger.error('Ctrl-C signal received. Quit.') - exit(1) - - -signal.signal(signal.SIGINT, signal_handler) - -if __name__ == '__main__': - main() +#!/usr/bin/env python2.7 +# coding: utf-8 +from __future__ import unicode_literals, print_function +import signal +import platform + +from nhentai.cmdline import cmd_parser, banner +from nhentai.parser import doujinshi_parser, search_parser, print_doujinshi, login_parser +from nhentai.doujinshi import Doujinshi +from nhentai.downloader import Downloader +from nhentai.logger import logger +from nhentai.constant import BASE_URL +from nhentai.utils import generate_html, generate_cbz + + +def main(): + banner() + logger.info('Using mirror: {0}'.format(BASE_URL)) + options = cmd_parser() + + doujinshi_ids = [] + doujinshi_list = [] + + if options.login: + username, password = options.login.split(':', 1) + logger.info('Login to nhentai use credential \'%s:%s\'' % (username, '*' * len(password))) + for doujinshi_info in login_parser(username=username, password=password): + doujinshi_list.append(Doujinshi(**doujinshi_info)) + + if options.keyword: + doujinshis = search_parser(options.keyword, options.page) + print_doujinshi(doujinshis) + if options.is_download: + doujinshi_ids = map(lambda d: d['id'], doujinshis) + else: + doujinshi_ids 
= options.id + + if doujinshi_ids: + for id_ in doujinshi_ids: + doujinshi_info = doujinshi_parser(id_) + doujinshi_list.append(Doujinshi(**doujinshi_info)) + + if not options.is_show: + downloader = Downloader(path=options.output_dir, + thread=options.threads, timeout=options.timeout) + + for doujinshi in doujinshi_list: + doujinshi.downloader = downloader + doujinshi.download() + if not options.is_nohtml and not options.is_cbz: + generate_html(options.output_dir, doujinshi) + elif options.is_cbz: + generate_cbz(options.output_dir, doujinshi) + + if not platform.system() == 'Windows': + logger.log(15, '🍻 All done.') + else: + logger.log(15, 'All done.') + + else: + [doujinshi.show() for doujinshi in doujinshi_list] + + +def signal_handler(signal, frame): + logger.error('Ctrl-C signal received. Quit.') + exit(1) + + +signal.signal(signal.SIGINT, signal_handler) + +if __name__ == '__main__': + main() diff --git a/nhentai/doujinshi.html b/nhentai/doujinshi.html deleted file mode 100644 index eae0e25..0000000 --- a/nhentai/doujinshi.html +++ /dev/null @@ -1,126 +0,0 @@ - - - - - {TITLE} - - - - - - -
-{IMAGES}
-
-
- - - -
-
- - - - \ No newline at end of file diff --git a/nhentai/parser.py b/nhentai/parser.py index bef5730..f405f7b 100644 --- a/nhentai/parser.py +++ b/nhentai/parser.py @@ -1,158 +1,158 @@ -# coding: utf-8 -from __future__ import unicode_literals, print_function - -import os -import re -import threadpool -import requests -from bs4 import BeautifulSoup -from tabulate import tabulate - -import nhentai.constant as constant -from nhentai.logger import logger - - -def request(method, url, **kwargs): - if not hasattr(requests, method): - raise AttributeError('\'requests\' object has no attribute \'{0}\''.format(method)) - - return requests.__dict__[method](url, proxies=constant.PROXY, verify=False, **kwargs) - - -def login_parser(username, password): - s = requests.Session() - s.proxies = constant.PROXY - s.verify = False - s.headers.update({'Referer': constant.LOGIN_URL}) - - s.get(constant.LOGIN_URL) - content = s.get(constant.LOGIN_URL).content - html = BeautifulSoup(content, 'html.parser') - csrf_token_elem = html.find('input', attrs={'name': 'csrfmiddlewaretoken'}) - - if not csrf_token_elem: - raise Exception('Cannot find csrf token to login') - csrf_token = csrf_token_elem.attrs['value'] - - login_dict = { - 'csrfmiddlewaretoken': csrf_token, - 'username_or_email': username, - 'password': password, - } - resp = s.post(constant.LOGIN_URL, data=login_dict) - if 'Invalid username (or email) or password' in resp.text: - logger.error('Login failed, please check your username and password') - exit(1) - - html = BeautifulSoup(s.get(constant.FAV_URL).content, 'html.parser') - count = html.find('span', attrs={'class': 'count'}) - if not count: - logger.error('Cannot get count of your favorites, maybe login failed.') - - count = int(count.text.strip('(').strip(')')) - if count == 0: - logger.warning('No favorites found') - return [] - pages = int(count / 25) - - if pages: - pages += 1 if count % (25 * pages) else 0 - else: - pages = 1 - - logger.info('Your have %d favorites in 
%d pages.' % (count, pages)) - - if os.getenv('DEBUG'): - pages = 1 - - ret = [] - doujinshi_id = re.compile('data-id="([\d]+)"') - - def _callback(request, result): - ret.append(result) - - thread_pool = threadpool.ThreadPool(5) - - for page in range(1, pages+1): - try: - logger.info('Getting doujinshi id of page %d' % page) - resp = s.get(constant.FAV_URL + '?page=%d' % page).text - ids = doujinshi_id.findall(resp) - requests_ = threadpool.makeRequests(doujinshi_parser, ids, _callback) - [thread_pool.putRequest(req) for req in requests_] - thread_pool.wait() - except Exception as e: - logger.error('Error: %s, continue', str(e)) - - return ret - - -def doujinshi_parser(id_): - if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()): - raise Exception('Doujinshi id({0}) is not valid'.format(id_)) - - id_ = int(id_) - logger.log(15, 'Fetching doujinshi information of id {0}'.format(id_)) - doujinshi = dict() - doujinshi['id'] = id_ - url = '{0}/{1}'.format(constant.DETAIL_URL, id_) - - try: - response = request('get', url).json() - except Exception as e: - logger.critical(str(e)) - exit(1) - - doujinshi['name'] = response['title']['english'] - doujinshi['subtitle'] = response['title']['japanese'] - doujinshi['img_id'] = response['media_id'] - doujinshi['ext'] = ''.join(map(lambda s: s['t'], response['images']['pages'])) - doujinshi['pages'] = len(response['images']['pages']) - - # gain information of the doujinshi - needed_fields = ['character', 'artist', 'language'] - for tag in response['tags']: - tag_type = tag['type'] - if tag_type in needed_fields: - if tag_type not in doujinshi: - doujinshi[tag_type] = tag['name'] - else: - doujinshi[tag_type] += tag['name'] - - return doujinshi - - -def search_parser(keyword, page): - logger.debug('Searching doujinshis of keyword {0}'.format(keyword)) - result = [] - try: - response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page}).json() - if 'result' not in 
response: - raise Exception('No result in response') - except requests.ConnectionError as e: - logger.critical(e) - logger.warn('If you are in China, please configure the proxy to fu*k GFW.') - exit(1) - - for row in response['result']: - title = row['title']['english'] - title = title[:85] + '..' if len(title) > 85 else title - result.append({'id': row['id'], 'title': title}) - - if not result: - logger.warn('Not found anything of keyword {}'.format(keyword)) - - return result - - -def print_doujinshi(doujinshi_list): - if not doujinshi_list: - return - doujinshi_list = [(i['id'], i['title']) for i in doujinshi_list] - headers = ['id', 'doujinshi'] - logger.info('Search Result\n' + - tabulate(tabular_data=doujinshi_list, headers=headers, tablefmt='rst')) - - -if __name__ == '__main__': - print(doujinshi_parser("32271")) +# coding: utf-8 +from __future__ import unicode_literals, print_function + +import os +import re +import threadpool +import requests +from bs4 import BeautifulSoup +from tabulate import tabulate + +import nhentai.constant as constant +from nhentai.logger import logger + + +def request(method, url, **kwargs): + if not hasattr(requests, method): + raise AttributeError('\'requests\' object has no attribute \'{0}\''.format(method)) + + return requests.__dict__[method](url, proxies=constant.PROXY, verify=False, **kwargs) + + +def login_parser(username, password): + s = requests.Session() + s.proxies = constant.PROXY + s.verify = False + s.headers.update({'Referer': constant.LOGIN_URL}) + + s.get(constant.LOGIN_URL) + content = s.get(constant.LOGIN_URL).content + html = BeautifulSoup(content, 'html.parser').encode("ascii") + csrf_token_elem = html.find('input', attrs={'name': 'csrfmiddlewaretoken'}) + + if not csrf_token_elem: + raise Exception('Cannot find csrf token to login') + csrf_token = csrf_token_elem.attrs['value'] + + login_dict = { + 'csrfmiddlewaretoken': csrf_token, + 'username_or_email': username, + 'password': password, + } + resp = 
s.post(constant.LOGIN_URL, data=login_dict) + if 'Invalid username (or email) or password' in resp.text: + logger.error('Login failed, please check your username and password') + exit(1) + + html = BeautifulSoup(s.get(constant.FAV_URL).content, 'html.parser').encode("ascii") + count = html.find('span', attrs={'class': 'count'}) + if not count: + logger.error('Cannot get count of your favorites, maybe login failed.') + + count = int(count.text.strip('(').strip(')')) + if count == 0: + logger.warning('No favorites found') + return [] + pages = int(count / 25) + + if pages: + pages += 1 if count % (25 * pages) else 0 + else: + pages = 1 + + logger.info('Your have %d favorites in %d pages.' % (count, pages)) + + if os.getenv('DEBUG'): + pages = 1 + + ret = [] + doujinshi_id = re.compile('data-id="([\d]+)"') + + def _callback(request, result): + ret.append(result) + + thread_pool = threadpool.ThreadPool(5) + + for page in range(1, pages+1): + try: + logger.info('Getting doujinshi id of page %d' % page) + resp = s.get(constant.FAV_URL + '?page=%d' % page).text + ids = doujinshi_id.findall(resp) + requests_ = threadpool.makeRequests(doujinshi_parser, ids, _callback) + [thread_pool.putRequest(req) for req in requests_] + thread_pool.wait() + except Exception as e: + logger.error('Error: %s, continue', str(e)) + + return ret + + +def doujinshi_parser(id_): + if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()): + raise Exception('Doujinshi id({0}) is not valid'.format(id_)) + + id_ = int(id_) + logger.log(15, 'Fetching doujinshi information of id {0}'.format(id_)) + doujinshi = dict() + doujinshi['id'] = id_ + url = '{0}/{1}'.format(constant.DETAIL_URL, id_) + + try: + response = request('get', url).json() + except Exception as e: + logger.critical(str(e)) + exit(1) + + doujinshi['name'] = response['title']['english'] + doujinshi['subtitle'] = response['title']['japanese'] + doujinshi['img_id'] = response['media_id'] + doujinshi['ext'] = 
''.join(map(lambda s: s['t'], response['images']['pages'])) + doujinshi['pages'] = len(response['images']['pages']) + + # gain information of the doujinshi + needed_fields = ['character', 'artist', 'language'] + for tag in response['tags']: + tag_type = tag['type'] + if tag_type in needed_fields: + if tag_type not in doujinshi: + doujinshi[tag_type] = tag['name'] + else: + doujinshi[tag_type] += tag['name'] + + return doujinshi + + +def search_parser(keyword, page): + logger.debug('Searching doujinshis of keyword {0}'.format(keyword)) + result = [] + try: + response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page}).json() + if 'result' not in response: + raise Exception('No result in response') + except requests.ConnectionError as e: + logger.critical(e) + logger.warn('If you are in China, please configure the proxy to fu*k GFW.') + exit(1) + + for row in response['result']: + title = row['title']['english'] + title = title[:85] + '..' if len(title) > 85 else title + result.append({'id': row['id'], 'title': title}) + + if not result: + logger.warn('Not found anything of keyword {}'.format(keyword)) + + return result + + +def print_doujinshi(doujinshi_list): + if not doujinshi_list: + return + doujinshi_list = [(i['id'], i['title']) for i in doujinshi_list] + headers = ['id', 'doujinshi'] + logger.info('Search Result\n' + + tabulate(tabular_data=doujinshi_list, headers=headers, tablefmt='rst')) + + +if __name__ == '__main__': + print(doujinshi_parser("32271")) diff --git a/nhentai/utils.py b/nhentai/utils.py index 21e47f1..d83ce0f 100644 --- a/nhentai/utils.py +++ b/nhentai/utils.py @@ -1,87 +1,115 @@ -# coding: utf-8 -from __future__ import unicode_literals, print_function - -import os -import string -from nhentai.logger import logger - - -class _Singleton(type): - """ A metaclass that creates a Singleton base class when called. 
""" - _instances = {} - - def __call__(cls, *args, **kwargs): - if cls not in cls._instances: - cls._instances[cls] = super(_Singleton, cls).__call__(*args, **kwargs) - return cls._instances[cls] - - -class Singleton(_Singleton(str('SingletonMeta'), (object,), {})): - pass - - -def urlparse(url): - try: - from urlparse import urlparse - except ImportError: - from urllib.parse import urlparse - - return urlparse(url) - - -def generate_html(output_dir='.', doujinshi_obj=None): - image_html = '' - previous = '' - - if doujinshi_obj is not None: - doujinshi_dir = os.path.join(output_dir, format_filename('%s-%s' % (doujinshi_obj.id, - doujinshi_obj.name[:200]))) - else: - doujinshi_dir = '.' - - file_list = os.listdir(doujinshi_dir) - file_list.sort() - - for index, image in enumerate(file_list): - if not os.path.splitext(image)[1] in ('.jpg', '.png'): - continue - - try: - next_ = file_list[file_list.index(image) + 1] - except IndexError: - next_ = '' - - image_html += '\n'\ - .format(image, 'current' if index == 0 else '', previous, next_) - previous = image - - with open(os.path.join(os.path.dirname(__file__), 'doujinshi.html'), 'r') as template: - html = template.read() - - if doujinshi_obj is not None: - title = doujinshi_obj.name - else: - title = 'nHentai HTML Viewer' - - data = html.format(TITLE=title, IMAGES=image_html) - with open(os.path.join(doujinshi_dir, 'index.html'), 'w') as f: - f.write(data) - - logger.log(15, 'HTML Viewer has been write to \'{0}\''.format(os.path.join(doujinshi_dir, 'index.html'))) - - -def format_filename(s): - """Take a string and return a valid filename constructed from the string. -Uses a whitelist approach: any characters not present in valid_chars are -removed. Also spaces are replaced with underscores. 
- -Note: this method may produce invalid filenames such as ``, `.` or `..` -When I use this method I prepend a date string like '2009_01_15_19_46_32_' -and append a file extension like '.txt', so I avoid the potential of using -an invalid filename. - -""" - valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits) - filename = ''.join(c for c in s if c in valid_chars) - filename = filename.replace(' ', '_') # I don't like spaces in filenames. - return filename +# coding: utf-8 +from __future__ import unicode_literals, print_function + +import os +import string +import zipfile +import shutil +from nhentai.logger import logger + + +class _Singleton(type): + """ A metaclass that creates a Singleton base class when called. """ + _instances = {} + + def __call__(cls, *args, **kwargs): + if cls not in cls._instances: + cls._instances[cls] = super(_Singleton, cls).__call__(*args, **kwargs) + return cls._instances[cls] + + +class Singleton(_Singleton(str('SingletonMeta'), (object,), {})): + pass + + +def urlparse(url): + try: + from urlparse import urlparse + except ImportError: + from urllib.parse import urlparse + + return urlparse(url) + +def readfile(path): + loc = os.path.dirname(__file__) + + with open(os.path.join(loc, path), 'r') as file: + return file.read() + +def generate_html(output_dir='.', doujinshi_obj=None): + image_html = '' + + if doujinshi_obj is not None: + doujinshi_dir = os.path.join(output_dir, format_filename('%s-%s' % (doujinshi_obj.id, + str(doujinshi_obj.name[:200])))) + else: + doujinshi_dir = '.' 
+ + file_list = os.listdir(doujinshi_dir) + file_list.sort() + + for image in file_list: + if not os.path.splitext(image)[1] in ('.jpg', '.png'): + continue + + image_html += '\n'\ + .format(image) + + html = readfile('viewer/index.html') + css = readfile('viewer/styles.css') + js = readfile('viewer/scripts.js') + + if doujinshi_obj is not None: + title = doujinshi_obj.name + else: + title = 'nHentai HTML Viewer' + + data = html.format(TITLE=title, IMAGES=image_html, SCRIPTS=js, STYLES=css) + with open(os.path.join(doujinshi_dir, 'index.html'), 'w') as f: + f.write(data) + + logger.log(15, 'HTML Viewer has been write to \'{0}\''.format(os.path.join(doujinshi_dir, 'index.html'))) + + +def generate_cbz(output_dir='.', doujinshi_obj=None): + if doujinshi_obj is not None: + doujinshi_dir = os.path.join(output_dir, format_filename('%s-%s' % (doujinshi_obj.id, + str(doujinshi_obj.name[:200])))) + cbz_filename = os.path.join(output_dir, format_filename('%s-%s.cbz' % (doujinshi_obj.id, + str(doujinshi_obj.name[:200])))) + else: + cbz_filename = './doujinshi.cbz' + doujinshi_dir = '.' + + file_list = os.listdir(doujinshi_dir) + file_list.sort() + + with zipfile.ZipFile(cbz_filename, 'w') as cbz_pf: + for image in file_list: + image_path = os.path.join(doujinshi_dir, image) + cbz_pf.write(image_path, image) + + shutil.rmtree(doujinshi_dir, ignore_errors=True) + logger.log(15, 'Comic Book CBZ file has been write to \'{0}\''.format(doujinshi_dir)) + + + + + + + +def format_filename(s): + """Take a string and return a valid filename constructed from the string. +Uses a whitelist approach: any characters not present in valid_chars are +removed. Also spaces are replaced with underscores. + +Note: this method may produce invalid filenames such as ``, `.` or `..` +When I use this method I prepend a date string like '2009_01_15_19_46_32_' +and append a file extension like '.txt', so I avoid the potential of using +an invalid filename. 
+ +""" + valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits) + filename = ''.join(c for c in s if c in valid_chars) + filename = filename.replace(' ', '_') # I don't like spaces in filenames. + return filename diff --git a/nhentai/viewer/index.html b/nhentai/viewer/index.html new file mode 100644 index 0000000..6d344e3 --- /dev/null +++ b/nhentai/viewer/index.html @@ -0,0 +1,24 @@ + + + + + {TITLE} + + + + + + +
+ +
+
+ + + + \ No newline at end of file diff --git a/nhentai/viewer/scripts.js b/nhentai/viewer/scripts.js new file mode 100644 index 0000000..09f976e --- /dev/null +++ b/nhentai/viewer/scripts.js @@ -0,0 +1,62 @@ +const pages = Array.from(document.querySelectorAll('img.image-item')); +let currentPage = 0; + +function changePage(pageNum) { + const previous = pages[currentPage]; + const current = pages[pageNum]; + + if (current == null) { + return; + } + + previous.classList.remove('current'); + current.classList.add('current'); + + currentPage = pageNum; + + const display = document.getElementById('dest'); + display.style.backgroundImage = `url("${current.src}")`; + + document.getElementById('page-num') + .innerText = [ + (pageNum + 1).toLocaleString(), + pages.length.toLocaleString() + ].join('\u200a/\u200a'); +} + +changePage(0); + +document.getElementById('list').onclick = event => { + if (pages.includes(event.target)) { + changePage(pages.indexOf(event.target)); + } +}; + +document.getElementById('image-container').onclick = event => { + const width = document.getElementById('image-container').clientWidth; + const clickPos = event.clientX / width; + + if (clickPos < 0.5) { + changePage(currentPage - 1); + } else { + changePage(currentPage + 1); + } +}; + +document.onkeypress = event => { + switch (event.key.toLowerCase()) { + // Previous Image + case 'arrowleft': + case 'a': + changePage(currentPage - 1); + break; + + // Next Image + case ' ': + case 'enter': + case 'arrowright': + case 'd': + changePage(currentPage + 1); + break; + } +}; \ No newline at end of file diff --git a/nhentai/viewer/styles.css b/nhentai/viewer/styles.css new file mode 100644 index 0000000..f9830dc --- /dev/null +++ b/nhentai/viewer/styles.css @@ -0,0 +1,69 @@ +*, *::after, *::before { + box-sizing: border-box; +} + +img { + vertical-align: middle; +} + +html, body { + display: flex; + background-color: #e8e6e6; + height: 100%; + width: 100%; + padding: 0; + margin: 0; + font-family: 
sans-serif; +} + +#list { + height: 100%; + overflow: auto; + width: 260px; + text-align: center; +} + +#list img { + width: 200px; + padding: 10px; + border-radius: 10px; + margin: 15px 0; + cursor: pointer; +} + +#list img.current { + background: #0003; +} + +#image-container { + flex: auto; + height: 100vh; + background: #222; + color: #fff; + text-align: center; + cursor: pointer; + -webkit-user-select: none; + user-select: none; + position: relative; +} + +#image-container #dest { + height: 100%; + width: 100%; + background-size: contain; + background-repeat: no-repeat; + background-position: center; +} + +#image-container #page-num { + position: absolute; + font-size: 18pt; + left: 10px; + bottom: 5px; + font-weight: bold; + opacity: 0.75; + text-shadow: /* Duplicate the same shadow to make it very strong */ + 0 0 2px #222, + 0 0 2px #222, + 0 0 2px #222; +} \ No newline at end of file