From 73849e0d10bceda6cbf2ee3cebc6b469fa720cd6 Mon Sep 17 00:00:00 2001
From: Ricter Z
Date: Mon, 15 Feb 2016 23:28:49 +0800
Subject: [PATCH] feature: more detail of doujinshi

---
Review notes (this area, between the "---" marker and the first
"diff --git" line, is not part of the commit message):

* What the patch does: adds a dict subclass `DoujinshiInfo` whose missing
  attributes read as '', moves `subtitle` out of `Doujinshi.__init__` into
  `**kwargs` (stored on `self.info`), prints Characters / Authors /
  Language / Tags in `show()`, and makes `doujinshi_parser` collect those
  fields from the `div.field-name` elements as comma-separated strings.
* Line breaks and indentation in this file were reconstructed from the
  hunk line counts. Placement of blank context lines and the indentation
  of unchanged lines are assumptions -- verify against the target tree
  (or apply with `git apply --ignore-whitespace`) before relying on them.
* NOTE(review): `DoujinshiInfo.__getattr__` returns '' for *every* missing
  name, so misspelled attributes and getattr() probes for dunder names
  (e.g. `__deepcopy__`) also receive '' instead of AttributeError.
* NOTE(review): the `__repr__` context line shows an empty format string;
  the literal may have been lost in extraction -- confirm against the
  repository, otherwise the first hunk will not match.
* NOTE(review): the From: header carries no e-mail address; presumably it
  was stripped -- confirm before feeding this to `git am`.
* NOTE(review): `Doujinshi.__init__` still formats `self.id` with `%d`
  while `id` defaults to None; callers must always pass an integer id.

 nhentai/doujinshi.py | 21 ++++++++++++++++++---
 nhentai/parser.py    | 12 ++++++++++++
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/nhentai/doujinshi.py b/nhentai/doujinshi.py
index 164edaa..6e0436b 100644
--- a/nhentai/doujinshi.py
+++ b/nhentai/doujinshi.py
@@ -5,16 +5,27 @@ from constant import DETAIL_URL, IMAGE_URL
 from logger import logger
 
 
+class DoujinshiInfo(dict):
+    def __init__(self, **kwargs):
+        super(DoujinshiInfo, self).__init__(**kwargs)
+
+    def __getattr__(self, item):
+        try:
+            return dict.__getitem__(self, item)
+        except KeyError:
+            return ''
+
+
 class Doujinshi(object):
-    def __init__(self, name=None, subtitle=None, id=None, img_id=None, ext='jpg', pages=0):
+    def __init__(self, name=None, id=None, img_id=None, ext='jpg', pages=0, **kwargs):
         self.name = name
-        self.subtitle = subtitle
         self.id = id
         self.img_id = img_id
         self.ext = ext
         self.pages = pages
         self.downloader = None
         self.url = '%s/%d' % (DETAIL_URL, self.id)
+        self.info = DoujinshiInfo(**kwargs)
 
     def __repr__(self):
         return ''.format(self.name)
@@ -22,7 +33,11 @@ class Doujinshi(object):
     def show(self):
         table = [
             ["Doujinshi", self.name],
-            ["Subtitle", self.subtitle],
+            ["Subtitle", self.info.subtitle],
+            ["Characters", self.info.characters],
+            ["Authors", self.info.artists],
+            ["Language", self.info.language],
+            ["Tags", self.info.tags],
             ["URL", self.url],
             ["Pages", self.pages],
         ]
diff --git a/nhentai/parser.py b/nhentai/parser.py
index cb19dfc..f068670 100644
--- a/nhentai/parser.py
+++ b/nhentai/parser.py
@@ -37,6 +37,7 @@ def doujinshi_parser(id_):
 
     title = doujinshi_info.find('h1').text
     subtitle = doujinshi_info.find('h2')
+
     doujinshi['name'] = title
     doujinshi['subtitle'] = subtitle.text if subtitle else ''
 
@@ -55,6 +56,17 @@ def doujinshi_parser(id_):
             pages = pages.group(1)
             break
     doujinshi['pages'] = int(pages)
+
+    # gain information of the doujinshi
+    information_fields = doujinshi_info.find_all('div', attrs={'class': 'field-name'})
+    needed_fields = ['Characters', 'Artists', 'Language', 'Tags']
+    for field in information_fields:
+        field_name = field.contents[0].strip().strip(':')
+        if field_name in needed_fields:
+            data = [sub_field.contents[0].strip() for sub_field in
+                    field.find_all('a', attrs={'class': 'tag'})]
+            doujinshi[field_name.lower()] = ', '.join(data)
+
     return doujinshi
 
 