From 44c424a3215e6c9e2ae1b6ef1b4e290763c190cd Mon Sep 17 00:00:00 2001 From: jwfiredragon Date: Wed, 10 Jun 2020 22:39:35 -0700 Subject: [PATCH] Fixing parser for nhentai site update nhentai's recent site update broke the parser; this patch fixes it. Based on the work in [my fork here](https://github.com/jwfiredragon/nhentai/commit/8c4a4f02bc0d551b13fd5c9e7da5afe3381c47d3). --- nhentai/parser.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/nhentai/parser.py b/nhentai/parser.py index 62224d5..97c1b15 100644 --- a/nhentai/parser.py +++ b/nhentai/parser.py @@ -179,11 +179,9 @@ def doujinshi_parser(id_): doujinshi['ext'] = ext pages = 0 - for _ in doujinshi_info.find_all('div', class_=''): - pages = re.search('([\d]+) pages', _.text) - if pages: - pages = pages.group(1) - break + for _ in doujinshi_info.find_all('div', class_='tag-container field-name'): + if re.search('Pages:', _.text): + pages = _.find('span', class_='name').string doujinshi['pages'] = int(pages) # gain information of the doujinshi @@ -192,7 +190,7 @@ def doujinshi_parser(id_): for field in information_fields: field_name = field.contents[0].strip().strip(':') if field_name in needed_fields: - data = [sub_field.contents[0].strip() for sub_field in + data = [sub_field.find('span', attrs={'class': 'name'}).contents[0].strip() for sub_field in field.find_all('a', attrs={'class': 'tag'})] doujinshi[field_name.lower()] = ', '.join(data)