Fixing parser for nhentai site update

nhentai's recent site update broke the parser; this commit fixes it. Based on the work in [my fork here](8c4a4f02bc).
This commit is contained in:
jwfiredragon 2020-06-10 22:39:35 -07:00 committed by GitHub
parent 3db77e0ce3
commit 44c424a321
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -179,11 +179,9 @@ def doujinshi_parser(id_):
doujinshi['ext'] = ext doujinshi['ext'] = ext
pages = 0 pages = 0
for _ in doujinshi_info.find_all('div', class_=''): for _ in doujinshi_info.find_all('div', class_='tag-container field-name'):
pages = re.search('([\d]+) pages', _.text) if re.search('Pages:', _.text):
if pages: pages = _.find('span', class_='name').string
pages = pages.group(1)
break
doujinshi['pages'] = int(pages) doujinshi['pages'] = int(pages)
# gain information of the doujinshi # gain information of the doujinshi
@@ -192,7 +190,7 @@ def doujinshi_parser(id_):
for field in information_fields: for field in information_fields:
field_name = field.contents[0].strip().strip(':') field_name = field.contents[0].strip().strip(':')
if field_name in needed_fields: if field_name in needed_fields:
data = [sub_field.contents[0].strip() for sub_field in data = [sub_field.find('span', attrs={'class': 'name'}).contents[0].strip() for sub_field in
field.find_all('a', attrs={'class': 'tag'})] field.find_all('a', attrs={'class': 'tag'})]
doujinshi[field_name.lower()] = ', '.join(data) doujinshi[field_name.lower()] = ', '.join(data)