Fixing parser for nhentai site update

nhentai's recent site update broke the parser; this commit fixes it. Based on the work in [my fork here](8c4a4f02bc).
This commit is contained in:
jwfiredragon 2020-06-10 22:39:35 -07:00 committed by GitHub
parent 3db77e0ce3
commit 44c424a321
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -179,11 +179,9 @@ def doujinshi_parser(id_):
doujinshi['ext'] = ext
pages = 0
for _ in doujinshi_info.find_all('div', class_=''):
pages = re.search('([\d]+) pages', _.text)
if pages:
pages = pages.group(1)
break
for _ in doujinshi_info.find_all('div', class_='tag-container field-name'):
if re.search('Pages:', _.text):
pages = _.find('span', class_='name').string
doujinshi['pages'] = int(pages)
# gain information of the doujinshi
@@ -192,7 +190,7 @@ def doujinshi_parser(id_):
for field in information_fields:
field_name = field.contents[0].strip().strip(':')
if field_name in needed_fields:
data = [sub_field.contents[0].strip() for sub_field in
data = [sub_field.find('span', attrs={'class': 'name'}).contents[0].strip() for sub_field in
field.find_all('a', attrs={'class': 'tag'})]
doujinshi[field_name.lower()] = ', '.join(data)