Fixing parser for nhentai site update

nhentai's recent site update broke the parser; this commit fixes it. Based on the work in [my fork here](https://github.com/jwfiredragon/nhentai/commit/8c4a4f02bc0d551b13fd5c9e7da5afe3381c47d3).
2026-05-24 14:26:52 +02:00 · 2020-06-10 22:39:35 -07:00
parent 3db77e0ce3
commit 44c424a321
1 changed file with 4 additions and 6 deletions
@@ -179,11 +179,9 @@ def doujinshi_parser(id_):
    doujinshi['ext'] = ext

    pages = 0
-    for _ in doujinshi_info.find_all('div', class_=''):
-        pages = re.search('([\d]+) pages', _.text)
-        if pages:
-            pages = pages.group(1)
-            break
+    for _ in doujinshi_info.find_all('div', class_='tag-container field-name'):
+        if re.search('Pages:', _.text):
+            pages = _.find('span', class_='name').string
    doujinshi['pages'] = int(pages)

    # gain information of the doujinshi
@@ -192,7 +190,7 @@ def doujinshi_parser(id_):
    for field in information_fields:
        field_name = field.contents[0].strip().strip(':')
        if field_name in needed_fields:
-            data = [sub_field.contents[0].strip() for sub_field in
+            data = [sub_field.find('span', attrs={'class': 'name'}).contents[0].strip() for sub_field in
                    field.find_all('a', attrs={'class': 'tag'})]
            doujinshi[field_name.lower()] = ', '.join(data)