mirror of
https://github.com/RicterZ/nhentai.git
synced 2025-04-19 18:31:24 +02:00
39 lines
1019 B
Python
39 lines
1019 B
Python
import re
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
from constant import DETAIL_URL
|
|
|
|
|
|
# NOTE(review): presumably the labels of detail-page fields to extract;
# nothing in the code visible here reads this list -- confirm its usage
# before changing or removing it.
dojinshi_fields = ['Artists:']
|
|
|
|
|
|
def dojinshi_parser(id):
    """Fetch a dojinshi detail page and extract its basic metadata.

    ``id`` may be an ``int`` or a string made only of ASCII digits; it is
    normalised to ``int`` before the request URL is built.  (The parameter
    name shadows the builtin but is kept so existing callers keep working.)

    Returns a dict with the keys ``id`` (int), ``name`` (text of the
    ``<h1>``), ``subtitle`` (text of the ``<h2>``, or ``''`` when there is
    none) and ``pages`` (int, ``0`` when no "<n> pages" text is found).

    Raises ``Exception`` when ``id`` is not a valid identifier or when the
    fetched page has no ``<div id="info">`` element.  Errors raised by
    ``requests`` propagate unchanged.
    """
    # Accept real integers and purely numeric strings.  ``\Z`` (rather than
    # ``$``) also rejects a trailing newline such as '12\n'.
    is_numeric_string = isinstance(id, str) and re.match(r'[0-9]+\Z', id)
    if not (isinstance(id, int) or is_numeric_string):
        raise Exception('Dojinshi id(%s) is not valid' % str(id))
    id = int(id)

    dojinshi = dict()
    dojinshi['id'] = id
    url = '%s/%d/' % (DETAIL_URL, id)

    response = requests.get(url).content
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    html = BeautifulSoup(response, 'html.parser')
    dojinshi_info = html.find('div', attrs={'id': 'info'})
    if dojinshi_info is None:
        # Without this guard the failure surfaces as an AttributeError on
        # None, which says nothing about the actual cause.
        raise Exception('Dojinshi id(%d) detail info not found' % id)

    title = dojinshi_info.find('h1').text
    subtitle = dojinshi_info.find('h2')

    dojinshi['name'] = title
    dojinshi['subtitle'] = subtitle.text if subtitle else ''

    # Keep the match object separate from the counter so that a page with
    # no "<n> pages" text yields 0 instead of crashing on int(None).
    pages = 0
    for info_div in dojinshi_info.find_all('div', class_=''):
        match = re.search(r'(\d+) pages', info_div.text)
        if match:
            pages = int(match.group(1))
            break
    dojinshi['pages'] = pages
    return dojinshi
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test: fetch one known id and dump the parsed metadata.
    # The parenthesised call form runs on both Python 2 and Python 3.
    print(dojinshi_parser(32271))