mirror of
https://github.com/RicterZ/nhentai.git
synced 2025-12-15 13:35:41 +01:00
multithreading download
This commit is contained in:
@@ -1 +1,39 @@
|
||||
__author__ = 'ricter'
|
||||
import re
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from constant import DETAIL_URL
|
||||
|
||||
|
||||
# NOTE(review): presumably the labels of detail-page fields to parse; this
# list is not referenced by the code visible here -- confirm usage.
dojinshi_fields = ['Artists:']
|
||||
|
||||
|
||||
def dojinshi_parser(id):
    """Fetch a dojinshi detail page and extract its basic metadata.

    :param id: dojinshi id, either an int or a string of decimal digits.
    :return: dict with the keys ``id`` (int), ``name`` (text of the ``<h1>``
        inside the ``div#info`` element), ``subtitle`` (text of the ``<h2>``,
        or ``''`` when there is none) and ``pages`` (int, ``0`` when no
        "<n> pages" text is found).
    :raises Exception: if ``id`` is neither an int nor a digit-only string.
    """
    # Accept ints and digit-only strings. The previous condition
    # (``not isinstance(id, int) or ...``) rejected every string, because a
    # str is never an int.
    is_digit_string = isinstance(id, str) and id.isdigit()
    if not (isinstance(id, int) or is_digit_string):
        raise Exception('Dojinshi id(%s) is not valid' % str(id))
    id = int(id)

    dojinshi = dict()
    dojinshi['id'] = id
    url = '%s/%d/' % (DETAIL_URL, id)

    response = requests.get(url).content
    html = BeautifulSoup(response)
    dojinshi_info = html.find('div', attrs={'id': 'info'})

    title = dojinshi_info.find('h1').text
    subtitle = dojinshi_info.find('h2')

    dojinshi['name'] = title
    dojinshi['subtitle'] = subtitle.text if subtitle else ''

    # Keep the regex match in its own variable so that a non-matching div
    # cannot overwrite the default page count with None (which previously
    # made int(pages) raise TypeError when no div contained a page count).
    pages = 0
    for div in dojinshi_info.find_all('div', class_=''):
        match = re.search(r'(\d+) pages', div.text)
        if match:
            pages = match.group(1)
            break
    dojinshi['pages'] = int(pages)
    return dojinshi
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: parse one sample id and print the resulting dict.
    # A parenthesised single-argument print behaves the same on Python 2
    # and Python 3, whereas the print statement is a syntax error on 3.
    print(dojinshi_parser(32271))
|
||||
Reference in New Issue
Block a user