Compare commits

..

6 Commits

Author SHA1 Message Date
d1d0c22af8 fix #349 2025-01-11 08:34:30 +08:00
803957ba88 fix #349 2025-01-11 08:33:59 +08:00
13b584a820 fix #371 and #324 2025-01-11 08:02:36 +08:00
be08fcf4cb fix #368 2025-01-11 07:54:28 +08:00
b585225308 fix #370 2025-01-11 07:52:51 +08:00
54af682848 fix #369 2025-01-11 07:50:41 +08:00
7 changed files with 35 additions and 17 deletions

View File

@ -129,7 +129,7 @@ Download your favorites with delay:
.. code-block:: bash .. code-block:: bash
nhentai --favorites --download --delay 1 nhentai --favorites --download --delay 1 --page 3-5,7
Format output doujinshi folder name: Format output doujinshi folder name:

View File

@ -79,7 +79,7 @@ def cmd_parser():
# page options # page options
parser.add_option('--page-all', dest='page_all', action='store_true', default=False, parser.add_option('--page-all', dest='page_all', action='store_true', default=False,
help='all search results') help='all search results')
parser.add_option('--page', '--page-range', type='string', dest='page', action='store', default='1', parser.add_option('--page', '--page-range', type='string', dest='page', action='store',
help='page number of search results. e.g. 1,2-5,14') help='page number of search results. e.g. 1,2-5,14')
parser.add_option('--sorting', '--sort', dest='sorting', action='store', default='popular', parser.add_option('--sorting', '--sort', dest='sorting', action='store', default='popular',
help='sorting of doujinshi (recent / popular / popular-[today|week])', help='sorting of doujinshi (recent / popular / popular-[today|week])',

View File

@ -48,7 +48,7 @@ def main():
if not options.is_download: if not options.is_download:
logger.warning('You do not specify --download option') logger.warning('You do not specify --download option')
doujinshis = favorites_parser() if options.page_all else favorites_parser(page=page_list) doujinshis = favorites_parser(page=page_list) if options.page else favorites_parser()
elif options.keyword: elif options.keyword:
if constant.CONFIG['language']: if constant.CONFIG['language']:

View File

@ -38,8 +38,12 @@ FAV_URL = f'{BASE_URL}/favorites/'
IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i.{urlparse(BASE_URL).hostname}/galleries' IMAGE_URL = f'{urlparse(BASE_URL).scheme}://i.{urlparse(BASE_URL).hostname}/galleries'
IMAGE_URL_MIRRORS = [ IMAGE_URL_MIRRORS = [
f'{urlparse(BASE_URL).scheme}://i1.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i2.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i3.{urlparse(BASE_URL).hostname}', f'{urlparse(BASE_URL).scheme}://i3.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i4.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i5.{urlparse(BASE_URL).hostname}', f'{urlparse(BASE_URL).scheme}://i5.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i6.{urlparse(BASE_URL).hostname}',
f'{urlparse(BASE_URL).scheme}://i7.{urlparse(BASE_URL).hostname}', f'{urlparse(BASE_URL).scheme}://i7.{urlparse(BASE_URL).hostname}',
] ]

View File

@ -47,7 +47,10 @@ class Downloader(Singleton):
for completed_task in asyncio.as_completed(tasks): for completed_task in asyncio.as_completed(tasks):
try: try:
result = await completed_task result = await completed_task
logger.info(f'{result[1]} download completed') if result[1]:
logger.info(f'{result[1]} download completed')
else:
logger.warning(f'{result[1]} download failed, return value {result[0]}')
except Exception as e: except Exception as e:
logger.error(f'An error occurred: {e}') logger.error(f'An error occurred: {e}')
@ -85,11 +88,11 @@ class Downloader(Singleton):
if not await self.save(filename, response): if not await self.save(filename, response):
logger.error(f'Can not download image {url}') logger.error(f'Can not download image {url}')
return 1, None return 1, url
except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e: except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError) as e:
if retried < 3: if retried < 3:
logger.info(f'Download {filename} failed, retrying({retried + 1}) times...') logger.warning(f'Download {filename} failed, retrying({retried + 1}) times...')
return await self.download( return await self.download(
url=url, url=url,
folder=folder, folder=folder,
@ -98,7 +101,8 @@ class Downloader(Singleton):
proxy=proxy, proxy=proxy,
) )
else: else:
return 0, None logger.warning(f'Download {filename} failed with 3 times retried, skipped')
return 0, url
except NHentaiImageNotExistException as e: except NHentaiImageNotExistException as e:
os.remove(save_file_path) os.remove(save_file_path)
@ -110,10 +114,10 @@ class Downloader(Singleton):
logger.error(f"Exception type: {type(e)}") logger.error(f"Exception type: {type(e)}")
traceback.print_stack() traceback.print_stack()
logger.critical(str(e)) logger.critical(str(e))
return 0, None return 0, url
except KeyboardInterrupt: except KeyboardInterrupt:
return -3, None return -3, url
return 1, url return 1, url
@ -152,6 +156,7 @@ class Downloader(Singleton):
return True return True
digit_length = len(str(len(queue))) digit_length = len(str(len(queue)))
logger.info(f'Total download pages: {len(queue)}')
coroutines = [ coroutines = [
self._semaphore_download(url, filename=os.path.basename(urlparse(url).path), length=digit_length) self._semaphore_download(url, filename=os.path.basename(urlparse(url).path), length=digit_length)
for url in queue for url in queue

View File

@ -151,13 +151,18 @@ def doujinshi_parser(id_, counter=0):
doujinshi['favorite_counts'] = favorite_counts.strip() doujinshi['favorite_counts'] = favorite_counts.strip()
doujinshi_cover = html.find('div', attrs={'id': 'cover'}) doujinshi_cover = html.find('div', attrs={'id': 'cover'})
img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$', # img_id = re.search('/galleries/([0-9]+)/cover.(jpg|png|gif|webp)$',
doujinshi_cover.a.img.attrs['data-src']) # doujinshi_cover.a.img.attrs['data-src'])
img_id = re.search(r'/galleries/(\d+)/cover\.\w+$', doujinshi_cover.a.img.attrs['data-src'])
ext = [] ext = []
for i in html.find_all('div', attrs={'class': 'thumb-container'}): for i in html.find_all('div', attrs={'class': 'thumb-container'}):
_, ext_name = os.path.basename(i.img.attrs['data-src']).rsplit('.', 1) base_name = os.path.basename(i.img.attrs['data-src'])
ext.append(ext_name) ext_name = base_name.split('.')
if len(ext_name) == 3:
ext.append(ext_name[1])
else:
ext.append(ext_name[-1])
if not img_id: if not img_id:
logger.critical(f'Tried yo get image id failed of id: {id_}') logger.critical(f'Tried yo get image id failed of id: {id_}')

View File

@ -18,6 +18,7 @@ from nhentai.logger import logger
from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database from nhentai.serializer import serialize_json, serialize_comic_xml, set_js_database
MAX_FIELD_LENGTH = 100 MAX_FIELD_LENGTH = 100
EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
def request(method, url, **kwargs): def request(method, url, **kwargs):
@ -100,8 +101,8 @@ def parse_doujinshi_obj(
) -> Tuple[str, str]: ) -> Tuple[str, str]:
filename = f'./doujinshi.{file_type}' filename = f'./doujinshi.{file_type}'
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
if doujinshi_obj is not None: if doujinshi_obj is not None:
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
_filename = f'{doujinshi_obj.filename}.{file_type}' _filename = f'{doujinshi_obj.filename}.{file_type}'
if file_type == 'cbz': if file_type == 'cbz':
@ -111,6 +112,8 @@ def parse_doujinshi_obj(
_filename = _filename.replace('/', '-') _filename = _filename.replace('/', '-')
filename = os.path.join(output_dir, _filename) filename = os.path.join(output_dir, _filename)
else:
doujinshi_dir = './'
return doujinshi_dir, filename return doujinshi_dir, filename
@ -130,7 +133,7 @@ def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
file_list.sort() file_list.sort()
for image in file_list: for image in file_list:
if not os.path.splitext(image)[1] in ('.jpg', '.png', '.webp'): if not os.path.splitext(image)[1] in EXTENSIONS:
continue continue
image_html += f'<img src="{image}" class="image-item"/>\n' image_html += f'<img src="{image}" class="image-item"/>\n'
@ -256,7 +259,7 @@ def generate_doc(file_type='', output_dir='.', doujinshi_obj=None, regenerate=Fa
import img2pdf import img2pdf
"""Write images to a PDF file using img2pdf.""" """Write images to a PDF file using img2pdf."""
file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp'))] file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(EXTENSIONS)]
file_list.sort() file_list.sort()
logger.info(f'Writing PDF file to path: {filename}') logger.info(f'Writing PDF file to path: {filename}')
@ -308,7 +311,8 @@ def signal_handler(_signal, _frame):
def paging(page_string): def paging(page_string):
# 1,3-5,14 -> [1, 3, 4, 5, 14] # 1,3-5,14 -> [1, 3, 4, 5, 14]
if not page_string: if not page_string:
return [] # default, the first page
return [1]
page_list = [] page_list = []
for i in page_string.split(','): for i in page_string.split(','):