"""Scrape a Newgrounds movies page with Selenium and download the MP4 files."""

import asyncio
import os
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Seconds to wait for the download server to connect / send data.
_DOWNLOAD_TIMEOUT = 60
# Number of bytes written to disk per streamed chunk.
_CHUNK_SIZE = 1 << 16


def scroll_and_scrape(driver, url, scrolls=30, delay=0.2):
    """Let the user log in, open *url*, scroll down and return the page HTML.

    Args:
        driver: Selenium WebDriver used to drive the browser.
        url: Page to open once the user has confirmed the login.
        scrolls: How many PAGE_DOWN key presses to send to the page body.
        delay: Seconds to sleep after each key press.

    Returns:
        The value of ``driver.page_source`` after scrolling.
    """
    driver.get("https://www.newgrounds.com/")
    print("Please login to your Newgrounds account and press enter to continue.")
    input()

    driver.get(url)
    print("Scrolling down the page to load all the movies...")
    body = driver.find_element(By.CSS_SELECTOR, 'body')
    for _ in range(scrolls):
        body.send_keys(Keys.PAGE_DOWN)
        time.sleep(delay)
    return driver.page_source


def find_links(page_content, class_name):
    """Print and return every ``<a>`` tag with the given CSS class.

    Args:
        page_content: HTML text to parse.
        class_name: CSS class the anchors must carry.

    Returns:
        The list of matching BeautifulSoup tags; each one has an ``href``.
    """
    soup = BeautifulSoup(page_content, 'html.parser')
    # href=True skips anchors without a target, which would otherwise raise
    # KeyError as soon as their 'href' is read.
    links = soup.find_all('a', class_=class_name, href=True)
    for position, link in enumerate(links, start=1):
        print(position, link['href'])
    return links


def find_src(driver, link, class_name):
    """Open *link*, click the player element and return the MP4 source URL.

    Args:
        driver: Selenium WebDriver used to drive the browser.
        link: URL of the page to open.
        class_name: Despite the name, this is the element *id* of the player
            to click (it is looked up with ``By.ID``).

    Returns:
        The ``src`` of the first ``<source type="video/mp4">`` tag with its
        query string removed, or ``None`` when the player is missing, cannot
        be clicked, or no source containing "uploads" is present.
    """
    driver.get(link)

    # find_elements returns an empty list instead of raising when nothing
    # matches, so a missing player is a normal "no video" result.
    players = driver.find_elements(By.ID, class_name)
    if not players:
        return None
    try:
        players[0].click()
    except WebDriverException:
        return None

    # Give the page a moment to insert the <source> tag after the click.
    time.sleep(1)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    sources = soup.find_all('source', type='video/mp4')
    if not sources:
        return None

    src = sources[0].get('src')
    if not src or "uploads" not in src:
        return None
    return src.split('?')[0]


def _download_to_file(location, url):
    """Blocking helper: stream *url* into *location*, named after the URL."""
    name = url.split('/')[-1]
    path = os.path.join(location, name)
    # Write to a temporary name first so an interrupted transfer never leaves
    # a truncated file that looks like a finished video.
    partial_path = path + '.part'
    try:
        with requests.get(url, stream=True, timeout=_DOWNLOAD_TIMEOUT) as response:
            # Fail loudly instead of saving an HTTP error page as a video.
            response.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=_CHUNK_SIZE):
                    f.write(chunk)
        os.replace(partial_path, path)
    except Exception:
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise


async def download_video(location, url):
    """Download *url* into the directory *location* without blocking the loop.

    The file name is the last path segment of *url*. The blocking transfer
    runs in the event loop's default executor.

    Raises:
        requests.RequestException: On connection errors, timeouts or a
            non-success HTTP status.
        OSError: If the file cannot be written.
    """
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, _download_to_file, location, url)


async def main():
    """Scrape every movie of the configured account and download the videos."""
    movies_url = 'https://derpixon.newgrounds.com/movies'
    # The sub-domain (text between 'https://' and the first dot) names the
    # output folder.
    folder_name = movies_url.split('.')[0].replace('https://', '')
    os.makedirs(folder_name, exist_ok=True)

    dl_tasks = []
    driver = webdriver.Chrome()
    try:
        page_content = scroll_and_scrape(driver, movies_url)
        print("Scraping movie links...")
        links = find_links(page_content, 'inline-card-portalsubmission')

        print("Scraping the video source & downloading the videos in background...\n")
        total = len(links)
        for position, link in enumerate(links, start=1):
            # asyncio.sleep (not time.sleep) hands control to the event loop
            # so download tasks created earlier can actually start.
            await asyncio.sleep(3)
            print("\033[AVideo ", position, " of ", total)
            video_url = find_src(driver, link['href'], 'ng-global-video-player')
            if video_url is not None:
                dl_tasks.append(
                    asyncio.create_task(download_video(folder_name, video_url))
                )
    finally:
        # Always close the browser, even when scraping fails part-way.
        driver.quit()

    print("Downloading ", len(dl_tasks), " videos...")
    # return_exceptions=True keeps one failed download from aborting the rest.
    results = await asyncio.gather(*dl_tasks, return_exceptions=True)
    for result in results:
        if isinstance(result, Exception):
            print("Download failed:", result)
    print("Done.")


if __name__ == "__main__":
    asyncio.run(main())