import os

import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from urllib.request import urlretrieve

import tweepy

# Twitter API credentials (fill in your own)
CONSUMER_KEY = 'INSERT HERE'
CONSUMER_SECRET = 'INSERT HERE'
ACCESS_KEY = 'INSERT HERE'
ACCESS_SECRET = 'INSERT HERE'

auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
api = tweepy.API(auth)


def scraper3():
    url = "https://suchwow.xyz/"
    imageroot = r'IMAGE FOLDER PATH HERE'

    page = requests.get(url)
    page.raise_for_status()  # fail fast on HTTP errors
    soup = BeautifulSoup(page.content, 'html.parser')
    results = {}

    def get_fullsize_path(thumbnail):
        # Thumbnail paths carry an extra extension segment (e.g. foo.thumb.png);
        # dropping the second-to-last dot segment yields the full-size image path.
        link_path_split = thumbnail.split(".")
        link_path_split.pop(-2)
        return '.'.join(link_path_split)

    def save_image(imageurl, filename):
        target = os.path.join(imageroot, filename)
        if os.path.isfile(target):
            # Already downloaded (and presumably tweeted) on an earlier run.
            raise FileExistsError(target)
        # urlretrieve returns (local_path, headers); only the path is needed.
        local_path, _ = urlretrieve(urljoin(url, imageurl), target)
        return local_path

    for card in soup.select('div.card'):
        thumbnail = card.img['src']
        imagepath = get_fullsize_path(thumbnail)
        imagename = imagepath.split('/')[-1]

        # Title and submitter live in p.title / p.subtitle inside each card.
        title = card.select_one('p.title').get_text().strip()
        submitter = card.select_one('p.subtitle').get_text().strip()
        # Numerical id of the post, taken from the card's permalink.
        postid = int(card.a['href'].split('/')[-1])

        try:
            localpath = save_image(imagepath, imagename)
        except FileExistsError:
            continue  # skip straight to the next card
        except Exception as exc:
            print(f"Skipping {imagename}: {exc}")
            continue
        size = os.path.getsize(localpath)

        status = (title + " | Credits to: " + submitter +
                  " | #Wownero $WOW #wow #cryptocurrency #privacy"
                  " #memecoin #doge #shitcoin")
        # update_with_media returns a Status object; keep only its id.
        tweet = api.update_with_media(filename=localpath, status=status)
        tweetid = tweet.id
        print("Tweet sent!")

        results[postid] = dict(
            postid=postid,
            imagename=imagename,
            title=title,
            submitter=submitter,
            size=size,
            tweetid=tweetid,
        )
    return results


if __name__ == '__main__':
    scraper3()
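
# Aside: update_with_media hits a long-deprecated Twitter endpoint and was
# renamed in tweepy 4.x. Below is a minimal sketch of the equivalent
# two-step flow (media_upload + update_status), assuming the same `api`
# object as above; post_image_tweet is a hypothetical helper, not part of
# the original bot:
def post_image_tweet(api, text, filepath):
    media = api.media_upload(filepath)  # upload the image, get a media_id
    return api.update_status(status=text, media_ids=[media.media_id])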