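# Scrape meme posts from the front page of suchwow.xyz, download each new
# full-size image, and tweet it with credits via the Twitter API (Tweepy).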
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from urllib.request import urlretrieve
import tweepy
import os
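# Twitter API credentials (fill these in before running)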
CONSUMER_KEY = 'INSERT HERE'
CONSUMER_SECRET = 'INSERT HERE'
ACCESS_KEY = 'INSERT HERE'
ACCESS_SECRET = 'INSERT HERE'
# Authenticate against the Twitter v1.1 API
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
api = tweepy.API(auth)
def scraper3():
    url = "https://suchwow.xyz/"
    imageroot = r'IMAGE FOLDER PATH HERE'
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')
    results = {}

    def get_fullsize_path(thumbnail):
        # The thumbnail URL carries an extra dot-separated segment before the
        # file extension; dropping it gives the full-size image path.
        link_path_split = thumbnail.split(".")
        link_path_split.pop(-2)
        return '.'.join(link_path_split)

    def save_image(imageurl, filename):
        # Refuse to re-download images that are already on disk
        if os.path.isfile(os.path.join(imageroot, filename)):
            raise FileExistsError(os.path.join(imageroot, filename))
        return urlretrieve(urljoin(url, imageurl), os.path.join(imageroot, filename))

    for card in soup.select('div.card'):
        thumbnail = card.img['src']
        imagepath = get_fullsize_path(thumbnail)
        imagename = imagepath.split('/')[-1]
        # Title and submitter live in the card's p.title and p.subtitle elements;
        # .select_one() is more robust than relying on element order.
        title = card.select_one('p.title').get_text().strip()
        submitter = card.select_one('p.subtitle').get_text().strip()
        # Numerical id of the post, taken from the last segment of its link
        postid = int(card.a['href'].split('/')[-1])
        try:
            # urlretrieve returns a (local_path, headers) tuple
            size = save_image(imagepath, imagename)
        except FileExistsError:
            continue  # already downloaded and tweeted; skip to the next card
        except Exception:
            continue  # skip cards whose image cannot be retrieved
        status_text = (title + " | Credits to: " + submitter
                       + " | #Wownero $WOW #wow #cryptocurrency #privacy #memecoin #doge #shitcoin ")
        # update_with_media returns a tweepy Status object for the new tweet
        tweetid = api.update_with_media(status=status_text,
                                        filename=os.path.join(imageroot, imagename))
        print("Tweet sent!")
        results[postid] = dict(
            postid=postid,
            imagename=imagename,
            title=title,
            submitter=submitter,
            size=size,
            tweetid=tweetid,
        )
    return results
if __name__ == '__main__':
    scraper3()