|
|
|
import requests
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from urllib.parse import urljoin
|
|
|
|
from urllib.request import urlretrieve
|
|
|
|
from sqlite3 import Error
|
|
|
|
import numpy as np
|
|
|
|
import tweepy
|
|
|
|
import os
|
|
|
|
|
|
|
|
# Twitter API credentials. Replace each placeholder with the real value
# before running the script.
# NOTE(review): real secrets should not be committed to version control;
# consider loading them from the environment or a local config file.
CONSUMER_KEY = 'INSERT HERE'
CONSUMER_SECRET = 'INSERT HERE'
ACCESS_KEY = 'INSERT HERE'
ACCESS_SECRET = 'INSERT HERE'

# Build the OAuth handler once. Both clients below use exactly the same
# credentials, so constructing a second, identical handler is redundant.
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)

# Both module-level client names are kept (as two separate API objects) so
# that any code referring to either `twitter_API` or `api` keeps working.
twitter_API = tweepy.API(auth)
api = tweepy.API(auth)
|
|
|
|
|
|
|
|
def scraper3():
    """Scrape suchwow.xyz, download every new image and tweet it.

    The front page is fetched and each ``div.card`` element is processed:
    the full-size image path is derived from the card's thumbnail, the
    image is downloaded into ``imageroot`` and then posted through the
    module-level ``api`` client. A card whose image file already exists in
    ``imageroot`` is skipped, so the image folder doubles as the record of
    posts that were handled on earlier runs.

    Returns:
        dict: Maps the integer id of each newly processed post to a dict
        with the keys ``postid``, ``imagename``, ``title``, ``submitter``,
        ``size`` and ``tweetid``. Despite its name, ``size`` is the value
        returned by ``urlretrieve`` (a ``(local_filename, headers)``
        tuple), and ``tweetid`` is whatever ``api.update_with_media``
        returns (presumably a tweepy status object rather than a bare
        id -- TODO confirm).

    Raises:
        requests.RequestException: If the front page cannot be fetched,
            including when the request times out.
        Exception: Anything raised while parsing a card or posting the
            tweet propagates to the caller.
    """
    url = "https://suchwow.xyz/"
    imageroot = r'IMAGE FOLDER PATH HERE'
    # Without a timeout, requests waits indefinitely on a stalled
    # connection and the script would hang forever.
    page = requests.get(url, timeout=30)
    soup = BeautifulSoup(page.content, 'html.parser')

    results = {}

    def get_fullsize_path(thumbnail):
        """Return *thumbnail* without its second-to-last dot-separated part.

        For example ``"img/1.thumb.png"`` becomes ``"img/1.png"``. Raises
        ``IndexError`` when *thumbnail* contains no dot at all.
        """
        link_path_split = thumbnail.split(".")
        link_path_split.pop(-2)
        return '.'.join(link_path_split)

    def save_image(imageurl, filename):
        """Download *imageurl* (resolved against ``url``) into ``imageroot``.

        Returns the ``(local_filename, headers)`` tuple from ``urlretrieve``.
        Raises ``FileExistsError`` when the target file is already present.
        """
        target = os.path.join(imageroot, filename)
        if os.path.isfile(target):
            raise FileExistsError(target)
        try:
            return urlretrieve(urljoin(url, imageurl), target)
        except Exception:
            # The file did not exist before this call, so anything present
            # now is a partial download. Remove it; otherwise the next run
            # would treat the post as already handled and never retry it.
            if os.path.isfile(target):
                os.remove(target)
            raise

    for card in soup.select('div.card'):
        thumbnail = card.img['src']
        imagepath = get_fullsize_path(thumbnail)
        imagename = imagepath.split('/')[-1]
        # Title and submitter are looked up by CSS class rather than by
        # position, so extra <p> elements in the card do not matter.
        title = card.select_one('p.title').get_text().strip()
        submitter = card.select_one('p.subtitle').get_text().strip()
        # numerical id of the post (last path segment of the card's link)
        postid = int(card.a['href'].split('/')[-1])

        try:
            size = save_image(imagepath, imagename)
        except FileExistsError:
            # Image was downloaded on an earlier run: nothing new to post.
            continue
        except Exception as exc:
            # Best effort: one broken download must not stop the other
            # cards, but the failure is reported instead of being hidden.
            print("Skipping post {}: could not download {!r}: {}".format(
                postid, imagepath, exc))
            continue

        # NOTE(review): if posting fails, the exception propagates while the
        # downloaded image stays on disk, so later runs will skip this post.
        status = (
            "{} | Credits to: {} | #Wownero $WOW #wow #cryptocurrency "
            "#privacy #memecoin #doge #shitcoin "
        ).format(title, submitter)
        tweetid = api.update_with_media(
            status=status,
            filename=os.path.join(imageroot, imagename),
        )
        print("Tweet sent!")

        results[postid] = dict(
            postid=postid,
            imagename=imagename,
            title=title,
            submitter=submitter,
            size=size,
            tweetid=tweetid,
        )

    return results
|
|
|
|
|
|
|
|
# Run the scraper once at module level; the returned results dict is discarded.
scraper3()
|