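# Scrape meme posts from the front page of suchwow.xyz, download each new
# full-size image, and tweet it with credits via the Twitter API (Tweepy).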
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from urllib.request import urlretrieve
import tweepy
import os
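# Twitter API credentials (fill these in before running)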
CONSUMER_KEY = 'INSERT HERE'
CONSUMER_SECRET = 'INSERT HERE'
ACCESS_KEY = 'INSERT HERE'
ACCESS_SECRET = 'INSERT HERE'
# Authenticate against the Twitter v1.1 API
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
api = tweepy.API(auth)
def scraper3():
    url = "https://suchwow.xyz/"
    imageroot = r'IMAGE FOLDER PATH HERE'
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')
    results = {}

    def get_fullsize_path(thumbnail):
        # The thumbnail URL carries an extra dot-separated segment before the
        # file extension; dropping it gives the full-size image path.
        link_path_split = thumbnail.split(".")
        link_path_split.pop(-2)
        return '.'.join(link_path_split)

    def save_image(imageurl, filename):
        # Refuse to re-download images that are already on disk
        if os.path.isfile(os.path.join(imageroot, filename)):
            raise FileExistsError(os.path.join(imageroot, filename))
        return urlretrieve(urljoin(url, imageurl), os.path.join(imageroot, filename))

    for card in soup.select('div.card'):
        thumbnail = card.img['src']
        imagepath = get_fullsize_path(thumbnail)
        imagename = imagepath.split('/')[-1]
        # Title and submitter live in the card's p.title and p.subtitle elements;
        # .select_one() is more robust than relying on element order.
        title = card.select_one('p.title').get_text().strip()
        submitter = card.select_one('p.subtitle').get_text().strip()
        # Numerical id of the post, taken from the last segment of its link
        postid = int(card.a['href'].split('/')[-1])
        try:
            # urlretrieve returns a (local_path, headers) tuple
            size = save_image(imagepath, imagename)
        except FileExistsError:
            continue  # already downloaded and tweeted; skip to the next card
        except Exception:
            continue  # skip cards whose image cannot be retrieved
        status_text = (title + " | Credits to: " + submitter
                       + " | #Wownero $WOW #wow #cryptocurrency #privacy #memecoin #doge #shitcoin ")
        # update_with_media returns a tweepy Status object for the new tweet
        tweetid = api.update_with_media(status=status_text,
                                        filename=os.path.join(imageroot, imagename))
        print("Tweet sent!")
        results[postid] = dict(
            postid=postid,
            imagename=imagename,
            title=title,
            submitter=submitter,
            size=size,
            tweetid=tweetid,
        )
    return results
if __name__ == '__main__':
    scraper3()