Kiwi_Scraper/scraper/scraper.py

#Scrapes kiwifarms.net threads for posts above a certain ratings threshold.
#This filter can be set by the user, with several available options.

#REQUIRED PACKAGES: bs4, requests
from bs4 import BeautifulSoup
import enum
from postData import PostData
from prepOutput import outputSelect
from funcs import getThread, stop_page, getFilter, reactionSelect, getRatingsThreshold, getUsername
from collectPosts import collectPosts

class Filter(enum.Enum):
    """Kinds of post filter a user can pick for a scrape.

    Members are looked up by their integer value elsewhere in this script
    (``Filter(getFilter())``), so the numbers below are part of the contract.
    """

    # Placeholder used before the user has made a choice.
    NA = 0

    # Filters driven by a category of ratings.
    positive = 1  # only positive ratings count
    negative = 2  # only negative ratings count
    neutral = 3   # only neutral ratings count
    total = 4     # every rating counts

    # Filters driven by a single reaction or a computed score.
    specific = 5  # one particular rating chosen by the user
    weighted = 6  # weighted rating score

    # Filter driven by post author rather than ratings.
    user = 7      # posts by one particular user

###### Variables ######
# Placeholder defaults; each of these is reassigned further down the script
# once the user has answered the prompts.

# -- thread state --
threadLink = ""      # URL of the thread
threadTitle = ""     # title of the thread
pageData = ""        # will hold the BeautifulSoup object for the thread page's HTML
pageNum = 0          # page number currently being looked at
savedPosts = []      # PostData objects for the posts that will be saved

# -- filter settings --
filterSelection = Filter.NA  # which kind of filter is in use (not yet chosen)
ratingSelection = ""         # the reaction to sort by, only for the "specific" filter
usernameFilter = ""          # username to filter by, only when applicable
minRating = 0                # minimum rating count/score a post needs to pass the filter
stopPage = 0                 # last page to search; 0 means no stop page was given

######################

# ---- Main script flow ----
# Prompts the user for a thread and filter settings, scrapes the matching
# posts and hands them to the output step.

# collect link to thread
print("Welcome to the Kiwi Scraper!\n")
print("Please provide the link to the thread you want analyzed below.")
print("\nPlease note that this program will start searching at the first threadpage that you link to,",
    "so if you'd like the thread analyzed starting at the frst page, please link to the first page of the thread; otherwise,",
    "provide a link to the first page you want scraped.")

pageData, threadTitle, threadLink = getThread()

print("------------------------------------------\nThread:", threadTitle)
print("------------------------------------------")

# check if user would like to stop at a certain page
stopPage = stop_page()

# collect user preference for filtering
# collect filter type (getFilter's return value is converted to a Filter member)
filterSelection = Filter(getFilter())

# if filtering by specific rating, ask which rating to use
if filterSelection == Filter.specific:
    ratingSelection = reactionSelect()

# if filtering by specific user, collect username
if filterSelection == Filter.user:
    usernameFilter = getUsername()

# collect ratings threshold if user is filtering by ratings
# (the user filter is the only one that needs no threshold)
print("------------------------------------------")
if filterSelection == Filter.specific:
    minRating = getRatingsThreshold(filterSelection, ratingSelection)
elif filterSelection != Filter.user:
    minRating = getRatingsThreshold(filterSelection)

# get current page number
# Fall back to page 1 when the page-navigation element is missing
# (find() returned None -> AttributeError) or its text is not a number
# (ValueError). Only these two are caught so that Ctrl-C and unrelated
# errors are no longer silently swallowed.
try:
    pageNum = int(pageData.find("li", class_ = "pageNav-page--current").find("a").text)
except (AttributeError, ValueError):
    pageNum = 1

# scrape for posts
print("\nGrab some popcorn, this might take a while...\n")
savedPosts = collectPosts(
    pageData = pageData,
    filter = filterSelection,
    pageNum = pageNum,
    minRating = minRating,
    user = usernameFilter,
    stopPage = stopPage,
    ratingFilter = ratingSelection,
)

# ask user where to save json file and what to name it
print("------------------------------------------")

outputSelect(savedPosts)