# Scrapes kiwifarms.net threads for posts above a certain ratings threshold.
# This filter can be set by the user, with several available options.
# REQUIRED PACKAGES: bs4, requests

from bs4 import BeautifulSoup
import enum
from postData import PostData
from prepOutput import outputSelect
from funcs import getThread, stop_page, getFilter, reactionSelect, getRatingsThreshold, getUsername
from collectPosts import collectPosts


class Filter(enum.Enum):
    """Enumeration of the filter types a user can choose from."""

    NA = 0  # not yet assigned
    positive = 1  # positive ratings
    negative = 2  # negative ratings
    neutral = 3  # neutral ratings
    total = 4  # total ratings
    specific = 5  # specific rating
    weighted = 6  # weighted rating
    user = 7  # specific user


###### Variables ######
# thread data
threadLink = ""  # link to thread
threadTitle = ""  # title of thread
pageData = ""  # BeautifulSoup object storing the thread page's HTML
savedPosts = []  # stores PostData objects for posts to be saved
pageNum = 0  # current page number
usernameFilter = ""  # username to filter by when applicable

# filter toggles
filterSelection = Filter.NA  # type of filter being used
ratingSelection = ""  # specific reaction to sort by when sorting by specific reaction
minRating = 0  # minimum number for ratings/score for filter
stopPage = 0  # page number to stop searching at, 0 means no page specified
######################

# collect link to thread
print("Welcome to the Kiwi Scraper!\n")
print("Please provide the link to the thread you want analyzed below.")
print("\nPlease note that this program will start searching at the first threadpage that you link to,",
      "so if you'd like the thread analyzed starting at the frst page, please link to the first page of the thread; otherwise,",
      "provide a link to the first page you want scraped.")
pageData, threadTitle, threadLink = getThread()
print("------------------------------------------\nThread:", threadTitle)
print("------------------------------------------")

# check if user would like to stop at a certain page
stopPage = stop_page()

# collect user preference for filtering
# collect filter type
filterSelection = Filter(getFilter())

# The specific-rating and specific-user filters are mutually exclusive, and
# each needs one extra piece of input from the user.
if filterSelection == Filter.specific:
    # filtering by specific rating: ask which rating to use
    ratingSelection = reactionSelect()
elif filterSelection == Filter.user:
    # filtering by specific user: collect the username
    usernameFilter = getUsername()

# collect ratings threshold if user is filtering by ratings
print("------------------------------------------")
if filterSelection == Filter.specific:
    minRating = getRatingsThreshold(filterSelection, ratingSelection)
elif filterSelection != Filter.user:
    minRating = getRatingsThreshold(filterSelection)

# get current page number
try:
    pageNum = int(pageData.find("li", class_ = "pageNav-page--current").find("a").text)
except (AttributeError, TypeError, ValueError):
    # No current-page nav element was found (find() gave None) or its text is
    # not a number: fall back to page 1. Only these lookup/parse failures are
    # caught, so Ctrl-C and SystemExit are no longer swallowed here.
    pageNum = 1

# scrape for posts
print("\nGrab some popcorn, this might take a while...\n")
savedPosts = collectPosts(
    pageData = pageData,
    filter = filterSelection,
    pageNum = pageNum,
    minRating = minRating,
    user = usernameFilter,
    stopPage = stopPage,
    ratingFilter=ratingSelection,
)

# ask user where to save json file and what to name it
print("------------------------------------------")
outputSelect(savedPosts)