Files

147 lines
5.2 KiB
Python

#collection of functions used by scraper.py along with some others
from bs4 import BeautifulSoup
import requests
############# locally used functions ###########################
def collectInput(range):
    """Prompt until the user enters a whole number from 1 to ``range``.

    Args:
        range: Highest choice number accepted (inclusive). The name shadows
            the builtin but is kept so existing keyword callers still work.

    Returns:
        The validated choice as an int.
    """
    while True:
        choice = input(": ")
        # isdecimal() instead of isdigit(): isdigit() also accepts characters
        # such as "²" that int() cannot parse, which raised ValueError here.
        if choice.isdecimal() and 0 < int(choice) <= range:
            return int(choice)
        print(f"Error: Input must be a number between 1 and {range}. Try again.")
def yesno(question):
    """Ask a yes/no question until the answer is "y" or "n" (any case).

    question = prompt text shown on every attempt

    Returns True for yes and False for no.
    """
    answer = input(question).lower()
    while answer not in ("y", "n"):
        print("Invalid input. Try again.")
        answer = input(question).lower()
    return answer == "y"
##################### functions used outside file ########################
def getThread():
    """Prompt for a thread URL until one yields a page with a thread title.

    Returns:
        A tuple ``(pageData, threadTitle, threadLink)``: the parsed
        BeautifulSoup page, the thread title with any leading label removed,
        and the link exactly as the user typed it.
    """
    headers = {"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}
    while True:
        # Read the link outside the try block so Ctrl-C or end of input
        # ends the prompt loop instead of being reported as a bad link.
        threadLink = input("\n: ")
        print("\nFetching thread...")
        try:
            # A timeout keeps an unresponsive server from hanging the prompt.
            response = requests.get(threadLink, headers=headers, timeout=30)
            pageData = BeautifulSoup(response.text, "lxml")
            # find() returns None when the page has no thread title; the
            # attribute access below then raises and the link is rejected.
            titleTag = pageData.find("h1", class_="p-title-value")
            threadTitle = titleTag.text
            # Only look for the label inside the title element: the stripping
            # below edits the title text, so a label elsewhere on the page
            # must not trigger it.
            if titleTag.find("span", class_="label") is not None:
                # Drop everything up to the first non-breaking space.
                threadTitle = " ".join(threadTitle.split("\xa0")[1:])
        except Exception:
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
            print("Error: There was either trouble reaching the webpage, or the provided link is invalid.\nTry again.")
            continue
        return pageData, threadTitle, threadLink
def stop_page():
    """Ask whether archiving should stop at a particular page.

    Returns:
        The page number to stop at, or 0 when the user wants all pages
        archived.
    """
    if not yesno("Would you like to stop at a certain page (y/n)?: "):
        return 0
    stopPage = input("What page do you want to stop at?: ")
    # isdecimal() instead of isdigit(): isdigit() also accepts characters
    # such as "²" that int() cannot parse, which raised ValueError below.
    while not stopPage.isdecimal():
        print("Input must be a number. Try again.")
        stopPage = input("What page do you want to stop at?: ")
    return int(stopPage)
def getFilter():
    """Show the filter menu and return the user's numbered choice (1-7)."""
    menu = """------------------------------------------
How would you like the thread to be filtered?
(1) positive ratings
(2) negative ratings
(3) neutral ratings
(4) total ratings
(5) specific rating
(6) weighted score (positive ratings count as positive points, negative ratings count as negative points, and neutral ratings don't count)
(7) specific user
"""
    print(menu)
    selection = collectInput(7)
    return selection
def reactionSelect():
    """Show the numbered reaction menu and return the chosen reaction name.

    Returns the reaction as a string, e.g. "Like" for choice 1.
    """
    # Listed in menu order, so menu number N maps to index N - 1.
    reactions = (
        "Like", "Dislike", "Agree", "Disagree", "Winner", "Informative",
        "Thunk-Provoking", "Feels", "Islamic Content", "Lunacy", "Autistic",
        "Horrifying", "Optimistic", "TMI", "Late", "Dumb",
        "Mad at the Internet", "Semper Fidelis", "Deviant", "Achievement",
        "DRINK!",
    )
    print("""------------------------------------------
Which rating do you want filtered?
(1) Like (2) Dislike
(3) Agree (4) Disagree
(5) Winner (6) Informative
(7) Thunk-Provoking (8) Feels
(9) Islamic Content (10) Lunacy
(11) Autistic (12) Horrifying
(13) Optimistic (14) TMI
(15) Late (16) Dumb
(17) Mad at the Internet (18) Semper Fidelis
(19) Deviant (20) Achievement
(21) DRINK!
""")
    index = collectInput(len(reactions)) - 1
    return reactions[index]
def getUsername():
    """Ask for a username to filter by, repeating until the user confirms it.

    Returns the confirmed username exactly as typed.
    """
    while True:
        user = input("Enter username to filter by: ")
        # Have the user confirm the spelling before accepting the name.
        confirmed = yesno(f"User '{user}' selected. Is this correct? (Make sure you spelled it correctly!)\n: ")
        if confirmed:
            return user
def getRatingsThreshold(filter, reaction = ""):
    """Prompt for the minimum rating count/score used by the chosen filter.

    Args:
        filter: Filter enum member (from scraper.py); only its ``value`` and
            ``name`` attributes are read. The name shadows the builtin but is
            kept so existing keyword callers still work.
        reaction: Reaction to filter by when filtering by a specific
            reaction (filter value 5).

    Returns:
        The user's ratings threshold as a non-negative int.
    """
    if filter.value == 6: #weighted score
        print("Enter a minimum score for posts.")
    elif filter.value == 5: #specific reaction
        print(f"Enter a minumum number of '{reaction}' ratings.")
    else: #other filters
        print(f"Enter a minumum number of {filter.name} ratings.")
    minRating = input(": ")
    # isdecimal() instead of isdigit(): isdigit() also accepts characters
    # such as "²" that int() cannot parse, which raised ValueError below.
    while not minRating.isdecimal():
        print("Error: Input must be a number.")
        minRating = input(": ")
    return int(minRating)