diff --git a/.gitignore b/.gitignore index 94b5491..88fab83 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /scraper/__pycache__ -*.json \ No newline at end of file +*.json +*.txt \ No newline at end of file diff --git a/scraper/collectPosts.py b/scraper/collectPosts.py index f42b67c..b38b660 100644 --- a/scraper/collectPosts.py +++ b/scraper/collectPosts.py @@ -77,7 +77,7 @@ def collectPosts(pageData, filter, minRating, pageNum, stopPage = 0, ratingFilte break try: - pageData = requests.get(f"http://kiwifarms{TLD}" + pageData.find("a", class_ = "pageNav-jump--next")['href']) + pageData = requests.get(f"http://kiwifarms.net" + pageData.find("a", class_ = "pageNav-jump--next")['href']) pageData = BeautifulSoup(pageData.text, 'lxml') break except:#this runs when connection can't be made to the next page diff --git a/scraper/createOutput/__pycache__/__init__.cpython-37.pyc b/scraper/createOutput/__pycache__/__init__.cpython-37.pyc index d3e0862..eaf1af4 100644 Binary files a/scraper/createOutput/__pycache__/__init__.cpython-37.pyc and b/scraper/createOutput/__pycache__/__init__.cpython-37.pyc differ diff --git a/scraper/createOutput/__pycache__/jsonConvert.cpython-37.pyc b/scraper/createOutput/__pycache__/jsonConvert.cpython-37.pyc new file mode 100644 index 0000000..390d8ea Binary files /dev/null and b/scraper/createOutput/__pycache__/jsonConvert.cpython-37.pyc differ diff --git a/scraper/createOutput/__pycache__/outputPrep.cpython-37.pyc b/scraper/createOutput/__pycache__/outputPrep.cpython-37.pyc new file mode 100644 index 0000000..c314583 Binary files /dev/null and b/scraper/createOutput/__pycache__/outputPrep.cpython-37.pyc differ diff --git a/scraper/createOutput/__pycache__/textConvert.cpython-37.pyc b/scraper/createOutput/__pycache__/textConvert.cpython-37.pyc new file mode 100644 index 0000000..6a6df2b Binary files /dev/null and b/scraper/createOutput/__pycache__/textConvert.cpython-37.pyc differ diff --git a/scraper/createOutput/jsonConvert.py b/scraper/createOutput/jsonConvert.py index d62ad70..ddae053 100644 --- a/scraper/createOutput/jsonConvert.py +++ b/scraper/createOutput/jsonConvert.py @@ -1,7 +1,5 @@ #class used for creating json files for output to user -#user will be able to choose what data goes into the jaon file - from createOutput import outputPrep """ @@ -43,6 +41,7 @@ post # | | |- Semper Fidelis | | |- Devient | | |- Achievement +| | |- DRINK! | |- positive | |- neutral | |- negative @@ -64,8 +63,8 @@ class JsonConvert(outputPrep.outputPrep): try: with open(path + "\\" + fileName + ".json", "w") as export: json.dump(self.postData, export) - return True + return True #no errors except: print(self.postData) - return False + return False #unable to write to/create file \ No newline at end of file diff --git a/scraper/createOutput/textConvert.py b/scraper/createOutput/textConvert.py new file mode 100644 index 0000000..0c6d50d --- /dev/null +++ b/scraper/createOutput/textConvert.py @@ -0,0 +1,140 @@ +#class for creating text files for output to user + +""" +text template: + +# [post number]: + Link: [link] + + Metadata: + User: [username] + + Date: [post date] + + Edit Date: [date of edit] + + Content: + Raw HTML: [raw HTML code] + + Raw Text: [raw text from post] + + Formated Text: [formated text with media/stuff] + + Attatchments: [links to attatchments] + + Ratings: + Specific: + Like: [# recieved] + Dislike: [# recieved] + Agree: [# recieved] + Disagree: [# recieved] + Winner: [# recieved] + Informative: [# recieved] + Thunk-Provoking: [# recieved] + Feels: [# recieved] + Islamic Content: [# recieved] + Lunacy: [# recieved] + Autistic: [# recieved] + Horrifying: [# recieved] + Optimistic: [# recieved] + TMI: [# recieved] + Late: [# recieved] + Dumb: [# recieved] + Mad at the Internet: [# recieved] + Semper Fidelis: [# recieved] + Devient: [# recieved] + Achievement: [# recieved] + DRINK!: [# recieved] + + Positive: [# positive ratings] + + Negative: [# negative ratings] + + Neutral: [# neutral ratings] + + Weighted Score: [weighted score] + + Total Ratings: [# overall ratings] +""" + +from createOutput import outputPrep +from postData import PostData +import json + +class TextConvert(outputPrep.outputPrep): + def makeTextForOutput(self): + #returns contents of postData dictionary in text form + + textContents = "" #text to be returned + + for postNum, postContents in self.postData.items(): + textContents = f"""{textContents} + +{postNum}: + Link: {postContents["link"]} + + Metadata: + User: {postContents["metadata"]["user"]} + + Date: {postContents["metadata"]["date"]} + + Edit Date: {postContents["metadata"]["edit date"]} + + Content: + Raw HTML: {postContents["content"]["raw html"]} + + Raw Text: {postContents["content"]["raw text"]} + + Formated Text: {postContents["content"]["formated text"]} + + Attatchments: {postContents["content"]["attachments"]} + + Ratings: + Specific: + Like: {postContents["ratings"]["specific ratings"]["Like"]} + Dislike: {postContents["ratings"]["specific ratings"]["Dislike"]} + Agree: {postContents["ratings"]["specific ratings"]["Agree"]} + Disagree: {postContents["ratings"]["specific ratings"]["Disagree"]} + Winner: {postContents["ratings"]["specific ratings"]["Winner"]} + Informative: {postContents["ratings"]["specific ratings"]["Informative"]} + Thunk-Provoking: {postContents["ratings"]["specific ratings"]["Thunk-Provoking"]} + Feels: {postContents["ratings"]["specific ratings"]["Feels"]} + Islamic Content: {postContents["ratings"]["specific ratings"]["Islamic Content"]} + Lunacy: {postContents["ratings"]["specific ratings"]["Lunacy"]} + Autistic: {postContents["ratings"]["specific ratings"]["Autistic"]} + Horrifying: {postContents["ratings"]["specific ratings"]["Horrifying"]} + Optimistic: {postContents["ratings"]["specific ratings"]["Optimistic"]} + TMI: {postContents["ratings"]["specific ratings"]["TMI"]} + Late: {postContents["ratings"]["specific ratings"]["Late"]} + Dumb: {postContents["ratings"]["specific ratings"]["Dumb"]} + Mad at the Internet: {postContents["ratings"]["specific ratings"]["Mad at the Internet"]} + Semper Fidelis: {postContents["ratings"]["specific ratings"]["Semper Fidelis"]} + Deviant: {postContents["ratings"]["specific ratings"]["Deviant"]} + Achievement: {postContents["ratings"]["specific ratings"]["Achievement"]} + DRINK!: {postContents["ratings"]["specific ratings"]["DRINK!"]} + + Positive: {postContents["ratings"]["positive ratings"]} + + Negative: {postContents["ratings"]["negative ratings"]} + + Neutral: {postContents["ratings"]["neutral ratings"]} + + Weighted Score: {postContents["ratings"]["weighted score"]} + + Total Ratings: {postContents["ratings"]["total ratings"]} + """ + return textContents + + def exportText(self, path, fileName): + #creates text file using makeTextForOutput and saves it at path at fileName + #returns true if success, false otherwise + # + #path = file locatino to save text file to + #fileName = name of text file + + try: + with open(path + "\\" + fileName + ".txt", "w") as export: + export.write(self.makeTextForOutput()) + return True #no errors + except: + return False #unable to write to/create file \ No newline at end of file diff --git a/scraper/postData.py b/scraper/postData.py index 090e731..2e35b5a 100644 --- a/scraper/postData.py +++ b/scraper/postData.py @@ -167,7 +167,7 @@ class PostData: #get metadata self.postLink = f"http://kiwifarms.net" + infoBS.find("a", class_ = "u-concealed")["href"] - self.postNum = int("".join(re.split("\n|\t", infoBS.find("ul", class_ = "message-attribution-opposite--list").find_all("li")[-1].find("a").text)).split("#")[1]) + self.postNum = int("".join(re.split("\n|\t", infoBS.find("ul", class_ = "message-attribution-opposite--list").find_all("li")[-1].find("a").text)).split("#")[1].replace(',','')) try: self.poster = infoBS.find("span", class_ = "username").text except AttributeError:#if user is a guest, username data is stored in a different tag diff --git a/scraper/prepOutput.py b/scraper/prepOutput.py index debaa08..6abcbfd 100644 --- a/scraper/prepOutput.py +++ b/scraper/prepOutput.py @@ -1,6 +1,7 @@ #collection of functions used for creating and implementing output from createOutput.jsonConvert import JsonConvert +from createOutput.textConvert import TextConvert from postData import PostData import os @@ -17,20 +18,13 @@ def yesno_(): return False -def createText(posts, toFile): - #creates text file or copies - return True - - -def createJson(posts): - #creates json file using JsonConvert class - #returns true if file successfully created, false if not +def getDirectory(extension): + #asks for a file directory and name to save with and returns that data # - #posts = list of PostData objects + #extension = string containing file extension to save file as path = "" #path to output file - fileName = "" #name of json file - jsonCreate = JsonConvert() #jsonConvert object + fileName = "" #name of file print("Please enter directory to save output to.") #collect user's desired save directory path = input(": ") @@ -40,9 +34,10 @@ def createJson(posts): if len(path.split("/")) > 1: path = "\\".join(path.split("/")) #replace all '/' with '\' if '/' is used if path[-1] == "\\": path = path[:-1] #remove any trailing '\' - print("What would you like to name your JSON file?") + print("What would you like to name your file?") fileName = input(": ") - while os.path.exists(path + "\\" + fileName + ".json"):#if file at path already exists, have user confirm decision + fileName = fileName.split(extension)[0] #remove file extension if user included it + while os.path.exists(path + "\\" + fileName + extension):#if file at path already exists, have user confirm decision print("A file with the same name already exists in the path that you specified. Would you like to replace it?") ui = yesno_() if ui: @@ -51,9 +46,70 @@ def createJson(posts): print("Would you like to select a new directory?") ui = yesno_() if not ui: - fileName = input("Enter a new file name: ") + fileName = input(": ").split(extension)[0] else: - return createJson(posts) + return getDirectory(extension) #restart function if user wants to change directory + + return path, fileName + + + +def createText(posts, toFile): + #creates text file or prints out data + #returns true if file successfully created, false if not + # + #posts = list of PostData objects + #toFile = True if output being saved to file, False if being printed for copy/paste output + + if toFile:#save to text file + path, fileName = getDirectory(".txt")#path to file and name of text file to save + textFileCreate = TextConvert() + + #add posts to textFileCreate + for post in posts: + textFileCreate.unpackPostData(post) + + #create text file + print("\nCreating file...") + if not textFileCreate.exportText(path, fileName): + #if program failed to create file, ask if uer wants to try + # different dierectory, or give up + print(""" +Error: Failed to create file. +What would you like to do? + +(1) try different drectory +(2) try something else + """) + ui = input(": ") + while not ui.isdigit() or not (0 < int(ui) < 3): + print("Error: Input must be a number between 1 and 2. Try again.") + ui = input(": ") + + if int(ui) == 1:#try again with different path + textFileCreate = 0 #clear data to save memory + return createText(posts, toFile) + if int(ui) == 2:#give up and return false + return False + else:#text file successfully created + print("Successful") + print(f"File saved to {path}\\{fileName}.txt") + return True + + + else:#print for copy/pasting + #TODO: add copy/paste functionality + return True + + +def createJson(posts): + #creates json file using JsonConvert class + #returns true if file successfully created, false if not + # + #posts = list of PostData objects + + path, fileName = getDirectory(".json") #path to file and name of JSON file to save + jsonCreate = JsonConvert() #jsonConvert object #add posts to jsonCreate for post in posts: @@ -65,7 +121,7 @@ def createJson(posts): #if program failed to create file, ask if uer wants to try # different dierectory, or give up print(""" -Error:Failed to create file. +Error: Failed to create file. What would you like to do? (1) try different drectory @@ -77,6 +133,7 @@ What would you like to do? ui = input(": ") if int(ui) == 1:#try again with different path + jsonCreate = 0#clear data to save memory return createJson(posts) if int(ui) == 2:#give up and return false return False @@ -102,7 +159,7 @@ How would you like your data outputed? (3) Copy/Paste """) ui = input(": ") - while not ui.isdigit() or not (0 < ui < 4): + while not ui.isdigit() or not (0 < int(ui) < 4): print("Error: Input must be a digit between 1 and 3. Try again.") ui = input(": ") @@ -113,4 +170,5 @@ How would you like your data outputed? else: #copy/paste writeSuccess = createText(posts, False) - if not writeSuccess: outputSelect(posts) #if failed to write to file, restart process \ No newline at end of file + if not writeSuccess: + outputSelect(posts) #if failed to write to file, restart process \ No newline at end of file