added functionality for text-file output

This commit is contained in:
=
2020-06-23 02:41:52 -05:00
parent 40e9c2f7b0
commit 899b7ac197
10 changed files with 223 additions and 25 deletions
+2 -1
View File
@@ -1,2 +1,3 @@
/scraper/__pycache__
*.json
*.json
*.txt
+1 -1
View File
@@ -77,7 +77,7 @@ def collectPosts(pageData, filter, minRating, pageNum, stopPage = 0, ratingFilte
break
try:
pageData = requests.get(f"http://kiwifarms{TLD}" + pageData.find("a", class_ = "pageNav-jump--next")['href'])
pageData = requests.get(f"http://kiwifarms.net" + pageData.find("a", class_ = "pageNav-jump--next")['href'])
pageData = BeautifulSoup(pageData.text, 'lxml')
break
except:#this runs when connection can't be made to the next page
+3 -4
View File
@@ -1,7 +1,5 @@
#class used for creating json files for output to user
#user will be able to choose what data goes into the JSON file
from createOutput import outputPrep
"""
@@ -43,6 +41,7 @@ post #
| | |- Semper Fidelis
| | |- Devient
| | |- Achievement
| | |- DRINK!
| |- positive
| |- neutral
| |- negative
@@ -64,8 +63,8 @@ class JsonConvert(outputPrep.outputPrep):
try:
with open(path + "\\" + fileName + ".json", "w") as export:
json.dump(self.postData, export)
return True
return True #no errors
except:
print(self.postData)
return False
return False #unable to write to/create file
+140
View File
@@ -0,0 +1,140 @@
#class for creating text files for output to user
"""
text template:
# [post number]:
Link: [link]
Metadata:
User: [username]
Date: [post date]
Edit Date: [date of edit]
Content:
Raw HTML: [raw HTML code]
Raw Text: [raw text from post]
Formated Text: [formated text with media/stuff]
Attatchments: [links to attatchments]
Ratings:
Specific:
Like: [# received]
Dislike: [# received]
Agree: [# received]
Disagree: [# received]
Winner: [# received]
Informative: [# received]
Thunk-Provoking: [# received]
Feels: [# received]
Islamic Content: [# received]
Lunacy: [# received]
Autistic: [# received]
Horrifying: [# received]
Optimistic: [# received]
TMI: [# received]
Late: [# received]
Dumb: [# received]
Mad at the Internet: [# received]
Semper Fidelis: [# received]
Deviant: [# received]
Achievement: [# received]
DRINK!: [# received]
Positive: [# positive ratings]
Negative: [# negative ratings]
Neutral: [# neutral ratings]
Weighted Score: [weighted score]
Total Ratings: [# overall ratings]
"""
from createOutput import outputPrep
from postData import PostData
import json
class TextConvert(outputPrep.outputPrep):
    """Renders the collected post data as plain text and saves it as a .txt file.

    The methods read self.postData and iterate it as a mapping of
    post number -> nested dictionary with the keys "link", "metadata",
    "content" and "ratings". This class does not fill self.postData itself;
    presumably the outputPrep base class does - verify there.
    """

    #labels printed under "Specific:", in output order. Each label is also the
    #key looked up in postContents["ratings"]["specific ratings"].
    #NOTE(review): confirm these labels match the keys produced upstream
    #exactly (e.g. the spelling of "Deviant"); a missing key raises KeyError,
    #which exportText reports as a failed write.
    _SPECIFIC_RATINGS = (
        "Like",
        "Dislike",
        "Agree",
        "Disagree",
        "Winner",
        "Informative",
        "Thunk-Provoking",
        "Feels",
        "Islamic Content",
        "Lunacy",
        "Autistic",
        "Horrifying",
        "Optimistic",
        "TMI",
        "Late",
        "Dumb",
        "Mad at the Internet",
        "Semper Fidelis",
        "Deviant",
        "Achievement",
        "DRINK!",
    )

    def _formatPost(self, postNum, postContents):
        #returns the text section for a single post
        #
        #postNum = key of the post in self.postData
        #postContents = nested dictionary holding the post's data
        metadata = postContents["metadata"]
        content = postContents["content"]
        ratings = postContents["ratings"]
        specific = ratings["specific ratings"]

        lines = [
            "", #every section starts with a line break
            f"{postNum}:",
            f"Link: {postContents['link']}",
            "Metadata:",
            f"User: {metadata['user']}",
            f"Date: {metadata['date']}",
            f"Edit Date: {metadata['edit date']}",
            "Content:",
            f"Raw HTML: {content['raw html']}",
            f"Raw Text: {content['raw text']}",
            f"Formated Text: {content['formated text']}",
            f"Attatchments: {content['attachments']}",
            "Ratings:",
            "Specific:",
        ]
        lines.extend(f"{name}: {specific[name]}" for name in self._SPECIFIC_RATINGS)
        lines.extend([
            f"Positive: {ratings['positive ratings']}",
            f"Negative: {ratings['negative ratings']}",
            f"Neutral: {ratings['neutral ratings']}",
            f"Weighted Score: {ratings['weighted score']}",
            f"Total Ratings: {ratings['total ratings']}",
            "", #every section ends with a line break
        ])
        return "\n".join(lines)

    def makeTextForOutput(self):
        #returns contents of postData dictionary in text form
        #
        #sections are joined once at the end instead of re-building the whole
        #accumulated string for every post
        #raises KeyError if a post is missing one of the expected keys
        return "".join(
            self._formatPost(postNum, postContents)
            for postNum, postContents in self.postData.items()
        )

    def exportText(self, path, fileName):
        #creates text file using makeTextForOutput and saves it at path as fileName
        #returns True on success, False otherwise
        #
        #path = file location to save text file to
        #fileName = name of text file (without the ".txt" extension)
        try:
            #render before opening so that a formatting error does not leave
            #behind an empty file (or truncate an existing one)
            textContents = self.makeTextForOutput()
            #explicit utf-8 so posts with non-ASCII characters do not fail on
            #platforms whose default encoding cannot represent them
            with open(path + "\\" + fileName + ".txt", "w", encoding="utf-8") as export:
                export.write(textContents)
        except Exception:
            return False #unable to build text or write to/create file
        return True #no errors
+1 -1
View File
@@ -167,7 +167,7 @@ class PostData:
#get metadata
self.postLink = f"http://kiwifarms.net" + infoBS.find("a", class_ = "u-concealed")["href"]
self.postNum = int("".join(re.split("\n|\t", infoBS.find("ul", class_ = "message-attribution-opposite--list").find_all("li")[-1].find("a").text)).split("#")[1])
self.postNum = int("".join(re.split("\n|\t", infoBS.find("ul", class_ = "message-attribution-opposite--list").find_all("li")[-1].find("a").text)).split("#")[1].replace(',',''))
try:
self.poster = infoBS.find("span", class_ = "username").text
except AttributeError:#if user is a guest, username data is stored in a different tag
+76 -18
View File
@@ -1,6 +1,7 @@
#collection of functions used for creating and implementing output
from createOutput.jsonConvert import JsonConvert
from createOutput.textConvert import TextConvert
from postData import PostData
import os
@@ -17,20 +18,13 @@ def yesno_():
return False
def createText(posts, toFile):
#creates text file or copies
return True
def createJson(posts):
#creates json file using JsonConvert class
#returns true if file successfully created, false if not
def getDirectory(extension):
#asks for a file directory and name to save with and returns that data
#
#posts = list of PostData objects
#extension = string containing file extension to save file as
path = "" #path to output file
fileName = "" #name of json file
jsonCreate = JsonConvert() #jsonConvert object
fileName = "" #name of file
print("Please enter directory to save output to.") #collect user's desired save directory
path = input(": ")
@@ -40,9 +34,10 @@ def createJson(posts):
if len(path.split("/")) > 1: path = "\\".join(path.split("/")) #replace all '/' with '\' if '/' is used
if path[-1] == "\\": path = path[:-1] #remove any trailing '\'
print("What would you like to name your JSON file?")
print("What would you like to name your file?")
fileName = input(": ")
while os.path.exists(path + "\\" + fileName + ".json"):#if file at path already exists, have user confirm decision
fileName = fileName.split(extension)[0] #remove file extension if user included it
while os.path.exists(path + "\\" + fileName + extension):#if file at path already exists, have user confirm decision
print("A file with the same name already exists in the path that you specified. Would you like to replace it?")
ui = yesno_()
if ui:
@@ -51,9 +46,70 @@ def createJson(posts):
print("Would you like to select a new directory?")
ui = yesno_()
if not ui:
fileName = input("Enter a new file name: ")
fileName = input(": ").split(extension)[0]
else:
return createJson(posts)
return getDirectory(extension) #restart function if user wants to change directory
return path, fileName
def createText(posts, toFile):
    """Create a text file from the posts, or prepare them for copy/paste.

    posts  = list of PostData objects
    toFile = True if output is being saved to a file, False if it is meant
             for copy/paste output (not implemented yet)

    Returns True if the output was produced, False if writing the file
    failed and the user chose to give up.
    """
    if not toFile:#print for copy/pasting
        #TODO: add copy/paste functionality
        return True

    #save to text file
    path, fileName = getDirectory(".txt")#path to file and name of text file to save
    textFileCreate = TextConvert()

    #add posts to textFileCreate
    for post in posts:
        textFileCreate.unpackPostData(post)

    #create text file
    print("\nCreating file...")
    if textFileCreate.exportText(path, fileName):
        #text file successfully created
        print("Successful")
        print(f"File saved to {path}\\{fileName}.txt")
        return True

    #if program failed to create file, ask if user wants to try
    # a different directory, or give up
    print("""
Error: Failed to create file.
What would you like to do?
(1) try different drectory
(2) try something else
""")
    ui = input(": ")
    #isdecimal() rather than isdigit(): isdigit() also accepts characters such
    #as superscript digits, which int() rejects with a ValueError
    while not ui.isdecimal() or not (0 < int(ui) < 3):
        print("Error: Input must be a number between 1 and 2. Try again.")
        ui = input(": ")

    if int(ui) == 1:#try again with different path
        textFileCreate = None #drop the converter so its data is not kept alive during the retry
        return createText(posts, toFile)
    return False #option 2: give up
def createJson(posts):
#creates json file using JsonConvert class
#returns true if file successfully created, false if not
#
#posts = list of PostData objects
path, fileName = getDirectory(".json") #path to file and name of JSON file to save
jsonCreate = JsonConvert() #jsonConvert object
#add posts to jsonCreate
for post in posts:
@@ -65,7 +121,7 @@ def createJson(posts):
#if program failed to create file, ask if uer wants to try
# different dierectory, or give up
print("""
Error:Failed to create file.
Error: Failed to create file.
What would you like to do?
(1) try different drectory
@@ -77,6 +133,7 @@ What would you like to do?
ui = input(": ")
if int(ui) == 1:#try again with different path
jsonCreate = 0#clear data to save memory
return createJson(posts)
if int(ui) == 2:#give up and return false
return False
@@ -102,7 +159,7 @@ How would you like your data outputed?
(3) Copy/Paste
""")
ui = input(": ")
while not ui.isdigit() or not (0 < ui < 4):
while not ui.isdigit() or not (0 < int(ui) < 4):
print("Error: Input must be a digit between 1 and 3. Try again.")
ui = input(": ")
@@ -113,4 +170,5 @@ How would you like your data outputed?
else: #copy/paste
writeSuccess = createText(posts, False)
if not writeSuccess: outputSelect(posts) #if failed to write to file, restart process
if not writeSuccess:
outputSelect(posts) #if failed to write to file, restart process