added functionality for text-file output
This commit is contained in:
+2
-1
@@ -1,2 +1,3 @@
|
||||
/scraper/__pycache__
|
||||
*.json
|
||||
*.json
|
||||
*.txt
|
||||
@@ -77,7 +77,7 @@ def collectPosts(pageData, filter, minRating, pageNum, stopPage = 0, ratingFilte
|
||||
break
|
||||
|
||||
try:
|
||||
pageData = requests.get(f"http://kiwifarms{TLD}" + pageData.find("a", class_ = "pageNav-jump--next")['href'])
|
||||
pageData = requests.get(f"http://kiwifarms.net" + pageData.find("a", class_ = "pageNav-jump--next")['href'])
|
||||
pageData = BeautifulSoup(pageData.text, 'lxml')
|
||||
break
|
||||
except:#this runs when connection can't be made to the next page
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,7 +1,5 @@
|
||||
#class used for creating json files for output to user
|
||||
|
||||
#user will be able to choose what data goes into the json file
|
||||
|
||||
from createOutput import outputPrep
|
||||
|
||||
"""
|
||||
@@ -43,6 +41,7 @@ post #
|
||||
| | |- Semper Fidelis
|
||||
| | |- Deviant
|
||||
| | |- Achievement
|
||||
| | |- DRINK!
|
||||
| |- positive
|
||||
| |- neutral
|
||||
| |- negative
|
||||
@@ -64,8 +63,8 @@ class JsonConvert(outputPrep.outputPrep):
|
||||
try:
|
||||
with open(path + "\\" + fileName + ".json", "w") as export:
|
||||
json.dump(self.postData, export)
|
||||
return True
|
||||
return True #no errors
|
||||
except:
|
||||
print(self.postData)
|
||||
return False
|
||||
return False #unable to write to/create file
|
||||
|
||||
@@ -0,0 +1,140 @@
|
||||
#class for creating text files for output to user
|
||||
|
||||
"""
|
||||
text template:
|
||||
|
||||
# [post number]:
|
||||
Link: [link]
|
||||
|
||||
Metadata:
|
||||
User: [username]
|
||||
|
||||
Date: [post date]
|
||||
|
||||
Edit Date: [date of edit]
|
||||
|
||||
Content:
|
||||
Raw HTML: [raw HTML code]
|
||||
|
||||
Raw Text: [raw text from post]
|
||||
|
||||
Formated Text: [formated text with media/stuff]
|
||||
|
||||
Attatchments: [links to attatchments]
|
||||
|
||||
Ratings:
|
||||
Specific:
|
||||
Like: [# received]
|
||||
Dislike: [# received]
|
||||
Agree: [# received]
|
||||
Disagree: [# received]
|
||||
Winner: [# received]
|
||||
Informative: [# received]
|
||||
Thunk-Provoking: [# received]
|
||||
Feels: [# received]
|
||||
Islamic Content: [# received]
|
||||
Lunacy: [# received]
|
||||
Autistic: [# received]
|
||||
Horrifying: [# received]
|
||||
Optimistic: [# received]
|
||||
TMI: [# received]
|
||||
Late: [# received]
|
||||
Dumb: [# received]
|
||||
Mad at the Internet: [# received]
|
||||
Semper Fidelis: [# received]
|
||||
Deviant: [# received]
|
||||
Achievement: [# received]
|
||||
DRINK!: [# received]
|
||||
|
||||
Positive: [# positive ratings]
|
||||
|
||||
Negative: [# negative ratings]
|
||||
|
||||
Neutral: [# neutral ratings]
|
||||
|
||||
Weighted Score: [weighted score]
|
||||
|
||||
Total Ratings: [# overall ratings]
|
||||
"""
|
||||
|
||||
from createOutput import outputPrep
|
||||
from postData import PostData
|
||||
import json
|
||||
|
||||
class TextConvert(outputPrep.outputPrep):
    """Turn the collected post data into plain text and save it as a .txt file.

    Reads ``self.postData``, which is iterated with ``.items()`` as a mapping
    of post label -> nested post dictionary.
    NOTE(review): ``self.postData`` is presumably filled in by the
    ``outputPrep`` base class - confirm against that class.
    """

    # Names of the specific ratings, in the order they are written out. Each
    # name is used both as the printed label and as the key looked up under
    # postContents["ratings"]["specific ratings"].
    _SPECIFIC_RATINGS = (
        "Like",
        "Dislike",
        "Agree",
        "Disagree",
        "Winner",
        "Informative",
        "Thunk-Provoking",
        "Feels",
        "Islamic Content",
        "Lunacy",
        "Autistic",
        "Horrifying",
        "Optimistic",
        "TMI",
        "Late",
        "Dumb",
        "Mad at the Internet",
        "Semper Fidelis",
        "Deviant",
        "Achievement",
        "DRINK!",
    )

    def makeTextForOutput(self):
        """Return the contents of ``self.postData`` as a single text string.

        One section is produced per post, in the mapping's iteration order.
        Each section starts with two newlines and ends with one newline.

        Raises:
            KeyError: if a post dictionary is missing one of the keys read
                below.
        """
        sections = []  # one finished text section per post

        for postNum, postContents in self.postData.items():
            metadata = postContents["metadata"]
            content = postContents["content"]
            ratings = postContents["ratings"]
            specific = ratings["specific ratings"]

            # Empty strings become blank lines once joined with "\n".
            lines = [
                "",
                "",
                f"{postNum}:",
                f"Link: {postContents['link']}",
                "",
                "Metadata:",
                f"User: {metadata['user']}",
                "",
                f"Date: {metadata['date']}",
                "",
                f"Edit Date: {metadata['edit date']}",
                "",
                "Content:",
                f"Raw HTML: {content['raw html']}",
                "",
                f"Raw Text: {content['raw text']}",
                "",
                f"Formated Text: {content['formated text']}",
                "",
                f"Attatchments: {content['attachments']}",
                "",
                "Ratings:",
                "Specific:",
            ]
            lines.extend(
                f"{name}: {specific[name]}" for name in self._SPECIFIC_RATINGS
            )
            lines.extend([
                "",
                f"Positive: {ratings['positive ratings']}",
                "",
                f"Negative: {ratings['negative ratings']}",
                "",
                f"Neutral: {ratings['neutral ratings']}",
                "",
                f"Weighted Score: {ratings['weighted score']}",
                "",
                f"Total Ratings: {ratings['total ratings']}",
                "",  # trailing newline that closes the section
            ])
            sections.append("\n".join(lines))

        # Join once at the end instead of re-copying the whole result string
        # for every post.
        return "".join(sections)

    def exportText(self, path, fileName):
        """Write the text from makeTextForOutput to ``<path>\\<fileName>.txt``.

        Args:
            path: directory to save the text file in. It is joined to the
                file name with a literal backslash (Windows-style path).
            fileName: name of the text file, without the ".txt" extension.

        Returns:
            True if the file was written, False if building the text or
            creating/writing the file failed.
        """
        try:
            # Build the text before opening the file so that a formatting
            # failure cannot leave an empty or truncated file behind.
            text = self.makeTextForOutput()
            # Explicit UTF-8: post text may contain characters that the
            # platform's default encoding cannot represent.
            with open(path + "\\" + fileName + ".txt", "w", encoding="utf-8") as export:
                export.write(text)
            return True  # no errors
        except Exception:
            # Any failure is reported through the return value; unlike a bare
            # "except:", this lets KeyboardInterrupt/SystemExit through.
            return False  # unable to build text or write to/create file
|
||||
+1
-1
@@ -167,7 +167,7 @@ class PostData:
|
||||
|
||||
#get metadata
|
||||
self.postLink = f"http://kiwifarms.net" + infoBS.find("a", class_ = "u-concealed")["href"]
|
||||
self.postNum = int("".join(re.split("\n|\t", infoBS.find("ul", class_ = "message-attribution-opposite--list").find_all("li")[-1].find("a").text)).split("#")[1])
|
||||
self.postNum = int("".join(re.split("\n|\t", infoBS.find("ul", class_ = "message-attribution-opposite--list").find_all("li")[-1].find("a").text)).split("#")[1].replace(',',''))
|
||||
try:
|
||||
self.poster = infoBS.find("span", class_ = "username").text
|
||||
except AttributeError:#if user is a guest, username data is stored in a different tag
|
||||
|
||||
+76
-18
@@ -1,6 +1,7 @@
|
||||
#collection of functions used for creating and implementing output
|
||||
|
||||
from createOutput.jsonConvert import JsonConvert
|
||||
from createOutput.textConvert import TextConvert
|
||||
from postData import PostData
|
||||
import os
|
||||
|
||||
@@ -17,20 +18,13 @@ def yesno_():
|
||||
return False
|
||||
|
||||
|
||||
def createText(posts, toFile):
|
||||
#creates text file or copies
|
||||
return True
|
||||
|
||||
|
||||
def createJson(posts):
|
||||
#creates json file using JsonConvert class
|
||||
#returns true if file successfully created, false if not
|
||||
def getDirectory(extension):
|
||||
#asks for a file directory and name to save with and returns that data
|
||||
#
|
||||
#posts = list of PostData objects
|
||||
#extension = string containing file extension to save file as
|
||||
|
||||
path = "" #path to output file
|
||||
fileName = "" #name of json file
|
||||
jsonCreate = JsonConvert() #jsonConvert object
|
||||
fileName = "" #name of file
|
||||
|
||||
print("Please enter directory to save output to.") #collect user's desired save directory
|
||||
path = input(": ")
|
||||
@@ -40,9 +34,10 @@ def createJson(posts):
|
||||
if len(path.split("/")) > 1: path = "\\".join(path.split("/")) #replace all '/' with '\' if '/' is used
|
||||
if path[-1] == "\\": path = path[:-1] #remove any trailing '\'
|
||||
|
||||
print("What would you like to name your JSON file?")
|
||||
print("What would you like to name your file?")
|
||||
fileName = input(": ")
|
||||
while os.path.exists(path + "\\" + fileName + ".json"):#if file at path already exists, have user confirm decision
|
||||
fileName = fileName.split(extension)[0] #remove file extension if user included it
|
||||
while os.path.exists(path + "\\" + fileName + extension):#if file at path already exists, have user confirm decision
|
||||
print("A file with the same name already exists in the path that you specified. Would you like to replace it?")
|
||||
ui = yesno_()
|
||||
if ui:
|
||||
@@ -51,9 +46,70 @@ def createJson(posts):
|
||||
print("Would you like to select a new directory?")
|
||||
ui = yesno_()
|
||||
if not ui:
|
||||
fileName = input("Enter a new file name: ")
|
||||
fileName = input(": ").split(extension)[0]
|
||||
else:
|
||||
return createJson(posts)
|
||||
return getDirectory(extension) #restart function if user wants to change directory
|
||||
|
||||
return path, fileName
|
||||
|
||||
|
||||
|
||||
def createText(posts, toFile):
    #writes the posts to a text file, or (eventually) prints them for copy/paste
    #returns True on success, False if the user gives up after a failed write
    #
    #posts = list of PostData objects
    #toFile = True to save the output to a file, False to print it for copy/paste

    if not toFile:#print for copy/pasting
        #TODO: add copy/paste functionality
        return True

    #save to text file; each pass through the loop is one complete attempt
    while True:
        path, fileName = getDirectory(".txt")#where to save and what to call the file
        writer = TextConvert()

        #hand every post to the writer
        for post in posts:
            writer.unpackPostData(post)

        #create text file
        print("\nCreating file...")
        if writer.exportText(path, fileName):#text file successfully created
            print("Successful")
            print(f"File saved to {path}\\{fileName}.txt")
            return True

        #the file could not be created: ask whether the user wants to try
        # a different directory, or give up
        print("""
Error: Failed to create file.
What would you like to do?

(1) try different drectory
(2) try something else
""")
        choice = input(": ")
        while not (choice.isdigit() and 0 < int(choice) < 3):
            print("Error: Input must be a number between 1 and 2. Try again.")
            choice = input(": ")

        if int(choice) == 2:#give up and return false
            return False

        #choice 1: drop the old writer to save memory, then start over
        del writer
|
||||
|
||||
|
||||
def createJson(posts):
|
||||
#creates json file using JsonConvert class
|
||||
#returns true if file successfully created, false if not
|
||||
#
|
||||
#posts = list of PostData objects
|
||||
|
||||
path, fileName = getDirectory(".json") #path to file and name of JSON file to save
|
||||
jsonCreate = JsonConvert() #jsonConvert object
|
||||
|
||||
#add posts to jsonCreate
|
||||
for post in posts:
|
||||
@@ -65,7 +121,7 @@ def createJson(posts):
|
||||
#if program failed to create file, ask if user wants to try
|
||||
# different directory, or give up
|
||||
print("""
|
||||
Error:Failed to create file.
|
||||
Error: Failed to create file.
|
||||
What would you like to do?
|
||||
|
||||
(1) try different drectory
|
||||
@@ -77,6 +133,7 @@ What would you like to do?
|
||||
ui = input(": ")
|
||||
|
||||
if int(ui) == 1:#try again with different path
|
||||
jsonCreate = 0#clear data to save memory
|
||||
return createJson(posts)
|
||||
if int(ui) == 2:#give up and return false
|
||||
return False
|
||||
@@ -102,7 +159,7 @@ How would you like your data outputed?
|
||||
(3) Copy/Paste
|
||||
""")
|
||||
ui = input(": ")
|
||||
while not ui.isdigit() or not (0 < ui < 4):
|
||||
while not ui.isdigit() or not (0 < int(ui) < 4):
|
||||
print("Error: Input must be a digit between 1 and 3. Try again.")
|
||||
ui = input(": ")
|
||||
|
||||
@@ -113,4 +170,5 @@ How would you like your data outputed?
|
||||
else: #copy/paste
|
||||
writeSuccess = createText(posts, False)
|
||||
|
||||
if not writeSuccess: outputSelect(posts) #if failed to write to file, restart process
|
||||
if not writeSuccess:
|
||||
outputSelect(posts) #if failed to write to file, restart process
|
||||
Reference in New Issue
Block a user