From 899b7ac197d623c437727e3a2cf71f72feb44c1a Mon Sep 17 00:00:00 2001 From: = Date: Tue, 23 Jun 2020 02:41:52 -0500 Subject: [PATCH] added functionality for text-file output --- .gitignore | 3 +- scraper/collectPosts.py | 2 +- .../__pycache__/__init__.cpython-37.pyc | Bin 169 -> 169 bytes .../__pycache__/jsonConvert.cpython-37.pyc | Bin 0 -> 706 bytes .../__pycache__/outputPrep.cpython-37.pyc | Bin 0 -> 2221 bytes .../__pycache__/textConvert.cpython-37.pyc | Bin 0 -> 4174 bytes scraper/createOutput/jsonConvert.py | 7 +- scraper/createOutput/textConvert.py | 140 ++++++++++++++++++ scraper/postData.py | 2 +- scraper/prepOutput.py | 94 +++++++++--- 10 files changed, 223 insertions(+), 25 deletions(-) create mode 100644 scraper/createOutput/__pycache__/jsonConvert.cpython-37.pyc create mode 100644 scraper/createOutput/__pycache__/outputPrep.cpython-37.pyc create mode 100644 scraper/createOutput/__pycache__/textConvert.cpython-37.pyc create mode 100644 scraper/createOutput/textConvert.py diff --git a/.gitignore b/.gitignore index 94b5491..88fab83 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /scraper/__pycache__ -*.json \ No newline at end of file +*.json +*.txt \ No newline at end of file diff --git a/scraper/collectPosts.py b/scraper/collectPosts.py index f42b67c..b38b660 100644 --- a/scraper/collectPosts.py +++ b/scraper/collectPosts.py @@ -77,7 +77,7 @@ def collectPosts(pageData, filter, minRating, pageNum, stopPage = 0, ratingFilte break try: - pageData = requests.get(f"http://kiwifarms{TLD}" + pageData.find("a", class_ = "pageNav-jump--next")['href']) + pageData = requests.get(f"http://kiwifarms.net" + pageData.find("a", class_ = "pageNav-jump--next")['href']) pageData = BeautifulSoup(pageData.text, 'lxml') break except:#this runs when connection can't be made to the next page diff --git a/scraper/createOutput/__pycache__/__init__.cpython-37.pyc b/scraper/createOutput/__pycache__/__init__.cpython-37.pyc index d3e0862276adb0b5e32755df29f79141952c9d49..eaf1af4792dac2667a011ca784e737a0e1ef8ed4 100644 GIT binary patch delta 18 YcmZ3f1*Bo3BL`6gvvnVKJqj6D&!C^@EvJ>Xd?#`}R;wgLg z74+y!_!=c_AXv*dmvae8rSh!8ucbh6yZ06euxbz1DBcWj*liYU=8#8ya1s7Y-}y z_EVP*#uQ%*M`I7wZX;+GfnpI?p!hk9L{m@t8?S9u_N($l8#fksj)bUQNA0890mAH| zV{=fj!{&Sk>7t4S|F->AIQY)zY$E332M6{6n|aILv5!c9#GqMDQhY&;1|y798-pu| z&w||v-tFP^{mWk9WmW6a%VtZeX;J&IUUlVz+GM5cNiVI=kG!l?ckIKXEZ2jy&{0c& zsVn1tGII;LULE$I#)noL8yBf{CO#_kK4k$MJCHj;k_$lbvSW(b%N5HnClO z$3~}4A2hR#iOOm+yYm0lF!pLUq;LwEZDTL0k>(7-s3T$f<9}vl|@wP+J~9e?Rl%y&3DhZ#tc}0oQN8|NGM)u3`Kw zKW3K&#tV3|EoiWTQDnsE3?buN1Dn`-XJG5Cd1hk!#Mo*41AUAgB%D#2@j<|YPztll z1mguf*(Nl^I78Tgfe4%Lj57;c*oL=_8!%B*w9}kVb3S5Z8Y1W}FZq^p?8B2ukEwBM z_}~XGzrI}8$L;s z@hf^wy_1jyQ^LlXeus<%oE$4}#vh#eVtOMS-=_)XzTc7#NzV*atlsG}plS;X#!jux~fsl(%H)~#V( z&3ZMgr`d81Th?r)hOKC}TEkW~>({WpX7_5?J{`XQIUdPLqqQ=w(w9(sLSm zS`qdky@Z(uG>fFs*t=ka2nW6cWi@}KNdnF+9wg-gkl>2$=_F4spO092m0rTcqVbZD zC@Z=LSro)D`+n)DXddQC5JFPh%Q?+B4MqFAlregKBS*E5r<}&6Zk-+-fHN{odyvPk zi`An5dja?Ogm?!KHcJRE+!GQ*9`#;QOd<+_4agM@5(sxMoKSK_WV}Ul@cqH?)ppT? z%uTt>$n*FUasgX$N9iUc=PZckUEFIzFD7yyvoK|(=X|O; ztJ_6u>h2>Kt)R{SeV3#AsMxquuBvMecN((7B|%JlU$lKcPH`Sdd&&2I&V#5V%mJ_1 xm$H$i_D&-MVY7;t$=!Es$5P4=;+F!Gg;6PY#;Ix4E;zg8nqSMxuL|sv`5$Ny7<&K! literal 0 HcmV?d00001 diff --git a/scraper/createOutput/__pycache__/textConvert.cpython-37.pyc b/scraper/createOutput/__pycache__/textConvert.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a6df2b06376aab769aa2bd5f5857fd1192f08ee GIT binary patch literal 4174 zcmai2Ta(m86z*g)7iMQ+xr1^E;B7=eMFm$B7KW9TWkFa9YZx5kq=yNc%aHW!vR#X1 z6~4=}mR0cRqkpGgee%&)Ui6$!GU)_%$W+qjeCPD(OHOxxnTylYbpqEf+j6Px}qu$W97@e)xkC}IeLBvddSox9hTbBWCnec0+!Ui6!?t#ENkSZwPs#f zzP1SA!bCKCfvtu_mt`1K;v&b_4+GD{VJW?u4?+)Gv{UDPs>>Q{4jY&rbj3Oqu~EOT zRGeeXV!eR}9gCn5oRr99foUmt;(pU&j({0U zPtGx#@M<}cJK1CIen?*v!Fr9$9mn_O3*4+13w}I9&g!I=mIot$<;0CJSPfQS=o3aY zPpOOZw`qHkYk5x3Omkp!&&82%_0}hZnj0}EVleBQ$G8%Nq0?WV+~w;-=6J7*w0v!0 zLOY9)audugSalQ3*DM>-%wz*li`AogJuyplE-Xq>GFpqBpk;BsTfz>D#GmO_rmh$6>OY}b2l63PECG%JO zMl7^k*{n~c{0gr-)VV*v^?s`tg!m5DQXoP=S>Ve7vs^P>yErMqDuo8zaa6PZ4{W}q zsfl_pV#5);5z--OH5ej;?ao^2p*_8H5H7$S9e@WVZ4I_<9k=aYNV{a0Yy(QeF2kl= z5mUH#LC!;qtfzrLUbyEkz4zMp_p2C1^2*j1_35^amr>WMZbwFYR@;JH<5+Y)U?YTFZShtzf^+AgW> z25k@2+Y9$TiR@R9Hze|=f*g>@K?OM^k;4jNO5})w9F@p11$j#%Z!5?<5_wla-jhgE zLEe|haRvE6A}18&q(n|B$cGa7NI^c9$Y}-nL?WLm$Y&BcqabG`^0|V{O5_U#IVX`X z733?4oL7(w5}8Yoi&DFkXf3H-PPBQceVu4mq_&V~-$?E1A6Fi04|m)l8yY0y;*ua- zuh)4aKB{5{APhY1_@T)hK-9d19S8&I0H4YBfS3yi0}p$?JHr(sz_%a+Fr~+l*dsjy z#!Eiq0ixQANdm7SBN#Bx<-4-D85@Z0c)0WZnZ3XQ_;ij2@O_zQ{@52&c-$u}fS-=t z(k)}$%}K|-s3tofpNSGyknS(SKt{m#jN{~3fT$*f0LF=i^h!v8C`*Cw%^9Ie!T>}q zm0|wVx>%+RKvZIhzmYTN&VMnbsE`k8oR=s7-&TMM9Z#5mC`+LbZJ|g5__hL67)Htl zL?xCA6OnNNQA=e=2z3A|7ZCMKE4Zm_K-4ph@5n`yd_a_-;5&1MmasS6cMhW;#krBWgDt-wi;M)p}07fzPDpCTzHO~pujiNvb_%xF&Azj1M zAzBJpx z@2aTdSrH36*L~Uwff1P}OvmqaZmhFG;CHS%YYwoQ&;qvCiITDxQpl_8lAUytCXrK| z9IlJ)p0z@eXTa_kd(0uRXuyM%41M33|HyyjqQ(!R5A;dad#O&Zg#K_LJJ|xaWG5{U z)<0ozirCs-vZ3z*@;3lPK+6H;1KPS&Mjog9n*Kf&9@*Tl1QceDl2H6Cu z(9#R7!;O>d5fj?l@_e%-j9^H8QClYPV|#y>7SNP+wXIG>?P_@qrU9j0aKR b%POymE^6oEPJM-c3q&<|7 1: path = "\\".join(path.split("/")) #replace all '/' with '\' if '/' is used if path[-1] == "\\": path = path[:-1] #remove any trailing '\' - print("What would you like to name your JSON file?") + print("What would you like to name your file?") fileName = input(": ") - while os.path.exists(path + "\\" + fileName + ".json"):#if file at path already exists, have user confirm decision + fileName = fileName.split(extension)[0] #remove file extension if user included it + while os.path.exists(path + "\\" + fileName + extension):#if file at path already exists, have user confirm decision print("A file with the same name already exists in the path that you specified. Would you like to replace it?") ui = yesno_() if ui: @@ -51,9 +46,70 @@ def createJson(posts): print("Would you like to select a new directory?") ui = yesno_() if not ui: - fileName = input("Enter a new file name: ") + fileName = input(": ").split(extension)[0] else: - return createJson(posts) + return getDirectory(extension) #restart function if user wants to change directory + + return path, fileName + + + +def createText(posts, toFile): + #creates text file or prints out data + #returns true if file successfully created, false if not + # + #posts = list of PostData objects + #toFile = True if output being saved to file, False if being printed for copy/paste output + + if toFile:#save to text file + path, fileName = getDirectory(".txt")#path to file and name of text file to save + textFileCreate = TextConvert() + + #add posts to textFileCreate + for post in posts: + textFileCreate.unpackPostData(post) + + #create text file + print("\nCreating file...") + if not textFileCreate.exportText(path, fileName): + #if program failed to create file, ask if uer wants to try + # different dierectory, or give up + print(""" +Error: Failed to create file. +What would you like to do? + +(1) try different drectory +(2) try something else + """) + ui = input(": ") + while not ui.isdigit() or not (0 < int(ui) < 3): + print("Error: Input must be a number between 1 and 2. Try again.") + ui = input(": ") + + if int(ui) == 1:#try again with different path + textFileCreate = 0 #clear data to save memory + return createText(posts, toFile) + if int(ui) == 2:#give up and return false + return False + else:#text file successfully created + print("Successful") + print(f"File saved to {path}\\{fileName}.txt") + return True + + + else:#print for copy/pasting + #TODO: add copy/paste functionality + return True + + +def createJson(posts): + #creates json file using JsonConvert class + #returns true if file successfully created, false if not + # + #posts = list of PostData objects + + path, fileName = getDirectory(".json") #path to file and name of JSON file to save + jsonCreate = JsonConvert() #jsonConvert object #add posts to jsonCreate for post in posts: @@ -65,7 +121,7 @@ def createJson(posts): #if program failed to create file, ask if uer wants to try # different dierectory, or give up print(""" -Error:Failed to create file. +Error: Failed to create file. What would you like to do? (1) try different drectory @@ -77,6 +133,7 @@ What would you like to do? ui = input(": ") if int(ui) == 1:#try again with different path + jsonCreate = 0#clear data to save memory return createJson(posts) if int(ui) == 2:#give up and return false return False @@ -102,7 +159,7 @@ How would you like your data outputed? (3) Copy/Paste """) ui = input(": ") - while not ui.isdigit() or not (0 < ui < 4): + while not ui.isdigit() or not (0 < int(ui) < 4): print("Error: Input must be a digit between 1 and 3. Try again.") ui = input(": ") @@ -113,4 +170,5 @@ How would you like your data outputed? else: #copy/paste writeSuccess = createText(posts, False) - if not writeSuccess: outputSelect(posts) #if failed to write to file, restart process \ No newline at end of file + if not writeSuccess: + outputSelect(posts) #if failed to write to file, restart process \ No newline at end of file