Use tighter loop and query. #2

Merged
Yotsubaaa merged 1 commit from cecograph/cl-kiwi:master into master 2020-01-09 23:14:05 +00:00
+10 -27
View File
@@ -198,33 +198,16 @@
;;; Last thread page to download. The collection loop iterates from
;;; *start-page* through this page number, inclusive.
(defparameter *end-page* 15)
;;; Collect the specified thread posts into a list
;; Download every page from *start-page* through *end-page* (inclusive),
;; turn each post <article> into a post object with BUILD-POST, and keep the
;; results in page order. Evaluating this form performs the HTTP requests.
;; The variable is assigned once, after every page has been processed, so the
;; accumulated list is not re-copied for each page.
(defparameter *collected-posts*
  (loop for page-index from *start-page* to *end-page*
        append
        (let ((page (concatenate 'string
                                 *thread-url*
                                 "page-"
                                 (write-to-string page-index))))
          (format t "Downloading: ~a~%" page)
          ;; Request and parse the HTML for this page.
          (let* ((response (dex:get page))
                 (parsed (lquery:$ (initialize response))))
            ;; Nested <article> tags inside a post do not carry a
            ;; data-author attribute, so selecting on that attribute picks
            ;; up only the posts themselves and no separate filtering pass
            ;; is needed.
            (map 'list #'build-post
                 (lquery:$ parsed "article[data-author]"))))))
;; Posts gathered from every page in the range *start-page* .. *end-page*
;; (inclusive), in page order. Because this is a DEFVAR, the download only
;; runs when the variable is not already bound.
(defvar *collected-posts*
  (loop for page-number from *start-page* to *end-page*
        append
        (let ((page-url (format nil "~Apage-~D" *thread-url* page-number)))
          ;; Announce the page before fetching it.
          (format t "Downloading: ~a~%" page-url)
          (let* ((response (dex:get page-url))
                 (document (lquery:$ (initialize response))))
            ;; Only <article> elements carrying a data-author attribute are
            ;; selected and converted with BUILD-POST.
            (map 'list #'build-post
                 (lquery:$ document "article[data-author]"))))))
;;; Print a listing for every collected post as a quick sanity check.
;;; The values returned by PRINT-POST-LISTING are gathered into a list.
(loop for post in *collected-posts*
      collect (print-post-listing post))