Use tighter loop and query. #2
+10
-27
@@ -198,33 +198,16 @@
|
||||
;;; Last thread page to fetch; the download loop iterates up to and including it.
(defparameter *end-page* 15)
|
||||
|
||||
;;; Collect the specified thread posts into a list.
;;; The list starts empty and is extended page by page by the loop below, so
;;; a failure part-way through still leaves the posts gathered so far.
(defparameter *collected-posts* nil)

;;; Download every page from *start-page* through *end-page* (inclusive),
;;; build a post from each matching article element, and accumulate the
;;; results in *collected-posts* in page order.
(loop for page-index from *start-page* to *end-page* do
  ;; ~D always prints the page number in decimal, independent of the
  ;; current *print-base* / *print-radix* settings.
  (let ((page (format nil "~Apage-~D" *thread-url* page-index)))
    (format t "Downloading: ~a~%" page)
    ;; Request and parse the post HTML for this page.
    (let* ((request (dex:get page))
           (parsed (lquery:$ (initialize request)))
           ;; A plain "article" query also picks up the innermost article
           ;; tags. Those lack the data-author attribute, so selecting on
           ;; the attribute's presence keeps only the real posts in a
           ;; single query instead of a separate filter pass.
           (page-posts (lquery:$ parsed "article[data-author]")))
      ;; MAP returns a freshly consed list, so NCONC can splice it onto the
      ;; accumulated list destructively instead of recopying every
      ;; previously collected post on each page as APPEND would.
      (setf *collected-posts*
            (nconc *collected-posts*
                   (map 'list #'build-post page-posts))))))
|
||||
;;; All posts from pages *start-page* through *end-page* (inclusive), in page
;;; order. DEFVAR evaluates its initial form only while the variable is still
;;; unbound, so re-evaluating this form does not download the pages again.
(defvar *collected-posts*
  (loop for page-number from *start-page* to *end-page*
        for page-url = (format nil "~Apage-~D" *thread-url* page-number)
        append (progn
                 ;; Announce the page before the request is made.
                 (format t "Downloading: ~a~%" page-url)
                 (let* ((body (dex:get page-url))
                        (document (lquery:$ (initialize body))))
                   ;; Only article elements carrying a data-author attribute
                   ;; are turned into posts.
                   (map 'list #'build-post
                        (lquery:$ document "article[data-author]"))))))
|
||||
|
||||
;;; Print a listing for every collected post as a quick sanity check.
;;; MAPC is used rather than MAPCAR because the call is made for its side
;;; effect: no throwaway list of return values is consed up.
;;; NOTE(review): presumes print-post-listing does its own printing and its
;;; return value is not needed -- confirm against its definition.
(mapc #'print-post-listing *collected-posts*)
|
||||
|
||||
Reference in New Issue
Block a user