# Patch for src/cl-kiwi.lisp (single hunk starting at line 198).
#
# What the hunk does:
#   * Removes the `defparameter` that initialised *collected-posts* to NIL
#     and the top-level `loop` that grew it with (setf ... (append ...))
#     once per page.
#   * Adds a single `defvar` whose initial value is produced by one `loop`
#     that `append`s, for every page index from *start-page* to *end-page*,
#     the list of (build-post article) results for that page.
#   * The page URL is now built with (format nil "~Apage-~D" ...) instead of
#     `concatenate` + `write-to-string`.
#   * Articles are selected with the "article[data-author]" selector instead
#     of selecting every "article" and filtering on the data-author attribute
#     afterwards; the explanatory comment and TODO about that filter are
#     removed together with it.
#
# NOTE(review): `defvar` does not re-evaluate its initial-value form when the
#   variable is already bound, whereas the removed defparameter + loop ran on
#   every load. Reloading the file in a live image will therefore keep the
#   previously collected posts - confirm this is intended.
# NOTE(review): in the added loop, `post` holds the value returned by
#   dex:get and `plump` holds the result of (lquery:$ (initialize post));
#   neither is a built post - presumably the response body and the parsed
#   document, verify against the dexador / lquery documentation.
# NOTE(review): the trailing context line uses `mapcar` only to call
#   print-post-listing on each element; it is untouched by this patch.
# NOTE(review): leading whitespace inside the hunk is assumed to follow
#   conventional Lisp indentation - verify against the pre-image before
#   applying.
diff --git a/src/cl-kiwi.lisp b/src/cl-kiwi.lisp
index 8bf4c74..5593cad 100644
--- a/src/cl-kiwi.lisp
+++ b/src/cl-kiwi.lisp
@@ -198,33 +198,16 @@
 (defparameter *end-page* 15)
 
 ;;; Collect the specified thread posts into a list
-(defparameter *collected-posts* nil)
-
-(loop for page-index from *start-page* to *end-page* do
-  ;; Request and parse the separate post HTML for each page
-  (let ((page (concatenate 'string
-                           *thread-url*
-                           "page-"
-                           (write-to-string page-index))))
-    (format t "Downloading: ~a~%" page)
-    (let* ((request (dex:get page))
-           (parsed (lquery:$ (initialize request)))
-           (page-posts (lquery:$ parsed "article")))
-      ;; We accidentally pick up the innermost article tags too, so
-      ;; we filter them out here. For now, doing this by checking whether
-      ;; the article has its data-author attribute and discarding the ones
-      ;; that don't.
-      ;; TODO: Can I write this better? It's kind of gross doing it this way.
-      ;; I suspect there's a better lquery command for page-posts.
-      (setf page-posts
-            (lquery:$ page-posts
-                      (filter (lambda (article)
-                                (elt (lquery:$ article (attr :data-author))
-                                     0)))))
-      ;; Build the posts and append them to the post list
-      (setf *collected-posts*
-            (append *collected-posts*
-                    (map 'list #'build-post page-posts))))))
+(defvar *collected-posts*
+  (loop
+    for page-index from *start-page* to *end-page*
+    for url = (format nil "~Apage-~D" *thread-url* page-index)
+    for post = (progn (format t "Downloading: ~a~%" url)
+                      (dex:get url))
+    for plump = (lquery:$ (initialize post))
+    append (loop
+             for article across (lquery:$ plump "article[data-author]")
+             collect (build-post article))))
 
 ;;; Can print them all to check
 (mapcar #'print-post-listing *collected-posts*)