#!/usr/bin/env node

// Scan a category (or tag) listing for articles and store any that are not
// already in the database.
//
// Usage: <script> [options] <category>

require('dotenv').config()

const config = require('../config')
const program = require('commander')
const Database = require('better-sqlite3')
const Bluebird = require('bluebird')
const ds = require('../index')

const db = ds.db()
const baseURL = `https://${config.domain}`

/**
 * Parse a numeric command-line option value as a base-10 integer.
 *
 * Prints a warning and terminates the process with exit code 1 when the
 * value is not a number, instead of letting NaN flow into later calls.
 *
 * @param {string|number} value - raw option value as provided by commander
 * @param {string} name - option name, used only in the warning message
 * @returns {number} the parsed integer
 */
function parseCount(value, name) {
  const parsed = Number.parseInt(String(value), 10)
  if (Number.isNaN(parsed)) {
    console.warn(`--${name} must be an integer`)
    process.exit(1)
  }
  return parsed
}

/**
 * Entry point: parse the command line, scan the requested listing, then
 * fetch and insert each article that is not stored yet.
 *
 * One line is logged per article: "+" when it was inserted, "=" when it
 * already existed or the insert did not report success.
 *
 * @returns {Promise<void>} resolves once every selected article is processed
 */
async function main() {
  // Each option takes a value; the <...> placeholder is what makes commander
  // consume the following argument instead of treating the flag as a boolean.
  program.option('-t, --taxonomy <type>', `"section" or "tag"`, 'section')
  program.option('-s, --skip <n>', `skip leading items`, 0)
  program.option('-k, --keep <n>', `keep leading items`, 0)
  program.option('-p, --pages <n>', `scrape this many listing pages`, 0)
  program.parse(process.argv)

  const category = program.args[0]
  if (!category) {
    console.warn('category required')
    process.exit(1)
  }

  const options = {}
  if (program.pages) {
    options.pages = parseCount(program.pages, 'pages')
  }

  const taxonomyBaseURL = `${baseURL}/${program.taxonomy}`
  const partialArticles = await ds.scanCategory(taxonomyBaseURL, category, options)

  // --skip is applied first and --keep then trims what is left, so the two
  // options can be combined to select a window of the listing.
  let selected = partialArticles
  if (program.skip) {
    selected = selected.slice(parseCount(program.skip, 'skip'))
  }
  if (program.keep) {
    selected = selected.slice(0, parseCount(program.keep, 'keep'))
  }

  // Bluebird.each runs the callback for one article at a time, in order.
  // Awaiting it keeps main() pending until all articles are done and lets
  // failures propagate to the caller.
  await Bluebird.each(selected, async (art, i, length) => {
    const exists = await ds.articleExists(db, art.slug)
    if (exists) {
      console.log(`= [${i+1}/${length}] "${art.title}"`)
      return
    }

    const more = await ds.fetchArticle(art.link)
    const article = Object.assign({}, art, more)
    const res = await ds.insertArticle(db, article)

    if (res.success) {
      console.log(`+ [${i+1}/${length}] "${art.title}"`)
    } else {
      console.log(`= [${i+1}/${length}] "${art.title}"`)
    }
  })
}

main().catch((err) => {
  // Report the failure and signal it through the exit status rather than
  // leaving an unhandled promise rejection.
  console.error(err)
  process.exitCode = 1
})