forked from perception/dss
64 lines
2.0 KiB
JavaScript
Executable File
64 lines
2.0 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
|
// Scan a category (or tag) listing for articles and insert any new ones into the database.
|
|
require('dotenv').config()
|
|
const program = require('commander')
|
|
const Database = require('better-sqlite3')
|
|
const Bluebird = require('bluebird')
|
|
const ds = require('../index')
|
|
|
|
// Database handle from the project module; main() passes it to
// ds.articleExists and ds.insertArticle.
// NOTE(review): presumably a better-sqlite3 connection — confirm in ../index.
const db = ds.db()
|
|
// Site root; main() appends the --taxonomy value ("section" or "tag") to it
// to build the base URL of the listing that gets scanned.
const baseURL = 'https://dailystormer.su'
|
|
|
|
/**
 * CLI entry point: scans a taxonomy listing for articles and stores the ones
 * that are not already in the database.
 *
 * Usage: <script> [options] <category>
 *   -t, --taxonomy <TYPE>  "section" or "tag" (default "section")
 *   -s, --skip <ITEMS>     drop this many leading items from the scan result
 *   -k, --keep <ITEMS>     keep only this many leading items (after --skip)
 *   -p, --pages <P>        forwarded to ds.scanCategory as options.pages
 *
 * Exits the process with status 1 when no category argument is given.
 *
 * @returns {Promise<void>} settles once every selected article has been
 *   processed; rejects if scanning, fetching or inserting fails.
 */
async function main() {
  program.option('-t, --taxonomy <TYPE>', `"section" or "tag"`, 'section')
  program.option('-s, --skip <ITEMS>', `skip leading items`, 0)
  program.option('-k, --keep <ITEMS>', `keep leading items`, 0)
  program.option('-p, --pages <P>', `scrape this many listing pages`, 0)
  program.parse(process.argv)

  const category = program.args[0]
  if (!category) {
    console.warn('category required')
    process.exit(1)
  }

  // Only forward `pages` when the user supplied a non-zero value, so that
  // ds.scanCategory can apply its own default otherwise.
  const options = {}
  if (program.pages) {
    options.pages = Number.parseInt(program.pages, 10)
  }

  const taxonomyBaseURL = `${baseURL}/${program.taxonomy}`
  const partialArticles = await ds.scanCategory(taxonomyBaseURL, category, options)

  let ps = partialArticles
  if (program.skip) {
    ps = ps.slice(Number.parseInt(program.skip, 10))
  }
  if (program.keep) {
    // Slice the already-skipped list so that --skip and --keep compose
    // instead of --keep silently discarding --skip.
    ps = ps.slice(0, Number.parseInt(program.keep, 10))
  }

  // Process articles one at a time. Awaiting here keeps main() pending until
  // the work is done and lets any failure propagate to the caller.
  await Bluebird.each(ps, async (art, i, length) => {
    const progress = `[${i + 1}/${length}]`

    const exists = await ds.articleExists(db, art.slug)
    if (exists) {
      console.log(`= ${progress} "${art.title}"`)
      return
    }

    // The listing only yields partial data; merge in the fetched details.
    const more = await ds.fetchArticle(art.link)
    const article = Object.assign({}, art, more)

    const res = await ds.insertArticle(db, article)
    // "+" marks a newly stored article, "=" one that was not inserted.
    const marker = res.success ? '+' : '='
    console.log(`${marker} ${progress} "${art.title}"`)
  })
}
|
|
|
|
// Run the CLI. Report any failure and exit non-zero rather than leaving an
// unhandled promise rejection.
main().catch((err) => {
  console.error(err)
  process.exit(1)
})
|