diff --git a/index.js b/index.js
index 3fd290c..090dd6c 100644
--- a/index.js
+++ b/index.js
@@ -1,4 +1,5 @@
 const axios = require('axios').default
+const Database = require('better-sqlite3')
 const cheerio = require('cheerio')
 const flatten = require('lodash.flatten')
 const RSSParser = require('rss-parser')
@@ -8,9 +9,13 @@ const ucfirst = require('ucfirst')
 const luxon = require('luxon')
 const DateTime = luxon.DateTime
 
-const parser = new RSSParser()
+function db(options={}) {
+  const dbPath = process.env.DSS_DB || './main.db'
+  return new Database(dbPath, options)
+}
 
-async function getFeed(feedURL='https://dailystormer.su/feed/') {
+async function fetchFeed(feedURL='https://dailystormer.su/feed/') {
+  const parser = new RSSParser()
   const res = await axios.get(feedURL)
   const feed = await parser.parseString(res.data)
   return feed
@@ -21,8 +26,8 @@ async function getFeed(feedURL='https://dailystormer.su/feed/') {
  * @param {String} feedURL - URL of the feed
  * @returns {Object} parsed RSS feed
  */
-async function getArticlesFromFeed(feedURL='https://dailystormer.su/feed/') {
-  const feed = await getFeed(feedURL);
+async function fetchArticlesFromFeed(feedURL='https://dailystormer.su/feed/') {
+  const feed = await fetchFeed(feedURL);
   return feed.items.map((a) => {
     const slug = url.parse(a.link).pathname
     const article = {
@@ -45,12 +50,13 @@ async function scanCategory(baseURL, category, options={}) {
   // scan first page of category to find out how many pages we have.
   const res = await axios.get(categoryURL)
   const $ = cheerio.load(res.data)
-  const p = parseInt($('.pagination .pages').text().split(/\s/)[3])
+  let p
+  p = (options.pages) ? options.pages : parseInt($('.pagination .pages').text().split(/\s/)[3])
   const pages = []
   for (let i = 1; i <= p; i++) {
     pages.push(i)
   }
-  const scans = await Bluebird.map(pages, (n) => { return scanCategoryPage(baseURL, category, n) }, { concurrency: 4 })
+  const scans = await Bluebird.map(pages, (n) => { return scanCategoryPage(baseURL, category, n) }, { concurrency: 8 })
   const flatScans = flatten(scans)
   return flatScans
 }
@@ -103,7 +109,7 @@ async function articleExists(db, slug) {
 }
 
 // TODO rename to fetchArticle
-async function getArticle(url) {
+async function fetchArticle(url) {
   const res = await axios.get(url)
   const $ = cheerio.load(res.data)
   const $article = $('article')
@@ -378,15 +384,51 @@ async function countArticlesByCategory(db, name) {
 
 // SECTION: Tags
 
+async function getArticlesByTag(db, name, limit, offset) {
+  const articles = db.prepare(`
+    SELECT a.slug, a.title, a.author, a.published_date
+    FROM article a
+    JOIN article__tag ac ON a.id = ac.article_id
+    JOIN tag c ON c.id = ac.tag_id
+    WHERE c.name = @name
+    ORDER BY published_date DESC
+    LIMIT @limit
+    OFFSET @offset
+  `).all({ name, limit, offset })
+  const articlesWithMonth = articles.map((art) => {
+    const d = DateTime.fromISO(art.published_date).setZone('UTC')
+    art.month = d.toFormat('MMMM y')
+    return art
+  })
+  return articlesWithMonth
+}
+
+async function countArticlesByTag(db, name) {
+  const count = db.prepare(`
+    SELECT count(*) AS c
+    FROM article a
+    JOIN article__tag at ON a.id = at.article_id
+    JOIN tag c ON c.id = at.tag_id
+    WHERE c.name = @name
+  `).get({ name })
+  return count.c
+}
+
+async function countTags(db) {
+  const count = db.prepare(`SELECT count(*) c FROM tag`).get()
+  return count.c
+}
+
 module.exports = {
-  getFeed,
-  getArticlesFromFeed,
+  db,
+  fetchFeed,
+  fetchArticlesFromFeed,
   scanCategory,
   scanCategoryPage,
   unhyphen,
   extractTaxonomyFromArticle,
   articleExists,
-  getArticle,
+  fetchArticle,
   getCategory,
   getTag,
   assocCategory,