// social-analyzer/modules/helper.js

// Module-wide settings; all of them are re-exported through the default export.

// When true, errors caught by the helpers below are printed with console.log.
const verbose = false
// NOTE(review): not referenced in the visible part of this file; presumably a
// shared list used by other modules for locking — confirm.
const global_lock = []
// NOTE(review): empty by default; presumably Google API credentials that the
// application fills in elsewhere — confirm.
const google_api_key = ''
const google_api_cs = ''
// NOTE(review): empty by default; presumably a remote browser-grid URL and an
// outbound proxy address consumed by other modules — confirm.
const grid_url = ''
const proxy = ''
// Assigned by setup_tecert() to the resolved path of eng.traineddata.
let tecert_file = ''

// Detection presets keyed by level name ('extreme', 'high'); `current` holds
// the name of the preset selected by default.
// NOTE(review): the meaning of fast / slow / detections / count / found is
// defined by the consumers of this object, which are not part of this file —
// confirm against them before changing any value.
const detection_level = {
  extreme: {
    fast: 'normal',
    slow: 'normal,advanced,ocr',
    detections: 'true',
    count: 1,
    found: 2
  },
  high: {
    fast: 'normal',
    slow: 'normal,advanced,ocr',
    detections: 'true,false',
    count: 2,
    found: 1
  },
  current: 'high'
}

// Blank profile record: every field starts as an empty string except `found`,
// which starts at 0.
// NOTE(review): presumably copied by callers for each profile they report —
// confirm that callers clone it rather than mutate this shared object.
const profile_template = {
  found: 0,
  username: '',
  image: '',
  link: '',
  rate: '',
  status: '',
  title: '',
  language: '',
  country: '',
  rank: '',
  text: '',
  type: '',
  metadata: '',
  extracted: '',
  good: '',
  method: ''
}

// Zero-initialised counters.
// NOTE(review): the keys mirror the detection types and outcomes listed in
// detection_level ('normal', 'advanced', 'ocr', 'true', 'false'); presumably
// incremented by the scanning code elsewhere — confirm.
const detected_websites = {
  normal: 0,
  advanced: 0,
  ocr: 0,
  true: 0,
  false: 0,
  count: 0
}

// Request options handed to https.get() by get_url_wrapper_json() and
// get_url_wrapper_text(); the only option set is a desktop Firefox User-Agent.
const header_options = {
  headers: {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0'
  }
}


import https from 'follow-redirects'
import fs from 'fs'
import url from 'url'
import {franc} from 'franc'
import langs from 'langs'
import cheerio from 'cheerio'
import path from 'path'
import slash from 'slash'
import colors from 'colors/safe.js'
import {QBIxora} from 'ixora'
import {fileURLToPath} from 'url';

// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Locations of the bundled data files and of the generated graph page, all
// relative to the parent of this module's directory and normalised to forward
// slashes.
const sites_json_path = slash(path.join(__dirname, '..', 'data', 'sites.json'))
const names_json_path = slash(path.join(__dirname, '..', 'data', 'names.json'))
const dict_json_path = slash(path.join(__dirname, '..', 'data', 'dict.json'))
// NOTE(review): this points at names.json, the same file as names_json_path,
// and the constant is not referenced in the visible part of this file. It
// looks like a copy/paste slip for a dedicated countries file — confirm which
// file is intended before relying on it.
const countries_json_path = slash(path.join(__dirname, '..', 'data', 'names.json'))
const public_graph_path = slash(path.join(__dirname, '..', 'public', 'graph.html'))

// Import-time side effect: build a throwaway QBIxora instance, have it write
// its base HTML to public/graph.html, then drop the reference.
// NOTE(review): the meaning of the second constructor argument (false) is
// defined by the ixora package — confirm against its documentation.
let temp_ixora = new QBIxora('Social-Analyzer', false)
temp_ixora.save_base_html(public_graph_path)
temp_ixora = null

// Bundled data, loaded synchronously once at import time.
// sites.json carries both tables, so it is read and parsed a single time
// instead of once per table.
const { websites_entries, shared_detections } = JSON.parse(fs.readFileSync(sites_json_path))
const parsed_names_origins = JSON.parse(fs.readFileSync(names_json_path))
const parsed_json = JSON.parse(fs.readFileSync(dict_json_path))
// NOTE(review): the country table used by find_country() is loaded from
// names.json, the same file as parsed_names_origins. Kept as-is because the
// intended source file cannot be confirmed from this module — verify.
const parsed_countries = JSON.parse(fs.readFileSync(names_json_path))

// Tail of the promise chain built by log_to_file_queue(); each call appends
// its write to this chain so log entries are written one after another.
let logs_queue = Promise.resolve()

// Case-insensitive patterns shared with other modules through the default export.
// NOTE(review): judging by the names, the first two are matched against page
// bodies and page titles to spot CAPTCHA / block pages — confirm with callers.
const strings_pages = /captcha-info|Please enable cookies|Completing the CAPTCHA/i
const strings_titles = /not found|blocked|attention required|cloudflare/i
// Matches "top<digits>" as a whole string and captures the digits.
const top_websites = /^top([0-9]+)$/i

/**
 * Build the relative path of the log file for a given id.
 *
 * Every character of `uuid` other than ASCII letters, digits and '-' is
 * removed before the file name is assembled, so the id cannot add path
 * separators or dots to the result.
 *
 * @param {string} uuid - id the log belongs to
 * @returns {string} 'logs/<sanitized id>_log.txt' with forward slashes
 */
function get_log_file (uuid) {
  const safe_id = uuid.replace(/[^a-zA-Z0-9\-]+/g, '')
  const file_name = `${safe_id}_log.txt`
  return slash(path.join('logs', file_name))
}

/**
 * Print a list of result objects to the console as coloured `key : value`
 * rows, with a separator line before the first item and after every item.
 *
 * The `extracted` and `metadata` fields are only printed when the matching
 * flag (`argv.extract` / `argv.metadata`) is truthy. When such a field is a
 * list, each entry is printed on its own line; values longer than 80
 * characters are cut when `argv.trim` is truthy.
 *
 * @param {Array<Object>} rows - items to print
 * @param {Object} [argv] - optional flags: extract, metadata, trim
 */
function print_log_table (rows, argv) {
  // argv is optional for callers; a missing one means "no flags set".
  const flags = argv || {}
  rows.forEach((item, index) => {
    if (index === 0) {
      console.log('-----------------------')
    }
    for (const [key, value] of Object.entries(item)) {
      const is_list_field = key === 'extracted' || key === 'metadata'
      if (!is_list_field) {
        console.log(colors.blue(key.padEnd(12)) + ' : ' + colors.yellow(value))
        continue
      }
      const enabled = (key === 'extracted') ? flags.extract : flags.metadata
      if (!enabled) {
        continue
      }
      if (value === 'unavailable') {
        console.log(colors.blue(key.padEnd(12)) + ' : ' + colors.yellow(value))
        continue
      }
      try {
        value.forEach((metadata_item, i) => {
          let temp_string_meta = colors.blue((key + ' ' + i).padEnd(13)) + ': '
          for (const [metadata_key, metadata_value] of Object.entries(metadata_item)) {
            const shown = (metadata_value.length > 80 && flags.trim)
              ? metadata_value.substring(0, 80).replace(/\r?\n|\r/g, '') + '..'
              : metadata_value.replace(/\r?\n|\r/g, '')
            temp_string_meta += colors.blue(metadata_key) + ' : ' + colors.yellow(shown) + ' '
          }
          console.log(temp_string_meta)
        })
      } catch (err) {
        // Best effort: a malformed entry must not abort the rest of the table.
        verbose && console.log(err)
      }
    }
    console.log('-----------------------')
  })
}

/**
 * Queue a log entry: append `msg` to the log file of `uuid`, then echo it to
 * the console. Entries are chained on `logs_queue`, so they are handled one
 * at a time in call order.
 *
 * The file path comes from get_log_file(), so the writer applies the same id
 * sanitisation as any reader that uses that helper.
 *
 * @param {string} uuid - id that selects the log file
 * @param {*} msg - text to log; with `table` set, an array of objects to print
 * @param {boolean} [table=false] - print `msg` as a table instead of as-is
 * @param {Object} [argv] - optional flags for the table (extract, metadata, trim)
 */
function log_to_file_queue (uuid, msg, table = false, argv = undefined) {
  logs_queue = logs_queue.then(function () {
    return new Promise(function (resolve) {
      const temp_log_file = get_log_file(String(uuid))
      fs.appendFile(temp_log_file, msg + '\n', function (err) {
        try {
          if (err) {
            verbose && console.log(err)
          }
          if (table) {
            print_log_table(msg, argv)
          } else {
            console.log(msg)
          }
        } catch (print_err) {
          verbose && console.log(print_err)
        } finally {
          // Always settle, otherwise every later log entry would wait forever.
          resolve()
        }
      })
    })
  })
}

/**
 * Read the language declared by a page through `<html lang="...">`.
 *
 * The attribute is a language tag that may carry region or script subtags
 * ("en-US", "pt_BR"); only the primary subtag is looked up, lower-cased, as
 * an ISO 639-1 code.
 *
 * @param {string} body - HTML source of the page
 * @returns {string} language name, or 'unavailable' when the attribute is
 *   missing, empty, unknown, or the page cannot be parsed
 */
function get_language_by_parsing (body) {
  let language = 'unavailable'
  try {
    const $ = cheerio.load(body)
    const declared = $('html').attr('lang')
    if (typeof declared === 'string' && declared.trim() !== '') {
      const code = declared.trim().split(/[-_]/)[0].toLowerCase()
      // Single lookup; a miss yields a falsy result.
      const match = langs.where('1', code)
      if (match) {
        language = match.name
      }
    }
  } catch (err) {
    verbose && console.log(err)
  }
  return language
}

/**
 * Guess the language of a piece of text with franc.
 *
 * @param {string} text - text to analyse; '' and 'unavailable' are skipped
 * @returns {string} '<language name> (Maybe)', or 'unavailable' when the text
 *   is skipped, franc reports 'und', or the detected code is unknown to langs
 */
function get_language_by_guessing (text) {
  let language = 'unavailable'
  try {
    if (text !== 'unavailable' && text !== '') {
      const code = franc(text)
      if (code !== 'und') {
        // Single lookup by ISO 639-3 code; a miss yields a falsy result.
        const match = langs.where('3', code)
        if (match) {
          language = match.name + ' (Maybe)'
        }
      }
    }
  } catch (err) {
    verbose && console.log(err)
  }

  return language
}

/**
 * Extract the site host from a URL template.
 *
 * The first '{username}' placeholder is swapped for a dummy name so the
 * template parses as a URL; if that dummy ends up as a sub-domain label it is
 * removed from the host again.
 *
 * @param {string} _url - URL, optionally containing '{username}'
 * @returns {string} host name of the URL
 * @throws {TypeError} when no host name can be parsed from the URL
 */
function get_site_from_url (_url) {
  const placeholder = 'nothinghere'
  const concrete_url = _url.replace('{username}', placeholder)
  const { hostname } = url.parse(concrete_url)
  return hostname.replace(placeholder + '.', '')
}

/**
 * GET a URL with the shared request headers and parse the response body as JSON.
 *
 * @param {string} url - address to fetch
 * @param {number} [time=2] - timeout in seconds; 0 selects 5 seconds
 * @returns {Promise<{data: *}|undefined>} `{ data: <parsed JSON> }` on
 *   success; `undefined` when the request fails, times out, or the body is
 *   not valid JSON (the error is logged when `verbose` is on)
 */
async function get_url_wrapper_json (url, time = 2) {
  const timeout = (time !== 0) ? time * 1000 : 5000
  try {
    return await new Promise((resolve, reject) => {
      const request = https.https.get(url, header_options, function (res) {
        let body = ''
        res.on('data', function (chunk) {
          body += chunk
        })
        res.on('error', reject)
        res.on('end', function () {
          // Parse inside try/catch: a throw from an event handler would
          // bypass the promise and surface as an uncaught exception.
          try {
            resolve({
              data: JSON.parse(body.toString())
            })
          } catch (err) {
            reject(err)
          }
        })
      })
      request.setTimeout(timeout, function () {
        // Stop the transfer as well, not just the wait for it.
        request.abort()
        reject(new Error('request timed out after ' + timeout + ' ms'))
      })
      request.on('error', reject)
    })
  } catch (err) {
    verbose && console.log(err)
  }
}

/**
 * GET a URL with the shared request headers and return its status and body.
 *
 * @param {string} url - address to fetch
 * @param {number} [time=2] - timeout in seconds; 0 selects 5 seconds
 * @returns {Promise<Array>} `[statusCode, body]` on success, or
 *   `[500, 'error-get-url']` when the request fails or times out (the error
 *   is logged when `verbose` is on)
 */
async function get_url_wrapper_text (url, time = 2) {
  const fallback_status = 500
  const fallback_body = 'error-get-url'
  const timeout = (time !== 0) ? time * 1000 : 5000
  try {
    const [status, body] = await new Promise((resolve, reject) => {
      const request = https.https.get(url, header_options, function (res) {
        let received = ''
        res.on('data', function (chunk) {
          received += chunk
        })
        res.on('error', reject)
        res.on('end', function () {
          resolve([res.statusCode, received])
        })
      })
      request.setTimeout(timeout, function () {
        // Stop the transfer as well, not just the wait for it.
        request.abort()
        reject(new Error('request timed out after ' + timeout + ' ms'))
      })
      request.on('error', reject)
    })
    return [status, body]
  } catch (err) {
    verbose && console.log(err)
    return [fallback_status, fallback_body]
  }
}

/**
 * Sort comparator that orders objects by a percentage string, highest first.
 *
 * The value under `key` is expected to look like '%85.5'; the first '%' is
 * removed and the rest is parsed as a base-10 integer, so fractions are
 * ignored.
 *
 * Side effect: an empty-string value under `key` is overwritten with '%0.0'
 * on the object that was passed in. This is kept deliberately because
 * callers may display the normalised value.
 *
 * @param {Object} object1 - first item
 * @param {Object} object2 - second item
 * @param {string} key - property that holds the percentage string
 * @returns {number} -1 when object1 ranks first, 1 when object2 ranks first,
 *   0 when they tie or either value cannot be processed
 */
function compare_objects (object1, object2, key) {
  try {
    if (object1[key] === '') {
      object1[key] = '%0.0'
    }
    if (object2[key] === '') {
      object2[key] = '%0.0'
    }
    // Explicit radix so a value is never read as hexadecimal.
    const first = Number.parseInt(object1[key].replace('%', ''), 10)
    const second = Number.parseInt(object2[key].replace('%', ''), 10)
    if (first > second) {
      return -1
    }
    if (first < second) {
      return 1
    }
    return 0
  } catch (err) {
    return 0
  }
}

/**
 * Look up a code in the `parsed_countries` table, ignoring letter case.
 *
 * @param {string} code - key to look up; it is upper-cased before the lookup
 * @returns {*} the table entry for the code, or '' when there is none or
 *   `code` is not a string
 */
function find_country (code) {
  try {
    const lookup_key = code.toUpperCase()
    return (lookup_key in parsed_countries) ? parsed_countries[lookup_key] : ''
  } catch (error) {
    // Non-string input (undefined, numbers, ...) simply yields no match.
    return ''
  }
}

/**
 * Make sure the Tesseract English data file (eng.traineddata) is present in
 * the current working directory, downloading it when missing, and record its
 * location in the module-level `tecert_file`.
 *
 * The download only counts as successful once the file has been fully
 * written. On any failure (network error, non-200 status, timeout, write
 * error) the partial file is removed so that a later call retries instead of
 * treating a truncated file as valid.
 *
 * NOTE(review): the file is checked and written relative to the working
 * directory, while `tecert_file` is resolved against this module's
 * directory. That is kept unchanged here; confirm which location consumers
 * expect.
 *
 * @returns {Promise<void>} settles when the check or download has finished;
 *   it does not reject on download failure
 */
async function setup_tecert () {
  const trained_data = 'eng.traineddata'
  const trained_data_url = 'https://raw.githubusercontent.com/tesseract-ocr/tessdata/master/eng.traineddata'
  let usable = true

  if (!fs.existsSync(trained_data)) {
    const get_eng = await new Promise((resolve) => {
      const file = fs.createWriteStream(trained_data)
      let request = null
      let settled = false

      // Single failure path: stop the transfer, drop the partial file, report 0.
      const fail = function (err) {
        if (settled) {
          return
        }
        settled = true
        verbose && err && console.log(err)
        if (request) {
          request.abort()
        }
        file.destroy()
        fs.unlink(trained_data, function () {
          resolve(0)
        })
      }

      file.on('error', fail)
      file.on('finish', function () {
        if (settled) {
          return
        }
        settled = true
        file.close(function () {
          resolve(1)
        })
      })

      request = https.https.get(trained_data_url, function (response) {
        if (response.statusCode !== 200) {
          response.resume()
          fail(new Error('unexpected status ' + response.statusCode + ' while downloading ' + trained_data))
          return
        }
        response.on('error', fail)
        response.on('aborted', function () {
          fail(new Error('download of ' + trained_data + ' was interrupted'))
        })
        response.pipe(file)
      })
      // Registered on the request itself so a stalled connection is covered too.
      request.setTimeout(12000, function () {
        fail(new Error('download of ' + trained_data + ' timed out'))
      })
      request.on('error', fail)
    })
    usable = get_eng === 1
  }

  if (usable && tecert_file === '' && fs.existsSync(trained_data)) {
    tecert_file = path.resolve(__dirname, trained_data)
  }
}

// Public surface of the helper module: settings, loaded data tables and the
// helper functions defined above, exported as one object.
// NOTE(review): `tecert_file` is copied into this object when the module is
// evaluated, so a later assignment made by setup_tecert() changes the
// module-level variable but not this exported property — confirm whether
// consumers expect to see the updated path.
export default {
  strings_pages,
  strings_titles,
  top_websites,
  tecert_file,
  setup_tecert,
  compare_objects,
  get_log_file,
  find_country,
  profile_template,
  detection_level,
  detected_websites,
  shared_detections,
  get_language_by_parsing,
  get_language_by_guessing,
  websites_entries,
  parsed_names_origins,
  parsed_json,
  verbose,
  global_lock,
  google_api_key,
  google_api_cs,
  grid_url,
  header_options,
  proxy,
  get_site_from_url,
  log_to_file_queue,
  get_url_wrapper_text,
  get_url_wrapper_json
}