mirror of
https://github.com/qeeqbox/social-analyzer.git
synced 2026-05-26 23:04:16 +00:00
288 lines
8.4 KiB
JavaScript
Executable File
288 lines
8.4 KiB
JavaScript
Executable File
const helper = require('./helper.js')
|
|
const {
|
|
findWord
|
|
} = require('most-common-words-by-language')
|
|
|
|
const WordsNinjaPack = require('wordsninja')
|
|
const WordsNinja = new WordsNinjaPack()
|
|
|
|
/**
 * Looks up every distinct word of `all_words` with `findWord` and appends one
 * entry per recognised word to `temp_words`, listing each match as `key:value`.
 *
 * Words of length 0 or 1 are ignored and every word is looked up only once,
 * even when it appears in several lists.
 *
 * @param {Object<string, string[]>} all_words - word lists keyed by category
 * @param {Array<{word: string, languages: string}>} temp_words - output list, appended to in place
 * @returns {void}
 */
function most_common1 (all_words, temp_words) {
  // A Set gives a constant-time "already handled" check instead of rescanning an array
  const seen = new Set()
  Object.keys(all_words).forEach(function (key) {
    all_words[key].forEach(function (item) {
      if (seen.has(item) || !(item.length > 1)) {
        return
      }
      seen.add(item)
      // presumably findWord returns { language: value } pairs - TODO confirm against the package docs
      const languages = Object.entries(findWord(item))
      if (languages.length === 0) {
        return
      }
      // Descending sort followed by reverse() is kept as-is so that the final
      // order (lowest value first, ties in reversed insertion order) does not change
      languages.sort((first, second) => second[1] - first[1]).reverse()
      temp_words.push({
        word: item,
        languages: languages.map(pair => pair.join(':')).join(' ')
      })
    })
  })
}
|
|
|
|
/**
 * Looks up every distinct word of `all_words` with `findWord` and appends one
 * entry per recognised word to `temp_words`, listing the matching keys
 * separated by ", ".
 *
 * Words of length 0 or 1 are ignored and every word is looked up only once,
 * even when it appears in several lists.
 *
 * @param {Object<string, string[]>} all_words - word lists keyed by category
 * @param {Array<{word: string, languages: string}>} temp_words - output list, appended to in place
 * @returns {Promise<void>} resolves once `temp_words` has been filled
 */
async function most_common (all_words, temp_words) {
  // A Set gives a constant-time "already handled" check instead of rescanning an array
  const seen = new Set()
  Object.keys(all_words).forEach(function (key) {
    all_words[key].forEach(function (item) {
      if (seen.has(item) || !(item.length > 1)) {
        return
      }
      seen.add(item)
      // presumably findWord returns { language: value } pairs - TODO confirm against the package docs
      const languages = Object.entries(findWord(item))
      if (languages.length === 0) {
        return
      }
      // Descending sort followed by reverse() is kept as-is so that the final
      // order (lowest value first, ties in reversed insertion order) does not change
      languages.sort((first, second) => second[1] - first[1]).reverse()
      temp_words.push({
        word: item,
        languages: languages.map(pair => pair[0]).join(', ')
      })
    })
  })
}
|
|
|
|
/**
 * Splits the request string with WordsNinja and adds every resulting word that
 * is not yet present in any list of `all_words` to `all_words.maybe`.
 *
 * @param {{body: {string: string}}} req - request carrying the string to split
 * @param {Object<string, string[]>} all_words - word lists keyed by category; `maybe` is appended to in place
 * @returns {void}
 */
function find_other (req, all_words) {
  const words = WordsNinja.splitSentence(req.body.string)
  // The set of categories does not change while words are being added to `maybe`
  const categories = Object.keys(all_words)

  words.forEach(function (word) {
    // some() stops at the first list that already holds the word
    const known = categories.some(key => all_words[key].includes(word))
    if (!known && !all_words.maybe.includes(word)) {
      all_words.maybe.push(word)
    }
  })
}
|
|
|
|
/**
 * Removes the first occurrence of `sub_string` from `str`, then strips spaces
 * and symbol characters from what is left.
 *
 * When `sub_string` does not occur in `str`, the whole of `str` is kept (and
 * still stripped of symbols) instead of losing its leading characters.
 *
 * @param {string} str - text to clean
 * @param {string} sub_string - fragment to cut out once
 * @returns {string} the remaining characters without spaces or symbols
 */
function remove_word (str, sub_string) {
  const start = str.indexOf(sub_string)
  // indexOf() yields -1 for a missing fragment; slicing with that offset would drop real characters
  const remainder = start === -1
    ? str
    : str.slice(0, start) + str.slice(start + sub_string.length)
  // Same symbol class as find_symbols (the original listed `"` twice and omitted `'`)
  return remainder.replace(/[ \[\]:"\\|,.<>\/?~`!@#$%^&*()_+\-={};']/g, '')
}
|
|
|
|
// True when `candidate` is a non-empty string longer than one character that none of `lists` holds yet.
function _is_new_leftover (candidate, lists) {
  return candidate !== null && candidate !== '' && candidate.length > 1 &&
    lists.every(list => !list.includes(candidate))
}

// Records each entry that `matches` the text under all_words[category] and stores the text
// left over after removing that entry in all_words.unknown.
function _collect_known_tokens (text, entries, all_words, category, matches) {
  entries.forEach(function (entry) {
    if (!matches(text, entry) || all_words[category].includes(entry)) {
      return
    }
    all_words[category].push(entry)
    const leftover = remove_word(text, entry)
    if (_is_new_leftover(leftover, [all_words.unknown, all_words.maybe])) {
      all_words.unknown.push(leftover)
    }
  })
}

// Removes every matching token from the current unknown words and returns a new list made of
// the unknown words followed by the leftovers that were not already among them.
function _expand_unknown (all_words, tokens, matches) {
  const extra = []
  tokens.forEach(function (token) {
    all_words.unknown.forEach(function (word) {
      if (!matches(word, token)) {
        return
      }
      const leftover = remove_word(word, token)
      if (_is_new_leftover(leftover, [extra, all_words.maybe])) {
        extra.push(leftover)
      }
    })
  })
  return all_words.unknown.concat(extra.filter(item => !all_words.unknown.includes(item)))
}

/**
 * Breaks the request string down using the prefix and name lists from
 * `helper.parsed_json` and the numbers already collected in `all_words`.
 *
 * Steps, in order:
 *  1. prefixes the string starts with go to `all_words.prefix`;
 *  2. male and female names contained in the string go to `all_words.name`;
 *  3. for each of those hits, the rest of the string goes to `all_words.unknown`;
 *  4. unknown words starting with a found prefix are added again without it;
 *  5. unknown words containing a found number are added again without it.
 *
 * `all_words.unknown` is replaced by a new array in steps 4 and 5; the other
 * lists are modified in place.
 *
 * @param {{body: {uuid: string, string: string}}} req - request carrying the string and the log id
 * @param {Object<string, string[]>} all_words - needs `prefix`, `name`, `number`, `unknown` and `maybe` lists
 * @returns {Promise<void>}
 */
async function analyze_string (req, all_words) {
  helper.log_to_file_queue(req.body.uuid, '[Starting] String analysis')
  const string_to_check = req.body.string
  const starts_with = (text, token) => text.startsWith(token)
  const contains = (text, token) => text.includes(token)

  _collect_known_tokens(string_to_check, helper.parsed_json.prefix, all_words, 'prefix', starts_with)
  _collect_known_tokens(string_to_check, helper.parsed_json.m_names, all_words, 'name', contains)
  _collect_known_tokens(string_to_check, helper.parsed_json.f_names, all_words, 'name', contains)

  // The number pass runs on the list produced by the prefix pass, so the order matters
  all_words.unknown = _expand_unknown(all_words, all_words.prefix, starts_with)
  all_words.unknown = _expand_unknown(all_words, all_words.number, contains)

  helper.log_to_file_queue(req.body.uuid, '[Done] String analysis')
}
|
|
|
|
/**
 * Splits the request string on commas and adds each piece, lowercased, to
 * `all_words.unknown`.
 *
 * A piece is skipped when it is shorter than two characters or when its
 * lowercased form is already in `all_words.unknown` or `all_words.maybe`.
 * A missing or non-string input is ignored.
 *
 * @param {{body: {string: string}}} req - request carrying the string to split
 * @param {{unknown: string[], maybe: string[]}} all_words - `unknown` is appended to in place
 * @returns {Promise<void>}
 */
async function split_comma (req, all_words) {
  const text = req && req.body ? req.body.string : undefined
  if (typeof text !== 'string') {
    return
  }
  text.split(',').forEach((item) => {
    // Compare the value that is actually stored, otherwise "Bob" and "BOB" both get in as "bob"
    const word = item.toLowerCase()
    if (item.length > 1 && !all_words.unknown.includes(word) && !all_words.maybe.includes(word)) {
      all_words.unknown.push(word)
    }
  })
}
|
|
|
|
/**
 * Collects every run made of one uppercase ASCII letter followed by lowercase
 * ASCII letters (e.g. "John" in "JohnDoe") and adds it, lowercased, to
 * `all_words.unknown`.
 *
 * A run is skipped when its lowercased form is already in `all_words.unknown`
 * or `all_words.maybe`. A missing or non-string input is ignored.
 *
 * @param {{body: {string: string}}} req - request carrying the string to scan
 * @param {{unknown: string[], maybe: string[]}} all_words - `unknown` is appended to in place
 * @returns {Promise<void>}
 */
async function split_upper_case (req, all_words) {
  const text = req && req.body ? req.body.string : undefined
  if (typeof text !== 'string') {
    return
  }
  // match() returns null, not an empty array, when nothing matches
  const parts = text.match(/[A-Z][a-z]+/g) || []
  parts.forEach((item) => {
    // Every match starts with a capital, so only the lowercased form can be found in the lists
    const word = item.toLowerCase()
    if (item.length > 1 && !all_words.unknown.includes(word) && !all_words.maybe.includes(word)) {
      all_words.unknown.push(word)
    }
  })
}
|
|
|
|
/**
 * Collects every run of ASCII letters in the request string and adds it,
 * lowercased, to `all_words.unknown`.
 *
 * A run is skipped when it is shorter than two characters or when its
 * lowercased form is already in `all_words.unknown` or `all_words.maybe`.
 * A missing or non-string input is ignored.
 *
 * @param {{body: {string: string}}} req - request carrying the string to scan
 * @param {{unknown: string[], maybe: string[]}} all_words - `unknown` is appended to in place
 * @returns {Promise<void>}
 */
async function split_alphabet_case (req, all_words) {
  const text = req && req.body ? req.body.string : undefined
  if (typeof text !== 'string') {
    return
  }
  // match() returns null, not an empty array, when nothing matches
  const parts = text.match(/[A-Za-z]+/g) || []
  parts.forEach((item) => {
    // Compare the value that is actually stored, otherwise "John" and "john" both get in
    const word = item.toLowerCase()
    if (item.length > 1 && !all_words.unknown.includes(word) && !all_words.maybe.includes(word)) {
      all_words.unknown.push(word)
    }
  })
}
|
|
|
|
/**
 * Adds every distinct symbol character found in the request string to
 * `all_words.symbol`. Spaces are matched by the pattern but never stored.
 *
 * A missing or non-string input is ignored.
 *
 * @param {{body: {string: string}}} req - request carrying the string to scan
 * @param {{symbol: string[]}} all_words - `symbol` is appended to in place
 * @returns {Promise<void>}
 */
async function find_symbols (req, all_words) {
  const text = req && req.body ? req.body.string : undefined
  if (typeof text !== 'string') {
    return
  }
  // match() returns null, not an empty array, when nothing matches
  const symbols = text.match(/[ \[\]:"\\|,.<>\/?~`!@#$%^&*()_+\-={};']/gi) || []
  symbols.forEach((item) => {
    if (item !== ' ' && !all_words.symbol.includes(item)) {
      all_words.symbol.push(item)
    }
  })
}
|
|
|
|
/**
 * Adds every distinct run of digits found in the request string to
 * `all_words.number`, as strings.
 *
 * A missing or non-string input is ignored.
 *
 * @param {{body: {string: string}}} req - request carrying the string to scan
 * @param {{number: string[]}} all_words - `number` is appended to in place
 * @returns {Promise<void>}
 */
async function find_numbers (req, all_words) {
  const text = req && req.body ? req.body.string : undefined
  if (typeof text !== 'string') {
    return
  }
  // match() returns null, not an empty array, when nothing matches
  const numbers = text.match(/(\d+)/g) || []
  numbers.forEach((item) => {
    if (!all_words.number.includes(item)) {
      all_words.number.push(item)
    }
  })
}
|
|
|
|
/**
 * Rewrites `req.body.string` in place, replacing the digits 0, 1, 2, 3, 4, 5,
 * 7 and 8 with the letters o, l, z, e, a, s, t and b. The digits 6 and 9 and
 * all other characters are kept.
 *
 * A missing or non-string value is left untouched.
 *
 * @param {{body: {string: string}}} req - request whose `body.string` is rewritten
 * @returns {Promise<void>}
 */
async function convert_numbers (req) {
  const numbers_to_letters = {
    4: 'a',
    8: 'b',
    3: 'e',
    1: 'l',
    0: 'o',
    5: 's',
    7: 't',
    2: 'z'
  }

  const text = req && req.body ? req.body.string : undefined
  // Guard first: a value without a usable length (e.g. a number) used to be overwritten with ''
  if (typeof text !== 'string') {
    return
  }
  // The class covers exactly the keys of the table above: 0-5, 7 and 8
  req.body.string = text.replace(/[0-578]/g, digit => numbers_to_letters[digit])
}
|
|
|
|
/**
 * Loads the WordsNinja dictionary, splits the request string into words and
 * stores the distinct words longer than one character in `all_words.maybe`,
 * in order of first appearance. Any previous `maybe` list is replaced.
 *
 * @param {{body: {string: string}}} req - request carrying the string to split
 * @param {{maybe: string[]}} all_words - `maybe` is overwritten
 * @returns {Promise<void>} resolves once `all_words.maybe` has been set
 */
async function get_maybe_words (req, all_words) {
  await WordsNinja.loadDictionary()
  // splitSentence() is used synchronously elsewhere in this file, so no await is needed here
  const words = WordsNinja.splitSentence(req.body.string)
  // A Set keeps the first occurrence of each word and drops later repeats
  all_words.maybe = [...new Set(words)].filter(word => word.length > 1)
}
|
|
|
|
/**
 * Scans the request string for 4-digit and 2-digit numbers and tries to read
 * each one as a birth year or an age.
 *
 * Interpretation of a number `n` (current year taken from the system clock):
 *  - n >= 1950      -> birth year n
 *  - 50 <= n <= 99  -> birth year 1900 + n
 *  - 14 <= n <= 49  -> age n (birth year = current year - n)
 *  - anything else  -> no guess
 * A guess is only reported when the resulting age is between 0 and 75.
 *
 * @param {{body: {string: string}}} req - request carrying the string to scan
 * @returns {Promise<Array<{found: string, year: string, age: string}>>} one entry per
 *   number found, in order; `year` and `age` are empty strings when no guess was made.
 *   An empty list is returned when the request has no string.
 */
async function guess_age_from_string (req) {
  const results = []
  const text = req && req.body ? req.body.string : undefined
  if (text === undefined || text === null) {
    return results
  }

  const haystack = String(text)
  // Four digits are tried first so "1990" is not read as "19" and "90"
  const number_pattern = /\d{4}|\d{2}/g
  const current_year = new Date().getFullYear()
  const max_age = 75

  // Declared locally: assigning to an undeclared `match` throws in strict mode and leaks a global otherwise
  let match
  while ((match = number_pattern.exec(haystack)) !== null) {
    const entry = { found: match[0], year: '', age: '' }
    const found = parseInt(match[0], 10)

    let year = null
    if (found >= 1950) {
      year = found
    } else if (found >= 50 && found <= 99) {
      year = found + 1900
    } else if (found >= 14 && found <= 49) {
      year = current_year - found
    }

    if (year !== null) {
      const age = current_year - year
      // A year in the future would give a negative age, which is not a usable guess
      if (age >= 0 && age <= max_age) {
        entry.year = year.toString()
        entry.age = age.toString()
      }
    }

    results.push(entry)
  }

  return results
}
|
|
|
|
module.exports = {
|
|
get_maybe_words,
|
|
find_symbols,
|
|
find_numbers,
|
|
convert_numbers,
|
|
split_comma,
|
|
split_upper_case,
|
|
split_alphabet_case,
|
|
most_common,
|
|
find_other,
|
|
analyze_string,
|
|
guess_age_from_string
|
|
}
|