mirror of
https://github.com/qeeqbox/social-analyzer.git
synced 2026-05-29 17:08:26 +00:00
[special request] added dynamic language detection - ref ef25c868
This commit is contained in:
@@ -12,6 +12,7 @@ This project *is currently used by some law enforcement agencies in countries wh
|
||||
**All Pull Requests are welcomed!**
|
||||
|
||||
## Updates
|
||||
- Added dynamic language detections for profiles
|
||||
- Added --websites "all" for python & NodeJS CLI (You can scan ALL the websites within 2~10 seconds) 👏👏👏
|
||||
- Added a new refactored version
|
||||
|
||||
@@ -55,7 +56,7 @@ Profile images **will not** be blurred. If you want them to be blurred, turn tha
|
||||
## Special Detections
|
||||
- Facebook (Phone number, name or profile name)
|
||||
- Gmail (example@gmail.com)
|
||||
- Google (example@example.com)
|
||||
- Google (example@example.com)
|
||||
|
||||
## Install and run as web app (NodeJS + NPM + Firefox)
|
||||
```bash
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
{"version":"2021.V.2.11",
|
||||
{"version":"2021.V.2.12",
|
||||
"build":"pass",
|
||||
"test":"pass",
|
||||
"grid_test":"pass",
|
||||
"websites":"342",
|
||||
"detections":"939",
|
||||
"special":"3",
|
||||
"full_scan":"2-10s",
|
||||
"full_scan":"2-8s",
|
||||
"awaiting_verification":"22",
|
||||
"auto_testing":"7734d352-8446-4cda-99f1-73b5a263ac31"}
|
||||
"auto_testing":"e1c95116-4579-4922-bc33-25e56308cd4c"}
|
||||
|
||||
+9
-1
@@ -1,7 +1,7 @@
|
||||
var helper = require("./helper.js")
|
||||
var async = require("async");
|
||||
var sanitizeHtml = require("sanitize-html");
|
||||
const {
|
||||
var {
|
||||
htmlToText
|
||||
} = require('html-to-text');
|
||||
var cheerio = require('cheerio');
|
||||
@@ -35,12 +35,14 @@ async function find_username_site(uuid, username, options, site) {
|
||||
var source = body;
|
||||
var text_only = "unavailable";
|
||||
var title = "unavailable";
|
||||
var language = "unavailable"
|
||||
var temp_profile = {
|
||||
"found": 0,
|
||||
"image": "",
|
||||
"link": "",
|
||||
"rate": "",
|
||||
"title": "",
|
||||
"language": "",
|
||||
"text": "",
|
||||
"type": ""
|
||||
};
|
||||
@@ -79,7 +81,13 @@ async function find_username_site(uuid, username, options, site) {
|
||||
helper.verbose && console.log(err);
|
||||
}
|
||||
|
||||
language = helper.get_language_by_parsing(body)
|
||||
if (language == "unavailable"){
|
||||
language = helper.get_language_by_guessing(temp_profile.text)
|
||||
}
|
||||
|
||||
temp_profile.title = title;
|
||||
temp_profile.language = language;
|
||||
temp_profile.rate = "%" + ((temp_profile["found"] / detections_count) * 100).toFixed(2);
|
||||
temp_profile.link = site.url.replace("{username}", username);
|
||||
temp_profile.type = site.type
|
||||
|
||||
@@ -13,6 +13,9 @@ var header_options = {
|
||||
var https = require("follow-redirects").https;
|
||||
var fs = require("fs");
|
||||
var url = require("url");
|
||||
var franc = require('franc');
|
||||
var langs = require('langs');
|
||||
var cheerio = require('cheerio');
|
||||
|
||||
var parsed_sites = JSON.parse(fs.readFileSync("sites.json"));
|
||||
var logs_queue = Promise.resolve();
|
||||
@@ -28,6 +31,36 @@ function log_to_file_queue(uuid, msg) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Detect a page's language from the `lang` attribute of its <html> element.
 *
 * The attribute is looked up through `langs` as an ISO 639-1 code. Region or
 * script qualified tags (e.g. "en-US", "pt_BR") are reduced to their primary
 * subtag first, because the ISO 639-1 lookup only knows the two-letter part.
 *
 * @param {string} body - Raw HTML source of the page.
 * @returns {string} The language name reported by `langs`, or "unavailable"
 *   when the attribute is missing, empty, unknown, or parsing fails.
 */
function get_language_by_parsing(body) {
    var language = "unavailable";
    try {
        var $ = cheerio.load(body);
        var code = $("html").attr("lang");
        // attr() yields undefined when <html> has no lang attribute, so a
        // comparison against "" alone is not enough to rule out a miss.
        if (typeof code === "string" && code.trim() !== "") {
            var primary = code.trim().split(/[-_]/)[0].toLowerCase();
            var match = langs.where("1", primary);
            // where() yields undefined for codes it does not list.
            if (match && match.name) {
                language = match.name;
            }
        }
    } catch (err) {
        // This module exports `verbose` directly; no `helper` binding is
        // visible here, so referencing helper.verbose would throw from inside
        // the catch block and escape the function.
        verbose && console.log(err);
    }
    return language;
}
|
||||
|
||||
/**
 * Guess the language of a piece of text with `franc`.
 *
 * `franc` returns an ISO 639-3 code ("und" when it cannot decide), which is
 * then translated to a language name through `langs`. The result is suffixed
 * with " (Maybe)" to mark it as a statistical guess rather than a declared
 * value.
 *
 * @param {string} text - Plain text to analyse; "unavailable" and "" are
 *   treated as "no text".
 * @returns {string} "<Language name> (Maybe)", or "unavailable" when there is
 *   no usable text, the language is undetermined, or `langs` has no entry for
 *   the detected code.
 */
function get_language_by_guessing(text) {
    var language = "unavailable";
    try {
        if (typeof text === "string" && text !== "unavailable" && text !== "") {
            var code = franc(text);
            if (code !== "und") {
                var match = langs.where("3", code);
                // franc can report ISO 639-3 codes that langs does not list;
                // where() then yields undefined instead of an entry.
                if (match && match.name) {
                    language = match.name + " (Maybe)";
                }
            }
        }
    } catch (err) {
        // This module exports `verbose` directly; no `helper` binding is
        // visible here, so referencing helper.verbose would throw from inside
        // the catch block and escape the function.
        verbose && console.log(err);
    }
    return language;
}
|
||||
|
||||
function get_site_from_url(_url) {
|
||||
temp = url.parse(_url.replace("{username}", "nothinghere")).hostname
|
||||
return temp.replace("nothinghere.", "")
|
||||
@@ -98,6 +131,8 @@ async function get_url_wrapper_text(url, time = 5) {
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
get_language_by_parsing,
|
||||
get_language_by_guessing,
|
||||
parsed_sites,
|
||||
verbose,
|
||||
google_api_key,
|
||||
|
||||
@@ -74,6 +74,7 @@ async function find_username_site(uuid, username, options, site) {
|
||||
|
||||
var source = "";
|
||||
var data = "";
|
||||
var language = "unavailable"
|
||||
var text_only = "unavailable";
|
||||
var title = "unavailable";
|
||||
var temp_profile = {
|
||||
@@ -82,6 +83,7 @@ async function find_username_site(uuid, username, options, site) {
|
||||
"link": "",
|
||||
"rate": "",
|
||||
"title": "",
|
||||
"language": "",
|
||||
"text": "",
|
||||
"type": ""
|
||||
};
|
||||
@@ -155,8 +157,15 @@ async function find_username_site(uuid, username, options, site) {
|
||||
}));
|
||||
}
|
||||
if (temp_profile.found > 0 || temp_profile.image != "") {
|
||||
|
||||
language = helper.get_language_by_parsing(source)
|
||||
if (language == "unavailable"){
|
||||
language = helper.get_language_by_guessing(text_only)
|
||||
}
|
||||
|
||||
temp_profile.text = sanitizeHtml(text_only);
|
||||
temp_profile.title = sanitizeHtml(title);
|
||||
temp_profile.language = language
|
||||
temp_profile.rate = "%" + ((temp_profile.found / site.detections.length) * 100).toFixed(2);
|
||||
temp_profile.link = site.url.replace("{username}", username);
|
||||
temp_profile.type = site.type
|
||||
|
||||
@@ -66,6 +66,7 @@ async function find_username_site_special_facebook_1(uuid, username, site) {
|
||||
"link": "",
|
||||
"rate": "",
|
||||
"title": "",
|
||||
"language": "",
|
||||
"text": "",
|
||||
"type": ""
|
||||
};
|
||||
@@ -132,6 +133,7 @@ async function find_username_site_special_gmail_1(uuid, username, site) {
|
||||
"link": "",
|
||||
"rate": "",
|
||||
"title": "",
|
||||
"language": "",
|
||||
"text": "",
|
||||
"type": ""
|
||||
};
|
||||
@@ -198,6 +200,7 @@ async function find_username_site_special_google_1(uuid, username, site) {
|
||||
"link": "",
|
||||
"rate": "",
|
||||
"title": "",
|
||||
"language": "",
|
||||
"text": "",
|
||||
"type": ""
|
||||
};
|
||||
|
||||
+4
-2
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "social-analyzer",
|
||||
"version": "2.0.10",
|
||||
"version": "2.0.12",
|
||||
"description": "Advanced Social Media Profiles Finder and String Analysis Tool",
|
||||
"main": "app.js",
|
||||
"scripts": {
|
||||
@@ -29,7 +29,9 @@
|
||||
"tmp": "0.2.1",
|
||||
"wink-tokenizer": "1.1.0",
|
||||
"wordsninja": "1.0.0",
|
||||
"yargs": "16.2.0"
|
||||
"yargs": "16.2.0",
|
||||
"franc": "5.0.0",
|
||||
"langs": "2.0.0"
|
||||
},
|
||||
"author": "QeeqBox",
|
||||
"license": "AGPL-3.0",
|
||||
|
||||
+3
-3
@@ -964,7 +964,7 @@
|
||||
temp_image = '<img src=' + site.image + '>'
|
||||
}
|
||||
temp_tr += '<div class="user-info-container"><div class="user-info-container-left"><div>Link: <a href="' + site.link + '">' + site.link + '</a></div><div>Rate: ' + site.rate + '</div><div>Title: ' + site.title +
|
||||
'</div><div>Site Type: ' + site.type + '</div><div>Text: ' + site.text + '</div></div><div class="user-info-container-right">' + temp_image + '</div></div>'
|
||||
'</div><div>Language: ' + site.language + '</div><div>Description: ' + site.type + '</div><div>Text: ' + site.text + '</div></div><div class="user-info-container-right">' + temp_image + '</div></div>'
|
||||
console.log(temp_tr)
|
||||
}
|
||||
});
|
||||
@@ -997,7 +997,7 @@
|
||||
temp_image = '<img src=' + site.image + '>'
|
||||
}
|
||||
temp_tr += '<div class="user-info-container"><div class="user-info-container-left"><div>Link: <a href="' + site.link + '">' + site.link + '</a></div><div>Rate: ' + site.rate + '</div><div>Title: ' + site.title +
|
||||
'</div><div>Site Type: ' + site.type + '</div><div>Text: ' + site.text + '</div></div><div class="user-info-container-right">' + temp_image + '</div></div>'
|
||||
'</div><div>Language: ' + site.language + '</div><div>Description: ' + site.type + '</div><div>Text: ' + site.text + '</div></div><div class="user-info-container-right">' + temp_image + '</div></div>'
|
||||
console.log(temp_tr)
|
||||
}
|
||||
});
|
||||
@@ -1012,7 +1012,7 @@
|
||||
if (site.found > 0) {
|
||||
temp_image = ""
|
||||
temp_tr += '<div class="user-info-container"><div class="user-info-container-left"><div>Link: <a href="' + site.link + '">' + site.link + '</a></div><div>Rate: ' + site.rate + '</div><div>Title: ' + site.title +
|
||||
'</div><div>Site Type: ' + site.type + '</div><div>Text: ' + site.text + '</div></div><div class="user-info-container-right">' + temp_image + '</div></div>'
|
||||
'</div><div>Description: ' + site.type + '</div><div>Text: ' + site.text + '</div></div><div class="user-info-container-right">' + temp_image + '</div></div>'
|
||||
console.log(temp_tr)
|
||||
}
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user