[special request] added dynamic language detection - ref ef25c868

This commit is contained in:
G
2021-01-11 22:58:40 -08:00
committed by GitHub
parent 160276bdf6
commit 538454a197
8 changed files with 68 additions and 10 deletions
+2 -1
View File
@@ -12,6 +12,7 @@ This project *is currently used by some law enforcement agencies in countries wh
**All Pull Requests are welcomed!**
## Updates
- Added dynamic language detection for profiles
- Added --websites "all" for Python & NodeJS CLI (You can scan ALL the websites within 2~10 seconds) 👏👏👏
- Added a new refactored version
@@ -55,7 +56,7 @@ Profile images **will not** be blurred. If you want them to be blurred, turn tha
## Special Detections
- Facebook (Phone number, name or profile name)
- Gmail (example@gmail.com)
- Google (example@example.com)
- Google (example@example.com)
## Install and run as web app (NodeJS + NPM + Firefox)
```bash
+3 -3
View File
@@ -1,10 +1,10 @@
{"version":"2021.V.2.11",
{"version":"2021.V.2.12",
"build":"pass",
"test":"pass",
"grid_test":"pass",
"websites":"342",
"detections":"939",
"special":"3",
"full_scan":"2-10s",
"full_scan":"2-8s",
"awaiting_verification":"22",
"auto_testing":"7734d352-8446-4cda-99f1-73b5a263ac31"}
"auto_testing":"e1c95116-4579-4922-bc33-25e56308cd4c"}
+9 -1
View File
@@ -1,7 +1,7 @@
var helper = require("./helper.js")
var async = require("async");
var sanitizeHtml = require("sanitize-html");
const {
var {
htmlToText
} = require('html-to-text');
var cheerio = require('cheerio');
@@ -35,12 +35,14 @@ async function find_username_site(uuid, username, options, site) {
var source = body;
var text_only = "unavailable";
var title = "unavailable";
var language = "unavailable"
var temp_profile = {
"found": 0,
"image": "",
"link": "",
"rate": "",
"title": "",
"language": "",
"text": "",
"type": ""
};
@@ -79,7 +81,13 @@ async function find_username_site(uuid, username, options, site) {
helper.verbose && console.log(err);
}
language = helper.get_language_by_parsing(body)
if (language == "unavailable"){
language = helper.get_language_by_guessing(temp_profile.text)
}
temp_profile.title = title;
temp_profile.language = language;
temp_profile.rate = "%" + ((temp_profile["found"] / detections_count) * 100).toFixed(2);
temp_profile.link = site.url.replace("{username}", username);
temp_profile.type = site.type
+35
View File
@@ -13,6 +13,9 @@ var header_options = {
var https = require("follow-redirects").https;
var fs = require("fs");
var url = require("url");
var franc = require('franc');
var langs = require('langs');
var cheerio = require('cheerio');
var parsed_sites = JSON.parse(fs.readFileSync("sites.json"));
var logs_queue = Promise.resolve();
@@ -28,6 +31,36 @@ function log_to_file_queue(uuid, msg) {
});
}
/**
 * Resolve a language name from the `lang` attribute of a page's <html> element.
 *
 * @param {string} body - Raw HTML source to inspect.
 * @returns {string} The `name` of the matching entry in the `langs` table, or
 *   "unavailable" when the attribute is missing, blank, not a known two-letter
 *   code, or the markup could not be processed.
 */
function get_language_by_parsing(body) {
    var language = "unavailable"
    try {
        var $ = cheerio.load(body);
        var code = $("html").attr("lang")
        // attr() yields undefined when the attribute is absent, so a plain
        // comparison against "" is not enough to rule out "no value".
        if (typeof code === "string" && code.trim() !== "") {
            // Values are frequently region-qualified ("en-US", "pt_BR"); only the
            // primary subtag can match the two-letter codes stored under key "1".
            var primary = code.trim().toLowerCase().split(/[-_]/)[0]
            var match = langs.where("1", primary)
            // where() returns undefined for unknown codes; keep the default then.
            if (match && match.name) {
                language = match.name
            }
        }
    } catch (err) {
        // Use this module's own `verbose` flag (the one listed in module.exports);
        // no `helper` binding is visible in this file, so going through it would
        // make the error handler itself throw.
        verbose && console.log(err);
    }
    return language
}
/**
 * Guess the language of a text sample with `franc` and translate the detected
 * three-letter code into a language name via the `langs` table.
 *
 * @param {string} text - Plain text to analyse; the placeholder value
 *   "unavailable" and the empty string are treated as "nothing to analyse".
 * @returns {string} The language name suffixed with " (Maybe)", or
 *   "unavailable" when there is no usable text, franc reports 'und', or the
 *   detected code has no entry in the `langs` table.
 */
function get_language_by_guessing(text) {
    var language = "unavailable"
    try {
        if (typeof text === "string" && text !== "unavailable" && text !== "") {
            var code = franc(text);
            if (code !== 'und') {
                // franc can report codes that the langs table does not contain;
                // where() then returns undefined, so check before reading .name.
                var match = langs.where("3", code)
                if (match && match.name) {
                    language = match.name + " (Maybe)"
                }
            }
        }
    } catch (err) {
        // Use this module's own `verbose` flag (the one listed in module.exports);
        // no `helper` binding is visible in this file, so going through it would
        // make the error handler itself throw.
        verbose && console.log(err);
    }
    return language
}
function get_site_from_url(_url) {
temp = url.parse(_url.replace("{username}", "nothinghere")).hostname
return temp.replace("nothinghere.", "")
@@ -98,6 +131,8 @@ async function get_url_wrapper_text(url, time = 5) {
}
module.exports = {
get_language_by_parsing,
get_language_by_guessing,
parsed_sites,
verbose,
google_api_key,
+9
View File
@@ -74,6 +74,7 @@ async function find_username_site(uuid, username, options, site) {
var source = "";
var data = "";
var language = "unavailable"
var text_only = "unavailable";
var title = "unavailable";
var temp_profile = {
@@ -82,6 +83,7 @@ async function find_username_site(uuid, username, options, site) {
"link": "",
"rate": "",
"title": "",
"language": "",
"text": "",
"type": ""
};
@@ -155,8 +157,15 @@ async function find_username_site(uuid, username, options, site) {
}));
}
if (temp_profile.found > 0 || temp_profile.image != "") {
language = helper.get_language_by_parsing(source)
if (language == "unavailable"){
language = helper.get_language_by_guessing(text_only)
}
temp_profile.text = sanitizeHtml(text_only);
temp_profile.title = sanitizeHtml(title);
temp_profile.language = language
temp_profile.rate = "%" + ((temp_profile.found / site.detections.length) * 100).toFixed(2);
temp_profile.link = site.url.replace("{username}", username);
temp_profile.type = site.type
+3
View File
@@ -66,6 +66,7 @@ async function find_username_site_special_facebook_1(uuid, username, site) {
"link": "",
"rate": "",
"title": "",
"language": "",
"text": "",
"type": ""
};
@@ -132,6 +133,7 @@ async function find_username_site_special_gmail_1(uuid, username, site) {
"link": "",
"rate": "",
"title": "",
"language": "",
"text": "",
"type": ""
};
@@ -198,6 +200,7 @@ async function find_username_site_special_google_1(uuid, username, site) {
"link": "",
"rate": "",
"title": "",
"language": "",
"text": "",
"type": ""
};
+4 -2
View File
@@ -1,6 +1,6 @@
{
"name": "social-analyzer",
"version": "2.0.10",
"version": "2.0.12",
"description": "Advanced Social Media Profiles Finder and String Analysis Tool",
"main": "app.js",
"scripts": {
@@ -29,7 +29,9 @@
"tmp": "0.2.1",
"wink-tokenizer": "1.1.0",
"wordsninja": "1.0.0",
"yargs": "16.2.0"
"yargs": "16.2.0",
"franc": "5.0.0",
"langs": "2.0.0"
},
"author": "QeeqBox",
"license": "AGPL-3.0",
+3 -3
View File
@@ -964,7 +964,7 @@
temp_image = '<img src=' + site.image + '>'
}
temp_tr += '<div class="user-info-container"><div class="user-info-container-left"><div>Link: <a href="' + site.link + '">' + site.link + '</a></div><div>Rate: ' + site.rate + '</div><div>Title: ' + site.title +
'</div><div>Site Type: ' + site.type + '</div><div>Text: ' + site.text + '</div></div><div class="user-info-container-right">' + temp_image + '</div></div>'
'</div><div>Language: ' + site.language + '</div><div>Description: ' + site.type + '</div><div>Text: ' + site.text + '</div></div><div class="user-info-container-right">' + temp_image + '</div></div>'
console.log(temp_tr)
}
});
@@ -997,7 +997,7 @@
temp_image = '<img src=' + site.image + '>'
}
temp_tr += '<div class="user-info-container"><div class="user-info-container-left"><div>Link: <a href="' + site.link + '">' + site.link + '</a></div><div>Rate: ' + site.rate + '</div><div>Title: ' + site.title +
'</div><div>Site Type: ' + site.type + '</div><div>Text: ' + site.text + '</div></div><div class="user-info-container-right">' + temp_image + '</div></div>'
'</div><div>Language: ' + site.language + '</div><div>Description: ' + site.type + '</div><div>Text: ' + site.text + '</div></div><div class="user-info-container-right">' + temp_image + '</div></div>'
console.log(temp_tr)
}
});
@@ -1012,7 +1012,7 @@
if (site.found > 0) {
temp_image = ""
temp_tr += '<div class="user-info-container"><div class="user-info-container-left"><div>Link: <a href="' + site.link + '">' + site.link + '</a></div><div>Rate: ' + site.rate + '</div><div>Title: ' + site.title +
'</div><div>Site Type: ' + site.type + '</div><div>Text: ' + site.text + '</div></div><div class="user-info-container-right">' + temp_image + '</div></div>'
'</div><div>Description: ' + site.type + '</div><div>Text: ' + site.text + '</div></div><div class="user-info-container-right">' + temp_image + '</div></div>'
console.log(temp_tr)
}
});