mirror of
https://github.com/roytam1/UXP.git
synced 2026-06-22 00:18:59 +00:00
198 lines
5.5 KiB
JavaScript
198 lines
5.5 KiB
JavaScript
/*
|
|
* Copyright 2012, Mozilla Foundation and contributors
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
'use strict';
|
|
|
|
/*
|
|
* A spell-checker based on Damerau-Levenshtein distance.
|
|
*/
|
|
|
|
// Per-operation edit costs used when scoring how far apart two words are.
// Listed from most to least expensive. A substitution costs as much as a
// deletion followed by an insertion, and a case change is deliberately far
// cheaper than any other edit.
var SUBSTITUTION_COST = 20;
var SWAP_COST = 10;
var DELETION_COST = 10;
var INSERTION_COST = 10;
var CASE_CHANGE_COST = 1;

// Largest distance at which correct() will still offer a suggestion.
var MAX_EDIT_DISTANCE = 40;
|
|
|
|
/**
 * Compute the Damerau-Levenshtein distance between two words, with a
 * modification to allow a low case-change cost (1/10th of a swap-cost).
 *
 * The result is a weighted sum of the edits (deletion, insertion,
 * substitution, case change, swap of two adjacent characters) needed to turn
 * wordi into wordj, using the *_COST constants of this module.
 *
 * @see http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
 * @param wordi The string to transform
 * @param wordj The string to transform wordi into
 * @return The weighted edit distance (0 when the strings are identical)
 */
var distance = exports.distance = function(wordi, wordj) {
  var wordiLen = wordi.length;
  var wordjLen = wordj.length;

  // We only need to store three rows of our dynamic programming matrix.
  // (Without swap, it would have been two.)
  var row0 = new Array(wordiLen + 1);   // row j, the one being filled in
  var row1 = new Array(wordiLen + 1);   // row j - 1
  var row2 = new Array(wordiLen + 1);   // row j - 2, only read by the swap rule
  var tmp;

  var i, j;

  // Turning the first i characters of wordi into the empty string takes i
  // deletions. This must use the same cost as the deletion step of the
  // recurrence below, otherwise the table is inconsistent as soon as
  // DELETION_COST and INSERTION_COST differ.
  for (i = 0; i <= wordiLen; i++) {
    row1[i] = i * DELETION_COST;
  }

  // Row-by-row, we're computing the edit distance between substrings
  // wordi[0..i] and wordj[0..j].
  for (j = 1; j <= wordjLen; j++) {
    // Turning the empty string into wordj[0..j] takes j insertions.
    row0[0] = j * INSERTION_COST;

    for (i = 1; i <= wordiLen; i++) {
      // Handle deletion, insertion and substitution: we can reach each cell
      // from three other cells corresponding to those three operations. We
      // want the minimum cost.
      var dc = row0[i - 1] + DELETION_COST;
      var ic = row1[i] + INSERTION_COST;

      // Cost of lining up wordi[i-1] with wordj[j-1]: free when equal, cheap
      // when they differ only by case, a full substitution otherwise.
      var sc0;
      if (wordi[i - 1] === wordj[j - 1]) {
        sc0 = 0;
      }
      else if (wordi[i - 1].toLowerCase() === wordj[j - 1].toLowerCase()) {
        sc0 = CASE_CHANGE_COST;
      }
      else {
        sc0 = SUBSTITUTION_COST;
      }
      var sc = row1[i - 1] + sc0;

      row0[i] = Math.min(dc, ic, sc);

      // We handle swap too, eg. the distance between help and hlep should be
      // a single SWAP_COST. If the last two characters on each side are each
      // other's mirror image, there's a chance to update row0[i] to be lower.
      if (i > 1 && j > 1 &&
          wordi[i - 1] === wordj[j - 2] && wordj[j - 1] === wordi[i - 2]) {
        row0[i] = Math.min(row0[i], row2[i - 2] + SWAP_COST);
      }
    }

    // Rotate the rows: the row just computed becomes "previous", and the
    // oldest row is recycled as scratch space for the next iteration.
    tmp = row2;
    row2 = row1;
    row1 = row0;
    row0 = tmp;
  }

  // After the final rotation the last computed row lives in row1.
  return row1[wordiLen];
};
|
|
|
|
/**
 * As distance() except that we say that if word is a prefix of name then we
 * only count the case changes. This allows us to rank words that can be
 * completed by further typing as more likely than short words.
 *
 * @param word The (possibly unfinished) string typed so far
 * @param name The candidate string to compare word against
 * @return The number of characters of word that differ from name only by
 * case when word is a case-insensitive prefix of name, otherwise the result
 * of exports.distance(word, name)
 */
var distancePrefix = exports.distancePrefix = function(word, name) {
  // A word longer than name can never be a prefix of it. Without this guard
  // name[i] would be undefined once i runs past the end of name and the
  // toLowerCase() call below would throw.
  if (word.length > name.length) {
    return exports.distance(word, name);
  }

  var dist = 0;

  for (var i = 0; i < word.length; i++) {
    if (name[i] === word[i]) {
      continue;
    }

    if (name[i].toLowerCase() !== word[i].toLowerCase()) {
      // name does not start with word, even ignoring case, use
      // Damerau-Levenshtein
      return exports.distance(word, name);
    }

    // Same letter, different case
    dist++;
  }

  return dist;
};
|
|
|
|
/**
|
|
* A function that returns the correction for the specified word.
|
|
*/
|
|
exports.correct = function(word, names) {
|
|
if (names.length === 0) {
|
|
return undefined;
|
|
}
|
|
|
|
var distances = {};
|
|
var sortedCandidates;
|
|
|
|
names.forEach(function(candidate) {
|
|
distances[candidate] = exports.distance(word, candidate);
|
|
});
|
|
|
|
sortedCandidates = names.sort(function(worda, wordb) {
|
|
if (distances[worda] !== distances[wordb]) {
|
|
return distances[worda] - distances[wordb];
|
|
}
|
|
else {
|
|
// if the score is the same, always return the first string
|
|
// in the lexicographical order
|
|
return worda < wordb;
|
|
}
|
|
});
|
|
|
|
if (distances[sortedCandidates[0]] <= MAX_EDIT_DISTANCE) {
|
|
return sortedCandidates[0];
|
|
}
|
|
else {
|
|
return undefined;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* Return a ranked list of matches:
|
|
*
|
|
* spell.rank('fred', [ 'banana', 'fred', 'ed', 'red' ]);
|
|
* ↓
|
|
* [
|
|
* { name: 'fred', dist: 0 },
|
|
* { name: 'red', dist: 1 },
|
|
* { name: 'ed', dist: 2 },
|
|
* { name: 'banana', dist: 10 },
|
|
* ]
|
|
*
|
|
* @param word The string that we're comparing names against
|
|
* @param names An array of strings to compare word against
|
|
* @param options Comparison options:
|
|
* - noSort: Do not sort the output by distance
|
|
* - prefixZero: Count prefix matches as edit distance 0 (i.e. word='bana' and
|
|
* names=['banana'], would return { name:'banana': dist: 0 }) This is useful
|
|
* if someone is typing the matches and may not have finished yet
|
|
*/
|
|
exports.rank = function(word, names, options) {
|
|
options = options || {};
|
|
|
|
var reply = names.map(function(name) {
|
|
// If any name starts with the word then the distance is based on the
|
|
// number of case changes rather than Damerau-Levenshtein
|
|
var algo = options.prefixZero ? distancePrefix : distance;
|
|
return {
|
|
name: name,
|
|
dist: algo(word, name)
|
|
};
|
|
});
|
|
|
|
if (!options.noSort) {
|
|
reply = reply.sort(function(d1, d2) {
|
|
return d1.dist - d2.dist;
|
|
});
|
|
}
|
|
|
|
return reply;
|
|
};
|