Files
UXP/devtools/shared/gcli/source/lib/gcli/util/spell.js
T

198 lines
5.5 KiB
JavaScript

/*
* Copyright 2012, Mozilla Foundation and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
'use strict';
/*
* A spell-checker based on Damerau-Levenshtein distance.
*/
// Weights of the individual edit operations used by distance(). Adding a
// character, removing a character and swapping two adjacent characters are
// all equally expensive.
var INSERTION_COST = 10;
var DELETION_COST = 10;
var SWAP_COST = 10;
// Replacing a character costs as much as one deletion plus one insertion.
var SUBSTITUTION_COST = 20;
// Replacing a character by the same letter in the other case is much cheaper
// than any other edit.
var CASE_CHANGE_COST = 1;
// correct() never suggests a candidate whose distance exceeds this value.
var MAX_EDIT_DISTANCE = 40;
/**
 * Weighted Damerau-Levenshtein distance between two strings. On top of the
 * usual insert / delete / substitute / adjacent-swap operations, a
 * substitution that only changes the case of a character is charged the much
 * smaller CASE_CHANGE_COST (1/10th of a swap-cost).
 * @see http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
 *
 * @param wordi The first string
 * @param wordj The second string
 * @return The summed cost of the cheapest sequence of edits found
 */
var distance = exports.distance = function(wordi, wordj) {
  var lenI = wordi.length;
  var lenJ = wordj.length;
  var width = lenI + 1;

  // Only three rows of the dynamic programming matrix are ever alive: the one
  // being filled in and the two before it. (The row two steps back is needed
  // solely for the swap rule.)
  var current = new Array(width);
  var previous = new Array(width);
  var beforePrevious = new Array(width);
  var recycled;
  var col, row;

  // Row zero: matching the first `col` characters of wordi against the empty
  // string costs `col` single-character edits.
  for (col = 0; col < width; col++) {
    previous[col] = col * INSERTION_COST;
  }

  // Fill the matrix one row per character of wordj.
  for (row = 1; row <= lenJ; row++) {
    var charJ = wordj[row - 1];

    // Column zero: the empty prefix of wordi against `row` characters of
    // wordj.
    current[0] = row * INSERTION_COST;

    for (col = 1; col <= lenI; col++) {
      var charI = wordi[col - 1];

      // Price of lining charI up with charJ: free when identical, cheap when
      // they differ only by case, full substitution otherwise.
      var alignCost = (charI === charJ) ? 0 :
          (charI.toLowerCase() === charJ.toLowerCase() ?
              CASE_CHANGE_COST : SUBSTITUTION_COST);

      // A cell can be reached from its left neighbour, from the cell above,
      // or diagonally; keep the cheapest route.
      var cheapest = Math.min(
        current[col - 1] + DELETION_COST,
        previous[col] + INSERTION_COST,
        previous[col - 1] + alignCost
      );

      // Two adjacent characters that appear in the opposite order in the
      // other word (e.g. 'help' vs 'hlep') can also be explained by a single
      // swap, reached from the cell two rows up and two columns back.
      if (col > 1 && row > 1 &&
          charI === wordj[row - 2] && charJ === wordi[col - 2]) {
        cheapest = Math.min(cheapest, beforePrevious[col - 2] + SWAP_COST);
      }

      current[col] = cheapest;
    }

    // Rotate the rows, reusing the oldest array as the next scratch row.
    recycled = beforePrevious;
    beforePrevious = previous;
    previous = current;
    current = recycled;
  }

  // After the rotation the last completed row is `previous`.
  return previous[lenI];
};
/**
 * As distance() except that when word is a prefix of name (ignoring case) the
 * result is just the number of characters whose case differs. This lets names
 * that can still be completed by further typing rank ahead of short,
 * unrelated names.
 *
 * @param word The (possibly partially typed) string
 * @param name The candidate that word is compared against
 * @return The number of case changes if name starts with word ignoring case,
 * otherwise distance(word, name)
 */
var distancePrefix = exports.distancePrefix = function(word, name) {
  // A word that is longer than name can never be a prefix of it. Decide that
  // up front: walking past the end of name would read undefined and calling
  // toLowerCase() on it throws.
  if (word.length > name.length) {
    return exports.distance(word, name);
  }

  var dist = 0;
  for (var i = 0; i < word.length; i++) {
    if (name[i] !== word[i]) {
      if (name[i].toLowerCase() !== word[i].toLowerCase()) {
        // name does not start with word, even ignoring case, use
        // Damerau-Levenshtein
        return exports.distance(word, name);
      }
      // Same letter, different case
      dist++;
    }
  }
  return dist;
};
/**
* A function that returns the correction for the specified word.
*/
exports.correct = function(word, names) {
if (names.length === 0) {
return undefined;
}
var distances = {};
var sortedCandidates;
names.forEach(function(candidate) {
distances[candidate] = exports.distance(word, candidate);
});
sortedCandidates = names.sort(function(worda, wordb) {
if (distances[worda] !== distances[wordb]) {
return distances[worda] - distances[wordb];
}
else {
// if the score is the same, always return the first string
// in the lexicographical order
return worda < wordb;
}
});
if (distances[sortedCandidates[0]] <= MAX_EDIT_DISTANCE) {
return sortedCandidates[0];
}
else {
return undefined;
}
};
/**
* Return a ranked list of matches:
*
* spell.rank('fred', [ 'banana', 'fred', 'ed', 'red' ]);
* ↓
* [
* { name: 'fred', dist: 0 },
* { name: 'red', dist: 1 },
* { name: 'ed', dist: 2 },
* { name: 'banana', dist: 10 },
* ]
*
* @param word The string that we're comparing names against
* @param names An array of strings to compare word against
* @param options Comparison options:
* - noSort: Do not sort the output by distance
* - prefixZero: Count prefix matches as edit distance 0 (i.e. word='bana' and
* names=['banana'], would return { name:'banana': dist: 0 }) This is useful
* if someone is typing the matches and may not have finished yet
*/
exports.rank = function(word, names, options) {
options = options || {};
var reply = names.map(function(name) {
// If any name starts with the word then the distance is based on the
// number of case changes rather than Damerau-Levenshtein
var algo = options.prefixZero ? distancePrefix : distance;
return {
name: name,
dist: algo(word, name)
};
});
if (!options.noSort) {
reply = reply.sort(function(d1, d2) {
return d1.dist - d2.dist;
});
}
return reply;
};