Files
similarWords/createwordlist.js
2013-04-07 11:48:40 +02:00

73 lines
1.6 KiB
JavaScript

fs = require('fs');
// Dictionary file to read (one word per line). Swap the active line to use
// another language.
//var dict = '/usr/share/dict/american-english';
var dict = '/usr/share/dict/italian';
//var dict = '/usr/share/dict/ngerman';

// Only words whose length lies in this inclusive range are considered.
var DEFAULT_MIN_WORD_LENGTH = 7;
var DEFAULT_MAX_WORD_LENGTH = 9;
// A word is kept in the result only if it has at least this many similar words.
var DEFAULT_MIN_MATCHES = 4;
// Intermediate results are reported every this many processed words.
var PROGRESS_INTERVAL = 5000;

/**
 * Builds the single-character wildcard variants of a word.
 *
 * For every position except the first, the character at that position is
 * replaced by '.'; e.g. 'abcd' -> ['a.cd', 'ab.d', 'abc.'].
 *
 * NOTE(review): position 0 is never wildcarded, so similar words always share
 * their first letter. This mirrors the original behaviour; confirm whether it
 * is intentional.
 *
 * @param {string} word - The word to derive patterns from.
 * @returns {string[]} One pattern per wildcarded position, in position order.
 */
function wildcardWord(word) {
  var patterns = [];
  for (var i = 1; i < word.length; i++) {
    patterns.push(word.slice(0, i) + '.' + word.slice(i + 1));
  }
  return patterns;
}

/**
 * Picks the dictionary entries whose length lies within the given bounds.
 *
 * @param {string[]} lines - Raw dictionary entries.
 * @param {number} [minLength=7] - Smallest accepted word length (inclusive).
 * @param {number} [maxLength=9] - Largest accepted word length (inclusive).
 * @returns {string[]} The accepted words, in their original order.
 */
function selectCandidateWords(lines, minLength, maxLength) {
  var lower = minLength === undefined ? DEFAULT_MIN_WORD_LENGTH : minLength;
  var upper = maxLength === undefined ? DEFAULT_MAX_WORD_LENGTH : maxLength;
  return lines.filter(function (line) {
    return line.length >= lower && line.length <= upper;
  });
}

/**
 * Finds, for every word, the other words of the same length that differ from
 * it in exactly one character (at any position but the first), and keeps the
 * words that have enough of them.
 *
 * Words are grouped into buckets keyed by (position, wildcard pattern), so the
 * whole list is processed in linear time and every character is compared
 * literally. Running each pattern as a regular expression against every word
 * would be quadratic and would misinterpret words containing regex
 * metacharacters.
 *
 * @param {string[]} words - Candidate words.
 * @param {number} [minMatches=4] - Minimum number of similar words required.
 * @param {function(*): void} [log] - Optional sink for progress messages.
 * @returns {Object[]} One single-key object per kept word, mapping the word to
 *   its similar words (ordered by differing position, then by list order).
 */
function buildMatchingWordlist(words, minMatches, log) {
  var threshold = minMatches === undefined ? DEFAULT_MIN_MATCHES : minMatches;
  var report = typeof log === 'function' ? log : function () {};
  var patternsByWord = [];
  // Prototype-less object, so no word-derived key can clash with
  // Object.prototype members.
  var buckets = Object.create(null);
  var w;
  var p;

  // Pass 1: file every word under each of its (position, pattern) keys.
  for (w = 0; w < words.length; w++) {
    var patterns = wildcardWord(words[w]);
    patternsByWord.push(patterns);
    for (p = 0; p < patterns.length; p++) {
      // The position is part of the key so that a literal '.' inside a word
      // can never be confused with the wildcard of another position.
      var key = p + ':' + patterns[p];
      if (!buckets[key]) {
        buckets[key] = [];
      }
      buckets[key].push(words[w]);
    }
  }

  // Pass 2: a word's similar words are its bucket neighbours.
  var matchingWordlist = [];
  for (w = 0; w < words.length; w++) {
    var word = words[w];
    var matches = [];
    for (p = 0; p < patternsByWord[w].length; p++) {
      var bucket = buckets[p + ':' + patternsByWord[w][p]];
      for (var b = 0; b < bucket.length; b++) {
        if (bucket[b] !== word) {
          matches.push(bucket[b]);
        }
      }
    }
    if (matches.length >= threshold) {
      report('using ' + word);
      var entry = {};
      entry[word] = matches;
      matchingWordlist.push(entry);
    }
    if (w % PROGRESS_INTERVAL === 0) {
      report('intermediary results...');
      report(matchingWordlist);
    }
  }
  return matchingWordlist;
}

// Entry point: read the dictionary, build the list and print it as JSON.
fs.readFile(dict, 'utf8', function (err, data) {
  if (err) {
    // Report on stderr so the failure is not mixed into the result stream.
    console.error(err);
    return;
  }
  // Ignore empty lines (notably the one produced by the trailing newline) so
  // the reported count is the real number of words.
  var lines = data.split(/\r?\n/).filter(function (line) {
    return line !== '';
  });
  console.log(lines.length + ' words in dictionary');

  var words = selectCandidateWords(lines);
  console.log('using ' + words.length + ' words out of the dictionary');

  var matchingWordlist = buildMatchingWordlist(words, DEFAULT_MIN_MATCHES, function (message) {
    console.log(message);
  });

  console.log('final result');
  console.log(JSON.stringify(matchingWordlist));
  // No process.exit() here: forcing the exit can cut off the (large) output
  // above when stdout is a pipe. With nothing left pending, the process ends
  // on its own once the output has been flushed.
});