Files
similarWords/createwordlist.js
2017-12-04 14:37:37 +01:00

76 lines
1.7 KiB
JavaScript

var fs = require('fs');
// Dictionary to read: plain text, one word per line.
// Alternative dictionary paths:
//   /usr/share/dict/american-english
//   /usr/share/dict/ngerman
var dict = '/usr/share/dict/italian';

// Only words whose length lies in this inclusive range are considered.
var MIN_WORD_LENGTH = 7;
var MAX_WORD_LENGTH = 9;
// A word is reported only if it has at least this many similar words.
var MIN_MATCHES = 6;
// Intermediate results are logged once per this many processed words.
var PROGRESS_INTERVAL = 5000;

/**
 * Splits the raw dictionary text into its non-empty lines.
 *
 * Accepts both "\n" and "\r\n" line endings and drops empty lines, so the
 * empty string that follows a trailing newline is not counted as a word.
 *
 * @param {string} data - Full text of the dictionary file.
 * @returns {string[]} The non-empty lines, in file order.
 */
function splitLines(data) {
    return data.split(/\r?\n/).filter(function (line) {
        return line.length > 0;
    });
}

/**
 * Keeps the words whose length is within
 * [MIN_WORD_LENGTH, MAX_WORD_LENGTH].
 *
 * @param {string[]} lines - Dictionary words.
 * @returns {string[]} The words of suitable length, in their original order.
 */
function selectWords(lines) {
    return lines.filter(function (line) {
        return line.length >= MIN_WORD_LENGTH && line.length <= MAX_WORD_LENGTH;
    });
}

/**
 * Builds one lookup key per replaceable character position of a word.
 *
 * The key for position i is the position followed by the word with that
 * character removed. Two words share a key exactly when they have the same
 * length and are identical everywhere except (possibly) at position i.
 * Position 0 is skipped on purpose: the first letter is never replaced.
 *
 * The word is handled as plain text, so characters such as "." or "(" have
 * no special meaning.
 *
 * @param {string} word - The word to derive keys from.
 * @returns {string[]} Keys for positions 1 .. word.length - 1, in that order.
 */
function similarityKeys(word) {
    var keys = [];
    for (var i = 1; i < word.length; i++) {
        keys.push(i + ':' + word.slice(0, i) + word.slice(i + 1));
    }
    return keys;
}

/**
 * Finds, for every word, the other words of the same length that differ
 * from it in exactly one character (never the first one), and returns the
 * words that have at least MIN_MATCHES such neighbours.
 *
 * Every word is first filed under each of its similarity keys; the
 * neighbours of a word are then read straight from those buckets instead
 * of comparing every word against every other word.
 *
 * Progress is written to stdout: "using <word>" for each reported word and
 * the intermediate result list once per PROGRESS_INTERVAL processed words.
 *
 * @param {string[]} words - Candidate words.
 * @returns {Object[]} One single-key object per reported word, mapping the
 *     word to its neighbours. Neighbours are ordered by the position of the
 *     differing character, then by their order in `words`.
 */
function findSimilarWords(words) {
    // Prototype-less object used as a map from key to bucket of words.
    var index = Object.create(null);
    words.forEach(function (word) {
        similarityKeys(word).forEach(function (key) {
            if (!index[key]) {
                index[key] = [];
            }
            index[key].push(word);
        });
    });

    var matchingWordlist = [];
    words.forEach(function (word, w) {
        var matches = [];
        similarityKeys(word).forEach(function (key) {
            index[key].forEach(function (candidate) {
                // The bucket always contains the word itself; skip it.
                if (candidate !== word) {
                    matches.push(candidate);
                }
            });
        });
        if (matches.length >= MIN_MATCHES) {
            console.log('using ' + word);
            var entry = {};
            entry[word] = matches;
            matchingWordlist.push(entry);
        }
        if (w % PROGRESS_INTERVAL === 0) {
            console.log('intermediary results...');
            console.log(matchingWordlist);
        }
    });
    return matchingWordlist;
}

// Read the dictionary, compute the list of similar words and print it as
// JSON ({ words: [...] }) on the last line of stdout.
fs.readFile(dict, 'utf8', function (err, data) {
    if (err) {
        // Report on stderr so the message is not mixed into the output.
        console.error(err);
        return;
    }
    var lines = splitLines(data);
    console.log(lines.length + ' words in dictionary');
    var words = selectWords(lines);
    console.log('using ' + words.length + ' words out of the dictionary');
    var matchingWordlist = findSimilarWords(words);
    console.log('final result');
    console.log(JSON.stringify({
        words: matchingWordlist
    }));
    // No explicit process.exit(): the process ends by itself once this
    // callback returns, and forcing the exit can cut off stdout writes that
    // are still pending.
});