public class SpellReader
extends Object
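Reads a spelling dictionary created by SpellWriter, and provides
fast single- and multi-word spelling suggestions. Typical usage: open a reader
with open(File), optionally call setStopwords(Set) and/or setWordEquiv(WordEquiv),
request suggestions with suggestKeywords(String[]) or suggestSimilar(String, int),
and finally call close().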
Inspired by and very distantly based on Nicolas Maisonneuve / David Spencer code.
Modifier and Type | Class and Description |
---|---|
private class |
SpellReader.Phrase
Track an ordered group of words.
|
private class |
SpellReader.Word
Keeps track of a single word, either an original or suggested word.
|
private static class |
SpellReader.WordQueue
Queue of words, ordered by score and then frequency
|
Modifier and Type | Field and Description |
---|---|
private PrintWriter |
debugWriter
Where to send debugging info (or null for none)
|
private CharsetDecoder |
edMapDecoder
Charset decoder for reading edit map entries
|
private RandomAccessFile |
edMapFile
File for reading edit map entries
|
private IntList |
edMapKeys
Keys in the edit map file
|
private IntList |
edMapPosns
Positions in the edit map file
|
private int[] |
freqSamples
Frequencies from the term data, sampled at 5 levels
|
private FreqData |
pairFreqs
Pair frequency data
|
private Pattern |
splitPat
Pattern used for splitting up lines delimited by bars
|
private Set |
stopSet
Set of stop-words to use during spell correction, or null for none
|
private WordEquiv |
wordEquiv
Word equivalency checker
|
private FreqData |
wordFreqs
Word frequency data
|
Modifier | Constructor and Description |
---|---|
private |
SpellReader()
Private constructor -- use
open(File) instead. |
Modifier and Type | Method and Description |
---|---|
private String |
calcMetaphone(String word) |
void |
close()
Closes any open files and/or resources associated with the SpellReader
|
private int |
comboChar(int c) |
private int |
comboKey(String word,
int p0,
int p1,
int p2,
int p3)
Calculate a four letter key for the given word, by sticking together
characters from the given positions.
|
protected void |
finalize() |
private void |
findCloseWords(SpellReader.Word orig,
int minFreq,
SpellReader.WordQueue queue)
Find words "close" to the given one, and add them to a queue.
|
boolean |
inDictionary(String word)
Check if the given word is in the spelling dictionary
|
static boolean |
isValidDictionary(File spellDir)
Check if there's a valid dictionary in the given directory
|
private void |
loadFreqSamples(File spellDir)
Get the term frequency sample array for our dictionary.
|
private void |
loadWordFreqs(File spellDir)
Load the word frequency data for our dictionary.
|
private SpellReader.Phrase |
max(SpellReader.Phrase orig,
SpellReader.Phrase test)
Return the better of two phrases (an original phrase vs. a test phrase).
|
static SpellReader |
open(File spellDir)
Open a reader for the given spelling index directory.
|
private void |
openEdmap(File spellDir)
Read the index for the edit map file
|
private void |
openPairFreqs(File spellDir) |
private boolean |
readEdKey(SpellReader.Word orig,
int key,
int minFreq,
LongSet checked,
SpellReader.WordQueue queue)
Read the list of edit-map words for the given 4-character key.
|
private static String |
rtrim(String s) |
private float |
scorePair(SpellReader.Word sugg1,
SpellReader.Word sugg2)
Calculate a score for a suggested replacement for a given word.
|
void |
setDebugWriter(PrintWriter w)
Establishes a destination for detailed debugging output
|
void |
setStopwords(Set set)
Establishes a list of stopwords (e.g. "the", "and") to use during spell correction.
|
void |
setWordEquiv(WordEquiv eq)
Establishes a word equivalency checker.
|
private SpellReader.Phrase |
subJoin(SpellReader.Phrase in,
int pos1,
int pos2)
Consider joining the two words at the given positions together
|
private SpellReader.Phrase |
subPair(SpellReader.Phrase in,
int pos1,
int pos2)
Consider a set of changes to the pair of words at the given position.
|
private SpellReader.Phrase |
subPairs(SpellReader.Phrase in)
Consider pair-wise changes at each position.
|
private SpellReader.Phrase |
subSplit(SpellReader.Phrase in,
int pos)
Consider splitting a word
|
private SpellReader.Phrase |
subWord(SpellReader.Phrase in,
int pos)
Substitute a single word at the given position, trying to improve the score.
|
String[] |
suggestKeywords(String[] terms)
Keyword-oriented spelling suggestion mechanism.
|
private SpellReader.Word[] |
suggestSimilar(SpellReader.Word word,
int numSugg,
int minFreq)
Suggest similar words to a given original word.
|
String[] |
suggestSimilar(String str,
int numSugg)
Suggest similar words to a given original word, but not including the
word itself.
|
private IntList edMapKeys
private IntList edMapPosns
private RandomAccessFile edMapFile
private CharsetDecoder edMapDecoder
private FreqData pairFreqs
private FreqData wordFreqs
private int[] freqSamples
private PrintWriter debugWriter
private final Pattern splitPat
private Set stopSet
private WordEquiv wordEquiv
private SpellReader()
Private constructor -- use open(File) instead.
public static boolean isValidDictionary(File spellDir)
public static SpellReader open(File spellDir) throws IOException
To specify stop-words, call setStopwords(Set). To specify a non-default
word equivalency, call setWordEquiv(WordEquiv).
spellDir - directory containing the spelling dictionary
IOException
public void setStopwords(Set set)
set - Set of stop-words; all should be lower-case.
public void setWordEquiv(WordEquiv eq)
eq - the equivalency checker to use
private void openEdmap(File spellDir) throws IOException
IOException
public void close() throws IOException
IOException
public void setDebugWriter(PrintWriter w)
private static String rtrim(String s)
private boolean readEdKey(SpellReader.Word orig, int key, int minFreq, LongSet checked, SpellReader.WordQueue queue) throws IOException
orig - the original word being considered
key - the 4-char key to look up
minFreq - minimum frequency of words to be queued
checked - set of words that have already been considered
queue - receives the resulting words
IOException
private void findCloseWords(SpellReader.Word orig, int minFreq, SpellReader.WordQueue queue) throws IOException
IOException
private int comboKey(String word, int p0, int p1, int p2, int p3)
private int comboChar(int c)
public boolean inDictionary(String word) throws IOException
IOException
public String[] suggestSimilar(String str, int numSugg) throws IOException
IOException
private SpellReader.Word[] suggestSimilar(SpellReader.Word word, int numSugg, int minFreq) throws IOException
IOException
public String[] suggestKeywords(String[] terms) throws IOException
terms - Ordered list of query terms
IOException
private SpellReader.Phrase subWord(SpellReader.Phrase in, int pos) throws IOException
in - the best we've done so far
pos - position to substitute at
IOException
private SpellReader.Phrase max(SpellReader.Phrase orig, SpellReader.Phrase test) throws IOException
IOException
private SpellReader.Phrase subPairs(SpellReader.Phrase in) throws IOException
IOException
private SpellReader.Phrase subPair(SpellReader.Phrase in, int pos1, int pos2) throws IOException
in - the current best we've found
pos1 - first position to consider
pos2 - second position to consider
IOException
private SpellReader.Phrase subSplit(SpellReader.Phrase in, int pos) throws IOException
IOException
private SpellReader.Phrase subJoin(SpellReader.Phrase in, int pos1, int pos2) throws IOException
IOException
private float scorePair(SpellReader.Word sugg1, SpellReader.Word sugg2) throws IOException
IOException
private void loadFreqSamples(File spellDir) throws IOException
IOException
private void loadWordFreqs(File spellDir) throws IOException
IOException
private void openPairFreqs(File spellDir) throws IOException
IOException
protected void finalize() throws Throwable
Overrides: finalize in class Object
Throwable
private String calcMetaphone(String word)