|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object kylm.model.LanguageModel
public abstract class LanguageModel
An abstract class representing many of the common functions of language models. All probabilities and entropies are represented as floats in log10 form.
Constructor Summary | |
---|---|
LanguageModel()
|
Method Summary | |
---|---|
boolean |
equals(java.lang.Object obj)
|
int |
findUnknownId(java.lang.String key)
Find the appropriate unknown word model for the key |
float[] |
getClassEntropies()
|
ClassMap |
getClassMap()
|
boolean |
getCountTerminals()
|
int |
getDebug()
|
int |
getId(java.lang.String word)
Get the ID assigned to a particular word |
int |
getMaxLength()
|
java.lang.String |
getName()
|
java.util.regex.Pattern |
getRegex()
|
float |
getSentenceClassEntropy()
|
float |
getSentenceEntropy(java.lang.String[] sent)
Returns the entropy of an entire sentence. |
int[] |
getSentenceIds(java.lang.String[] sent)
Get an array of IDs for a sentence, add terminal symbols on the left side, and on the right side if countTerminals is true. |
float |
getSentenceSimpleEntropy()
|
float |
getSentenceUnknownEntropy()
|
float[] |
getSimpleEntropies()
|
java.lang.String |
getStartSymbol()
|
java.lang.String |
getSymbol()
|
java.lang.String |
getTerminalSymbol()
|
float[] |
getUnknownEntropies()
|
int |
getUnknownModelCount()
|
LanguageModel[] |
getUnknownModels()
|
java.lang.String |
getUnknownSymbol()
|
SymbolSet |
getVocab()
|
int |
getVocabFrequency()
Get the frequency limit for the vocabulary. |
int |
getVocabLimit()
|
java.lang.String[] |
getVocabulary()
Get all in-vocabulary words. |
float[] |
getWordEntropies(int[] ids)
Get the entropies of every word in a sentence by ID. |
float[] |
getWordEntropies(java.lang.String[] sent)
Get the entropies of every word in a sentence. |
abstract float |
getWordEntropy(int[] ids,
int pos)
Get the entropy of the word at position pos in the sequence, by ID |
void |
importVocabulary(java.lang.Iterable<java.lang.String[]> sl)
Load the vocabulary and trim it at the appropriate level. |
boolean |
isClosed()
|
boolean |
isInVocab(int idx)
Returns whether or not an id is in the vocabulary. |
boolean |
isInVocab(java.lang.String str)
Returns whether or not a string is in the vocabulary; also returns false for special symbols. |
abstract java.lang.String |
printReport()
|
void |
setClassMap(ClassMap cm)
|
void |
setClosed(boolean closed)
|
void |
setCountTerminals(boolean countTerminals)
|
void |
setDebug(int debug)
|
void |
setMaxLength(int maxLength)
|
void |
setName(java.lang.String name)
|
void |
setRegex(java.lang.String regex)
|
void |
setStartSymbol(java.lang.String startSymbol)
|
void |
setSymbol(java.lang.String symbol)
|
void |
setTerminalSymbol(java.lang.String terminalSymbol)
|
void |
setUnknownModels(LanguageModel[] ukModels)
|
void |
setUnknownSymbol(java.lang.String ukSymbol)
|
void |
setVocab(SymbolSet newVocab)
|
void |
setVocabFrequency(int vocabFrequency)
Set the limit on the vocabulary frequency. |
void |
setVocabLimit(int vocabLimit)
|
void |
setVocabulary(java.lang.String[] voc)
Set the vocabulary to be used by the language model. |
abstract void |
trainModel(java.lang.Iterable<java.lang.String[]> sl)
|
Methods inherited from class java.lang.Object |
---|
getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Constructor Detail |
---|
public LanguageModel()
Method Detail |
---|
public boolean equals(java.lang.Object obj)
Overrides: equals
in class java.lang.Object
public abstract float getWordEntropy(int[] ids, int pos)
ids
- The IDs of the words in the sentence. Will always start
and end with the sentence terminal symbol.
pos
- The position of the word to be judged in ids
public float[] getWordEntropies(int[] ids)
ids
- The IDs of the words in the sentence. Will always start
and end with the sentence terminal symbol.
public float[] getWordEntropies(java.lang.String[] sent)
sent
- The string of words
public float[] getClassEntropies()
public float[] getSimpleEntropies()
public float[] getUnknownEntropies()
public float getSentenceEntropy(java.lang.String[] sent)
sent
- The sentence to find the entropy of
public float getSentenceSimpleEntropy()
public float getSentenceUnknownEntropy()
public float getSentenceClassEntropy()
public abstract void trainModel(java.lang.Iterable<java.lang.String[]> sl) throws java.io.IOException
java.io.IOException
public abstract java.lang.String printReport()
public void setVocabulary(java.lang.String[] voc)
public boolean isInVocab(java.lang.String str)
str
- The symbol to check
public boolean isInVocab(int idx)
idx
- the index to check
public int[] getSentenceIds(java.lang.String[] sent)
sent
- The sentence to convert
public java.lang.String[] getVocabulary()
public void importVocabulary(java.lang.Iterable<java.lang.String[]> sl) throws java.io.IOException
sl
- The sentenceLoader containing the corpus sentences.
java.io.IOException
public int findUnknownId(java.lang.String key)
public int getId(java.lang.String word)
word
- The word to find the id for
public int getDebug()
public void setDebug(int debug)
public java.lang.String getSymbol()
public void setSymbol(java.lang.String symbol)
public java.lang.String getName()
public void setName(java.lang.String name)
public SymbolSet getVocab()
public void setVocab(SymbolSet newVocab)
public java.lang.String getStartSymbol()
public void setStartSymbol(java.lang.String startSymbol)
public java.lang.String getTerminalSymbol()
public void setTerminalSymbol(java.lang.String terminalSymbol)
public java.lang.String getUnknownSymbol()
public void setUnknownSymbol(java.lang.String ukSymbol)
public LanguageModel[] getUnknownModels()
public void setUnknownModels(LanguageModel[] ukModels)
public boolean isClosed()
public void setClosed(boolean closed)
public boolean getCountTerminals()
public void setCountTerminals(boolean countTerminals)
public int getVocabFrequency()
public void setVocabFrequency(int vocabFrequency)
vocabFrequency
- All words that occur this many times or fewer will be treated
as unknown words.
public java.util.regex.Pattern getRegex()
public void setRegex(java.lang.String regex)
public int getVocabLimit()
public void setVocabLimit(int vocabLimit)
public int getMaxLength()
public void setMaxLength(int maxLength)
public ClassMap getClassMap()
public void setClassMap(ClassMap cm)
public int getUnknownModelCount()
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |