|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object kylm.model.LanguageModel kylm.model.ngram.NgramLM
public class NgramLM
A class that implements a normal n-gram model
Constructor Summary | |
---|---|
NgramLM()
|
|
NgramLM(int n)
A constructor that creates a model of size n |
|
NgramLM(int n,
NgramSmoother smoother)
|
Method Summary | |
---|---|
void |
countNgrams(java.lang.Iterable<java.lang.String[]> sl)
Count the ngrams in the corpus |
boolean |
equals(java.lang.Object obj)
|
void |
expandUnknowns()
Expand unknown words in the vocabulary explicitly (useful for WFSTs). TODO: This only works for unigrams. TODO: This assigns a uniform probability and doesn't take unknown word models into account. |
int |
getN()
Get the length of the n-gram context |
int[] |
getNgramCounts()
Get the number of n-grams at each level |
java.lang.String |
getNodeName(NgramNode child)
|
BranchNode |
getRoot()
Get the root node of the n-gram Tree |
NgramSmoother |
getSmoother()
|
float[] |
getWordEntropies(int[] iids)
Get the entropies of every word in a sentence by ID. |
float |
getWordEntropy(int[] ids,
int pos)
Get the entropy of the last word in the sequence by ID |
java.lang.String |
printReport()
|
void |
setN(int n)
Set the length of the n-gram context |
void |
setNgramCounts(int[] cs)
|
void |
setSmoother(NgramSmoother smoother)
|
void |
trainModel(java.lang.Iterable<java.lang.String[]> sl)
|
Methods inherited from class java.lang.Object |
---|
getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Constructor Detail |
---|
public NgramLM(int n)
n
- the length of the context of the n-gram model
public NgramLM(int n, NgramSmoother smoother)
public NgramLM()
Method Detail |
---|
public float[] getWordEntropies(int[] iids)
LanguageModel
getWordEntropies
in class LanguageModel
iids
- The IDs of the words in the sentence. Will always start
and end with the sentence terminal symbol.
public float getWordEntropy(int[] ids, int pos)
LanguageModel
getWordEntropy
in class LanguageModel
ids
- The IDs of the words in the sentence. Will always start
and end with the sentence terminal symbol.
pos
- The position of the word to be judged in ids
public void trainModel(java.lang.Iterable<java.lang.String[]> sl) throws java.io.IOException
trainModel
in class LanguageModel
java.io.IOException
public void countNgrams(java.lang.Iterable<java.lang.String[]> sl) throws java.io.IOException
sl
- An iterable over the sentences in the corpus
java.io.IOException
public BranchNode getRoot()
public int getN()
public void setN(int n)
n
- The length of the n-gram context
public void expandUnknowns()
public boolean equals(java.lang.Object obj)
equals
in class LanguageModel
public int[] getNgramCounts()
public NgramSmoother getSmoother()
public void setSmoother(NgramSmoother smoother)
public void setNgramCounts(int[] cs)
public java.lang.String getNodeName(NgramNode child)
public java.lang.String printReport()
printReport
in class LanguageModel
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |