示例#1
0
def get_joint_ibm1(e_corpus: Corpus, f_corpus: Corpus):
    """Assemble a ``JointModel`` from categorical components.

    The components are created in the same order in which they are handed
    to ``JointModel``: two ``LengthDistribution`` objects, a
    ``ClusterDistribution`` and a ``UnigramMixture`` (both built with the
    literal ``1`` as first argument -- presumably the number of clusters,
    confirm against ``cat``), a ``UniformAlignment`` and a ``BrownLexical``.

    Args:
        e_corpus: Corpus whose ``vocab_size()`` is passed to the unigram
            mixture and as the first size of the lexical component.
        f_corpus: Corpus whose ``vocab_size()`` is passed as the second
            size of the lexical component.

    Returns:
        The ``JointModel`` built from the six components.
    """
    length_l_dist = cat.LengthDistribution()
    length_m_dist = cat.LengthDistribution()
    cluster_dist = cat.ClusterDistribution(1)
    e_word_dist = cat.UnigramMixture(1, e_corpus.vocab_size())
    alignment_dist = cat.UniformAlignment()
    f_word_dist = cat.BrownLexical(
        e_corpus.vocab_size(),
        f_corpus.vocab_size(),
    )
    return JointModel(
        length_l_dist,
        length_m_dist,
        cluster_dist,
        e_word_dist,
        alignment_dist,
        f_word_dist,
    )
示例#2
0
def get_joint_ibm1z(e_corpus: Corpus,
                    f_corpus: Corpus,
                    n_clusters=1,
                    cluster_unigrams=True,
                    alpha=1.0):
    """Assemble a ``JointModel`` with a configurable number of clusters.

    Args:
        e_corpus: Corpus whose ``vocab_size()`` is passed to the unigram
            mixture and as the first vocabulary size of the lexical
            component.
        f_corpus: Corpus whose ``vocab_size()`` is passed as the second
            vocabulary size of the lexical component.
        n_clusters: Forwarded as first argument to the cluster component,
            the ``UnigramMixture`` and the ``MixtureOfBrownLexical``.
        cluster_unigrams: When truthy the cluster component is a
            ``ClusterUnigrams``; otherwise it is a ``ClusterDistribution``.
        alpha: Forwarded as last argument to ``UnigramMixture`` and
            ``MixtureOfBrownLexical`` (presumably a prior/smoothing
            hyperparameter -- confirm against ``cat``).

    Returns:
        The ``JointModel`` built from the six components.
    """
    length_l_dist = cat.LengthDistribution()
    length_m_dist = cat.LengthDistribution()

    # Pick the cluster component class first, then build it once.
    cluster_factory = (
        cat.ClusterUnigrams if cluster_unigrams else cat.ClusterDistribution
    )
    cluster_dist = cluster_factory(n_clusters)

    e_word_dist = cat.UnigramMixture(
        n_clusters,
        e_corpus.vocab_size(),
        alpha,
    )
    alignment_dist = cat.UniformAlignment()
    f_word_dist = cat.MixtureOfBrownLexical(
        n_clusters,
        e_corpus.vocab_size(),
        f_corpus.vocab_size(),
        alpha,
    )
    return JointModel(
        length_l_dist,
        length_m_dist,
        cluster_dist,
        e_word_dist,
        alignment_dist,
        f_word_dist,
    )
示例#3
0
def get_mlp_ibm1(e_corpus: Corpus, f_corpus: Corpus):
    """Assemble a ``ConditionalModel`` whose lexical part is ``MLPLexical``.

    Args:
        e_corpus: Corpus handed to ``MLPLexical`` as its first argument.
        f_corpus: Corpus handed to ``MLPLexical`` as its second argument.

    Returns:
        A ``ConditionalModel`` built from two ``LengthDistribution``
        objects, a ``UniformAlignment`` and the ``MLPLexical`` component.
    """
    length_l_dist = cat.LengthDistribution()
    length_m_dist = cat.LengthDistribution()
    alignment_dist = cat.UniformAlignment()
    # Unlike the categorical lexical components, this one receives the
    # corpora themselves rather than their vocabulary sizes.
    lexical_dist = MLPLexical(e_corpus, f_corpus)
    return ConditionalModel(
        length_l_dist,
        length_m_dist,
        alignment_dist,
        lexical_dist,
    )
示例#4
0
def get_ibm1(e_corpus: Corpus, f_corpus: Corpus):
    """Assemble a ``ConditionalModel`` with a ``BrownLexical`` component.

    Args:
        e_corpus: Corpus whose ``vocab_size()`` is the first size passed
            to ``BrownLexical``.
        f_corpus: Corpus whose ``vocab_size()`` is the second size passed
            to ``BrownLexical``.

    Returns:
        A ``ConditionalModel`` built from two ``LengthDistribution``
        objects, a ``UniformAlignment`` and the ``BrownLexical`` component.
    """
    length_l_dist = cat.LengthDistribution()
    length_m_dist = cat.LengthDistribution()
    alignment_dist = cat.UniformAlignment()
    lexical_dist = cat.BrownLexical(
        e_corpus.vocab_size(),
        f_corpus.vocab_size(),
    )
    return ConditionalModel(
        length_l_dist,
        length_m_dist,
        alignment_dist,
        lexical_dist,
    )