def get_joint_ibm1(e_corpus: Corpus, f_corpus: Corpus):
    """Assemble a single-cluster ``JointModel`` from categorical components.

    The components are created in this order and handed to ``JointModel``
    positionally: two ``LengthDistribution`` objects, a
    ``ClusterDistribution`` with one cluster, a ``UnigramMixture`` with one
    cluster, a ``UniformAlignment`` and a ``BrownLexical`` table.

    Args:
        e_corpus: Corpus whose ``vocab_size()`` sizes the unigram mixture and
            the first dimension argument of the lexical component.
        f_corpus: Corpus whose ``vocab_size()`` is the second dimension
            argument of the lexical component.

    Returns:
        The ``JointModel`` built from the six components.
    """
    # NOTE(review): "l"/"m" presumably refer to the lengths of the two
    # sentence sides -- confirm against JointModel's parameter names.
    length_l = cat.LengthDistribution()
    length_m = cat.LengthDistribution()
    # A single cluster is hard-coded for both cluster-dependent components.
    cluster = cat.ClusterDistribution(1)
    e_unigrams = cat.UnigramMixture(1, e_corpus.vocab_size())
    alignment = cat.UniformAlignment()
    lexical = cat.BrownLexical(
        e_corpus.vocab_size(),
        f_corpus.vocab_size(),
    )
    return JointModel(
        length_l, length_m, cluster, e_unigrams, alignment, lexical
    )
def get_joint_ibm1z(e_corpus: Corpus, f_corpus: Corpus, n_clusters=1, cluster_unigrams=True, alpha=1.0):
    """Assemble a ``JointModel`` whose components are mixtures over clusters.

    Args:
        e_corpus: Corpus whose ``vocab_size()`` sizes the unigram mixture and
            the first vocabulary argument of the lexical mixture.
        f_corpus: Corpus whose ``vocab_size()`` is the second vocabulary
            argument of the lexical mixture.
        n_clusters: Number of clusters passed to the cluster component, the
            unigram mixture and the lexical mixture.
        cluster_unigrams: When truthy the cluster component is a
            ``ClusterUnigrams``; otherwise it is a ``ClusterDistribution``.
        alpha: Forwarded unchanged to ``UnigramMixture`` and
            ``MixtureOfBrownLexical`` (presumably a prior/smoothing
            strength -- confirm against those classes).

    Returns:
        The ``JointModel`` built from the six components.
    """
    length_l = cat.LengthDistribution()
    length_m = cat.LengthDistribution()

    # Pick the class first, then instantiate it once with the cluster count.
    cluster_cls = (
        cat.ClusterUnigrams if cluster_unigrams else cat.ClusterDistribution
    )
    cluster = cluster_cls(n_clusters)

    e_unigrams = cat.UnigramMixture(n_clusters, e_corpus.vocab_size(), alpha)
    alignment = cat.UniformAlignment()
    lexical = cat.MixtureOfBrownLexical(
        n_clusters,
        e_corpus.vocab_size(),
        f_corpus.vocab_size(),
        alpha,
    )
    return JointModel(
        length_l, length_m, cluster, e_unigrams, alignment, lexical
    )
def get_mlp_ibm1(e_corpus: Corpus, f_corpus: Corpus):
    """Assemble a ``ConditionalModel`` that uses an ``MLPLexical`` component.

    Args:
        e_corpus: Corpus passed as the first argument to ``MLPLexical``.
        f_corpus: Corpus passed as the second argument to ``MLPLexical``.

    Returns:
        A ``ConditionalModel`` built from two ``LengthDistribution`` objects,
        a ``UniformAlignment`` and the ``MLPLexical`` component.
    """
    # Two separate instances, created left to right.
    length_l, length_m = cat.LengthDistribution(), cat.LengthDistribution()
    alignment = cat.UniformAlignment()
    # Unlike the categorical lexical tables, this component receives the
    # corpora themselves rather than their vocabulary sizes.
    lexical = MLPLexical(e_corpus, f_corpus)
    return ConditionalModel(length_l, length_m, alignment, lexical)
def get_ibm1(e_corpus: Corpus, f_corpus: Corpus):
    """Assemble a ``ConditionalModel`` with a categorical ``BrownLexical`` table.

    Args:
        e_corpus: Corpus whose ``vocab_size()`` is the first argument to
            ``BrownLexical``.
        f_corpus: Corpus whose ``vocab_size()`` is the second argument to
            ``BrownLexical``.

    Returns:
        A ``ConditionalModel`` built from two ``LengthDistribution`` objects,
        a ``UniformAlignment`` and the ``BrownLexical`` component.
    """
    length_l = cat.LengthDistribution()
    length_m = cat.LengthDistribution()
    alignment = cat.UniformAlignment()

    # The lexical table is sized from the two vocabularies.
    e_vocab = e_corpus.vocab_size()
    f_vocab = f_corpus.vocab_size()
    lexical = cat.BrownLexical(e_vocab, f_vocab)

    return ConditionalModel(length_l, length_m, alignment, lexical)