Пример #1
0
def cur_cost(current_hyp, source, phrase_probs, lang_model: LanguageModel):
    """Return the combined probability of a translation hypothesis.

    The result is the product of three factors: the language-model
    probability of the target tokens, the accumulated distortion
    probability, and the accumulated phrase-translation probability.

    Args:
        current_hyp: Re-iterable collection of ``(start_f, end_f, e)``
            triples (it is traversed twice). ``start_f``/``end_f`` are
            inclusive indices into ``source``; ``e`` is the target phrase
            as a space-separated string.
        source: Sequence of source-side token strings.
        phrase_probs: Nested mapping indexed as
            ``phrase_probs[source_phrase][target_phrase]``.
        lang_model: Object whose ``get_prob_sentance`` scores the target
            token list; it is called with ``padded_left=True`` and
            ``padded_right=False``.

    Returns:
        ``language_model * distortion * translation``.

    Raises:
        KeyError: If a source/target phrase pair is missing from
            ``phrase_probs`` (assuming plain ``dict`` lookups).
    """
    # Flatten every target phrase of the hypothesis into one token list.
    target_phrases = [target for _, _, target in current_hyp]
    target_tokens = [
        token
        for target in target_phrases
        for token in target.split(" ")
    ]
    lm_score = lang_model.get_prob_sentance(
        target_tokens, padded_left=True, padded_right=False
    )

    translation_score = 1
    distortion_score = 1
    prev_end = -1  # -1 marks "no phrase processed yet"
    for src_start, src_end, target in current_hyp:
        source_phrase = " ".join(source[src_start:src_end + 1])
        translation_score *= phrase_probs[source_phrase][target]
        # Distortion is only charged between consecutive phrases, so the
        # first phrase contributes nothing. `d` is defined elsewhere in
        # the module; presumably the distortion model -- confirm there.
        if prev_end != -1:
            distortion_score *= d(src_start, prev_end)
        prev_end = src_end

    return lm_score * distortion_score * translation_score