def get_ed_score(h, r):
    """Score two strings by comparing their whitespace-separated tokens.

    Both ``h`` and ``r`` are split on whitespace with ``str.split()`` and
    the two resulting token lists are handed to ``ed.edratio``.

    Returns whatever ``ed.edratio`` returns for the two token lists.
    """
    # Arguments are evaluated left to right, so ``h`` is tokenised before ``r``.
    return ed.edratio(h.split(), r.split())
) # making a set of all possible target tokens that appear with source n_e.update( tokens_target ) # adding all tokens of the target sentence a potential translations for source token 'e' initial_translation[e] = n_e #saving in a map if method == 'uniform': for k, v in initial_translation.iteritems( ): # walking through the map and setting initial translation probability uniformaly. for v_es in v: translations[v_es, k] = 1.0 / len(v) #print 'initial t:' #pp(translations) else: """ What if we dont set the initial translation probabilities uniformly? look at: http://research.microsoft.com/pubs/150581/acl11.pdf """ add_delta = 1.0 for k, v in initial_translation.iteritems(): print 'setting intial for ', k edr_k = map(lambda t: ed.edratio(t, k) + add_delta, v) sum_edr = sum(edr_k) for v_es, edr_es in zip(v, edr_k): translations[v_es, k] = edr_es / sum_edr writer = open(save, 'w') for k, v in translations.iteritems(): writer.write(str(' '.join(k)) + '\t' + str(v) + '\n') writer.flush() writer.close()
corpus_target[k] = tokens_target for e in tokens_source: n_e = initial_translation.get(e, set()) # making a set of all possible target tokens that appear with source n_e.update(tokens_target) # adding all tokens of the target sentence a potential translations for source token 'e' initial_translation[e] = n_e #saving in a map if method == 'uniform': for k, v in initial_translation.iteritems(): # walking through the map and setting initial translation probability uniformaly. for v_es in v: translations[v_es, k] = 1.0 / len(v) #print 'initial t:' #pp(translations) else: """ What if we dont set the initial translation probabilities uniformly? look at: http://research.microsoft.com/pubs/150581/acl11.pdf """ add_delta = 1.0 for k, v in initial_translation.iteritems(): print 'setting intial for ', k edr_k = map(lambda t: ed.edratio(t, k) + add_delta, v) sum_edr = sum(edr_k) for v_es, edr_es in zip(v, edr_k): translations[v_es, k] = edr_es / sum_edr writer = open(save, 'w') for k, v in translations.iteritems(): writer.write(str(' '.join(k)) + '\t' + str(v) + '\n') writer.flush() writer.close()