示例#1
0
def translate(bicorpus, file=sys.stdin):
    """
    Print candidate translations for every sentence read from `file`.

    input: `file`, an iterable of lines; each line (trailing newline
        removed) is one sentence Bs in the source language.
    output: nothing is returned. For each candidate, one line
        "Bs<TAB>Bt" is written to stdout.
    data (passed as arguments):
        bicorpus = object indexable by source-side strings and exposing
            iter(string=..., strategy=..., method=...). It is used both
            as the corpus (bicorpus[As] is the translation of As) and as
            the dictionary for the substituted fragments; no separate
            bidictionary is taken by this function.

    For each As yielded by bicorpus.iter(...), single_substitution(As, Bs)
    gives the pair (a_s, b_s). If either one is missing from bicorpus
    (KeyError) that As is skipped; otherwise the candidate is bicorpus[As]
    with a_t replaced by b_t.

    When the module-level flag __verbose__ is true, progress lines are
    written to stderr.
    """
    to_stdout = sys.stdout.write
    to_stderr = sys.stderr.write

    for raw_line in file:
        sentence = raw_line.rstrip('\n')
        if __verbose__:
            to_stderr('\n# Translating sentence: {}'.format(sentence) + '\n')

        neighbours = bicorpus.iter(string=sentence,
                                   strategy='by distance',
                                   method='direct')
        for example in neighbours:
            src_old, src_new = single_substitution(example, sentence)

            # Only the two lookups are guarded: a fragment without a known
            # translation means this example cannot produce a candidate.
            try:
                tgt_old = bicorpus[src_old]
                tgt_new = bicorpus[src_new]
            except KeyError:
                continue

            if __verbose__:
                analogy = '#\t{} : {} :: {} : {}\n'.format(
                    src_old, tgt_old, src_new, tgt_new)
                to_stderr(analogy + '\n')

            candidate = bicorpus[example].replace(tgt_old, tgt_new)
            to_stdout('{}\t{}'.format(sentence, candidate) + '\n')
def translate(bicorpus, bidictionary, file=sys.stdin):
    """
    Print candidate translations for every sentence read from `file`.

    input: `file`, an iterable of lines; each line (trailing newline
        removed) is one sentence Bs in the source language.
    output: nothing is returned. For each candidate, one line
        "Bs<TAB>Bt" is written to stdout.
    data (passed as arguments):
        bicorpus = iterated to obtain source sentences As and indexed as
            bicorpus[As] to obtain their translations (so it is used like
            a mapping As -> At -- TODO confirm the concrete type).
        bidictionary = bilingual dictionary indexed as bidictionary[a_s]
            to obtain a_t, the translation of a_s.

    For each As in bicorpus, single_substitution(As, Bs) gives the pair
    (a_s, b_s). If either one is missing from bidictionary (KeyError)
    that As is skipped; otherwise the candidate is bicorpus[As] with a_t
    replaced by b_t.

    When the module-level flag __verbose__ is true, diagnostics are
    written to stderr. Stdout carries only the tab-separated results.
    """
    if __verbose__:
        # This used to be an unconditional `print file` on stdout, which
        # mixed a debug line into the result stream; diagnostics belong on
        # stderr with the other verbose messages.
        sys.stderr.write('# Reading sentences from: {}\n'.format(file))

    for Bs in file:
        Bs = Bs.rstrip('\n')
        if __verbose__:
            sys.stderr.write(
                '\n# Translating sentence: {}'.format(Bs) + '\n')

        for As in bicorpus:
            a_s, b_s = single_substitution(As, Bs)

            # Only the dictionary lookups are guarded: a fragment without
            # a known translation means As cannot produce a candidate.
            try:
                a_t, b_t = bidictionary[a_s], bidictionary[b_s]
            except KeyError:
                continue

            if __verbose__:
                sys.stderr.write('\t{} <--> {}\t{} <--> {}'.format(
                    a_s, b_s, a_t, b_t) + '\n')

            sys.stdout.write(
                '{}\t{}'.format(Bs, bicorpus[As].replace(a_t, b_t)) + '\n')