Пример #1
0
    def build_lattice(self, pt, sentence):
        '''
        Gets a phrase table and the tokenized sentence and outputs a lattice
        file formatted as follows:
            whole sentence
            1-1:
            <English translation> <Translation score>
            <English translation> <Translation score>
            ...
            1-2:
            <English translation> <Translation score>
            <English translation> <Translation score>
            ...
            2-2:

        The spans n-n refer to the tokens of the input Spanish sentence
        '''
        sentence = tokenize(sentence)
        self.sentence = sentence
        for start in xrange(len(sentence)):
            self.phrases[start] = {}
            for end in xrange(start+1, len(sentence)+1):
                foreign = sentence[start:end]
                p = Phrase(foreign, start, end)
                if len(foreign) == 1 and foreign[0] == ',':
                    p.translations = [Translation(foreign, (',',), 0)]
                else:
                    p.translations = pt.translate(foreign)
                self.phrases[start][end] = p
Пример #2
0
 def create_initial(cls):
     initial_phrase = Phrase(None, None, None)
     initial_phrase.translations = [Translation(None, (u'<s>',), 0)]
     return cls(None, initial_phrase, 0, (u'<s>',))