def build_lattice(self, pt, sentence):
    '''
    Fill self.phrases with one Phrase for every contiguous token span.

    The input sentence is run through tokenize() and the result is kept
    on self.sentence. For each start index a fresh dict is stored at
    self.phrases[start]; it maps every end index (exclusive, from
    start + 1 up to the token count) to the Phrase covering
    tokens[start:end].

    Each Phrase gets a `translations` attribute:
      * a span made of the single token ',' receives exactly one
        Translation whose target side is (',',) and whose last
        argument is 0 (presumably a score -- confirm against
        Translation's definition);
      * every other span receives whatever pt.translate(span) returns.

    pt       -- phrase table object; only its translate() method is used.
    sentence -- raw sentence, passed to tokenize() before use.

    Returns nothing; the result lives in self.sentence / self.phrases.
    '''
    tokens = tokenize(sentence)
    self.sentence = tokens
    token_count = len(tokens)

    for start in xrange(token_count):
        # All spans beginning at `start`, keyed by their exclusive end.
        spans_from_start = {}
        self.phrases[start] = spans_from_start

        for end in xrange(start + 1, token_count + 1):
            span = tokens[start:end]
            phrase = Phrase(span, start, end)

            # A lone comma bypasses the phrase table and maps to itself.
            is_lone_comma = len(span) == 1 and span[0] == ','
            if not is_lone_comma:
                phrase.translations = pt.translate(span)
            else:
                phrase.translations = [Translation(span, (',',), 0)]

            spans_from_start[end] = phrase
def create_initial(cls):
    '''
    Build the initial instance of `cls`, seeded with the start marker.

    A placeholder Phrase (all three constructor arguments None) is
    created and given a single Translation whose target side is the
    one-token tuple (u'<s>',) and whose last argument is 0. The method
    then returns cls(None, <that phrase>, 0, (u'<s>',)).

    NOTE(review): the method takes `cls`, so it is presumably meant to be
    a classmethod; no decorator is visible in this view -- confirm.
    '''
    start_marker = (u'<s>',)

    seed_phrase = Phrase(None, None, None)
    seed_phrase.translations = [Translation(None, start_marker, 0)]

    return cls(None, seed_phrase, 0, start_marker)