Пример #1
0
    def train(self, data):
        """Build the lookup table of output templates keyed by sentence structure.

        ``self.model`` is reset to an empty dict. For every entry, the
        ``entry['source']`` value is split and delexicalized through ``utils``,
        each resulting sentence is rendered as a ``<SNT> ... </SNT>`` string,
        and the tuple of those strings becomes the dictionary key. Every
        ``target['output']`` found in ``entry['targets']`` is joined with
        single spaces and appended to the list stored under that key.

        Args:
            data: Iterable of mappings providing ``'source'`` and ``'targets'``;
                each target is a mapping whose ``'output'`` is an iterable of
                strings.

        Returns:
            The dict stored in ``self.model``, mapping tuples of sentence
            strings to lists of output strings.
        """
        self.model = {}
        for entry in data:
            delexicalized = utils.delexicalize_struct(
                utils.split_struct(entry['source'])
            )
            # Only the second field of each triple goes into the key
            # (presumably the predicate -- confirm against utils).
            key = tuple(
                ' '.join(['<SNT>'] + [item[1] for item in sentence] + ['</SNT>'])
                for sentence in delexicalized
            )

            # The key is registered even when the entry has no targets.
            outputs = self.model.setdefault(key, [])
            outputs.extend(
                ' '.join(candidate['output']) for candidate in entry['targets']
            )

        return self.model
Пример #2
0
    def predict(self, source):
        """Produce an output token list for ``source`` by stitching stored templates.

        The source is split into sentences and delexicalized through ``utils``;
        each sentence is rendered as a ``<SNT> ... </SNT>`` string built from
        the second field of its triples, giving a list of structure strings.

        The list is then covered greedily: starting at ``start``, the longest
        window ``struct[start:end]`` is looked up in ``self.model``. On a miss
        the window shrinks by one sentence from the right; when it shrinks to
        nothing, the sentence at ``start`` is skipped. On a hit, one of the
        stored templates is picked at random, split on whitespace, its tokens
        found in the window's ``entities`` mapping are replaced, and the
        result is appended to the output before moving past the window.

        A key whose template list is empty is treated as a miss; indexing it
        with ``randint(0, -1)`` would otherwise raise ``ValueError``.

        Args:
            source: Input structure accepted by ``utils.split_struct``.

        Returns:
            A list of output tokens, after a final substitution pass using the
            second mapping returned by ``self.track_entity`` for the whole
            input.
        """
        sentences = utils.split_struct(source)
        triples = utils.delexicalize_struct(sentences)
        struct = [
            ' '.join(['<SNT>'] + [t[1] for t in snt] + ['</SNT>'])
            for snt in triples
        ]

        target = []
        total = len(struct)
        # Try to extract a full template first, then progressively shorter ones.
        start, end = 0, total
        while start < total:
            snts = tuple(struct[start:end])
            # NOTE(review): this is recomputed for every candidate window, even
            # on a miss; it could move into the hit branch if track_entity has
            # no side effects -- confirm before changing.
            entities, _ = self.track_entity(sentences[start:end])

            # An empty list (key stored without any output) counts as a miss,
            # so the search falls back to shorter windows instead of crashing.
            candidates = self.model.get(snts)
            if candidates:
                pos = randint(0, len(candidates) - 1)
                template = candidates[pos].split()
                target.extend(
                    entities[w] if w in entities else w for w in template
                )

                # Continue right after the window that was just realized.
                start = end
                end = total
            else:
                end -= 1

                # Jump a triple if it is not in the training set.
                if start == end:
                    start += 1
                    end = total

        # Final pass: substitute tokens using the tag mapping computed over the
        # whole input rather than a single window.
        _, entitytag = self.track_entity(sentences)
        return [entitytag[w] if w in entitytag else w for w in target]