def train(self, data):
    """Build the template lookup table from training entries.

    Each entry's ``'source'`` is split and delexicalized through ``utils``;
    every resulting sentence is reduced to a signature string made of the
    second element of each of its triples, wrapped in ``<SNT>`` / ``</SNT>``
    markers. The tuple of signatures is the key under which the entry's
    target outputs (each ``target['output']`` joined with single spaces)
    are collected.

    Args:
        data: iterable of dict-like entries providing ``'source'`` and
            ``'targets'``; each target provides an ``'output'`` sequence
            of strings.

    Returns:
        The freshly built ``self.model`` dict, mapping signature tuples
        to lists of template strings.
    """
    model = {}
    # Bind the attribute up front so it refers to the dict being filled.
    self.model = model

    for entry in data:
        delexicalized = utils.delexicalize_struct(
            utils.split_struct(entry['source'])
        )
        key = tuple(
            ' '.join(['<SNT>'] + [triple[1] for triple in group] + ['</SNT>'])
            for group in delexicalized
        )

        # A key is registered even when the entry carries no targets.
        bucket = model.setdefault(key, [])
        bucket.extend(
            ' '.join(target['output']) for target in entry['targets']
        )

    return model
def predict(self, source):
    """Generate output tokens for ``source`` by stitching stored templates.

    The source is split into sentences and delexicalized through ``utils``;
    each sentence becomes a signature string (second element of each triple
    wrapped in ``<SNT>`` / ``</SNT>``). Starting at the first sentence, the
    longest span of consecutive signatures present in ``self.model`` is
    looked up, shrinking the span from the right until a match is found.
    On a match one stored template is picked at random, its tokens found in
    the span-level mapping returned by ``self.track_entity`` are replaced,
    and the search resumes after the span. A sentence that cannot be
    matched even on its own is skipped.

    Args:
        source: raw structure accepted by ``utils.split_struct``.

    Returns:
        list of str: the generated tokens; empty when nothing matched.
    """
    sentences = utils.split_struct(source)
    triples = utils.delexicalize_struct(sentences)
    struct = [
        ' '.join(['<SNT>'] + [t[1] for t in snt] + ['</SNT>'])
        for snt in triples
    ]
    total = len(struct)

    target = []
    # Try to extract a full template first, then progressively shorter ones.
    start, end = 0, total
    while start < total:
        snts = tuple(struct[start:end])
        # NOTE(review): computed for every candidate span, matching or not;
        # it could be deferred to the match branch if track_entity is
        # confirmed to be free of side effects.
        entities, _ = self.track_entity(sentences[start:end])

        # A key trained without any target holds an empty list; picking from
        # it would make randint(0, -1) raise ValueError, so treat such a key
        # as unseen and keep shrinking the span.
        candidates = self.model[snts] if snts in self.model else []
        if candidates:
            pos = randint(0, len(candidates) - 1)
            template = [entities[w] if w in entities else w
                        for w in candidates[pos].split()]
            target.extend(template)
            start = end
            end = total
        else:
            end -= 1
            # Jump a sentence if it is not in the training set.
            if start == end:
                start += 1
                end = total

    # Final pass: substitute tokens using the second mapping returned by
    # track_entity over the whole input.
    _, entitytag = self.track_entity(sentences)
    return [entitytag[w] if w in entitytag else w for w in target]