def get_entity_alignments(self, x, y):
    """Align lexicon entities found in ``x`` with their tokens in ``y``.

    Both strings are tokenized on single spaces. Every lexicon match over
    the tokens of ``x`` that occurs exactly once among the lexicon matches
    and exactly once as a token of ``y`` yields one alignment.

    Args:
        x: Space-separated input sentence.
        y: Space-separated logical form.

    Returns:
        A list of ``(cat, x_span, y_span)`` tuples where

        * ``cat`` is ``'$'`` followed by the entity type, i.e. the text
          after the first ``':'`` in the entity with its first character
          dropped (presumably a leading underscore -- confirm against the
          lexicon format);
        * ``x_span`` is the half-open character range in ``x`` covering
          tokens ``i`` .. ``j - 1`` of the lexicon match;
        * ``y_span`` is the half-open character range in ``y`` covering
          the entity token.
    """
    # Cache lexicon
    if not self.lex:
        self.lex = atislexicon.get_lexicon()

    x_toks = x.split(' ')
    y_toks = y.split(' ')

    # x_starts[i] is the character offset at which token i starts; the
    # extra final entry is one past the end of the string plus separator.
    x_starts = [0]
    for tok in x_toks:
        x_starts.append(x_starts[-1] + len(tok) + 1)
    y_starts = [0]
    for tok in y_toks:
        y_starts.append(y_starts[-1] + len(tok) + 1)

    lex_items = self.lex.map_over_sentence(x_toks, return_entries=True)
    lex_ents = [entry for _, entry in lex_items]

    alignments = []
    for (i, j), ent in lex_items:
        # Make sure this entity occurs exactly once in lexicon entries
        # and in the logical form
        if lex_ents.count(ent) != 1 or y_toks.count(ent) != 1:
            continue
        ent_type = ent.split(':')[1][1:]
        x_span = (x_starts[i], x_starts[j] - 1)
        # Locate the entity by token, not by substring search on y: the
        # entity text may also appear inside an earlier, longer token
        # (e.g. '1:_dn' inside '11:_dn'), which a substring search would
        # hit first.
        y_start = y_starts[y_toks.index(ent)]
        y_span = (y_start, y_start + len(ent))
        alignments.append(('$' + ent_type, x_span, y_span))
    return alignments
def get_lexicon():
    """Return the lexicon for the configured domain.

    Returns:
        ``None`` when ``OPTIONS.use_lexicon`` is falsy; otherwise the
        result of the ``get_lexicon()`` function of the module matching
        ``OPTIONS.domain`` (``'atis'`` or ``'geoquery'``).

    Raises:
        Exception: if lexicon use is enabled but ``OPTIONS.domain`` is
            neither ``'atis'`` nor ``'geoquery'``.
    """
    # Lexicon disabled: nothing to load.
    if not OPTIONS.use_lexicon:
        return None

    domain = OPTIONS.domain
    if domain == 'atis':
        return atislexicon.get_lexicon()
    if domain == 'geoquery':
        return geolexicon.get_lexicon()
    raise Exception('No lexicon for domain %s' % domain)
def get_templates_and_replacements(data, lex=None):
    """Extract sentence templates and entity replacement rules from data.

    Each ``(x, y)`` pair is tokenized on single spaces. Lexicon matches in
    ``x`` whose entity occurs exactly once among the matches and exactly
    once as a token of ``y`` are replaced by named ``%``-format holes
    (``%(w0)s``, ``%(w1)s``, ...) in both strings. Pairs with no such
    entity produce no template.

    Args:
        data: Iterable of ``(x, y)`` string pairs.
        lex: Optional lexicon object providing
            ``map_over_sentence(tokens, return_entries=True)``, which must
            return ``((i, j), entity)`` items. Defaults to
            ``atislexicon.get_lexicon()``.

    Returns:
        A ``(templates, replacements)`` tuple.

        * ``templates`` is a list of ``(x_template, y_template, reptypes)``
          where ``reptypes[k]`` is the entity type that fills hole ``wk``.
        * ``replacements`` is a ``defaultdict`` mapping an entity type (the
          text after the first ``':'`` of the entity) to a set of
          ``(x_phrase, entity)`` pairs.
    """
    if lex is None:
        lex = atislexicon.get_lexicon()
    templates = []
    replacements = collections.defaultdict(set)
    for x, y in data:
        x_toks = x.split(' ')
        y_toks = y.split(' ')
        lex_items = lex.map_over_sentence(x_toks, return_entries=True)
        # Count each entity once up front instead of rescanning the list
        # for every item. A generator expression is used so that no loop
        # variable leaks into (or shadows) the enclosing scope.
        ent_counts = collections.Counter(ent for _, ent in lex_items)
        x_holes = []
        y_holes = []
        reptypes = []
        for (i, j), ent in lex_items:
            # Make sure this entity occurs exactly once in lexicon entries
            # and in the logical form
            if ent_counts[ent] != 1 or y_toks.count(ent) != 1:
                continue

            # Add the replacement rule
            x_span = ' '.join(x_toks[i:j])
            ent_type = ent.split(':')[1]
            replacements[ent_type].add((x_span, ent))

            # Update the template
            x_holes.append((i, j))
            y_holes.append(y_toks.index(ent))
            reptypes.append(ent_type)

        # Generate the template
        if not x_holes:
            continue
        x_new_toks = list(x_toks)
        y_new_toks = list(y_toks)
        for count, ((i, j), y_ind) in enumerate(zip(x_holes, y_holes)):
            fmt_str = '%(w' + str(count) + ')s'
            # The hole takes the place of the first token of the span; the
            # remaining tokens of the span are marked for removal.
            x_new_toks[i] = fmt_str
            for k in range(i + 1, j):
                x_new_toks[k] = None
            y_new_toks[y_ind] = fmt_str
        x_t = ' '.join(t for t in x_new_toks if t is not None)
        y_t = ' '.join(y_new_toks)
        templates.append((x_t, y_t, reptypes))
    return templates, replacements
def get_templates_and_replacements(data, lex=None):
    """Extract sentence templates and entity replacement rules from data.

    Each ``(x, y)`` pair is tokenized on single spaces. Lexicon matches in
    ``x`` whose entity occurs exactly once among the matches and exactly
    once as a token of ``y`` are replaced by named ``%``-format holes
    (``%(w0)s``, ``%(w1)s``, ...) in both strings. Pairs with no such
    entity produce no template.

    Args:
        data: Iterable of ``(x, y)`` string pairs.
        lex: Optional lexicon object providing
            ``map_over_sentence(tokens, return_entries=True)``, which must
            return ``((i, j), entity)`` items. Defaults to
            ``atislexicon.get_lexicon()``.

    Returns:
        A ``(templates, replacements)`` tuple.

        * ``templates`` is a list of ``(x_template, y_template, reptypes)``
          where ``reptypes[k]`` is the entity type that fills hole ``wk``.
        * ``replacements`` is a ``defaultdict`` mapping an entity type (the
          text after the first ``':'`` of the entity) to a set of
          ``(x_phrase, entity)`` pairs.
    """
    if lex is None:
        lex = atislexicon.get_lexicon()
    templates = []
    replacements = collections.defaultdict(set)
    for x, y in data:
        x_toks = x.split(' ')
        y_toks = y.split(' ')
        lex_items = lex.map_over_sentence(x_toks, return_entries=True)
        # Count each entity once up front instead of rescanning the list
        # for every item. A generator expression is used so that no loop
        # variable leaks into (or shadows) the enclosing scope.
        ent_counts = collections.Counter(ent for _, ent in lex_items)
        x_holes = []
        y_holes = []
        reptypes = []
        for (i, j), ent in lex_items:
            # Make sure this entity occurs exactly once in lexicon entries
            # and in the logical form
            if ent_counts[ent] != 1 or y_toks.count(ent) != 1:
                continue

            # Add the replacement rule
            x_span = ' '.join(x_toks[i:j])
            ent_type = ent.split(':')[1]
            replacements[ent_type].add((x_span, ent))

            # Update the template
            x_holes.append((i, j))
            y_holes.append(y_toks.index(ent))
            reptypes.append(ent_type)

        # Generate the template
        if not x_holes:
            continue
        x_new_toks = list(x_toks)
        y_new_toks = list(y_toks)
        for count, ((i, j), y_ind) in enumerate(zip(x_holes, y_holes)):
            fmt_str = '%(w' + str(count) + ')s'
            # The hole takes the place of the first token of the span; the
            # remaining tokens of the span are marked for removal.
            x_new_toks[i] = fmt_str
            for k in range(i + 1, j):
                x_new_toks[k] = None
            y_new_toks[y_ind] = fmt_str
        x_t = ' '.join(t for t in x_new_toks if t is not None)
        y_t = ' '.join(y_new_toks)
        templates.append((x_t, y_t, reptypes))
    return templates, replacements
def get_lexicon(self):
    """Return the lexicon produced by ``atislexicon.get_lexicon()``."""
    lexicon = atislexicon.get_lexicon()
    return lexicon
def get_lexicon():
    """Return the lexicon for the configured domain.

    Returns:
        ``None`` when ``OPTIONS.lexicon`` is falsy; otherwise the result of
        ``atislexicon.get_lexicon()`` when ``OPTIONS.domain`` is ``'atis'``.

    Raises:
        Exception: if the lexicon is enabled but ``OPTIONS.domain`` is not
            ``'atis'``.
    """
    # Lexicon disabled: nothing to load.
    if not OPTIONS.lexicon:
        return None

    domain = OPTIONS.domain
    if domain != 'atis':
        raise Exception('No lexicon for domain %s' % domain)
    return atislexicon.get_lexicon()