from padatious.match_data import MatchData


class TestMatchData:
    def setup(self):
        self.match = MatchData('name', ['one', 'two'],
                               {'{word}': ['value', 'tokens']}, 0.5)

    def test_detokenize(self):
        self.match.detokenize()
        assert self.match.sent == 'one two'
        correct_match = MatchData('name', 'one two',
                                  {'word': 'value tokens'}, 0.5)
        assert self.match.__dict__ == correct_match.__dict__
def calc_intents(self, query):
    """
    Tests all the intents against the query and returns
    data on how well each one matched against the query

    Args:
        query (str): Input sentence to test against intents
    Returns:
        list<MatchData>: List of intent matches
    See calc_intent() for a description of the returned MatchData
    """
    if self.must_train:
        self.train()
    intents = {} if self.train_thread and self.train_thread.is_alive() else {
        i.name: i for i in self.intents.calc_intents(query, self.entities)
    }
    sent = tokenize(query)
    for perfect_match in self.padaos.calc_intents(query):
        name = perfect_match['name']
        intents[name] = MatchData(name, sent, matches=perfect_match['entities'], conf=1.0)
    return list(intents.values())
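# Usage sketch (not part of this module): calc_intents is normally reached through
# the public IntentContainer API. The intent names, phrases, and cache directory
# below are illustrative assumptions, not values from this codebase.
#
#     from padatious import IntentContainer
#
#     container = IntentContainer('intent_cache')
#     container.add_intent('greeting', ['hi', 'hello there'])
#     container.add_intent('weather', ['what is the weather like'])
#     container.train()
#     for match in container.calc_intents('hello there'):
#         print(match.name, match.conf)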
def match(self, sent, entities=None):
    possible_matches = [MatchData(self.name, sent)]
    for pi in self.pos_intents:
        entity = entities.find(self.name, pi.token) if entities else None
        for i in list(possible_matches):
            possible_matches += pi.match(i, entity)

    possible_matches = [i for i in possible_matches if i.conf >= 0.0]

    for i in possible_matches:
        conf = ((i.conf / len(i.matches)) if len(i.matches) > 0 else 0) + 0.5
        i.conf = math.sqrt(conf * self.simple_intent.match(i.sent))

    return max(possible_matches, key=lambda x: x.conf)
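# Worked example of the scoring above, with illustrative numbers: a candidate
# whose accumulated conf is 0.4 across two matched {tokens} averages to
# 0.4 / 2 + 0.5 = 0.7; if simple_intent.match(i.sent) returns 0.9, the final
# confidence is sqrt(0.7 * 0.9) ~= 0.794. Candidates whose conf fell below 0.0
# during position matching were already filtered out.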
def calc_intent(self, query):
    """
    Tests all the intents against the query and returns
    match data of the best intent

    Args:
        query (str): Input sentence to test against intents
    Returns:
        MatchData: Best intent match
    """
    matches = self.calc_intents(query)
    if len(matches) == 0:
        return MatchData('', '')
    best_match = max(matches, key=lambda x: x.conf)
    best_matches = (match for match in matches if match.conf == best_match.conf)
    return min(best_matches, key=lambda x: sum(map(len, x.matches.values())))
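# Usage sketch (public API): calc_intent returns only the single best MatchData,
# breaking confidence ties in favor of the match whose extracted entity values
# are shortest overall. The intent name and {city} entity are illustrative.
#
#     container = IntentContainer('intent_cache')
#     container.add_intent('weather', ['what is the weather in {city}'])
#     container.train()
#     match = container.calc_intent('what is the weather in paris')
#     print(match.name, match.conf, match.matches.get('city'))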
def match(self, orig_data, entity=None):
    l_matches = [(self.edges[0].match(orig_data.sent, pos), pos)
                 for pos in range(len(orig_data.sent))]
    r_matches = [(self.edges[1].match(orig_data.sent, pos), pos)
                 for pos in range(len(orig_data.sent))]

    def is_valid(l_pos, r_pos):
        if r_pos < l_pos:
            return False
        for p in range(l_pos, r_pos + 1):
            if orig_data.sent[p].startswith('{'):
                return False
        return True

    possible_matches = []
    for l_conf, l_pos in l_matches:
        if l_conf < 0.2:
            continue
        for r_conf, r_pos in r_matches:
            if r_conf < 0.2:
                continue
            if not is_valid(l_pos, r_pos):
                continue

            extracted = orig_data.sent[l_pos:r_pos + 1]
            pos_conf = (l_conf - 0.5 + r_conf - 0.5) / 2 + 0.5
            ent_conf = entity.match(extracted) if entity else 1

            new_sent = orig_data.sent[:l_pos] + [self.token] + orig_data.sent[r_pos + 1:]
            new_matches = orig_data.matches.copy()
            new_matches[self.token] = extracted
            extra_conf = math.sqrt(pos_conf * ent_conf) - 0.5
            data = MatchData(orig_data.name, new_sent, new_matches,
                             orig_data.conf + extra_conf)
            possible_matches.append(data)
    return possible_matches
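# Worked example of the position/entity arithmetic above, with illustrative
# numbers: left-edge confidence 0.8 and right-edge confidence 0.6 give
# pos_conf = ((0.8 - 0.5) + (0.6 - 0.5)) / 2 + 0.5 = 0.7. With no entity to
# check, ent_conf is 1, so extra_conf = sqrt(0.7 * 1) - 0.5 ~= 0.337, which is
# added to the incoming match's confidence. Edges scoring below 0.2 and spans
# overlapping an already-substituted {token} are skipped.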
def calc_intent(self, query, entity_manager):
    matches = self.calc_intents(query, entity_manager)
    if len(matches) == 0:
        return MatchData('', '')
    return max(matches, key=lambda x: x.conf)