def analyze(self) -> dict:
    """Parse ``self.sentence`` and extract its geographic-question structure.

    Runs a constituency parse (``model.predict``), wraps the resulting
    ``hierplane_tree`` root in a ``ParseTree``, and tags tree nodes with
    single-letter roles:

        n toponym, d date, t place type, o other object/entity,
        a activity verb, s situation verb, r relation, q place quality,
        p object quality (intent nodes get a code from ``intent_encoding``).

    Returns a dict with the extracted pieces (``pnames``, ``dates``,
    ``intents``, ``i_objects``/``i_ptypes``, ``objects``/``ptypes``,
    ``situations``/``activities``, ``relations``, ``pqualities``/
    ``oqualities``) plus the encoded role strings ``intent_code``,
    ``desc_code``, ``all_code`` and their ``*_info`` tag/value lists.

    NOTE(review): relies on module-level globals (``model``, ``elmo``,
    ``NER``, ``pt_set``, ``pt_dict``, ``actv_emb``, ``stav_emb``, helper
    predicates) — their exact semantics are assumed, not verified here.
    """
    logging.info('*******************************************************')
    result_dict = {}
    result_dict['source'] = self.source.strip().lower()
    result_dict['q_type'] = self.s_type.strip().lower()
    # Constituency parse of the question sentence.
    res = model.predict(sentence=self.sentence)
    root_dict = res['hierplane_tree']['root']
    logging.info('sentence {} parsed as {}'.format(self.sentence, root_dict))
    # Contextual ELMo embeddings, one vector per whitespace token
    # (indexed later by token position for verb classification).
    emb = elmo(batch_to_ids([
        self.sentence.split()
    ]))['elmo_representations'][0].detach().numpy()
    parse_tree = ParseTree(root_dict, self.sentence)
    # logging.info('ParseTree type is: {}'.format(parse_tree.get_type()))
    # parse_tree.iterate()
    logging.info(
        'Now it\'s time to check the string representation \n{}'.format(
            str(parse_tree.root)))
    # parse_tree.analyze()
    logging.info('extracting information')
    all_nodes = set()
    all_intent_nodes = set()
    all_desc_nodes = set()

    # --- toponyms (role 'n') ---
    toponyms = NER.extract_place_names(self.sentence)
    result_dict['pnames'] = toponyms
    topo_nodes = set()
    for t in toponyms:
        logging.info('\ttoponym:\t{}'.format(t))
        nodes = parse_tree.find(t)
        if nodes is None:
            logging.info('An error in finding nodes')
        else:
            for n in nodes:
                n.role = 'n'
                topo_nodes.add(n)
    for t_node in topo_nodes:
        logging.info('\t**Found Node: {} and index {}'.format(
            t_node.word, t_node.index))
    all_nodes = all_nodes.union(topo_nodes)
    all_desc_nodes = all_desc_nodes.union(topo_nodes)

    # --- dates (role 'd') ---
    dates = NER.extract_dates(self.sentence)
    result_dict['dates'] = dates
    dates_nodes = set()
    for d in dates:
        logging.info('\tdate:\t{}'.format(d))
        nodes = parse_tree.find(d)
        if nodes is None:
            logging.info('An error in finding nodes')
        else:
            for n in nodes:
                n.role = 'd'
                dates_nodes.add(n)
    for d_node in dates_nodes:
        logging.info('\t**Found Node: {} and index {}'.format(
            d_node.word, d_node.index))
    all_nodes = all_nodes.union(dates_nodes)
    all_desc_nodes = all_desc_nodes.union(dates_nodes)

    # --- intent (wh-) nodes ---
    whs_nodes = parse_tree.get_intent()
    whs = []
    for wh_node in whs_nodes:
        wh_node.role = intent_encoding(wh_node, PRONOUN)
        whs.append(wh_node.word)
    for w in whs:
        logging.info('intent is: {}'.format(w))
    all_nodes = all_nodes.union(whs_nodes)
    all_intent_nodes = all_intent_nodes.union(whs_nodes)
    result_dict['intents'] = whs

    # --- nouns inside the intent: place types ('t') vs. other entities ('o') ---
    a_entities_set = set()
    a_entities_nodes = set()
    a_types = []
    a_types_nodes = set()
    for whs_node in whs_nodes:
        wh_nouns = whs_node.iterate_nouns()
        # longest/most specific nouns first (sort_function semantics assumed)
        wh_nouns.sort(key=sort_function, reverse=True)
        for n in wh_nouns:
            # skip nouns already claimed as toponyms/dates/types
            if not is_inside(n.word, toponyms) and not is_inside(
                    n.word, dates) and not is_left_inside(
                        n.word, a_types) and is_a_new_one(
                            a_types_nodes, n):
                # membership in the place-type gazetteer => type
                if is_left_inside(
                        n.word.lower().strip(), pt_set) or is_left_inside(
                            n.word.lower().strip(), pt_dict.keys()):
                    a_types.append(n.word)
                    n.role = 't'
                    a_types_nodes.add(n)
                # single non-trivial word => generic entity
                elif ' ' not in n.word.strip() and len(n.word) > 2:
                    a_entities_set.add(n.word)
                    n.role = 'o'
                    a_entities_nodes.add(n)
    for t in a_types:
        logging.info('\ttype in intent:\t{}'.format(t))
    a_entities = list(a_entities_set)
    for e in a_entities:
        logging.info('\tentity in intent:\t{}'.format(e))
    all_nodes = all_nodes.union(a_types_nodes)
    all_intent_nodes = all_intent_nodes.union(a_types_nodes)
    all_nodes = all_nodes.union(a_entities_nodes)
    all_intent_nodes = all_intent_nodes.union(a_entities_nodes)
    result_dict['i_objects'] = a_entities
    result_dict['i_ptypes'] = a_types

    # --- remaining nouns in the sentence: types ('t') vs. entities ('o') ---
    nouns = parse_tree.get_nouns()
    nouns.sort(key=sort_function, reverse=True)
    types = []
    types_nodes = set()
    entities_set = set()
    entities_nodes = set()
    for n in nouns:
        if not is_inside(n.word, toponyms) and not is_inside(
                n.word, dates) and not is_inside(
                    n.word, whs) and not is_left_inside(
                        n.word, types) and is_a_new_one(types_nodes, n):
            if is_left_inside(n.word.lower().strip(),
                              pt_set) or is_left_inside(
                                  n.word.lower().strip(), pt_dict.keys()):
                types.append(n.word)
                n.role = 't'
                types_nodes.add(n)
            elif ' ' not in n.word.strip() and len(n.word) > 2:
                entities_set.add(n.word)
                n.role = 'o'
                entities_nodes.add(n)
    for t in types:
        logging.info('\ttype:\t{}'.format(t))
    entities = list(entities_set)
    for e in entities:
        logging.info('\tentity:\t{}'.format(e))
    all_nodes = all_nodes.union(types_nodes)
    all_desc_nodes = all_desc_nodes.union(types_nodes)
    all_nodes = all_nodes.union(entities_nodes)
    all_desc_nodes = all_desc_nodes.union(entities_nodes)
    result_dict['objects'] = entities
    result_dict['ptypes'] = types

    # --- verbs: activity ('a') vs. situation ('s') via embedding similarity ---
    verbs = parse_tree.get_verbs()
    situations = []
    situations_nodes = set()
    activities = []
    activities_nodes = set()
    unknowns = []
    unknowns_nodes = set()
    for v in verbs:
        # NOTE(review): raises ValueError if the verb token is not an exact
        # whitespace token of the sentence — assumed to hold upstream.
        v_index = self.sentence.split().index(v.word)
        v_emb = [emb[0][v_index]]
        logging.debug('verb is {} and len of emb is {}'.format(
            v.word, len(v_emb)))
        decision = verb_encoding(v_emb, actv_emb, stav_emb)
        if decision == "a":
            activities.append(v.word)
            v.role = 'a'
            activities_nodes.add(v)
        elif decision == "s":
            situations.append(v.word)
            v.role = 's'
            situations_nodes.add(v)
        else:
            unknowns.append(v.word)
            unknowns_nodes.add(v)
    for s in situations:
        logging.info('\tsituation: {}'.format(s))
    for a in activities:
        logging.info('\tactivities: {}'.format(a))
    for u in unknowns:
        logging.info('\tunknown: {}'.format(u))
    all_nodes = all_nodes.union(activities_nodes)
    all_desc_nodes = all_desc_nodes.union(activities_nodes)
    all_nodes = all_nodes.union(situations_nodes)
    all_desc_nodes = all_desc_nodes.union(situations_nodes)
    result_dict['situations'] = situations
    result_dict['activities'] = activities

    # --- prepositional phrases: spatial relations ('r') ---
    pps = parse_tree.get_pps()
    relations = []
    relation_nodes = set()
    for pp in pps:
        for n in toponyms:
            # special-case "within"-style phrases
            if 'with' in pp.word.lower():
                is_within = is_within_phrase(pp.word)
                if is_within is not None:
                    in_pp = pp.get_in_in_pp()
                    if in_pp is not None:
                        relations.append(in_pp.word)
                        in_pp.role = 'r'
                        relation_nodes.add(in_pp)
            # a PP containing a toponym (and no entity) carries a relation
            if n in pp.word and not is_inside_right(
                    pp.word, entities) and not is_inside_right(
                        pp.word, a_entities):
                in_pp = pp.get_in_in_pp()
                if in_pp is not None:
                    relations.append(in_pp.word)
                    in_pp.role = 'r'
                    relation_nodes.add(in_pp)
                break
        # a PP containing a place type also carries a relation
        for t in types:
            if t in pp.word:
                in_pp = pp.get_in_in_pp()
                if in_pp is not None:
                    relations.append(in_pp.word)
                    in_pp.role = 'r'
                    relation_nodes.add(in_pp)
                break
    all_nodes = all_nodes.union(relation_nodes)
    all_desc_nodes = all_desc_nodes.union(relation_nodes)
    for relation in relations:
        logging.info('\trelation: {}'.format(relation))
    result_dict['relations'] = relations

    # --- adjectives: place qualities ('q') vs. object qualities ('p') ---
    adjs = parse_tree.get_adjectives()
    qualities = []
    qualities_nodes = set()
    object_qualities = []
    object_qualities_nodes = set()
    for adj in adjs:
        siblings = adj.get_siblings()
        for sibling in siblings:
            # adjective next to a place/type => place quality
            if is_inside(sibling.word, toponyms) or is_inside(
                    sibling.word, types) or is_inside(
                        sibling.word, a_types):
                if not is_inside(adj.word, types) and not is_inside(
                        adj.word, a_types):
                    qualities.append(adj.word)
                    adj.role = 'q'
                    qualities_nodes.add(adj)
                break
            # adjective next to an entity => object quality
            elif is_inside(sibling.word, entities) or is_inside(
                    sibling.word, a_entities):
                object_qualities.append(adj.word)
                adj.role = 'p'
                object_qualities_nodes.add(adj)
                break
    all_nodes = all_nodes.union(qualities_nodes)
    all_desc_nodes = all_desc_nodes.union(qualities_nodes)
    all_nodes = all_nodes.union(object_qualities_nodes)
    all_desc_nodes = all_desc_nodes.union(object_qualities_nodes)
    for q in qualities:
        logging.info('\tquality: {}'.format(q))
    for oq in object_qualities:
        logging.info('\tobject quality: {}'.format(oq))
    result_dict['pqualities'] = qualities
    result_dict['oqualities'] = object_qualities
    # coding schema: where: 1, what: 2, which: 3, why: 4, how: 5, how+adj: 6 etc. make it complete... other:0...
    # ...activity: a, situation: s, quality: q, object_quality: p, relation: r, toponym: n, type: t, date: d

    # --- fold in leaves not matched by any tagged node ---
    ignored_nodes = []
    leaves = parse_tree.get_leaves()
    for leaf in leaves:
        if leaf.is_unknown():
            ignored_nodes.append(leaf)
    temp = []
    for leaf in ignored_nodes:
        for n in all_nodes:
            # NOTE(review): `flag = True` sits inside the inner loop; the net
            # effect is the same as initializing it before the loop, EXCEPT
            # that when all_nodes is empty `flag` is never bound and the
            # `if flag` below would raise NameError — confirm this path is
            # unreachable, or hoist the initialization.
            flag = True
            if n.is_fuzzy_matched:
                if leaf.word in n.word:
                    flag = False
                    break
            else:
                if n.is_your_child(leaf):
                    flag = False
                    break
        if flag:
            temp.append(leaf)
            all_nodes.add(leaf)
    # ignored_nodes = temp

    # --- order nodes by sentence position and emit role-code strings ---
    all_list = list(all_nodes)
    intent_list = list(all_intent_nodes)
    description_list = list(all_desc_nodes)
    all_list.sort(key=lambda x: x.index, reverse=False)
    intent_list.sort(key=lambda x: x.index, reverse=False)
    description_list.sort(key=lambda x: x.index, reverse=False)
    intent_code = ''
    intent_info = []
    for node in intent_list:
        intent_code += node.role
        if node.is_fuzzy_matched:
            intent_info.append({
                'tag': node.role,
                'value': node.fuzzy_word
            })
        else:
            intent_info.append({'tag': node.role, 'value': node.word})
    desc_code = ''
    desc_info = []
    for node in description_list:
        desc_code += node.role
        if node.is_fuzzy_matched:
            desc_info.append({'tag': node.role, 'value': node.fuzzy_word})
        else:
            desc_info.append({'tag': node.role, 'value': node.word})

    # --- ambiguous intent: borrow resolving nodes from the description ---
    if Sentence.is_ambiguous(intent_list, intent_code):
        logging.info(
            'the intention is ambiguous, code: {}'.format(intent_code))
        resolved = Sentence.resolving_intent(desc_info)
        result_dict['resolved_intent'] = resolved
        if resolved['code'] != '':
            intent_code += resolved['code']
            intent_info.extend(resolved['list'])
            # remove the borrowed entries from the description side
            desc_temp_list = []
            for d in desc_info:
                if d not in resolved['list']:
                    desc_temp_list.append(d)
                else:
                    logging.debug('found!')
            desc_code = desc_code.replace(resolved['code'], '', 1)
            desc_info = desc_temp_list
            logging.debug('updated...')
    result_dict['intent_code'] = intent_code
    result_dict['intent_info'] = intent_info
    result_dict['desc_code'] = desc_code
    result_dict['desc_info'] = desc_info
    all_code = ''
    all_info = []
    for node in all_list:
        all_code += node.role
        if node.is_fuzzy_matched:
            all_info.append({'tag': node.role, 'value': node.fuzzy_word})
        else:
            all_info.append({'tag': node.role, 'value': node.word})
    result_dict['all_code'] = all_code
    result_dict['all_info'] = all_info
    logging.info('\tintent code is: {}'.format(intent_code))
    logging.info('\tdesc code is: {}'.format(desc_code))
    logging.info('\tall code is: {}'.format(all_code))
    logging.info('*******************************************************')
    return result_dict
def find_toponyms(question):
    """Return the place names (toponyms) detected in *question* by the NER module."""
    places = NER.extract_place_names(question)
    return places