def parse_answer(self, answer_type, raw_answer):
    """Convert one raw answer record into a ``(type, value)`` pair.

    "Entity" answers are rebuilt as shortened-prefix URIs, "Value"
    answers are passed through verbatim, and any other type is treated
    as a named entity wrapped in a Uri.
    """
    if answer_type == "Entity":
        shortened = self.kb.shorten_prefix() + raw_answer["AnswerArgument"]
        return answer_type, Uri(shortened, self.kb.parse_uri)
    if answer_type == "Value":
        return answer_type, raw_answer["AnswerArgument"]
    return answer_type, Uri(raw_answer["EntityName"], self.kb.parse_uri)
def __extend_edge(self, edge, relation_uri):
    """Propose new edges that extend *edge* by one hop over *relation_uri*.

    Asks the KB which two-hop patterns connect the edge's two concrete
    endpoints via *relation_uri*, and for each confirmed pattern builds a
    candidate Edge anchored on the edge's variable (generic) node.
    Returns a set of Edge objects; empty if the edge has no variable node
    or either endpoint has no single concrete URI.
    """
    output = set()
    # The variable endpoint of the edge: whichever node is fully generic.
    # NOTE(review): if both nodes are generic, dest wins — presumably
    # intentional, confirm against callers.
    var_node = None
    if edge.source_node.are_all_uris_generic():
        var_node = edge.source_node
    if edge.dest_node.are_all_uris_generic():
        var_node = edge.dest_node
    ent1 = edge.source_node.first_uri_if_only()
    ent2 = edge.dest_node.first_uri_if_only()
    # Only proceed when we have one variable node and two single-URI endpoints.
    if not (var_node is None or ent1 is None or ent2 is None):
        result = self.kb.two_hop_graph(ent1, edge.uri, ent2, relation_uri)
        if result is not None:
            for item in result:
                # item[1]: KB confirmation flag; item[0]: pattern id 0-4.
                if item[1]:
                    if item[0] == 0:
                        # New generic node --relation--> variable node.
                        n_s = self.create_or_get_node(1, True)
                        n_d = var_node
                        e = Edge(n_s, relation_uri, n_d)
                        output.add(e)
                    elif item[0] == 1:
                        # Variable node --relation--> new generic node.
                        n_s = var_node
                        n_d = self.create_or_get_node(1, True)
                        e = Edge(n_s, relation_uri, n_d)
                        output.add(e)
                    elif item[0] == 2:
                        # Same shape as pattern 1, but additionally marks
                        # the new node as the suggested answer variable.
                        n_s = var_node
                        n_d = self.create_or_get_node(1, True)
                        e = Edge(n_s, relation_uri, n_d)
                        output.add(e)
                        self.suggest_retrieve_id = 1
                    elif item[0] == 3:
                        # Same shape as pattern 0.
                        n_s = self.create_or_get_node(1, True)
                        n_d = var_node
                        e = Edge(n_s, relation_uri, n_d)
                        output.add(e)
                    elif item[0] == 4:
                        # relation_uri acts as a class: add rdf:type edges
                        # from two generic nodes to it.
                        n_d = self.create_or_get_node(relation_uri)
                        n_s = self.create_or_get_node(0, True)
                        e = Edge(n_s, Uri(self.kb.type_uri, self.kb.parse_uri), n_d)
                        output.add(e)
                        n_s = self.create_or_get_node(1, True)
                        e = Edge(n_s, Uri(self.kb.type_uri, self.kb.parse_uri), n_d)
                        output.add(e)
    return output
def parse_sparql(self, raw_query):
    """Normalise the URI scheme and collect every URI/variable token.

    Returns ``(raw_query, True, uris)`` where *uris* is a URIs
    collection over all ``<...>`` and ``?var`` tokens in the query.
    """
    raw_query = raw_query.replace("https://", "http://")
    tokens = re.findall('(<[^>]*>|\?[^ ]*)', raw_query)
    uris = URIs([Uri(token, self.kb.parse_uri) for token in tokens])
    return raw_query, True, uris
def parse_sparql(self, raw_query):
    """Extract every angle-bracketed URI from the query text.

    Returns ``(raw_query, True, uris)``; the query itself is untouched.
    """
    matches = re.findall('<[^>]*>', raw_query)
    uris = [Uri(match, DBpedia.parse_uri) for match in matches]
    return raw_query, True, uris
def parse_answer(self, answer_type, raw_answer):
    """Turn one raw answer into a ``(type, value)`` pair.

    Boolean answers are stringified.  Other answers are looked up in
    *raw_answer* under the type key — quoted when the plain key is
    missing, a quirk of the source data — and wrapped in a Uri.
    """
    if answer_type == "boolean":
        return answer_type, str(raw_answer)
    # Idiom fix: `x not in y` instead of `not x in y`.
    if answer_type not in raw_answer:
        # Some records store the answer under a double-quoted key.
        answer_type = "\"{}\"".format(answer_type)
    return raw_answer[answer_type]["type"], Uri(
        raw_answer[answer_type]["value"], self.kb.parse_uri)
def parse_sparql(self, raw_query):
    """Normalise a QALD-style SPARQL query and extract its URIs.

    Accepts either a dict with a "sparql" key or a plain query string;
    anything else is treated as empty.  Expands PREFIX declarations
    in-place, trims the text before the WHERE clause, then collects all
    ``<...>`` URIs.  Returns ``(raw_query, supported, uris)`` where
    *supported* is False when the query uses features this pipeline
    cannot handle (UNION/FILTER/OFFSET/HAVING/LIMIT).
    """
    if "sparql" in raw_query:
        raw_query = raw_query["sparql"]
    # NOTE(review): `basestring` is Python 2 only — this branch would
    # raise NameError on Python 3; confirm the target interpreter.
    elif isinstance(raw_query, basestring) and "where" in raw_query.lower():
        pass
    else:
        raw_query = ""
    if "PREFIX " in raw_query:
        # QALD-5 bug! Repair mangled scheme prefixes in the dataset.
        raw_query = raw_query.replace("htp:/w.", "http://www.")
        raw_query = raw_query.replace("htp:/dbpedia.", "http://dbpedia.")
        # Inline each PREFIX declaration: replace the short prefix with
        # its full URI everywhere in the query.
        for item in re.findall("PREFIX [^:]*: <[^>]*>", raw_query):
            prefix = item[7:item.find(" ", 9)]
            uri = item[item.find("<"):-1]
            raw_query = raw_query.replace(prefix, uri)
        # Drop everything up to (and including) the last ">" before WHERE,
        # i.e. the now-redundant PREFIX block.
        idx = raw_query.find("WHERE")
        idx2 = raw_query[:idx - 1].rfind(">")
        raw_query = raw_query[idx2 + 1:]
        # Prefix expansion left URIs without their closing ">"; restore it.
        for uri in re.findall('<[^ ]*', raw_query):
            raw_query = raw_query.replace(uri, uri + ">")
    uris = [
        Uri(raw_uri, self.kb.parse_uri)
        for raw_uri in re.findall('<[^>]*>', raw_query)
    ]
    supported = not any(
        substring in raw_query
        for substring in ["UNION", "FILTER", "OFFSET", "HAVING", "LIMIT"])
    return raw_query, supported, uris
def query():
    """Flask endpoint: generate SPARQL queries for a linked question.

    Reads the question plus pre-linked entities/relations from the JSON
    body, classifies the question type, and returns post-processed
    candidate queries with the type and its confidence.
    """
    payload = request.json
    question = payload['question']

    def to_linked_items(raw_items):
        # Wrap each raw surface/uris record into a LinkedItem.
        linked = []
        for record in raw_items:
            wrapped = [
                Uri(u["uri"], DBpedia.parse_uri, u["confidence"])
                for u in record["uris"]
            ]
            linked.append(LinkedItem(record["surface"], wrapped))
        return linked

    entities = to_linked_items(payload['entities'])
    relations = to_linked_items(payload['relations'])

    question_type, type_confidence = get_question_type(question)
    count_query = question_type == 2
    ask_query = question_type == 1

    generated_queries = []
    combination_mode = True
    if combination_mode:
        combination_list = create_entity_relations_combinations(entities, relations)
        for comb in combination_list:
            # Only the first combination that yields queries is kept.
            if not generated_queries:
                generated_queries.extend(
                    generate_query(question, question_type, comb[0], comb[1],
                                   count_query, ask_query))
    else:
        generated_queries = generate_query(question, question_type, entities,
                                           relations, count_query, ask_query)

    queries = postprocess(generated_queries, count_query, ask_query)
    return jsonify({
        "queries": queries,
        "type": get_question_type_text(question_type),
        "type_confidence": type_confidence
    })
def __one_hop_graph(self, entity_items, relation_items, threshold=None, number_of_entities=1):
    """Query the KB for one-hop connections and add matching edges.

    Iterates over every (entity combination, relation) pairing drawn
    from the top-ranked URIs, asks the KB for one-hop matches, and adds
    an Edge to the graph for each returned pattern (``m`` = 0, 1 or 2).
    When *threshold* is given, the confidence cut-off ``top_uri`` is
    lowered until the number of combinations fits under it.
    """
    top_uri = 1
    total = self.count_combinations(entity_items, relation_items,
                                    number_of_entities, top_uri)
    if threshold is not None:
        # Shrink the candidate space by tightening the confidence cut-off.
        # Guard: stop at 0 so a stubborn `total` cannot loop forever.
        while total > threshold and top_uri > 0:
            top_uri -= 0.1
            total = self.count_combinations(entity_items, relation_items,
                                            number_of_entities, top_uri)
    with tqdm(total=total, disable=self.logger.level >= 10) as pbar:
        for relation_item in relation_items:
            for relation_uri in relation_item.top_uris(top_uri):
                for entity_uris in itertools.product(
                        *[items.top_uris(top_uri) for items in entity_items]):
                    for entity_uri in itertools.combinations(
                            entity_uris, number_of_entities):
                        pbar.update(1)
                        result = self.kb.one_hop_graph(
                            entity_uri[0], relation_uri,
                            entity_uri[1] if len(entity_uri) > 1 else None)
                        # BUG FIX: was a stray debug print(); route through
                        # the instance logger instead of stdout.
                        self.logger.debug("one_hop_graph result: %s", result)
                        if result is None:
                            continue
                        for item in result:
                            m = int(item["m"]["value"])
                            # Second entity if present, else placeholder 0.
                            uri = entity_uri[1] if len(entity_uri) > 1 else 0
                            if m == 0:
                                n_s = self.create_or_get_node(uri, True)
                                n_d = self.create_or_get_node(entity_uri[0])
                                self.add_edge(Edge(n_s, relation_uri, n_d))
                            elif m == 1:
                                n_s = self.create_or_get_node(entity_uri[0])
                                n_d = self.create_or_get_node(uri, True)
                                self.add_edge(Edge(n_s, relation_uri, n_d))
                            elif m == 2:
                                # relation_uri acts as a class: rdf:type edge.
                                n_s = self.create_or_get_node(uri)
                                n_d = self.create_or_get_node(relation_uri)
                                self.add_edge(Edge(
                                    n_s,
                                    Uri(self.kb.type_uri, self.kb.parse_uri),
                                    n_d))
def __parse(self, dataset, name, top):
    """Build LinkedItems for the *name* section of *dataset*.

    Each record's surface form is sliced out of the question text from
    its (start, length) span; only the first *top* URIs are kept.
    """
    parsed = []
    for record in dataset[name]:
        candidate_uris = [
            Uri(u["uri"], self.parser, u["confidence"])
            for u in record["uris"]
        ]
        begin, span = record["surface"]
        surface = dataset["question"][begin:begin + span]
        parsed.append(LinkedItem(surface, candidate_uris[:top]))
    return parsed
def parse_answerset(self, raw_answers):
    """Normalise a raw answer list.

    Empty input yields an empty list; a single result is delegated to
    ``parse_queryresult``; multiple results are wrapped as AnswerRow
    objects whose parser builds URI Answers lazily.
    """
    if not raw_answers:
        return []
    if len(raw_answers) == 1:
        return self.parse_queryresult(raw_answers[0])
    rows = []
    for raw in raw_answers:
        rows.append(AnswerRow(
            raw["string"],
            lambda x: [Answer("uri", x,
                              lambda t, y: ("uri", Uri(y, self.kb.parse_uri)))]))
    return rows
def parse_sparql(self, raw_query):
    """Clean a Freebase SPARQL query and extract its tokens.

    Strips ``#`` comments, unwraps the WHERE clause (skipping a leading
    FILTER line when present), then collects every ``ns:`` URI and
    ``?var`` token.  Returns ``(raw_query, supported, uris)`` where
    *supported* is False for EXISTS/UNION/FILTER queries.
    """
    # Remove "#..." comments.  Raw strings fix the invalid "\#" escape
    # the original regex literals relied on.
    for comment in re.findall(r"#[^\n]*", raw_query):
        raw_query = raw_query.replace(comment, " ")
    if "WHERE {" in raw_query:
        raw_query = raw_query[raw_query.find("WHERE {") + 7:]
        lines = raw_query.split("\n")
        # BUG FIX: the original indexed lines[2] unconditionally and
        # raised IndexError on queries with fewer than three lines.
        if len(lines) > 2 and lines[2].startswith("FILTER"):
            raw_query = " ".join(lines[3:])
        else:
            raw_query = raw_query.replace("\n", " ")
        raw_query = raw_query[:raw_query.rfind("}")]
    uris = [
        Uri(raw_uri, Freebase.parse_uri)
        for raw_uri in re.findall(r'(ns:[^ ]*|\?[^ ]*)', raw_query)
    ]
    supported = not any(substring in raw_query.upper()
                        for substring in ["EXISTS", "UNION", "FILTER"])
    return raw_query, supported, uris
question_type_classifier = SVMClassifier( os.path.join(question_type_classifier_path, "svm.model")) o = Orchestrator(None, question_type_classifier, None, parser, True) raw_entities = [{ "surface": "", "uris": [{ "confidence": 1, "uri": "http://dbpedia.org/resource/Bill_Finger" }] }] entities = [] for item in raw_entities: uris = [ Uri(uri["uri"], kb.parse_uri, uri["confidence"]) for uri in item["uris"] ] entities.append(LinkedItem(item["surface"], uris)) raw_relations = [{ "surface": "", "uris": [{ "confidence": 1, "uri": "http://dbpedia.org/ontology/creator" }] }, { "surface": "", "uris": [{
def generate_query():
    """Flask endpoint: build SPARQL queries for the posted question.

    Expects a JSON body with 'question', 'entities' and 'relations',
    plus optional 'force_count'/'force_bool'/'force_list' flags,
    'h1_threshold', 'timeout' (seconds) and 'use_cache'.  Successful
    responses (201) are cached in `hash_list` keyed by a hash of the
    inputs; timeouts answer 408 and other failures 422.
    """
    if not flask.request.json:
        flask.abort(400)
    question = flask.request.json['question']
    # Optional flags forcing the question type instead of classifying it.
    force_count_query = flask.request.json[
        'force_count'] if 'force_count' in flask.request.json else False
    force_bool_query = flask.request.json[
        'force_bool'] if 'force_bool' in flask.request.json else False
    force_list_query = flask.request.json[
        'force_list'] if 'force_list' in flask.request.json else False
    raw_entities = flask.request.json['entities']
    raw_relations = flask.request.json['relations']
    h1_threshold = int(flask.request.json['h1_threshold']
                       ) if 'h1_threshold' in flask.request.json else 9999999
    timeout_threshold = int(flask.request.json['timeout']
                            ) if 'timeout' in flask.request.json else 9999999
    use_cache = bool(flask.request.json['use_cache']
                     ) if 'use_cache' in flask.request.json else True
    # Cache key over every request field that influences the result.
    # NOTE(review): hash() is salted per process (PYTHONHASHSEED), so the
    # cache is only stable within one interpreter run.
    hash_key = hash(
        (str(question) + str(raw_entities) + str(raw_relations) +
         str(h1_threshold) + str(force_count_query) + str(force_bool_query) +
         str(force_list_query)).encode('utf-8'))
    if use_cache and hash_key in hash_list:
        return flask.jsonify(hash_list[hash_key]), 201
    logger.info(question)
    # Wrap the raw entity/relation records into LinkedItem objects.
    entities = []
    for item in raw_entities:
        uris = [
            Uri(uri["uri"], kb.parse_uri, uri["confidence"])
            for uri in item["uris"]
        ]
        entities.append(LinkedItem(item["surface"], uris))
    relations = []
    for item in raw_relations:
        uris = [
            Uri(uri["uri"], kb.parse_uri, uri["confidence"])
            for uri in item["uris"]
        ]
        relations.append(LinkedItem(item["surface"], uris))
    try:
        if len(entities) == 0 or len(relations) == 0:
            raise Exception('Wrong number of input entity/relation!')
        with timeout(timeout_threshold):
            # Forced type wins over classification; None lets the
            # builder classify the question itself.
            question_type = None
            if force_list_query:
                question_type = 0
            elif force_bool_query:
                question_type = 1
            elif force_count_query:
                question_type = 2
            queries, question_type, type_confidence = queryBuilder.generate_query(
                question, entities, relations, h1_threshold, question_type)
            # Map the numeric type to the SPARQL form: 2=COUNT, 1=ASK,
            # anything else is a plain list SELECT.
            question_type_str = "list"
            ask_query = False
            count_query = False
            if question_type == 2:
                question_type_str = "count"
                count_query = True
            elif question_type == 1:
                question_type_str = "boolean"
                ask_query = True
            queries = [{
                "query":
                kb.sparql_query(item["where"],
                                "?u_" + str(item["suggested_id"]), count_query,
                                ask_query),
                "confidence":
                item["confidence"]
            } for item in queries]
            result = {
                'queries': queries,
                'type': question_type_str,
                'type_confidence': type_confidence
            }
            if use_cache:
                hash_list[hash_key] = result
                hash_list.save(hash_file)
            return flask.jsonify(result), 201
    except RuntimeError as expt:
        # Raised by the timeout() context manager.
        logger.error(expt)
        return flask.jsonify({'error': str(expt)}), 408
    except Exception as expt:
        logger.error(expt)
        return flask.jsonify({'error': str(expt)}), 422
def __get_generic_uri(self, uri, edges):
    """Return the generic (variable) form of *uri*.

    *edges* is accepted for interface compatibility with callers but is
    not used by this implementation.
    """
    return Uri.generic_uri(uri)
def __parse_answer(self, answer_type, raw_answer):
    """Shorten the KB prefix on a raw URI answer and wrap it in a Uri."""
    full_prefix = self.kb.prefix()
    if full_prefix and raw_answer.startswith(full_prefix):
        raw_answer = self.kb.shorten_prefix() + raw_answer[len(full_prefix):]
    return answer_type, Uri(raw_answer, self.kb.parse_uri)
def generate_query():
    """Flask endpoint: build SPARQL queries for the posted question.

    Expects a JSON body with 'question', 'entities' and 'relations',
    plus optional 'h1_threshold', 'timeout' (seconds) and 'use_cache'.
    Successful responses (201) are cached in `hash_list`; timeouts
    answer 408 and other failures 422.
    """
    if not flask.request.json:
        flask.abort(400)
    question = flask.request.json['question']
    raw_entities = flask.request.json['entities']
    raw_relations = flask.request.json['relations']
    h1_threshold = int(flask.request.json['h1_threshold']
                       ) if 'h1_threshold' in flask.request.json else 9999999
    timeout_threshold = int(flask.request.json['timeout']
                            ) if 'timeout' in flask.request.json else 9999999
    use_cache = bool(flask.request.json['use_cache']
                     ) if 'use_cache' in flask.request.json else True
    # NOTE(review): hash() is salted per process, so the cache is only
    # stable within one interpreter run.
    hash_key = hash(
        str(question) + str(raw_entities) + str(raw_relations) +
        str(h1_threshold))
    if use_cache and hash_key in hash_list:
        return flask.jsonify(hash_list[hash_key]), 201
    logger.info(question)
    # Wrap raw entity/relation records into LinkedItem objects.
    entities = []
    for item in raw_entities:
        uris = [
            Uri(uri["uri"], kb.parse_uri, uri["confidence"])
            for uri in item["uris"]
        ]
        entities.append(LinkedItem(item["surface"], uris))
    relations = []
    for item in raw_relations:
        uris = [
            Uri(uri["uri"], kb.parse_uri, uri["confidence"])
            for uri in item["uris"]
        ]
        relations.append(LinkedItem(item["surface"], uris))
    try:
        with timeout(timeout_threshold):
            queries, question_type = queryBuilder.generate_query(
                question, entities, relations, h1_threshold)
            # Map the numeric type: 2=COUNT, 1=ASK, else list SELECT.
            question_type_str = "list"
            ask_query = False
            count_query = False
            if question_type == 2:
                question_type_str = "count"
                count_query = True
            elif question_type == 1:
                question_type_str = "boolean"
                ask_query = True
            queries = [{
                "query":
                kb.sparql_query(item["where"],
                                "?u_" + str(item["suggested_id"]), count_query,
                                ask_query),
                "confidence":
                item["confidence"]
            } for item in queries]
            result = {'queries': queries, 'type': question_type_str}
            if use_cache:
                hash_list[hash_key] = result
                hash_list.save(hash_file)
            return flask.jsonify(result), 201
    # BUG FIX: the original bare ``except:`` swallowed every error
    # (including SystemExit/KeyboardInterrupt) and returned an empty 408
    # with no log; mirror the sibling endpoint's handling instead.
    except RuntimeError as expt:
        # Raised by the timeout() context manager.
        logger.error(expt)
        return flask.jsonify({'error': str(expt)}), 408
    except Exception as expt:
        logger.error(expt)
        return flask.jsonify({'error': str(expt)}), 422
# Smoke-test driver: build a query for a fixed question with hand-linked
# entities/relations through the Orchestrator.
args.data = os.path.join(base_path, "data/lc_quad/")
args.cuda = False
parser = LC_QaudParser()
kb = parser.kb
base_dir = "./output"
question_type_classifier_path = os.path.join(base_dir, "question_type_classifier")
utility.makedirs(question_type_classifier_path)
question_type_classifier = SVMClassifier(os.path.join(question_type_classifier_path, "svm.model"))
o = Orchestrator(None, question_type_classifier, None, parser, True)
# Hand-crafted linking output for the test question below.
raw_entities = [{"surface": "", "uris": [{"confidence": 1, "uri": "http://dbpedia.org/resource/Bill_Finger"}]}]
entities = []
for item in raw_entities:
    uris = [Uri(uri["uri"], kb.parse_uri, uri["confidence"]) for uri in item["uris"]]
    entities.append(LinkedItem(item["surface"], uris))
raw_relations = [{"surface": "", "uris": [{"confidence": 1, "uri": "http://dbpedia.org/ontology/creator"}]}, {"surface": "", "uris": [{"confidence": 1, "uri": "http://dbpedia.org/ontology/ComicsCharacter"}]}]
relations = []
for item in raw_relations:
    uris = [Uri(uri["uri"], kb.parse_uri, uri["confidence"]) for uri in item["uris"]]
    relations.append(LinkedItem(item["surface"], uris))
question = "Which comic characters are painted by Bill Finger?"
generated_queries = o.generate_query(question, entities, relations)[0]
# print generated_queries
# generated_queries = [
#     {'where': [u'?u_0 <http://dbpedia.org/ontology/creator> <http://dbpedia.org/resource/Bill_Finger>',
def parse_sparql(self, raw_query):
    """Collect every ``<...>`` URI and ``?var`` token in the query.

    Returns ``(raw_query, True, uris)``; the query is left untouched.
    """
    tokens = re.findall('(<[^>]*>|\?[^ ]*)', raw_query)
    return raw_query, True, [Uri(token, self.kb.parse_uri) for token in tokens]