def two_intentions_right_subgraph_entity_3_generation(
        current_uid: int,
        question_template: Dict[str, Any],
        generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build an "entity_3" variant of a two-intentions right-subgraph question.

    The function works on a deep copy of ``question_template``. It runs the
    copy's ``'sparql_wikidata'`` query, takes the first two values of the first
    result binding as the old answers, and passes one candidate query per old
    answer to ``questions_generator.entity_3_generation_common_part``.

    Args:
        current_uid: Forwarded unchanged to the generation helpers.
        question_template: Source template; this function itself only mutates
            a deep copy of it.
        generated_questions: Forwarded unchanged to the generation helpers.

    Returns:
        The template copy when the common part reports that a candidate was
        found; otherwise whatever
        ``two_intentions_right_subgraph_entity_2_generation`` returns.
    """
    template_copy = deepcopy(question_template)
    answer_query = template_copy['sparql_wikidata']
    old_entities_ids = questions_generator.get_elements_from_query(answer_query, [0, 1])

    # Only the first result row is inspected; its first two values are the old answers.
    query_results = questions_generator.get_sparql_query_results(answer_query)
    binding_values = iter(query_results['results']['bindings'][0].values())
    # Keep the last "/"-separated segment of each value (presumably the entity id of a URI).
    first_answer_id = next(binding_values)['value'].rsplit("/", 1)[-1]
    second_answer_id = next(binding_values)['value'].rsplit("/", 1)[-1]

    # One query per old answer: entities linked to that answer, restricted by type.
    # The |...| placeholders are left verbatim (presumably filled in by the common part).
    candidate_queries = [
        'select distinct ?ans ?ansLabel where {?ans ?rel wd:' + answer_id
        + ' . ?ans wdt:|rel_entity_type| wd:|entity_type| . ?ans rdfs:label ?ansLabel . '
        + 'FILTER (LANG(?ansLabel) = "en" && ?ans not in (wd:|old_entity_id|))} LIMIT 20'
        for answer_id in (first_answer_id, second_answer_id)
    ]

    found, old_entities = questions_generator.entity_3_generation_common_part(
        current_uid, template_copy, generated_questions, old_entities_ids,
        candidate_queries)

    if found:
        two_intentions_right_subgraph_nnqt_question_construction(template_copy)
        return template_copy

    # There aren't valid candidates, so try with a random entity of the same type or class
    return two_intentions_right_subgraph_entity_2_generation(
        current_uid, question_template, generated_questions, old_entities_ids,
        old_entities)
def unknown_entity_3_generation(
        current_uid: int,
        question_template: Dict[str, Any],
        generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build an "entity_3" variant of an "unknown" question template.

    The function works on a deep copy of ``question_template``. The copy's
    ``'sparql_wikidata'`` query is generalised by replacing the first extracted
    entity with ``?ans``, and the text between its braces is embedded in a
    ``NOT EXISTS`` clause. This keeps entities for which the generalised
    pattern matches out of the candidates.

    Args:
        current_uid: Forwarded unchanged to the generation helpers.
        question_template: Source template; this function itself only mutates
            a deep copy of it.
        generated_questions: Forwarded unchanged to the generation helpers.

    Returns:
        The template copy when the common part reports that a candidate was
        found; otherwise whatever ``unknown_entity_2_generation`` returns.
    """
    template_copy = deepcopy(question_template)
    answer_query = template_copy['sparql_wikidata']
    old_entities_ids = questions_generator.get_elements_from_query(answer_query, [0])

    # The procedure differs from the usual one: the answer query is generalised
    # (known entity -> ?ans) so that entities of the same type or class which
    # still return results can be excluded from the possible candidates.
    generalized_query = answer_query.replace("wd:" + old_entities_ids[0], "?ans")
    # Get the substring between "{" and "}"
    generalized_body = re.findall("{(.+)}", generalized_query)[0]
    type_constraint = '?ans wdt:|rel_entity_type| wd:|entity_type|'

    # Disabled alternative, kept for reference:
    # query = "SELECT distinct ?ans ?ansLabel WHERE {" + type_common_string + " . ?ans rdfs:label ?ansLabel . FILTER(LANG(?ansLabel) = \"en\" " + \
    #     "&& NOT EXISTS {" + type_common_string + " . " + general_sparql_query + "})} LIMIT 20"
    # Original author's note: this version of the query is more complete and adapted for
    # "entity_3" logic, but is also slow and unsafe, since it sometimes raises a server error.
    query_results = questions_generator.get_sparql_query_results(answer_query)
    first_binding = query_results['results']['bindings'][0]
    # Keep the last "/"-separated segment of the first value (presumably the entity id of a URI).
    old_answer_id = next(iter(first_binding.values()))['value'].rsplit("/", 1)[-1]

    # Find answer filter and type (the second returned item is not used here).
    answer_filter, _ = questions_generator.get_filter_from_element(old_answer_id, "obj", "s", False)

    candidate_query = (
        'SELECT distinct ?ans ?ansLabel WHERE {' + type_constraint
        + ' . ?ans ?rel ?obj . ?ans rdfs:label ?ansLabel . FILTER(LANG(?ansLabel) = "en" && '
        + answer_filter + 'NOT EXISTS {' + type_constraint + ' . ' + generalized_body
        + '})} LIMIT 5'
    )

    found, old_entities = questions_generator.entity_3_generation_common_part(
        current_uid, template_copy, generated_questions, old_entities_ids,
        [candidate_query])

    if found:
        unknown_nnqt_question_construction(template_copy)
        return template_copy

    # There aren't valid candidates, so try with a random entity of the same type or class
    return unknown_entity_2_generation(
        current_uid, question_template, generated_questions, old_entities_ids,
        old_entities)
def statement_property_entity_3_generation(
        current_uid: int,
        question_template: Dict[str, Any],
        generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build an "entity_3" variant of a statement-property question template.

    The function works on a deep copy of ``question_template``. Whether the
    copy's ``'sparql_wikidata'`` query contains the text "filter"
    (case-insensitive) decides how many elements are extracted from it and
    whether a second candidate query is used.

    Args:
        current_uid: Forwarded unchanged to the generation helpers.
        question_template: Source template; this function itself only mutates
            a deep copy of it.
        generated_questions: Forwarded unchanged to the generation helpers.

    Returns:
        The template copy when the common part reports that a candidate was
        found; otherwise whatever ``statement_property_entity_2_generation``
        returns.
    """
    template_copy = deepcopy(question_template)
    answer_query = template_copy['sparql_wikidata']

    # Check if there is a filter: if there isn't one, the last element is an
    # entity too, so two elements are extracted instead of one.
    has_filter = 'filter' in answer_query.lower()
    element_positions = [0] if has_filter else [0, 1]
    old_entities_ids = questions_generator.get_elements_from_query(answer_query, element_positions)

    # The procedure differs from the usual one: the answer query is generalised
    # (first known entity -> ?ans) so that entities of the same type or class
    # which still return results can be excluded from the possible candidates.
    generalized_query = answer_query.replace("wd:" + old_entities_ids[0], "?ans")
    # Get the substring between "{" and "}"
    generalized_body = re.findall("{(.+)}", generalized_query)[0]
    type_constraint = '?ans wdt:|rel_entity_type| wd:|entity_type|'

    # Disabled alternative, kept for reference:
    # first_query = "SELECT distinct ?ans ?ansLabel WHERE {" + type_common_string + " . ?ans rdfs:label ?ansLabel . FILTER(LANG(?ansLabel) = \"en\" " + \
    #     "&& NOT EXISTS {" + type_common_string + " . " + general_sparql_query + "})} LIMIT 20"
    # Original author's note: this version of the query is more complete and adapted for
    # "entity_3" logic, but is also slow and unsafe, since it sometimes raises a server error.
    query_results = questions_generator.get_sparql_query_results(answer_query)
    first_binding = query_results['results']['bindings'][0]
    # Keep the last "/"-separated segment of the first value (presumably the entity id of a URI).
    old_answer_id = next(iter(first_binding.values()))['value'].rsplit("/", 1)[-1]

    # Find answer filter and type (the second returned item is not used here).
    answer_filter, _ = questions_generator.get_filter_from_element(old_answer_id, "obj", "s", False)

    exclusion_query = (
        'SELECT distinct ?ans ?ansLabel WHERE {' + type_constraint
        + ' . ?ans ?rel ?obj . ?ans rdfs:label ?ansLabel . FILTER(LANG(?ansLabel) = "en" && '
        + answer_filter + 'NOT EXISTS {' + type_constraint + ' . ' + generalized_body
        + '})} LIMIT 5'
    )

    candidate_queries = [exclusion_query]
    if not has_filter:
        # In this case there is an additional normal query with two triples
        # that link the first known entity to the second one.
        candidate_queries.append(
            'select ?ans ?ansLabel where {wd:' + old_entities_ids[0]
            + ' ?rel ?s . ?s ?rel2 ?ans . ?ans wdt:|rel_entity_type| wd:|entity_type| . ?ans rdfs:label ?ansLabel . '
            + 'FILTER (LANG(?ansLabel) = "en" && REGEX(STR(?s), "Q(\\\\d+)-") && ?ans not in (wd:|old_entity_id|))} LIMIT 20'
        )

    found, old_entities = questions_generator.entity_3_generation_common_part(
        current_uid, template_copy, generated_questions, old_entities_ids,
        candidate_queries)

    if found:
        statement_property_nnqt_question_construction(template_copy)
        return template_copy

    # There aren't valid candidates, so try with a random entity of the same type or class
    return statement_property_entity_2_generation(
        current_uid, question_template, generated_questions, old_entities_ids,
        old_entities)
def right_subgraph_2_entity_3_generation(
        current_uid: int,
        question_template: Dict[str, Any],
        generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build an "entity_3" variant of a right-subgraph-2 question template.

    The function works on a deep copy of ``question_template``. The copy's
    ``'sparql_wikidata'`` query is rewritten so that it selects a variable
    found in the query body instead of the original answer variable. The
    rewritten query is executed, and the entity it returns drives the
    candidate query.

    Args:
        current_uid: Forwarded unchanged to the generation helpers.
        question_template: Source template; this function itself only mutates
            a deep copy of it.
        generated_questions: Forwarded unchanged to the generation helpers.

    Returns:
        The template copy when the common part reports that a candidate was
        found; otherwise whatever ``right_subgraph_2_entity_2_generation``
        returns.
    """
    template_copy = deepcopy(question_template)
    answer_query = template_copy['sparql_wikidata']
    old_entities_ids = questions_generator.get_elements_from_query(answer_query, [0])

    # Variable projected by the SELECT clause, and the first variable that is
    # followed by a "wdt:" predicate in the query body.
    selected_var = re.findall(r'SELECT (\?\w*) WHERE', answer_query, re.IGNORECASE)[0]
    inner_var = re.findall(r'. (\?\w*) wdt:', answer_query)[0]

    # Swap only the first occurrence of the selected variable, so the query
    # projects the inner variable instead.
    inner_query = answer_query.replace(selected_var, inner_var, 1)
    inner_results = questions_generator.get_sparql_query_results(inner_query)
    # Result bindings are keyed by the variable name without its leading "?".
    inner_binding = inner_results['results']['bindings'][0][inner_var[1:]]
    inner_entity = inner_binding['value'].rsplit("/", 1)[-1]

    inner_filter, _ = questions_generator.get_filter_from_element(inner_entity, "obj", "", False)

    if inner_filter:
        # A filter expression is available: constrain ?obj through it.
        candidate_query = (
            'select ?ans ?ansLabel where {?ans ?rel ?obj . ?ans wdt:|rel_entity_type| wd:|entity_type| . ?ans rdfs:label ?ansLabel . '
            + 'FILTER (' + inner_filter
            + 'LANG(?ansLabel) = "en" && ?ans not in (wd:|old_entity_id|))} LIMIT 20'
        )
    else:
        # No filter expression: link candidates directly to the entity.
        candidate_query = (
            'select ?ans ?ansLabel where {?ans ?rel wd:' + inner_entity
            + ' . ?ans wdt:|rel_entity_type| wd:|entity_type| . ?ans rdfs:label ?ansLabel . '
            + 'FILTER (LANG(?ansLabel) = "en" && ?ans not in (wd:|old_entity_id|))} LIMIT 20'
        )

    found, old_entities = questions_generator.entity_3_generation_common_part(
        current_uid, template_copy, generated_questions, old_entities_ids,
        [candidate_query])

    if found:
        right_subgraph_2_nnqt_question_construction(template_copy)
        return template_copy

    # There aren't valid candidates, so try with a random entity of the same type or class
    return right_subgraph_2_entity_2_generation(
        current_uid, question_template, generated_questions, old_entities_ids,
        old_entities)
def string_matching_simple_contains_word_entity_3_generation(
        current_uid: int,
        question_template: Dict[str, Any],
        generated_questions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build an "entity_3" variant of a simple contains-word string-matching question.

    The function works on a deep copy of ``question_template``. It runs the
    copy's ``'sparql_wikidata'`` query and takes the first value of the first
    result binding as the old answer. The candidate query looks for entities
    that the old answer points to through any relation.

    Args:
        current_uid: Forwarded unchanged to the generation helpers.
        question_template: Source template; this function itself only mutates
            a deep copy of it.
        generated_questions: Forwarded unchanged to the generation helpers.

    Returns:
        The template copy when the common part reports that a candidate was
        found; otherwise whatever
        ``string_matching_simple_contains_word_entity_2_generation`` returns.
    """
    template_copy = deepcopy(question_template)
    answer_query = template_copy['sparql_wikidata']
    old_entities_ids = questions_generator.get_elements_from_query(answer_query, [0])

    query_results = questions_generator.get_sparql_query_results(answer_query)
    first_binding = query_results['results']['bindings'][0]
    # Keep the last "/"-separated segment of the first value (presumably the entity id of a URI).
    old_answer_id = next(iter(first_binding.values()))['value'].rsplit("/", 1)[-1]

    # Candidates are objects of the old answer, restricted by type.
    # The |...| placeholders are left verbatim (presumably filled in by the common part).
    candidate_query = (
        'select ?ans ?ansLabel where {wd:' + old_answer_id
        + ' ?rel ?ans . ?ans wdt:|rel_entity_type| wd:|entity_type| . ?ans rdfs:label ?ansLabel . '
        + 'FILTER (LANG(?ansLabel) = "en" && ?ans not in (wd:|old_entity_id|))} LIMIT 20'
    )

    found, old_entities = questions_generator.entity_3_generation_common_part(
        current_uid, template_copy, generated_questions, old_entities_ids,
        [candidate_query])

    if found:
        string_matching_simple_contains_word_nnqt_question_construction(template_copy)
        return template_copy

    # There aren't valid candidates, so try with a random entity of the same type or class
    return string_matching_simple_contains_word_entity_2_generation(
        current_uid, question_template, generated_questions, old_entities_ids,
        old_entities)