def distinctive_summarization():
    """Answer a distinctive-summarization request with a JSON document.

    Query-string parameters read from the current request:

    * ``targetEntityType``     -- forwarded as ``target_type``.
    * ``outputEntityType``     -- wrapped in braces and forwarded as
      ``output_types``; a missing value raises ``TypeError`` on the
      string concatenation.
    * ``relation``             -- forwarded as ``relation_type``.
    * ``targetEntitySubtypes`` -- may be repeated; the list of values is
      forwarded in its ``str()`` form as ``sub_types``.

    :return: a 200 ``application/json`` response (CORS ``*``) holding one
        ``{"name": ..., "keyWords": [...]}`` object per requested sub-type.
    """
    args = request.args
    if FLAGS_DEBUG:
        print(args)

    target_type = args.get('targetEntityType')
    output_type = args.get('outputEntityType')
    relation_type = args.get('relation')
    subtype_names = args.getlist('targetEntitySubtypes')
    if FLAGS_DEBUG:
        print(target_type, output_type, relation_type, subtype_names)
        print("[INFO] Start querying DB")

    # The query helper takes the output type as "{Type}" and the sub-types
    # as the textual form of the list.
    output_types = "{" + output_type + "}"
    sub_types = str(subtype_names)

    db = data_utils({'caseolap_table': "caseolap_table"})
    res = db.query_distinctive(
        target_type=target_type,
        output_types=output_types,
        relation_type=relation_type,
        sub_types=sub_types,
        num_records=8,
    )
    if FLAGS_DEBUG:
        print("[INFO] Complete querying DB")
        print(res)
        print("[INFO] Start formatting DB output result into JSON")

    # res[k] holds the entities for the k-th requested sub-type.  Each list
    # is walked backwards so scores come out in descending order (this
    # presumes the query returns them ascending -- verify against
    # query_distinctive).
    json_data = [
        {
            "name": subtype_name,
            "keyWords": [
                {"name": entity['entity'], "number": entity['score']}
                for entity in reversed(res[position])
            ],
        }
        for position, subtype_name in enumerate(subtype_names)
    ]
    if FLAGS_DEBUG:
        print("[INFO] Complete formatting DB output result into JSON")

    response = app.response_class(
        response=json.dumps(json_data, ensure_ascii=False),
        status=200,
        mimetype='application/json',
    )
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response
def get_relations():
    """Look up relations between two entity types and return them as JSON.

    Query-string parameters read from the current request:

    * ``type_a`` / ``type_b`` -- spaces are replaced by underscores; a
      missing value raises ``AttributeError`` because ``.replace`` is
      called on ``None``.
    * ``entities_left`` / ``entities_right`` -- optional; an empty list is
      used when the parameter is absent.

    The request's "a"/"left" side is handed to ``data_utils.get_relations``
    as its "b"/"right" side and vice versa.

    :return: a 200 ``application/json`` response (CORS ``*``) with whatever
        ``data_utils.get_relations`` returned.
    """
    args = request.args

    # NOTE(review): the a/b and left/right crossing is reproduced exactly as
    # found; presumably the front end and the DB layer number the two sides
    # in opposite order -- confirm before "fixing".
    db_type_b = args.get('type_a').replace(' ', '_')
    db_type_a = args.get('type_b').replace(' ', '_')
    db_entities_right = (
        args.get('entities_left') if 'entities_left' in args else []
    )
    db_entities_left = (
        args.get('entities_right') if 'entities_right' in args else []
    )

    found = data_utils({
        'entity_table': 'entity_table',
        'relation_table': 'relation_table',
    }).get_relations(
        type_a=db_type_a,
        type_b=db_type_b,
        entities_left=db_entities_left,
        entities_right=db_entities_right,
    )

    response = app.response_class(
        response=json.dumps(found, ensure_ascii=False),
        status=200,
        mimetype='application/json',
    )
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response
def network_exploration_prediction():
    """Add predicted edges to the cached network and return it as JSON.

    The first entry of the module-level ``cached_json_and_relation`` list is
    read as a ``(network_elements, relation)`` pair.  Every (type-a node,
    type-b node) pair of that network is sent to
    ``data_utils.query_prediction_v2``; a pair scoring at least 0.77 that is
    not already connected gets a new edge element appended to a copy of the
    network.  When the cache is empty an error line is printed and an empty
    JSON list is returned.

    :return: a 200 ``application/json`` response (CORS ``*``) whose body is
        the list of network elements, predicted edges included.
    """
    min_score = 0.77  # predictions scoring below this are not shown

    print(request.args)

    if not cached_json_and_relation:
        # The two literals used to be fused without a separator
        # ("...relationshipshould provide...").
        print("[ERROR] Wrong usage of predict relationship, "
              "should provide previous networks and relation")
        json_data = []
    else:
        cached_entry = cached_json_and_relation[0]
        # Shallow copy: predicted edges are appended to the copy only, so
        # the cached list itself is not extended.
        json_data = cached_entry[0][:]
        relation_type = cached_entry[1]
        if FLAGS_DEBUG:
            print("json_data = ", json_data)
            print("cached_relation = ", relation_type)
            print("[INFO] Start quering prediction DB table for relation prediction")

        # First: collect the candidate nodes and the edges already present.
        node_a_list = []
        node_b_list = []
        existed_edges = set()
        for ele in json_data:
            group = ele["group"]
            if group == "nodes":
                # A node element carrying a "classes" key is a type-b node.
                bucket = node_b_list if "classes" in ele else node_a_list
                bucket.append(ele["data"]["label"])
            elif group == "edges":
                existed_edges.add((
                    _strip_whitespace(ele["data"]["source"]),
                    _strip_whitespace(ele["data"]["target"]),
                ))

        # Second: query a prediction for every candidate (node_a, node_b).
        tmp_utils = data_utils({'prediction_table': "prediction_table"})
        new_edge_cnt = 0
        for name_a in node_a_list:
            for name_b in node_b_list:
                if FLAGS_DEBUG:
                    print("[INFO] testing edge", (name_a, name_b))
                res = tmp_utils.query_prediction_v2(
                    name_a=name_a, name_b=name_b, relation_type=relation_type)
                if not res['score'] >= min_score:
                    continue
                print("Res = ", res)

                label_a = _strip_whitespace(name_a)
                label_b = _strip_whitespace(name_b)
                # Edges run from the type-b node to the type-a node, so the
                # duplicate check uses that same orientation.
                if (label_b, label_a) in existed_edges:
                    continue

                # NOTE(review): cached edges are recognised above by group
                # "edges" while predicted ones are emitted with group
                # "edge".  Left unchanged because the consumer of this JSON
                # is not visible here -- confirm which spelling it expects.
                json_data.append({
                    "group": "edge",
                    "data": {
                        "source": label_b,
                        "target": label_a,
                        "docs": _prediction_docs(res, name_a, name_b),
                    },
                    "classes": "edge1",
                })
                new_edge_cnt += 1

        if FLAGS_DEBUG:
            print("[INFO] Complete quering prediction DB table for relation prediction")
            print("[INFO] Add %s new edges" % new_edge_cnt)

    response = app.response_class(
        response=json.dumps(json_data, ensure_ascii=False),
        status=200,
        mimetype='application/json',
    )
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response


def _strip_whitespace(text):
    """Return *text* with every whitespace character removed."""
    return "".join(text.split())


def _prediction_docs(res, name_a, name_b):
    """Build the ``docs`` list attached to one predicted edge.

    The first entry always shows the confidence score in place of a paper
    title.  A second entry with the article title, pmid and the
    ``seg_long_sent`` output for both entity names is added unless *res*
    holds a single key only.
    """
    docs = [{
        "title": "Confidence Score = " + str(res['score']),
        "pmid": "#",
        "sentences": [""],
    }]
    if len(res.keys()) != 1:
        sentence = res.get("sent", "")
        data_doc_sentences = []
        data_doc_sentences.extend(seg_long_sent(sentence, name_a))
        data_doc_sentences.extend(seg_long_sent(sentence, name_b))
        docs.append({
            "title": "Title: " + res.get("article_title", ""),
            "pmid": res.get("pmid", ""),
            "sentences": data_doc_sentences,
        })
    return docs
def network_exploration(): ''' arg1: corresponds to type a (node_a) arg2: corresponds to type b (node_b) :return: ''' # global cached_previous_json_network, cached_relation global cached_json_and_relation, sample_query_id type_b = request.args.get('type_a').replace(' ', '_') type_a = request.args.get('type_b').replace(' ', '_') relation_type = request.args.get('relation_type') if 'entities_left' in request.args: entities_right = request.args.get('entities_left') else: entities_right = [] if 'entities_right' in request.args: entities_left = request.args.get('entities_right') else: entities_left = [] ''' if 'num_edges' in request.args: num_edges = request.args.get('num_edges') else: num_edges = 5 if 'num_pps' in request.args: num_pps = request.args.get('num_pps') else: num_pps = 1 ''' num_edges = 35 num_pps = 5 print("Parameters in http request: ", type_a, type_a, relation_type, entities_left, entities_right, num_edges, num_pps) ''' Following the query format from @bran Example: python db/db_utils.py entity_table relation_table "{'name':'mesh', 'type':'{Chemicals_and_Drugs}'}" "{'name':'mesh', 'type':'{Anatomy}'}" is_associated_anatomy_of_gene_product ''' if FLAGS_DEBUG: print("[INFO] Start querying DB") tmp_utils = data_utils({'entity_table': 'entity_table', 'relation_table': 'relation_table'}) ''' arg1_type = check_types(arg1) arg2_type = check_types(arg2) relation_type = check_types(relation) if FLAGS_DEBUG: print("[INFO] marshal returned types = ", (arg1_type, arg2_type, relation_type)) if (arg1_type == "none" or arg2_type == "none" or relation_type == "none"): ## Return a placeholder response showing invalid query response = app.response_class( response=json.dumps(invalid_query_data, ensure_ascii = False), status=200, mimetype='application/json' ) response.headers.add('Access-Control-Allow-Origin', '*') return response ''' type_a = str({'name':'mesh', 'type':("{"+type_a+"}") }) type_b = str({'name':'mesh', 'type':("{"+type_b+"}") }) if sample_query_id == -1: res 
= tmp_utils.query_links_v2(type_a=type_a, type_b=type_b, relation_type=relation_type, entities_left=entities_left, entities_right=entities_right, num_edges=int(num_edges), num_pps=int(num_pps)) else: print("[INFO] load from dump query: ", sample_query_id) res = marshal.load(open("./data/dumped-query/"+str(sample_query_id)+".m","rb")) sample_query_id = -1 # reset print("[INFO] reset sample query_id") # res = tmp_utils.query_links(type_a=type_a, type_b=type_b, relation_type=relation_type, num_edges=number_of_edges) # res = tmp_utils.query_links_with_walk(type_a=type_a, type_b=type_b, relation_type=relation_type, # num_edges=number_of_edges, num_pps=number_of_papers) if FLAGS_DEBUG: print("[INFO] Complete querying DB") if (len(res['nodes']) == 0 and len(res['edges']) == 0): ## SQL returns empty, return the corresponding placeholder response = app.response_class( response=json.dumps({}, ensure_ascii = False), status=200, mimetype='application/json' ) response.headers.add('Access-Control-Allow-Origin', '*') return response if FLAGS_SAVE_DATA: with open("./db_res.txt", "w") as fout: fout.write(str(res)) print("[DATA] DB output data:", res) with open("./db_res.txt", "r") as fin: raw_data = fin.read().strip() res = eval(raw_data) response = app.response_class( response=json.dumps(res, ensure_ascii = False), # response=json.dumps(sample_data_2, ensure_ascii = False), status=200, mimetype='application/json' ) response.headers.add('Access-Control-Allow-Origin', '*') return response '''
"relation": "has_therapeutic_class" }, { "argument1": "Genetic_Variation", "argument2": "Diseases", "relation": "disease_may_have_cytogenetic_abnormality" }, { "argument1": "Lymphoproliferative_Disorders", "argument2": "Anatomy", "relation": "is_not_abnormal_cell_of_disease" }] if __name__ == '__main__': out_dir = 'dumped-query/' types = marshal.load(open('all_types.m', 'rb')) tmp_utils = data_utils({ 'entity_table': 'entity_table', 'relation_table': 'relation_table', 'prediction_table': 'prediction_table' }) for i, query in enumerate(QUERY_NET): arg1 = {'name': 'umls', 'type': '{' + query['argument1'] + '}'} arg2 = {'name': 'umls', 'type': '{' + query['argument2'] + '}'} if query['argument1'] in types['mesh']: arg1['name'] = 'mesh' if query['argument2'] in types['mesh']: arg2['name'] = 'mesh' result = tmp_utils.query_links(type_a=arg1, type_b=arg2, relation_type=query['relation'], num_edges=15, num_pps=6)