Python data_utils示例，db.db_utils.data_utils Python示例

示例#1

0

显示文件

文件： app.py 项目： afcarl/StructMineDataInfra

def distinctive_summarization():
    '''
    Serve the distinctive-summarization query as a JSON HTTP response.

    Reads 'targetEntityType', 'outputEntityType', 'relation' and the
    multi-valued 'targetEntitySubtypes' from the request query string,
    asks the caseolap table for the distinctive entities of every
    sub-type (8 records requested) and reshapes the answer into a list
    of {"name": <sub-type>, "keyWords": [{"name", "number"}, ...]}.

    :return: response object with status 200, JSON body and a
             wildcard CORS header
    '''
    if FLAGS_DEBUG:
        print(request.args)
    targetEntType = request.args.get('targetEntityType')
    outputEntType = request.args.get('outputEntityType')
    relation = request.args.get('relation')
    targetEntSubtypes = request.args.getlist('targetEntitySubtypes')
    if FLAGS_DEBUG:
        print(targetEntType, outputEntType, relation, targetEntSubtypes)
        print("[INFO] Start querying DB")

    # The DB layer receives the output type wrapped in braces and the
    # sub-type list in its string form.
    wrapped_output_types = str("{" + outputEntType + "}")
    sub_types_repr = str(targetEntSubtypes)
    caseolap_utils = data_utils({'caseolap_table': "caseolap_table"})
    res = caseolap_utils.query_distinctive(
        target_type=targetEntType,
        output_types=wrapped_output_types,
        relation_type=relation,
        sub_types=sub_types_repr,
        num_records=8,
    )
    if FLAGS_DEBUG:
        print("[INFO] Complete querying DB")
        print(res)
        print("[INFO] Start formatting DB output result into JSON")

    json_data = []
    for idx, sub_type_name in enumerate(targetEntSubtypes):
        keywords = []
        # res[idx] is walked backwards; per the original note this is
        # what yields a score-descending order.
        for record in reversed(res[idx]):
            score, name = record['score'], record['entity']
            keywords.append({"name": name, "number": score})
        json_data.append({"name": sub_type_name, "keyWords": keywords})

    if FLAGS_DEBUG:
        print("[INFO] Complete formatting DB output result into JSON")

    body = json.dumps(json_data, ensure_ascii=False)
    response = app.response_class(
        response=body,
        status=200,
        mimetype='application/json',
    )
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response

示例#2

0

显示文件

文件： app.py 项目： afcarl/StructMineDataInfra

def get_relations():
    '''
    Return the relations available between two entity types as JSON.

    Reads 'type_a' and 'type_b' (spaces become underscores) and the
    optional 'entities_left' / 'entities_right' filters from the
    request query string; a missing filter defaults to an empty list.

    NOTE(review): each request parameter is stored in the variable of
    the opposite side ('type_a' -> type_b, 'entities_left' ->
    entities_right, and vice versa). This looks intentional -- confirm
    before changing it.

    :return: response object with status 200, JSON body and a
             wildcard CORS header
    '''
    args = request.args
    type_b = args.get('type_a').replace(' ', '_')
    type_a = args.get('type_b').replace(' ', '_')
    entities_right = args.get('entities_left') if 'entities_left' in args else []
    entities_left = args.get('entities_right') if 'entities_right' in args else []

    table_names = {'entity_table': 'entity_table', 'relation_table': 'relation_table'}
    res = data_utils(table_names).get_relations(
        type_a=type_a,
        type_b=type_b,
        entities_left=entities_left,
        entities_right=entities_right,
    )

    body = json.dumps(res, ensure_ascii=False)
    response = app.response_class(
        response=body,
        status=200,
        mimetype='application/json',
    )
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response

示例#3

0

显示文件

文件： app.py 项目： afcarl/StructMineDataInfra

def _split_cached_network(elements):
    '''
    Partition cached network elements into candidate nodes and known edges.

    :param elements: iterable of element dicts, each with a "group" key
                     and a "data" dict
    :return: tuple (node_a_list, node_b_list, existed_edges) where the
             node lists hold node labels and existed_edges is a set of
             whitespace-stripped (source, target) pairs
    '''
    node_a_list = []
    node_b_list = []
    existed_edges = set()
    for ele in elements:
        group = ele["group"]
        if group == "nodes":
            entity_name = ele["data"]["label"]
            # A node element carrying a "classes" key is a type-b node.
            if "classes" in ele:
                node_b_list.append(entity_name)
            else:
                node_a_list.append(entity_name)
        elif group == "edges":
            source_name = "".join(ele["data"]["source"].split())
            target_name = "".join(ele["data"]["target"].split())
            existed_edges.add((source_name, target_name))
    return node_a_list, node_b_list, existed_edges


def _predicted_edge_docs(name_a, name_b, res):
    '''
    Build the "docs" list attached to a predicted edge.

    :param name_a: label of the type-a node
    :param name_b: label of the type-b node
    :param res: prediction result dict; always has 'score' and may also
                carry 'sent', 'article_title' and 'pmid'
    :return: list with the confidence-score entry, followed by the
             supporting-article entry when res has more than one key
    '''
    docs = [{
        ## Show the prediction confidence score as the paper title
        "title": "Confidence Score = " + str(res['score']),
        "pmid": "#",
        "sentences": [""]
    }]
    if len(res) != 1:
        data_doc_sentences = []
        data_doc_sentences.extend(seg_long_sent(res.get("sent", ""), name_a))
        data_doc_sentences.extend(seg_long_sent(res.get("sent", ""), name_b))
        docs.append({
            "title": "Title: " + res.get("article_title", ""),
            "pmid": res.get("pmid", ""),
            "sentences": data_doc_sentences
        })
    return docs


def network_exploration_prediction():
    '''
    Extend the cached network with predicted edges and return it as JSON.

    Takes the first entry of the module-level cached_json_and_relation
    (a (network elements, relation) pair), queries the prediction table
    for every (type-a node, type-b node) pair and appends one edge
    element per pair whose score reaches the threshold and that is not
    already present in the cached network. When nothing is cached an
    error is printed and an empty list is returned.

    :return: response object with status 200, JSON body and a
             wildcard CORS header
    '''
    global cached_json_and_relation
    # Minimum prediction score for a pair to be reported as a new edge.
    score_threshold = 0.77

    print(request.args)
    if not cached_json_and_relation:
        # The original message lacked a separator between its two parts.
        print("[ERROR] Wrong usage of predict relationship: "
              "should provide previous networks and relation")
        json_data = []
    else:
        ## use a shallow list copy to avoid override the cached list
        ## json_data will be of the same format as the sample_data_2
        json_data = cached_json_and_relation[0][0][:]
        cached_relation = cached_json_and_relation[0][1]

        if FLAGS_DEBUG:
            print("json_data = ", json_data)
            print("cached_relation = ", cached_relation)
            print("[INFO] Start quering prediction DB table for relation prediction")

        ### First extract all candidate nodes from cached network
        node_a_list, node_b_list, existed_edges = _split_cached_network(json_data)

        ### Second for all possible candidate pair (node_a, node_b), query DB for relation prediction
        tmp_utils = data_utils({'prediction_table': "prediction_table"})
        new_edge_cnt = 0
        for name_a in node_a_list:
            for name_b in node_b_list:
                if FLAGS_DEBUG:
                    print("[INFO] testing edge", (name_a, name_b))

                res = tmp_utils.query_prediction_v2(name_a=name_a, name_b=name_b,
                                                    relation_type=cached_relation)
                if res['score'] < score_threshold:
                    continue
                print("Res = ", res)
                source_label = "".join(name_a.split())
                target_label = "".join(name_b.split())
                ## do not add existed edges
                # Edges are keyed (b, a): the b node is emitted as "source".
                if (target_label, source_label) in existed_edges:
                    continue
                # NOTE(review): new elements use group "edge" while the
                # cached elements are matched on "edges" -- confirm which
                # spelling the consumer expects before changing either.
                json_data.append({
                    "group": "edge",
                    "data": {
                        "source": target_label,
                        "target": source_label,
                        "docs": _predicted_edge_docs(name_a, name_b, res)
                    },
                    "classes": "edge1"
                })
                new_edge_cnt += 1

        if FLAGS_DEBUG:
            print("[INFO] Complete quering prediction DB table for relation prediction")
            print("[INFO] Add %s new edges" % new_edge_cnt)

    response = app.response_class(
        response=json.dumps(json_data, ensure_ascii=False),
        status=200,
        mimetype='application/json'
    )
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response

示例#4

0

显示文件

文件： app.py 项目： afcarl/StructMineDataInfra

def network_exploration():
    '''
    Query the entity/relation tables for a typed network and return it as JSON.

    Reads from the request query string:
        type_a, type_b: entity type names (spaces become underscores)
        relation_type: relation name handed to the DB query
        entities_left, entities_right: optional filters, default []

    NOTE(review): each request parameter is stored in the variable of
    the opposite side ('type_a' -> type_b, 'entities_left' ->
    entities_right, and vice versa). This looks intentional -- confirm
    before changing it.

    When the module-level sample_query_id is not -1, the result is
    loaded from the matching dumped query file instead of the DB and
    sample_query_id is reset to -1.

    :return: response object with status 200 and a wildcard CORS
             header; the JSON body is {} when the result has neither
             nodes nor edges, otherwise the query result
    '''
    global cached_json_and_relation, sample_query_id

    args = request.args
    type_b = args.get('type_a').replace(' ', '_')
    type_a = args.get('type_b').replace(' ', '_')
    relation_type = args.get('relation_type')
    entities_right = args.get('entities_left') if 'entities_left' in args else []
    entities_left = args.get('entities_right') if 'entities_right' in args else []

    # Fixed limits; 'num_edges' / 'num_pps' request parameters are not read.
    num_edges = 35
    num_pps = 5
    print("Parameters in http request: ", type_a, type_b, relation_type,
          entities_left, entities_right, num_edges, num_pps)

    # Query format (from @bran), e.g.:
    #   python db/db_utils.py entity_table relation_table
    #   "{'name':'mesh', 'type':'{Chemicals_and_Drugs}'}"
    #   "{'name':'mesh', 'type':'{Anatomy}'}"
    #   is_associated_anatomy_of_gene_product
    if FLAGS_DEBUG:
        print("[INFO] Start querying DB")
    tmp_utils = data_utils({'entity_table': 'entity_table', 'relation_table': 'relation_table'})

    type_a = str({'name': 'mesh', 'type': ("{" + type_a + "}")})
    type_b = str({'name': 'mesh', 'type': ("{" + type_b + "}")})

    if sample_query_id == -1:
        res = tmp_utils.query_links_v2(type_a=type_a, type_b=type_b,
                                       relation_type=relation_type,
                                       entities_left=entities_left,
                                       entities_right=entities_right,
                                       num_edges=num_edges, num_pps=num_pps)
    else:
        print("[INFO] load from dump query: ", sample_query_id)
        dump_path = "./data/dumped-query/" + str(sample_query_id) + ".m"
        # Close the dump file deterministically instead of leaking the handle.
        with open(dump_path, "rb") as fin:
            res = marshal.load(fin)
        sample_query_id = -1 # reset
        print("[INFO] reset sample query_id")

    if FLAGS_DEBUG:
        print("[INFO] Complete querying DB")

    if len(res['nodes']) == 0 and len(res['edges']) == 0:
        ## SQL returns empty, return the corresponding placeholder
        payload = {}
    else:
        if FLAGS_SAVE_DATA:
            with open("./db_res.txt", "w") as fout:
                fout.write(str(res))
            print("[DATA] DB output data:", res)
            with open("./db_res.txt", "r") as fin:
                raw_data = fin.read().strip()
            # SECURITY NOTE(review): eval() executes whatever is in
            # db_res.txt. It only round-trips the text written just
            # above, but that text comes from DB content -- consider
            # json or ast.literal_eval if the data allows it.
            res = eval(raw_data)
        payload = res

    response = app.response_class(
        response=json.dumps(payload, ensure_ascii=False),
        status=200,
        mimetype='application/json'
    )
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response
    '''

示例#5

0

显示文件

    "relation": "has_therapeutic_class"
}, {
    "argument1": "Genetic_Variation",
    "argument2": "Diseases",
    "relation": "disease_may_have_cytogenetic_abnormality"
}, {
    "argument1": "Lymphoproliferative_Disorders",
    "argument2": "Anatomy",
    "relation": "is_not_abnormal_cell_of_disease"
}]
if __name__ == '__main__':
    out_dir = 'dumped-query/'
    types = marshal.load(open('all_types.m', 'rb'))
    tmp_utils = data_utils({
        'entity_table': 'entity_table',
        'relation_table': 'relation_table',
        'prediction_table': 'prediction_table'
    })
    for i, query in enumerate(QUERY_NET):
        arg1 = {'name': 'umls', 'type': '{' + query['argument1'] + '}'}
        arg2 = {'name': 'umls', 'type': '{' + query['argument2'] + '}'}
        if query['argument1'] in types['mesh']:
            arg1['name'] = 'mesh'
        if query['argument2'] in types['mesh']:
            arg2['name'] = 'mesh'

        result = tmp_utils.query_links(type_a=arg1,
                                       type_b=arg2,
                                       relation_type=query['relation'],
                                       num_edges=15,
                                       num_pps=6)