Example #1
def SPFracAUC(network, nonexist_edges, iterations, steps):
    '''
    Function takes a network, list of non-existent edges, the number of iterations, and the percent of edges to sample
    (steps) and runs the Shortest Path scoring function over each sampled network for the specified number of
    iterations.
    :param network: undirected graph
    :param nonexist_edges: list of non-existent edges from the graph
    :param iterations: integer representing the number of iterations to run
    :param steps: list of percent of edges to sample
    :return: two lists containing the AUC and precision values from each iteration
    '''

    auc = []; prec = []

    for _ in range(iterations):
        iteration_data = GraphMaker(network, steps)
        training_graph = iteration_data[0]
        testing_edges = iteration_data[1]
        missing_scores = LinkPrediction.ShortestPath(training_graph, testing_edges)
        nonexist_scores = LinkPrediction.ShortestPath(training_graph, nonexist_edges)

        #get AUC
        auc_round = EvaluationMetrics.AUC(nonexist_scores, missing_scores)
        auc.append(auc_round)

        # precision - whether the top or bottom K links are taken depends on whether AUC is above or below 0.5
        prec_round = EvaluationMetrics.KPrecision(auc_round, dict(missing_scores, **nonexist_scores), testing_edges)
        prec.append(prec_round)

    return auc, prec
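
A minimal usage sketch (the graph and parameter values here are hypothetical, and GraphMaker, LinkPrediction, and EvaluationMetrics are assumed to be importable from this project):

import networkx as nx

G = nx.karate_club_graph()          # stand-in undirected graph
nonexist = list(nx.non_edges(G))    # node pairs absent from the graph
auc, prec = SPFracAUC(G, nonexist, iterations=10, steps=[0.5])
print(sum(auc) / len(auc), sum(prec) / len(prec))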
Example #2
def main():
    ##### Testing MESH class
    mv = MESH('MESH Terms.txt', '')
    #mv.create_MESH_vocab_and_IDmapping(); mv.save_MESH_IDmapping('MESH ID Mapping.txt')
    mesh_id_mapping = mv.read_MESH_IDmapping('MESH ID Mapping.txt')

    ##### Testing Corpus class
    #folder = "corpus/"
    #num = 100
    #corp = Corpus(folder,n=num)

    ##### Vectorizing
    #tf_vectorizer, tf_matrix = corp.vectorize_corpus(corp.clean(),voc)
    #tv = q.transform_query(tf_vectorizer).flatten()

    ##### Getting data
    tf = TextFile('Medical Case Reports.txt', '', encode='latin-1')
    #tf = TextFile('pubmed_result.txt','',encode='latin-1')
    mesh_terms = tf.get_MESH_terms()
    ui = tf.get_UI(mesh_id_mapping)
    keywords = tf.get_keywords()
    titles = tf.get_titles()

    ##### Assigning ICD10 codes
    asg = Assigner('MESH_ICD10_Mapping.csv', mesh_id_mapping)
    mesh_codes = asg.assign_MESHterms_ICD10(ui)
    #print("Printing direct mapped codes", mesh_codes)
    #keywords_codes = asg.assign_keywords_ICD10(keywords)
    #print("Printing codes using keywords", keywords_codes)
    #titles_codes = asg.assign_titles_ICD10(titles)
    #print("Printing codes using titles", titles_codes)
    #partial_codes = asg.assign_context_aware_codes(stopword_percent_include=0.92)
    #print("Printing context-aware codes", partial_codes)
    #total_codes = asg.assign_all_ICD10(ui,keywords,titles,stopword_percent_include=0.92)
    #print("Printing all codes", total_codes)
    #asg.write_codes_to_csv(total_codes,'all_codes.csv')  # Case Reports_ICD10_Mapping.csv

    ##### Link Prediction
    G = nx.Graph()
    d = mesh_codes
    G = lp.create_weighted_bipartite_graph(G, d)

    ratings = []
    for component in nx.connected_components(G):
        G_new = nx.Graph()
        print(component)
        for node in component:
            if node.isdigit():  # digit-labeled nodes are the record IDs keyed in d
                for code, weight in d[node]:
                    G_new.add_node(code, bipartite='code')
                    G_new.add_edge(node, code, weight=weight)

        df = lp.item_based_CF(G_new)
        ratings.append(df)

    return
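
create_weighted_bipartite_graph is not shown in this example; a minimal sketch consistent with how it is called above (d maps digit-labeled record IDs to lists of (ICD10 code, weight) pairs) might look like:

def create_weighted_bipartite_graph(G, d):
    # hypothetical reconstruction, not necessarily the project's implementation
    for record_id, codes in d.items():
        G.add_node(record_id, bipartite='record')
        for code, weight in codes:
            G.add_node(code, bipartite='code')
            G.add_edge(record_id, code, weight=weight)
    return G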
Example #3
def SFracAUC(network, nonexist_edges, iterations, steps):
    '''
    Function takes a network, list of non-existent edges, the number of iterations, and the percent of edges to sample
    (steps) and runs the SimRank scoring function over each sampled network for the specified number of
    iterations.
    :param network: undirected graph
    :param nonexist_edges: list of non-existent edges from the graph
    :param iterations: integer representing the number of iterations to run
    :param steps: list of percent of edges to sample
    :return: two lists containing the AUC and precision values from each iteration
    '''

    auc = []; prec = []

    for _ in range(iterations):
        iteration_data = GraphMaker(network, steps)
        training_graph = iteration_data[0]
        testing_edges = iteration_data[1]
        scores = LinkPrediction.SimRank(training_graph, c=0.8, num_iterations=10)

        # get AUC by sampling 1000 (missing edge, non-existent edge) pairs
        count = 0.0
        for _ in range(1000):
            TN = random.sample(nonexist_edges, 1)[0]
            TP = random.sample(testing_edges, 1)[0]

            # default to 0.0 when SimRank produced no score for the pair
            TN_val = scores.get((TN[0], TN[1]), 0.0)
            TP_val = scores.get((TP[0], TP[1]), 0.0)

            if TP_val > TN_val:
                count += 1.0
            if TP_val == TN_val:
                count += 0.5

        auc_round = count / 1000
        auc.append(auc_round)

        # precision - whether the top or bottom K links are taken depends on whether AUC is above or below 0.5
        prec_round = EvaluationMetrics.KPrecision(auc_round, scores, testing_edges)
        prec.append(prec_round)

    return auc, prec
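
The sampling loop above computes the standard sampled AUC, AUC = (n' + 0.5*n'')/n, where n' counts comparisons in which the missing edge outscores the non-existent edge and n'' counts ties. A hedged refactoring into a reusable helper (assuming random is imported at module level, as the loop above already requires):

def sampled_auc(scores, testing_edges, nonexist_edges, n=1000):
    # sample n (missing, non-existent) edge pairs and compare their scores
    count = 0.0
    for _ in range(n):
        tn = random.sample(nonexist_edges, 1)[0]
        tp = random.sample(testing_edges, 1)[0]
        tn_val = scores.get((tn[0], tn[1]), 0.0)
        tp_val = scores.get((tp[0], tp[1]), 0.0)
        if tp_val > tn_val:
            count += 1.0
        elif tp_val == tn_val:
            count += 0.5
    return count / n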
Example #4
    '''
    #####*********Statistical attributes of graphs*************
    print "*Statistical attributes of graphs:"
    print "N", nx.number_of_nodes(G)
    print "M", nx.number_of_edges(G)
    print "C", nx.average_clustering(G)
    print "Cw", nx.average_clustering(G, weight='weight')
    print "<d>", nx.average_shortest_path_length(G)
    print "r", nx.degree_assortativity_coefficient(G)
    #print nx.density(G)
    #print nx.transitivity(G)
    degree_list = list(G.degree_iter())
    #print degree_list
    avg_degree_1 = 0.0
    avg_degree_2 = 0.0
    for node in degree_list:
        avg_degree_1 = avg_degree_1 + node[1]
        avg_degree_2 = avg_degree_2 + node[1]*node[1]
    avg_degree = avg_degree_1/len(degree_list)
    # degree heterogeneity H = <k^2>/<k>^2
    avg_degree_square = (avg_degree_2/len(degree_list)) / (avg_degree*avg_degree)
    print "<k>", avg_degree
    print "H", avg_degree_square
    '''

    #print "============Drift_Prediction_Experiment==============================="
    #G = LinkPrediction.Position_Drift.Graph_Spatial_Temporal_Dynamics(G, 3)  #Spatial_Temporal influence based node position drift.
    #Prediction_Experiment(G, Predictor, Probe_Set, Top_L, 3.0) #Top_K, Deleted_Ratio

    LinkPrediction.Drift_Prediction_Experiment(G, 'WCN', Probe_Set, Top_L,
                                               G.number_of_edges())
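
The commented-out statistics block relies on the networkx 1.x G.degree_iter() API. A sketch of the same <k> and H = <k^2>/<k>^2 computation against the networkx 2.x degree view (an assumption about the upgrade path, not code from this project):

def degree_stats(G):
    # mean degree <k> and heterogeneity H = <k^2>/<k>^2
    degrees = [k for _, k in G.degree()]
    avg_k = sum(degrees) / float(len(degrees))
    avg_k2 = sum(k * k for k in degrees) / float(len(degrees))
    return avg_k, avg_k2 / (avg_k * avg_k)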
Example #5
def main():

    #read in graphs
    owl_graph = nx.read_gml(
        'Network_Data/Trametinib_query_OWL_network.gml').to_undirected()
    nets_graph = nx.read_gml(
        'Network_Data/Trametinib_query_NETS_network.gml').to_undirected()
    mid_graph = nx.read_gml(
        'Network_Data/Trametinib_query_PART_network').to_undirected()

    #run link predictions for each graph
    nets_scores = LinkPrediction.katz(nets_graph,
                                      beta=0.001,
                                      max_power=5,
                                      weight=None,
                                      dtype=None)
    nets_nonexist = list(nx.non_edges(nets_graph))
    nets_preds = EdgeChecker(nets_scores, nets_nonexist)

    owl_nonexist = list(nx.non_edges(owl_graph))
    owl_scores = LinkPrediction.RPR(owl_graph, alpha=0.15, beta=0)
    owl_preds = EdgeChecker(owl_scores, owl_nonexist)

    #explore predictions
    len(nets_preds)  #1652
    np.min(nets_preds.values())
    np.mean(nets_preds.values())
    np.max(nets_preds.values())

    #print top 20 edges
    sorted(Counter(sorted(nets_preds.values())).items(),
           key=lambda i: i[0])  #get distribution of counts
    sorted_scores = sorted(owl_preds.items(),
                           key=operator.itemgetter(1),
                           reverse=True)  #biggest first
    sorted_scores = sorted(nets_preds.items(),
                           key=operator.itemgetter(1),
                           reverse=False)  #smallest first

    #investigate the top n items
    edges = sorted_scores[0:20]

    ## Write results for use with ranking methods

    # graphs
    graph = nx.read_gml(
        'Network_Data/Trametinib_query_OWL_network.gml').to_undirected()

    # graph = nx.read_gml('Network_Data/Trametinib_query_NETS_network.gml').to_undirected()

    graph = nx.read_gml(
        'Network_Data/DDI_reactome_query_NETS_network.gml').to_undirected()

    methods = [
        LinkPrediction.DegreeProduct(graph, list(nx.non_edges(graph))),
        LinkPrediction.ShortestPath(graph, list(nx.non_edges(graph))),
        LinkPrediction.CommonNeighbors(graph, list(nx.non_edges(graph))),
        LinkPrediction.AdamicAdvar(graph, list(nx.non_edges(graph))),
        LinkPrediction.Jaccard(graph, list(nx.non_edges(graph))),
        LinkPrediction.LHN(graph, list(nx.non_edges(graph))),
        LinkPrediction.ResourceAllocation(graph, list(nx.non_edges(graph))),
        LinkPrediction.Sorensen(graph, list(nx.non_edges(graph))),
        LinkPrediction.katz(graph,
                            beta=0.001,
                            max_power=5,
                            weight=None,
                            dtype=None),
        LinkPrediction.RPR(graph, alpha=0.15, beta=0)
    ]

    # method counter for labeling csv files
    count = 0

    for method in methods:
        updated_res = EdgeChecker(method, list(nx.non_edges(graph)))

        with open('Results/DDI_reactome/NETS_DDI ' + str(count) + '.csv',
                  'wb') as csvfile:
            writer = csv.writer(csvfile,
                                delimiter=',',
                                quoting=csv.QUOTE_MINIMAL)
            for key, values in updated_res.items():
                writer.writerow([key, values])

        count += 1
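
EdgeChecker is not defined in this example; judging from its call sites (a score dict plus a list of non-existent edges, returning a dict whose items are written out), a minimal sketch might filter the predicted scores down to node pairs that are not yet edges, checking both orderings. This is a guess at the behavior, not the project's implementation:

def EdgeChecker(scores, nonexist_edges):
    # keep only scores for node pairs that are absent from the graph
    nonexist = set(nonexist_edges)
    return {edge: score for edge, score in scores.items()
            if edge in nonexist or (edge[1], edge[0]) in nonexist}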
Example #6
            allones.append(res)
            pred_function = graph.train(lambda features: np.random.rand(features.shape[0]) > 0.5)
            res = graph.test_and_evaluate(pred_function, Xa[test_set, 15:17], gold)
            randompred.append(res)

            pred_function = graph.train(perceptron, Xa[train_set, 15:17], ya[train_set])
            res = graph.test_and_evaluate(pred_function, Xa[test_set, 15:17], gold, pp)
            ppton.append(res)

            # asym.append([.8, .9, .5, .3, 2, res[-1]])
            # chiang.append([.8, .9, .5, .3, 2, res[-1]])
            # continue
            esigns = {(u, v): graph.E.get((u, v)) if (u, v) in graph.E else graph.E.get((v, u))
                      for u, adj in graph.Gfull.items() for v in adj}
            mapping = {i: i for i in range(graph.order)}
            sstart = lp.clock()
            sadj, test_edges = sp.get_training_matrix(666, mapping, slcc=set(range(graph.order)),
                                                      tree_edges=graph.Esign.keys(), G=graph.Gfull,
                                                      EDGE_SIGN=esigns)
            ngold, pred = sp.predict_edges(sadj, 15, mapping, test_edges,
                                           graph.Gfull, esigns, bk=9000)
            time_elapsed = lp.clock() - sstart
            C = lp.confusion_matrix(ngold, pred)
            fp, tn = C[0, 1], C[0, 0]
            acc, fpr, f1, mcc = [lp.accuracy_score(ngold, pred),  fp/(fp+tn),
                                 lp.f1_score(ngold, pred, average='weighted', pos_label=None),
                                 lp.matthews_corrcoef(ngold, pred)]
            frac = 1 - len(test_edges)/len(graph.E)
            asym.append([acc, f1, mcc, fpr, time_elapsed, frac])

            ngold, pred, time_elapsed, frac = getWH.run_chiang(graph)
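
The metric block above reads fp = C[0, 1] and tn = C[0, 0], which matches the scikit-learn confusion-matrix convention (rows are true labels, columns are predictions, with row 0 holding the negative class); lp.confusion_matrix is assumed to follow it. A small check of that index mapping:

from sklearn.metrics import confusion_matrix

gold = [0, 0, 0, 1, 1]
pred = [0, 1, 0, 1, 0]
C = confusion_matrix(gold, pred)    # rows = true labels, columns = predictions
fp, tn = C[0, 1], C[0, 0]           # row 0 is the negative class
print(fp / float(fp + tn))          # false positive rate = 1/3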