Example No. 1
def create_target_similarity_network_normalised(target_seq, name):
    # module-level requirements: from tqdm import tqdm; import skbio;
    # from skbio.sequence import Protein; a BLOSUM50 substitution matrix
    # as blosum50; and the project's utils module
    import math
    target_names = list(target_seq.keys())
    seq_info = []
    for i in tqdm(range(len(target_names))):
        seq1 = target_seq[target_names[i]]
        for j in range(i + 1, len(target_names)):
            seq2 = target_seq[target_names[j]]  # fixed: was target_seqs (NameError)
            try:
                alignment, score, start_end_positions = skbio.alignment.local_pairwise_align_ssw(
                    Protein(seq1), Protein(seq2), substitution_matrix=blosum50)
            except Exception:  # alignment can fail, e.g. on malformed sequences
                score = 0
            # normalise the raw Smith-Waterman score by the geometric mean
            # of the two sequence lengths
            new_score = float(score) / (math.sqrt(len(seq1)) *
                                        math.sqrt(len(seq2)))
            seq_info.append([target_names[i], target_names[j], new_score])
    name = 'data/' + name + 'target_similarity.pkl'
    utils.save_any_obj_pkl(seq_info, name)
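A minimal usage sketch for the function above; the target IDs and sequences are toy values, and the module-level imports noted in the comment are assumed to be in place:

toy_targets = {'T1': 'MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ',
               'T2': 'MKQLEDKVEELLSKNYHLENEVARLKKLVGER'}
create_target_similarity_network_normalised(toy_targets, 'toy_')
# writes data/toy_target_similarity.pkl containing
# [['T1', 'T2', <length-normalised alignment score>]]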
Example No. 2
def run_two_stage(args):
    DTI_network = utils.load_any_obj_pkl(args.dti_path)
    drug_similarity = utils.load_any_obj_pkl(args.drug_sim_path)
    target_similarity = utils.load_any_obj_pkl(args.target_sim_path)

    # sparsify the similarity data: keep only the strongest top_ratio of edges
    csn_network = network_construction.construct_signifcant_edge_network(
        drug_similarity, top_ratio=float(args.sparsity))
    tsn_network = network_construction.construct_signifcant_edge_network(
        target_similarity, top_ratio=float(args.sparsity))

    # build implicit compound/target networks from the bipartite DTI network
    implicit_compounds = network_construction.create_implicit_networks(
        DTI_network, list(csn_network.nodes()))
    implicit_targets = network_construction.create_implicit_networks(
        DTI_network, list(tsn_network.nodes()))

    learner = seperate_learner.two_stage_learning(
        DTI_network=DTI_network,
        compound_list=list(csn_network.nodes()),
        target_list=list(tsn_network.nodes()),
        tsn_network=tsn_network,
        csn_network=csn_network,
        implicit_t_network=implicit_targets,
        implicit_c_network=implicit_compounds,
        wl=int(args.walk_length),
        nn=int(args.negative_number),
        wn=int(args.walk_num),
        worker=int(args.worker),
        load_emb=False)
    learner.learn_all_network_embedding()
    learner.build_node_representation()

    training_samples, training_labels = learner.construct_training_samples(
        negative_ratio=10)

    test_pairs = new_pairs_to_evaludate(list(csn_network.nodes()),
                                        list(tsn_network.nodes()), DTI_network)
    test_samples = learner.concatenate_pair_embeddings(test_pairs)

    training_samples = normalise_sample_representation.standardscaler_transform(
        training_samples)
    test_samples = normalise_sample_representation.standardscaler_transform(
        test_samples)

    clf = learner.train_DTI_prediction_svm(training_samples,
                                           training_labels,
                                           kernal=2)
    probs = clf.predict_proba(test_samples)
    new_probs = [row[1] for row in probs]  # probability of the positive class
    all_evaluation = []
    for pair, prob in zip(test_pairs, new_probs):
        all_evaluation.append([pair[0], pair[1], prob])
    output_name = 'output/' + args.output_name + '.pkl'
    utils.save_any_obj_pkl(all_evaluation, output_name)
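A hedged sketch of the command-line wiring run_two_stage expects; the attribute names (dti_path, drug_sim_path, ...) come from the function body, while the flag spellings and default values are assumptions:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--dti_path')
parser.add_argument('--drug_sim_path')
parser.add_argument('--target_sim_path')
parser.add_argument('--sparsity', default='0.04')       # kept as strings: the
parser.add_argument('--walk_length', default='80')      # function casts them
parser.add_argument('--negative_number', default='10')  # with float()/int()
parser.add_argument('--walk_num', default='20')
parser.add_argument('--worker', default='4')
parser.add_argument('--output_name', default='two_stage_run')
run_two_stage(parser.parse_args())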
Example No. 3
def construct_original_graph():
    files = os.listdir("handle_data/data/collaboration_network")

    graphs = list()
    for file in files:
        path = os.path.join("handle_data/data/collaboration_network", file)
        g = nx.Graph()
        l = []  # weighted edge list for this yearly snapshot
        node_attr = dict()  # node ID -> raw attribute vector
        with open(path, "r") as lines:
            for line in lines:
                # each line is a JSON object whose first two keys are the
                # endpoint node IDs (values: attribute vectors) and whose
                # third key is the edge weight
                line_json = json.loads(line)
                keys = list(line_json.keys())
                s1, s2, w = keys[0], keys[1], line_json[keys[2]]
                a1, a2 = line_json[keys[0]], line_json[keys[1]]
                l.append((s1, s2, w))
                if s1 not in node_attr:
                    node_attr[s1] = a1
                if s2 not in node_attr:
                    node_attr[s2] = a2
        g.add_weighted_edges_from(l)
        # column-wise max-normalise the node attribute vectors
        attr_array = np.array(list(node_attr.values()))
        attr_normed = attr_array / attr_array.max(axis=0)
        for index, node in enumerate(node_attr):
            g.nodes[node]["attribute"] = list(attr_normed[index])
        print("#nodes: " + str(g.number_of_nodes()) + ", #edges: " +
              str(g.number_of_edges()))
        # nx.connected_component_subgraphs was removed in networkx 2.4;
        # keep the largest connected component instead
        g = g.subgraph(max(nx.connected_components(g), key=len)).copy()
        print(file[:-4] + ": #nodes: " + str(g.number_of_nodes()) +
              ", #edges: " + str(g.number_of_edges()))
        save_any_obj_pkl(
            g, "graph_data/collaborate_network_" + file[:-4] + ".pkl")

        filename = "graph_data/collaborate_network_" + file[:-4] + "_edgelist.txt"
        nx.write_edgelist(g, filename, data=False)
        nx.draw(g, node_size=20)
        plt.show()
        graphs.append(g)

    save_any_obj_pkl(graphs, "graph_data/collaborate_network_2006_2016.pkl")
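A hedged sketch of one input line in the format construct_original_graph parses; the key names and values are hypothetical, only the positional convention (two node keys, then a weight key) is taken from the code:

import json

line = json.dumps({"author_42": [3, 1, 7],   # node ID -> attribute vector
                   "author_99": [5, 2, 4],
                   "weight": 2})             # edge weight (third key)
# json.loads(line) in the loop above yields
# s1='author_42', s2='author_99', w=2, a1=[3, 1, 7], a2=[5, 2, 4]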
Example No. 4
def construct_combined_graph():
    graphs = load_any_obj_pkl(
        "graph_data/collaborate_network(1G)/collaborate_network_2006_2016.pkl")
    # merge every snapshot with its two predecessors (sliding three-year window)
    for i in range(2, len(graphs)):
        g0 = graphs[i - 2]
        g1 = graphs[i - 1]
        g = graphs[i]
        print("#nodes: " + str(g.number_of_nodes()) + ", #edges: " +
              str(g.number_of_edges()))
        l = []
        # carry over edges (and their endpoints' attributes) from the two
        # previous snapshots when they are absent from the current one
        for g_prev in (g0, g1):
            for edge in g_prev.edges():
                if edge not in g.edges():
                    n1, n2 = edge[0], edge[1]
                    l.append((n1, n2, g_prev.get_edge_data(n1, n2)['weight']))
                    if n1 not in g.nodes():
                        g.add_node(n1, attribute=g_prev.nodes[n1]["attribute"])
                    if n2 not in g.nodes():
                        g.add_node(n2, attribute=g_prev.nodes[n2]["attribute"])
        g.add_weighted_edges_from(l)
        print("#nodes: " + str(g.number_of_nodes()) + ", #edges: " +
              str(g.number_of_edges()))
        # nx.draw(g, node_size=20)
        # plt.show()
        g = g.subgraph(max(nx.connected_components(g), key=len)).copy()
        print("#nodes: " + str(g.number_of_nodes()) + ", #edges: " +
              str(g.number_of_edges()))
        filename = "graph_data/collaborate_network_" + str(
            i) + "_edgelist_new.txt"
        nx.write_edgelist(g, filename, data=False)

        save_any_obj_pkl(
            g,
            "graph_data/collaborate_network(3G)" + str(i + 2006) + "_new.pkl")

        graphs.append(g)

    save_any_obj_pkl(graphs,
                     "graph_data/collaborate_network_2008_2016_new.pkl")
Example No. 5
def create_compound_similarity_network(compounds_smiles, top_ratio=0.04):
    # top_ratio is accepted for API symmetry with the other builders but is
    # not used in this variant
    compounds_name = list(compounds_smiles.keys())
    similarity_info = []
    for i in range(len(compounds_name)):
        m1 = Chem.MolFromSmiles(compounds_smiles[compounds_name[i]])
        fps1 = Chem.RDKFingerprint(m1)
        for j in range(i + 1, len(compounds_name)):
            # fixed: the inner molecule must come from compound j, not i
            m2 = Chem.MolFromSmiles(compounds_smiles[compounds_name[j]])
            fps2 = Chem.RDKFingerprint(m2)
            similarity_coefficient = DataStructs.FingerprintSimilarity(
                fps1, fps2)
            similarity_info.append(
                [compounds_name[i], compounds_name[j], similarity_coefficient])
    # fixed: save_any_obj_pkl needs the object to save as well as the path
    utils.save_any_obj_pkl(similarity_info, 'data/all_interactions.pkl')
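A minimal usage sketch, assuming rdkit (from rdkit import Chem, DataStructs) and the project's utils module are importable; the compound names and SMILES are toy values:

toy_smiles = {'aspirin': 'CC(=O)OC1=CC=CC=C1C(=O)O',
              'caffeine': 'CN1C=NC2=C1C(=O)N(C)C(=O)N2C',
              'ethanol': 'CCO'}
create_compound_similarity_network(toy_smiles)
# writes data/all_interactions.pkl with one [name_i, name_j, tanimoto] row per pair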
Example No. 6
def create_compound_similarity_network_mp(compounds_smiles_path,
                                          species_name='_DB',
                                          worker=4,
                                          top_ratio=0.04):
    compounds_smiles = utils.load_any_obj_pkl(compounds_smiles_path)
    # note: malformed SMILES can be located beforehand by checking that
    # Chem.MolFromSmiles(smiles) is not None for every entry
    ccd = calculate_molecular_similarity(compounds_smiles, worker=worker)
    all_corr = ccd.parallel_calculate_all_correlation()
    # flatten the per-worker result lists into one list of
    # [name_i, name_j, similarity] triples
    final_corr = []
    for item in all_corr:
        for a_corr in item:
            final_corr.append(a_corr)
    save_name = 'data/' + 'compound_similarity' + species_name + '.pkl'
    utils.save_any_obj_pkl(final_corr, save_name)
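calculate_molecular_similarity is defined elsewhere in the project; below is a minimal sketch of what such a helper might look like, using multiprocessing.Pool and RDKit. The class and method names mirror the call sites above; everything inside is an assumption:

from multiprocessing import Pool
from rdkit import Chem, DataStructs

class calculate_molecular_similarity:
    def __init__(self, compounds_smiles, worker=4):
        self.names = list(compounds_smiles.keys())
        # precompute one topological fingerprint per compound
        self.fps = {name: Chem.RDKFingerprint(Chem.MolFromSmiles(smiles))
                    for name, smiles in compounds_smiles.items()}
        self.worker = worker

    def _one_row(self, i):
        # similarities of compound i against every later compound
        fp_i = self.fps[self.names[i]]
        return [[self.names[i], self.names[j],
                 DataStructs.FingerprintSimilarity(fp_i, self.fps[self.names[j]])]
                for j in range(i + 1, len(self.names))]

    def parallel_calculate_all_correlation(self):
        # one result list per compound; the caller flattens them
        with Pool(self.worker) as pool:
            return pool.map(self._one_row, range(len(self.names)))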
Example No. 7
# hyperparameters such as ratio_most_affected_nodes, num_walks, walk_length,
# window_size, embedding_dimensional and num_negative are set earlier in the script
G_dynamic = load_any_obj_pkl(
    "graph_data/collaborate_network(2G)/collaborate_network_2007_2016.pkl")

model = DANRL(G_dynamic=G_dynamic,
              limit=ratio_most_affected_nodes,
              local_global=1,
              num_walks=num_walks,
              walk_length=walk_length,
              window=window_size,
              emb_dim=embedding_dimensional,
              n_negative=num_negative)
emb_dicts = model.train()
save_any_obj_pkl(
    obj=emb_dicts,
    path=
    "output/collaborate_network(2G)/DANRL/collaborate_network_2007_2016_embs.pkl"
)
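The saved object is a list with one {node: embedding vector} dict per snapshot (see the train method in Example No. 9); a minimal access sketch, with the slicing purely illustrative:

embs_last = emb_dicts[-1]             # embeddings for the latest snapshot
node = next(iter(embs_last))          # any node present in that snapshot
print(node, embs_last[node][:5])      # first five dimensions of its vector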

# import os
# import numpy as np
# col_net = "collaborate_network(2G)"
# G_dynamic = load_any_obj_pkl("graph_data/"+ col_net + "/collaborate_network_2007_2016.pkl")
# for dim in [16, 64, 128, 256]:
#     for win in range(16, 42, 2):
#         model = DANRL(G_dynamic=G_dynamic,
#                               limit=0.2,
#                               local_global=1,
#                               num_walks=20,
#                               walk_length=80,
#                               window=win,
Example No. 8
    else:
        # sweep the cluster count k from 2 to 50 and record four
        # internal cluster-validity indices for each k
        for i in range(2, 51, 1):
            # clu = KMeans(n_clusters=5, init="k-means++", n_init=10, max_iter=300, random_state=0)
            # clu = AgglomerativeClustering(n_clusters=i, affinity="euclidean", linkage='average')
            clu = SpectralClustering(n_clusters=i, gamma=0.01)
            clu.fit(X)
            labels = clu.labels_
            du[str(i)] = Dunn_Validity_Index(labels=labels,
                                             data=X,
                                             n_clusters=i)
            sil[str(i)] = silhouette_score(X, labels)
            db[str(i)] = davies_bouldin_score(X, labels)
            ch[str(i)] = calinski_harabasz_score(X, labels)  # calinski_harabaz_score was renamed in scikit-learn 0.20

    save_any_obj_pkl(
        sil, "metric_result\\" + clustering_method + "_" + embedding_method +
        "_sil.pkl")
    save_any_obj_pkl(
        db, "metric_result\\" + clustering_method + "_" + embedding_method +
        "_db.pkl")
    save_any_obj_pkl(
        ch, "metric_result\\" + clustering_method + "_" + embedding_method +
        "_ch.pkl")
    save_any_obj_pkl(
        du, "metric_result\\" + clustering_method + "_" + embedding_method +
        "_du.pkl")

    save_dict(filepath="metric_result\\clustering_method\\" +
              clustering_method + "_" + "silhouette_score.txt",
              mode="a",
              dic={embedding_method: sil})
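Dunn_Validity_Index is a project-local helper; below is a minimal sketch of the Dunn index it presumably computes (smallest between-cluster distance divided by largest within-cluster diameter), with the signature copied from the call above. The implementation is an assumption:

from scipy.spatial.distance import cdist, pdist

def Dunn_Validity_Index(labels, data, n_clusters):
    # data: (n_samples, n_features) array; labels: integer array from the clusterer
    clusters = [data[labels == k] for k in range(n_clusters)]
    # largest within-cluster pairwise distance (cluster diameter)
    max_diameter = max(pdist(c).max() for c in clusters if len(c) > 1)
    # smallest between-cluster pairwise distance
    min_separation = min(cdist(clusters[a], clusters[b]).min()
                         for a in range(n_clusters)
                         for b in range(a + 1, n_clusters))
    return min_separation / max_diameter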
Example No. 9
    def train(self):
        # note: size= and iter= are gensim 3.x parameter names
        # (vector_size= and epochs= in gensim 4)
        w2v = gensim.models.Word2Vec(sentences=None,
                                     size=self.emb_dim,
                                     window=self.window,
                                     sg=1,
                                     hs=0,
                                     negative=self.n_negative,
                                     ns_exponent=0.75,
                                     alpha=0.025,
                                     min_alpha=0.0001,
                                     min_count=1,
                                     sample=0.001,
                                     iter=4,
                                     workers=self.workers,
                                     seed=self.seed,
                                     corpus_file=None,
                                     sorted_vocab=1,
                                     batch_words=10000,
                                     compute_loss=False,
                                     max_vocab_size=None,
                                     max_final_vocab=None,
                                     trim_rule=None)
        for t in range(len(self.G_dynamic)):
            if t == 0:
                G0 = self.G_dynamic[t]
                sentences = simulate_walks(nx_graph=G0,
                                           num_walks=self.num_walks,
                                           weighted=True,
                                           walk_length=self.walk_length)
                sentences = [[str(j) for j in i] for i in sentences]

                print("-start node embedding on Graph 0" + "/" +
                      str(len(self.G_dynamic)))
                w2v.build_vocab(sentences=sentences,
                                update=False)  # init traning, so update False
                # train the Word2Vec model
                w2v.train(sentences=sentences,
                          total_examples=w2v.corpus_count,
                          epochs=w2v.iter)  # follow w2v constructor
                print("-end node embedding on Graph 0" + "/" +
                      str(len(self.G_dynamic)))

                emb_dict = {}  # {nodeID: emb_vector, ...}
                for node in self.G_dynamic[t].nodes():
                    emb_dict[node] = w2v.wv[str(node)]
                save_any_obj_pkl(
                    obj=emb_dict,
                    path=
                    "output/collaborate_network(2G)/DynAttriWalks/collaborate_network_"
                    + str(t) + "_embs.pkl")
                self.emb_dicts.append(emb_dict)
            else:
                G0 = self.G_dynamic[t - 1]  # previous graph
                G1 = self.G_dynamic[t]  # current graph
                print("-start selecting nodes on Graph " + str(t) + "/" +
                      str(len(self.G_dynamic)))
                node_update_list, self.reservoir, node_del, node_add = node_selecting_scheme(
                    graph_t0=G0,
                    graph_t1=G1,
                    reservoir_dict=self.reservoir,
                    limit=self.limit,
                    local_global=self.local_global)
                print("-end selecting nodes on Graph " + str(t) + "/" +
                      str(len(self.G_dynamic)))

                sentences = simulate_walks(nx_graph=G1,
                                           num_walks=self.num_walks,
                                           weighted=True,
                                           walk_length=self.walk_length,
                                           selected_nodes=node_update_list)
                sentences = [[str(j) for j in i] for i in sentences]

                print("-start node embedding on Graph " + str(t) + "/" +
                      str(len(self.G_dynamic)))
                w2v.build_vocab(sentences=sentences,
                                update=True)  # online update
                # incrementally train the Word2Vec model on the new walks
                w2v.train(sentences=sentences,
                          total_examples=w2v.corpus_count,
                          epochs=w2v.iter)
                print("-end node embedding on Graph " + str(t) + "/" +
                      str(len(self.G_dynamic)))

                emb_dict = {}  # {nodeID: emb_vector, ...}
                for node in self.G_dynamic[t].nodes():
                    emb_dict[node] = w2v.wv[str(node)]
                save_any_obj_pkl(
                    obj=emb_dict,
                    path=
                    "output/collaborate_network(2G)/DynAttriWalks/collaborate_network_"
                    + str(t) + "_embs.pkl")
                self.emb_dicts.append(emb_dict)
        return self.emb_dicts
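A design note on the else branch above: build_vocab(update=True) grows the existing vocabulary with nodes that first appear in snapshot t, and training only runs over walks started from the nodes chosen by node_selecting_scheme, so the embeddings of unaffected nodes carry over from the previous snapshot instead of being retrained from scratch.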