Example #1
0
def run_snf2(w1, w2, wall_label):
    """Fuse two sample-by-feature views with SNF2 and score the clustering.

    Builds an affinity graph per view, diffuses the pair with SNF2 (which
    handles both shared and view-unique samples), embeds the fused networks
    via ``tsne_p_deep``, then prints and returns the NMI (v-measure) of a
    10-way spectral clustering against ``wall_label["label"]``.

    Relies on the module-level ``args`` for ``neighbor_size`` and ``mu``.
    """
    # Per-view pairwise distances -> scaled-exponential affinity graphs.
    affinity_a = snf.compute.affinity_matrix(
        dist2(w1.values, w1.values), K=args.neighbor_size, mu=args.mu
    )
    affinity_b = snf.compute.affinity_matrix(
        dist2(w2.values, w2.values), K=args.neighbor_size, mu=args.mu
    )

    # Cross-view bookkeeping: shared samples, their per-view index
    # positions, a sample -> indices lookup, view-unique samples, and the
    # original row order of each view.
    (dicts_common, dicts_commonIndex, dict_sampleToIndexs,
     dicts_unique, original_order) = data_indexing([w1, w2])

    frame_a = pd.DataFrame(
        data=affinity_a, index=original_order[0], columns=original_order[0]
    )
    frame_b = pd.DataFrame(
        data=affinity_b, index=original_order[1], columns=original_order[1]
    )

    # SNF2 diffusion over both labelled affinity graphs.
    fused_networks = snf2(
        args,
        [frame_a, frame_b],
        dicts_common=dicts_common,
        dicts_unique=dicts_unique,
        original_order=original_order,
    )

    # Embed the two fused networks into one space covering all samples.
    embedding_matrix = tsne_p_deep(
        args,
        dicts_commonIndex,
        dict_sampleToIndexs,
        [fused_networks[0].values, fused_networks[1].values],
    )

    # Align embedding rows with the label table before scoring.
    embedding = pd.DataFrame(
        data=embedding_matrix, index=dict_sampleToIndexs.keys()
    )
    embedding = embedding.reindex(wall_label.index.tolist())

    # Affinity graph over the embedding, then cluster and score it.
    fused_affinity = snf.compute.affinity_matrix(
        dist2(embedding.values, embedding.values),
        K=args.neighbor_size,
        mu=args.mu,
    )

    labels_final = spectral_clustering(fused_affinity, n_clusters=10)
    score = v_measure_score(wall_label["label"].tolist(), labels_final)
    print("SNF2 for clustering union 832 samples NMI score:", score)
    return score
Example #2
0
'''
    First we integrate modality from different datasets
'''
#########################################################

# Integrate the CNV modality across the three datasets.
print("start integrating cnv data!")
# data_indexing aligns the three CNV frames: shared samples, their
# per-view index positions, a sample -> indices lookup, view-unique
# samples, and each frame's original row order.
(
    dicts_common,
    dicts_commonIndex,
    dict_sampleToIndexs,
    dicts_unique,
    original_order,
) = data_indexing([cnv1, cnv2, cnv3])

# Pairwise distance matrix of each dataset against itself.
dist_cnv1 = dist2(cnv1.values, cnv1.values)
dist_cnv2 = dist2(cnv2.values, cnv2.values)
dist_cnv3 = dist2(cnv3.values, cnv3.values)

# Distances -> affinity graphs (K nearest neighbours, scaling mu from args).
S1_cnv1 = snf.compute.affinity_matrix(dist_cnv1,
                                      K=args.neighbor_size,
                                      mu=args.mu)
S1_cnv2 = snf.compute.affinity_matrix(dist_cnv2,
                                      K=args.neighbor_size,
                                      mu=args.mu)
S1_cnv3 = snf.compute.affinity_matrix(dist_cnv3,
                                      K=args.neighbor_size,
                                      mu=args.mu)

# Label the affinity matrix with the original sample order.
# NOTE(review): this statement is truncated in this excerpt; the
# pd.DataFrame(...) call continues past the last visible line.
S1_df = pd.DataFrame(data=S1_cnv1,
                     index=original_order[0],
Example #3
0
# Split the "label" column out of both views, then drop it from the features.
label = ["label"]
w1_label = w1[label]
w2_label = w2[label]
# Labels of the samples shared by both views (index prefixed "common_").
wcom_label = w1_label.filter(regex="^common_", axis=0)
w1.drop(label, axis=1, inplace=True)
w2.drop(label, axis=1, inplace=True)
# Union of both label tables; keep the first copy of each duplicated
# (i.e. shared) sample index.
wall_label = pd.concat([w1_label, w2_label], axis=0)
wall_label = wall_label[~wall_label.index.duplicated(keep="first")]



"""
    Step2 : Use SNF2 to fuse not only the common samples, but also the unique samples
"""

# Per-view distance matrices and affinity graphs over ALL samples.
Dist1 = dist2(w1.values, w1.values)
Dist2 = dist2(w2.values, w2.values)

S1 = snf.compute.affinity_matrix(Dist1, K=args.neighbor_size, mu=args.mu)
S2 = snf.compute.affinity_matrix(Dist2, K=args.neighbor_size, mu=args.mu)

# Baseline: cluster each un-diffused view on its own and report NMI.
labels_s1 = spectral_clustering(S1, n_clusters=10)
score1 = v_measure_score(w1_label["label"].tolist(), labels_s1)
print("Before diffusion for full {} p1 NMI score: ".format(len(labels_s1)), score1)

labels_s2 = spectral_clustering(S2, n_clusters=10)
score2 = v_measure_score(w2_label["label"].tolist(), labels_s2)
print("Before diffusion for full {} p2 NMI score:".format(len(labels_s2)), score2)

# Do SNF2 diffusion
# NOTE(review): truncated in this excerpt -- the tuple unpack opened on the
# next line continues past the last visible line.
(
Example #4
0
# NOTE(review): `label` and `w1_label` are defined before this excerpt begins.
w2_label = w2[label]
# Labels of the samples shared by both views (index prefixed "common_").
wcom_label = w1_label.filter(regex="^common_", axis=0)
w1.drop(label, axis=1, inplace=True)
w2.drop(label, axis=1, inplace=True)
# Union of both label tables; keep the first copy of each duplicated sample.
wall_label = pd.concat([w1_label, w2_label], axis=0)
wall_label = wall_label[~wall_label.index.duplicated(keep="first")]


"""
    Step1 : Apply the original SNF on the common samples, and the score will be a reference point
"""
w1_com = w1.filter(regex="^common_", axis=0)
w2_com = w2.filter(regex="^common_", axis=0)

# need to make sure the order of common samples are the same for all views before fusing
dist1_com = dist2(w1_com.values, w1_com.values)
dist2_com = dist2(w2_com.values, w2_com.values)
S1_com = snf.compute.affinity_matrix(dist1_com, K=args.neighbor_size, mu=args.mu)
S2_com = snf.compute.affinity_matrix(dist2_com, K=args.neighbor_size, mu=args.mu)

# Classic SNF fusion on the intersection only, as the reference score.
fused_network = snf.snf([S1_com, S2_com], t=10, K=20)
labels_com = spectral_clustering(fused_network, n_clusters=10)
score_com = v_measure_score(wcom_label["label"].tolist(), labels_com)
print("Original SNF for clustering intersecting 832 samples NMI score: ", score_com)

# Do SNF2 diffusion
# NOTE(review): truncated in this excerpt -- the tuple unpack continues
# past the last visible line.
(
    dicts_common,
    dicts_commonIndex,
    dict_sampleToIndexs,
    dicts_unique,
Example #5
0
# NOTE(review): the other *_ path variables are defined before this excerpt.
rppa_ = os.path.join(testdata_dir, "rppa_426x204.csv")

# Load the five omics modalities; first CSV column is the sample index.
cnv = pd.read_csv(cnv_, index_col=0)
meth = pd.read_csv(meth_, index_col=0)
mirna = pd.read_csv(mirna_, index_col=0)
rnaseq = pd.read_csv(rnaseq_, index_col=0)
rppa = pd.read_csv(rppa_, index_col=0)
print("finish loading data!")

# data indexing
# NOTE(review): here data_indexing is unpacked into 3 values, while other
# snippets in this file unpack 5 -- presumably a different API revision;
# confirm against the data_indexing in use.
dicts_common, dicts_unique, original_order = data_indexing(
    [cnv, meth, mirna, rnaseq, rppa])
print("finish indexing data!")

# build similarity networks for each motality
dist_cnv = dist2(cnv.values, cnv.values)
dist_meth = dist2(meth.values, meth.values)
dist_mirna = dist2(mirna.values, mirna.values)
dist_rnaseq = dist2(rnaseq.values, rnaseq.values)
dist_rppa = dist2(rppa.values, rppa.values)

# Distances -> affinity graphs; K and mu hard-coded here (not from args).
S1_cnv = snf.compute.affinity_matrix(dist_cnv, K=20, mu=0.5)
S2_meth = snf.compute.affinity_matrix(dist_meth, K=20, mu=0.5)
S3_mirna = snf.compute.affinity_matrix(dist_mirna, K=20, mu=0.5)
S4_rnaseq = snf.compute.affinity_matrix(dist_rnaseq, K=20, mu=0.5)
S5_rppa = snf.compute.affinity_matrix(dist_rppa, K=20, mu=0.5)
print("finish building individual similarity network!")

# Do SNF2 diffusion
# NOTE(review): this statement is truncated in this excerpt; the
# pd.DataFrame(...) call continues past the last visible line.
S1_df = pd.DataFrame(data=S1_cnv,
                     index=original_order[0],