def run_snf2(w1, w2, wall_label, n_clusters=10):
    """Fuse two partially-overlapping views with SNF2 and score the clustering.

    Builds a per-view affinity matrix, diffuses the two networks with ``snf2``
    (which, unlike classic SNF, also handles samples unique to one view),
    embeds the fused networks with ``tsne_p_deep``, clusters the affinity
    matrix of that embedding, and reports the V-measure (NMI) against the
    ground-truth labels.

    Parameters
    ----------
    w1, w2 : pd.DataFrame
        Feature matrices (samples x features) for the two views, indexed by
        sample id. Presumably shared samples carry a common id across views
        so ``data_indexing`` can align them — TODO confirm against the
        ``data_indexing`` implementation.
    wall_label : pd.DataFrame
        Ground-truth labels for the union of samples; must contain a
        ``"label"`` column and an index covering all fused samples.
    n_clusters : int, optional
        Number of spectral clusters (default 10, the original hard-coded
        value).

    Returns
    -------
    float
        V-measure score of the final clustering.
    """
    # Per-view affinity matrices from squared-distance matrices.
    Dist1 = dist2(w1.values, w1.values)
    Dist2 = dist2(w2.values, w2.values)
    S1 = snf.compute.affinity_matrix(Dist1, K=args.neighbor_size, mu=args.mu)
    S2 = snf.compute.affinity_matrix(Dist2, K=args.neighbor_size, mu=args.mu)

    # Index bookkeeping: which samples the two views share and which are
    # unique to each; this drives the SNF2 diffusion below.
    (
        dicts_common,
        dicts_commonIndex,
        dict_sampleToIndexs,
        dicts_unique,
        original_order,
    ) = data_indexing([w1, w2])
    S1_df = pd.DataFrame(data=S1, index=original_order[0], columns=original_order[0])
    S2_df = pd.DataFrame(data=S2, index=original_order[1], columns=original_order[1])

    # SNF2 diffusion: fuse the two networks while retaining view-unique samples.
    fused_networks = snf2(
        args,
        [S1_df, S2_df],
        dicts_common=dicts_common,
        dicts_unique=dicts_unique,
        original_order=original_order,
    )
    S1_fused = fused_networks[0]
    S2_fused = fused_networks[1]

    # Joint embedding of both fused networks, one row per union sample.
    S_final = tsne_p_deep(
        args,
        dicts_commonIndex,
        dict_sampleToIndexs,
        [S1_fused.values, S2_fused.values],
    )
    S_final_df = pd.DataFrame(data=S_final, index=dict_sampleToIndexs.keys())
    # Align embedding rows with the label order before scoring.
    S_final_df = S_final_df.reindex(wall_label.index.tolist())

    # Cluster the affinity matrix of the embedding and score against truth.
    Dist_final = dist2(S_final_df.values, S_final_df.values)
    Wall_final = snf.compute.affinity_matrix(
        Dist_final, K=args.neighbor_size, mu=args.mu
    )
    labels_final = spectral_clustering(Wall_final, n_clusters=n_clusters)
    score = v_measure_score(wall_label["label"].tolist(), labels_final)
    print("SNF2 for clustering union 832 samples NMI score:", score)
    return score
''' First we integrate modality from different datasets ''' ######################################################### # intergrate cnv print("start integrating cnv data!") ( dicts_common, dicts_commonIndex, dict_sampleToIndexs, dicts_unique, original_order, ) = data_indexing([cnv1, cnv2, cnv3]) dist_cnv1 = dist2(cnv1.values, cnv1.values) dist_cnv2 = dist2(cnv2.values, cnv2.values) dist_cnv3 = dist2(cnv3.values, cnv3.values) S1_cnv1 = snf.compute.affinity_matrix(dist_cnv1, K=args.neighbor_size, mu=args.mu) S1_cnv2 = snf.compute.affinity_matrix(dist_cnv2, K=args.neighbor_size, mu=args.mu) S1_cnv3 = snf.compute.affinity_matrix(dist_cnv3, K=args.neighbor_size, mu=args.mu) S1_df = pd.DataFrame(data=S1_cnv1, index=original_order[0],
# Split the ground-truth label column out of both views' feature tables.
label = ["label"]
w1_label = w1[label]
w2_label = w2[label]
# Labels of the samples shared by both views (index prefixed "common_" —
# presumably set upstream when the data was prepared; verify).
wcom_label = w1_label.filter(regex="^common_", axis=0)
w1.drop(label, axis=1, inplace=True)
w2.drop(label, axis=1, inplace=True)
# Union of labels over both views; common samples occur twice, keep first.
wall_label = pd.concat([w1_label, w2_label], axis=0)
wall_label = wall_label[~wall_label.index.duplicated(keep="first")]

"""
Step2 : Use SNF2 to fuse not only the common samples, but also the unique samples
"""
# Per-view affinity matrices from squared-distance matrices.
Dist1 = dist2(w1.values, w1.values)
Dist2 = dist2(w2.values, w2.values)
S1 = snf.compute.affinity_matrix(Dist1, K=args.neighbor_size, mu=args.mu)
S2 = snf.compute.affinity_matrix(Dist2, K=args.neighbor_size, mu=args.mu)

# Baseline: cluster each view's raw affinity matrix before any diffusion,
# to give a pre-fusion reference NMI for each view.
labels_s1 = spectral_clustering(S1, n_clusters=10)
score1 = v_measure_score(w1_label["label"].tolist(), labels_s1)
print("Before diffusion for full {} p1 NMI score: ".format(len(labels_s1)), score1)
labels_s2 = spectral_clustering(S2, n_clusters=10)
score2 = v_measure_score(w2_label["label"].tolist(), labels_s2)
print("Before diffusion for full {} p2 NMI score:".format(len(labels_s2)), score2)

# Do SNF2 diffusion
# NOTE(review): statement truncated in this view — continues past this chunk.
(
# NOTE(review): chunk starts mid-script — `label` and `w1_label` are defined
# before this view.
w2_label = w2[label]
# Labels of the samples shared by both views (index prefixed "common_").
wcom_label = w1_label.filter(regex="^common_", axis=0)
w1.drop(label, axis=1, inplace=True)
w2.drop(label, axis=1, inplace=True)
# Union of labels over both views; drop the duplicated common samples.
wall_label = pd.concat([w1_label, w2_label], axis=0)
wall_label = wall_label[~wall_label.index.duplicated(keep="first")]

"""
Step1 : Apply the original SNF on the common samples, and the score will be a reference point
"""
# Restrict both views to the shared samples only.
w1_com = w1.filter(regex="^common_", axis=0)
w2_com = w2.filter(regex="^common_", axis=0)
# need to make sure the order of common samples are the same for all views before fusing
dist1_com = dist2(w1_com.values, w1_com.values)
dist2_com = dist2(w2_com.values, w2_com.values)
S1_com = snf.compute.affinity_matrix(dist1_com, K=args.neighbor_size, mu=args.mu)
S2_com = snf.compute.affinity_matrix(dist2_com, K=args.neighbor_size, mu=args.mu)
# Classic SNF fusion on the intersection. NOTE(review): t=10, K=20 are
# hard-coded here while the affinity K above comes from CLI args —
# presumably intentional for the baseline; confirm.
fused_network = snf.snf([S1_com, S2_com], t=10, K=20)
labels_com = spectral_clustering(fused_network, n_clusters=10)
score_com = v_measure_score(wcom_label["label"].tolist(), labels_com)
print("Original SNF for clustering intersecting 832 samples NMI score: ", score_com)

# Do SNF2 diffusion
# NOTE(review): statement truncated in this view — continues past this chunk.
(
    dicts_common,
    dicts_commonIndex,
    dict_sampleToIndexs,
    dicts_unique,
# NOTE(review): chunk starts mid-script — `testdata_dir` and the cnv_/meth_/
# mirna_/rnaseq_ path variables are defined before this view.
rppa_ = os.path.join(testdata_dir, "rppa_426x204.csv")
# Load the five modality tables; the first CSV column holds the sample ids.
cnv = pd.read_csv(cnv_, index_col=0)
meth = pd.read_csv(meth_, index_col=0)
mirna = pd.read_csv(mirna_, index_col=0)
rnaseq = pd.read_csv(rnaseq_, index_col=0)
rppa = pd.read_csv(rppa_, index_col=0)
print("finish loading data!")

# data indexing: shared vs. modality-unique samples across the five tables.
# NOTE(review): this data_indexing returns 3 values, while other scripts in
# this project unpack 5 — presumably a different helper version; verify.
dicts_common, dicts_unique, original_order = data_indexing(
    [cnv, meth, mirna, rnaseq, rppa])
print("finish indexing data!")

# Build a similarity network for each modality: squared distances, then
# affinity matrices with fixed K=20 neighbors and mu=0.5.
dist_cnv = dist2(cnv.values, cnv.values)
dist_meth = dist2(meth.values, meth.values)
dist_mirna = dist2(mirna.values, mirna.values)
dist_rnaseq = dist2(rnaseq.values, rnaseq.values)
dist_rppa = dist2(rppa.values, rppa.values)
S1_cnv = snf.compute.affinity_matrix(dist_cnv, K=20, mu=0.5)
S2_meth = snf.compute.affinity_matrix(dist_meth, K=20, mu=0.5)
S3_mirna = snf.compute.affinity_matrix(dist_mirna, K=20, mu=0.5)
S4_rnaseq = snf.compute.affinity_matrix(dist_rnaseq, K=20, mu=0.5)
S5_rppa = snf.compute.affinity_matrix(dist_rppa, K=20, mu=0.5)
print("finish building individual similarity network!")

# Do SNF2 diffusion
# NOTE(review): statement truncated in this view — continues past this chunk.
S1_df = pd.DataFrame(data=S1_cnv, index=original_order[0],