def rep_correlation(coords1, coords2):
    """Return the Pearson r between the distances of two coordinate sets.

    Each argument is handed to ``misc.distsFromCoords``. The two results are
    flattened, every position where the first one equals zero is dropped
    from both, and ``st.pearsonr`` is applied to what is left. Only the
    correlation coefficient is returned; the p-value is discarded.

    Raises:
        AssertionError: if the two distance arrays differ in shape, or if
            the second array still holds a zero after the filtering step.
    """
    first = misc.distsFromCoords(coords1)
    second = misc.distsFromCoords(coords2)
    assert first.shape == second.shape

    # Work on 1-D vectors so a single mask can index both arrays.
    first = first.flatten()
    second = second.flatten()

    # Keep only the entries whose value in the first array is non-zero.
    nonzero = first != 0
    first = first[nonzero]
    second = second[nonzero]

    # The second array must not have zeros where the first one had none.
    assert not np.any(second == 0)

    coefficient, _ = st.pearsonr(first, second)
    return coefficient
import numpy as np import sys sys.path.append("..") import data_tools as dt import array_tools as at import misc #"true" distance matrix cluster = dt.clusterFromBed(bedpath, None, None) contactMat = dt.matFromBed(bedpath, cluster) distMat = at.contactToDist(contactMat) at.makeSymmetric(distMat) for j in range(len(distMat)): #remove diagonal distMat[j, j] = 0 chromthreed_distMat = misc.distsFromCoords( "Chromosome3D/output/chr22_100kb/chr22_100kb_coords.tsv") chromthreed_r = misc.pearson(distMat, chromthreed_distMat) mmds_distMat = dt.clusterFromFile( "hic_data/GM12878_combined_22_10kb_mmds_coords.tsv").distMat() mmds_r = misc.pearson(distMat, mmds_distMat) cmds_distMat = dt.clusterFromFile( "hic_data/GM12878_combined_22_10kb_cmds_coords.tsv").distMat() cmds_r = misc.pearson(distMat, cmds_distMat) minimds_distMat = dt.clusterFromFile( "hic_data/GM12878_combined_22_10kb_minimds_coords.tsv").distMat() minimds_r = misc.pearson(distMat, minimds_distMat) mogen_distMat = misc.distsFromCoords(
def rep_correlation(coords1, coords2):
    """Return ``misc.pearson`` of the distances of two coordinate sets.

    Both arguments are converted with ``misc.distsFromCoords`` (first
    ``coords1``, then ``coords2``) and the two results are passed, in that
    order, to ``misc.pearson``, whose return value is handed back unchanged.
    """
    first, second = (
        misc.distsFromCoords(coords) for coords in (coords1, coords2)
    )
    return misc.pearson(first, second)
cmds_rs[i] = misc.pearson(cmds_true_mat, cmds_distMat) minimds_structure = dt.structure_from_file( "hic_data/GM12878_combined_{}_10kb_minimds_coords.tsv".format(chrom)) contactMat = dt.matFromBed(bedpath, minimds_structure) minimds_true_mat = at.contactToDist(contactMat) at.makeSymmetric(minimds_true_mat) for j in range(len(minimds_true_mat)): #remove diagonal minimds_true_mat[j, j] = 0 minimds_distMat = misc.distMat(minimds_structure) minimds_rs[i] = misc.pearson(minimds_true_mat, minimds_distMat) mogen_coords = np.loadtxt( "MOGEN/examples/hiC/output/GM12878_combined_{}_10kb_rep1_coords.tsv". format(chrom)) mogen_distMat = misc.distsFromCoords(mogen_coords) mogen_rs[i] = misc.pearson( mmds_true_mat, mogen_distMat) #mMDS and MOGEN use the same matrix input procedure chrom_sizes = np.loadtxt("chrom_sizes_10kb.txt") fig = plt.figure() ax = fig.add_subplot(111, frameon=False) ax.plot(chrom_sizes, mmds_rs, linestyle="None", marker="o", markerfacecolor="r", mec="r", markersize=10,
contactMat = dt.matFromBed(bedpath, cluster) distMat = at.contactToDist(contactMat) at.makeSymmetric(distMat) for j in range(len(distMat)): #remove diagonal distMat[j,j] = 0 mmds_distMat = dt.clusterFromFile("hic_data/GM12878_combined_{}_10kb_mmds_coords.tsv".format(chrom)).distMat() mmds_rs[i] = misc.pearson(distMat, mmds_distMat) cmds_distMat = dt.clusterFromFile("hic_data/GM12878_combined_{}_10kb_cmds_coords.tsv".format(chrom)).distMat() cmds_rs[i] = misc.pearson(distMat, cmds_distMat) minimds_distMat = dt.clusterFromFile("hic_data/GM12878_combined_{}_10kb_minimds_coords.tsv".format(chrom)).distMat() minimds_rs[i] = misc.pearson(distMat, minimds_distMat) mogen_distMat = misc.distsFromCoords("MOGEN/examples/hiC/output/GM12878_combined_{}_10kb_rep1_coords.tsv".format(chrom)) mogen_rs[i] = misc.pearson(distMat, mogen_distMat) chrom_sizes = np.loadtxt("chrom_sizes_10kb.txt") fig = plt.figure() ax = fig.add_subplot(111, frameon=False) ax.plot(chrom_sizes, mmds_rs, linestyle="None", marker="o", markerfacecolor="r", mec="r", markersize=10, label="Standard metric MDS") ax.plot(chrom_sizes, cmds_rs, linestyle="None", marker="o", markerfacecolor="g", mec="g", markersize=10, label="Classical MDS") ax.plot(chrom_sizes, minimds_rs, linestyle="None", marker="o",markerfacecolor="b", mec="b", markersize=10, label="miniMDS") ax.plot(chrom_sizes, mogen_rs, linestyle="None", marker="o",markerfacecolor="m", mec="m", markersize=10, label="MOGEN") x_offset = 1000 #small number to prevent things from getting cut off y_offset = 0.01 xmin = min(chrom_sizes) - x_offset xmax = max(chrom_sizes) + x_offset ymin = 0 - y_offset