def infer_structure(contactMat, structure, alpha, num_threads, classical=False):
    """Infers 3D coordinates for one structure

    contactMat must have as many rows as structure.nonzero_abs_indices()
    has entries, and no row may sum to zero (both are asserted).

    The matrix is passed to at.makeSymmetric (return value unused, so this
    is presumably an in-place symmetrization), converted to distances with
    at.contactToDist(contactMat, alpha), and embedded in three dimensions:
    with la.cmds when classical is true, otherwise with manifold.MDS on the
    precomputed dissimilarities using n_jobs=num_threads.

    The coordinates are stored through structure.setCoords; nothing is
    returned.
    """
    assert len(structure.nonzero_abs_indices()) == len(contactMat)

    at.makeSymmetric(contactMat)

    # A row with no contacts at all cannot be placed, so reject it up front.
    row_totals = np.array(list(map(sum, contactMat)))
    assert not np.any(row_totals == 0)

    distances = at.contactToDist(contactMat, alpha)
    at.makeSymmetric(distances)

    if not classical:
        # Metric MDS with a fresh, unseeded random state.
        embedder = manifold.MDS(
            n_components=3,
            metric=True,
            random_state=np.random.RandomState(),
            verbose=0,
            dissimilarity="precomputed",
            n_jobs=num_threads,
        )
        coords = embedder.fit_transform(distances)
    else:
        # classical MDS
        coords = la.cmds(distances)

    structure.setCoords(coords)
def fullMDS(path, classical=False):
    """MDS without partitioning

    Loads a cluster and its contact matrix from the BED file at path, then
    infers coordinates for the whole cluster in one call to infer_cluster.

    path: location of the BED file, passed to dt.clusterFromBed and
        dt.matFromBed.
    classical: forwarded to infer_cluster to choose classical MDS.

    Returns the cluster after infer_cluster has processed it.
    """
    cluster = dt.clusterFromBed(path, None, None)
    contactMat = dt.matFromBed(path, cluster)
    # No distance matrix is built here: infer_cluster converts the contact
    # matrix itself.
    infer_cluster(contactMat, cluster, classical)
    return cluster
def infer_clusters(contactMat, clusters, offsets, classical=False):
    """Infers 3D coordinates for multiple clusters with same contact matrix

    The point-number counts of all clusters must add up to the number of
    rows in contactMat, and no row may sum to zero (both are asserted).

    The matrix is passed to at.makeSymmetric (return value unused, so this
    is presumably an in-place symmetrization), converted with
    at.contactToDist, and embedded in three dimensions: with st.cmds when
    classical is true, otherwise with manifold.MDS seeded with 3 and
    n_jobs=-1.

    Each cluster is paired with one entry of offsets; the k-th point from
    cluster.getPoints() receives row (offset + k) of the embedding as its
    pos attribute. Nothing is returned.
    """
    assert sum(len(cluster.getPointNums()) for cluster in clusters) == len(contactMat)

    at.makeSymmetric(contactMat)

    # A row with no contacts at all cannot be placed, so reject it up front.
    row_totals = np.array(list(map(sum, contactMat)))
    assert not np.any(row_totals == 0)

    distances = at.contactToDist(contactMat)
    at.makeSymmetric(distances)

    if not classical:
        embedder = manifold.MDS(
            n_components=3,
            metric=True,
            random_state=np.random.RandomState(seed=3),
            verbose=0,
            dissimilarity="precomputed",
            n_jobs=-1,
        )
        coords = embedder.fit_transform(distances)
    else:
        # classical MDS
        coords = st.cmds(distances)

    for offset, cluster in zip(offsets, clusters):
        for k, point in enumerate(cluster.getPoints()):
            point.pos = coords[k + offset]
def infer_structure(contactMat, structure, alpha, num_threads, weight, classical=False):
    """Infers 3D coordinates for one structure

    contactMat must have as many rows as structure.nonzero_abs_indices()
    has entries (asserted). Each entry below the diagonal is replaced by
    (1 - weight) * observed + weight * expected[i - j - 1], where expected
    comes from get_expected(contactMat) and is indexed by the row/column
    separation minus one. The matrix is modified in place.

    The blended matrix is passed to at.makeSymmetric (return value unused,
    so this is presumably an in-place symmetrization), checked for rows
    that sum to zero, converted with at.contactToDist(contactMat, alpha),
    and divided by its mean. It is then embedded in three dimensions: with
    la.cmds when classical is true, otherwise with manifold.MDS using
    n_jobs=num_threads.

    The coordinates are stored through structure.setCoords; nothing is
    returned.
    """
    assert len(structure.nonzero_abs_indices()) == len(contactMat)

    expected = get_expected(contactMat)
    size = len(contactMat)
    # Row 0 has nothing below the diagonal, so start at row 1.
    for row in range(1, size):
        for col in range(row):
            observed = contactMat[row, col]
            contactMat[row, col] = (1 - weight) * observed + weight * expected[row - col - 1]
    at.makeSymmetric(contactMat)

    # A row with no contacts at all cannot be placed, so reject it up front.
    row_totals = np.array(list(map(sum, contactMat)))
    assert not np.any(row_totals == 0)

    distances = at.contactToDist(contactMat, alpha)
    at.makeSymmetric(distances)
    distances = distances / np.mean(distances)  # normalize

    if not classical:
        # Metric MDS with a fresh, unseeded random state.
        embedder = manifold.MDS(
            n_components=3,
            metric=True,
            random_state=np.random.RandomState(),
            verbose=0,
            dissimilarity="precomputed",
            n_jobs=num_threads,
        )
        coords = embedder.fit_transform(distances)
    else:
        # classical MDS
        coords = la.cmds(distances)

    structure.setCoords(coords)
def infer_structures(contactMat, structures, offsets, alpha, num_threads, classical=False):
    """Infers 3D coordinates for multiple structures with same contact matrix

    The point-number counts of all structures must add up to the number of
    rows in contactMat, and no row may sum to zero (both are asserted).

    The matrix is passed to at.makeSymmetric (return value unused, so this
    is presumably an in-place symmetrization), converted with
    at.contactToDist(contactMat, alpha), and embedded in three dimensions:
    with la.cmds when classical is true, otherwise with manifold.MDS using
    n_jobs=num_threads.

    Each structure is paired with one entry of offsets and receives, via
    setCoords, the slice of the embedding that starts at that offset and
    spans len(structure.getPoints()) rows. Nothing is returned.
    """
    assert sum(len(structure.getPointNums()) for structure in structures) == len(contactMat)

    at.makeSymmetric(contactMat)

    # A row with no contacts at all cannot be placed, so reject it up front.
    row_totals = np.array(list(map(sum, contactMat)))
    assert not np.any(row_totals == 0)

    distances = at.contactToDist(contactMat, alpha)
    at.makeSymmetric(distances)

    if not classical:
        # Metric MDS with a fresh, unseeded random state.
        embedder = manifold.MDS(
            n_components=3,
            metric=True,
            random_state=np.random.RandomState(),
            verbose=0,
            dissimilarity="precomputed",
            n_jobs=num_threads,
        )
        coords = embedder.fit_transform(distances)
    else:
        # classical MDS
        coords = la.cmds(distances)

    for offset, structure in zip(offsets, structures):
        n_points = len(structure.getPoints())
        structure.setCoords(coords[offset:offset + n_points])
def distmat(contactMat, structure, alpha, weight, num_threads):
    """Builds the normalized distance matrix for one structure

    contactMat must have as many rows as structure.nonzero_abs_indices()
    has entries (asserted). Every off-diagonal pair (i, j) and (j, i) is
    overwritten in place with
    (1 - weight) * contactMat[i, j] + weight * expected[i - j - 1],
    where expected comes from get_expected(contactMat) and the value read
    is the one below the diagonal.

    After asserting that no row sums to zero, the matrix is converted with
    at.contactToDist(contactMat, alpha) and the result divided by its mean
    is returned.

    num_threads is accepted but not used by this function.
    """
    assert len(structure.nonzero_abs_indices()) == len(contactMat)

    expected = get_expected(contactMat)
    size = len(contactMat)
    # Row 0 has nothing below the diagonal, so start at row 1.
    for row in range(1, size):
        for col in range(row):
            blended = (1 - weight) * contactMat[row, col] + weight * expected[row - col - 1]
            # Write both triangles so the matrix stays symmetric.
            contactMat[row, col] = contactMat[col, row] = blended

    # A row with no contacts at all cannot be converted, so reject it.
    row_totals = np.array(list(map(sum, contactMat)))
    assert not np.any(row_totals == 0)

    distances = at.contactToDist(contactMat, alpha)
    return distances / np.mean(distances)  # normalize
def infer_cluster(contactMat, cluster, classical=False):
    """Infers 3D coordinates for one cluster

    contactMat must have as many rows as cluster.getPointNums() has
    entries, and no row may sum to zero (both are asserted).

    The matrix is passed to at.makeSymmetric (return value unused, so this
    is presumably an in-place symmetrization), converted with
    at.contactToDist, and embedded in three dimensions: with st.cmds when
    classical is true, otherwise with manifold.MDS seeded with 3 and
    n_jobs=-1.

    The k-th point from cluster.getPoints() receives row k of the embedding
    as its pos attribute. Nothing is returned.
    """
    assert len(cluster.getPointNums()) == len(contactMat)

    at.makeSymmetric(contactMat)

    # A row with no contacts at all cannot be placed, so reject it up front.
    row_totals = np.array(list(map(sum, contactMat)))
    assert not np.any(row_totals == 0)

    distances = at.contactToDist(contactMat)
    at.makeSymmetric(distances)

    if not classical:
        embedder = manifold.MDS(
            n_components=3,
            metric=True,
            random_state=np.random.RandomState(seed=3),
            verbose=0,
            dissimilarity="precomputed",
            n_jobs=-1,
        )
        fitted = embedder.fit(distances)
        coords = fitted.embedding_
    else:
        # classical MDS
        coords = st.cmds(distances)

    for k, point in enumerate(cluster.getPoints()):
        point.pos = coords[k]
from matplotlib import pyplot as plt
import numpy as np
import sys
sys.path.append("..")
import data_tools as dt
import array_tools as at
import misc

# "true" distance matrix
# NOTE(review): bedpath is not assigned anywhere in this fragment; it must be
# defined before this point.
cluster = dt.clusterFromBed(bedpath, None, None)
contactMat = dt.matFromBed(bedpath, cluster)
distMat = at.contactToDist(contactMat)
at.makeSymmetric(distMat)
np.fill_diagonal(distMat, 0)  # remove diagonal

# Correlate the reference distances with those from each method's coordinates.
chromthreed_distMat = misc.distsFromCoords(
    "Chromosome3D/output/chr22_100kb/chr22_100kb_coords.tsv")
chromthreed_r = misc.pearson(distMat, chromthreed_distMat)

mmds_cluster = dt.clusterFromFile(
    "hic_data/GM12878_combined_22_10kb_mmds_coords.tsv")
mmds_distMat = mmds_cluster.distMat()
mmds_r = misc.pearson(distMat, mmds_distMat)

cmds_cluster = dt.clusterFromFile(
    "hic_data/GM12878_combined_22_10kb_cmds_coords.tsv")
cmds_distMat = cmds_cluster.distMat()
cmds_r = misc.pearson(distMat, cmds_distMat)

minimds_cluster = dt.clusterFromFile(
    "hic_data/GM12878_combined_22_10kb_minimds_coords.tsv")
minimds_distMat = minimds_cluster.distMat()
minimds_r = misc.pearson(distMat, minimds_distMat)
def normalized_dist_mat(path, structure, alpha=4):
    """Standard processing for creating distance matrix

    Loads the contact matrix for structure from the BED file at path with
    matFromBed, converts it with at.contactToDist, passes the result to
    at.makeSymmetric (return value unused, so this is presumably an
    in-place symmetrization), and divides it by its mean.

    path: location of the BED file, passed to matFromBed.
    structure: structure object passed to matFromBed.
    alpha: second argument to at.contactToDist. Defaults to 4, the value
        this function previously hard-coded, so existing callers are
        unaffected.

    Returns the mean-normalized distance matrix.
    """
    contacts = matFromBed(path, structure)
    dists = at.contactToDist(contacts, alpha)
    at.makeSymmetric(dists)
    return dists / np.mean(dists)  # normalize
# Chromosomes to evaluate: the 22 numbered chromosomes plus "X".
chroms = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, "X")
n = len(chroms)

# One correlation slot per chromosome for each method.
# NOTE(review): minimds_rs and mogen_rs are allocated here but not filled in
# the visible part of the loop; the loop body presumably continues past this
# fragment.
mmds_rs = np.zeros(n)
cmds_rs = np.zeros(n)
minimds_rs = np.zeros(n)
mogen_rs = np.zeros(n)

for i, chrom in enumerate(chroms):
    bedpath = "hic_data/GM12878_combined_{}_10kb.bed".format(chrom)

    # Reference distances are rebuilt from the contact matrix loaded against
    # the mmds structure, then compared with distances from its coordinates.
    mmds_structure = dt.structure_from_file(
        "hic_data/GM12878_combined_{}_10kb_mmds_coords.tsv".format(chrom))
    contactMat = dt.matFromBed(bedpath, mmds_structure)
    mmds_true_mat = at.contactToDist(contactMat)
    at.makeSymmetric(mmds_true_mat)
    for j in range(len(mmds_true_mat)): #remove diagonal
        mmds_true_mat[j, j] = 0
    mmds_distMat = misc.distMat(mmds_structure)
    mmds_rs[i] = misc.pearson(mmds_true_mat, mmds_distMat)

    # Same comparison for the cmds coordinates; the contact matrix is loaded
    # again, this time against the cmds structure.
    cmds_structure = dt.structure_from_file(
        "hic_data/GM12878_combined_{}_10kb_cmds_coords.tsv".format(chrom))
    contactMat = dt.matFromBed(bedpath, cmds_structure)
    cmds_true_mat = at.contactToDist(contactMat)
    at.makeSymmetric(cmds_true_mat)
    for j in range(len(cmds_true_mat)): #remove diagonal
        cmds_true_mat[j, j] = 0
    cmds_distMat = misc.distMat(cmds_structure)
    cmds_rs[i] = misc.pearson(cmds_true_mat, cmds_distMat)