def interMDS(names, prefix, inter_res, intra_res, full, args):
    """Infer high-resolution structures for several chromosomes together.

    For every name a low-resolution structure is loaded from the file
    "<prefix>_<name>_<intra res string>.bed" with its chromosome parameters
    coarsened to ``inter_res``.  All low-resolution structures are inferred
    jointly from the matrix returned by ``get_inter_mat``.  Each chromosome
    is then inferred on its own at high resolution, and the (r, t) pair
    returned by ``la.getTransformation`` is applied in two steps: ``r``
    immediately, ``t`` at the end after scaling it by the mean ratio of
    high- to low-resolution radii of gyration.

    Args:
        names: chromosome names; used to build the BED file paths.
        prefix: path prefix of the BED files.
        inter_res: resolution assigned to the low-resolution chromosomes.
        intra_res: resolution used in the BED file names.
        full: if truthy use ``mm.fullMDS``, otherwise ``mm.partitionedMDS``.
        args: indexable parameters; ``args[3]`` and ``args[4]`` are forwarded
            to ``infer_structures`` and ``mm.fullMDS``, and the whole object
            is handed to ``mm.partitionedMDS``.

    Returns:
        List with one high-resolution structure per entry of ``names``.
    """
    inter_res_string = tools.get_res_string(inter_res)
    intra_res_string = tools.get_res_string(intra_res)

    def bed_path(chrom_name):
        # the same intra-chromosomal file feeds both resolutions
        return "{}_{}_{}.bed".format(prefix, chrom_name, intra_res_string)

    # get low-res structures from intra files
    low_structures = []
    for chrom_name in names:
        bed = bed_path(chrom_name)
        coarse_chrom = dt.chromFromBed(bed)
        # reduce res; snap the bounds outwards onto the coarse grid
        coarse_chrom.res = inter_res
        coarse_chrom.minPos = int(np.floor(float(coarse_chrom.minPos) / coarse_chrom.res)) * coarse_chrom.res
        coarse_chrom.maxPos = int(np.ceil(float(coarse_chrom.maxPos) / coarse_chrom.res)) * coarse_chrom.res
        low_structures.append(dt.structureFromBed(bed, coarse_chrom))

    # for correct indexing: offsets[k] is the number of points before chrom k
    offsets = np.zeros(len(names), dtype=int)
    for idx, previous in enumerate(low_structures[:-1], 1):
        offsets[idx] = offsets[idx - 1] + len(previous.getPoints())

    inter_mat = get_inter_mat(prefix, inter_res_string, intra_res_string, low_structures, offsets)

    # perform MDS at low resolution on all chroms
    infer_structures(inter_mat, low_structures, offsets, args[3], args[4])

    # perform MDS at high resolution on each chrom
    high_structures = []
    inferred_low_structures = []
    translations = []
    for true_low, chrom_name in zip(low_structures, names):
        bed = bed_path(chrom_name)
        if full:
            high_structure = mm.fullMDS(bed, False, args[4], args[3])
        else:
            high_structure = mm.partitionedMDS(bed, args)
        high_structures.append(high_structure)

        ratio = true_low.chrom.res / high_structure.chrom.res
        inferred_low = dt.highToLow(high_structure, ratio)
        inferred_low_structures.append(inferred_low)

        # rescale the coarsened inference to the size of the true low-res one
        rescaling_factor = la.radius_of_gyration(true_low) / la.radius_of_gyration(inferred_low)
        rescaled_coords = [rescaling_factor * coord for coord in inferred_low.getCoords()]
        for idx, point in enumerate(inferred_low.getPoints()):
            point.pos = rescaled_coords[idx]

        r, t = la.getTransformation(inferred_low, true_low)
        # do not translate now (the translation still needs rescaling)
        high_structure.transform(r, None)
        translations.append(t)

    # translate (with rescaling)
    low_rgs = np.array([la.radius_of_gyration(s) for s in low_structures])
    high_rgs = np.array([la.radius_of_gyration(s) for s in high_structures])
    scaling_factor = np.mean(high_rgs / low_rgs)
    for high_structure, t in zip(high_structures, translations):
        high_structure.transform(None, scaling_factor * t)  # rescale translation

    return high_structures
def interMDS(names, inter_prefix, intra_prefix, inter_res, intra_res, intra_low_res, args):
    """Infer high-resolution clusters for several chromosomes together.

    Low-resolution clusters are loaded from
    "<intra_prefix>_<name>_<inter res string>.bed" and inferred jointly from
    the matrix returned by ``get_inter_mat``.  Each chromosome is then
    inferred on its own at high resolution, and the (r, t) pair returned by
    ``la.getTransformation`` is applied in two steps: ``r`` immediately,
    ``t`` at the end after scaling it by the mean ratio of high- to
    low-resolution radii of gyration.

    Args:
        names: chromosome names; used to build the BED file paths.
        inter_prefix: forwarded to ``get_inter_mat``.
        intra_prefix: path prefix of the per-chromosome BED files.
        inter_res: resolution of the low-resolution clusters.
        intra_res: resolution of the high-resolution BED files.
        intra_low_res: if None, ``mm.fullMDS`` is used for each chromosome;
            otherwise ``mm.partitionedMDS`` is used with the BED file at
            this resolution as its low-resolution input.
        args: indexable parameters; ``args[4]`` is forwarded to
            ``mm.infer_clusters`` and ``mm.fullMDS``, and the whole object
            is handed to ``mm.partitionedMDS``.

    Returns:
        List with one high-resolution cluster per entry of ``names``.
    """
    inter_res_string = tools.get_res_string(inter_res)
    intra_res_string = tools.get_res_string(intra_res)
    intra_low_res_string = None if intra_low_res is None else tools.get_res_string(intra_low_res)

    def bed_path(chrom_name, res_string):
        return "{}_{}_{}.bed".format(intra_prefix, chrom_name, res_string)

    # get low-res clusters from intra files
    low_clusters = []
    for chrom_name in names:
        low_clusters.append(dt.clusterFromBed(bed_path(chrom_name, inter_res_string), None, None))

    # for correct indexing: offsets[k] is the number of points before chrom k
    offsets = np.zeros(len(names), dtype=np.int32)
    for idx, previous in enumerate(low_clusters[:-1], 1):
        offsets[idx] = offsets[idx - 1] + len(previous.getPoints())

    inter_mat = get_inter_mat(intra_prefix, inter_prefix, inter_res, low_clusters, offsets)

    # perform MDS at low resolution on all chroms
    mm.infer_clusters(inter_mat, low_clusters, offsets, args[4])

    # perform MDS at high resolution on each chrom
    high_clusters = []
    inferred_low_clusters = []
    translations = []
    for true_low, chrom_name in zip(low_clusters, names):
        high_path = bed_path(chrom_name, intra_res_string)
        if intra_low_res_string is None:
            high_cluster = mm.fullMDS(high_path, False, args[4])
        else:
            low_path = bed_path(chrom_name, intra_low_res_string)
            high_cluster = mm.partitionedMDS(high_path, low_path, args)
        high_clusters.append(high_cluster)

        ratio = true_low.chrom.res / high_cluster.chrom.res
        inferred_low = dt.highToLow(high_cluster, ratio)
        inferred_low_clusters.append(inferred_low)

        # rescale the coarsened inference to the size of the true low-res one
        rescaling_factor = la.radius_of_gyration(true_low) / la.radius_of_gyration(inferred_low)
        rescaled_coords = [rescaling_factor * coord for coord in inferred_low.getCoords()]
        for idx, point in enumerate(inferred_low.getPoints()):
            point.pos = rescaled_coords[idx]

        r, t = la.getTransformation(inferred_low, true_low)
        # do not translate now (the translation still needs rescaling)
        high_cluster.transform(r, None)
        translations.append(t)

    # translate (with rescaling)
    low_rgs = np.array([la.radius_of_gyration(c) for c in low_clusters])
    high_rgs = np.array([la.radius_of_gyration(c) for c in high_clusters])
    scaling_factor = np.mean(high_rgs / low_rgs)
    for high_cluster, t in zip(high_clusters, translations):
        high_cluster.transform(None, scaling_factor * t)  # rescale translation

    return high_clusters
def rescale(self):
    """Rescale radius of gyration of structure to 1.

    Every present point (entries equal to 0 in ``self.points`` are skipped)
    has its coordinates divided by the structure's current radius of
    gyration, as computed by ``la.radius_of_gyration``.  Positions are
    replaced by 3-tuples; nothing is returned.
    """
    # Coerce to float so that "/" below is true division even under
    # Python 2 with integer inputs.  Floor division ("//") would floor each
    # coordinate instead of scaling it.
    rg = float(la.radius_of_gyration(self))
    for i, point in enumerate(self.points):
        if point != 0:
            x, y, z = point.pos
            self.points[i].pos = (x / rg, y / rg, z / rg)
def transform(trueLow, highSubstructure, res_ratio):
    """Transform ``highSubstructure`` in place to match ``trueLow``.

    The high-resolution substructure is coarsened with ``dt.highToLow``, the
    coarse copy is scaled so its radius of gyration equals that of
    ``trueLow``, and ``la.getTransformation`` is asked for the (r, t) pair
    between the scaled copy and ``trueLow``.  ``t`` is divided by the scaling
    factor before ``highSubstructure.transform(r, t)`` is called.

    Args:
        trueLow: low-resolution reference structure.
        highSubstructure: high-resolution structure; modified in place.
        res_ratio: ratio passed to ``dt.highToLow``.
    """
    # approximate as low resolution
    coarse = dt.highToLow(highSubstructure, res_ratio)

    rg_true = la.radius_of_gyration(trueLow)
    rg_coarse = la.radius_of_gyration(coarse)
    scale = rg_true / rg_coarse

    # bring the coarse copy to the size of the true low-res structure
    for idx, pt in enumerate(coarse.points):
        if pt != 0:  # 0 entries are skipped
            x, y, z = pt.pos
            coarse.points[idx].pos = (x * scale, y * scale, z * scale)

    # recover the transformation for inferred from true low structure
    rotation, translation = la.getTransformation(coarse, trueLow)
    # the high structure was not scaled, so undo the scaling on the translation
    translation /= scale

    # transform high structure
    highSubstructure.transform(rotation, translation)
def partitionedMDS(path, args):
    """Partitions structure into substructures and performs MDS.

    A low-resolution structure (resolution multiplied by ``args[5]``) is
    built from ``path`` and split into domains with ``tad.getDomains``.  A
    high-resolution substructure is created for every domain, the
    low-resolution structure is inferred as a whole, and each
    high-resolution substructure is then inferred separately inside a
    ``pymp.Parallel`` region and transformed to match its low-resolution
    counterpart.

    Args:
        path: BED file used for both the low- and high-resolution data.
        args: indexable parameters, read as
            [0] domain smoothing parameter, [1] minimum size fraction,
            [2] max memory (read but unused here), [3] number of threads,
            [4] alpha for the low-resolution inference, [5] resolution
            ratio, [6] alpha for the per-substructure inference.

    Returns:
        The high-resolution structure with its substructures set.
    """
    smoothing = args[0]
    min_size_fraction = args[1]
    maxmemory = args[2]  # read but not used in this function
    num_threads = args[3]
    alpha = args[4]
    res_ratio = args[5]
    alpha2 = args[6]

    # create low-res structure (low global structure)
    low_chrom = dt.chromFromBed(path)
    low_chrom.res *= res_ratio
    lowstructure = dt.structureFromBed(path, low_chrom)

    # get TADs
    low_contactMat = dt.matFromBed(path, lowstructure)
    low_tad_indices = tad.getDomains(low_contactMat, lowstructure,
                                     smoothing, min_size_fraction)
    # low substructures, defined on relative indices not absolute indices
    tad.substructuresFromTads(lowstructure, low_tad_indices)

    # create high-res chrom
    size, res = dt.basicParamsFromBed(path)
    low_params = lowstructure.chrom
    highChrom = dt.ChromParameters(low_params.minPos, low_params.maxPos,
                                   res, low_params.name, size)
    highstructure = dt.Structure([], [], highChrom, 0)

    # one high-res substructure per low-res domain
    low_gen_coords = lowstructure.getGenCoords()
    pieces = []
    offset = 0
    for td in low_tad_indices:
        piece = dt.structureFromBed(path, highChrom,
                                    low_gen_coords[td[0]],
                                    low_gen_coords[td[1]],
                                    offset)
        pieces.append(piece)
        # advance by the piece length, then step back by one
        offset += len(piece.points) - 1
    highstructure.setstructures(pieces)

    infer_structure(low_contactMat, lowstructure, alpha, num_threads)
    print("Low-resolution MDS complete")

    highSubstructures = pymp.shared.list(highstructure.structures)
    lowSubstructures = pymp.shared.list(lowstructure.structures)

    numSubstructures = len(highstructure.structures)
    # don't exceed number of requested threads, available threads, or structures
    num_threads = min(num_threads, mp.cpu_count(), numSubstructures)
    with pymp.Parallel(num_threads) as p:
        for substructurenum in p.range(numSubstructures):
            highSubstructure = highSubstructures[substructurenum]
            if not len(highSubstructure.getPoints()) > 0:
                continue  # skip empty
            trueLow = lowSubstructures[substructurenum]

            # perform MDS individually, on this structure's contacts only
            structure_contactMat = dt.matFromBed(path, highSubstructure)
            infer_structure(structure_contactMat, highSubstructure, alpha2, num_threads)

            # approximate as low resolution
            inferredLow = dt.highToLow(highSubstructure, res_ratio)

            # rescale to the size of the true low-res substructure
            scaling_factor = (la.radius_of_gyration(trueLow)
                              / la.radius_of_gyration(inferredLow))
            for idx, pt in enumerate(inferredLow.points):
                if pt != 0:
                    x, y, z = pt.pos
                    inferredLow.points[idx].pos = (x * scaling_factor,
                                                   y * scaling_factor,
                                                   z * scaling_factor)

            # recover the transformation for inferred from true low structure
            r, t = la.getTransformation(inferredLow, trueLow)
            t /= scaling_factor

            # transform high structure and store it back in the shared list
            highSubstructure.transform(r, t)
            highSubstructures[substructurenum] = highSubstructure

            print("MDS performed on structure {} of {}".format(
                substructurenum + 1, numSubstructures))

    highstructure.setstructures(highSubstructures)
    return highstructure
def partitionedMDS(path, lowpath, args):
    """Partitions cluster into subclusters and performs MDS.

    A low-resolution cluster is loaded from ``lowpath`` and split into
    domains with ``tad.getDomains``.  A high-resolution cluster is loaded
    from ``path`` using the domain boundaries multiplied by the resolution
    ratio, and matching subclusters are created with
    ``tad.subclustersFromTads``.  The low-resolution cluster is inferred as
    a whole; each high-resolution subcluster is then inferred separately
    inside a ``pymp.Parallel`` region and transformed to match its
    low-resolution counterpart.

    Args:
        path: high-resolution BED file.
        lowpath: low-resolution BED file.
        args: indexable parameters, read as
            [0] domain smoothing parameter, [1] minimum size fraction,
            [2] max memory (read but unused here), [3] number of threads,
            [4] alpha passed to ``infer_cluster``.

    Returns:
        The high-resolution cluster with its subclusters set.
    """
    smoothing = args[0]
    min_size_fraction = args[1]
    maxmemory = args[2]  # read but not used in this function
    num_threads = args[3]
    alpha = args[4]

    # create low-res cluster
    lowCluster = dt.clusterFromBed(lowpath, None, None)

    # get TADs (low subclusters)
    low_contactMat = dt.matFromBed(lowpath, lowCluster)
    lowTads = tad.getDomains(low_contactMat, lowCluster,
                             smoothing, min_size_fraction)

    # create high-res chrom
    size, res = dt.basicParamsFromBed(path)
    low_params = lowCluster.chrom
    highChrom = dt.ChromParameters(low_params.minPos, low_params.maxPos,
                                   res, low_params.name, size)

    # create high-res cluster
    resRatio = lowCluster.chrom.res / highChrom.res
    highTads = lowTads * resRatio
    highCluster = dt.clusterFromBed(path, highChrom, highTads)

    # create compatible subclusters
    tad.subclustersFromTads(highCluster, lowCluster, lowTads)

    infer_cluster(low_contactMat, lowCluster, alpha)
    print("Low-resolution MDS complete")

    highSubclusters = pymp.shared.list(highCluster.clusters)
    lowSubclusters = pymp.shared.list(lowCluster.clusters)

    numSubclusters = len(highCluster.clusters)
    # don't exceed number of requested threads, available threads, or clusters
    num_threads = min(num_threads, mp.cpu_count(), numSubclusters)
    with pymp.Parallel(num_threads) as p:
        for subclusternum in p.range(numSubclusters):
            highSubcluster = highSubclusters[subclusternum]
            trueLow = lowSubclusters[subclusternum]

            # perform MDS individually, on this cluster's contacts only
            cluster_contactMat = dt.matFromBed(path, highSubcluster)
            infer_cluster(cluster_contactMat, highSubcluster, alpha)

            # approximate as low resolution
            inferredLow = dt.highToLow(highSubcluster, resRatio)

            # rescale to the size of the true low-res subcluster
            scaling_factor = (la.radius_of_gyration(trueLow)
                              / la.radius_of_gyration(inferredLow))
            for idx, pt in enumerate(inferredLow.points):
                if pt != 0:
                    x, y, z = pt.pos
                    inferredLow.points[idx].pos = (x * scaling_factor,
                                                   y * scaling_factor,
                                                   z * scaling_factor)

            # recover the transformation for inferred from true low cluster
            r, t = la.getTransformation(inferredLow, trueLow)
            t *= resRatio**(2. / 3)  # rescale

            # transform high cluster and store it back in the shared list
            highSubcluster.transform(r, t)
            highSubclusters[subclusternum] = highSubcluster

            print("MDS performed on cluster {} of {}".format(
                subclusternum + 1, numSubclusters))

    highCluster.setClusters(highSubclusters)
    return highCluster