def distsFromCoords(coords):
    """Build a pairwise distance matrix from a sequence of 3D coords.

    Returns an ``(n, n)`` numpy array where entry ``[i, j]`` with ``j < i``
    holds ``la.calcDistance(coords[i], coords[j])``. Only the strict lower
    triangle is filled; the diagonal and the upper triangle stay zero.
    """
    size = len(coords)
    result = np.zeros((size, size))
    for row in range(1, size):  # row 0 has no column below the diagonal
        for col in range(row):
            result[row, col] = la.calcDistance(coords[row], coords[col])
    return result
def distMat(self):
    """Build the pairwise distance matrix of this cluster's points.

    Uses the ``pos`` attribute of every point returned by ``getPoints()``.
    Only the strict lower triangle (row index > column index) is filled;
    all other entries stay zero.
    """
    members = self.getPoints()
    size = len(members)
    grid = np.zeros((size, size))
    for row in range(1, size):  # row 0 has nothing below the diagonal
        for col in range(row):
            grid[row, col] = la.calcDistance(members[row].pos,
                                             members[col].pos)
    return grid
def distsFromCoords(coords):
    """Creates distance matrix from 3D coords.

    Returns an ``(n, n)`` numpy array whose strict lower triangle holds
    ``la.calcDistance(coords[i], coords[j])`` for ``j < i``; the diagonal
    and upper triangle stay zero.

    If any pair of coordinates has distance zero, the two offending
    coordinates are reported on stderr and the process exits with status 1
    (``SystemExit``).
    """
    n = len(coords)
    distMat = np.zeros((n, n))
    for i in range(n):
        for j in range(i):
            dist = la.calcDistance(coords[i], coords[j])
            if dist == 0:
                # Report on stderr and exit non-zero so that callers and
                # shell pipelines can see that the run failed.
                sys.stderr.write("Error. Duplicate coordinates.\n")
                sys.stderr.write("{}\n".format(coords[i]))
                sys.stderr.write("{}\n".format(coords[j]))
                sys.exit(1)
            distMat[i, j] = dist
    return distMat
def error(dists, coords):
    """Root-mean-square error between target and embedded distances.

    For every pair ``j < i`` the embedded distance
    ``la.calcDistance(coords[i], coords[j])`` is compared with the target
    ``dists[i, j]``; the squared differences are averaged over all pairs
    and the square root of that mean is returned.

    ``dists`` and ``coords`` must have the same length. With fewer than
    two points there are no pairs to compare, and 0.0 is returned instead
    of dividing by zero.
    """
    assert len(dists) == len(coords)
    n = len(dists)
    if n < 2:
        # No pairs exist, so there is nothing to be wrong about.
        return 0.0
    sse = 0.0  # float accumulator: no integer division on Python 2
    for i in range(1, n):
        for j in range(i):
            embedded_dist = la.calcDistance(coords[i], coords[j])
            sse += (embedded_dist - dists[i, j])**2
    count = n * (n - 1) // 2  # number of (i, j) pairs with j < i
    mse = sse / count
    return mse**(1. / 2)
def calculateRadius(structures):
    """Calculate to-scale radius based on Kuhn length and diameter of chromatin.

    For each structure, the mean distance between consecutive coordinates
    is divided by the physical distance between neighboring loci (nm)
    computed from that structure's ``chrom.res``. The mean of these ratios
    scales half of ``chromatinDiameter``.

    ``kl``, ``bpPerKL`` and ``chromatinDiameter`` are read from the
    enclosing scope; they are not defined in this function.
    """
    ratios = np.zeros(len(structures))
    for idx, struct in enumerate(structures):
        pts = struct.getCoords()
        numPts = len(pts)
        pathLength = sum(
            la.calcDistance(pts[k - 1], pts[k]) for k in range(1, numPts))
        # average distance between neighboring loci
        meanStep = pathLength / (numPts - 1)
        # physical distance between neighboring loci (nm)
        expectedStep = kl * (struct.chrom.res / bpPerKL)**(1. / 2)
        ratios[idx] = meanStep / expectedStep
    return chromatinDiameter / 2 * np.mean(ratios)
def calculateRadius(coords, res):
    """Calculate to-scale radius based on Kuhn length and diameter of chromatin.

    The mean distance between consecutive entries of ``coords`` is divided
    by the physical distance between neighboring loci (nm) at resolution
    ``res``; that ratio scales half of the chromatin diameter.

    Raises ZeroDivisionError when ``coords`` has fewer than two entries.
    """
    # Constants from Rippe (2001)
    kuhnLength = 289  # Kuhn length (nm)
    bpPerKuhn = 30000.  # base pairs per Kuhn length
    diameter = 30  # diameter of heterochromatin (nm)

    steps = [
        la.calcDistance(coords[k - 1], coords[k])
        for k in range(1, len(coords))
    ]
    meanStep = sum(steps) / len(steps)  # average distance between neighboring loci
    expectedStep = kuhnLength * (res / bpPerKuhn)**(1. / 2)  # physical distance between neighboring loci (nm)
    scale = meanStep / expectedStep
    return diameter / 2 * scale
def rmsd(cluster1, cluster2):
    """Root mean square distance between two clusters.

    Both clusters must have the same ``chrom.res`` and the same
    ``chrom.minPos / res`` (asserted). The distance is taken over the
    point numbers that both clusters report from ``getPointNums()``; each
    shared number is looked up as ``points[num - offset]`` in its cluster.

    Raises ZeroDivisionError when the clusters share no point numbers.
    """
    assert cluster1.chrom.res == cluster2.chrom.res
    res = cluster1.chrom.res
    # NOTE(review): on Python 3 `/` is true division, so this compares the
    # exact ratios rather than integer bin indices -- confirm that is intended.
    assert cluster1.chrom.minPos / res == cluster2.chrom.minPos / res  #indexing must be same

    # Fetch cluster2's point numbers once; re-calling getPointNums() and
    # scanning its result for every element of cluster1 was quadratic.
    nums2 = set(cluster2.getPointNums())
    intersection = [num for num in cluster1.getPointNums() if num in nums2]

    dist_sum = 0
    for num in intersection:
        point1 = cluster1.points[num - cluster1.offset]
        point2 = cluster2.points[num - cluster2.offset]
        dist_sum += la.calcDistance(point1.pos, point2.pos)**2

    msd = dist_sum / len(intersection)  #mean square distance
    return msd**(1. / 2)  #root mean square distance
def calculateRadius(clusters):
    """Calculate to-scale radius based on Kuhn length and diameter of chromatin.

    For each cluster, the mean distance between consecutive coordinates is
    divided by the physical distance between neighboring loci (nm)
    computed from that cluster's ``chrom.res``. The mean of these ratios
    scales half of ``chromatinDiameter``.

    ``kl``, ``bpPerKL`` and ``chromatinDiameter`` are read from the
    enclosing scope. Raises ZeroDivisionError when a cluster has fewer
    than two coordinates.
    """
    ratios = np.zeros(len(clusters))
    for idx, cluster in enumerate(clusters):
        pts = cluster.getCoords()
        steps = [
            la.calcDistance(pts[k - 1], pts[k]) for k in range(1, len(pts))
        ]
        # average distance between neighboring loci
        meanStep = sum(steps) / len(steps)
        # physical distance between neighboring loci (nm)
        expectedStep = kl * (cluster.chrom.res / bpPerKL)**(1. / 2)
        ratios[idx] = meanStep / expectedStep
    return chromatinDiameter / 2 * np.mean(ratios)
# Script fragment: the enclosing loop header(s), the imports and the
# definitions of p, n, path1, path2, prefix1, prefix2, all_changes and
# all_r_sq are outside this view.
# Visible steps: run ../multimds.py through the shell with penalty p on
# path1/path2, load the two "<prefix>_structure.tsv" outputs, append the
# element-wise la.calcDistance between the two structures' getCoords()
# results to all_changes, then collect the squared Pearson r for every pair
# (i, j < i) of all_changes entries into r_sq, which is appended to all_r_sq.
# NOTE(review): the original indentation is lost here, so the extent of the
# `if p == 0:` body and of the loops cannot be confirmed -- presumably the
# getTransformation/transform pair forms the `if` body; verify in the file.
# NOTE(review): `r, p = st.pearsonr(...)` rebinds p, the name also used for
# the penalty -- confirm this does not disturb an enclosing loop over p.
print(i) os.system("python ../multimds.py -P {} --full {} {}".format( p, path1, path2)) structure1 = dt.structure_from_file("{}_structure.tsv".format( os.path.basename(prefix1))) structure2 = dt.structure_from_file("{}_structure.tsv".format( os.path.basename(prefix2))) if p == 0: r, t = la.getTransformation(structure1, structure2) structure1.transform(r, t) all_changes.append( np.array([ la.calcDistance(coord1, coord2) for coord1, coord2 in zip( structure1.getCoords(), structure2.getCoords()) ])) r_sq = [] for i in range(n): for j in range(i): r, p = st.pearsonr(all_changes[i], all_changes[j]) r_sq.append(r**2) all_r_sq.append(r_sq) ys = all_r_sq #start with a frameless plot (extra room on the left) plt.subplot2grid((10, 10), (0, 0), 9, 10, frameon=False)
from matplotlib import pyplot as plt
import numpy as np
import compartment_analysis as ca
from scipy import stats as st

# Command line: <cell type 1> <cell type 2> <resolution in kb>
cell_type1 = sys.argv[1]
cell_type2 = sys.argv[2]
res_kb = int(sys.argv[3])

# Load the previously written "_21_" structure file of each cell type.
struct1 = dt.structure_from_file(
    "{}_21_{}kb_structure.tsv".format(cell_type1, res_kb))
struct2 = dt.structure_from_file(
    "{}_21_{}kb_structure.tsv".format(cell_type2, res_kb))

gen_coords = np.array(struct1.getGenCoords())

# Distance between corresponding coordinates of the two structures.
dists = np.array([
    la.calcDistance(coord1, coord2)
    for coord1, coord2 in zip(struct1.getCoords(), struct2.getCoords())
])

# Compartment scores of each cell type, computed from its Hi-C BED file.
mat1 = dt.matFromBed(
    "hic_data/{}_21_{}kb.bed".format(cell_type1, res_kb), struct1)
comps1 = ca.get_compartments(mat1, struct1)
mat2 = dt.matFromBed(
    "hic_data/{}_21_{}kb.bed".format(cell_type2, res_kb), struct2)
comps2 = ca.get_compartments(mat2, struct2)

# Flip the sign of the first score vector when the two are negatively
# correlated, so that the difference below compares like with like.
r, p = st.pearsonr(comps1, comps2)
if r < 0:
    comps1 = -comps1

comp_diffs = np.abs(comps1 - comps2)
def main():
    """Command-line entry point for joint reconstruction of two structures.

    Parses the command line, runs partitioned MDS (``--partitioned``) or
    full MDS on the two input BED files, and then:

    * writes one ``<prefix><input name>_structure.tsv`` per input,
    * writes ``<prefix><name1>_<name2>_relocalization.bed`` holding the
      ``la.calcDistance`` between corresponding coordinates of the two
      structures,
    * prints the fractional compartment change.

    In partitioned mode the parameters are checked with
    ``tools.args_are_valid``; if that check fails the process exits with
    status 1.
    """
    parser = argparse.ArgumentParser(
        description=
        "Jointly reconstruct 3D coordinates from two normalized intrachromosomal Hi-C BED files."
    )
    parser.add_argument("path1",
                        help="path to first intrachromosomal Hi-C BED file")
    parser.add_argument("path2",
                        help="path to second intrachromosomal Hi-C BED file")
    parser.add_argument("--partitioned",
                        action="store_true",
                        help="use partitioned MDS (default: full MDS)")
    parser.add_argument("-l",
                        type=int,
                        help="low resolution/high resolution",
                        default=10)
    parser.add_argument("-o", help="output file prefix")
    # type=int so that a user-supplied value is numeric like the default,
    # rather than arriving as a string.
    parser.add_argument("-r",
                        type=int,
                        default=32000000,
                        help="maximum RAM to use (in kb)")
    parser.add_argument("-n", type=int, default=3, help="number of threads")
    parser.add_argument(
        "-a",
        type=float,
        default=4,
        help=
        "alpha factor for converting contact frequencies to physical distances"
    )
    parser.add_argument("-P",
                        type=float,
                        default=0.05,
                        help="joint MDS penalty")
    parser.add_argument("-m",
                        type=int,
                        default=0,
                        help="midpoint (usually centromere) for partitioning")
    parser.add_argument("-N",
                        type=int,
                        default=2,
                        help="number of partitions")
    parser.add_argument("-w",
                        type=float,
                        default=0.05,
                        help="weight of distance decay prior")

    args = parser.parse_args()

    if args.partitioned:  #TODO: cleanup
        params = (args.m, args.N, args.r, args.n, args.a, args.l, args.P,
                  args.w)
        names = ("Midpoint", "Number of partitions", "Maximum memory",
                 "Number of threads", "Alpha", "Resolution ratio", "Penalty",
                 "Weight")
        # (lower, upper) bound per parameter, in the same order as params
        intervals = ((None, None), (1, None), (0, None), (0, None), (1, None),
                     (1, None), (0, None), (0, 1))
        if not tools.args_are_valid(params, names, intervals):
            # Invalid arguments are a failure: exit non-zero.
            sys.exit(1)
        structure1, structure2 = partitionedMDS(args.path1, args.path2,
                                                params)
    else:
        structure1, structure2 = fullMDS(args.path1, args.path2, args.a,
                                         args.P, args.n, args.w)

    prefix = args.o or ""

    # Output names are derived from the input file names without extension.
    prefix1 = os.path.splitext(os.path.basename(args.path1))[0]
    structure1.write("{}{}_structure.tsv".format(prefix, prefix1))
    prefix2 = os.path.splitext(os.path.basename(args.path2))[0]
    structure2.write("{}{}_structure.tsv".format(prefix, prefix2))

    coords1 = np.array(structure1.getCoords())
    coords2 = np.array(structure2.getCoords())
    dists = [
        la.calcDistance(coord1, coord2)
        for coord1, coord2 in zip(coords1, coords2)
    ]
    np.savetxt("{}{}_{}_relocalization.bed".format(prefix, prefix1, prefix2),
               dists)

    print("Fractional compartment change: ")
    print(
        calculate_compartment_fraction(structure1, structure2, args.path1,
                                       args.path2))
# Command-line arguments 2-5: chromosome number, gene location and the two
# file prefixes.
chrom_num = sys.argv[2]
gene_loc = int(sys.argv[3])
prefix1 = sys.argv[4]
prefix2 = sys.argv[5]

res_kb = 32

# Per-strain maxima, used below to size the axes.
max_dists = []
max_gencoords = []

plt.subplot2grid((10, 10), (0, 0), 9, 10, frameon=False)

for strain in ("Scer", "Suva"):
    chrom_name = "{}_{}".format(strain, chrom_num)

    # Run multimds.py on the two BED files of this strain's chromosome.
    os.system(
        "python ~/git/multimds/multimds.py --full -P 0.1 -w 0 {}_{}_{}kb.bed {}_{}_{}kb.bed".format(
            prefix1, chrom_name, res_kb, prefix2, chrom_name, res_kb))

    struct1 = dt.structure_from_file(
        "{}_{}_{}kb_structure.tsv".format(prefix1, chrom_name, res_kb))
    struct2 = dt.structure_from_file(
        "{}_{}_{}kb_structure.tsv".format(prefix2, chrom_name, res_kb))

    # Distance between corresponding coordinates of the two structures.
    dists = [
        la.calcDistance(coord1, coord2)
        for coord1, coord2 in zip(struct1.getCoords(), struct2.getCoords())
    ]

    max_dists.append(max(dists))
    max_gencoords.append(max(struct1.getGenCoords()))
    plt.plot(struct1.getGenCoords(), dists, label=strain, lw=4)

# Axis layout; ys holds the distances of the last strain plotted.
x_int_size = 200000
ys = dists
y_int_size = 0.01
x_start = -x_int_size / 4.
x_end = max(max_gencoords) + x_int_size / 5.
y_start = -y_int_size / 5.
y_end = max(max_dists) + y_int_size / 5.

plt.title("chr{}".format(chrom_num), fontsize=14)
plt.xlabel("Genomic coordinate", fontsize=14)
plt.ylabel("Relocalization", fontsize=14)