"""Run MultiMDS on a control/galactose pair and plot the two structures.

The point whose relative index corresponds to genomic position 852000 is
given color value 1; every other point keeps color value 0.
"""
import os
import sys

import numpy as np

sys.path.append("..")  # project modules live one directory up
import data_tools as dt
import plotting as plot

# Invoke the MultiMDS command-line script on the two BED files.
# NOTE(review): the command is given the *Scer* BED files, while the
# structures loaded below are the *Suva* outputs -- confirm this is intended.
os.system(
    "python ../multimds.py -P 0.1 -w 0 "
    "ctrl_Scer_13_32kb.bed galactose_Scer_13_32kb.bed"
)

ctrl_struct = dt.structure_from_file("ctrl_Suva_13_32kb_structure.tsv")
gal_struct = dt.structure_from_file("galactose_Suva_13_32kb_structure.tsv")

# Integer color array shaped like the control structure's point list.
highlight = np.zeros_like(ctrl_struct.getPoints(), dtype=int)
highlight[ctrl_struct.get_rel_index(852000)] = 1

# The same color array is applied to both structures.
plot.plot_structures_interactive(
    (ctrl_struct, gal_struct),
    (highlight, highlight),
)
"""Plot the chromosome 21 structure from 45 Mb onward with two marked regions.

Usage: <script> CELL_TYPE RES_KB
"""
import sys

sys.path.append("..")  # project modules live one directory up
import data_tools as dt
import numpy as np
import plotting as plot

cell_type = sys.argv[1]
res_kb = int(sys.argv[2])

struct = dt.structure_from_file(
    "{}_21_{}kb_structure.tsv".format(cell_type, res_kb)
)

# Truncate: keep only the points at or after genomic position `start`.
start = 45000000
offset = struct.chrom.getAbsoluteIndex(start)
struct.points = struct.points[offset:]
struct.chrom.minPos = start

# Shift the absolute index of every remaining point so that it is relative
# to the new start; entries equal to 0 are skipped (presumably placeholders
# for missing bins -- confirm against data_tools).
for point in struct.points:
    if point != 0:
        point.absolute_index -= offset
struct.set_rel_indices()

# Color values: 0 by default, 2 for the 46.90-46.95 Mb range,
# 1 for the bin containing 47.475 Mb.
colors = np.zeros_like(struct.getPoints(), dtype=int)
region_begin = struct.get_rel_index(46900000)
region_end = struct.get_rel_index(46950000)
colors[region_begin:region_end] = 2
colors[struct.get_rel_index(47475000)] = 1

plot.plot_structure_interactive(struct, colors, colormap="brg")
prefix2 = os.path.basename(path2.split(".")[0]) n = 10 all_r_sq = [] ps = np.arange(0, 0.1, 0.01) for p in ps: all_changes = [] for i in range(n): print(i) os.system("python ../multimds.py -P {} --full {} {}".format( p, path1, path2)) structure1 = dt.structure_from_file("{}_structure.tsv".format( os.path.basename(prefix1))) structure2 = dt.structure_from_file("{}_structure.tsv".format( os.path.basename(prefix2))) if p == 0: r, t = la.getTransformation(structure1, structure2) structure1.transform(r, t) all_changes.append( np.array([ la.calcDistance(coord1, coord2) for coord1, coord2 in zip( structure1.getCoords(), structure2.getCoords()) ])) r_sq = [] for i in range(n):
return misc.pearson(dists1, dists2) #labels = ("Chromosome3D", "mMDS", "miniMDS", "MOGEN", "HSA", "ChromSDE") labels = ("mMDS", "miniMDS", "MOGEN", "HSA") n = len(labels) rs = np.zeros(n) #Chromosome3D #coords1 = np.loadtxt("Chromosome3D/output_models/chr22_10kb_rep1/rep1_coords.tsv") #coords2 = np.loadtxt("Chromosome3D/output_models/chr22_10kb_rep1/rep2_coords.tsv") #rs[0] = rep_correlation(coords1, coords2) #mMDS coords1 = dt.structure_from_file( "hic_data/GM12878_combined_22_10kb_mmds_rep1.tsv").getCoords() coords2 = dt.structure_from_file( "hic_data/GM12878_combined_22_10kb_mmds_rep2.tsv").getCoords() #rs[1] = rep_correlation(coords1, coords2) rs[0] = rep_correlation(coords1, coords2) #miniMDS coords1 = dt.structure_from_file( "hic_data/GM12878_combined_22_10kb_minimds_rep1.tsv").getCoords() coords2 = dt.structure_from_file( "hic_data/GM12878_combined_22_10kb_minimds_rep2.tsv").getCoords() #rs[2] = rep_correlation(coords1, coords2) rs[1] = rep_correlation(coords1, coords2) #MOGEN coords1 = np.loadtxt(
chromatinDiameter = 30 #diameter of heterochromatin (nm) totDist = 0 count = 0 n = len(coords) for i in range(1, n): totDist += la.calcDistance(coords[i - 1], coords[i]) count += 1 avgDist = totDist / count #average distance between neighboring loci physicalDist = kl * (res / bpPerKL)**( 1. / 2) #physical distance between neighboring loci (nm) conversionFactor = avgDist / physicalDist return chromatinDiameter / 2 * conversionFactor mmds_structure = dt.structure_from_file( "hic_data/GM12878_combined_22_10kb_mmds_coords.tsv") cmds_structure = dt.structure_from_file( "hic_data/GM12878_combined_22_10kb_cmds_coords.tsv") minimds_structure = dt.structure_from_file( "hic_data/GM12878_combined_22_10kb_minimds_coords.tsv") mmds_res = mmds_structure.chrom.res cmds_res = cmds_structure.chrom.res minimds_res = minimds_structure.chrom.res assert mmds_res == cmds_res == minimds_res res = mmds_res plot.plot_structure_interactive(mmds_structure, out_path="Fig9A.png") plot.plot_structure_interactive(cmds_structure, out_path="Fig9B.png")
"""Display the GM12878 and K562 chromosome 21 (100 kb) structures together."""
import sys

sys.path.append("..")  # project modules live one directory up
import data_tools as dt
import plotting as plot

# Structure files to load, in plotting order.
structure_files = (
    "GM12878_combined_21_100kb_structure.tsv",
    "K562_21_100kb_structure.tsv",
)

structures = tuple(dt.structure_from_file(path) for path in structure_files)
plot.plot_structures_interactive(structures)
from scipy import stats as st
import misc

# Chromosomes 1-22 followed by X.
chroms = tuple(range(1, 23)) + ("X",)
n = len(chroms)

# One correlation value per chromosome for each method.
mmds_rs, cmds_rs, minimds_rs, mogen_rs = (np.zeros(n) for _ in range(4))

for i, chrom in enumerate(chroms):
    bedpath = "hic_data/GM12878_combined_{}_10kb.bed".format(chrom)

    # --- mMDS: correlate contact-derived distances with structure distances
    mmds_path = "hic_data/GM12878_combined_{}_10kb_mmds_coords.tsv".format(
        chrom)
    mmds_structure = dt.structure_from_file(mmds_path)
    contactMat = dt.matFromBed(bedpath, mmds_structure)
    mmds_true_mat = at.contactToDist(contactMat)
    at.makeSymmetric(mmds_true_mat)
    # remove diagonal
    for j in range(len(mmds_true_mat)):
        mmds_true_mat[j, j] = 0
    mmds_distMat = misc.distMat(mmds_structure)
    mmds_rs[i] = misc.pearson(mmds_true_mat, mmds_distMat)

    # --- cMDS: same preparation of the contact-derived distance matrix
    cmds_path = "hic_data/GM12878_combined_{}_10kb_cmds_coords.tsv".format(
        chrom)
    cmds_structure = dt.structure_from_file(cmds_path)
    contactMat = dt.matFromBed(bedpath, cmds_structure)
    cmds_true_mat = at.contactToDist(contactMat)
    at.makeSymmetric(cmds_true_mat)
    # remove diagonal
    for j in range(len(cmds_true_mat)):
        cmds_true_mat[j, j] = 0
import linear_algebra as la
from matplotlib import pyplot as plt
import numpy as np

# Command-line arguments: GENE_NAME CHROM_NUM GENE_LOC STRAIN PREFIX
gene_name = sys.argv[1]
chrom_num = sys.argv[2]
gene_loc = int(sys.argv[3])
strain = sys.argv[4]
prefix = sys.argv[5]

res_kb = 32
chrom_name = "{}_{}".format(strain, chrom_num)

# For each of the 10 numbered iterations, collect the point-by-point
# distances between the control and galactose structures.
all_dists = []
for iteration in range(1, 11):
    ctrl_file = "{}_{}ctrl_{}_{}kb_structure.tsv".format(
        iteration, prefix, chrom_name, res_kb)
    galactose_file = "{}_{}galactose_{}_{}kb_structure.tsv".format(
        iteration, prefix, chrom_name, res_kb)
    struct1 = dt.structure_from_file(ctrl_file)
    struct2 = dt.structure_from_file(galactose_file)

    #dt.make_compatible((struct1, struct2))
    #struct1.rescale()
    #struct2.rescale()
    #r, t = la.getTransformation(struct1, struct2)
    #struct1.transform(r,t)

    paired_coords = zip(struct1.getCoords(), struct2.getCoords())
    all_dists.append([la.calcDistance(a, b) for a, b in paired_coords])
# Measure how the embedding error changes with the MultiMDS penalty (-P)
# for the GM12878 / K562 chromosome 21 pair at 100 kb resolution.
chrom = 21
res_kb = 100
prefix1 = "GM12878_combined"
prefix2 = "K562"
path1 = "hic_data/{}_{}_{}kb.bed".format(prefix1, chrom, res_kb)
path2 = "hic_data/{}_{}_{}kb.bed".format(prefix2, chrom, res_kb)

ps = np.arange(0, 0.6, 0.1)  # penalty values 0.0, 0.1, ..., 0.5
errors = np.zeros_like(ps)

for i, p in enumerate(ps):
    # Re-run the inference with this penalty, then load its two outputs.
    os.system("python ../multimds.py -P {} {} {}".format(p, path1, path2))
    structure1 = dt.structure_from_file("{}_{}_{}kb_structure.tsv".format(
        prefix1, chrom, res_kb))
    structure2 = dt.structure_from_file("{}_{}_{}kb_structure.tsv".format(
        prefix2, chrom, res_kb))

    dists1 = dt.normalized_dist_mat(path1, structure1)
    dists2 = dt.normalized_dist_mat(path2, structure2)

    # Average the error over BOTH structures.  The previous code evaluated
    # error(dists1, structure1) twice, so dists2 / structure2 never
    # contributed to the reported value.
    errors[i] = np.mean((
        error(dists1, structure1.getCoords()),
        error(dists2, structure2.getCoords()),
    ))

# Axis data and tick spacing for the plot
xs = ps
x_int_size = 0.1
ys = errors
y_int_size = 0.05
x_start = min(xs) - x_int_size / 4.
# For every comparison, read the sample pairs from "<comparison>_design.txt",
# run MultiMDS on each autosome for which both BED files exist, and store the
# mean point-by-point distance between the two structures in boxes[i].
for i, comparison in enumerate(comparisons):
    # print() with a single argument behaves the same on Python 2 and 3,
    # whereas the former `print comparison` statement is a Python 3 error.
    print(comparison)
    with open("{}_design.txt".format(comparison)) as infile:
        for line in infile:
            prefix1, prefix2 = line.strip().split()
            for chrom in range(1, 23):
                path1 = "hic_data/{}_{}_100kb.bed".format(prefix1, chrom)
                path2 = "hic_data/{}_{}_100kb.bed".format(prefix2, chrom)
                # Skip chromosomes lacking a BED file for either sample.
                if not (os.path.isfile(path1) and os.path.isfile(path2)):
                    continue

                os.system("python ../multimds.py {} {}".format(path1, path2))

                #load structures
                structure1 = dt.structure_from_file(
                    "{}_{}_100kb_structure.tsv".format(prefix1, chrom))
                structure2 = dt.structure_from_file(
                    "{}_{}_100kb_structure.tsv".format(prefix2, chrom))

                dists = [
                    la.calcDistance(coord1, coord2)
                    for coord1, coord2 in zip(structure1.getCoords(),
                                              structure2.getCoords())
                ]
                boxes[i].append(np.mean(dists))
    # The with-statement has already closed infile; no explicit close needed.

#start with a frameless plot (extra room on the left)
plt.subplot2grid((10, 10), (0, 0), 9, 10, frameon=False)
"""Write the number of points in each chromosome structure, one per line.

Usage: <script> RES_KB

Output goes to "chrom_sizes_<RES_KB>kb.txt", in the order chromosomes 1-22
followed by X.
"""
import sys

sys.path.append("..")  # project modules live one directory up
import data_tools as dt

res_kb = int(sys.argv[1])

chroms = list(range(1, 23)) + ["X"]

# The with-statement closes the file on exit, so the former explicit
# out.close() call was redundant and has been dropped.
with open("chrom_sizes_{}kb.txt".format(res_kb), "w") as out:
    for chrom in chroms:
        structure = dt.structure_from_file(
            "hic_data/GM12878_combined_chr{}_{}kb_structure.tsv".format(
                chrom, res_kb))
        out.write(str(len(structure.getPoints())) + "\n")
import sys

sys.path.append("../..")  # project modules live two directories up
import data_tools as dt
from matplotlib import pyplot as plt
import linear_algebra as la

struct1 = dt.structure_from_file("sim1_chr21_100kb_structure.tsv")
struct2 = dt.structure_from_file("sim2_chr21_100kb_structure.tsv")

#dt.make_compatible((struct1, struct2))
#struct1.rescale()
#struct2.rescale()
#r, t = la.getTransformation(struct1, struct2)
#struct1.transform(r,t)

# x values: genomic coordinates of the first structure
gen_coords = struct1.getGenCoords()

# y values: distance between corresponding points of the two structures
paired_coords = zip(struct1.getCoords(), struct2.getCoords())
dists = [la.calcDistance(a, b) for a, b in paired_coords]

plt.subplot2grid((10, 10), (0, 0), 9, 10, frameon=False)
plt.plot(gen_coords, dists, lw=2)

#define offsets
xmin = min(gen_coords)
xmax = max(gen_coords)
x_range = xmax - xmin
# pad the x axis by 1/25 of the data range on each side
x_pad = x_range / 25.
x_start = xmin - x_pad
x_end = xmax + x_pad
"""Plot the GM12878 10 kb structures of chromosomes 1-22 and X to Fig10.png."""
import sys

sys.path.append("..")  # project modules live one directory up
import plotting as plot
import data_tools as dt

# Chromosomes 1-22 followed by X.
chroms = list(range(1, 23)) + ["X"]

structure_template = "hic_data/GM12878_combined_chr{}_10kb_structure.tsv"
structures = [
    dt.structure_from_file(structure_template.format(chrom))
    for chrom in chroms
]

plot.plot_structures_interactive(structures, out_path="Fig10.png")
# Accumulators filled while looping over the chromosomes
x_means, y_means, z_means = [], [], []
x_lengths, y_lengths, z_lengths = [], [], []

for chrom in chroms:
    path1 = "hic_data/{}_{}_{}kb.bed".format(cell_type1, chrom, res_kb)
    path2 = "hic_data/{}_{}_{}kb.bed".format(cell_type2, chrom, res_kb)

    # Only process chromosomes for which both BED files are present.
    if os.path.isfile(path1) and os.path.isfile(path2):
        multimds_cmd = "python ~/git/multimds/multimds.py --full {} {}".format(
            path1, path2)
        os.system(multimds_cmd)

        structure1 = dt.structure_from_file(
            "hic_data/{}_{}_{}kb_structure.tsv".format(
                cell_type1, chrom, res_kb))
        structure2 = dt.structure_from_file(
            "hic_data/{}_{}_{}kb_structure.tsv".format(
                cell_type2, chrom, res_kb))

        #plot.plot_structures_interactive((structure1, structure2))

        #compartments
        contacts1 = dt.matFromBed(path1, structure1)
        contacts2 = dt.matFromBed(path2, structure2)
        at.makeSymmetric(contacts1)
        at.makeSymmetric(contacts2)

        compartments1 = np.array(ca.get_compartments(contacts1))
import sys

sys.path.append("..")  # project modules live one directory up
import data_tools as dt
import linear_algebra as la
from matplotlib import pyplot as plt
import numpy as np
import compartment_analysis as ca
from scipy import stats as st

# Command-line arguments: CELL_TYPE1 CELL_TYPE2 RES_KB
cell_type1 = sys.argv[1]
cell_type2 = sys.argv[2]
res_kb = int(sys.argv[3])

structure_template = "{}_21_{}kb_structure.tsv"
struct1 = dt.structure_from_file(structure_template.format(cell_type1, res_kb))
struct2 = dt.structure_from_file(structure_template.format(cell_type2, res_kb))

gen_coords = np.array(struct1.getGenCoords())

# Distance between corresponding points of the two structures
paired_coords = zip(struct1.getCoords(), struct2.getCoords())
dists = np.array([la.calcDistance(a, b) for a, b in paired_coords])

# Compartment scores for each cell type, from its chromosome 21 BED file
bed_template = "hic_data/{}_21_{}kb.bed"
mat1 = dt.matFromBed(bed_template.format(cell_type1, res_kb), struct1)
comps1 = ca.get_compartments(mat1, struct1)
mat2 = dt.matFromBed(bed_template.format(cell_type2, res_kb), struct2)
comps2 = ca.get_compartments(mat2, struct2)

# Pearson correlation (and its p-value) between the two compartment tracks
r, p = st.pearsonr(comps1, comps2)
"""Plot the GM12878 10 kb structures of chromosomes 1-22 and X to Fig10.png."""
import sys

sys.path.append("..")  # project modules live one directory up
import plotting as plot
import data_tools as dt

# Chromosomes 1-22 followed by X.
chroms = list(range(1, 23))
chroms.append("X")

# Load one structure per chromosome, in the order listed above.
structures = []
for chrom in chroms:
    structure_path = "hic_data/GM12878_combined_{}_10kb_structure.tsv".format(
        chrom)
    structures.append(dt.structure_from_file(structure_path))

plot.plot_structures_interactive(structures, out_path="Fig10.png")
import sys

sys.path.append("..")  # project modules live one directory up
import data_tools as dt
import numpy as np
from mayavi import mlab

# Command-line arguments: GENE_NAME CHROM_NUM GENE_LOC STRAIN
gene_name = sys.argv[1]
chrom_num = sys.argv[2]
gene_loc = int(sys.argv[3])
strain = sys.argv[4]

res_kb = 32
chrom_name = "{}_{}".format(strain, chrom_num)

struct1 = dt.structure_from_file(
    "ctrl_{}_{}kb_structure.tsv".format(chrom_name, res_kb))
struct2 = dt.structure_from_file(
    "galactose_{}_{}kb_structure.tsv".format(chrom_name, res_kb))
coords1 = np.array(struct1.getCoords())
coords2 = np.array(struct2.getCoords())

# Scalar per point: 1 at the gene's relative index, 0 everywhere else.
colors = np.zeros_like(struct1.getPoints(), dtype=int)
colors[struct1.get_rel_index(gene_loc)] = 1

mlab.figure(bgcolor=(1, 1, 1))

# First structure, with a custom lookup table: the first table entry is
# (0, 0, 255, 128) and all remaining entries are (255, 0, 0, 128).
x1, y1, z1 = coords1[:, 0], coords1[:, 1], coords1[:, 2]
line = mlab.plot3d(x1, y1, z1, colors)
lut = line.module_manager.scalar_lut_manager.lut.table.to_array()
lut[0] = (0, 0, 255, 128)
lut[1:] = (255, 0, 0, 128)
line.module_manager.scalar_lut_manager.lut.table = lut

# Second structure, drawn with the same scalar array.
x2, y2, z2 = coords2[:, 0], coords2[:, 1], coords2[:, 2]
line = mlab.plot3d(x2, y2, z2, colors)