Exemplo n.º 1
0
# Accumulators for per-axis statistics. Nothing in the lines visible here
# appends to them; presumably they are filled further down the script.
y_means = []
z_means = []
x_lengths = []
y_lengths = []
z_lengths = []

# Each line of the design file names two cell types separated by whitespace.
# NOTE(review): a blank line would make the two-value unpacking below fail.
with open(design_file) as infile:
    for line in infile:
        cell_type1, cell_type2 = line.strip().split()
        for chrom in chroms:
            # Input files follow the pattern
            # hic_data/<cell type>_<chrom>_<resolution>kb.bed.
            path1 = "hic_data/{}_{}_{}kb.bed".format(cell_type1, chrom, res_kb)
            path2 = "hic_data/{}_{}_{}kb.bed".format(cell_type2, chrom, res_kb)

            # Pairs for which either input file is missing are skipped.
            if os.path.isfile(path1) and os.path.isfile(path2):
                # full_mds yields one structure per input file.
                structure1, structure2 = multimds.full_mds(path1,
                                                           path2,
                                                           penalty=penalty)

                # NOTE(review): rescale() presumably normalizes each
                # structure's size before they are aligned -- confirm in
                # multimds.
                structure1.rescale()
                structure2.rescale()
                # Only structure1 is transformed; r and t look like the
                # rotation and translation that map it onto structure2
                # (TODO confirm against linear_algebra.getTransformation).
                r, t = la.getTransformation(structure1, structure2)
                structure1.transform(r, t)

                # Compartments: build a contact matrix from each input file
                # and compute compartment scores from it.
                contacts1 = dt.matFromBed(path1, structure=structure1)
                contacts2 = dt.matFromBed(path2, structure=structure2)

                # Wrapped in np.array so the scores are always ndarrays.
                compartments1 = np.array(
                    ca.get_compartments(contacts1, structure1))
                compartments2 = np.array(
                    ca.get_compartments(contacts2, structure2))
Exemplo n.º 2
0
from multimds import compartment_analysis as ca
from multimds import data_tools as dt
from scipy import stats as st
from matplotlib import pyplot as plt
import numpy as np
from multimds import linear_algebra as la
from scipy import signal as sg
from multimds import multimds as mm

# Input contact files for the two cell types being compared.
path1 = "hic_data/GM12878_combined_19_100kb.bed"
path2 = "hic_data/K562_19_100kb.bed"

# Embed both datasets together; one structure is returned per input file.
struct1, struct2 = mm.full_mds(path1, path2, prefix="test_")

# Build the contact matrices from the same files that were embedded. The
# paths used to be rebuilt from `chrom` and `res_kb`, which this script never
# defines, so the script stopped here with a NameError.
mat1 = dt.matFromBed(path1, struct1)
mat2 = dt.matFromBed(path2, struct2)

# np.asarray guarantees ndarrays, so the negation and subtraction below are
# element-wise regardless of the container get_compartments returns.
comps1 = np.asarray(ca.get_compartments(mat1, struct1))
comps2 = np.asarray(ca.get_compartments(mat2, struct2))

# If the two score vectors are anti-correlated, flip one of them so that both
# share the same sign convention before they are compared.
r, p = st.pearsonr(comps1, comps2)
if r < 0:
    comps1 = -comps1

# Per-locus absolute difference in compartment score.
comp_diffs = np.abs(comps1 - comps2)

# Per-locus distance between corresponding points of the two structures.
dists = np.array([
    la.calcDistance(coord1, coord2)
    for coord1, coord2 in zip(struct1.getCoords(), struct2.getCoords())
])
from multimds import multimds
import sys

# The chromosome identifier is the script's only command-line argument.
chrom = sys.argv[1]

# 100-kb input files for the two cell types on that chromosome.
gm12878_path = "hic_data/GM12878_combined_{}_100kb.bed".format(chrom)
k562_path = "hic_data/K562_{}_100kb.bed".format(chrom)

multimds.full_mds(gm12878_path, k562_path)
Exemplo n.º 4
0
from multimds import multimds
import sys

# Command line: <iteration> <chrom> <strain>
iteration = sys.argv[1]
chrom = sys.argv[2]
strain = sys.argv[3]

# 32-kb input files for the control and galactose conditions.
ctrl_path = "hic_data/ctrl_{}_{}_32kb.bed".format(strain, chrom)
galactose_path = "hic_data/galactose_{}_{}_32kb.bed".format(strain, chrom)

# The iteration label is passed as the prefix so each run is tagged with it.
run_prefix = iteration + "_"

multimds.full_mds(ctrl_path,
                  galactose_path,
                  weight=0,
                  penalty=0.1,
                  prefix=run_prefix)
Exemplo n.º 5
0
import numpy as np
from multimds import plotting as plot
from multimds import multimds as mm

# Mappability score per bin, read from column 3 of the BED file.
mappability = np.loadtxt("mappability_21_5kb.bed", usecols=3)

#struct1 = dt.structure_from_file("GM12878_combined_21_5kb_structure.tsv")
#struct2 = dt.structure_from_file("K562_21_5kb_structure.tsv")

struct1, struct2 = mm.full_mds("hic_data/GM12878_combined_21_5kb.bed",
                               "hic_data/K562_21_5kb.bed")

# Bin number of the structure's first position. It must be an integer: under
# Python 3 a bare "/" yields a float, which would turn the whole index array
# into floats and make the NumPy lookup below raise IndexError.
first_bin = int(struct1.chrom.minPos / struct1.chrom.res)

mappability = mappability[
    first_bin + struct1.nonzero_abs_indices()]  #only loci in structures

# Keep only loci whose mappability exceeds 0.75. The same selection is applied
# to both structures.
# NOTE(review): this assumes struct1 and struct2 contain the same loci in the
# same order -- confirm that full_mds guarantees it.
mappable = np.where(mappability > 0.75)
struct1.points = struct1.points[mappable]
struct2.points = struct2.points[mappable]

plot.plot_structures_interactive((struct1, struct2))
Exemplo n.º 6
0
from multimds import multimds
import sys

# Command line: <chrom> <strain>
chrom = sys.argv[1]
strain = sys.argv[2]

# 32-kb input files for the control and galactose conditions.
ctrl_path = "hic_data/ctrl_{}_{}_32kb.bed".format(strain, chrom)
galactose_path = "hic_data/galactose_{}_{}_32kb.bed".format(strain, chrom)

multimds.full_mds(ctrl_path, galactose_path, weight=0, penalty=0.1)
Exemplo n.º 7
0
import sys
from multimds import multimds

# Command line: <celltype1> <celltype2>
celltype1 = sys.argv[1]
celltype2 = sys.argv[2]

# Chromosome 21 input files at 100-kb resolution, one per cell type.
bed_path1 = "hic_data/{}_21_100kb.bed".format(celltype1)
bed_path2 = "hic_data/{}_21_100kb.bed".format(celltype2)

multimds.full_mds(bed_path1, bed_path2)
Exemplo n.º 8
0
from multimds import plotting as plot
from multimds import multimds as mm

# One entry per figure panel: (control file, galactose file, output image).
panel_inputs = (
    ("ctrl_Scer_12_32kb.bed",
     "galactose_Scer_12_32kb.bed",
     "sup6a.png"),
    ("ctrl_Scer_12-upstream_32kb.bed",
     "galactose_Scer_12-upstream_32kb.bed",
     "sup6b_upstream.png"),
    ("ctrl_Scer_12-downstream_32kb.bed",
     "galactose_Scer_12-downstream_32kb.bed",
     "sup6b_downstream.png"),
)

# Every panel uses the same embedding settings; only the files differ.
for ctrl_bed, galactose_bed, figure_path in panel_inputs:
    struct1, struct2 = mm.full_mds(ctrl_bed,
                                   galactose_bed,
                                   weight=0,
                                   penalty=0.1)
    plot.plot_structures_interactive((struct1, struct2),
                                     out_path=figure_path)
Exemplo n.º 9
0
# Command line: <path1> <path2>
path1 = sys.argv[1]
path2 = sys.argv[2]

# File-name stem of each input, used to locate the structure files read back
# below. The basename is taken BEFORE cutting at the first ".": doing it the
# other way round truncates the whole path at the first dot, so a dot in a
# directory component (e.g. "./hic_data/x.bed") produced a wrong or empty
# prefix.
prefix1 = os.path.basename(path1).split(".")[0]
prefix2 = os.path.basename(path2).split(".")[0]

# Number of repeated runs per penalty value.
n = 10

all_r_sq = []

# Penalty values to test.
ps = np.arange(0, 0.1, 0.01)

for p in ps:
	# Per-locus distances between the two structures, one array per run.
	all_changes = []
	for i in range(n):
		multimds.full_mds(path1, path2, penalty=p)

		# Read back the structures from "<prefix>_structure.tsv" in the
		# working directory (assumed to be what full_mds wrote -- TODO confirm).
		structure1 = dt.structure_from_file("{}_structure.tsv".format(prefix1))
		structure2 = dt.structure_from_file("{}_structure.tsv".format(prefix2))

		# Only the zero-penalty runs are explicitly aligned here.
		if p == 0:
			r, t = la.getTransformation(structure1, structure2)
			structure1.transform(r,t)

		all_changes.append(np.array([la.calcDistance(coord1, coord2) for coord1, coord2 in zip(structure1.getCoords(), structure2.getCoords())]))

	# Squared Pearson correlation for every pair of runs at this penalty.
	r_sq = []
	for i in range(n):
		for j in range(i):
			# Distinct names so the loop's penalty `p` and the rotation `r`
			# are not overwritten by the correlation result.
			corr, pval = st.pearsonr(all_changes[i], all_changes[j])
			r_sq.append(corr**2)
Exemplo n.º 10
0
import matplotlib
from matplotlib import pyplot as plt
from multimds import multimds
from multimds import linear_algebra as la

# Embed the two input datasets; one structure is returned per file.
struct1, struct2 = multimds.full_mds("sim1_chr21_100kb.bed", "sim2_chr21_100kb.bed")

# x values: genomic coordinates of struct1's points.
gen_coords = struct1.getGenCoords()
# y values: distance between corresponding points of the two structures.
dists = [la.calcDistance(coord1, coord2) for coord1, coord2 in zip(struct1.getCoords(), struct2.getCoords())]

# The frame is switched off here; the axes are drawn by hand below.
plt.subplot2grid((10,10), (0,0), 9, 10, frameon=False)
plt.plot(gen_coords, dists, lw=2)

#define offsets (pad each axis by 1/25 of its data range)
xmin = min(gen_coords)
xmax = max(gen_coords)
x_range = xmax - xmin
x_start = xmin - x_range/25.
x_end = xmax + x_range/25.

ymin = 0
ymax = max(dists)
y_range = ymax - ymin
y_start = ymin - y_range/25.
y_end = ymax + y_range/25.

#define axes with offsets
# Only the limits are passed: "frameon" is not an argument of plt.axis(), and
# current Matplotlib raises TypeError on unexpected keywords. The frame is
# already disabled where the subplot is created.
plt.axis([x_start, x_end, y_start, y_end])

#plot axes (black with line width of 4)
plt.axvline(x=x_start, color="k", lw=4)
Exemplo n.º 11
0
# Resolution (kb) and file-name prefixes of the two datasets being compared.
res_kb = 100
prefix1 = "GM12878_combined"
prefix2 = "K562"

# `chrom` is expected to be defined earlier in the script.
path1 = "hic_data/{}_{}_{}kb.bed".format(prefix1, chrom, res_kb)
path2 = "hic_data/{}_{}_{}kb.bed".format(prefix2, chrom, res_kb)

size1 = dt.size_from_bed(path1)
size2 = dt.size_from_bed(path2)

# Penalty values to scan, and one mean-error slot per penalty.
ps = np.arange(0, 0.6, 0.1)
errors = np.zeros_like(ps)

for i, p in enumerate(ps):
    #os.system("python ../multimds.py -P {} {} {}".format(p, path1, path2))
    structure1, structure2 = mm.full_mds(path1, path2, penalty=p)
    #structure1 = dt.structure_from_file("{}_{}_{}kb_structure.tsv".format(prefix1, chrom, res_kb))
    #structure2 = dt.structure_from_file("{}_{}_{}kb_structure.tsv".format(prefix2, chrom, res_kb))

    dists1 = dt.distmat(path1, structure1, size1)
    dists2 = dt.distmat(path2, structure2, size2)

    # Average the error of BOTH structures. The second term used to repeat
    # the first one (dists1/structure1), so dists2 and structure2 never
    # contributed to the result.
    errors[i] = np.mean(
        (error(dists1, structure1.getCoords()),
         error(dists2, structure2.getCoords())))

# Plot data and axis tick spacing.
xs = ps
x_int_size = 0.1
ys = errors
y_int_size = 0.05
# Start the x axis a quarter of a tick interval before the smallest value.
x_start = min(xs) - x_int_size / 4.
Exemplo n.º 12
0
from multimds import plotting as plot
from multimds import multimds as mm

# Chromosome 21 input files at 10-kb resolution for the two cell types.
gm12878_bed = "hic_data/GM12878_combined_21_10kb.bed"
k562_bed = "hic_data/K562_21_10kb.bed"

# Embed both datasets with weight=0, then show the pair interactively.
struct1, struct2 = mm.full_mds(gm12878_bed, k562_bed, weight=0)
structure_pair = (struct1, struct2)

plot.plot_structures_interactive(structure_pair)
Exemplo n.º 13
0
from multimds import data_tools as dt
import numpy as np
from multimds import compartment_analysis as ca
from sklearn import svm
from multimds import linear_algebra as la
from mayavi import mlab
from multimds import multimds as mm

def _compartment_inputs(bed_path, struct, coverage_path):
    """Compute compartment scores for one dataset.

    Builds the contact matrix for ``struct`` from ``bed_path``, reads column 6
    of ``coverage_path``, keeps the entries at the structure's bin numbers and
    passes them to ``ca.get_compartments`` together with the matrix.

    Returns a tuple ``(contacts, enrichments, bin_nums, comps)``.
    """
    contacts = dt.matFromBed(bed_path, struct)
    coverage = np.loadtxt(coverage_path, usecols=6)
    # Bin numbers of the structure's loci: their indices shifted by the bin
    # number of the structure's first position.
    bin_nums = struct.nonzero_abs_indices() + int(
        struct.chrom.minPos / struct.chrom.res)
    enrichments = coverage[bin_nums]
    comps = np.array(ca.get_compartments(contacts, struct, enrichments))
    return contacts, enrichments, bin_nums, comps


path1 = "hic_data/GM12878_combined_21_100kb.bed"
path2 = "hic_data/K562_21_100kb.bed"

# Embed both datasets; one structure is returned per input file.
struct1, struct2 = mm.full_mds(path1, path2)

# The same pipeline is applied to each cell type in turn.
contacts1, enrichments1, bin_nums1, comps1 = _compartment_inputs(
    path1, struct1, "binding_data/GM12878_21_100kb_active_coverage.bed")
contacts2, enrichments2, bin_nums2, comps2 = _compartment_inputs(
    path2, struct2, "binding_data/K562_21_100kb_active_coverage.bed")

coords1 = struct1.getCoords()
coords2 = struct2.getCoords()