Пример #1
0
def correctedScalingPlot(resolution, filename, experiment, genome, mouse=False, **kwargs):
    """Paper figure to compare scaling before/after correction.

    Loads one heatmap, plots its scaling curve labelled "Raw data", runs
    ``iterativeCorrectWithSS`` and plots the curve again labelled
    "Corrected".  The axes are then switched to log-log, fonts are shrunk
    and the legend frame is removed.

    Args:
        resolution: first argument of ``binnedDataAnalysis``; also printed
            with ``%d`` in verbose mode.
        filename: heatmap file handed to ``simpleLoad``.
        experiment: name under which the data is loaded and plotted.
        genome: second argument of ``binnedDataAnalysis``.
        mouse: accepted but not used by this function.
        **kwargs: accepted and ignored.

    Uses the module-level ``options`` (``verbose`` flag) and ``pp`` (its
    ``savefig()`` stores the current figure).
    """
    global pp
    if options.verbose:
        print >> sys.stdout, "correctedScalingPlot: res: %d file1: %s exp1:%s gen:%s" % (resolution, filename, experiment, genome)

    plt.figure()
    Tanay = binnedDataAnalysis(resolution, genome)
    Tanay.simpleLoad(filename, experiment)
    Tanay.removePoorRegions()
    Tanay.removeDiagonal()
    Tanay.plotScaling(experiment, label="Raw data", color="#A7A241")
    Tanay.iterativeCorrectWithSS()
    Tanay.plotScaling(experiment, label="Corrected", color="#344370")
    ax = plt.gca()
    plotting.removeAxes()

    fs = 6  # single font size shared by axis labels, tick labels and legend
    plt.xlabel("Genomic distance (MB)", fontsize=fs)
    plt.ylabel("Contact probability", fontsize=fs)
    for tick_label in ax.get_xticklabels():
        tick_label.set_fontsize(fs)
    for tick_label in ax.get_yticklabels():
        tick_label.set_fontsize(fs)
    legend = plt.legend(loc=0, prop={"size": fs})
    legend.draw_frame(False)
    plt.xscale("log")
    plt.yscale("log")

    # Save BEFORE showing: a blocking show() releases the figure once its
    # window is closed, so a later savefig() has no drawn figure to store.
    pp.savefig()
    plt.show()
Пример #2
0
def doArmPlot(resolution, filename, experiment, genome, mouse=False, **kwargs):
    """Plot a single interarm map - paper figure.

    Loads ``filename`` as ``experiment``, drops one chromosome, runs the
    cleaning steps (``removeDiagonal(1)``, ``removePoorRegions``,
    ``truncTrans``, ``fakeCis``) and draws ``averageTransMap`` with a
    vertical colorbar.

    Args:
        resolution: first argument of ``binnedDataAnalysis``; also printed
            with ``%d`` in verbose mode.
        filename: heatmap file handed to ``simpleLoad``.
        experiment: name under which the data is loaded and plotted.
        genome: second argument of ``binnedDataAnalysis``.
        mouse: when exactly ``True``, the hard-coded fake translocations
            are applied and chromosome index 19 is removed; otherwise
            chromosome index 22 is removed.
        **kwargs: forwarded to ``averageTransMap``.

    Uses the module-level ``options`` for the ``verbose`` flag.
    """
    plt.figure()
    if options.verbose:
        print >> sys.stdout, "doArmPlot: res: %d file: %s exp:%s gen:%s" % (resolution, filename, experiment, genome)

    Tanay = binnedDataAnalysis(resolution, genome)
    Tanay.simpleLoad(filename, experiment)
    if mouse == True:
        # Hard-coded translocation list; the tuple layout is defined by
        # fakeTranslocations, which is not visible in this file.
        Tanay.fakeTranslocations([(0, 0, None, 12, 52000000, None),
                                  (4, 45000000, None, 12, 0, 30000000),
                                  (9, 0, 50000000, 12, 0, 35000000)])
        Tanay.removeChromosome(19)
    else:
        Tanay.removeChromosome(22)
    Tanay.removeDiagonal(1)
    Tanay.removePoorRegions()
    Tanay.truncTrans()
    Tanay.fakeCis()
    Tanay.averageTransMap(experiment, **kwargs)

    cb = plt.colorbar(orientation="vertical")
    # A vertical colorbar carries its tick labels on the y axis, so those
    # are the ones that need resizing.  The x labels are still touched so
    # nothing the old code did is lost.
    tick_labels = list(cb.ax.get_xticklabels()) + list(cb.ax.get_yticklabels())
    for tick_label in tick_labels:
        tick_label.set_fontsize(6)
Пример #3
0
def plotSixHeatmaps():
    """Plot six heatmaps for the correlation supplementary figure.

    For the 10 MB binning, a fixed sub-block of the "HindIII", "NcoI" and
    "control" heatmaps is cut out and drawn on a 3x2 grid: the top row
    before ``iterativeCorrectWithoutSS`` and the lower two rows after it.
    Each stage uses one colour range for all of its panels.
    """
    sources = (("../../../tcc/working/HindIII_%d.hm", "HindIII"),
               ("../../../tcc/working/NcoI_%d.hm", "NcoI"),
               ("../../../tcc/working/control_%d.hm", "control"))

    def valueRange(*matrices):
        # Smallest and largest entry over all given matrices.
        lowest = min([m.min() for m in matrices])
        highest = max([m.max() for m in matrices])
        return lowest, highest

    def heatmap(code, matrix, low, high):
        # Select the subplot and draw one matrix with the shared range.
        plt.subplot(code)
        plt.imshow(matrix, interpolation="nearest", vmin=low, vmax=high)

    plt.figure(figsize=(3, 3))
    for megabases in [10]:
        binned = binnedDataAnalysis(megabases * 1000000, "../../../data/hg18")
        for template, key in sources:
            binned.simpleLoad(template % megabases, key)
        binned.removeDiagonal()
        binned.removePoorRegions(cutoff=5)
        binned.removeZeros()

        # Rows come from the first chromosome, columns from the second;
        # the last bin of each is left out, exactly as the slice dictates.
        rowStart = binned.chromosomeStarts[0]
        colStart = binned.chromosomeStarts[1]
        rows = slice(rowStart, binned.chromosomeEnds[0] - 1)
        cols = slice(colStart, binned.chromosomeEnds[1] - 1)

        hind = binned.dataDict["HindIII"][rows, cols]
        nco = binned.dataDict["NcoI"][rows, cols]
        control = binned.dataDict["control"][rows, cols]
        low, high = valueRange(hind, nco)

        heatmap(321, hind, low, high)
        plt.title("HindIII, raw")
        plt.colorbar()
        heatmap(322, nco, low, high)
        plt.colorbar()
        plt.title("NcoI, raw")

        binned.iterativeCorrectWithoutSS()
        hind = binned.dataDict["HindIII"][rows, cols]
        nco = binned.dataDict["NcoI"][rows, cols]
        control = binned.dataDict["control"][rows, cols]
        low, high = valueRange(hind, nco, control)

        corrected = ((323, hind, "HindIII, IC"),
                     (324, nco, "NcoI, IC"),
                     (325, hind, "HindIII, 50%"),
                     (326, control, "HindIII, 50%"))
        for code, matrix, caption in corrected:
            heatmap(code, matrix, low, high)
            plt.colorbar()
            plt.title(caption)
        plt.show()
Пример #4
0
def doArmPlot(resolution, filename, experiment, genome, mouse=False, **kwargs):
    """Plot a single interarm map - paper figure.

    Loads ``filename`` as ``experiment``, drops one chromosome, runs the
    cleaning steps (``removeDiagonal(1)``, ``removePoorRegions``,
    ``truncTrans``, ``fakeCis``) and draws ``averageTransMap`` with a
    vertical colorbar.

    Args:
        resolution: first argument of ``binnedDataAnalysis``; also printed
            with ``%d`` in verbose mode.
        filename: heatmap file handed to ``simpleLoad``.
        experiment: name under which the data is loaded and plotted.
        genome: second argument of ``binnedDataAnalysis``.
        mouse: when exactly ``True``, the hard-coded fake translocations
            are applied and chromosome index 19 is removed; otherwise
            chromosome index 22 is removed.
        **kwargs: forwarded to ``averageTransMap``.

    Uses the module-level ``options`` for the ``verbose`` flag.
    """
    plt.figure()
    if options.verbose:
        print >> sys.stdout, "doArmPlot: res: %d file: %s exp:%s gen:%s" % (
            resolution, filename, experiment, genome)

    Tanay = binnedDataAnalysis(resolution, genome)
    Tanay.simpleLoad(filename, experiment)
    if mouse == True:
        # Hard-coded translocation list; the tuple layout is defined by
        # fakeTranslocations, which is not visible in this file.
        Tanay.fakeTranslocations([(0, 0, None, 12, 52000000, None),
                                  (4, 45000000, None, 12, 0, 30000000),
                                  (9, 0, 50000000, 12, 0, 35000000)])
        Tanay.removeChromosome(19)
    else:
        Tanay.removeChromosome(22)
    Tanay.removeDiagonal(1)
    Tanay.removePoorRegions()
    Tanay.truncTrans()
    Tanay.fakeCis()
    Tanay.averageTransMap(experiment, **kwargs)

    cb = plt.colorbar(orientation="vertical")
    # A vertical colorbar carries its tick labels on the y axis, so those
    # are the ones that need resizing.  The x labels are still touched so
    # nothing the old code did is lost.
    tick_labels = list(cb.ax.get_xticklabels()) + list(cb.ax.get_yticklabels())
    for tick_label in tick_labels:
        tick_label.set_fontsize(6)
Пример #5
0
def getInterValues(filenames, genome):
    """Collect interchromosomal values for each named 1 MB heatmap.

    For every entry of ``filenames`` a fresh ``binnedDataAnalysis`` is
    built for ``genome``, the heatmap ``<genome>/<name>-1000k.hm`` is
    loaded as "test", ``truncTrans(high=0.0001)`` is applied and the
    result of ``interchromosomalValues("test")`` is kept.

    Returns:
        A list with one entry per filename, in input order (empty list
        for empty input).
    """
    def valuesFor(name):
        # A new analysis object per file, so datasets never share state.
        binned = binnedDataAnalysis(
            1000000,
            "/home/magus/HiC2011/data/{0}".format(genome),
            readChrms=["#", "X"])
        binned.simpleLoad("{1}/{0}-1000k.hm".format(name, genome), "test")
        binned.truncTrans(high=0.0001)
        return binned.interchromosomalValues("test")

    return [valuesFor(name) for name in filenames]
Пример #6
0
def compareCorrelationOfEigenvectors():
    """Plot absolute eigenvector correlations between three datasets.

    Paper figure.  Three hard-coded 1 MB heatmaps ("Erez", "NcoI", "TCC")
    are loaded and cleaned, ten eigenvectors are computed, and the
    absolute Pearson correlation of every eigenvector pair is shown as
    three side-by-side heatmaps.

    The function finishes with a bare ``raise``, so it never returns
    normally; execution stops right after the figure is shown.
    """
    analysis = binnedDataAnalysis(1000000, "../../../data/hg18")
    inputs = (("../../../ErezPaperData/hg18/GM-HindIII-hg18-1M.hm", "Erez"),
              ("../../../ErezPaperData/hg18/GM-NcoI-hg18-1M.hm", "NcoI"),
              ("../../../tcc/hg18/tcc-HindIII-hg18-1M.hm", "TCC"))
    for path, label in inputs:
        analysis.simpleLoad(path, label)
    analysis.removeDiagonal()
    analysis.removePoorRegions()
    analysis.removeZeros()
    analysis.truncTrans()
    analysis.fakeCis()
    numVectors = 10
    analysis.doEig(numPCs=numVectors)

    eigErez = analysis.EigDict["Erez"]
    eigNcoI = analysis.EigDict["NcoI"]
    eigTCC = analysis.EigDict["TCC"]

    ncoiVsErez = numpy.zeros((numVectors, numVectors))
    tccVsErez = numpy.zeros((numVectors, numVectors))
    tccVsNcoi = numpy.zeros((numVectors, numVectors))
    for row in xrange(numVectors):
        for col in xrange(numVectors):
            ncoiVsErez[row][col] = abs(
                numpy.corrcoef(eigNcoI[row], eigErez[col])[0, 1])
            tccVsErez[row][col] = abs(
                numpy.corrcoef(eigTCC[row], eigErez[col])[0, 1])
            tccVsNcoi[row][col] = abs(
                numpy.corrcoef(eigTCC[row], eigNcoI[col])[0, 1])

    plt.figure(figsize=(7.5, 2.5))
    plt.gcf().subplots_adjust(0.2, 0.2, 0.85, 0.85)
    panels = ((131, "HiC 2009, HindIII", "HiC 2009, NcoI", ncoiVsErez),
              (132, "HiC 2009, HindIII", "TCC 2011, HindIII", tccVsErez),
              (133, "HiC 2009, NcoI", "TCC 2011, HindIII", tccVsNcoi))
    for code, columnsLabel, rowsLabel, matrix in panels:
        plt.subplot(code)
        plt.xlabel(columnsLabel)
        plt.ylabel(rowsLabel)
        plt.imshow(matrix, interpolation="nearest", vmin=0, vmax=1)
        plt.colorbar()
    plt.show()
    # Deliberate stop: a bare raise with no active exception always fails.
    raise
Пример #7
0
def compareCorrelationOfEigenvectors(
    resolution, filename1, filename2, experiment1, experiment2, genome, mouse=False, **kwargs
):
    """Plot eigenvector correlation between two datasets (paper figure).

    Both heatmaps are loaded into one ``binnedDataAnalysis``, cleaned, and
    ten eigenvectors are computed per dataset.  Entry ``[i][j]`` of the
    plotted matrix is the absolute Pearson correlation between
    eigenvector ``i`` of ``experiment2`` and eigenvector ``j`` of
    ``experiment1``.

    Args:
        resolution: first argument of ``binnedDataAnalysis``; also printed
            with ``%d`` in verbose mode.
        filename1, filename2: heatmap files handed to ``simpleLoad``.
        experiment1, experiment2: dataset names; also used as the x and y
            axis labels respectively.
        genome: second argument of ``binnedDataAnalysis``.
        mouse: accepted but not used by this function.
        **kwargs: accepted and ignored.

    Uses the module-level ``options`` (``verbose`` flag) and ``pp`` (its
    ``savefig()`` stores the current figure).
    """
    global pp
    if options.verbose:
        print >> sys.stdout, "compareCorrelationOfEigenvectors: res: %d file1: %s file2: %s exp1:%s exp2:%s gen:%s" % (
            resolution,
            filename1,
            filename2,
            experiment1,
            experiment2,
            genome,
        )

    # NOTE(review): this figure is opened before the analysis and a second
    # one is opened for the plot below; nothing visible here draws on this
    # one.  Kept so that any drawing done by the analysis calls still has
    # its own figure - confirm and remove if it is always empty.
    plt.figure()
    Tanay = binnedDataAnalysis(resolution, genome)
    Tanay.simpleLoad(filename1, experiment1)
    Tanay.simpleLoad(filename2, experiment2)

    Tanay.removeDiagonal()
    Tanay.removePoorRegions()
    Tanay.removeZeros()
    Tanay.truncTrans()
    Tanay.fakeCis()
    M = 10  # number of eigenvectors compared per dataset
    Tanay.doEig(numPCs=M)

    E1 = Tanay.EigDict[experiment1]
    E2 = Tanay.EigDict[experiment2]

    data = numpy.zeros((M, M))
    for i in xrange(M):
        for j in xrange(M):
            # abs(): only the strength of the correlation is plotted.
            data[i][j] = abs(numpy.corrcoef(E2[i], E1[j])[0, 1])

    plt.figure(figsize=(8, 8))
    plt.gcf().subplots_adjust(0.2, 0.2, 0.85, 0.85)
    plt.subplot(111)
    plt.xlabel(experiment1)
    plt.ylabel(experiment2)
    plt.imshow(data, interpolation="nearest", vmin=0, vmax=1)
    plt.colorbar()

    # Save BEFORE showing: a blocking show() releases the figure once its
    # window is closed, so a later savefig() has no drawn figure to store.
    pp.savefig()
    plt.show()
Пример #8
0
def compareCorrelationOfEigenvectors(resolution,
                                     filename1,
                                     filename2,
                                     experiment1,
                                     experiment2,
                                     genome,
                                     mouse=False,
                                     **kwargs):
    """Plot eigenvector correlation between two datasets (paper figure).

    Both heatmaps are loaded into one ``binnedDataAnalysis``, cleaned, and
    ten eigenvectors are computed per dataset.  Entry ``[i][j]`` of the
    plotted matrix is the absolute Pearson correlation between
    eigenvector ``i`` of ``experiment2`` and eigenvector ``j`` of
    ``experiment1``.

    Args:
        resolution: first argument of ``binnedDataAnalysis``; also printed
            with ``%d`` in verbose mode.
        filename1, filename2: heatmap files handed to ``simpleLoad``.
        experiment1, experiment2: dataset names; also used as the x and y
            axis labels respectively.
        genome: second argument of ``binnedDataAnalysis``.
        mouse: accepted but not used by this function.
        **kwargs: accepted and ignored.

    Uses the module-level ``options`` (``verbose`` flag) and ``pp`` (its
    ``savefig()`` stores the current figure).
    """
    global pp
    if options.verbose:
        print >> sys.stdout, "compareCorrelationOfEigenvectors: res: %d file1: %s file2: %s exp1:%s exp2:%s gen:%s" % (
            resolution, filename1, filename2, experiment1, experiment2, genome)

    # NOTE(review): this figure is opened before the analysis and a second
    # one is opened for the plot below; nothing visible here draws on this
    # one.  Kept so that any drawing done by the analysis calls still has
    # its own figure - confirm and remove if it is always empty.
    plt.figure()
    Tanay = binnedDataAnalysis(resolution, genome)
    Tanay.simpleLoad(filename1, experiment1)
    Tanay.simpleLoad(filename2, experiment2)

    Tanay.removeDiagonal()
    Tanay.removePoorRegions()
    Tanay.removeZeros()
    Tanay.truncTrans()
    Tanay.fakeCis()
    M = 10  # number of eigenvectors compared per dataset
    Tanay.doEig(numPCs=M)

    E1 = Tanay.EigDict[experiment1]
    E2 = Tanay.EigDict[experiment2]

    data = numpy.zeros((M, M))
    for i in xrange(M):
        for j in xrange(M):
            # abs(): only the strength of the correlation is plotted.
            data[i][j] = abs(numpy.corrcoef(E2[i], E1[j])[0, 1])

    plt.figure(figsize=(8, 8))
    plt.gcf().subplots_adjust(0.2, 0.2, 0.85, 0.85)
    plt.subplot(111)
    plt.xlabel(experiment1)
    plt.ylabel(experiment2)
    plt.imshow(data, interpolation="nearest", vmin=0, vmax=1)
    plt.colorbar()

    # Save BEFORE showing: a blocking show() releases the figure once its
    # window is closed, so a later savefig() has no drawn figure to store.
    pp.savefig()
    plt.show()
Пример #9
0
def doReconstructedArmPlot(filename=GM1M, genome=myGenome,
                           usePCs=[0, 1], mouse=False,
                           **kwargs):
    """Plot an interarm map rebuilt from selected eigenvectors.

    Supplementary paper figure (the original note calls it the PC2-PC3
    interarm map).  The 1 MB heatmap is loaded as "GM-all" and cleaned,
    and eigenvectors up to ``max(usePCs)`` are computed.  A matrix is then
    assembled as the eigenvalue-weighted sum of the outer products of the
    selected eigenvectors, shifted by the mean of the original data over
    the bins where the first selected eigenvector is non-zero.  The result
    is stored under the key "BLA" and drawn with ``averageTransMap``.

    Args:
        filename: heatmap file handed to ``simpleLoad``.
        genome: second argument of ``binnedDataAnalysis``.
        usePCs: indices of the eigenvectors to use.  It is used directly
            as an index into ``EigDict["GM-all"]`` and is never modified
            here, so the shared list default is not changed between calls.
        mouse: when exactly ``True``, the hard-coded fake translocations
            are applied and chromosome index 19 is removed; otherwise
            chromosome index 22 is removed.
        **kwargs: forwarded to ``averageTransMap``.
    """
    Tanay = binnedDataAnalysis(1000000, genome)
    Tanay.simpleLoad(filename, "GM-all")
    if mouse == True:
        # Hard-coded translocation list; the tuple layout is defined by
        # fakeTranslocations, which is not visible in this file.
        Tanay.fakeTranslocations([(0, 0, None, 12, 52000000, None),
                                  (4, 45000000, None, 12, 0, 30000000),
                                  (9, 0, 50000000, 12, 0, 35000000)])
        Tanay.removeChromosome(19)
    else:
        Tanay.removeChromosome(22)
    Tanay.removeDiagonal()
    Tanay.removePoorRegions()
    Tanay.removeZeros()
    Tanay.truncTrans()
    Tanay.fakeCis()
    Tanay.doEig(numPCs=max(usePCs) + 1)
    print(Tanay.eigEigenvalueDict)

    Tanay.restoreZeros(value=0)
    PCs = Tanay.EigDict["GM-all"][usePCs]
    eigenvalues = Tanay.eigEigenvalueDict["GM-all"][usePCs]

    # Sum over the selected eigenvectors of (outer product * eigenvalue).
    proj = reduce(lambda x, y: x + y,
                  [PCs[i][:, None] * PCs[i][None, :] * eigenvalues[i]
                   for i in xrange(len(PCs))])
    # Bin pairs where the first selected eigenvector is non-zero on both
    # sides; only those receive the mean offset.
    mask = PCs[0] != 0
    mask = mask[:, None] * mask[None, :]
    data = Tanay.dataDict["GM-all"]
    datamean = numpy.mean(data[mask])
    proj[mask] += datamean
    Tanay.dataDict["BLA"] = proj
    Tanay.averageTransMap("BLA", **kwargs)

    cb = plt.colorbar(orientation="vertical")
    # A vertical colorbar carries its tick labels on the y axis, so those
    # are the ones that need resizing.  The x labels are still touched so
    # nothing the old code did is lost.
    tick_labels = list(cb.ax.get_xticklabels()) + list(cb.ax.get_yticklabels())
    for tick_label in tick_labels:
        tick_label.set_fontsize(6)
Пример #10
0
def correctedScalingPlot():
    """Draw the raw vs. corrected scaling curves (paper figure).

    Uses the module-level ``GM200kBreaks`` heatmap at 200 kb resolution:
    the scaling is plotted once before and once after
    ``iterativeCorrectWithSS``, on log-log axes with 6 pt fonts and a
    frameless legend.
    """
    plt.figure(figsize=(4, 4))
    analysis = binnedDataAnalysis(200000, genome=myGenome)
    analysis.simpleLoad(GM200kBreaks, "GM-all")
    analysis.removePoorRegions()
    analysis.removeDiagonal()
    analysis.plotScaling("GM-all", label="Raw data", color="#A7A241")
    analysis.iterativeCorrectWithSS()
    analysis.plotScaling("GM-all", label="Corrected", color="#344370")

    axes = plt.gca()
    mirnylib.plotting.removeAxes()
    smallFont = 6
    plt.xlabel("Genomic distance (MB)", fontsize=smallFont)
    plt.ylabel("Contact probability", fontsize=smallFont)
    # x tick labels first, then y, each fetched right before being resized
    for fetchLabels in (axes.get_xticklabels, axes.get_yticklabels):
        for tickLabel in fetchLabels():
            tickLabel.set_fontsize(smallFont)
    plt.legend(loc=0, prop={"size": smallFont}).draw_frame(False)
    for setScale in (plt.xscale, plt.yscale):
        setScale("log")
    plt.show()
Пример #11
0
def doArmPlot(filename=GM1M, genome=myGenome, mouse=False, **kwargs):
    """Plot a single interarm map - paper figure.

    Loads ``filename`` at 1 MB resolution as "GM-all", drops one
    chromosome, runs the cleaning steps (``removeDiagonal(1)``,
    ``removePoorRegions``, ``truncTrans``, ``fakeCis``) and draws
    ``averageTransMap`` with a vertical colorbar.

    Args:
        filename: heatmap file handed to ``simpleLoad``.
        genome: second argument of ``binnedDataAnalysis``.
        mouse: when exactly ``True``, the hard-coded fake translocations
            are applied and chromosome index 19 is removed; otherwise
            chromosome index 22 is removed.
        **kwargs: forwarded to ``averageTransMap``.
    """
    Tanay = binnedDataAnalysis(1000000, genome)
    Tanay.simpleLoad(filename, "GM-all")
    if mouse == True:
        # Hard-coded translocation list; the tuple layout is defined by
        # fakeTranslocations, which is not visible in this file.
        Tanay.fakeTranslocations([(0, 0, None, 12, 52000000, None),
                                  (4, 45000000, None, 12, 0, 30000000),
                                  (9, 0, 50000000, 12, 0, 35000000)])
        Tanay.removeChromosome(19)
    else:
        Tanay.removeChromosome(22)
    Tanay.removeDiagonal(1)
    Tanay.removePoorRegions()
    Tanay.truncTrans()
    Tanay.fakeCis()
    Tanay.averageTransMap("GM-all", **kwargs)

    cb = plt.colorbar(orientation="vertical")
    # A vertical colorbar carries its tick labels on the y axis, so those
    # are the ones that need resizing.  The x labels are still touched so
    # nothing the old code did is lost.
    tick_labels = list(cb.ax.get_xticklabels()) + list(cb.ax.get_yticklabels())
    for tick_label in tick_labels:
        tick_label.set_fontsize(6)
Пример #12
0
def correctedScalingPlot(resolution,
                         filename,
                         experiment,
                         genome,
                         mouse=False,
                         **kwargs):
    """Paper figure to compare scaling before/after correction.

    Loads one heatmap, plots its scaling curve labelled "Raw data", runs
    ``iterativeCorrectWithSS`` and plots the curve again labelled
    "Corrected".  The axes are then switched to log-log, fonts are shrunk
    and the legend frame is removed.

    Args:
        resolution: first argument of ``binnedDataAnalysis``; also printed
            with ``%d`` in verbose mode.
        filename: heatmap file handed to ``simpleLoad``.
        experiment: name under which the data is loaded and plotted.
        genome: second argument of ``binnedDataAnalysis``.
        mouse: accepted but not used by this function.
        **kwargs: accepted and ignored.

    Uses the module-level ``options`` (``verbose`` flag) and ``pp`` (its
    ``savefig()`` stores the current figure).
    """
    global pp
    if options.verbose:
        print >> sys.stdout, "correctedScalingPlot: res: %d file1: %s exp1:%s gen:%s" % (
            resolution, filename, experiment, genome)

    plt.figure()
    Tanay = binnedDataAnalysis(resolution, genome)
    Tanay.simpleLoad(filename, experiment)
    Tanay.removePoorRegions()
    Tanay.removeDiagonal()
    Tanay.plotScaling(experiment, label="Raw data", color="#A7A241")
    Tanay.iterativeCorrectWithSS()
    Tanay.plotScaling(experiment, label="Corrected", color="#344370")
    ax = plt.gca()
    plotting.removeAxes()

    fs = 6  # single font size shared by axis labels, tick labels and legend
    plt.xlabel("Genomic distance (MB)", fontsize=fs)
    plt.ylabel("Contact probability", fontsize=fs)
    for tick_label in ax.get_xticklabels():
        tick_label.set_fontsize(fs)
    for tick_label in ax.get_yticklabels():
        tick_label.set_fontsize(fs)
    legend = plt.legend(loc=0, prop={"size": fs})
    legend.draw_frame(False)
    plt.xscale("log")
    plt.yscale("log")

    # Save BEFORE showing: a blocking show() releases the figure once its
    # window is closed, so a later savefig() has no drawn figure to store.
    pp.savefig()
    plt.show()
Пример #13
0
from hiclib.binnedData import binnedDataAnalysis

import os
import sys

# Smoke test: run a typical binnedDataAnalysis pipeline end to end on the
# test heatmap.  No result is verified - it only checks that nothing raises.

# NOTE(review): this says hg18 while the folder below points at hg19; the
# name is not used in this script - confirm which genome is intended.
workingGenome = "hg18"
genomeFolder = "../../../data/hg19"
if not os.path.exists(genomeFolder):
    # Fall back to the first command-line argument when the default folder
    # is missing.  Only a missing argument is turned into the help message;
    # any other error propagates unchanged.
    try:
        genomeFolder = sys.argv[1]
    except IndexError:
        raise StandardError(
            "Please provide hg19 Genome folder in the code or as a first argument"
        )

a = binnedDataAnalysis(1000000, genomeFolder)
a.simpleLoad("../fragmentHiC/test-1M.hm", "test")
a.removeDiagonal()
a.removePoorRegions()
a.removeCis()
a.fakeCis()
a.removeZeros()
a.iterativeCorrectWithoutSS()
a.doEig()
a.export("test", "testHeatmap")
print("everything worked, but no verification of result was made, because we haven't written it yet...")
Пример #14
0
def compareInterarmMaps(resolution,
                        filename1,
                        filename2,
                        experiment1,
                        experiment2,
                        genome,
                        mouse=False,
                        **kwargs):
    """Plot 8 interarm maps - paper supplement figure.

    Two datasets are loaded side by side and ``averageTransMap`` is drawn
    for each of them at four stages, on a 4x2 grid (left column
    ``experiment1``, right column ``experiment2``):

    * row 1 - raw data,
    * row 3 - after ``iterativeCorrectWithSS``,
    * row 2 - after ``iterativeCorrectWithoutSS``,
    * row 4 - after ``fakeCis`` ("trans only").

    Args:
        resolution: first argument of ``binnedDataAnalysis``; also printed
            with ``%d`` in verbose mode.
        filename1, filename2: heatmap files handed to ``simpleLoad``.
        experiment1, experiment2: dataset names, also used in the titles.
        genome: second argument of ``binnedDataAnalysis``.
        mouse: accepted but not used by this function.
        **kwargs: accepted and ignored.

    Uses the module-level ``options`` (``verbose`` flag) and ``pp`` (its
    ``savefig()`` stores the current figure).
    """
    global pp

    if options.verbose:
        print >> sys.stdout, "compareInterarmMaps: res: %d file1: %s file2: %s exp1:%s exp2:%s gen:%s" % (
            resolution, filename1, filename2, experiment1, experiment2, genome)

    Tanay = binnedDataAnalysis(resolution, genome)

    Tanay.simpleLoad(filename1, experiment1)
    Tanay.simpleLoad(filename2, experiment2)
    Tanay.removeDiagonal()
    Tanay.removePoorRegions(cutoff=2)
    fs = 10
    # No fixed colour limits: every panel uses its own range.
    vmin = None
    vmax = None

    def _panel(code, name, suffix):
        # Draw one map; the dataset plotted is always the one named in the
        # title, so title and data cannot drift apart.
        plt.subplot(code)
        plt.title(name + suffix, fontsize=fs)
        Tanay.averageTransMap(name, vmin=vmin, vmax=vmax)
        plt.colorbar()

    plt.figure(figsize=(12, 16))
    _panel(421, experiment1, ", raw")
    _panel(422, experiment2, ", raw")

    Tanay.iterativeCorrectWithSS()
    _panel(425, experiment1, ", with SS reads")
    _panel(426, experiment2, ", with SS reads")

    Tanay.iterativeCorrectWithoutSS()
    # Fixed: this panel used to plot experiment2 under experiment1's title.
    _panel(423, experiment1, ", no SS reads")
    _panel(424, experiment2, ", no ss reads")

    Tanay.fakeCis()
    _panel(427, experiment1, ", trans only")
    _panel(428, experiment2, ", trans only")

    # Save BEFORE showing: a blocking show() releases the figure once its
    # window is closed, so a later savefig() has no drawn figure to store.
    pp.savefig()
    plt.show()
Пример #15
0
def compareInterarmMaps():
    """Plot 8 interarm maps - paper supplement figure.

    The module-level ``GM1M`` (as "GM-all") and ``GM1MNcoI`` (as
    "GM-NcoI") heatmaps are drawn with ``averageTransMap`` on a 4x2 grid:
    raw, after ``iterativeCorrectWithSS``, after
    ``iterativeCorrectWithoutSS``, and after ``fakeCis``.
    """
    analysis = binnedDataAnalysis(1000000, myGenome)
    analysis.simpleLoad(GM1M, "GM-all")
    analysis.simpleLoad(GM1MNcoI, "GM-NcoI")
    analysis.removeDiagonal()
    analysis.removePoorRegions(cutoff=2)
    titleSize = 10

    def drawPanel(code, caption, dataset):
        # One subplot: title, map without fixed colour limits, colorbar.
        plt.subplot(code)
        plt.title(caption, fontsize=titleSize)
        analysis.averageTransMap(dataset, vmin=None, vmax=None)
        plt.colorbar()

    drawPanel(421, "GM, HindIII, raw", "GM-all")
    drawPanel(422, "GM, NcoI, raw", "GM-NcoI")

    analysis.iterativeCorrectWithSS()
    drawPanel(425, "GM, HindIII, with SS reads", "GM-all")
    drawPanel(426, "GM, NcoI, with SS reads", "GM-NcoI")

    analysis.iterativeCorrectWithoutSS()
    drawPanel(423, "GM, HindIII, no SS reads", "GM-all")
    drawPanel(424, "GM, NcoI, no ss reads", "GM-NcoI")

    analysis.fakeCis()
    drawPanel(427, "GM, HindIII, trans only", "GM-all")
    drawPanel(428, "GM, NcoI, trans only", "GM-NcoI")

    plt.show()
Пример #16
0
def plotCorrelationAtDifferentBinning():
    """Plot Spearman correlation between datasets at bin sizes 1-10 MB.

    Supplementary paper figure.  For every bin size the "HindIII", "NcoI"
    and "control" heatmaps are loaded from ../../../tcc/working and two
    correlations are computed over valid trans bin pairs: HindIII vs NcoI
    and HindIII vs control.  Each is computed once on the data as loaded
    and once after removeZeros / fakeCis / restoreZeros.  Three of the
    four series are plotted (raw HindIII-vs-control is collected in ``p4``
    but never drawn).

    Two local flags control heatmap generation:

    * ``create`` - when True, the binned heatmaps are (re)written first.
    * ``cache``  - only read when ``create`` is True.  True builds the
      three fragment datasets from the refined .frag files and saves them
      as cache1..cache3; False loads those cache files instead.

    The function ends with ``0 / 0`` right after ``setExceptionHook()``,
    so it always finishes by raising ZeroDivisionError.
    """

    sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    setExceptionHook()

    cache = False
    create = False

    if create == True:
        if cache == True:
            #-------------------standard version code-----------------
            # FR and FR3 load the same HindIII file; they are split into
            # complementary random halves below.  FR2 is the NcoI data.
            FR = fragmentHiC.HiCdataset("bla", "../../../data/hg18",
                                        override=False, inMemory=True)
            FR.load("../../../ErezPaperData/hg18/GM-HindIII-hg18_refined.frag")

            FR3 = fragmentHiC.HiCdataset("bla", "../../../data/hg18",
                                         override=False, inMemory=True)
            FR3.load("../../../ErezPaperData/hg18/GM-HindIII-hg18"\
                     "_refined.frag")

            FR2 = fragmentHiC.HiCdataset("bla", "../../../data/hg18",
                                         override=False, inMemory=True)
            FR2.load("../../../ErezPaperData/hg18/GM-NcoI-hg18_refined.frag")

            #----------------------cross-check code----------------
#            FR = fragmentHiC.HiCdataset("bla", "../../../data/hg18",
#                                        override=False, inMemory=True)
#            FR.load("../../../ErezPaperData/hg18/GM-NcoI-hg18_refined.frag")
#
#            FR3 = fragmentHiC.HiCdataset("bla", "../../../data/hg18",
#                                         override=False, inMemory=True)
#            FR3.load("../../../ErezPaperData/hg18/GM-NcoI-hg18_refined.frag")
#
#            FR2 = fragmentHiC.HiCdataset("bla", "../../../data/hg18",
#                                         override=False, inMemory=True)
#            FR2.load("../../../ErezPaperData/hg18/G"\
#                    "M-HindIII-hg18_refined.frag")
            #-------end cross-check code ---------------------------------
            #--------Filter only trans DS reads-----------------
            FR.maskFilter(FR.DS * (FR.chrms1 != FR.chrms2))
            FR2.maskFilter(FR2.DS * (FR2.chrms1 != FR2.chrms2))
            FR3.maskFilter(FR3.DS * (FR3.chrms1 != FR3.chrms2))

            #Now create two halves of one dataset and down-sample second dataset
            #----------------------standard version code--------
            # Keep-probability for FR2: half the length of FR.DS divided by
            # the length of FR2.DS.
            fraction = 0.5 * len(FR.DS) / float(len(FR2.DS))

            # One random draw per FR entry; mask1 and mask3 are exact
            # complements, so FR and FR3 end up with disjoint halves.
            rarray = numpy.random.random(len(FR.DS))
            mask1 = rarray < 0.5
            mask3 = rarray >= 0.5
            mask2 = numpy.random.random(len(FR2.DS)) < fraction

            #-------------------- cross-check code---------
            #fraction = 0.5 * len(FR2.DS) / float(len(FR.DS))

            #rarray = numpy.random.random(len(FR.DS))
            #mask1 =  rarray  < fraction
            #mask3 = (rarray > fraction) * (rarray < fraction * 2)
            #mask2 =  numpy.random.random(len(FR2.DS)) > 0.5
            #-----------------------------------------

            FR.maskFilter(mask1)
            FR2.maskFilter(mask2)
            FR3.maskFilter(mask3)

            FR.save("../../../tcc/working/cache1")
            FR2.save("../../../tcc/working/cache2")
            FR3.save("../../../tcc/working/cache3")
        else:
            # Reuse the filtered datasets saved by an earlier cache=True run.
            FR = fragmentHiC.HiCdataset("bla", "../../../data/hg18",
                                        override=False, inMemory=True)
            FR.load("../../../tcc/working/cache1")

            FR3 = fragmentHiC.HiCdataset("bla", "../../../data/hg18",
                                         override=False, inMemory=True)
            FR3.load("../../../tcc/working/cache3")

            FR2 = fragmentHiC.HiCdataset("bla", "../../../data/hg18",
                                         override=False, inMemory=True)
            FR2.load("../../../tcc/working/cache2")

        # Write one heatmap per dataset and bin size: FR -> HindIII,
        # FR2 -> NcoI, FR3 -> control.
        for size in sizes:
            FR.saveHeatmap("../../../tcc/working/HindIII_%d.hm" %
                           size, size * 1000000)
            FR2.saveHeatmap("../../../tcc/working/NcoI_%d.hm" %
                            size, size * 1000000)
            FR3.saveHeatmap("../../../tcc/working/control_%d.hm" %
                            size, size * 1000000)

    # Per-size correlation series:
    #   p1 - as loaded,       HindIII vs NcoI
    #   p2 - after fakeCis,   HindIII vs NcoI
    #   p3 - after fakeCis,   HindIII vs control
    #   p4 - as loaded,       HindIII vs control (collected, not plotted)
    p1 = []
    p2 = []
    p3 = []
    p4 = []
    evs = []  # interchromosomalValues results, filled for size == 1 only
    for size in sizes:

        BD = binnedDataAnalysis(size * 1000000, "../../../data/hg18")
        BD.simpleLoad("../../../tcc/working/HindIII_%d.hm" % size, "HindIII")
        BD.simpleLoad("../../../tcc/working/NcoI_%d.hm" % size, "NcoI")
        BD.simpleLoad("../../../tcc/working/control_%d.hm" % size, "control")
        BD.removeDiagonal()
        BD.removePoorRegions(cutoff=2)
        BD.removeCis()

        data1 = BD.dataDict["HindIII"]
        data2 = BD.dataDict["NcoI"]
        data3 = BD.dataDict["control"]

        # A bin is valid when its column sum is positive in both HindIII
        # and NcoI (the control data is not consulted here).
        mask = (numpy.sum(
            data1, axis=0) > 0) * (numpy.sum(data2, axis=0) > 0)
        validMask = mask[:, None] * mask[None, :]
        # Pairs of bins whose chromosomeIndex differs.
        transmask = BD.chromosomeIndex[:, None] != BD.chromosomeIndex[None, :]
        cormask = transmask * validMask

        c1 = scipy.stats.spearmanr(data1[cormask], data2[cormask])[0]
        c4 = scipy.stats.spearmanr(data1[cormask], data3[cormask])[0]

        if size == 1:
            evs.append(BD.interchromosomalValues("HindIII"))
            evs.append(BD.interchromosomalValues("NcoI"))
            evs.append(BD.interchromosomalValues("control"))
        p4.append(c4)
        p1.append(c1)

        # Trailing comma: the "corrected" print below continues this line.
        print "size\t%d\traw:" % size, c1,
        BD.removeZeros()
        BD.fakeCis()  # does iterative correction as well
        BD.restoreZeros(value=0)

        # Same cormask as above is reused on the processed matrices.
        data1 = BD.dataDict["HindIII"]
        data2 = BD.dataDict["NcoI"]
        data3 = BD.dataDict["control"]
        c2 = scipy.stats.spearmanr(data1[cormask], data2[cormask])[0]
        c3 = scipy.stats.spearmanr(data1[cormask], data3[cormask])[0]

        if size == 1:
            evs.append(BD.interchromosomalValues("HindIII"))
            evs.append(BD.interchromosomalValues("NcoI"))
            evs.append(BD.interchromosomalValues("control"))
            print evs

        p3.append(c3)
        p2.append(c2)

        print "\tcorrected:", c2, "\tcontrol", c3

    plt.plot(sizes, p1, label="Raw data, between enzymes")
    plt.plot(sizes, p2, label="Iteratively corrected, between")
    plt.plot(sizes, p3, label="IC, within")
    plt.xlabel("Bin size, MB")
    plt.xticks(range(1, 11))
    plt.ylabel("Spearman correlation coefficient")
    plt.legend()
    niceShow()

    # Deliberate failure: with the exception hook installed, this division
    # by zero stops the run here.  NOTE(review): presumably a debugging aid
    # for inspecting the locals - confirm before removing.
    setExceptionHook()
    0 / 0
Пример #17
0
def compareInterarmMaps(resolution, filename1, filename2, experiment1, experiment2, genome, mouse=False, **kwargs):
    """Plot 8 interarm maps - paper supplement figure.

    Two datasets are loaded side by side and ``averageTransMap`` is drawn
    for each of them at four stages, on a 4x2 grid (left column
    ``experiment1``, right column ``experiment2``):

    * row 1 - raw data,
    * row 3 - after ``iterativeCorrectWithSS``,
    * row 2 - after ``iterativeCorrectWithoutSS``,
    * row 4 - after ``fakeCis`` ("trans only").

    Args:
        resolution: first argument of ``binnedDataAnalysis``; also printed
            with ``%d`` in verbose mode.
        filename1, filename2: heatmap files handed to ``simpleLoad``.
        experiment1, experiment2: dataset names, also used in the titles.
        genome: second argument of ``binnedDataAnalysis``.
        mouse: accepted but not used by this function.
        **kwargs: accepted and ignored.

    Uses the module-level ``options`` (``verbose`` flag) and ``pp`` (its
    ``savefig()`` stores the current figure).
    """
    global pp

    if options.verbose:
        print >> sys.stdout, "compareInterarmMaps: res: %d file1: %s file2: %s exp1:%s exp2:%s gen:%s" % (
            resolution,
            filename1,
            filename2,
            experiment1,
            experiment2,
            genome,
        )

    Tanay = binnedDataAnalysis(resolution, genome)

    Tanay.simpleLoad(filename1, experiment1)
    Tanay.simpleLoad(filename2, experiment2)
    Tanay.removeDiagonal()
    Tanay.removePoorRegions(cutoff=2)
    fs = 10
    # No fixed colour limits: every panel uses its own range.
    vmin = None
    vmax = None

    def _panel(code, name, suffix):
        # Draw one map; the dataset plotted is always the one named in the
        # title, so title and data cannot drift apart.
        plt.subplot(code)
        plt.title(name + suffix, fontsize=fs)
        Tanay.averageTransMap(name, vmin=vmin, vmax=vmax)
        plt.colorbar()

    plt.figure(figsize=(12, 16))
    _panel(421, experiment1, ", raw")
    _panel(422, experiment2, ", raw")

    Tanay.iterativeCorrectWithSS()
    _panel(425, experiment1, ", with SS reads")
    _panel(426, experiment2, ", with SS reads")

    Tanay.iterativeCorrectWithoutSS()
    # Fixed: this panel used to plot experiment2 under experiment1's title.
    _panel(423, experiment1, ", no SS reads")
    _panel(424, experiment2, ", no ss reads")

    Tanay.fakeCis()
    _panel(427, experiment1, ", trans only")
    _panel(428, experiment2, ", trans only")

    # Save BEFORE showing: a blocking show() releases the figure once its
    # window is closed, so a later savefig() has no drawn figure to store.
    pp.savefig()
    plt.show()
Пример #18
0
from hiclib.binnedData import binnedDataAnalysis

import os
import sys


# Smoke test: run a typical binnedDataAnalysis pipeline end to end on the
# test heatmap.  No result is verified - it only checks that nothing raises.

# NOTE(review): this says hg18 while the folder below points at hg19; the
# name is not used in this script - confirm which genome is intended.
workingGenome = "hg18"
genomeFolder = "../../../data/hg19"
if not os.path.exists(genomeFolder):
    # Fall back to the first command-line argument when the default folder
    # is missing.  Only a missing argument is turned into the help message;
    # any other error propagates unchanged.
    try:
        genomeFolder = sys.argv[1]
    except IndexError:
        raise StandardError("Please provide hg19 Genome folder in the code or as a first argument")


a = binnedDataAnalysis(1000000, genomeFolder)
a.simpleLoad("../fragmentHiC/test-1M.hm", "test")
a.removeDiagonal()
a.removePoorRegions()
a.removeCis()
a.fakeCis()
a.removeZeros()
a.iterativeCorrectWithoutSS()
a.doEig()
a.export("test", "testHeatmap")
print("everything worked, but no verification of result was made, because we haven't written it yet...")