示例#1
0
def correctedScalingPlot(resolution, filename, experiment, genome, mouse=False, **kwargs):
    """Paper figure to compare scaling before/after correction.

    Loads ``experiment`` from ``filename`` into a ``binnedDataAnalysis``
    object built from ``resolution`` and ``genome``, draws its scaling curve
    once for the raw data and once after ``iterativeCorrectWithSS()``, then
    styles the axes (log-log), shows the figure and calls ``pp.savefig()``.

    ``mouse`` and ``**kwargs`` are accepted but not used in this function.
    """
    global pp
    if options.verbose:
        message = "correctedScalingPlot: res: %d file1: %s exp1:%s gen:%s" % (
            resolution, filename, experiment, genome)
        print >> sys.stdout, message

    fontSize = 6

    plt.figure()
    binned = binnedDataAnalysis(resolution, genome)
    binned.simpleLoad(filename, experiment)
    binned.removePoorRegions()
    binned.removeDiagonal()
    # First curve is drawn before the correction, second one after it.
    binned.plotScaling(experiment, label="Raw data", color="#A7A241")
    binned.iterativeCorrectWithSS()
    binned.plotScaling(experiment, label="Corrected", color="#344370")

    axes = plt.gca()
    plotting.removeAxes()
    plt.xlabel("Genomic distance (MB)", fontsize=fontSize)
    plt.ylabel("Contact probability", fontsize=fontSize)
    # Shrink the tick labels on both axes to the same size as the axis labels.
    for tickLabel in list(axes.get_xticklabels()) + list(axes.get_yticklabels()):
        tickLabel.set_fontsize(fontSize)

    plt.legend(loc=0, prop={"size": fontSize}).draw_frame(False)
    plt.xscale("log")
    plt.yscale("log")
    # NOTE(review): savefig() runs after show(); confirm the figure is still
    # current at that point for the backend in use.
    plt.show()
    pp.savefig()
示例#2
0
def plotCrossValidation():
    """Main figure subplot with cross-validation.

    Loads the "GM-all-10p" and "GM-all-90p" datasets into one ``binnedData``
    object, runs ``iterativeCorrectWithSS()`` on them and scatter-plots the
    resulting bias vectors (``biasDict["GM-1"]`` vs ``biasDict["GM-9"]``)
    against each other.  The two bias vectors are also pickled to the file
    "CrossValidatioN" in the current directory, and ``cr(b1, b2)`` is
    printed.
    """
    matplotlib.rcParams['font.sans-serif'] = 'Arial'
    plt.figure(figsize=(1, 1))
    # NOTE(review): FG is loaded but never used below; kept because load()
    # may have side effects - confirm whether it can be dropped.
    FG = HiCdataset(workingFile1, myGenome)
    FG.load(GMFrag)

    Tanay = binnedData(1000000)
    Tanay.simpleLoad("GM-all-10p", "GM-1")
    # need to create these datasets using fragment-level analysis
    Tanay.simpleLoad("GM-all-90p", "GM-9")
    Tanay.removePoorRegions()
    Tanay.iterativeCorrectWithSS()
    Tanay.removeZeros()
    b1, b2 = (Tanay.biasDict["GM-1"], Tanay.biasDict["GM-9"])

    # Save the biases and read them back; the files are closed
    # deterministically instead of relying on garbage collection.
    with open("CrossValidatioN", 'wb') as cacheFile:
        cPickle.dump((b1, b2), cacheFile)
    ax = plt.gca()
    with open("CrossValidatioN", 'rb') as cacheFile:
        b1, b2 = cPickle.load(cacheFile)

    # Parenthesised single-argument print behaves the same as the statement.
    print(cr(b1, b2))
    plt.scatter(b1, b2, s=.7, color="k", linewidth=0)
    plt.xlabel(r"10% reads", fontsize=8)
    plt.ylabel(r"90% reads", fontsize=8)
    plt.xlim((0, 1.5))
    plt.ylim((0, 1.5))
    plt.xticks([0, 0.5, 1, 1.5])
    plt.yticks([0, 0.5, 1, 1.5])
    removeAxes(shift=0)
    fs = 6
    for xlabel_i in ax.get_xticklabels():
        xlabel_i.set_fontsize(fs)
    for ylabel_i in ax.get_yticklabels():
        ylabel_i.set_fontsize(fs)
    plt.show()
示例#3
0
def allDirectionalityRatios(ratioFunction):
    """
    A simple plot which calculates all directionality ratios, plots them
    and puts lines at 20 top highly expressed genes (Supp figure from our paper)
    This is mostly matplotlib code.

    ratioFunction is called with a dataset name and its result is plotted as
    the ratio track of that dataset.  One subplot is drawn per entry of the
    module-level ``datasets``; its title shows ``names[dataset]`` together
    with the Pearson r and p-value of the track against the log of the
    "Wildtype_0min_BglII_rep1" track.

    The function does not return: exit() is called after plt.show().
    """
    if not os.path.exists("savedHeatmaps"):
        os.mkdir("savedHeatmaps")
    # NOTE(review): the reference track is log-transformed while the
    # per-dataset tracks below are not - confirm this is intended.
    wildRatio = np.log(ratioFunction("Wildtype_0min_BglII_rep1"))

    # Loop-invariant data: gene coordinates divided by 10000 (presumably to
    # convert base pairs into 10 kb bin units) and the blank x tick labels.
    geneCoor = [1162773, 3509071, 1180887, 543099, 1953250, 2522439, 3328524,
                1503879, 900483, 242693, 3677144, 3931680, 3677704, 3762707,
                3480870, 3829656, 1424678, 901855, 1439056, 3678537]
    genePos = [i / 10000. for i in geneCoor]
    tickPositions = [0, 50, 100, 150, 200, 250, 300, 350, 400]
    tickLabels = [""] * len(tickPositions)

    for j, dataset in enumerate(datasets):
        ax = plt.subplot(len(datasets), 1, j + 1)
        curRatio = ratioFunction(dataset)
        # Compute the correlation once and reuse both r and the p-value.
        correlation = pearsonr(curRatio, wildRatio)
        plt.title("{1},  r = {0:.2f}, p={2:.2e}".format(
            correlation[0], names[dataset], correlation[1]), fontsize=10)
        plt.tick_params(axis='both', which='major', labelsize=10)
        plt.tick_params(axis='both', which='minor', labelsize=8)
        plt.plot(curRatio)
        plt.ylim((0.25, 0.75))
        plt.xlim((0, len(curRatio)))
        plt.yticks((0.25, 0.5, 0.75))
        for lpos in genePos:
            plt.vlines(lpos, -.8, .8, alpha=0.2, linewidth=1, color="black")
        plt.xticks(tickPositions, tickLabels, fontsize=98)
        removeAxes(ax=ax)
        plt.subplots_adjust(0.07, 0.05, 0.94, 0.95, 0.2, 0.5)

    plt.show()
    exit()
示例#4
0
def allDirectionalityRatios(ratioFunction):
    """
    A simple plot which calculates all directionality ratios, plots them
    and puts lines at 20 top highly expressed genes (Supp figure from our paper)
    This is mostly matplotlib code.

    ratioFunction is called with a dataset name and its result is plotted as
    the ratio track of that dataset.  One subplot is drawn per entry of the
    module-level ``datasets``; its title shows ``names[dataset]`` together
    with the Pearson r and p-value of the track against the log of the
    "Wildtype_0min_BglII_rep1" track.

    The function does not return: exit() is called after plt.show().
    """
    if not os.path.exists("savedHeatmaps"):
        os.mkdir("savedHeatmaps")
    # NOTE(review): the reference track is log-transformed while the
    # per-dataset tracks below are not - confirm this is intended.
    wildRatio = np.log(ratioFunction("Wildtype_0min_BglII_rep1"))

    # Loop-invariant data: gene coordinates divided by 10000 (presumably to
    # convert base pairs into 10 kb bin units) and the blank x tick labels.
    geneCoor = [
        1162773, 3509071, 1180887, 543099, 1953250, 2522439, 3328524,
        1503879, 900483, 242693, 3677144, 3931680, 3677704, 3762707,
        3480870, 3829656, 1424678, 901855, 1439056, 3678537
    ]
    genePos = [i / 10000. for i in geneCoor]
    tickPositions = [0, 50, 100, 150, 200, 250, 300, 350, 400]
    tickLabels = [""] * len(tickPositions)

    for j, dataset in enumerate(datasets):
        ax = plt.subplot(len(datasets), 1, j + 1)
        curRatio = ratioFunction(dataset)
        # Compute the correlation once and reuse both r and the p-value.
        correlation = pearsonr(curRatio, wildRatio)
        plt.title("{1},  r = {0:.2f}, p={2:.2e}".format(
            correlation[0], names[dataset], correlation[1]),
                  fontsize=10)
        plt.tick_params(axis='both', which='major', labelsize=10)
        plt.tick_params(axis='both', which='minor', labelsize=8)
        plt.plot(curRatio)
        plt.ylim((0.25, 0.75))
        plt.xlim((0, len(curRatio)))
        plt.yticks((0.25, 0.5, 0.75))
        for lpos in genePos:
            plt.vlines(lpos, -.8, .8, alpha=0.2, linewidth=1, color="black")
        plt.xticks(tickPositions, tickLabels, fontsize=98)
        removeAxes(ax=ax)
        plt.subplots_adjust(0.07, 0.05, 0.94, 0.95, 0.2, 0.5)

    plt.show()
    exit()
示例#5
0
def correctedScalingPlot(resolution,
                         filename,
                         experiment,
                         genome,
                         mouse=False,
                         **kwargs):
    """Paper figure to compare scaling before/after correction.

    Loads ``experiment`` from ``filename`` into a ``binnedDataAnalysis``
    object built from ``resolution`` and ``genome``, draws its scaling curve
    once for the raw data and once after ``iterativeCorrectWithSS()``, then
    styles the axes (log-log), shows the figure and calls ``pp.savefig()``.

    ``mouse`` and ``**kwargs`` are accepted but not used in this function.
    """
    global pp
    if options.verbose:
        message = "correctedScalingPlot: res: %d file1: %s exp1:%s gen:%s" % (
            resolution, filename, experiment, genome)
        print >> sys.stdout, message

    fontSize = 6

    plt.figure()
    binned = binnedDataAnalysis(resolution, genome)
    binned.simpleLoad(filename, experiment)
    binned.removePoorRegions()
    binned.removeDiagonal()
    # First curve is drawn before the correction, second one after it.
    binned.plotScaling(experiment, label="Raw data", color="#A7A241")
    binned.iterativeCorrectWithSS()
    binned.plotScaling(experiment, label="Corrected", color="#344370")

    axes = plt.gca()
    plotting.removeAxes()
    plt.xlabel("Genomic distance (MB)", fontsize=fontSize)
    plt.ylabel("Contact probability", fontsize=fontSize)
    # Shrink the tick labels on both axes to the same size as the axis labels.
    for tickLabel in list(axes.get_xticklabels()) + list(axes.get_yticklabels()):
        tickLabel.set_fontsize(fontSize)

    plt.legend(loc=0, prop={"size": fontSize}).draw_frame(False)
    plt.xscale("log")
    plt.yscale("log")
    # NOTE(review): savefig() runs after show(); confirm the figure is still
    # current at that point for the backend in use.
    plt.show()
    pp.savefig()
示例#6
0
def plotDiagonalCorrelation(resolution,
                            filename1,
                            filename2,
                            experiment1,
                            experiment2,
                            genome,
                            mouse=False,
                            **kwargs):
    """Correlation of diagonal bins - paper figure.

    Loads two experiments into one ``binnedData`` object and, for every
    diagonal offset in ``range(2, 50)``, records ``cr(...)[0]`` between the
    matching diagonals of the two maps.  This is done three times: on the
    filtered raw data, after ``iterativeCorrectWithoutSS(M=1)`` and after a
    further ``iterativeCorrectWithoutSS(M=20)``.  The three curves are
    plotted against genomic separation, shown, and saved via ``pp``.

    ``resolution`` is passed to ``binnedData`` and is also used as the bin
    size in base pairs when converting diagonal offsets to MB for the x axis.
    ``mouse`` and ``**kwargs`` are accepted but not used in this function.
    """
    global pp

    if options.verbose:
        print >> sys.stdout, "plotDiagonalCorrelation: res: %d file1: %s file2: %s exp1:%s exp2:%s gen:%s" % (
            resolution, filename1, filename2, experiment1, experiment2, genome)

    S = 50
    x = numpy.arange(2, S)
    Tanay = binnedData(resolution, genome)
    Tanay.simpleLoad(filename1, experiment1)
    Tanay.simpleLoad(filename2, experiment2)
    Tanay.removeDiagonal(1)
    Tanay.removePoorRegions()
    Tanay.removeZeros()

    pairs = [(experiment1, experiment2)]

    def diagonalCorrelations():
        # One list per pair: cr(...)[0] of the two maps' i-th diagonals, for
        # every offset i in x, using the current state of Tanay.dataDict.
        result = [[] for _ in pairs]
        for i in x:
            for j, pair in enumerate(pairs):
                result[j].append(
                    cr(numpy.diagonal(Tanay.dataDict[pair[0]], i),
                       numpy.diagonal(Tanay.dataDict[pair[1]], i))[0])
        return result

    cors = diagonalCorrelations()
    Tanay.iterativeCorrectWithoutSS(M=1)
    cors2 = diagonalCorrelations()
    Tanay.iterativeCorrectWithoutSS(M=20)
    cors3 = diagonalCorrelations()

    matplotlib.rcParams['font.sans-serif'] = 'Arial'

    print("Eigenvectors")
    print(cors)
    print(cors2)
    print(cors3)
    plt.figure(figsize=(8, 4))
    fs = 8
    # Diagonal offset (in bins) -> separation in MB for the actual bin size,
    # instead of assuming 200 kb bins (the former hard-coded "x / 5.").
    separation = x * (resolution / 1000000.)
    for j, pair in enumerate(pairs):
        # Subplot indices are 1-based.
        ax = plt.subplot(1, len(pairs), j + 1)
        plt.title("%s vs %s" % pair)
        plt.plot(separation, cors3[j], color="#E5A826", label="Iterative")
        plt.plot(separation, cors2[j], color="#28459A", label="Single")
        plt.plot(separation, cors[j], color="#E55726", label="Raw")
        plt.xlabel("Genomic Separation, MB", fontsize=8)
        plt.ylabel("Spearman correlation", fontsize=8)

        legend = plt.legend(prop={"size": 6}, loc=9, handlelength=2)
        legend.draw_frame(False)
        plt.ylim((0, 1))
        removeAxes(shift=0)
        # Apply the tick font size to the subplot that was just drawn.
        for tickLabel in list(ax.get_xticklabels()) + list(ax.get_yticklabels()):
            tickLabel.set_fontsize(fs)

    # NOTE(review): savefig() runs after show(); confirm the figure is still
    # current at that point for the backend in use.
    plt.show()
    pp.savefig()
示例#7
0
def plotFigure2c():
    """Draw the two-panel "Total coverage" figure.

    Two heatmaps are built from "GM-all.refined" with
    ``buildHeatmap(1, 1, 1000000, ...)``: a plain one, and one built with
    ``weights=True`` after the dataset weights were replaced by ones.  For
    each heatmap three normalised column-sum profiles are plotted: the map
    itself, ``correct(map)`` and ``ultracorrect(map, 40)``.  The top panel
    shows the plain map, the bottom panel the weighted one.
    """
    TR = HiCdataset()
    TR.load("GM-all.refined")
    plainMap = TR.buildHeatmap(1, 1, 1000000, False, False)
    TR.calculateWeights()
    # if you want to correct just by fragment density, not by length dependence
    TR.weights = np.ones(len(TR.weights), float)
    weightedMap = TR.buildHeatmap(1, 1, 1000000, False, weights=True)
    weightedMap[np.isnan(weightedMap)] = 0
    # Only columns with a non-zero total in the plain map are plotted.
    covered = np.sum(plainMap, axis=0) > 0

    # The six profiles to be plotted; everything below is plotting only.
    plainRaw = np.sum(plainMap, axis=0)[covered]
    plainSingle = np.sum(correct(plainMap), axis=0)[covered]
    plainIterative = np.sum(ultracorrect(plainMap, 40), axis=0)[covered]
    weightedSingle = np.sum(correct(weightedMap), axis=0)[covered]
    weightedRaw = np.sum(weightedMap, axis=0)[covered]
    weightedIterative = np.sum(ultracorrect(weightedMap, 40), axis=0)[covered]

    matplotlib.rcParams['font.sans-serif'] = 'Arial'
    dashPattern = (3, 3)
    tickFontSize = 8
    plt.figure(figsize=(4, 4))

    # ---- top panel: plain heatmap ----
    topAxes = plt.subplot(2, 1, 1)
    plt.xlim((0, 80))
    plt.ylim((0, 2))
    plt.ylabel("Total coverage", fontsize=8)

    topRawLine = plt.plot(
        plainRaw / plainRaw.mean(), "-", linewidth=1, color="#e5a826")[0]
    topSingleLine = plt.plot(
        plainSingle / plainSingle.mean(), "--", linewidth=1,
        color="#e5a826")[0]
    topSingleLine.set_dashes(dashPattern)
    topIterativeLine = plt.plot(
        plainIterative / plainIterative.mean(), linewidth=1, color="grey")[0]

    for tickLabel in list(topAxes.get_xticklabels()) + list(topAxes.get_yticklabels()):
        tickLabel.set_fontsize(tickFontSize)
    topLegend = plt.legend(
        [topRawLine, topSingleLine, topIterativeLine],
        ["Raw data", "Single correction", "Iterative correction"],
        prop={"size": 6}, loc=1, handlelength=2)
    topLegend.draw_frame(False)
    removeAxes(shift=0, ax=topAxes)

    # Hide every spine and draw plain black lines at y=0 and x=0 instead.
    for spine in topAxes.spines.values():
        spine.set_color('none')
    topAxes.axhline(linewidth=1, color='black')
    topAxes.axvline(linewidth=1, color='black')

    # ---- bottom panel: weighted heatmap, sharing the x axis ----
    bottomAxes = plt.subplot(2, 1, 2, sharex=topAxes)
    plt.xlim((0, 80))
    plt.ylim((0, 2))
    plt.xlabel("Position on chom 1 (MB)", fontsize=8)
    plt.ylabel("Total coverage", fontsize=8)

    bottomSingleLine = plt.plot(
        weightedSingle / weightedSingle.mean(), "--", color="#9b3811",
        linewidth=1)[0]
    bottomSingleLine.set_dashes(dashPattern)
    bottomRawLine = plt.plot(
        weightedRaw / weightedRaw.mean(), "-", color="#9b3811",
        linewidth=1)[0]
    bottomIterativeLine = plt.plot(
        weightedIterative / weightedIterative.mean(), linewidth=1,
        color="grey")[0]

    for tickLabel in list(bottomAxes.get_xticklabels()) + list(bottomAxes.get_yticklabels()):
        tickLabel.set_fontsize(tickFontSize)

    bottomLegend = plt.legend(
        [bottomRawLine, bottomSingleLine, bottomIterativeLine],
        ["HindIII corrected", "Single correction", "Iterative correction"],
        prop={"size": 6}, loc=1, handlelength=2)
    bottomLegend.draw_frame(False)
    removeAxes(shift=0, ax=bottomAxes)
    plotting.niceShow()
示例#8
0
def showAllDatasets():
    """Show one corrected, smoothed heatmap per dataset with a P(s) inset.

    For every entry of the module-level ``datasets`` the heatmap is loaded
    through ``h5dict(hm(dataset), 'r')["heatmap"]``, its empty rows/columns
    are filled from their neighbours, the three central diagonals are
    zeroed, the map is truncated and passed through ``ultracorrect``, the
    diagonals are refilled from the mean of the second diagonal, and the
    result is adaptively smoothed and drawn with ``imshow``.  A log-log inset
    with the ``scaling`` curve of two averaged sub-blocks of the map is added
    on top of each heatmap.

    Once a dataset named 'Wildtype_0min_BglII_rep1' or "ML2000_0hr" has been
    processed, its scaling curve is also drawn in grey in that inset and in
    the insets of all following datasets.
    """
    setExceptionHook()

    #plt.figure(figsize=(25, 15))
    fig = plt.figure()

    # size of the figure (inches * dpi)
    fw = fig.get_figwidth() * fig.get_dpi()
    fh = fig.get_figheight() * fig.get_dpi()

    # get subplot configuration
    sx, sy = subplots(len(datasets))

    # Top highly expressed genes
    #genePos = [18, 56, 77, 117, 143, 215, 234, 256, 266, 286, 300, 326, 336, 367, 379]
    geneCoor = [1162773, 3509071, 1180887, 543099, 1953250, 2522439, 3328524,
                1503879, 900483, 242693, 3677144, 3931680, 3677704, 3762707,
                3480870, 3829656, 1424678, 901855, 1439056, 3678537]

    # here we commited to 10kb resolution - change below if you're not
    genePos = [i / 10000. for i in geneCoor]

    # Gene lines are switched off by this override; remove it to draw them.
    genePos = []

    # Most recent reference scaling curve; None until a reference dataset is
    # seen (replaces the former '"myscaling" in locals()' check).
    myscaling = None

    for j, dataset in enumerate(datasets):
        plt.subplot(sx, sy, j + 1)
        heatmap = 1. * h5dict(hm(dataset), 'r')["heatmap"]

        # fill in gaps - obsolete, as heatmaps are with overlaps
        zeros = np.nonzero(np.sum(heatmap, axis=0) == 0)[0]
        heatmap[zeros] = heatmap[zeros - 1]
        heatmap[:, zeros] = heatmap[:, zeros - 1]

        # regular IC protocol
        mirnylib.numutils.fillDiagonal(heatmap, 0, 0)
        mirnylib.numutils.fillDiagonal(heatmap, 0, 1)
        mirnylib.numutils.fillDiagonal(heatmap, 0, -1)
        heatmap = trunc(heatmap, low=0, high=0.0001)
        heatmap = ultracorrect(heatmap)
        diag2value = np.mean(np.diagonal(heatmap, 2))
        mirnylib.numutils.fillDiagonal(heatmap, 1.5 * diag2value, 0)
        mirnylib.numutils.fillDiagonal(heatmap, 1.2 * diag2value, 1)
        mirnylib.numutils.fillDiagonal(heatmap, 1.2 * diag2value, -1)

        # putting lines at highly expressed genes
        for lpos in genePos:
            plt.hlines(lpos, 0, 500, linewidth=0.7, color="black", alpha=0.2, zorder=1)
            plt.vlines(lpos, 0, 500, linewidth=0.7, color="black", alpha=0.2, zorder=1)

        # performing adaptive smoothing
        smoothedHeatmap = adaptiveSmoothing(heatmap, 20)
        smoothedHeatmap /= np.mean(np.sum(heatmap, axis=0))

        plt.title(dataset, fontsize=10)
        plt.imshow((smoothedHeatmap), interpolation="none", vmax=0.035, cmap="acidblues", zorder=0)
        #plt.imshow((smoothedHeatmap), interpolation="nearest", vmin=0, vmax=np.exp(-4.5), cmap="fall", zorder=0)
        plt.xticks([])
        plt.yticks([])

        # Must stay inside the loop: the inset position computed below is
        # derived from the current axes' transform.
        plt.subplots_adjust(left=0.05,  # the left side of the subplots of the figure
                            right=0.95,  # the right side of the subplots of the figure
                            bottom=0.05,  # the bottom of the subplots of the figure
                            top=0.95,  # the top of the subplots of the figure
                            wspace=0.1,  # the amount of width reserved for blank space between subplots
                            hspace=0.2)

        # code below just puts the P(s) over the heatmap
        N = len(smoothedHeatmap)
        # Semi-transparent grey triangle with corners (1, 0), (N, N), (N, 0).
        pts = np.array([[1, 0], [N, N], [N, 0]])
        p = Polygon(pts, closed=True, facecolor=(0.8, 0.8, 0.8), linewidth=0, alpha=0.7, zorder=2)
        ax = plt.gca()
        ax.add_patch(p)

        # Inset rectangle given in axes coordinates, transformed and divided
        # by the figure size to obtain figure-relative [l, b, w, h].
        Bbox = matplotlib.transforms.Bbox.from_bounds(.55, .55, .35, .42)
        tBbox = matplotlib.transforms.TransformedBbox(Bbox, ax.transAxes).get_points()
        l = tBbox[0, 0] / fw
        b = tBbox[0, 1] / fh
        w = (tBbox[1, 0] - tBbox[0, 0]) / fw
        h = (tBbox[1, 1] - tBbox[0, 1]) / fh
        # NOTE(review): "axisbg" is the keyword of older matplotlib releases
        # (newer ones use "facecolor"); kept for the version this targets.
        axins = fig.add_axes([l, b, w, h], axisbg=(0, 0, 0, 0), xscale="log", yscale="log")
        removeAxes(ax=axins)
        for tickLabel in list(axins.get_xticklabels()) + list(axins.get_yticklabels()):
            tickLabel.set_fontsize(6)

        # Average of two diagonal sub-blocks (5-45% and 55-95% of the map).
        st = int(0.05 * N)
        end = int(0.45 * N)
        st2 = int(0.55 * N)
        end2 = int(0.95 * N)
        curScaling = scaling(0.5 * (smoothedHeatmap[st:end, st:end] + smoothedHeatmap[st2:end2, st2:end2]))
        axins.plot(*curScaling, color="blue", label="intra-arm")
        if dataset in ['Wildtype_0min_BglII_rep1', "ML2000_0hr"]:
            # Reuse the curve just computed instead of calling scaling() again.
            myscaling = curScaling
        #axins.plot(*scaling(smoothedHeatmap[st:end, end2:st2:-1]), color="green", label="inter-arm")
        axins.set_xlabel("kb", fontsize=6)
        axins.set_ylabel("Pc", fontsize=6)
        axins.grid()

        if myscaling is not None:
            axins.plot(*myscaling, color="grey")

        # Hide every second tick line of the inset.
        for i, line in enumerate(axins.get_xticklines() + axins.get_yticklines()):
            if i % 2 == 1:  # odd indices
                line.set_visible(False)

    plt.show()
示例#9
0
def plotFigure2c():
    """Draw the two-panel "Total coverage" figure.

    Two heatmaps are built from "GM-all.refined" with
    ``buildHeatmap(1, 1, 1000000, ...)``: a plain one, and one built with
    ``weights=True`` after the dataset weights were replaced by ones.  For
    each heatmap three normalised column-sum profiles are plotted: the map
    itself, ``correct(map)`` and ``ultracorrect(map, 40)``.  The top panel
    shows the plain map, the bottom panel the weighted one.
    """
    TR = HiCdataset()
    TR.load("GM-all.refined")
    plainMap = TR.buildHeatmap(1, 1, 1000000, False, False)
    TR.calculateWeights()
    # if you want to correct just by fragment density, not by length dependence
    TR.weights = np.ones(len(TR.weights), float)
    weightedMap = TR.buildHeatmap(1, 1, 1000000, False, weights=True)
    weightedMap[np.isnan(weightedMap)] = 0
    # Only columns with a non-zero total in the plain map are plotted.
    covered = np.sum(plainMap, axis=0) > 0

    # The six profiles to be plotted; everything below is plotting only.
    plainRaw = np.sum(plainMap, axis=0)[covered]
    plainSingle = np.sum(correct(plainMap), axis=0)[covered]
    plainIterative = np.sum(ultracorrect(plainMap, 40), axis=0)[covered]
    weightedSingle = np.sum(correct(weightedMap), axis=0)[covered]
    weightedRaw = np.sum(weightedMap, axis=0)[covered]
    weightedIterative = np.sum(ultracorrect(weightedMap, 40), axis=0)[covered]

    matplotlib.rcParams['font.sans-serif'] = 'Arial'
    dashPattern = (3, 3)
    tickFontSize = 8
    plt.figure(figsize=(4, 4))

    # ---- top panel: plain heatmap ----
    topAxes = plt.subplot(2, 1, 1)
    plt.xlim((0, 80))
    plt.ylim((0, 2))
    plt.ylabel("Total coverage", fontsize=8)

    topRawLine = plt.plot(
        plainRaw / plainRaw.mean(), "-", linewidth=1, color="#e5a826")[0]
    topSingleLine = plt.plot(
        plainSingle / plainSingle.mean(), "--", linewidth=1,
        color="#e5a826")[0]
    topSingleLine.set_dashes(dashPattern)
    topIterativeLine = plt.plot(
        plainIterative / plainIterative.mean(), linewidth=1, color="grey")[0]

    for tickLabel in list(topAxes.get_xticklabels()) + list(topAxes.get_yticklabels()):
        tickLabel.set_fontsize(tickFontSize)
    topLegend = plt.legend(
        [topRawLine, topSingleLine, topIterativeLine],
        ["Raw data", "Single correction", "Iterative correction"],
        prop={"size": 6},
        loc=1,
        handlelength=2)
    topLegend.draw_frame(False)
    removeAxes(shift=0, ax=topAxes)

    # Hide every spine and draw plain black lines at y=0 and x=0 instead.
    for spine in topAxes.spines.values():
        spine.set_color('none')
    topAxes.axhline(linewidth=1, color='black')
    topAxes.axvline(linewidth=1, color='black')

    # ---- bottom panel: weighted heatmap, sharing the x axis ----
    bottomAxes = plt.subplot(2, 1, 2, sharex=topAxes)
    plt.xlim((0, 80))
    plt.ylim((0, 2))
    plt.xlabel("Position on chom 1 (MB)", fontsize=8)
    plt.ylabel("Total coverage", fontsize=8)

    bottomSingleLine = plt.plot(
        weightedSingle / weightedSingle.mean(), "--", color="#9b3811",
        linewidth=1)[0]
    bottomSingleLine.set_dashes(dashPattern)
    bottomRawLine = plt.plot(
        weightedRaw / weightedRaw.mean(), "-", color="#9b3811",
        linewidth=1)[0]
    bottomIterativeLine = plt.plot(
        weightedIterative / weightedIterative.mean(), linewidth=1,
        color="grey")[0]

    for tickLabel in list(bottomAxes.get_xticklabels()) + list(bottomAxes.get_yticklabels()):
        tickLabel.set_fontsize(tickFontSize)

    bottomLegend = plt.legend(
        [bottomRawLine, bottomSingleLine, bottomIterativeLine],
        ["HindIII corrected", "Single correction", "Iterative correction"],
        prop={"size": 6},
        loc=1,
        handlelength=2)
    bottomLegend.draw_frame(False)
    removeAxes(shift=0, ax=bottomAxes)
    plotting.niceShow()
示例#10
0
def showAllDatasets():
    """Show one corrected, smoothed heatmap per dataset with a P(s) inset.

    For every entry of the module-level ``datasets`` the heatmap is loaded
    through ``h5dict(hm(dataset), 'r')["heatmap"]``, its empty rows/columns
    are filled from their neighbours, the three central diagonals are
    zeroed, the map is truncated and passed through ``ultracorrect``, the
    diagonals are refilled from the mean of the second diagonal, and the
    result is adaptively smoothed and drawn with ``imshow``.  A log-log inset
    with the ``scaling`` curve of two averaged sub-blocks of the map is added
    on top of each heatmap.

    Once a dataset named 'Wildtype_0min_BglII_rep1' or "ML2000_0hr" has been
    processed, its scaling curve is also drawn in grey in that inset and in
    the insets of all following datasets.
    """
    setExceptionHook()

    #plt.figure(figsize=(25, 15))
    fig = plt.figure()

    # size of the figure (inches * dpi)
    fw = fig.get_figwidth() * fig.get_dpi()
    fh = fig.get_figheight() * fig.get_dpi()

    # get subplot configuration
    sx, sy = subplots(len(datasets))

    # Top highly expressed genes
    #genePos = [18, 56, 77, 117, 143, 215, 234, 256, 266, 286, 300, 326, 336, 367, 379]
    geneCoor = [
        1162773, 3509071, 1180887, 543099, 1953250, 2522439, 3328524,
        1503879, 900483, 242693, 3677144, 3931680, 3677704, 3762707,
        3480870, 3829656, 1424678, 901855, 1439056, 3678537
    ]

    # here we commited to 10kb resolution - change below if you're not
    genePos = [i / 10000. for i in geneCoor]

    # Gene lines are switched off by this override; remove it to draw them.
    genePos = []

    # Most recent reference scaling curve; None until a reference dataset is
    # seen (replaces the former '"myscaling" in locals()' check).
    myscaling = None

    for j, dataset in enumerate(datasets):
        plt.subplot(sx, sy, j + 1)
        heatmap = 1. * h5dict(hm(dataset), 'r')["heatmap"]

        # fill in gaps - obsolete, as heatmaps are with overlaps
        zeros = np.nonzero(np.sum(heatmap, axis=0) == 0)[0]
        heatmap[zeros] = heatmap[zeros - 1]
        heatmap[:, zeros] = heatmap[:, zeros - 1]

        # regular IC protocol
        mirnylib.numutils.fillDiagonal(heatmap, 0, 0)
        mirnylib.numutils.fillDiagonal(heatmap, 0, 1)
        mirnylib.numutils.fillDiagonal(heatmap, 0, -1)
        heatmap = trunc(heatmap, low=0, high=0.0001)
        heatmap = ultracorrect(heatmap)
        diag2value = np.mean(np.diagonal(heatmap, 2))
        mirnylib.numutils.fillDiagonal(heatmap, 1.5 * diag2value, 0)
        mirnylib.numutils.fillDiagonal(heatmap, 1.2 * diag2value, 1)
        mirnylib.numutils.fillDiagonal(heatmap, 1.2 * diag2value, -1)

        # putting lines at highly expressed genes
        for lpos in genePos:
            plt.hlines(lpos,
                       0,
                       500,
                       linewidth=0.7,
                       color="black",
                       alpha=0.2,
                       zorder=1)
            plt.vlines(lpos,
                       0,
                       500,
                       linewidth=0.7,
                       color="black",
                       alpha=0.2,
                       zorder=1)

        # performing adaptive smoothing
        smoothedHeatmap = adaptiveSmoothing(heatmap, 20)
        smoothedHeatmap /= np.mean(np.sum(heatmap, axis=0))

        plt.title(dataset, fontsize=10)
        plt.imshow((smoothedHeatmap),
                   interpolation="none",
                   vmax=0.035,
                   cmap="acidblues",
                   zorder=0)
        #plt.imshow((smoothedHeatmap), interpolation="nearest", vmin=0, vmax=np.exp(-4.5), cmap="fall", zorder=0)
        plt.xticks([])
        plt.yticks([])

        # Must stay inside the loop: the inset position computed below is
        # derived from the current axes' transform.
        plt.subplots_adjust(
            left=0.05,  # the left side of the subplots of the figure
            right=0.95,  # the right side of the subplots of the figure
            bottom=0.05,  # the bottom of the subplots of the figure
            top=0.95,  # the top of the subplots of the figure
            wspace=0.1,  # width reserved for blank space between subplots
            hspace=0.2)

        # code below just puts the P(s) over the heatmap
        N = len(smoothedHeatmap)
        # Semi-transparent grey triangle with corners (1, 0), (N, N), (N, 0).
        pts = np.array([[1, 0], [N, N], [N, 0]])
        p = Polygon(pts,
                    closed=True,
                    facecolor=(0.8, 0.8, 0.8),
                    linewidth=0,
                    alpha=0.7,
                    zorder=2)
        ax = plt.gca()
        ax.add_patch(p)

        # Inset rectangle given in axes coordinates, transformed and divided
        # by the figure size to obtain figure-relative [l, b, w, h].
        Bbox = matplotlib.transforms.Bbox.from_bounds(.55, .55, .35, .42)
        tBbox = matplotlib.transforms.TransformedBbox(
            Bbox, ax.transAxes).get_points()
        l = tBbox[0, 0] / fw
        b = tBbox[0, 1] / fh
        w = (tBbox[1, 0] - tBbox[0, 0]) / fw
        h = (tBbox[1, 1] - tBbox[0, 1]) / fh
        # NOTE(review): "axisbg" is the keyword of older matplotlib releases
        # (newer ones use "facecolor"); kept for the version this targets.
        axins = fig.add_axes([l, b, w, h],
                             axisbg=(0, 0, 0, 0),
                             xscale="log",
                             yscale="log")
        removeAxes(ax=axins)
        for tickLabel in list(axins.get_xticklabels()) + list(axins.get_yticklabels()):
            tickLabel.set_fontsize(6)

        # Average of two diagonal sub-blocks (5-45% and 55-95% of the map).
        st = int(0.05 * N)
        end = int(0.45 * N)
        st2 = int(0.55 * N)
        end2 = int(0.95 * N)
        curScaling = scaling(0.5 * (smoothedHeatmap[st:end, st:end] +
                                    smoothedHeatmap[st2:end2, st2:end2]))
        axins.plot(*curScaling, color="blue", label="intra-arm")
        if dataset in ['Wildtype_0min_BglII_rep1', "ML2000_0hr"]:
            # Reuse the curve just computed instead of calling scaling() again.
            myscaling = curScaling
        #axins.plot(*scaling(smoothedHeatmap[st:end, end2:st2:-1]), color="green", label="inter-arm")
        axins.set_xlabel("kb", fontsize=6)
        axins.set_ylabel("Pc", fontsize=6)
        axins.grid()

        if myscaling is not None:
            axins.plot(*myscaling, color="grey")

        # Hide every second tick line of the inset.
        for i, line in enumerate(axins.get_xticklines() +
                                 axins.get_yticklines()):
            if i % 2 == 1:  # odd indices
                line.set_visible(False)

    plt.show()
示例#11
0
def plotDiagonalCorrelation():
    """Correlation of diagonal bins - paper figure.

    Loads three datasets ("GM-HindIII", "GM-NcoI", "TCC") into one
    ``binnedData`` object and, for each of the three dataset pairs and every
    diagonal offset in ``range(2, 50)``, records ``cr(...)[0]`` between the
    matching diagonals of the two maps.  This is done on the filtered raw
    data, after ``iterativeCorrectWithoutSS(M=1)`` and after a further
    ``iterativeCorrectWithoutSS(M=20)``; the three curves are drawn in one
    subplot per pair.
    """
    S = 50
    x = numpy.arange(2, S)
    Tanay = binnedData(200000, myGenome)
    Tanay.simpleLoad(GM200k, "GM-HindIII")
    Tanay.simpleLoad(GM200kNcoI, "GM-NcoI")
    Tanay.simpleLoad(tcc200k, "TCC")
    Tanay.removeDiagonal(1)
    Tanay.removePoorRegions()
    Tanay.removeZeros()
    pairs = [("GM-HindIII", "GM-NcoI"), ("GM-HindIII", "TCC"),
             ("GM-NcoI", "TCC")]

    def diagonalCorrelations():
        # One list per pair: cr(...)[0] of the two maps' i-th diagonals, for
        # every offset i in x, using the current state of Tanay.dataDict.
        result = [[] for _ in pairs]
        for i in x:
            for j, pair in enumerate(pairs):
                result[j].append(cr(
                    numpy.diagonal(Tanay.dataDict[pair[0]], i),
                    numpy.diagonal(Tanay.dataDict[pair[1]], i)
                )[0])
        return result

    cors = diagonalCorrelations()
    Tanay.iterativeCorrectWithoutSS(M=1)
    cors2 = diagonalCorrelations()
    Tanay.iterativeCorrectWithoutSS(M=20)
    cors3 = diagonalCorrelations()

    matplotlib.rcParams['font.sans-serif'] = 'Arial'

    #plt.figure(figsize = (2.3,1.8))
    print(cors)
    print(cors2)
    print(cors3)
    plt.figure(figsize=(10, 3))
    fs = 8
    # x / 5. turns diagonal offsets into MB, presumably because the 200000
    # passed to binnedData above is the bin size in base pairs.
    separation = x / 5.
    for j, pair in enumerate(pairs):
        # Subplot indices are 1-based, so the pairs appear left to right.
        ax = plt.subplot(1, len(pairs), j + 1)
        plt.title("%s vs %s" % pair)
        plt.plot(separation, cors3[j], color="#E5A826", label="Iterative")
        plt.plot(separation, cors2[j], color="#28459A", label="Single")
        plt.plot(separation, cors[j], color="#E55726", label="Raw")
        plt.xlabel("Genomic Separation, MB", fontsize=8)
        plt.ylabel("Spearman correlation", fontsize=8)

        legend = plt.legend(prop={"size": 6}, loc=9, handlelength=2)
        legend.draw_frame(False)
        plt.ylim((0, 1))
        removeAxes(shift=0)
        # Apply the tick font size to the subplot that was just drawn.
        for tickLabel in list(ax.get_xticklabels()) + list(ax.get_yticklabels()):
            tickLabel.set_fontsize(fs)

    plt.show()
示例#12
0
def plotDiagonalCorrelation(resolution, filename1, filename2, experiment1, experiment2, genome, mouse=False, **kwargs):
    """Correlation of diagonal bins - paper figure.

    Loads two experiments into one ``binnedData`` object and, for every
    diagonal offset in ``range(2, 50)``, records ``cr(...)[0]`` between the
    matching diagonals of the two maps.  This is done three times: on the
    filtered raw data, after ``iterativeCorrectWithoutSS(M=1)`` and after a
    further ``iterativeCorrectWithoutSS(M=20)``.  The three curves are
    plotted against genomic separation, shown, and saved via ``pp``.

    ``resolution`` is passed to ``binnedData`` and is also used as the bin
    size in base pairs when converting diagonal offsets to MB for the x axis.
    ``mouse`` and ``**kwargs`` are accepted but not used in this function.
    """
    global pp

    if options.verbose:
        print >> sys.stdout, "plotDiagonalCorrelation: res: %d file1: %s file2: %s exp1:%s exp2:%s gen:%s" % (
            resolution,
            filename1,
            filename2,
            experiment1,
            experiment2,
            genome,
        )

    S = 50
    x = numpy.arange(2, S)
    Tanay = binnedData(resolution, genome)
    Tanay.simpleLoad(filename1, experiment1)
    Tanay.simpleLoad(filename2, experiment2)
    Tanay.removeDiagonal(1)
    Tanay.removePoorRegions()
    Tanay.removeZeros()

    pairs = [(experiment1, experiment2)]

    def diagonalCorrelations():
        # One list per pair: cr(...)[0] of the two maps' i-th diagonals, for
        # every offset i in x, using the current state of Tanay.dataDict.
        result = [[] for _ in pairs]
        for i in x:
            for j, pair in enumerate(pairs):
                result[j].append(
                    cr(numpy.diagonal(Tanay.dataDict[pair[0]], i), numpy.diagonal(Tanay.dataDict[pair[1]], i))[0]
                )
        return result

    cors = diagonalCorrelations()
    Tanay.iterativeCorrectWithoutSS(M=1)
    cors2 = diagonalCorrelations()
    Tanay.iterativeCorrectWithoutSS(M=20)
    cors3 = diagonalCorrelations()

    matplotlib.rcParams["font.sans-serif"] = "Arial"

    print("Eigenvectors")
    print(cors)
    print(cors2)
    print(cors3)
    plt.figure(figsize=(8, 4))
    fs = 8
    # Diagonal offset (in bins) -> separation in MB for the actual bin size,
    # instead of assuming 200 kb bins (the former hard-coded "x / 5.0").
    separation = x * (resolution / 1000000.0)
    for j, pair in enumerate(pairs):
        # Subplot indices are 1-based.
        ax = plt.subplot(1, len(pairs), j + 1)
        plt.title("%s vs %s" % pair)
        plt.plot(separation, cors3[j], color="#E5A826", label="Iterative")
        plt.plot(separation, cors2[j], color="#28459A", label="Single")
        plt.plot(separation, cors[j], color="#E55726", label="Raw")
        plt.xlabel("Genomic Separation, MB", fontsize=8)
        plt.ylabel("Spearman correlation", fontsize=8)

        legend = plt.legend(prop={"size": 6}, loc=9, handlelength=2)
        legend.draw_frame(False)
        plt.ylim((0, 1))
        removeAxes(shift=0)
        # Apply the tick font size to the subplot that was just drawn.
        for tickLabel in list(ax.get_xticklabels()) + list(ax.get_yticklabels()):
            tickLabel.set_fontsize(fs)

    # NOTE(review): savefig() runs after show(); confirm the figure is still
    # current at that point for the backend in use.
    plt.show()
    pp.savefig()