示例#1
0
文件: pacbio.py 项目: sequana/sequana
    def hist_snr(self, bins=50, alpha=0.5, hold=False, fontsize=12,
                 grid=True, xlabel="SNR", ylabel="#", title="",
                 clip_upper_SNR=30):
        """Plot histogram of the ACGT SNRs for all reads

        One semi-transparent histogram is drawn per nucleotide (A, C, G, T)
        on the current figure, all sharing the same bin edges.

        :param int bins: number of bin edges handed to ``linspace``. The
            edges start at 0 and end at the largest SNR found in the data,
            capped at *clip_upper_SNR*
        :param float alpha: transparency of the histograms
        :param bool hold: if False (default), the current figure is cleared
            before plotting
        :param int fontsize: font size of the axis labels and the title
        :param bool grid: if True, add a grid
        :param str xlabel: label of the x-axis
        :param str ylabel: label of the y-axis
        :param str title: title of the figure
        :param clip_upper_SNR: upper limit of the x-range. SNR values above
            the upper edge are clipped to it, so they are counted in the
            last bin.

        If no SNR is stored for the A channel (old pacbio format), a
        "no data" image is shown instead and the method returns early.

        .. plot::
            :include-source:

            from sequana.pacbio import PacbioSubreads
            from sequana import sequana_data
            b = PacbioSubreads(sequana_data("test_pacbio_subreads.bam"))
            b.hist_snr()

        """
        if self._df is None:
            self._get_df()

        # old pacbio format has no SNR stored
        if len(self._df['snr_A'].dropna()) == 0:
            # nothing to plot: display a placeholder image instead
            from sequana import sequana_data
            pylab.clf()
            pylab.imshow(pylab.imread(sequana_data("no_data.jpg")))
            pylab.gca().axis('off')
            return

        if hold is False:
            pylab.clf()

        columns = ["snr_{}".format(letter) for letter in "ACGT"]

        # Largest SNR over the four channels, never below 0 and never above
        # the user-provided cap.
        maxSNR = 0
        for column in columns:
            m = self._df.loc[:, column].max()
            if m > maxSNR:
                maxSNR = m
        if maxSNR > clip_upper_SNR:
            maxSNR = clip_upper_SNR

        bin_edges = pylab.linspace(0, maxSNR, bins)

        # Series.clip_upper was removed in pandas 1.0; clip(upper=...) is the
        # equivalent call and works with older pandas versions as well.
        for letter, column in zip("ACGT", columns):
            pylab.hist(self._df.loc[:, column].clip(upper=maxSNR),
                       alpha=alpha, label=letter, bins=bin_edges)

        pylab.legend()
        pylab.xlabel(xlabel, fontsize=fontsize)
        pylab.ylabel(ylabel, fontsize=fontsize)
        pylab.title(title, fontsize=fontsize)
        if grid is True:
            pylab.grid(True)
示例#2
0
文件: isoseq.py 项目: sequana/sequana
 def plot_corr(self):
     """Display the correlation matrix of the spikes table as an image.

     The table returned by ``spikes_found()`` gets an extra ``lengths``
     column (looked up per index entry in the SIRV lengths dictionary)
     before its pairwise column correlation is computed. The matrix is
     drawn with ``imshow``, the column names are used as tick labels on
     both axes, the color scale is fixed to [0, 1] and a colorbar is added.
     """
     length_by_name = self.SIRV_data.SIRV.get_lengths_as_dict()
     found = self.spikes_found()
     found["lengths"] = [length_by_name[name] for name in found.index]

     pylab.imshow(found.corr())

     # Same labels on both axes; only the x labels are rotated.
     labels = found.columns
     positions = range(len(labels))
     pylab.xticks(positions, labels, rotation=90)
     pylab.yticks(positions, labels)

     pylab.clim(0, 1)
     pylab.colorbar()
示例#3
0
 def plot_corr(self):
     """Plot the pairwise correlation between the columns of the spikes table.

     A ``lengths`` column, taken from the SIRV lengths dictionary for each
     entry of the table index, is appended to the result of
     ``spikes_found()``. The correlation matrix of the resulting table is
     shown with ``imshow`` using a color scale limited to [0, 1], with the
     column names as tick labels and a colorbar.
     """
     sirv_lengths = self.SIRV_data.SIRV.get_lengths_as_dict()
     table = self.spikes_found()
     table["lengths"] = [sirv_lengths[key] for key in table.index]

     matrix = table.corr()
     pylab.imshow(matrix)

     names = table.columns
     ticks = range(len(names))
     # x labels are rotated so that long column names stay readable
     pylab.xticks(ticks, names, rotation=90)
     pylab.yticks(ticks, names)

     pylab.clim(0, 1)
     pylab.colorbar()
示例#4
0
    def hist_snr(self,
                 bins=50,
                 alpha=0.5,
                 hold=False,
                 fontsize=12,
                 grid=True,
                 xlabel="SNR",
                 ylabel="#",
                 title=""):
        """Draw overlaid histograms of the per-read SNR of each nucleotide

        One histogram per channel (A, C, G, T) is added to the current
        figure. When no SNR is available for the A channel (old pacbio
        format), a "no data" image is displayed instead.

        :param int bins: binning for the histogram (forwarded to ``hist``)
        :param float alpha: transparency of the histograms
        :param bool hold: if False, the current figure is cleared first
        :param int fontsize: font size of the axis labels and the title
        :param bool grid: if True, add a grid
        :param str xlabel: label of the x-axis
        :param str ylabel: label of the y-axis
        :param str title: title of the figure

        .. plot::
            :include-source:

            from sequana.pacbio import BAMPacbio
            from sequana import sequana_data
            b = BAMPacbio(sequana_data("test_pacbio_subreads.bam"))
            b.hist_snr()

        """
        # Build the dataframe on first use.
        if self._df is None:
            self._get_df()

        # Old pacbio format: the SNR columns hold no values at all.
        available = self._df['snr_A'].dropna()
        if len(available) == 0:
            from sequana import sequana_data
            pylab.clf()
            placeholder = pylab.imread(sequana_data("no_data.jpg"))
            pylab.imshow(placeholder)
            pylab.gca().axis('off')
            return

        if hold is False:
            pylab.clf()

        # Same drawing order as the legend order: A, C, G, T.
        for base in "ACGT":
            values = self._df.loc[:, 'snr_' + base]
            pylab.hist(values, alpha=alpha, label=base, bins=bins)

        pylab.legend()
        pylab.xlabel(xlabel, fontsize=fontsize)
        pylab.ylabel(ylabel, fontsize=fontsize)
        pylab.title(title, fontsize=fontsize)

        if grid is True:
            pylab.grid(True)
示例#5
0
    def plot(self, interpolation='None', aspect='auto', cmap='hot', tight_layout=True,
        colorbar=True, fontsize_x=None, fontsize_y=None, rotation_x=90,
        xticks_on=True, yticks_on=True, **kargs):
        """wrapper around imshow to plot a dataframe

        The current figure is cleared, then ``self.df`` is drawn with
        ``imshow``. Rows of the dataframe are labelled with its index and
        columns with its column names.

        :param interpolation: forwarded to ``imshow`` (defaults to the
            string 'None')
        :param aspect: forwarded to ``imshow`` (defaults to 'auto')
        :param cmap: colormap to be used.
        :param tight_layout: if true, call ``tight_layout`` at the end
        :param colorbar: add a colorbar (default to True)
        :param fontsize_x: fontsize on xlabels (16 if None)
        :param fontsize_y: fontsize on ylabels (16 if None)
        :param rotation_x: rotate labels on xaxis
        :param xticks_on: if True, show the xticks and labels; otherwise
            remove them
        :param yticks_on: if True, show the yticks and labels; otherwise
            remove them
        :param kargs: any other keyword argument is forwarded to ``imshow``

        """

        data = self.df
        pylab.clf()
        pylab.imshow(data, interpolation=interpolation, aspect=aspect, cmap=cmap, **kargs)

        # Identity test: None is a singleton, and "== None" may be
        # overridden by the compared object.
        if fontsize_x is None:
            fontsize_x = 16 #FIXME use default values
        if fontsize_y is None:
            fontsize_y = 16 #FIXME use default values

        if yticks_on is True:
            pylab.yticks(range(0, len(data.index)), data.index,
                fontsize=fontsize_y)
        else:
            pylab.yticks([])
        if xticks_on is True:
            pylab.xticks(range(0, len(data.columns)), data.columns,
                fontsize=fontsize_x, rotation=rotation_x)
        else:
            pylab.xticks([])

        if colorbar is True:
            pylab.colorbar()

        if tight_layout:
            pylab.tight_layout()
示例#6
0
    def hist_snr(self,
                 bins=50,
                 alpha=0.5,
                 hold=False,
                 fontsize=12,
                 grid=True,
                 xlabel="SNR",
                 ylabel="#",
                 title="",
                 clip_upper_SNR=30):
        """Plot histogram of the ACGT SNRs for all reads

        One semi-transparent histogram is drawn per nucleotide (A, C, G, T)
        on the current figure, all sharing the same bin edges.

        :param int bins: number of bin edges handed to ``linspace``. The
            edges start at 0 and end at the largest SNR found in the data,
            capped at *clip_upper_SNR*
        :param float alpha: transparency of the histograms
        :param bool hold: if False (default), the current figure is cleared
            before plotting
        :param int fontsize: font size of the axis labels and the title
        :param bool grid: if True, add a grid
        :param str xlabel: label of the x-axis
        :param str ylabel: label of the y-axis
        :param str title: title of the figure
        :param clip_upper_SNR: upper limit of the x-range. SNR values above
            the upper edge are clipped to it, so they are counted in the
            last bin.

        If no SNR is stored for the A channel (old pacbio format), a
        "no data" image is shown instead and the method returns early.

        .. plot::
            :include-source:

            from sequana.pacbio import PacbioSubreads
            from sequana import sequana_data
            b = PacbioSubreads(sequana_data("test_pacbio_subreads.bam"))
            b.hist_snr()

        """
        if self._df is None:
            self._get_df()

        # old pacbio format has no SNR stored
        if len(self._df['snr_A'].dropna()) == 0:
            # nothing to plot: display a placeholder image instead
            from sequana import sequana_data
            pylab.clf()
            pylab.imshow(pylab.imread(sequana_data("no_data.jpg")))
            pylab.gca().axis('off')
            return

        if hold is False:
            pylab.clf()

        letters = "ACGT"

        # Largest SNR over the four channels, never below 0 and never above
        # the user-provided cap.
        maxSNR = 0
        for letter in letters:
            m = self._df.loc[:, "snr_{}".format(letter)].max()
            if m > maxSNR:
                maxSNR = m
        if maxSNR > clip_upper_SNR:
            maxSNR = clip_upper_SNR

        bin_edges = pylab.linspace(0, maxSNR, bins)

        # Series.clip_upper was removed in pandas 1.0; clip(upper=...) is the
        # equivalent call and works with older pandas versions as well.
        for letter in letters:
            snr = self._df.loc[:, "snr_{}".format(letter)]
            pylab.hist(snr.clip(upper=maxSNR),
                       alpha=alpha,
                       label=letter,
                       bins=bin_edges)

        pylab.legend()
        pylab.xlabel(xlabel, fontsize=fontsize)
        pylab.ylabel(ylabel, fontsize=fontsize)
        pylab.title(title, fontsize=fontsize)
        if grid is True:
            pylab.grid(True)