Exemplo n.º 1
0
 def get_log(self):
     """
     Return the path of this sample's log file.

     The 'logs' directory under the experiment results directory is passed to
     tps_utils.make_dir before the path is built.

     :return: '<rdir>/logs/<sample_name>.log', where rdir is whatever
         self.experiment_settings.get_rdir() returns
     """
     logs_dir = os.path.join(self.experiment_settings.get_rdir(), 'logs')
     tps_utils.make_dir(logs_dir)
     results_dir = self.experiment_settings.get_rdir()
     log_file_name = '%(sample_name)s.log' % {'sample_name': self.sample_name}
     return os.path.join(results_dir, 'logs', log_file_name)
Exemplo n.º 2
0
 def initialize_libs(self):
     """
     Reset self.libs and initialize one library per library-settings entry.

     Logs a start and a finish message, passes the 'sequence_counts' results
     directory to tps_utils.make_dir, then calls self.initialize_lib for every
     item yielded by self.settings.iter_lib_settings(), in iteration order.
     """
     self.settings.write_to_log('initializing libraries, counting reads')
     tps_utils.make_dir(self.rdir_path('sequence_counts'))
     self.libs = []
     # Explicit loop instead of map(): map() is lazy on Python 3, so the
     # initialize_lib calls would never run there, and on Python 2 it only
     # builds a throwaway list of return values.
     for lib_settings in self.settings.iter_lib_settings():
         self.initialize_lib(lib_settings)
     self.settings.write_to_log(
         'initializing libraries, counting reads, done')
Exemplo n.º 3
0
 def make_plots(self):
     """
     Run the AUG read plots after preparing the 'plots' results directory.

     plot_AUG_reads and plot_last_AUG_reads are each run with their default
     arguments and with unique_only=True; plot_AUG_reads is then run twice
     more for which_AUG=2 (with and without unique_only=True).
     """
     tps_utils.make_dir(self.rdir_path('plots'))
     # (plotting method, keyword arguments), listed in execution order
     plot_jobs = (
         (self.plot_AUG_reads, {}),
         (self.plot_AUG_reads, {'unique_only': True}),
         (self.plot_last_AUG_reads, {}),
         (self.plot_last_AUG_reads, {'unique_only': True}),
         (self.plot_AUG_reads, {'which_AUG': 2, 'unique_only': True}),
         (self.plot_AUG_reads, {'which_AUG': 2}),
     )
     for plot_method, plot_kwargs in plot_jobs:
         plot_method(**plot_kwargs)
Exemplo n.º 4
0
 def make_plots(self):
     """
     Pass the 'plots' results directory to tps_utils.make_dir, then run every
     AUG read plot in a fixed order.
     """
     plots_dir = self.rdir_path('plots')
     tps_utils.make_dir(plots_dir)
     # (method name, keyword arguments); order matches the sequence of calls
     schedule = [
         ('plot_AUG_reads', {}),
         ('plot_AUG_reads', {'unique_only': True}),
         ('plot_last_AUG_reads', {}),
         ('plot_last_AUG_reads', {'unique_only': True}),
         ('plot_AUG_reads', {'which_AUG': 2, 'unique_only': True}),
         ('plot_AUG_reads', {'which_AUG': 2}),
     ]
     for method_name, options in schedule:
         getattr(self, method_name)(**options)
Exemplo n.º 5
0
    def remove_primer(self):
        """
        Run primer removal on every library unless its output already exists.

        Returns immediately when 'force_retrim' is not set and every library
        reports primerless_reads_exist(). Otherwise, and only if the
        'trim_adaptor' property is set, the 'primer_removed' results directory
        is passed to tps_utils.make_dir and self.remove_primer_one_lib is
        dispatched for each library through bzUtils.parmap with
        nprocs=self.threads.
        """
        retrim_forced = self.settings.get_property('force_retrim')
        if not retrim_forced and all(
                lib_settings.primerless_reads_exist()
                for lib_settings in self.settings.iter_lib_settings()):
            # every library already has primerless reads; nothing to redo
            return

        if not self.settings.get_property('trim_adaptor'):
            return

        tps_utils.make_dir(self.rdir_path('primer_removed'))

        def remove_from_one_lib(lib_setting):
            return self.remove_primer_one_lib(lib_setting)

        bzUtils.parmap(remove_from_one_lib,
                       self.settings.iter_lib_settings(),
                       nprocs=self.threads)
Exemplo n.º 6
0
 def __init__(self, tpse, experiment_settings, threads):
     """
     Store the experiment handles and prepare the 'QC' results directory.

     :param tpse: experiment object; only its rdir_path() is used here
     :param experiment_settings: settings object whose get_property,
         get_rdir and get_wdir accessors are re-exposed on this instance
     :param threads: stored as self.threads (presumably the worker count
         for parallel steps -- TODO confirm against callers)
     """
     self.tpse = tpse
     self.threads = threads
     self.experiment_settings = experiment_settings
     # shortcuts so callers can use the settings accessors directly
     self.get_rdir = experiment_settings.get_rdir
     self.get_wdir = experiment_settings.get_wdir
     self.get_property = self.experiment_settings.get_property
     qc_dir = self.tpse.rdir_path('QC')
     tps_utils.make_dir(qc_dir)
Exemplo n.º 7
0
 def __init__(self, tpse, experiment_settings, threads):
     """
     Keep references to the experiment objects and pass the "QC" results
     directory to tps_utils.make_dir.

     :param tpse: experiment object providing rdir_path()
     :param experiment_settings: settings object; its get_property, get_rdir
         and get_wdir methods are bound onto this instance
     :param threads: value stored unchanged as self.threads
     """
     self.threads, self.tpse = threads, tpse
     self.experiment_settings = experiment_settings
     settings = self.experiment_settings
     self.get_property = settings.get_property
     self.get_rdir = settings.get_rdir
     self.get_wdir = settings.get_wdir
     tps_utils.make_dir(self.tpse.rdir_path("QC"))
Exemplo n.º 8
0
 def build_bowtie_index(self):
     """
     builds a bowtie 2 index from the input fasta file
     recommend including barcode+PCR sequences just in case of some no-insert amplicons

     The index is (re)built only when the 'force_index_rebuild' property is
     set or self.settings.bowtie_index_exists() is false. stdout and stderr
     of bowtie2-build are both appended to '<log>.bwt', where <log> is
     self.settings.get_log(). Start and completion messages are always logged.
     """
     self.settings.write_to_log('building bowtie index')
     if self.settings.get_property('force_index_rebuild') or not self.settings.bowtie_index_exists():
         tps_utils.make_dir(self.rdir_path('bowtie_indices'))
         # Fetch each path once and reuse it; previously the index path was
         # stored in an unused local and then looked up a second time.
         index = self.settings.get_bowtie_index()
         pool_fasta = self.settings.get_trimmed_pool_fasta()
         bowtie_log = self.settings.get_log() + '.bwt'
         # NOTE(review): the paths are interpolated into a shell string, so a
         # path containing spaces or shell metacharacters will break (or alter)
         # the command -- confirm the configured paths are simple and trusted.
         command = 'bowtie2-build -f --offrate 0 %s %s 1>>%s 2>>%s' % (
             pool_fasta, index, bowtie_log, bowtie_log)
         subprocess.Popen(command, shell=True).wait()
     self.settings.write_to_log('building bowtie index complete')
Exemplo n.º 9
0
    def remove_primer(self):
        """
        Remove primers from all libraries when needed.

        Unless 'force_retrim' is set, the method first checks whether any
        library is missing its primerless reads and returns early if none is.
        When the 'trim_adaptor' property is set, the 'primer_removed' results
        directory is passed to tps_utils.make_dir and
        self.remove_primer_one_lib is run per library via bzUtils.parmap
        (nprocs=self.threads).
        """
        if not self.settings.get_property('force_retrim'):
            output_missing = any(
                not lib_settings.primerless_reads_exist()
                for lib_settings in self.settings.iter_lib_settings())
            if not output_missing:
                # all primerless read files are already in place
                return

        if not self.settings.get_property('trim_adaptor'):
            return

        primer_removed_dir = self.rdir_path('primer_removed')
        tps_utils.make_dir(primer_removed_dir)
        bzUtils.parmap(
            lambda one_lib: self.remove_primer_one_lib(one_lib),
            self.settings.iter_lib_settings(),
            nprocs=self.threads)
Exemplo n.º 10
0
 def trim_reads(self):
     """
     Trim the reads of every library.

     Per the original notes: trimming removes potential random barcoding
     sequences from the 5' end, and trimming from the 3' end can help
     problematic mapping by reducing the chance of indels preventing mapping.

     When 'force_retrim' is not set and every library reports
     trimmed_reads_exist(), the method returns right after logging the start
     message (the completion message is not logged on that path).
     :return:
     """
     self.settings.write_to_log('trimming reads')
     retrim_forced = self.settings.get_property('force_retrim')
     if not retrim_forced and all(
             lib_settings.trimmed_reads_exist()
             for lib_settings in self.settings.iter_lib_settings()):
         return

     tps_utils.make_dir(self.rdir_path('trimmed_reads'))

     def trim_one(lib_setting):
         return self.trim_one_fasta_file(lib_setting)

     bzUtils.parmap(trim_one,
                    self.settings.iter_lib_settings(),
                    nprocs=self.threads)
     self.settings.write_to_log('trimming reads complete')
Exemplo n.º 11
0
    def plot_pcr_bias(self):
        """
        Plot cumulative read fraction against cumulative sequence fraction for
        every library and save the figure as '<rdir>/QC/pcr_bias.pdf'.

        For each (sample_name, read_fractions) pair returned by
        self.get_collapsed_read_fractions, the read fractions are sorted from
        largest to smallest and accumulated; the running total is drawn on the
        x axis against i/N (i = 1..N) on the y axis. Two consecutive libraries
        share a colour from bzUtils.rainbow and alternate between the first
        two bzUtils.line_styles. A diagonal labelled 'expected' is drawn last.
        """
        tps_utils.make_dir(
            os.path.join(self.experiment_settings.get_rdir(), 'QC',
                         'collapsed_fracs'))
        # Built eagerly so all fractions are computed before any plotting,
        # regardless of whether map() would be lazy on this Python version.
        collapsed_read_fractions = [
            self.get_collapsed_read_fractions(lib_settings)
            for lib_settings in self.experiment_settings.iter_lib_settings()
        ]
        fig = plt.figure(figsize=(8, 8))
        plot = fig.add_subplot(111)
        # Fallback for the 'expected' diagonal so it can still be drawn when
        # there are no libraries (the name used to be unbound in that case).
        cumulative_seq_fractions = np.array([0.0, 1.0])
        color_index = 0
        for sample_name, read_fractions in collapsed_read_fractions:
            read_fractions = sorted(read_fractions, reverse=True)
            # running sum of the sorted read fractions
            cumulative_read_fractions = read_fractions[:1]
            for read_frac in read_fractions[1:]:
                cumulative_read_fractions.append(
                    cumulative_read_fractions[-1] + read_frac)
            cumulative_seq_fractions = np.array(
                range(1,
                      len(cumulative_read_fractions) + 1)) / float(
                          len(cumulative_read_fractions))

            # '//' keeps the index an int on Python 3 as well ('/' would
            # produce a float there and fail as a sequence index).
            plot.plot(cumulative_read_fractions,
                      cumulative_seq_fractions,
                      color=bzUtils.rainbow[color_index // 2],
                      linestyle=bzUtils.line_styles[color_index % 2],
                      label=sample_name,
                      lw=1)
            color_index += 1
        # reference diagonal, reusing the y values of the last library plotted
        plot.plot(cumulative_seq_fractions,
                  cumulative_seq_fractions,
                  color=bzUtils.rainbow[color_index // 2],
                  linestyle=bzUtils.line_styles[2],
                  label='expected',
                  lw=1)
        plot.set_xlabel("fraction of reads")
        plot.set_ylabel("fraction of sequences")
        plot.set_xlim(0, 1)
        plot.set_ylim(0, 1)
        lg = plt.legend(loc=2, prop={'size': 10}, labelspacing=0.2)
        lg.draw_frame(False)
        out_name = os.path.join(self.experiment_settings.get_rdir(), 'QC',
                                'pcr_bias.pdf')
        # transparent expects a bool; the string 'True' only worked because
        # any non-empty string is truthy.
        plt.savefig(out_name, transparent=True, format='pdf')
        plt.clf()
Exemplo n.º 12
0
    def plot_pcr_bias(self):
        """
        Draw the PCR-bias plot and save it as "<rdir>/QC/pcr_bias.pdf".

        Every library contributes one curve: its read fractions (from
        self.get_collapsed_read_fractions) are sorted in descending order and
        accumulated, and the running total (x) is plotted against i/N for
        i = 1..N (y). Consecutive pairs of libraries share a colour from
        bzUtils.rainbow and alternate between the first two entries of
        bzUtils.line_styles. A diagonal labelled "expected" is added last.
        """
        tps_utils.make_dir(os.path.join(self.experiment_settings.get_rdir(), "QC", "collapsed_fracs"))
        # Eager list: all fractions are computed before plotting starts, on
        # Python 2 and Python 3 alike (map() is lazy on Python 3).
        collapsed_read_fractions = [
            self.get_collapsed_read_fractions(lib_settings)
            for lib_settings in self.experiment_settings.iter_lib_settings()
        ]
        fig = plt.figure(figsize=(8, 8))
        plot = fig.add_subplot(111)
        # Default diagonal, used only when there is no library to take the
        # y values from (previously that case hit an unbound local).
        cumulative_seq_fractions = np.array([0.0, 1.0])
        color_index = 0
        for sample_name, read_fractions in collapsed_read_fractions:
            read_fractions = sorted(read_fractions, reverse=True)
            # running sum of the sorted read fractions
            cumulative_read_fractions = read_fractions[:1]
            for read_frac in read_fractions[1:]:
                cumulative_read_fractions.append(cumulative_read_fractions[-1] + read_frac)
            cumulative_seq_fractions = np.array(range(1, len(cumulative_read_fractions) + 1)) / float(
                len(cumulative_read_fractions)
            )

            # Floor division keeps the colour index an int on Python 3 too.
            plot.plot(
                cumulative_read_fractions,
                cumulative_seq_fractions,
                color=bzUtils.rainbow[color_index // 2],
                linestyle=bzUtils.line_styles[color_index % 2],
                label=sample_name,
                lw=1,
            )
            color_index += 1
        # reference diagonal, reusing the y values of the last library plotted
        plot.plot(
            cumulative_seq_fractions,
            cumulative_seq_fractions,
            color=bzUtils.rainbow[color_index // 2],
            linestyle=bzUtils.line_styles[2],
            label="expected",
            lw=1,
        )
        plot.set_xlabel("fraction of reads")
        plot.set_ylabel("fraction of sequences")
        plot.set_xlim(0, 1)
        plot.set_ylim(0, 1)
        lg = plt.legend(loc=2, prop={"size": 10}, labelspacing=0.2)
        lg.draw_frame(False)
        out_name = os.path.join(self.experiment_settings.get_rdir(), "QC", "pcr_bias.pdf")
        # Pass a real bool; the string "True" only worked by being truthy.
        plt.savefig(out_name, transparent=True, format="pdf")
        plt.clf()
Exemplo n.º 13
0
 def collapse_identical_reads(self):
     """
     Collapse identical reads in every library (FASTX toolkit, per the
     original note), or only convert fastq to fasta when collapsing is off.

     When 'force_recollapse' is not set and every library reports
     collapsed_reads_exist(), the method returns right after logging the
     start message. Otherwise the 'collapsed_reads' results directory is
     passed to tps_utils.make_dir and the per-library worker is chosen by the
     'collapse_identical_reads' property: self.collapse_one_fastq_file when
     set, self.fastq_to_fasta when not.
     :return:
     """
     self.settings.write_to_log('collapsing reads')
     recollapse_forced = self.settings.get_property('force_recollapse')
     if not recollapse_forced and all(
             lib_settings.collapsed_reads_exist()
             for lib_settings in self.settings.iter_lib_settings()):
         return

     tps_utils.make_dir(self.rdir_path('collapsed_reads'))
     if self.settings.get_property('collapse_identical_reads'):
         def process_one(lib_setting):
             return self.collapse_one_fastq_file(lib_setting)
     else:
         def process_one(lib_setting):
             return self.fastq_to_fasta(lib_setting)
     bzUtils.parmap(process_one,
                    self.settings.iter_lib_settings(),
                    nprocs=self.threads)
     self.settings.write_to_log('collapsing reads complete')
Exemplo n.º 14
0
 def build_bowtie_index(self):
     """
     builds a bowtie 2 index from the input fasta file
     recommend including barcode+PCR sequences just in case of some no-insert amplicons

     Nothing is built when 'force_index_rebuild' is unset and
     self.settings.bowtie_index_exists() is true. Output of bowtie2-build
     (stdout and stderr) is appended to self.settings.get_log() + '.bwt'.
     The start and completion messages are logged in either case.
     """
     self.settings.write_to_log('building bowtie index')
     rebuild_forced = self.settings.get_property('force_index_rebuild')
     if rebuild_forced or not self.settings.bowtie_index_exists():
         tps_utils.make_dir(self.rdir_path('bowtie_indices'))
         # Look every path up once; the index path used to be assigned to an
         # unused local and then fetched again for the command line.
         index = self.settings.get_bowtie_index()
         pool_fasta = self.settings.get_trimmed_pool_fasta()
         bowtie_log = self.settings.get_log() + '.bwt'
         # NOTE(review): paths are substituted into a shell command string;
         # spaces or shell metacharacters in them will break the command --
         # confirm the configured paths are simple and trusted.
         command = 'bowtie2-build -f --offrate 0 %s %s 1>>%s 2>>%s' % (
             pool_fasta, index, bowtie_log, bowtie_log)
         subprocess.Popen(command, shell=True).wait()
     self.settings.write_to_log('building bowtie index complete')
Exemplo n.º 15
0
 def trim_reads(self):
     """
     Trim reads by given amount, removing potential random barcoding sequences from 5' end
     Trimming from 3' end can also help if mapping is problematic by reducing chance for indels to prevent mapping

     Unless 'force_retrim' is set, the work is skipped when no library is
     missing its trimmed reads; in that case only the start message is logged.
     :return:
     """
     self.settings.write_to_log('trimming reads')
     if not self.settings.get_property('force_retrim'):
         trimmed_output_missing = any(
             not lib_settings.trimmed_reads_exist()
             for lib_settings in self.settings.iter_lib_settings())
         if not trimmed_output_missing:
             return

     trimmed_dir = self.rdir_path('trimmed_reads')
     tps_utils.make_dir(trimmed_dir)
     bzUtils.parmap(
         lambda one_lib: self.trim_one_fasta_file(one_lib),
         self.settings.iter_lib_settings(),
         nprocs=self.threads)
     self.settings.write_to_log('trimming reads complete')
Exemplo n.º 16
0
 def collapse_identical_reads(self):
     """
     collapses all identical reads using FASTX toolkit

     Skipped (after the start message is logged) when 'force_recollapse' is
     not set and no library is missing its collapsed reads. The
     'collapse_identical_reads' property selects the per-library step:
     self.collapse_one_fastq_file when set, self.fastq_to_fasta otherwise.
     :return:
     """
     self.settings.write_to_log('collapsing reads')
     if not self.settings.get_property('force_recollapse'):
         collapsed_output_missing = any(
             not lib_settings.collapsed_reads_exist()
             for lib_settings in self.settings.iter_lib_settings())
         if not collapsed_output_missing:
             return

     collapsed_dir = self.rdir_path('collapsed_reads')
     tps_utils.make_dir(collapsed_dir)
     collapse_enabled = self.settings.get_property('collapse_identical_reads')
     if collapse_enabled:
         per_lib_step = lambda one_lib: self.collapse_one_fastq_file(one_lib)
     else:
         per_lib_step = lambda one_lib: self.fastq_to_fasta(one_lib)
     bzUtils.parmap(
         per_lib_step,
         self.settings.iter_lib_settings(),
         nprocs=self.threads)
     self.settings.write_to_log('collapsing reads complete')
Exemplo n.º 17
0
    def map_reads(self):
        """
        Map the reads of every library (with bowtie, per the original note).

        When 'force_remapping' is not set and every library reports
        mapped_reads_exist(), the method returns right after logging the start
        message. Otherwise the 'mapped_reads', 'mapping_stats' and
        'unmapped_reads' results directories are passed to tps_utils.make_dir
        and self.map_one_library is run per library through bzUtils.parmap
        with nprocs=self.threads.
        :return:
        """
        self.settings.write_to_log('mapping reads')
        remap_forced = self.settings.get_property('force_remapping')
        if not remap_forced and all(
                lib_settings.mapped_reads_exist()
                for lib_settings in self.settings.iter_lib_settings()):
            return

        for subdir in ('mapped_reads', 'mapping_stats', 'unmapped_reads'):
            tps_utils.make_dir(self.rdir_path(subdir))

        def map_one(lib_setting):
            return self.map_one_library(lib_setting)

        bzUtils.parmap(map_one,
                       self.settings.iter_lib_settings(),
                       nprocs=self.threads)
        self.settings.write_to_log('finished mapping reads')
Exemplo n.º 18
0
    def map_reads(self):
        """
        map all reads using bowtie

        Unless 'force_remapping' is set, nothing is done beyond logging the
        start message when no library is missing its mapped reads.
        :return:
        """
        self.settings.write_to_log('mapping reads')
        if not self.settings.get_property('force_remapping'):
            mapped_output_missing = any(
                not lib_settings.mapped_reads_exist()
                for lib_settings in self.settings.iter_lib_settings())
            if not mapped_output_missing:
                return

        # output locations used by the mapping step, prepared in this order
        output_subdirs = ['mapped_reads', 'mapping_stats', 'unmapped_reads']
        for output_subdir in output_subdirs:
            tps_utils.make_dir(self.rdir_path(output_subdir))

        bzUtils.parmap(
            lambda one_lib: self.map_one_library(one_lib),
            self.settings.iter_lib_settings(),
            nprocs=self.threads)
        self.settings.write_to_log('finished mapping reads')
Exemplo n.º 19
0
 def make_tables(self):
     """
     Pass the 'tables' results directory to tps_utils.make_dir, then run
     self.make_counts_table().
     """
     tables_dir = self.rdir_path('tables')
     tps_utils.make_dir(tables_dir)
     self.make_counts_table()
Exemplo n.º 20
0
 def initialize_libs(self):
     """
     Start from an empty self.libs and call self.initialize_lib once for each
     entry of self.settings.iter_lib_settings(), in iteration order.

     The 'sequence_counts' results directory is passed to tps_utils.make_dir
     first, and a message is logged before and after the work.
     """
     self.settings.write_to_log('initializing libraries, counting reads')
     tps_utils.make_dir(self.rdir_path('sequence_counts'))
     self.libs = []
     # A plain loop is used because map() is lazy on Python 3 (the calls would
     # silently never happen) and wasteful on Python 2 (throwaway result list).
     for lib_settings in self.settings.iter_lib_settings():
         self.initialize_lib(lib_settings)
     self.settings.write_to_log('initializing libraries, counting reads, done')
Exemplo n.º 21
0
 def make_tables(self):
     """
     Prepare the 'tables' results directory via tps_utils.make_dir and then
     call self.make_counts_table().
     """
     output_dir = self.rdir_path('tables')
     tps_utils.make_dir(output_dir)
     self.make_counts_table()
Exemplo n.º 22
0
 def get_wdir(self):
     """
     Return self.wdir after passing it to tps_utils.make_dir.

     :return: the value of self.wdir
     """
     working_dir = self.wdir
     tps_utils.make_dir(working_dir)
     return working_dir