示例#1
0
 def get_log(self):
     """Return the path of this sample's log file.

     The path is ``<rdir>/logs/<sample_name>.log``, where ``<rdir>`` comes
     from the experiment settings.  The ``logs`` directory is handed to
     ``mod_utils.make_dir`` first; the log file itself is not opened here.
     """
     settings = self.experiment_settings
     mod_utils.make_dir(os.path.join(settings.get_rdir(), 'logs'))
     # The one-element tuple formats the name exactly like the mapping form.
     return os.path.join(settings.get_rdir(), 'logs',
                         '%s.log' % (self.sample_name,))
示例#2
0
 def write_wigs(self,
                suffix,
                subtract_background=False,
                subtract_control=False):
     """Write per-library wig files and a MochiView batch-import table.

     For every selected library a mutation-rate wig is written under
     ``mutation_wigs`` and an RT-stop wig under ``rt_stop_wigs``, both named
     ``<sample_name>_<suffix>``.  A tab-separated ``mochi_batch_<suffix>.txt``
     listing the mutation tracks is written next to the mutation wigs.

     :param suffix: text appended to each sample name to form the file names
     :param subtract_background: forwarded to the mutation-rate wig writer
     :param subtract_control: forwarded to the mutation-rate wig writer
     """
     mutation_dir = self.rdir_path('mutation_wigs')
     rt_stop_dir = self.rdir_path('rt_stop_wigs')
     mod_utils.make_dir(mutation_dir)
     mod_utils.make_dir(rt_stop_dir)
     # Any kind of subtraction is restricted to the normalizable libraries.
     if subtract_background or subtract_control:
         libs_to_write = self.get_normalizable_libs()
     else:
         libs_to_write = self.libs
     # The batch table makes bulk import into MochiView easier.  The context
     # manager closes it even if one of the library writers raises.
     batch_path = os.path.join(mutation_dir,
                               'mochi_batch_' + suffix + '.txt')
     with open(batch_path, 'w') as batch_file:
         batch_file.write('SEQUENCE_SET\tFILE_NAME\tDATA_TYPE\tNAME\n')
         for lib in libs_to_write:
             track_name = lib.lib_settings.sample_name + '_' + suffix
             # '<replace>' cells are placeholders to be filled in by hand.
             batch_file.write('<replace>\t%s\t<replace>\t%s\n' %
                              (track_name + '.wig.gz', track_name))
             lib.write_mutation_rates_to_wig(
                 os.path.join(mutation_dir, track_name),
                 subtract_background=subtract_background,
                 subtract_control=subtract_control)
             # RT-stop wigs take no subtraction options.
             lib.write_rt_stops_to_wig(os.path.join(rt_stop_dir, track_name))
示例#3
0
 def get_log(self):
     """Build the log file path for this sample.

     ``mod_utils.make_dir`` is called on ``<rdir>/logs`` before the path
     ``<rdir>/logs/<sample_name>.log`` is returned; ``<rdir>`` is read from
     the experiment settings.
     """
     experiment = self.experiment_settings
     mod_utils.make_dir(os.path.join(experiment.get_rdir(), 'logs'))
     return os.path.join(
         experiment.get_rdir(),
         'logs',
         '%s.log' % (self.sample_name,))
示例#4
0
    def make_plots(self, exclude_constitutive=False):
        """Render the mutation plots for this experiment.

        With ``exclude_constitutive`` set, output goes to
        ``plots/exclude_constitutive``, file names carry an
        ``_exclude_constitutive`` tag, and the ROC curves and
        functional-group plots are produced as well.  Otherwise output goes
        to ``plots`` with no tag.  The remaining plots are produced in both
        modes; the interactive MA plot only when the
        ``make_interactive_plots`` setting is true.
        """
        if exclude_constitutive:
            mod_utils.make_dir(self.rdir_path('plots', 'exclude_constitutive'))
            mod_utils.make_dir(
                self.rdir_path('plots', 'exclude_constitutive',
                               'functional_groups'))
            mod_utils.make_dir(
                self.rdir_path('plots', 'exclude_constitutive', 'interactive'))
            plot_dir = self.rdir_path('plots', 'exclude_constitutive')
            tag = '_exclude_constitutive'
            # NOTE(review): the 25S name has a double underscore while the
            # 18S name has a single one -- confirm against the fasta headers.
            mod_plotting.generate_roc_curves(
                self.settings.get_property('tptn_file_25s'),
                self.settings.rRNA_seqs,
                os.path.join(plot_dir, '25S_ROC_curves'),
                self.get_modified_libs(),
                'S.c.25S__rRNA',
                self.settings.get_property('affected_nucleotides'))
            mod_plotting.generate_roc_curves(
                self.settings.get_property('tptn_file_18s'),
                self.settings.rRNA_seqs,
                os.path.join(plot_dir, '18S_ROC_curves'),
                self.get_modified_libs(),
                'S.c.18S_rRNA',
                self.settings.get_property('affected_nucleotides'))
            mod_plotting.plot_functional_group_changes(
                self.get_normalizable_libs(),
                os.path.join(plot_dir, 'functional_groups', 'group_changes'),
                self.settings.get_property('functional_groupings'),
                nucleotides_to_count=self.settings.get_property(
                    'affected_nucleotides'),
                exclude_constitutive=exclude_constitutive,
                max_fold_reduction=0.001,
                max_fold_increase=100)
        else:
            mod_utils.make_dir(self.rdir_path('plots'))
            mod_utils.make_dir(self.rdir_path('plots', 'interactive'))
            plot_dir = self.rdir_path('plots')
            tag = ''

        def tagged(stem):
            # Output prefix inside plot_dir carrying the mode-specific tag.
            return os.path.join(plot_dir, stem + tag)

        mod_plotting.plot_mutated_nts_pie(
            self.libs,
            tagged('raw_mutation_fractions'),
            exclude_constitutive=exclude_constitutive)
        mod_plotting.plot_mutation_breakdown_pie(
            self.libs,
            tagged('raw_mutation_types'),
            exclude_constitutive=exclude_constitutive)

        mod_plotting.plot_mutated_nts_pie(
            self.libs,
            tagged('background_sub_mutation_fractions'),
            subtract_background=True,
            exclude_constitutive=exclude_constitutive)
        mod_plotting.plot_mutation_rate_cdfs(
            self.libs,
            tagged('mutation_rate_cdf'),
            nucleotides_to_count=self.settings.get_property(
                'affected_nucleotides'),
            exclude_constitutive=exclude_constitutive)

        # NOTE(review): the violin plot reuses the 'mutation_rate_cdf' prefix
        # of the CDF plot above -- confirm the two cannot overwrite each other.
        mod_plotting.plot_mutation_rate_violins(
            self.libs,
            tagged('mutation_rate_cdf'),
            nucleotides_to_count=self.settings.get_property(
                'affected_nucleotides'),
            exclude_constitutive=exclude_constitutive)

        mod_plotting.plot_changes_vs_control(
            self.get_normalizable_libs(),
            tagged('changes'),
            nucleotides_to_count=self.settings.get_property(
                'affected_nucleotides'),
            exclude_constitutive=exclude_constitutive)
        mod_plotting.ma_plots(
            self.get_normalizable_libs(),
            tagged('MA'),
            nucleotides_to_count=self.settings.get_property(
                'affected_nucleotides'),
            exclude_constitutive=exclude_constitutive)
        if self.settings.get_property('make_interactive_plots'):
            # The interactive changes-vs-control plot is currently disabled;
            # only the interactive MA plot is made, always with
            # exclude_constitutive=False even though the file name is tagged.
            mod_plotting.ma_plots_interactive(
                self.get_normalizable_libs(),
                os.path.join(plot_dir, 'interactive', 'MA' + tag),
                nucleotides_to_count=self.settings.get_property(
                    'affected_nucleotides'),
                exclude_constitutive=False)
示例#5
0
 def generate_mapping_index(self):
     """
     builds a STAR index from the input fasta file

     Nothing is built when the settings report that the index already
     exists.  STAR's stdout and stderr are appended to the settings log
     file.  A non-zero exit status is written to the log; the method still
     returns normally so the caller's behaviour is unchanged.
     """
     self.settings.write_to_log('building STAR index')
     if not self.settings.star_index_exists():
         mod_utils.make_dir(self.settings.get_star_index())
         # NOTE(review): the paths are interpolated into a shell command
         # unquoted, so spaces or shell metacharacters in them will break it.
         command = (
             'STAR --runThreadN %d --runMode genomeGenerate --genomeDir %s --genomeFastaFiles %s --genomeSAindexNbases 4 1>>%s 2>>%s'
             % (self.threads, self.settings.get_star_index(),
                self.settings.get_rRNA_fasta(), self.settings.get_log(),
                self.settings.get_log()))
         exit_status = subprocess.Popen(command, shell=True).wait()
         if exit_status != 0:
             # Previously the status was discarded and a failed build went
             # unnoticed until mapping.
             self.settings.write_to_log(
                 'STAR index build exited with status %d' % exit_status)
     self.settings.write_to_log('building STAR index complete')
示例#6
0
 def write_wigs(self, suffix, subtract_background=False, subtract_control=False):
     """Write a mutation-rate wig per library plus a MochiView batch table.

     Each selected library is written to ``wigs/<sample_name>_<suffix>``, and
     a tab-separated ``mochi_batch_<suffix>.txt`` listing those tracks is
     written to the same directory.

     :param suffix: text appended to each sample name to form the file names
     :param subtract_background: forwarded to the wig writer
     :param subtract_control: forwarded to the wig writer
     """
     wig_dir = self.rdir_path('wigs')
     mod_utils.make_dir(wig_dir)
     # Any kind of subtraction is restricted to the normalizable libraries.
     if subtract_background or subtract_control:
         libs_to_write = self.get_normalizable_libs()
     else:
         libs_to_write = self.libs
     # The batch table makes bulk import into MochiView easier.  The context
     # manager closes it even if a library writer raises.
     with open(os.path.join(wig_dir, 'mochi_batch_' + suffix + '.txt'), 'w') as batch_file:
         batch_file.write('SEQUENCE_SET\tFILE_NAME\tDATA_TYPE\tNAME\n')
         for lib in libs_to_write:
             track_name = lib.lib_settings.sample_name + '_' + suffix
             # '<replace>' cells are placeholders to be filled in by hand.
             batch_file.write('<replace>\t%s\t<replace>\t%s\n' % (track_name + '.wig.gz', track_name))
             lib.write_mutation_rates_to_wig(os.path.join(wig_dir, track_name),
                                             subtract_background=subtract_background,
                                             subtract_control=subtract_control)
示例#7
0
 def annotate_structures(self, exclude_constitutive=False):
     """Highlight protections on the structures for the normalizable libraries.

     Output goes to ``structures/protections_highlighted``, or to its
     ``exclude_constitutive`` subdirectory when ``exclude_constitutive`` is
     set.  The matching ``structures/colored_by_change`` directory is still
     passed to ``mod_utils.make_dir`` although nothing is written to it here.

     :param exclude_constitutive: selects the output subdirectory and is
         forwarded to the plotting call
     """
     # Outputs of the exclude_constitutive run live one directory deeper.
     extra = ('exclude_constitutive',) if exclude_constitutive else ()
     highlighted_dir = self.rdir_path('structures', 'protections_highlighted',
                                      *extra)
     mod_utils.make_dir(highlighted_dir)
     mod_utils.make_dir(
         self.rdir_path('structures', 'colored_by_change', *extra))
     mod_plotting.highlight_structure(
         self.get_normalizable_libs(),
         highlighted_dir,
         nucleotides_to_count=self.settings.get_property(
             'affected_nucleotides'),
         exclude_constitutive=exclude_constitutive)
示例#8
0
    def remove_adaptor(self):
        """Trim adaptors from every library, in parallel.

        Returns early when ``force_retrim`` is off and every library already
        reports adaptorless reads.  Nothing is done either when the
        ``trim_adaptor`` setting is false.
        """
        settings = self.settings
        if not settings.get_property('force_retrim') and all(
                lib.adaptorless_reads_exist()
                for lib in settings.iter_lib_settings()):
            return

        if not settings.get_property('trim_adaptor'):
            return
        settings.write_to_log( 'trimming adaptors')
        mod_utils.make_dir(self.rdir_path('adaptor_removed'))
        mod_utils.parmap(lambda one_lib: self.remove_adaptor_one_lib(one_lib),
                         settings.iter_lib_settings(), nprocs=self.threads)
        settings.write_to_log( 'trimming adaptors done')
示例#9
0
 def trim_reads(self):
     """
     Trim every library's reads by the configured amount, in parallel.

     Trimming the 5' end removes potential random barcode sequence; trimming
     the 3' end can help problematic mapping by reducing the chance that
     indels prevent mapping.  Returns early when ``force_retrim`` is off and
     every library already reports trimmed reads.
     :return:
     """
     settings = self.settings
     settings.write_to_log( 'trimming reads')
     if not settings.get_property('force_retrim') and all(
             lib.trimmed_reads_exist()
             for lib in settings.iter_lib_settings()):
         return
     mod_utils.make_dir(self.rdir_path('trimmed_reads'))
     mod_utils.parmap(lambda one_lib: self.trim_one_lib(one_lib),
                      settings.iter_lib_settings(),
                      nprocs = self.threads)
     settings.write_to_log('trimming reads complete')
示例#10
0
 def remove_adaptor(self):
     """Remove adaptors from every library, in parallel.

     Returns early, logging that existing output is reused, when every
     library already reports adaptorless reads.  Otherwise the available
     threads are divided evenly between one worker process per library,
     capped at ``self.threads`` processes.
     """
     self.settings.write_to_log('removing adaptors with skewer')
     # Materialise once: the settings are needed for the check and the map.
     all_settings = list(self.settings.iter_lib_settings())
     if all(lib_settings.adaptorless_reads_exist()
            for lib_settings in all_settings):
         self.settings.write_to_log('using existing adaptor-trimmed reads')
         return
     mod_utils.make_dir(self.rdir_path('adaptor_removed'))
     num_instances = max(min(len(all_settings), self.threads), 1)
     # Floor division keeps the per-process thread count an integer even
     # under true division (Python 3 or `from __future__ import division`).
     threads_per_instance = max(self.threads // num_instances, 1)
     mod_utils.parmap(lambda lib_setting: self.remove_adaptor_one_lib(
         lib_setting, threads_per_instance),
                      all_settings,
                      nprocs=num_instances)
     self.settings.write_to_log('removing adaptors done')
示例#11
0
 def collapse_identical_reads(self):
     """
     Collapse identical reads in every library, in parallel.

     Returns early when ``force_recollapse`` is off and every library already
     reports collapsed reads.  When the ``collapse_identical_reads`` setting
     is false the libraries go through ``fastq_to_fasta`` instead of
     ``collapse_one_fastq_file``.
     :return:
     """
     settings = self.settings
     settings.write_to_log('collapsing reads')
     if not settings.get_property('force_recollapse') and all(
             lib.collapsed_reads_exist()
             for lib in settings.iter_lib_settings()):
         return
     mod_utils.make_dir(self.rdir_path('collapsed_reads'))
     if settings.get_property('collapse_identical_reads'):
         process_one = self.collapse_one_fastq_file
     else:
         process_one = self.fastq_to_fasta
     mod_utils.parmap(lambda one_lib: process_one(one_lib),
                      settings.iter_lib_settings(),
                      nprocs = self.threads)
     settings.write_to_log('collapsing reads complete')
示例#12
0
 def trim_reads(self):
     """
     Trim reads by given amount, removing potential random barcoding sequences from 5' end
     Trimming from 3' end can also help if mapping is problematic by reducing chance for indels to prevent mapping

     Returns early, logging that existing output is reused, when every
     library already reports trimmed reads.  Otherwise one worker process is
     started per library (capped at ``self.threads``), and each is given its
     share of the threads minus one, but never fewer than one.
     :return:
     """
     self.settings.write_to_log('trimming reads with seqtk')
     # Materialise once: the settings are needed for the check and the map.
     all_settings = list(self.settings.iter_lib_settings())
     if all(lib_settings.trimmed_reads_exist()
            for lib_settings in all_settings):
         self.settings.write_to_log('using existing trimmed reads')
         return
     mod_utils.make_dir(self.rdir_path('trimmed_reads'))
     num_instances = max(min(len(all_settings), self.threads), 1)
     # Floor division keeps the thread count an integer under true division.
     threads_per_instance = max(self.threads // num_instances - 1, 1)
     # nprocs matches the instance count the thread budget was computed for,
     # as in the sibling mapping and adaptor-removal steps.
     mod_utils.parmap(lambda lib_setting: self.trim_one_lib(
         lib_setting, threads_per_instance),
                      all_settings,
                      nprocs=num_instances)
     self.settings.write_to_log('trimming reads complete')
示例#13
0
 def run_shapemapper(self):
     """
     runs shapemapper2.0 on the samples in batches

     Nothing is run when ``need_to_run_shapemapper`` returns false; that is
     logged instead.  Otherwise one worker process is started per library
     (capped at ``self.threads``) and the threads are divided evenly between
     them.
     :return:
     """
     self.settings.write_to_log('running shapemapper')
     if self.need_to_run_shapemapper():
         mod_utils.make_dir(self.rdir_path('shapemapper'))
         all_settings = list(self.settings.iter_lib_settings())
         # At least one instance, so an experiment without libraries does
         # not raise ZeroDivisionError below.
         num_instances = max(min(len(all_settings), self.threads), 1)
         # Floor division keeps the thread count an integer under true
         # division.
         threads_per_instance = max(self.threads // num_instances, 1)
         mod_utils.parmap(lambda lib_setting: self.run_single_shapemapper(
             lib_setting, threads_per_instance),
                          all_settings,
                          nprocs=num_instances)
     else:
         self.settings.write_to_log('using existing shapemapper output')
     self.settings.write_to_log('done running shapemapper')
示例#14
0
 def map_reads(self):
     """
     map all reads using STAR

     Returns early when every library already reports mapped reads.
     Otherwise one worker process is started per library (capped at
     ``self.threads``), and each is given its share of the threads minus one,
     but never fewer than one.
     :return:
     """
     self.settings.write_to_log('mapping reads')
     # Materialise once: the settings are needed for the check and the map.
     all_settings = list(self.settings.iter_lib_settings())
     if all(lib_settings.mapped_reads_exist()
            for lib_settings in all_settings):
         return
     mod_utils.make_dir(self.rdir_path('mapped_reads'))
     num_instances = max(min(len(all_settings), self.threads), 1)
     # Floor division keeps the thread count an integer under true division.
     threads_per_instance = max(self.threads // num_instances - 1, 1)
     mod_utils.parmap(lambda lib_setting: self.map_one_library(
         lib_setting, threads_per_instance),
                      all_settings,
                      nprocs=num_instances)
     self.settings.write_to_log('finished mapping reads')
def main():
    """Command-line entry point: write wigs, pickles and plots for each dataset.

    Arguments (from ``sys.argv``): output folder, genome fasta, normalization
    pickle, then any number of experimental pickles.  Each dataset is named
    after its file name with the last two dot-separated parts removed.  For
    every experimental dataset the raw wig, the background-subtracted pickle
    and wig, and a pie chart are written; histograms are written for the raw
    and the background-subtracted sets.
    """
    if len(sys.argv) < 4:
        sys.exit('usage: %s outfolder genome_fasta normalization_file '
                 '[experimental_file ...]' % sys.argv[0])

    def dataset_name(file_name):
        # Drop the directory and the last two dot-separated parts.
        return '.'.join(os.path.basename(file_name).split('.')[:-2])

    outfolder, genome_fasta, normalization_file_name = sys.argv[1:4]
    experimental_file_names = sys.argv[4:]
    mod_utils.make_dir(outfolder)
    normalization_dict = mod_utils.unPickle(normalization_file_name)
    norm_name = dataset_name(normalization_file_name)
    experimental_dict_names = [dataset_name(file_name) for file_name in experimental_file_names]
    experimental_dicts = [mod_utils.unPickle(file_name) for file_name in experimental_file_names]

    normed_mutation_rate_histogram(experimental_dicts, experimental_dict_names, os.path.join(outfolder, 'mutation_rate_histogram'), title='nonzero positions')
    background_subtracted_sets = []
    write_wig(normalization_dict, norm_name, os.path.join(outfolder, norm_name))
    for name, experimental_dict in zip(experimental_dict_names, experimental_dicts):
        write_wig(experimental_dict, name, os.path.join(outfolder, name))
        background_subtracted = subtract_background(experimental_dict, normalization_dict)
        background_subtracted_sets.append(background_subtracted)
        mod_utils.makePickle(background_subtracted, os.path.join(outfolder, name + '_subtracted.pkl'))
        write_wig(background_subtracted, name + '_subtracted', os.path.join(outfolder, name + '_subtracted'))
        try:
            plot_weighted_nts_pie(background_subtracted, genome_fasta, '%s backround-subtracted fractions' % name, os.path.join(outfolder, name + '_sub_pie'))
        except Exception as error:
            # The pie chart stays best-effort, but the failure is reported
            # and KeyboardInterrupt/SystemExit are no longer swallowed.
            sys.stderr.write('warning: could not plot pie chart for %s: %s\n' % (name, error))
    normed_mutation_rate_histogram(background_subtracted_sets, experimental_dict_names, os.path.join(outfolder, 'back_subtracted_mutation_rate_histogram'), title = 'nonzero positions, background subtracted')
示例#16
0
 def make_tables(self, exclude_constitutive=False):
     """Create the table and pickle directories and write every tabular output.

     Directories are made under ``tables`` and ``pickles`` for each
     subfolder, each with an ``exclude_constitutive`` child.  Then the
     mutation-rate pickles, the fold-change pickle, the three wig sets, the
     mutation-rate TSVs and the two combined TSVs are written, in that order.

     :param exclude_constitutive: forwarded to the mutation-rate pickles and
         the per-library TSVs
     """
     for subfolder in ('raw', 'background_subtracted', 'control_subtracted',
                       'fold_change'):
         # Plain directories first, then their exclude_constitutive children.
         for extra in ((), ('exclude_constitutive',)):
             for top_level in ('tables', 'pickles'):
                 mod_utils.make_dir(
                     self.rdir_path(top_level, subfolder, *extra))

     # (pickle name, tsv name, subtraction options) for each rate variant.
     rate_outputs = (
         ('mutation_rates.pkl', 'mutation_rates.tsv', {}),
         ('back_subtracted_mutation_rates.pkl',
          'back_subtracted_mutation_rates.tsv',
          {'subtract_background': True}),
         ('control_subtracted_mutation_rates.pkl',
          'control_subtracted_mutation_rates.tsv',
          {'subtract_control': True}),
     )
     for pickle_name, _, subtraction in rate_outputs:
         self.pickle_mutation_rates(
             pickle_name,
             exclude_constitutive=exclude_constitutive,
             **subtraction)
     # NOTE(review): exclude_constitutive is hard-coded to True here rather
     # than following the argument -- confirm this is intended.
     self.pickle_fold_changes('mutation_rate_fold_changes.pkl',
                              exclude_constitutive=True)
     self.write_wigs('')
     self.write_wigs('back_subtract', subtract_background=True)
     self.write_wigs('control_subtract', subtract_control=True)
     for _, tsv_name, subtraction in rate_outputs:
         self.write_mutation_rates_tsv(
             tsv_name,
             exclude_constitutive=exclude_constitutive,
             **subtraction)
     self.write_combined_mutation_rates_tsv()
     self.write_combined_mutation_rates_tsv(exclude_constitutive=True)
示例#17
0
    def make_tables(self, exclude_constitutive=False):
        """Create the table directories and write the tabular outputs.

        Directories are made under ``rt_stop_tables`` and ``mutation_tables``
        for the ``raw`` and ``fold_change`` subfolders, each with an
        ``exclude_constitutive`` child.  Then the raw wigs, the mutation-rate
        TSVs (raw, and control-subtracted with and without lowess
        correction) and the combined mutation and RT-stop TSVs are written.
        Pickle output and the background-subtracted variants are disabled in
        this version.

        :param exclude_constitutive: forwarded to the per-library TSV writers
        """
        for subfolder in ('raw', 'fold_change'):
            # Plain directories first, then their exclude_constitutive
            # children.
            for extra in ((), ('exclude_constitutive',)):
                for table_root in ('rt_stop_tables', 'mutation_tables'):
                    mod_utils.make_dir(
                        self.rdir_path(table_root, subfolder, *extra))

        self.write_wigs('')
        self.write_mutation_rates_tsv(
            'mutation_rates.tsv', exclude_constitutive=exclude_constitutive)
        control_subtracted_outputs = (
            ('control_subtracted_mutation_rates_lowess.tsv', True),
            ('control_subtracted_mutation_rates.tsv', False),
        )
        for tsv_name, use_lowess in control_subtracted_outputs:
            self.write_mutation_rates_tsv(
                tsv_name,
                subtract_control=True,
                exclude_constitutive=exclude_constitutive,
                lowess_correct=use_lowess)

        self.write_combined_mutation_rates_tsv()
        self.write_combined_mutation_counts_tsv()
        for value_type in ('rpm', 'count', 'score'):
            self.write_combined_rt_stop_tsv(type=value_type)
示例#18
0
 def annotate_structures(self, exclude_constitutive=False):
     """Annotate the structures for the normalizable libraries.

     Protections are highlighted into ``structures/protections_highlighted``
     and structures are coloured by change into
     ``structures/colored_by_change``.  When ``exclude_constitutive`` is set,
     both go to the ``exclude_constitutive`` subdirectory instead.

     :param exclude_constitutive: selects the output subdirectory and is
         forwarded to both plotting calls
     """
     # Outputs of the exclude_constitutive run live one directory deeper.
     extra = ('exclude_constitutive',) if exclude_constitutive else ()
     highlighted_dir = self.rdir_path('structures', 'protections_highlighted', *extra)
     colored_dir = self.rdir_path('structures', 'colored_by_change', *extra)
     mod_utils.make_dir(highlighted_dir)
     mod_utils.make_dir(colored_dir)
     mod_plotting.highlight_structure(self.get_normalizable_libs(), highlighted_dir,
                                      nucleotides_to_count=self.settings.get_property('affected_nucleotides'),
                                      exclude_constitutive=exclude_constitutive)
     mod_plotting.color_by_change(self.get_normalizable_libs(), colored_dir,
                                  nucleotides_to_count=self.settings.get_property('affected_nucleotides'),
                                  exclude_constitutive=exclude_constitutive)
示例#19
0
    def process_settings(self, settings_file):
        """
        - reads the settings file and converts str to float, list, etc.
        - stores result in self.settings as a dict()

        Options from every section are flattened into one dict, and each
        section name that has options is also recorded with the value True.
        The sample and control lists, the fastq paths and the results
        directory are stored on the instance, and the settings file is copied
        into the results directory.

        Inconsistent control lists or unknown sample names are reported on
        stdout but do not stop processing.

        :param settings_file: path of the configuration file to read
        :raises KeyError: if a required option is missing
        :raises ValueError: if a boolean option is not "true"/"false" or a
            numeric option cannot be parsed
        :raises AssertionError: if a listed fastq file does not exist
        """
        int_keys = [
            'first_base_to_keep', 'last_base_to_keep',
            'min_post_adaptor_length', 'min_base_quality',
            'min_mapping_quality'
        ]
        float_keys = [
            'confidence_interval_cutoff', 'fold_change_cutoff',
            'winsorization_upper_limit'
        ]
        str_keys = [
            'adaptor_sequence', 'rrna_fasta', 'experiment_name',
            'affected_nucleotides', 'pymol_base_script',
            'pymol_base_script_colorchange', 'tptn_file_18s', 'tptn_file_25s'
        ]
        boolean_keys = ['make_interactive_plots']
        list_str_keys = [
            'fastq_gz_files', 'sample_names', 'experimentals',
            'no_mod_controls', 'with_mod_controls', 'exclude_constitutive'
        ]
        config = ConfigParser.ConfigParser()
        config.read(settings_file)
        settings = {}
        for section in config.sections():
            for option in config.options(section):
                settings[option] = config.get(section, option)
                # Set after the option so that the section flag wins if an
                # option shares the section's name.
                settings[section] = True
        for k in int_keys:
            settings[k] = int(settings[k])
        for k in str_keys:
            # No conversion needed; the lookup enforces that the key exists.
            settings[k] = settings[k]
        for k in float_keys:
            settings[k] = float(settings[k])
        for k in boolean_keys:
            if settings[k].lower() not in ('true', 'false'):
                raise ValueError('Boolean value %s must be "true" or "false"' %
                                 k)
            settings[k] = settings[k].lower() == 'true'
        for k in list_str_keys:
            # List options are stored as JSON arrays in the settings file.
            settings[k] = simplejson.loads(settings[k])
        self.fqdir = settings['fastq_dir']
        self.sample_names = settings['sample_names']
        self.experimentals = settings['experimentals']
        self.no_mod_controls = settings['no_mod_controls']
        self.with_mod_controls = settings['with_mod_controls']
        self.exclude_constitutive = settings['exclude_constitutive']
        # Explicit checks instead of assert inside a bare except: asserts are
        # stripped under -O and the bare except hid unrelated errors.
        if not (len(self.experimentals) == len(self.no_mod_controls) ==
                len(self.with_mod_controls)):
            print('error: experimentals, no_mod_controls, and with_mod_controls should all be the same length')
            print('for mutation rate purposes, its ok to reuse a dataset here, it really doesnt matter')
        for sample_name in self.experimentals + self.no_mod_controls + self.with_mod_controls:
            if sample_name not in self.sample_names:
                # Only the first unknown name is reported, as before.
                print('%s  not in sample names, make sure you are using regular quotation marks' % (sample_name,))
                break

        self.fastq_gz_file_handles = [
            os.path.join(self.fqdir, fastq_gz_file)
            for fastq_gz_file in settings['fastq_gz_files']
        ]
        for file_handle in self.fastq_gz_file_handles:
            if not mod_utils.file_exists(file_handle):
                # Same exception type as the former assert, but it survives
                # -O and names the missing file.
                raise AssertionError('fastq file not found: %s' % file_handle)
        self.settings = settings
        self.rdir = settings['results_dir']
        mod_utils.make_dir(self.rdir)
        shutil.copy(settings_file, self.rdir)
示例#20
0
 def get_wdir(self):
     """Return the working directory after passing it to mod_utils.make_dir."""
     working_dir = self.wdir
     mod_utils.make_dir(working_dir)
     return working_dir
示例#21
0
 def get_wdir(self):
     """Hand ``self.wdir`` to ``mod_utils.make_dir`` and return it."""
     wdir = self.wdir
     mod_utils.make_dir(wdir)
     return wdir
示例#22
0
    def process_settings(self, settings_file):
        """
        - reads the settings file and converts str to float, list, etc.
        - stores result in self.settings as a dict()

        Options from every section are flattened into one dict, and each
        section name that has options is also recorded with the value True.
        The sample and control lists, the fastq paths and the results
        directory are stored on the instance, and the settings file is copied
        into the results directory.

        Inconsistent control lists or unknown sample names are reported on
        stdout but do not stop processing.

        :param settings_file: path of the configuration file to read
        :raises KeyError: if a required option is missing
        :raises ValueError: if a boolean option is not "true"/"false" or a
            numeric option cannot be parsed
        :raises AssertionError: if a listed fastq file does not exist
        """
        int_keys = ['first_base_to_keep', 'last_base_to_keep', 'min_post_adaptor_length', 'min_base_quality', 'min_mapping_quality']
        float_keys = ['confidence_interval_cutoff', 'fold_change_cutoff']
        str_keys = ['adaptor_sequence', 'rrna_fasta', 'experiment_name', 'shapemapper_ref_file', 'affected_nucleotides', 'pymol_base_script', 'pymol_base_script_colorchange', 'tptn_file_18s', 'tptn_file_25s', 'functional_groupings']
        boolean_keys = ['collapse_identical_reads', 'force_read_resplit', 'force_remapping', 'force_recollapse',
                        'force_recount', 'force_index_rebuild', 'force_retrim', 'trim_adaptor', 'discard_untrimmed', 'force_shapemapper',
                        'make_interactive_plots']
        list_str_keys = ['fastq_gz_files', 'sample_names', 'experimentals', 'no_mod_controls', 'with_mod_controls', 'exclude_constitutive']
        config = ConfigParser.ConfigParser()
        config.read(settings_file)
        settings = {}
        for section in config.sections():
            for option in config.options(section):
                settings[option] = config.get(section, option)
                # Set after the option so that the section flag wins if an
                # option shares the section's name.
                settings[section] = True
        for k in int_keys:
            settings[k] = int(settings[k])
        for k in str_keys:
            # No conversion needed; the lookup enforces that the key exists.
            settings[k] = settings[k]
        for k in float_keys:
            settings[k] = float(settings[k])
        for k in boolean_keys:
            if settings[k].lower() not in ('true', 'false'):
                raise ValueError(
                  'Boolean value %s must be "true" or "false"' % k)
            settings[k] = settings[k].lower() == 'true'
        for k in list_str_keys:
            # List options are stored as JSON arrays in the settings file.
            settings[k] = simplejson.loads(settings[k])
        self.fqdir = settings['fastq_dir']
        self.sample_names = settings['sample_names']
        self.experimentals = settings['experimentals']
        self.no_mod_controls = settings['no_mod_controls']
        self.with_mod_controls = settings['with_mod_controls']
        self.exclude_constitutive = settings['exclude_constitutive']
        # Explicit checks instead of assert inside a bare except: asserts are
        # stripped under -O and the bare except hid unrelated errors.
        if not (len(self.experimentals) == len(self.no_mod_controls) ==
                len(self.with_mod_controls)):
            print('error: experimentals, no_mod_controls, and with_mod_controls should all be the same length')
            print('for mutation rate purposes, its ok to reuse a dataset here, it really doesnt matter')
        for sample_name in self.experimentals+self.no_mod_controls+self.with_mod_controls:
            if sample_name not in self.sample_names:
                # Only the first unknown name is reported, as before.
                print('%s  not in sample names, make sure you are using regular quotation marks' % (sample_name,))
                break

        self.fastq_gz_file_handles = [os.path.join(self.fqdir, fastq_gz_file) for fastq_gz_file in
                                      settings['fastq_gz_files']]
        for file_handle in self.fastq_gz_file_handles:
            if not mod_utils.file_exists(file_handle):
                # Same exception type as the former assert, but it survives
                # -O and names the missing file.
                raise AssertionError('fastq file not found: %s' % file_handle)
        self.settings = settings
        self.rdir = settings['results_dir']
        mod_utils.make_dir(self.rdir)
        shutil.copy(settings_file, self.rdir)
示例#23
0
    def make_plots(self, exclude_constitutive=False):
        """
        Render the summary mutation-rate and RT-stop plots for this experiment.

        Mutation plots are written under the 'mutation_plots' results
        directory and RT-stop plots under 'rt_stop_plots'. When
        exclude_constitutive is truthy, both trees gain an
        'exclude_constitutive' subdirectory and every output name gets an
        '_exclude_constitutive' suffix, so the two variants are written to
        separate locations.

        :param exclude_constitutive: selects the output location and file
            suffix, and is forwarded to every mod_plotting call except the
            interactive ones (those always receive False).
        """
        if exclude_constitutive:
            nesting = ('exclude_constitutive',)
            file_tag = '_exclude_constitutive'
            #TODO: the names for the ROC curve chromosomes are hard coded and need to be changed between samples
            #mod_plotting.generate_roc_curves(self.settings.get_property('tptn_file_25s'), self.settings.rRNA_seqs, os.path.join(rdir, '23S_ROC_curves'), self.get_modified_libs(), 'E.c.23S_rRNA', self.settings.get_property('affected_nucleotides'))
            #mod_plotting.generate_roc_curves(self.settings.get_property('tptn_file_18s'), self.settings.rRNA_seqs, os.path.join(rdir, '16S_ROC_curves'), self.get_modified_libs(), 'E.c.16S_rRNA', self.settings.get_property('affected_nucleotides'))
        else:
            nesting = ()
            file_tag = ''
        with_interactive = nesting + ('interactive',)

        # Create each output tree and its 'interactive' subfolder, mutation
        # plots first. rdir_path is assumed to be a pure path builder, so its
        # result is reused rather than recomputed.
        mut_dir = self.rdir_path('mutation_plots', *nesting)
        mod_utils.make_dir(mut_dir)
        mod_utils.make_dir(self.rdir_path('mutation_plots', *with_interactive))
        stop_dir = self.rdir_path('rt_stop_plots', *nesting)
        mod_utils.make_dir(stop_dir)
        mod_utils.make_dir(self.rdir_path('rt_stop_plots', *with_interactive))

        def mut_out(stem):
            # Output prefix for a mutation plot.
            return os.path.join(mut_dir, stem + file_tag)

        def interactive_out(stem):
            # Output prefix for an interactive mutation plot.
            return os.path.join(mut_dir, 'interactive', stem + file_tag)

        def stop_out(stem):
            # Output prefix for an RT-stop plot.
            return os.path.join(stop_dir, stem + file_tag)

        def affected_nts():
            # Looked up on every use, exactly as each plot call needs it.
            return self.settings.get_property('affected_nucleotides')

        # ---- mutation plots: pie charts over all libraries ----
        mod_plotting.plot_mutated_nts_pie(
            self.libs, mut_out('raw_mutation_fractions'),
            exclude_constitutive=exclude_constitutive)
        mod_plotting.plot_mutation_breakdown_pie(
            self.libs, mut_out('raw_mutation_types'),
            exclude_constitutive=exclude_constitutive)
        mod_plotting.plot_mutated_nts_pie(
            self.libs, mut_out('background_sub_mutation_fractions'),
            subtract_background=True,
            exclude_constitutive=exclude_constitutive)

        # ---- mutation plots: rate distributions over all libraries ----
        mod_plotting.plot_mutation_rate_cdfs(
            self.libs, mut_out('mutation_rate_cdf'),
            nucleotides_to_count=affected_nts(),
            exclude_constitutive=exclude_constitutive)
        mod_plotting.plot_mutation_rate_violins(
            self.libs, mut_out('mutation_rate_violin'),
            nucleotides_to_count=affected_nts(),
            exclude_constitutive=exclude_constitutive)

        # ---- mutation plots: comparisons over the normalizable libraries ----
        mod_plotting.ma_plots_by_count(
            self.get_normalizable_libs(), mut_out('MA_raw_counts'),
            nucleotides_to_count=affected_nts(),
            exclude_constitutive=exclude_constitutive)
        mod_plotting.ma_plots_by_count(
            self.get_normalizable_libs(), mut_out('MA_raw_counts_lowess'),
            nucleotides_to_count=affected_nts(),
            exclude_constitutive=exclude_constitutive,
            lowess_correct=True)
        mod_plotting.mutation_rate_scatter(
            self.get_normalizable_libs(), mut_out('scatter_mismatch_rate'),
            nucleotides_to_count=affected_nts(),
            exclude_constitutive=exclude_constitutive)

        # ---- optional interactive mutation plots ----
        # NOTE(review): these always pass exclude_constitutive=False, even
        # when the outputs land in the exclude_constitutive tree with the
        # matching suffix -- confirm whether that is intentional.
        if self.settings.get_property('make_interactive_plots'):
            mod_plotting.scatter_interactive(
                self.get_normalizable_libs(), interactive_out('scatter'),
                nucleotides_to_count=affected_nts(),
                exclude_constitutive=False)
            mod_plotting.ma_plots_interactive_by_count(
                self.get_normalizable_libs(), interactive_out('MA_counts'),
                nucleotides_to_count=affected_nts(),
                exclude_constitutive=False)
            mod_plotting.ma_plots_interactive_by_count(
                self.get_normalizable_libs(),
                interactive_out('MA_counts_lowess'),
                nucleotides_to_count=affected_nts(),
                exclude_constitutive=False,
                lowess_correct=True)

        # ---- RT stop plots ----
        mod_plotting.plot_rt_stop_pie(
            self.libs, stop_out('raw_rt_stops'),
            exclude_constitutive=exclude_constitutive)
        mod_plotting.plot_rt_stop_pie(
            self.libs, stop_out('back_sub_rt_stops'),
            subtract_background=True,
            exclude_constitutive=exclude_constitutive)
        mod_plotting.plot_rt_stop_cdfs(
            self.libs, stop_out('rt_stop_cdf'),
            nucleotides_to_count=affected_nts(),
            exclude_constitutive=exclude_constitutive)
        mod_plotting.plot_rt_stop_violins(
            self.libs, stop_out('rt_stop_violin'),
            nucleotides_to_count=affected_nts(),
            exclude_constitutive=exclude_constitutive)