Пример #1
0
 def homer2narrow(self, options, peak_files, output_dir=None):
     '''
     Convert each passed Homer peak file into a narrowPeak file using
     an IdrUtilities object.

     Files go to output_dir, or to options.output_dir when output_dir is
     not given. Each output name is the input's base name without its
     extension, followed by '_', the value of randint(1, 999), and the
     '.narrowPeak' extension.

     Returns the list of generated narrowPeak file paths, in input order.
     '''
     target_dir = output_dir if output_dir else options.output_dir
     self.check_output_dir(target_dir)

     converter = IdrUtilities()
     narrow_peak_paths = []
     for source_path in peak_files:
         # File name with the directory and final extension stripped
         stem, _extension = os.path.splitext(os.path.basename(source_path))
         # A random suffix lowers (but does not remove) the chance that
         # two inputs with the same name overwrite each other
         tagged_stem = '_'.join((stem, str(randint(1, 999))))
         narrow_path = os.path.join(target_dir, tagged_stem + '.narrowPeak')

         peaks = converter.import_homer_peaks(source_path)
         converter.homer_to_narrow_peaks(peaks, narrow_path)

         print('NarrowPeak file output to {}'.format(narrow_path))
         narrow_peak_paths.append(narrow_path)
     return narrow_peak_paths
Пример #2
0
    def homer2narrow(self, options, peak_files, output_dir=None):
        """
        Write a narrowPeak version of every Homer peak file passed in.

        The destination is output_dir when given, otherwise
        options.output_dir; it is handed to self.check_output_dir before any
        file is processed. Conversion itself is delegated to IdrUtilities
        (import_homer_peaks followed by homer_to_narrow_peaks).

        Returns the list of narrowPeak paths that were produced.
        """
        destination = output_dir or options.output_dir
        self.check_output_dir(destination)

        utilities = IdrUtilities()
        produced = []
        for homer_path in peak_files:
            # Strip the directory first, then the final extension.
            file_name = os.path.basename(homer_path)
            root = os.path.splitext(file_name)[0]
            # Append the value of randint(1, 999) so equal input names are
            # unlikely to map onto the same output name.
            random_tag = str(randint(1, 999))
            narrow_name = root + "_" + random_tag + ".narrowPeak"
            narrow_path = os.path.join(destination, narrow_name)

            homer_data = utilities.import_homer_peaks(homer_path)
            utilities.homer_to_narrow_peaks(homer_data, narrow_path)

            print("NarrowPeak file output to {}".format(narrow_path))
            produced.append(narrow_path)
        return produced
Пример #3
0
    def truncate(self, options, peak_files, output_dir=None):
        '''
        Truncate SORTED narrowPeak files so that they are all the same length.

        options: object whose output_dir attribute is used when output_dir
            is not given.
        peak_files: narrowPeak files, passed unchanged to
            IdrUtilities.standardize_peak_counts.
        output_dir: destination directory; falls back to options.output_dir.

        Returns the value produced by IdrUtilities.standardize_peak_counts
        (presumably the truncated file names; confirm against IdrUtilities).
        '''
        # Resolve the fallback once so that the directory passed to
        # check_output_dir is the same one handed to standardize_peak_counts.
        # Before, the helper received the raw argument, which is None
        # whenever the caller relies on options.output_dir.
        output_dir = output_dir or options.output_dir
        self.check_output_dir(output_dir)

        idrutils = IdrUtilities()
        return idrutils.standardize_peak_counts(peak_files, output_dir)
Пример #4
0
    def truncate(self, options, peak_files, output_dir=None):
        """
        Truncate SORTED narrowPeak files so that they are all the same length.

        The work is delegated to IdrUtilities.standardize_peak_counts, and
        its return value is returned unchanged (presumably the truncated
        file names; confirm against IdrUtilities).

        options: object supplying output_dir when the argument is omitted.
        peak_files: narrowPeak files to standardize.
        output_dir: destination directory; defaults to options.output_dir.
        """
        # Use one resolved directory for both the check and the helper call.
        # Passing the raw argument to the helper would hand it None whenever
        # the caller depends on the options.output_dir fallback.
        resolved_dir = output_dir or options.output_dir
        self.check_output_dir(resolved_dir)

        idrutils = IdrUtilities()
        output_files = idrutils.standardize_peak_counts(peak_files, resolved_dir)

        return output_files
Пример #5
0
    def get_threshold(self, options, number_of_peaks, pooled=False):
        """
        Choose the threshold for replicate or pooled data.

        When pooled is truthy, a truthy options.pooled_threshold is returned;
        otherwise a truthy options.threshold is returned. If the relevant
        option is falsy, the value comes from
        IdrUtilities.determine_threshold(number_of_peaks, pooled=pooled).
        """
        helper = IdrUtilities()

        # An explicitly supplied option takes precedence over the computed
        # value; only the option matching the requested mode is consulted.
        if pooled:
            if options.pooled_threshold:
                return options.pooled_threshold
        elif options.threshold:
            return options.threshold

        return helper.determine_threshold(number_of_peaks, pooled=pooled)
Пример #6
0
 def get_threshold(self, options, number_of_peaks, pooled=False):
     '''
     Return the threshold to apply for the given number of peaks.

     The user-supplied option (options.pooled_threshold when pooled is
     truthy, options.threshold otherwise) is used when it is truthy.
     Failing that, IdrUtilities.determine_threshold is asked for a value
     based on number_of_peaks and pooled.
     '''
     utilities = IdrUtilities()

     # Look only at the option that matches the requested mode
     user_value = options.pooled_threshold if pooled else options.threshold
     if user_value:
         return user_value

     return utilities.determine_threshold(number_of_peaks, pooled=pooled)
Пример #7
0
 def pool_pseudoreplicates(self, options):
     '''
     Generate pseudoreplicates for each directory, then pool the pseudoreps.

     Pseudoreplicate sets come from self.pseudoreplicate, called with the
     suffix 'Pooling-Pseudorep'. Each set is then passed to
     IdrUtilities.clean_up_pseudoreps together with the path
     <options.output_dir>/<options.pooled_dir_name>-Pseudorep<N>, where N
     counts the sets starting at 1.

     Raises Exception when options.pooled_dir_name is empty or missing a
     value.
     '''
     if not options.pooled_dir_name:
         raise Exception('A name for the pooled directory is needed. '
                         'Please indicate one with the --pooled-dir-name option.')

     grouped_pseudoreps = self.pseudoreplicate(options, suffix='Pooling-Pseudorep')

     cleaner = IdrUtilities()
     for number, members in enumerate(grouped_pseudoreps, 1):
         base_dir = options.output_dir
         pooled_name = options.pooled_dir_name + '-Pseudorep' + str(number)
         cleaner.clean_up_pseudoreps(os.path.join(base_dir, pooled_name),
                                     members)
Пример #8
0
 def pool_pseudoreplicates(self, options):
     '''
     Build pseudoreplicates for every tag directory and hand each numbered
     set to IdrUtilities.clean_up_pseudoreps for pooling.

     options.pooled_dir_name is required; an Exception is raised when it
     is not set. The directory path given for set number N (starting at
     1) is options.output_dir joined with
     options.pooled_dir_name + '-Pseudorep' + N.
     '''
     pooled_name_given = bool(options.pooled_dir_name)
     if not pooled_name_given:
         problem = 'A name for the pooled directory is needed. '
         remedy = 'Please indicate one with the --pooled-dir-name option.'
         raise Exception(problem + remedy)

     sets_by_number = self.pseudoreplicate(options,
                                           suffix='Pooling-Pseudorep')

     utilities = IdrUtilities()
     for index, current_set in enumerate(sets_by_number):
         # Sets are numbered from 1 in the directory name
         label = '-Pseudorep' + str(index + 1)
         pooled_dir = os.path.join(options.output_dir,
                                   options.pooled_dir_name + label)
         utilities.clean_up_pseudoreps(pooled_dir, current_set)
Пример #9
0
    def slice_pooled_peaks(self,
                           threshold,
                           pooled_threshold,
                           rep_files,
                           pseudorep_files,
                           pooled_files,
                           pooled_peaks,
                           output_dir,
                           ranking_measure='tag-count'):
        '''
        Slice the pooled peak file down to the number of peaks that fall
        within the replicate threshold.

        threshold: passed to IdrUtilities.get_peaks_within_threshold with
            rep_files (the result is the number of peaks kept) and with
            pseudorep_files.
        pooled_threshold: passed to get_peaks_within_threshold with
            pooled_files; the result is only used for the two-fold check.
        pooled_peaks, ranking_measure, output_dir: forwarded to
            IdrUtilities.slice_peaks along with the keep count.

        Prints a warning when the replicate and pooled counts are not within
        two-fold of each other, or when either count is zero, then prints
        where the sliced peaks were written. Returns nothing.
        '''
        idrutil = IdrUtilities()
        # Determine how many peaks we want to keep.
        keep_count = idrutil.get_peaks_within_threshold(threshold, rep_files)
        # The result is not used here; the call is kept because it may
        # report on the pseudoreplicates as a side effect (TODO confirm).
        idrutil.get_peaks_within_threshold(threshold, pseudorep_files)
        pooled_count = idrutil.get_peaks_within_threshold(
            pooled_threshold, pooled_files)

        # Pooled count should be within 2-fold of keep_count. A zero count
        # has no defined fold change, so it is reported through the same
        # warning instead of letting the division or math.log raise.
        if keep_count > 0 and pooled_count > 0:
            # float() keeps this a true division under Python 2 as well.
            fold_change = math.log(float(keep_count) / pooled_count, 2)
            within_two_fold = abs(fold_change) <= 1
        else:
            within_two_fold = False

        if not within_two_fold:
            print('!! Warning: The number of peaks within the replicate ' +
                  'threshold is not within two-fold of the number of ' +
                  'peaks within the pooled threshold. This could indicate ' +
                  'inconsistencies in the datasets.\n' +
                  'Replicate count: {}, Pooled count: {}'.format(
                      keep_count, pooled_count))

        # Slice our pooled peak file accordingly.
        output_file = idrutil.slice_peaks(pooled_peaks, keep_count,
                                          ranking_measure, output_dir)
        print('{} peaks output to {}'.format(keep_count, output_file))
Пример #10
0
    def pseudoreplicate(self, options, suffix="Pseudorep"):
        """
        Create pseudoreplicates for every tag directory in options.tag_dirs.

        Each directory is passed to IdrUtilities.create_pseudoreps along with
        options.output_dir, options.pseudorep_count and suffix.

        The per-directory results are transposed before being returned, so
        that each numbered rep is grouped together:
        [(Sample1-Pseudorep1, Sample2-Pseudorep1, Sample3-Pseudorep1),
        (Sample1-Pseudorep2, Sample2-Pseudorep2, Sample3-Pseudorep2)...]
        """
        self.check_output_dir(options.output_dir)

        splitter = IdrUtilities()
        per_sample = []
        for sample_dir in options.tag_dirs:
            rep_count = options.pseudorep_count
            progress = "Generating {} pseudoreplicate tag directories for {}"
            print(progress.format(rep_count, sample_dir))

            sample_reps = splitter.create_pseudoreps(
                sample_dir, options.output_dir, count=rep_count, suffix=suffix
            )
            per_sample.append(sample_reps)

        # Transpose: one tuple per pseudorep number, spanning all samples.
        grouped = zip(*per_sample)
        return list(grouped)
Пример #11
0
 def pseudoreplicate(self, options, suffix='Pseudorep'):
     '''
     Generate pseudoreplicates for each tag directory in options.tag_dirs
     via IdrUtilities.create_pseudoreps.

     Returns the results regrouped so that each numbered rep sits with
     its counterparts from the other samples:
     [(Sample1-Pseudorep1, Sample2-Pseudorep1, Sample3-Pseudorep1),
     (Sample1-Pseudorep2, Sample2-Pseudorep2, Sample3-Pseudorep2)...]
     '''
     self.check_output_dir(options.output_dir)

     utilities = IdrUtilities()

     def split_one(directory):
         # Announce the directory, then create its pseudoreplicates
         print('Generating {} pseudoreplicate tag directories for {}'.format(
                             options.pseudorep_count, directory))
         return utilities.create_pseudoreps(directory,
                                            options.output_dir,
                                            count=options.pseudorep_count,
                                            suffix=suffix)

     by_sample = [split_one(directory) for directory in options.tag_dirs]

     # zip(*...) turns per-sample rows into per-rep-number groups
     return list(zip(*by_sample))
Пример #12
0
    def slice_pooled_peaks(
        self,
        threshold,
        pooled_threshold,
        rep_files,
        pseudorep_files,
        pooled_files,
        pooled_peaks,
        output_dir,
        ranking_measure="tag-count",
    ):
        """
        Keep as many pooled peaks as there are peaks within the replicate
        threshold, and write them out via IdrUtilities.slice_peaks.

        threshold: used with rep_files to get the number of peaks to keep,
            and with pseudorep_files (that result is discarded).
        pooled_threshold: used with pooled_files to get the pooled count,
            which only feeds the two-fold consistency check.
        pooled_peaks, ranking_measure, output_dir: forwarded to slice_peaks.

        A warning is printed when the two counts differ by more than
        two-fold or when either count is zero. Returns nothing.
        """
        idrutil = IdrUtilities()
        # Determine how many peaks we want to keep.
        keep_count = idrutil.get_peaks_within_threshold(threshold, rep_files)
        # Result unused; the call is retained in case it reports on the
        # pseudoreplicates as a side effect (TODO confirm).
        idrutil.get_peaks_within_threshold(threshold, pseudorep_files)
        pooled_count = idrutil.get_peaks_within_threshold(pooled_threshold, pooled_files)

        # Pooled count should be within 2-fold of keep_count. With a zero
        # count the ratio is undefined (division by zero or log of zero), so
        # warn about it rather than crash.
        counts_comparable = keep_count > 0 and pooled_count > 0
        if counts_comparable:
            # float() guarantees true division on Python 2 as well.
            ratio = float(keep_count) / pooled_count
            out_of_range = abs(math.log(ratio, 2)) > 1
        else:
            out_of_range = True

        if out_of_range:
            print(
                "!! Warning: The number of peaks within the replicate "
                + "threshold is not within two-fold of the number of "
                + "peaks within the pooled threshold. This could indicate "
                + "inconsistencies in the datasets.\n"
                + "Replicate count: {}, Pooled count: {}".format(keep_count, pooled_count)
            )

        # Slice our pooled peak file accordingly.
        output_file = idrutil.slice_peaks(pooled_peaks, keep_count, ranking_measure, output_dir)
        print("{} peaks output to {}".format(keep_count, output_file))
Пример #13
0
 def homer2narrow(self, options, peak_files, output_dir=None):
     '''
     Convert passed Homer peak files to narrowPeak files as specified by
     the IdrUtilities object.

     options: object whose output_dir attribute is used when output_dir
         is not given.
     peak_files: Homer peak files to convert.
     output_dir: destination directory; falls back to options.output_dir.

     Each output file is named after the input's base name with its
     extension replaced by '.narrowPeak'.

     Returns the list of filenames for generated narrowPeak files.
     '''
     # Resolve the fallback once and reuse it. Previously only the call to
     # check_output_dir honoured options.output_dir, so leaving output_dir
     # as None made os.path.join(None, ...) raise a TypeError below.
     output_dir = output_dir or options.output_dir
     self.check_output_dir(output_dir)

     idrutils = IdrUtilities()
     output_files = []
     for peak_file in peak_files:
         # Get extensionless name of file
         basename = os.path.splitext(os.path.basename(peak_file))[0]
         output_file = os.path.join(output_dir, basename + '.narrowPeak')

         data = idrutils.import_homer_peaks(peak_file)
         idrutils.homer_to_narrow_peaks(data, output_file)

         print('NarrowPeak file output to {}'.format(output_file))
         output_files.append(output_file)
     return output_files