def homer2narrow(self, options, peak_files, output_dir=None):
    '''
    Convert the passed Homer peak files into narrowPeak format using an
    IdrUtilities object.

    Returns the list of paths of the narrowPeak files that were written.
    '''
    target_dir = output_dir or options.output_dir
    self.check_output_dir(target_dir)

    utils = IdrUtilities()
    generated = []
    for source in peak_files:
        # Strip directory and extension, then append a random 1-999
        # suffix so identically named inputs do not collide on output.
        stem = os.path.splitext(os.path.basename(source))[0]
        stem = '{}_{}'.format(stem, randint(1, 999))
        destination = os.path.join(target_dir, stem + '.narrowPeak')

        peak_data = utils.import_homer_peaks(source)
        utils.homer_to_narrow_peaks(peak_data, destination)
        print('NarrowPeak file output to {}'.format(destination))
        generated.append(destination)
    return generated
def homer2narrow(self, options, peak_files, output_dir=None):
    """
    Convert Homer peak files into narrowPeak files via IdrUtilities.

    Returns the list of generated narrowPeak file paths.
    """
    dest_dir = output_dir if output_dir else options.output_dir
    self.check_output_dir(dest_dir)

    converter = IdrUtilities()
    results = []
    for path in peak_files:
        # Extensionless name of the input, plus a random 1-999 suffix to
        # dodge collisions between inputs sharing the same basename.
        name = os.path.splitext(os.path.basename(path))[0]
        name = "_".join([name, str(randint(1, 999))])
        out_path = os.path.join(dest_dir, name + ".narrowPeak")

        converter.homer_to_narrow_peaks(
            converter.import_homer_peaks(path), out_path)
        print("NarrowPeak file output to {}".format(out_path))
        results.append(out_path)
    return results
def truncate(self, options, peak_files, output_dir=None):
    '''
    Truncate SORTED narrowPeak files so that they are all the same length.

    Returns the list of truncated output files produced by
    IdrUtilities.standardize_peak_counts.
    '''
    # Fix: resolve the output directory once before use. Previously only
    # check_output_dir saw the options.output_dir fallback, while the raw
    # (possibly None) output_dir argument was handed on to
    # standardize_peak_counts.
    output_dir = output_dir or options.output_dir
    self.check_output_dir(output_dir)

    idrutils = IdrUtilities()
    output_files = idrutils.standardize_peak_counts(peak_files, output_dir)
    return output_files
def truncate(self, options, peak_files, output_dir=None):
    """
    Truncate SORTED narrowPeak files so that they are all the same length.

    Returns the list of truncated output files produced by
    IdrUtilities.standardize_peak_counts.
    """
    # Fix: resolve the fallback directory before use. The existence check
    # used `output_dir or options.output_dir`, but the unresolved
    # (possibly None) argument was then passed to standardize_peak_counts.
    output_dir = output_dir or options.output_dir
    self.check_output_dir(output_dir)

    idrutils = IdrUtilities()
    output_files = idrutils.standardize_peak_counts(peak_files, output_dir)
    return output_files
def get_threshold(self, options, number_of_peaks, pooled=False):
    '''
    Resolve the IDR threshold to use.

    An explicitly provided option (options.pooled_threshold when pooled,
    options.threshold otherwise) wins; failing that, the threshold is
    derived from the number of peaks via IdrUtilities.
    '''
    idrutil = IdrUtilities()

    # Pick whichever user-supplied threshold applies to this mode.
    explicit = options.pooled_threshold if pooled else options.threshold
    if explicit:
        return explicit
    return idrutil.determine_threshold(number_of_peaks, pooled=pooled)
def pool_pseudoreplicates(self, options):
    '''
    Generate pseudoreplicates for each tag directory, then pool the
    numbered pseudoreps together under the pooled directory name.
    '''
    if not options.pooled_dir_name:
        raise Exception(
            'A name for the pooled directory is needed. '
            'Please indicate one with the --pooled-dir-name option.')

    rep_groups = self.pseudoreplicate(options, suffix='Pooling-Pseudorep')
    utils = IdrUtilities()
    # Each group holds the same pseudorep number across all samples;
    # pool group i into <pooled_dir_name>-Pseudorep<i>.
    for index, group in enumerate(rep_groups, start=1):
        pooled_dir = os.path.join(
            options.output_dir,
            '{}-Pseudorep{}'.format(options.pooled_dir_name, index))
        utils.clean_up_pseudoreps(pooled_dir, group)
def slice_pooled_peaks(self, threshold, pooled_threshold, rep_files,
        pseudorep_files, pooled_files, pooled_peaks, output_dir,
        ranking_measure='tag-count'):
    '''
    Slice the pooled peak file down to the number of peaks that pass the
    replicate IDR threshold, warning when the replicate and pooled counts
    disagree by more than two-fold.
    '''
    utils = IdrUtilities()

    # Number of peaks surviving the replicate threshold; this is how many
    # pooled peaks we keep.
    keep_count = utils.get_peaks_within_threshold(threshold, rep_files)
    # NOTE(review): return value discarded here in the original as well —
    # presumably kept for whatever output the call produces; confirm
    # before removing.
    utils.get_peaks_within_threshold(threshold, pseudorep_files)
    pooled_count = utils.get_peaks_within_threshold(
        pooled_threshold, pooled_files)

    # Sanity check: replicate and pooled counts should agree within 2x.
    fold_change = abs(math.log(keep_count / pooled_count, 2))
    if fold_change > 1:
        print('!! Warning: The number of peaks within the replicate '
              'threshold is not within two-fold of the number of '
              'peaks within the pooled threshold. This could indicate '
              'inconsistencies in the datasets.\n'
              'Replicate count: {}, Pooled count: {}'.format(
                  keep_count, pooled_count))

    # Slice our pooled peak file accordingly.
    sliced = utils.slice_peaks(
        pooled_peaks, keep_count, ranking_measure, output_dir)
    print('{} peaks output to {}'.format(keep_count, sliced))
def pseudoreplicate(self, options, suffix="Pseudorep"):
    """
    Randomly split each passed tag directory into pseudoreplicates.

    Returns sets grouped by pseudorep number:
    [(Sample1-Pseudorep1, Sample2-Pseudorep1, Sample3-Pseudorep1),
     (Sample1-Pseudorep2, Sample2-Pseudorep2, Sample3-Pseudorep2)...]
    """
    self.check_output_dir(options.output_dir)
    splitter = IdrUtilities()

    per_sample = []
    for directory in options.tag_dirs:
        print("Generating {} pseudoreplicate tag directories for {}".format(
            options.pseudorep_count, directory))
        reps = splitter.create_pseudoreps(
            directory, options.output_dir,
            count=options.pseudorep_count, suffix=suffix)
        per_sample.append(reps)

    # Transpose so the i-th pseudorep of every sample is grouped together.
    return list(zip(*per_sample))
def pseudoreplicate(self, options, suffix='Pseudorep'):
    '''
    Generate pseudoreplicates for every passed tag directory by splitting
    randomly.

    The result is transposed so that each tuple holds the same pseudorep
    number across all samples:
    [(Sample1-Pseudorep1, Sample2-Pseudorep1, Sample3-Pseudorep1),
     (Sample1-Pseudorep2, Sample2-Pseudorep2, Sample3-Pseudorep2)...]
    '''
    self.check_output_dir(options.output_dir)
    helper = IdrUtilities()

    def split_one(tag_dir):
        # Announce, then split a single tag directory into pseudoreps.
        print('Generating {} pseudoreplicate tag directories for {}'.format(
            options.pseudorep_count, tag_dir))
        return helper.create_pseudoreps(
            tag_dir, options.output_dir,
            count=options.pseudorep_count, suffix=suffix)

    grouped = [split_one(tag_dir) for tag_dir in options.tag_dirs]
    return list(zip(*grouped))
def slice_pooled_peaks(
    self,
    threshold,
    pooled_threshold,
    rep_files,
    pseudorep_files,
    pooled_files,
    pooled_peaks,
    output_dir,
    ranking_measure="tag-count",
):
    """
    Trim the pooled peak file to the count of peaks passing the replicate
    threshold, emitting a warning when the replicate and pooled counts
    differ by more than two-fold.
    """
    helper = IdrUtilities()

    keep_count = helper.get_peaks_within_threshold(threshold, rep_files)
    # NOTE(review): return value unused (same as the original); kept for
    # any output the call produces — confirm before deleting.
    helper.get_peaks_within_threshold(threshold, pseudorep_files)
    pooled_count = helper.get_peaks_within_threshold(pooled_threshold, pooled_files)

    # Replicate vs pooled counts should be within a factor of two.
    if abs(math.log(keep_count / pooled_count, 2)) > 1:
        warning = (
            "!! Warning: The number of peaks within the replicate "
            "threshold is not within two-fold of the number of "
            "peaks within the pooled threshold. This could indicate "
            "inconsistencies in the datasets.\n"
            "Replicate count: {}, Pooled count: {}".format(keep_count, pooled_count)
        )
        print(warning)

    # Slice the pooled peak file down to keep_count entries.
    result = helper.slice_peaks(pooled_peaks, keep_count, ranking_measure, output_dir)
    print("{} peaks output to {}".format(keep_count, result))
def homer2narrow(self, options, peak_files, output_dir=None):
    '''
    Convert passed Homer peak files to narrowPeak files as specified by
    the IdrUtilities object.

    Returns the list of filenames for generated narrowPeak files.
    '''
    # Fix: resolve the fallback directory before use. Previously only
    # check_output_dir received `output_dir or options.output_dir` while
    # os.path.join was given the raw argument, so calling this method
    # without output_dir raised a TypeError on join(None, ...).
    output_dir = output_dir or options.output_dir
    self.check_output_dir(output_dir)

    idrutils = IdrUtilities()
    output_files = []
    for peak_file in peak_files:
        # Extensionless name of the input file. NOTE: inputs that share
        # a basename will overwrite each other's output here.
        basename = os.path.splitext(os.path.basename(peak_file))[0]
        output_file = os.path.join(output_dir, basename + '.narrowPeak')

        data = idrutils.import_homer_peaks(peak_file)
        idrutils.homer_to_narrow_peaks(data, output_file)
        print('NarrowPeak file output to {}'.format(output_file))
        output_files.append(output_file)
    return output_files