def get_set_of_histogram_paths(input_file, seen_summed_hists=False):
    """Return the unique histogram paths, stripped of their b-tag suffix.

    Each line returned by ``input_file.readlines()`` is treated as one
    histogram path. A line is skipped unless ``filter_string`` accepts it for
    both ``filter_on_folders`` and ``filter_on_histograms``. Accepted lines
    are split with ``get_histogram_info_tuple`` into directory, histogram
    name and b-tag bin. When the bin is one of ``existing_bins``, the last
    ``_``-separated token of the histogram name is dropped and
    ``directory + name`` is recorded.

    ``seen_summed_hists`` is set to True locally when a bin from
    ``to_be_created`` is encountered, but it is never returned: rebinding a
    parameter does not change the caller's variable.

    Returns a ``set`` of path strings.
    """
    unique_paths = set()
    for n_checked, raw_line in enumerate(input_file.readlines(), 1):
        # Progress message once every 10000 inspected lines.
        if n_checked % 10000 == 0:
            logging.debug('Checked %d' % n_checked)

        # The filters see the raw line (trailing newline included); the
        # second filter is only consulted when the first one accepts.
        accepted = (filter_string(raw_line, filter_on_folders)
                    and filter_string(raw_line, filter_on_histograms))
        if not accepted:
            continue

        histogram_path = raw_line.rstrip(' \n')
        info = get_histogram_info_tuple(histogram_path)
        directory, histogram_name, b_tag_bin = info
        logging.debug('Searching %s' % histogram_path)
        logging.debug('Found directory %s' % directory)
        logging.debug('Found histogram_name %s' % histogram_name)
        logging.debug('Found b_tag_bin %s' % b_tag_bin)

        if b_tag_bin in existing_bins:
            # Drop the last '_'-separated token (the b-tag bin suffix).
            name_tokens = histogram_name.split('_')
            histogram_name = '_'.join(name_tokens[:-1])
            bin_free_path = directory + histogram_name
            logging.debug('Adding histogram %s' % bin_free_path)
            unique_paths.add(bin_free_path)

        if b_tag_bin in to_be_created:
            # Local flag only; see the docstring.
            seen_summed_hists = True

    return unique_paths
def sum_b_tag_bins_in_file(file_in_path):
    """Write a copy of a ROOT file with additional summed b-tag histograms.

    The output path is ``file_in_path`` with ``.root`` replaced by
    ``_summed.root`` and ``input_folder`` replaced by ``output_folder``.
    The histograms to process come from the listing file
    ``existing_histogram_file``, parsed by ``get_set_of_histogram_paths``.

    For every histogram path in that set:

    * the histogram for each bin in ``existing_bins`` is cloned from the
      input file and written to the output file under the same name;
    * for the i-th bin in ``to_be_created``, a histogram is written that is
      the sum of the existing-bin histograms from index i onwards.
      ``to_be_created`` must therefore not be longer than ``existing_bins``,
      otherwise the index lookup raises ``IndexError``.

    Both ROOT files are closed even if processing fails part-way.

    Returns None.
    """
    logging.debug('Processing file %s' % file_in_path)
    output_file_name = file_in_path.replace('.root', '_summed.root')
    output_file_name = output_file_name.replace(input_folder, output_folder)

    # run rootinfo on file
    # or read the output (histogram list)
    seen_summed_hists = False
    # 'with' guarantees the listing is closed on every exit path, including
    # an exception raised while parsing it.
    with open(existing_histogram_file) as histogram_listing:
        histogram_set = get_set_of_histogram_paths(histogram_listing,
                                                   seen_summed_hists)
    logging.debug('Found %d unique b-tag binned histograms' % len(histogram_set))

    # NOTE(review): a callee cannot rebind this local boolean, so the flag is
    # still False here and this branch never fires as written. Kept for
    # parity; confirm the intended behaviour before wiring the flag through,
    # since enabling it would suppress creation of the output file.
    if seen_summed_hists:
        logging.warning('Summed histograms have been detected. Will skip this part')
        return

    # One directory entry per histogram path (duplicates are passed through).
    directories = [get_histogram_info_tuple(path)[0] for path in histogram_set]

    logging.debug('opening file %s ' % output_file_name)
    output_file = File(output_file_name, 'recreate')
    try:
        cd = output_file.cd
        logging.debug('creating folder structure')
        create_folder_structure(output_file, directories)
        logging.debug('created folder structure')

        logging.debug('opening file %s ' % file_in_path)
        input_file = File(file_in_path, 'read')
        try:
            get_histogram = input_file.Get
            logging.debug('opened file')

            for histogram in histogram_set:
                # Return to the top of the output file before descending
                # into this histogram's folder.
                cd()
                logging.debug('Processing histogram: %s' % histogram)
                histogram_path, histogram_name, _ = get_histogram_info_tuple(histogram)
                logging.debug('Found histogram_path %s' % histogram_path)
                logging.debug('Found histogram_name %s' % histogram_name)
                cd(histogram_path)

                existing_histograms = [
                    get_histogram(histogram + '_' + existing_bin).Clone()
                    for existing_bin in existing_bins
                ]

                # write existing b-tag bins
                for b_tag_bin, existing_histogram in zip(existing_bins,
                                                         existing_histograms):
                    existing_histogram.Write(histogram_name + '_' + b_tag_bin)

                # write new b-tag bins: bin i is the sum of existing bins i..end
                for bin_i, b_tag_bin in enumerate(to_be_created):
                    current_histogram_name = histogram_name + '_' + b_tag_bin
                    new_histogram = existing_histograms[bin_i].Clone(current_histogram_name)
                    for existing_histogram in existing_histograms[bin_i + 1:]:
                        new_histogram.Add(existing_histogram)
                    new_histogram.Write(current_histogram_name)
        finally:
            # Same close order as before: input first, then output.
            input_file.Close()
    finally:
        output_file.Close()

    logging.debug('Finished %s' % file_in_path)
    logging.debug('Output: %s' % output_file_name)
    return