def read_fit_templates_and_results_as_histograms( category, channel ):
    '''
    Turn the stored fit templates and fit results into histograms.

    Three JSON files are read from <path_to_JSON>/fit_results/<category>/:
    templates_*, initial_values_* (only its 'data' entry is used) and
    fit_results_*, each suffixed with '<channel>_<met_type>.txt'.

    Returns a pair of nested dictionaries, both indexed as
    [fit_variable][variable_bin]:
      - the unscaled template histograms of the four simulated samples
        ('TTJet', 'SingleTop', 'V+Jets', 'QCD');
      - the scaled histograms, grouped as 'data', 'signal' (TTJet) and
        'background' (V+Jets + QCD + SingleTop).

    Relies on the module-level path_to_JSON, variable, met_type and
    phase_space.
    '''
    global path_to_JSON, variable, met_type, phase_space
    file_prefix = path_to_JSON + '/fit_results/' + category + '/'
    file_suffix = '_' + channel + '_' + met_type + '.txt'

    templates = read_data_from_JSON( file_prefix + 'templates' + file_suffix )
    data_values = read_data_from_JSON( file_prefix + 'initial_values' + file_suffix )['data']
    fit_results = read_data_from_JSON( file_prefix + 'fit_results' + file_suffix )

    fit_variables = templates.keys()
    template_histograms = {fit_variable: {} for fit_variable in fit_variables}
    fit_results_histograms = {fit_variable: {} for fit_variable in fit_variables}

    # the binning depends on the phase space; any other value leaves it as None
    variableBins = None
    if phase_space == 'VisiblePS':
        variableBins = variable_bins_visiblePS_ROOT
    elif phase_space == 'FullPS':
        variableBins = variable_bins_ROOT

    # simulated samples, in the order their histograms are created
    mc_samples = ( 'TTJet', 'SingleTop', 'V+Jets', 'QCD' )

    for bin_i, variable_bin in enumerate( variableBins[variable] ):
        for fit_variable in fit_variables:
            shapes = templates[fit_variable]

            h_template_data = value_tuplelist_to_hist(
                shapes['data'][bin_i], fit_variable_bin_edges[fit_variable] )
            mc_templates = {}
            for sample in mc_samples:
                mc_templates[sample] = value_tuplelist_to_hist(
                    shapes[sample][bin_i], fit_variable_bin_edges[fit_variable] )
            template_histograms[fit_variable][variable_bin] = mc_templates

            # work on copies so the stored templates stay unscaled
            h_data = h_template_data.Clone()
            scaled = {}
            for sample in mc_samples:
                scaled[sample] = mc_templates[sample].Clone()

            # first element of each entry is the normalisation
            data_normalisation = data_values[bin_i][0]
            normalisations = {}
            for sample in mc_samples:
                normalisations[sample] = fit_results[sample][bin_i][0]

            h_data.Scale( data_normalisation )
            for sample in mc_samples:
                scaled[sample].Scale( normalisations[sample] )
            h_background = scaled['V+Jets'] + scaled['QCD'] + scaled['SingleTop']

            # data errors: square root of the scaled bin content
            for root_bin in range( 1, len( h_data ) + 1 ):
                h_data.SetBinError( root_bin, sqrt( h_data.GetBinContent( root_bin ) ) )

            fit_results_histograms[fit_variable][variable_bin] = {
                'data' : h_data,
                'signal' : scaled['TTJet'],
                'background' : h_background
            }

    return template_histograms, fit_results_histograms
def compare_unfolding_methods(measurement='normalised_xsection',
                              add_before_unfolding=False, channel='combined'):
    '''
    Plot the SVD-unfolded against the Bayes-unfolded TTJet result, once per
    variable.

    For each variable in a fixed list, the two RooUnfold result files
    (method 'Svd' and 'Bayes') are read with read_data_from_JSON, their
    'TTJet_unfolded' entries are converted to histograms over
    bin_edges_vis[variable] and the pair is passed to compare_histograms.

    measurement          -- file-name prefix; when it contains 'xsection' the
                            y axis gets the normalised cross-section label,
                            otherwise the ttbar normalisation label
    add_before_unfolding -- when true, 'TTJet_measured_withoutFakes' (from
                            the SVD file) is drawn as well, '_ext' is
                            appended to the plot name and the ratio is
                            switched off
    channel              -- channel token used in the file and plot names
    '''
    file_template = (
        '/hdfs/TopQuarkGroup/run2/dpsData/'
        'data/normalisation/background_subtraction/13TeV/'
        '{variable}/VisiblePS/central/'
        '{measurement}_{channel}_RooUnfold{method}.txt'
    )

    for variable in ('MET', 'HT', 'ST', 'NJets',
                     'lepton_pt', 'abs_lepton_eta', 'WPT'):
        svd, bayes = [
            file_template.format(variable=variable, method=method,
                                 channel=channel, measurement=measurement)
            for method in ('Svd', 'Bayes')
        ]

        svd_content = read_data_from_JSON(svd)
        before_unfolding = svd_content['TTJet_measured_withoutFakes']
        svd_data = svd_content['TTJet_unfolded']
        bayes_data = read_data_from_JSON(bayes)['TTJet_unfolded']

        edges = bin_edges_vis[variable]
        h_svd = value_error_tuplelist_to_hist(svd_data, edges)
        h_bayes = value_error_tuplelist_to_hist(bayes_data, edges)
        h_before_unfolding = value_error_tuplelist_to_hist(
            before_unfolding, edges)

        properties = Histogram_properties()
        properties.name = '{0}_compare_unfolding_methods_{1}_{2}'.format(
            measurement, variable, channel)
        properties.title = 'Comparison of unfolding methods'
        properties.path = 'plots'
        properties.has_ratio = True
        properties.xerr = True
        properties.x_limits = (edges[0], edges[-1])
        properties.x_axis_title = variables_latex[variable]
        if 'xsection' not in measurement:
            properties.y_axis_title = r'$t\bar{t}$ normalisation'
        else:
            properties.y_axis_title = (
                r'$\frac{1}{\sigma}  \frac{d\sigma}{d'
                + variables_latex[variable] + '}$')

        histograms = {'SVD': h_svd, 'Bayes': h_bayes}
        if add_before_unfolding:
            # three curves: extend the name and switch the ratio off
            histograms['before unfolding'] = h_before_unfolding
            properties.name += '_ext'
            properties.has_ratio = False

        plot = Plot(histograms, properties)
        plot.draw_method = 'errorbar'
        compare_histograms(plot)
def compare_combine_before_after_unfolding(measurement='normalised_xsection',
                              add_before_unfolding=False):
    '''
    Plot the SVD-unfolded TTJet result for channels combined before
    unfolding against channels combined after unfolding, once per variable.

    For each variable in a fixed list, the 'combinedBeforeUnfolding' and
    'combined' result files are read with read_data_from_JSON, their
    'TTJet_unfolded' entries are converted to histograms over
    bin_edges_vis[variable] and the pair is passed to compare_histograms.

    measurement          -- file-name prefix; when it contains 'xsection' the
                            y axis gets the normalised cross-section label,
                            otherwise the ttbar normalisation label
    add_before_unfolding -- when true, 'TTJet_measured' (from the
                            'combinedBeforeUnfolding' file) is drawn as well,
                            '_ext' is appended to the plot name and the ratio
                            is switched off
    '''
    file_template = (
        'data/normalisation/background_subtraction/13TeV/'
        '{variable}/VisiblePS/central/'
        '{measurement}_{channel}_RooUnfold{method}.txt'
    )

    for variable in ('MET', 'HT', 'ST', 'NJets',
                     'lepton_pt', 'abs_lepton_eta', 'WPT'):
        combineBefore, combineAfter = [
            file_template.format(variable=variable, method='Svd',
                                 channel=channel_token,
                                 measurement=measurement)
            for channel_token in ('combinedBeforeUnfolding', 'combined')
        ]

        before_content = read_data_from_JSON(combineBefore)
        before_unfolding = before_content['TTJet_measured']
        combineBefore_data = before_content['TTJet_unfolded']
        combineAfter_data = read_data_from_JSON(combineAfter)['TTJet_unfolded']

        edges = bin_edges_vis[variable]
        h_combineBefore = value_error_tuplelist_to_hist(
            combineBefore_data, edges)
        h_combineAfter = value_error_tuplelist_to_hist(
            combineAfter_data, edges)
        h_before_unfolding = value_error_tuplelist_to_hist(
            before_unfolding, edges)

        properties = Histogram_properties()
        properties.name = '{0}_compare_combine_before_after_unfolding_{1}'.format(
            measurement, variable)
        properties.title = 'Comparison of combining before/after unfolding'
        properties.path = 'plots'
        properties.has_ratio = True
        properties.xerr = True
        properties.x_limits = (edges[0], edges[-1])
        properties.x_axis_title = variables_latex[variable]
        if 'xsection' not in measurement:
            properties.y_axis_title = r'$t\bar{t}$ normalisation'
        else:
            properties.y_axis_title = (
                r'$\frac{1}{\sigma}  \frac{d\sigma}{d'
                + variables_latex[variable] + '}$')

        histograms = {
            'Combine before unfolding': h_combineBefore,
            'Combine after unfolding': h_combineAfter,
        }
        if add_before_unfolding:
            # three curves: extend the name and switch the ratio off
            histograms['before unfolding'] = h_before_unfolding
            properties.name += '_ext'
            properties.has_ratio = False

        plot = Plot(histograms, properties)
        plot.draw_method = 'errorbar'
        compare_histograms(plot)
def compare_unfolding_uncertainties():
    '''
    Plot the relative uncertainty (in percent) of the TTJet normalisation
    for SVD unfolding, Bayes unfolding and the measurement before unfolding,
    once per variable.

    Each entry of the JSON lists is unpacked as a (value, error) pair and
    replaced by error / value * 100 before being histogrammed over
    bin_edges_vis[variable].
    '''
    def to_percent(value_error_pairs):
        # relative error of every (value, error) pair, in percent
        return [e / v * 100 for v, e in value_error_pairs]

    file_template = (
        '/hdfs/TopQuarkGroup/run2/dpsData/'
        'data/normalisation/background_subtraction/13TeV/'
        '{variable}/VisiblePS/central/'
        'unfolded_normalisation_combined_RooUnfold{method}.txt'
    )

    for variable in ('MET', 'HT', 'ST', 'NJets',
                     'lepton_pt', 'abs_lepton_eta', 'WPT'):
        svd = file_template.format(variable=variable, method='Svd')
        bayes = file_template.format(variable=variable, method='Bayes')

        svd_content = read_data_from_JSON(svd)
        before_unfolding = svd_content['TTJet_measured_withoutFakes']
        svd_data = svd_content['TTJet_unfolded']
        bayes_data = read_data_from_JSON(bayes)['TTJet_unfolded']

        before_unfolding = to_percent(before_unfolding)
        svd_data = to_percent(svd_data)
        bayes_data = to_percent(bayes_data)

        edges = bin_edges_vis[variable]
        h_svd = value_tuplelist_to_hist(svd_data, edges)
        h_bayes = value_tuplelist_to_hist(bayes_data, edges)
        h_before_unfolding = value_tuplelist_to_hist(before_unfolding, edges)

        properties = Histogram_properties()
        properties.name = 'compare_unfolding_uncertainties_{0}'.format(
            variable)
        properties.title = 'Comparison of unfolding uncertainties'
        properties.path = 'plots'
        properties.has_ratio = False
        properties.xerr = True
        properties.x_limits = (edges[0], edges[-1])
        properties.x_axis_title = variables_latex[variable]
        properties.y_axis_title = 'relative uncertainty (\\%)'
        properties.legend_location = (0.98, 0.95)

        plot = Plot(
            {
                'SVD': h_svd,
                'Bayes': h_bayes,
                'before unfolding': h_before_unfolding,
            },
            properties)
        plot.draw_method = 'errorbar'
        compare_histograms(plot)
def compare_combine_before_after_unfolding_uncertainties():
    '''
    Plot the relative uncertainty (in percent) of the SVD-unfolded TTJet
    normalisation for channels combined before unfolding against channels
    combined after unfolding, once per variable.

    Each entry of the JSON lists is unpacked as a (value, error) pair and
    replaced by error / value * 100 before being histogrammed over
    bin_edges_vis[variable].
    '''
    def to_percent(value_error_pairs):
        # relative error of every (value, error) pair, in percent
        return [e / v * 100 for v, e in value_error_pairs]

    file_template = (
        'data/normalisation/background_subtraction/13TeV/'
        '{variable}/VisiblePS/central/'
        'unfolded_normalisation_{channel}_RooUnfoldSvd.txt'
    )

    for variable in ('MET', 'HT', 'ST', 'NJets',
                     'lepton_pt', 'abs_lepton_eta', 'WPT'):
        beforeUnfolding = file_template.format(
            variable=variable, channel='combinedBeforeUnfolding')
        afterUnfolding = file_template.format(
            variable=variable, channel='combined')

        before_content = read_data_from_JSON(beforeUnfolding)
        before_unfolding = before_content['TTJet_measured']
        beforeUnfolding_data = before_content['TTJet_unfolded']
        afterUnfolding_data = read_data_from_JSON(afterUnfolding)['TTJet_unfolded']

        before_unfolding = to_percent(before_unfolding)
        beforeUnfolding_data = to_percent(beforeUnfolding_data)
        afterUnfolding_data = to_percent(afterUnfolding_data)

        edges = bin_edges_vis[variable]
        h_beforeUnfolding = value_tuplelist_to_hist(beforeUnfolding_data, edges)
        h_afterUnfolding = value_tuplelist_to_hist(afterUnfolding_data, edges)
        # built but currently not drawn
        h_before_unfolding = value_tuplelist_to_hist(before_unfolding, edges)

        properties = Histogram_properties()
        properties.name = 'compare_combine_before_after_unfolding_uncertainties_{0}'.format(
            variable)
        properties.title = 'Comparison of unfolding uncertainties'
        properties.path = 'plots'
        properties.has_ratio = False
        properties.xerr = True
        properties.x_limits = (edges[0], edges[-1])
        properties.x_axis_title = variables_latex[variable]
        properties.y_axis_title = 'relative uncertainty (\\%)'
        properties.legend_location = (0.98, 0.95)

        plot = Plot(
            {
                'Combine before unfolding': h_beforeUnfolding,
                'Combine after unfolding': h_afterUnfolding,
            },
            properties)
        plot.draw_method = 'errorbar'
        compare_histograms(plot)
def read_unfolded_xsections(channel):
    '''
    Collect the 'TTJet_unfolded' entry of the normalised cross-section file
    of every category in the module-level `categories`.

    Returns a dictionary {category: TTJet_unfolded entry}. The file location
    is built from the module-level path_to_JSON, variable, k_value and
    met_type together with `channel`.
    '''
    global path_to_JSON, variable, k_value, met_type, b_tag_bin

    def xsection_file(category):
        # one result file per category
        return (path_to_JSON + '/' + variable + '/xsection_measurement_results'
                + '/kv' + str(k_value) + '/' + category
                + '/normalised_xsection_' + channel + '_' + met_type + '.txt')

    return {
        category: read_data_from_JSON(xsection_file(category))['TTJet_unfolded']
        for category in categories
    }
def read_normalised_xsection_measurement(options, category):
    '''
    Read the normalised cross-section file of `category` and return the pair
    ('TTJet_measured', 'TTJet_unfolded') stored in it.

    `options` is a dictionary providing 'variable', 'variables_no_met',
    'met_specific_systematics', 'path_to_JSON', 'method' and 'channel'.

    If `category` is one of the MET-specific systematics while the variable
    is listed in 'variables_no_met', the 'central' file is read instead.
    '''
    variable = options['variable']
    variables_no_met = options['variables_no_met']
    met_specific_systematics = options['met_specific_systematics']
    path_to_JSON = options['path_to_JSON']
    method = options['method']
    channel = options['channel']

    # Disregard MET uncertainties if the variable does not use MET
    ignore_met_systematic = (category in met_specific_systematics
                             and variable in variables_no_met)
    category_to_read = 'central' if ignore_met_systematic else category

    filename = '{path}/{category}/normalised_xsection_{channel}_{method}.txt'.format(
        path = path_to_JSON,
        category = category_to_read,
        channel = channel,
        method = method,
    )

    normalised_xsection = read_data_from_JSON( filename )
    # open question kept from the original author: should this be the
    # measurement without fakes?
    return (normalised_xsection['TTJet_measured'],
            normalised_xsection['TTJet_unfolded'])
def get_fitted_normalisation(variable, channel):
    '''
    Sum the fitted normalisation of each sample over all bins of `variable`.

    Returns a dictionary {sample: [summed values, summed errors]} for
    'TTJet', 'SingleTop', 'V+Jets' and 'QCD'. Index 0 of every fit-result
    entry is added to the first element, index 1 to the second.

    Relies on the module-level path_to_JSON, category and met_type.

    NOTE(review): the errors are added linearly, not in quadrature -
    confirm this is intended.
    NOTE(review): unlike the other readers there is no '/' between
    path_to_JSON and variable - presumably path_to_JSON ends with a
    separator here; verify against the caller.
    '''
    global path_to_JSON, category, met_type
    fit_results = read_data_from_JSON(
        path_to_JSON + variable + '/fit_results/' + category
        + '/fit_results_' + channel + '_' + met_type + '.txt')

    bin_indices = [bin_i for bin_i, _ in enumerate(variable_bins_ROOT[variable])]

    fitted_normalisation = {}
    for sample in ('TTJet', 'SingleTop', 'V+Jets', 'QCD'):
        total_value = sum(fit_results[sample][bin_i][0] for bin_i in bin_indices)
        total_error = sum(fit_results[sample][bin_i][1] for bin_i in bin_indices)
        fitted_normalisation[sample] = [total_value, total_error]
    return fitted_normalisation
def read_xsection_measurement_results(category, channel):
    '''
    Read the normalised cross-section file of `category` / `channel` and
    convert its entries to histograms over bin_edges[variable].

    Returns two dictionaries of histograms:
      - measured and unfolded result plus the 'MADGRAPH', 'POWHEG' and
        'MCATNLO' entries;
      - measured and unfolded result plus the 'matchingdown', 'matchingup',
        'scaledown' and 'scaleup' entries.
    The 'measured' and 'unfolded' histograms are the same objects in both.

    Relies on the module-level path_to_JSON, variable, k_value and met_type.
    '''
    global path_to_JSON, variable, k_value, met_type
    file_content = read_data_from_JSON(
        path_to_JSON + '/' + variable + '/xsection_measurement_results'
        + '/kv' + str(k_value) + '/' + category
        + '/normalised_xsection_' + channel + '_' + met_type + '.txt')

    # keys of the JSON file, in the order their histograms are created
    entries = ['TTJet_measured', 'TTJet_unfolded',
               'MADGRAPH', 'POWHEG', 'MCATNLO',
               'matchingup', 'matchingdown',
               'scaleup', 'scaledown']
    hists = {}
    for entry in entries:
        hists[entry] = value_error_tuplelist_to_hist(
            file_content[entry], bin_edges[variable])

    histograms_normalised_xsection_different_generators = {
        'measured': hists['TTJet_measured'],
        'unfolded': hists['TTJet_unfolded'],
    }
    for generator in ('MADGRAPH', 'POWHEG', 'MCATNLO'):
        histograms_normalised_xsection_different_generators[generator] = hists[generator]

    histograms_normalised_xsection_systematics_shifts = {
        'measured': hists['TTJet_measured'],
        'unfolded': hists['TTJet_unfolded'],
    }
    for shift in ('matchingdown', 'matchingup', 'scaledown', 'scaleup'):
        histograms_normalised_xsection_systematics_shifts[shift] = hists[shift]

    return (histograms_normalised_xsection_different_generators,
            histograms_normalised_xsection_systematics_shifts)
def main(options, args):
    '''
    Convert the normalised cross-section JSON results into histograms stored
    in the ROOT file 'measurement_<CoM>TeV.root'.

    The output file gets one directory per channel and, inside it, one per
    variable. Every entry of the plain result file is written as a histogram
    named after its key; from the '_with_errors' file only entries whose key
    contains 'TTJet' are written, with '_with_syst' appended to the name.

    options -- needs the attribute CoM
    args    -- args[0] is a path template with the fields {variable},
               {channel} and {centre_of_mass_energy}
    '''
    config = XSectionConfig(options.CoM)
    path_template = args[0]
    m_file = 'normalised_xsection_patType1CorrectedPFMet.txt'
    m_with_errors_file = 'normalised_xsection_patType1CorrectedPFMet_with_errors.txt'

    f = File('measurement_{0}TeV.root'.format(options.CoM), 'recreate')
    for channel in ('electron', 'muon', 'combined'):
        d = f.mkdir(channel)
        d.cd()
        for variable in ('MET', 'HT', 'ST', 'WPT'):
            dv = d.mkdir(variable)
            dv.cd()

            # single channels carry an extra '/kv<k>/' path element
            channel_token = channel
            if channel != 'combined':
                channel_token = channel + \
                    '/kv{0}/'.format(config.k_values[channel][variable])
            path = path_template.format(variable=variable,
                                        channel=channel_token,
                                        centre_of_mass_energy=options.CoM)

            m = read_data_from_JSON(path + '/' + m_file)
            m_with_errors = read_data_from_JSON(
                path + '/' + m_with_errors_file)

            for name, result in m.items():
                h = make_histogram(result, bin_edges_full[variable])
                h.SetName(name)
                h.write()

            for name, result in m_with_errors.items():
                if 'TTJet' in name:
                    h = make_histogram(result, bin_edges_full[variable])
                    h.SetName(name + '_with_syst')
                    h.write()

            dv.write()
            d.cd()
        d.write()
    f.write()
    f.close()
def main():
    global config, options
    parser = OptionParser()
    parser.add_option( "-p", "--path", dest = "path", default = 'data/fit_checks/no_merging',
                  help = "set path to JSON files" )
    parser.add_option( '--create_fit_data', dest = "create_fit_data", action = "store_true",
                      help = "create the fit data for testing." )
    parser.add_option( '--refit', dest = "refit", action = "store_true",
                      help = "Fit again even if the output already exists" )
    parser.add_option( '--test', dest = "test", action = "store_true",
                      help = "Test only: run just one selected sample" )
    variables = config.histogram_path_templates.keys()
    fit_variables = fit_var_inputs
    mc_samples = ['TTJet', 'SingleTop', 'QCD', 'V+Jets']
    tests = closure_tests
    channels = ['electron', 'muon']
    COMEnergies = ['7', '8']
    
    ( options, _ ) = parser.parse_args()
    print 'Running from path', options.path 
    
    if ( options.create_fit_data ):
        create_fit_data( options.path, variables, fit_variables, mc_samples,
                        COMEnergies = COMEnergies, channels = channels )
     
    output_file = options.path + '/fit_test_output.txt'
    if options.test:
        output_file = options.path + '/fit_test_output_test.txt'
    if options.refit or not os.path.isfile( output_file ) or options.test:
        if os.path.isfile( options.path + '/fit_check_data.txt' ):
            fit_data = read_data_from_JSON( options.path + '/fit_check_data.txt' )
            results = run_tests( fit_data,
                                COMEnergies,
                                variables,
                                fit_variables,
                                mc_samples,
                                channels,
                                tests )
            write_data_to_JSON(results, output_file )
        else:
            print 'Please run bin/prepare_data_for_fit_checks first'
            print 'Then run this script with the option --create_fit_data.'
     
    results = read_data_from_JSON(output_file)
    plot_results( results )
def read_fit_templates_and_results_as_histograms(category, channel):
    '''
    Turn the stored fit templates and fit results into histograms over
    eta_bin_edges, one set per bin of the module-level `variable`.

    Three JSON files are read from
    <path_to_JSON>/<variable>/fit_results/<category>/: templates_*,
    initial_values_* (only its 'data' entry is used) and fit_results_*,
    each suffixed with '<channel>_<met_type>.txt'.

    Returns a pair of dictionaries indexed by variable bin:
      - the unscaled 'signal', 'V+Jets' and 'QCD' template histograms;
      - the scaled histograms, grouped as 'data', 'signal' and
        'background' (V+Jets + QCD).
    '''
    global path_to_JSON, variable, met_type
    folder = path_to_JSON + '/' + variable + '/fit_results/' + category + '/'
    suffix = '_' + channel + '_' + met_type + '.txt'

    templates = read_data_from_JSON(folder + 'templates' + suffix)
    data_values = read_data_from_JSON(folder + 'initial_values' + suffix)['data']
    fit_results = read_data_from_JSON(folder + 'fit_results' + suffix)

    # fitted processes, in the order their histograms are created
    processes = ('signal', 'V+Jets', 'QCD')

    template_histograms = {}
    fit_results_histograms = {}
    for bin_i, variable_bin in enumerate(variable_bins_ROOT[variable]):
        h_template_data = value_tuplelist_to_hist(templates['data'][bin_i], eta_bin_edges)
        process_templates = {}
        for process in processes:
            process_templates[process] = value_tuplelist_to_hist(
                templates[process][bin_i], eta_bin_edges)
        template_histograms[variable_bin] = process_templates

        # work on copies so the stored templates stay unscaled
        h_data = h_template_data.Clone()
        scaled = {}
        for process in processes:
            scaled[process] = process_templates[process].Clone()

        data_normalisation = data_values[bin_i]
        normalisations = {}
        for process in processes:
            normalisations[process] = fit_results[process][bin_i][0]

        h_data.Scale(data_normalisation)
        for process in processes:
            scaled[process].Scale(normalisations[process])
        h_background = scaled['V+Jets'] + scaled['QCD']

        # data errors: square root of the scaled bin content
        for root_bin in range(1, len(h_data) + 1):
            h_data.SetBinError(root_bin, sqrt(h_data.GetBinContent(root_bin)))

        fit_results_histograms[variable_bin] = {
            'data': h_data,
            'signal': scaled['signal'],
            'background': h_background
        }

    return template_histograms, fit_results_histograms
def main():
    '''
    1 - Read Config file for normalisation measurement
    2 - Run measurement
    3 - Combine measurement before unfolding

    For both channels every '.json' config found under the measurement path
    of a selected variable is turned into a Measurement, normalised and
    saved. The electron measurement is then combined with the muon one and
    saved again.

    Raises RuntimeError if no measurement was produced for one of the
    channels, because there is then nothing to combine.

    NOTE(review): only the last measurement processed per channel is
    combined - confirm this is intended when several variables or config
    files are selected.
    NOTE(review): the variable filter is a substring test
    (`args.variable not in var`) - confirm an exact match is not wanted.
    '''
    # config file template
    input_template = 'config/measurements/background_subtraction/{com}TeV/{ch}/{var}/{ps}/'

    ps = 'FullPS'
    if args.visiblePS:
        ps = 'VisiblePS'

    # most recent measurement of each channel; None until one is produced
    electron_measurement = None
    muon_measurement = None

    for ch in ['electron', 'muon']:
        for var in measurement_config.variables:
            if args.variable not in var: continue

            # Create measurement_filepath
            measurement_filepath = input_template.format(
                com = args.CoM,
                ch = ch,
                var = var,
                ps = ps,
            )

            # Get all config files in measurement_filepath
            measurement_files = get_files_in_path(measurement_filepath, file_ending='.json')

            for f in sorted(measurement_files):
                # in test mode only the central measurement is processed
                if args.test and 'central' not in f: continue
                print('Processing file ' + f)
                # Read in Measurement JSON
                config = read_data_from_JSON(f)

                # Create Measurement Class using JSON
                measurement = Measurement(config)
                measurement.calculate_normalisation()
                measurement.save(ps)

                if 'electron' in ch:
                    electron_measurement = measurement
                elif 'muon' in ch:
                    muon_measurement = measurement

    if electron_measurement is None or muon_measurement is None:
        raise RuntimeError(
            'Cannot combine channels: no measurement was produced for the '
            'electron and/or the muon channel')

    # Combining the channels before unfolding
    combined_measurement = electron_measurement
    combined_measurement.combine(muon_measurement)
    combined_measurement.save(ps)
    return
示例#14
0
def read_from_fit_results_folder(path_to_JSON='data',
                                 variable='MET',
                                 category='central',
                                 channel='combined',
                                 met_type='patType1CorrectedPFMet',
                                 data_type='fit_results'):
    '''
    Read <path_to_JSON>/<category>/<data_type>_<channel>_<met_type>.txt
    with read_data_from_JSON and return its content.

    `variable` is accepted but does not enter the file name.
    '''
    file_name = data_type + '_' + channel + '_' + met_type + '.txt'
    return read_data_from_JSON('/'.join([path_to_JSON, category, file_name]))
def get_data(file_name, subset=''):
    '''
    Read `file_name` with read_data_from_JSON and return its content as one
    flat list.

    With a non-empty `subset`, the result is the concatenation of
    entry[subset] over all entries of the file; otherwise it is a list of
    the entries themselves.

    This takes a LOT of memory, please use subset!!
    '''
    data = read_data_from_JSON(file_name)

    if not subset:
        return list(data)

    # flatten the selected field of every entry
    return [item for entry in data for item in entry[subset]]
示例#16
0
def read_unfolded_normalisation(
        path_to_JSON='data',
        variable='MET',
        category='central',
        channel='combined',
        met_type='patType1CorrectedPFMet'):
    '''
    Read
    <path_to_JSON>/xsection_measurement_results/<channel>/<category>/normalisation_<met_type>.txt
    with read_data_from_JSON and return its content.

    `variable` is accepted but does not enter the file name.
    '''
    path_pattern = ('{0}/xsection_measurement_results/{1}/{2}/'
                    'normalisation_{3}.txt')
    return read_data_from_JSON(
        path_pattern.format(path_to_JSON, channel, category, met_type))
def parse_options():
    '''
    Parse the command line and load every positional argument as a JSON
    file.

    Returns (options, input_values_sets, json_input_files), where
    json_input_files lists the positional arguments in order and
    input_values_sets holds what read_data_from_JSON returned for each.
    The module docstring is passed to the parser as its usage text.
    '''
    parser = OptionParser( __doc__ )
    options, args = parser.parse_args()

    json_input_files = list( args )
    input_values_sets = [read_data_from_JSON( json_file ) for json_file in args]

    return options, input_values_sets, json_input_files
    def __set_unfolding_histograms__( self ):
        # at the moment only one file is supported for the unfolding input
        files = set( [self.truth['file'],
                     self.gen_vs_reco['file'],
                     self.measured['file']]
                    )
        if len( files ) > 1:
            print "Currently not supported to have different files for truth, gen_vs_reco and measured"
            sys.exit()
            
        input_file = files.pop()
        visiblePS = self.phaseSpace

        t, m, r, f = get_unfold_histogram_tuple( 
            File(input_file),
            self.variable,
            self.channel,
            centre_of_mass = self.centre_of_mass_energy,
            ttbar_xsection=self.measurement_config.ttbar_xsection,
            luminosity=self.measurement_config.luminosity,
            load_fakes = True,
            visiblePS = visiblePS
        )

        self.h_truth = asrootpy ( t )
        self.h_response = asrootpy ( r )
        self.h_measured = asrootpy ( m )
        self.h_fakes = asrootpy ( f )
        self.h_refolded = None

        data_file = self.data['file']
        if data_file.endswith('.root'):
            self.h_data = get_histogram_from_file(self.data['histogram'], self.data['file'])
        elif data_file.endswith('.json') or data_file.endswith('.txt'):
            data_key = self.data['histogram']
            # assume configured bin edges
            edges = []
            edges = reco_bin_edges_vis[self.variable]

            json_input = read_data_from_JSON(data_file)

            if data_key == "": # JSON file == histogram
                self.h_data = value_error_tuplelist_to_hist(json_input, edges)
            else:
                self.h_data = value_error_tuplelist_to_hist(json_input[data_key], edges)
        else:
            print 'Unkown file extension', data_file.split('.')[-1]
def parse_options():
    '''
    Parse the config JSONs from the command line and read the contents of
    the JSON files.

    Returns (args, input_values_sets, json_input_files), where
    json_input_files lists the positional `in_files` in order and
    input_values_sets holds what read_data_from_JSON returned for each.
    '''
    # The first positional parameter of ArgumentParser is `prog`, so the
    # module docstring has to be passed as `description` explicitly.
    parser = ArgumentParser( description = __doc__ )
    parser.add_argument("in_files",
        nargs='*',
        help="List of the input files")
    parser.add_argument( "-t", "--test",
        dest = "run_measured_as_data",
        action = "store_true",
        help = "For debugging - run the measured distribution as data." )
    parser.add_argument( "-v", "--vary_measured_test",
        dest = "perform_varied_measured_unfolding_test",
        action = "store_true",
        help = "Unfolding test. Vary measured vals by Poisson then find ChiSq" )
    parser.add_argument( "-p", "--refold_plots",
        dest = "run_refold_plots",
        action = "store_true",
        help = "For debugging - output unfolded vs refolded for each tau" )
    parser.add_argument( "-n", "--n_ticks_in_log",
        dest = "n_ticks_in_log",
        default = 10,
        type = int,
        help = "How many taus in the range do you want" )
    parser.add_argument( "-u", "--unfolded_binning",
        dest = "unfolded_binning",
        action = "store_true",
        help = "Run the tau scans for unfolded (gen) binning" )
    args = parser.parse_args()

    if args.unfolded_binning:
        # single-argument call form: same output under Python 2 and 3
        print( "Calculating the chi2 in the unfolded (gen) binning scheme" )

    json_input_files = list( args.in_files )
    input_values_sets = [read_data_from_JSON( json_file )
                         for json_file in json_input_files]

    return args, input_values_sets, json_input_files
def parse_options():
    '''
    Parse the command line options and read every JSON file passed as a
    positional argument.

    Returns a tuple (options, input_values_sets, json_input_files) with the
    optparse options, the content of each file as returned by
    read_data_from_JSON and the file paths, both in command line order.
    '''
    parser = OptionParser( __doc__ )
    parser.add_option(
        "-c", "--compare",
        dest = "compare",
        action = "store_true",
        default = False,
        help = "Compare to current values (k vs tau)",
    )
    parser.add_option(
        "-t", "--table-style",
        dest = "style",
        default = 'simple',
        help = "Style for table printing: simple|latex|twiki (default = simple)",
    )

    options, positional_arguments = parser.parse_args()

    # every positional argument is treated as the path of a JSON input file
    json_input_files = list( positional_arguments )
    input_values_sets = [read_data_from_JSON( path ) for path in json_input_files]

    return options, input_values_sets, json_input_files
def main():
    '''
    Run the normalisation measurement for the variable selected on the
    command line, once per lepton channel:
    1 - find the measurement config files of the channel and variable
    2 - calculate and save the normalisation for each config
    3 - store the QCD transfer factors collected for the channel
    '''
    # config file template
    input_template = 'config/measurements/background_subtraction/{com}TeV/{ch}/{var}/{ps}/'
    output_folder_template = 'data/normalisation/background_subtraction/{com}TeV/{var}/{ps}/{cat}/'

    ps = 'VisiblePS' if args.visiblePS else 'FullPS'

    for ch in ['electron', 'muon']:
        for var in measurement_config.variables:
            # only the variable requested on the command line is processed
            if args.variable != var:
                continue

            # all JSON config files for this centre-of-mass energy, channel,
            # variable and phase space
            measurement_files = get_files_in_path(
                input_template.format(com=args.CoM, ch=ch, var=var, ps=ps),
                file_ending='.json',
            )

            qcd_transfer_factor = {}
            for f in sorted(measurement_files):
                # in test mode only the central measurement is run
                if args.test and 'central' not in f:
                    continue
                print('Processing file ' + f)
                sample = get_filename_without_extension(f)

                # Both channels are handled identically: build the Measurement
                # from its JSON config, normalise it and save the result.
                measurement = Measurement(read_data_from_JSON(f))
                measurement.calculate_normalisation()
                measurement.save(ps)
                qcd_transfer_factor[sample] = [measurement.t_factor]

            # transfer factors are always stored in the 'central' folder
            output_folder = output_folder_template.format(
                com=args.CoM, var=var, ps=ps, cat='central')
            store_transfer_factor(qcd_transfer_factor, output_folder, ch)
def get_variable_from(variable='MET',
                      path_to_JSON='data/8TeV',
                      category='central',
                      signal='Higgs',
                      measurement_type='unfolded'):
    '''
    Collect the per-channel results of one variable and category.

    Reads the normalisation results (and, for any measurement type other
    than 'unfolded', also the fit results and initial values) from the JSON
    files below path_to_JSON and selects the entries of interest.

    Parameters:
        variable: name of the variable, used as sub-folder of path_to_JSON
        path_to_JSON: base folder of the JSON result files
        category: 'central' or the name of a systematic variation
        signal: suffix of the normalisation files of the signal sample
        measurement_type: 'unfolded' selects the unfolded results; any other
            value selects the measured and fitted results

    Returns:
        Three dictionaries (electron, muon, combined) mapping a sample name
        to the selected results. Apart from 'Higgs_125' the combined
        dictionary is only filled for measurement_type == 'unfolded'.

    The module-level met_type is only read, never modified: the JES suffix
    is applied to a local copy, so a failing read cannot leave the global in
    a modified state.
    '''
    channels = ('electron', 'muon', 'combined')
    xsection_dir = (path_to_JSON + '/' + variable +
                    '/xsection_measurement_results/kv4/')

    def read_normalisation(channel, in_category, suffix=''):
        # normalisation results of one channel; the suffix selects the
        # file of a specific sample (e.g. the signal)
        return read_data_from_JSON(
            xsection_dir + in_category + '/normalisation_' + channel + '_' +
            met_type + suffix + '.txt')

    results = {}
    results_signal = {}
    for channel in channels:
        results[channel] = read_normalisation(channel, category)
        results_signal[channel] = read_normalisation(
            channel, category, '_' + signal)

    # we are only interested in the measured ttbar, unfolded TTbar for all categories
    # and the scale_up/down, matching up/down and the generators for central (all)
    selected = {}
    for channel in channels:
        selected[channel] = {
            'Higgs_125': results_signal[channel]['TTJet_' + measurement_type],
        }

    # note on systematics:
    # theta does not understand DATA systematics so they have to be migrated to MADGRAPH
    # there are two ways of doing so:
    # 1. calculate a scale factor based on data central measurement and MADGRAPH
    # 2. calculate a scale factor based on data central and systematic measurement
    # then apply the scale factor (1- [a-b]/a) to MADGRAPH
    # The second approach seems more right as it takes into account the effect of
    # the systematic instead of the difference between data and Madgraph
    # A way to test this is to calculate the scale factor for ttjet_matching
    # up/down and compare it to the existing distributions
    if measurement_type == 'unfolded':
        for channel in channels:
            selected[channel]['TTJet'] = results[channel]['MADGRAPH']

        # theta does not understand DATA with systematics
        if category == 'central':
            for channel in channels:
                selected[channel]['DATA'] = results[channel]['TTJet_unfolded']
        else:  # now let's do the systematics
            # read central results
            central_results = {}
            for channel in channels:
                central_results[channel] = read_normalisation(
                    channel, 'central')
            central = {}
            for channel in channels:
                central[channel] = central_results[channel]['TTJet_unfolded']
            # set systematics
            systematic = {}
            for channel in channels:
                systematic[channel] = results[channel]['TTJet_unfolded']
            # calculate scale factors and apply them to the MADGRAPH prediction
            for channel in channels:
                selected[channel]['TTJet'] = morph_systematic(
                    central[channel], systematic[channel],
                    selected[channel]['TTJet'])
    else:
        # the fit result files of the JES variations carry a MET suffix
        fit_met_type = met_type
        if category == 'JES_up':
            fit_met_type += 'JetEnUp'
        elif category == 'JES_down':
            fit_met_type += 'JetEnDown'

        fit_dir = path_to_JSON + '/' + variable + '/fit_results/' + category
        fit_channels = ('electron', 'muon')

        # read initial values and fit results
        initial = {}
        fit = {}
        for channel in fit_channels:
            file_suffix = '_' + channel + '_' + fit_met_type + '.txt'
            initial[channel] = read_data_from_JSON(
                fit_dir + '/initial_values' + file_suffix)
            fit[channel] = read_data_from_JSON(
                fit_dir + '/fit_results' + file_suffix)

        for channel in fit_channels:
            selected[channel]['TTJet'] = results[channel]['TTJet_measured']
        for channel in fit_channels:
            selected[channel]['SingleTop'] = fit[channel]['SingleTop']
        for channel in fit_channels:
            selected[channel]['QCD'] = fit[channel]['QCD']
        # theta does not understand DATA with systematics
        if category == 'central':
            for channel in fit_channels:
                selected[channel]['DATA'] = initial[channel]['data']
        for channel in fit_channels:
            selected[channel]['VJets'] = fit[channel]['V+Jets']

    return selected['electron'], selected['muon'], selected['combined']
示例#23
0
 def fromJSON(json_file):
     '''
     Create an Input from a JSON file; the top-level entries of the file
     are passed to the Input constructor as keyword arguments.
     '''
     return Input(**read_data_from_JSON(json_file))
def debug_last_bin():
    '''
        For debugging why the last bin in the problematic variables deviates a
        lot in _one_ of the channels only.

        For each problematic variable the results before and after unfolding
        and the powhegPythia8 model are read for all channels. Only the last
        bin is converted and the unfolded results of the channels are
        compared in one plot, saved as PNG in 'plots/'.
    '''
    file_template = (
        '/hdfs/TopQuarkGroup/run2/dpsData/'
        'data/normalisation/background_subtraction/13TeV/'
        '{variable}/VisiblePS/central/'
        'normalised_xsection_{channel}_RooUnfoldSvd{suffix}.txt'
    )
    problematic_variables = ['HT', 'MET', 'NJets', 'lepton_pt']
    channels = ['electron', 'muon', 'combined']
    # one record type for both the raw values and the derived plot objects
    Result = namedtuple(
        'Result', ['before_unfolding', 'after_unfolding', 'model'])

    for variable in problematic_variables:
        edges = bin_edges_vis[variable]
        results = {}
        for channel in channels:
            input_file_data = file_template.format(
                variable=variable,
                channel=channel,
                suffix='_with_errors',
            )
            input_file_model = file_template.format(
                variable=variable,
                channel=channel,
                suffix='',
            )
            data = read_data_from_JSON(input_file_data)
            data_model = read_data_from_JSON(input_file_model)
            before_unfolding = data['TTJet_measured_withoutFakes']
            after_unfolding = data['TTJet_unfolded']

            model = data_model['powhegPythia8']

            # only use the last bin
            h_before_unfolding = value_errors_tuplelist_to_graph(
                [before_unfolding[-1]], edges[-2:])
            h_after_unfolding = value_errors_tuplelist_to_graph(
                [after_unfolding[-1]], edges[-2:])
            h_model = value_error_tuplelist_to_hist(
                [model[-1]], edges[-2:])

            r = Result(before_unfolding, after_unfolding, model)
            h = Result(h_before_unfolding, h_after_unfolding, h_model)
            results[channel] = (r, h)

        models = {'POWHEG+PYTHIA': results['combined'][1].model}
        h_unfolded = [results[ch][1].after_unfolding for ch in channels]
        tmp_hists = spread_x(h_unfolded, edges[-2:])
        measurements = {}
        for ch, hist in zip(channels, tmp_hists):
            last_bin = results[ch][0].after_unfolding[-1]
            # raw string: '\p' is not a valid escape sequence
            label = r'{c_label} ({value:1.2g} $\pm$ {error:1.2g})'.format(
                c_label=ch,
                value=last_bin[0],
                error=last_bin[1],
            )
            measurements[label] = hist

        properties = Histogram_properties()
        # The name has always carried the label of the last channel (it used
        # to come from a leaked loop variable); it is kept explicitly so that
        # the output file names do not change.
        properties.name = 'normalised_xsection_compare_channels_{0}_{1}_last_bin'.format(
            variable, channels[-1])
        properties.title = 'Comparison of channels'
        properties.path = 'plots'
        properties.has_ratio = True
        properties.xerr = False
        properties.x_limits = (edges[-2], edges[-1])
        properties.x_axis_title = variables_latex[variable]
        properties.y_axis_title = r'$\frac{1}{\sigma}  \frac{d\sigma}{d' + \
            variables_latex[variable] + '}$'
        properties.legend_location = (0.95, 0.40)
        if variable == 'NJets':
            properties.legend_location = (0.97, 0.80)
        properties.formats = ['png']

        compare_measurements(models=models, measurements=measurements, show_measurement_errors=True,
                             histogram_properties=properties, save_folder='plots/', save_as=properties.formats)
示例#25
0
'''
Created on 23 Jan 2015

@author: phxlk
'''
from dps.utils.file_utilities import read_data_from_JSON

if __name__ == '__main__':
    # Sum the fitted number of events and the errors over all bins for every
    # sample of one fit result file and print the totals per sample.
    JSON_input_file = 'data/absolute_eta_M3_angle_bl/7TeV/HT/fit_results/central/fit_results_muon_patType1CorrectedPFMet.txt'
    normalisation = read_data_from_JSON(JSON_input_file)
    absolute_total_value = 0
    absolute_total_error = 0
    absolute_total_corrected_error = 0
    # .items() and the single-argument print(...) form run on Python 2 and 3
    for sample, fit_result in normalisation.items():
        print('Calculating total # events for sample "%s"' % sample)
        # loop over bins: every entry starts with (value, error)
        total_events = sum(result[0] for result in fit_result)
        total_error = sum(result[1] for result in fit_result)
        # an error larger than the number of events is capped at that number
        total_corrected_error = min(total_error, total_events)
        print('Total number of events %d +- %d (%d)' % (
            total_events, total_corrected_error, total_error))
        # keep the grand totals over all samples in step with each other
        absolute_total_value += total_events
        absolute_total_error += total_error
        absolute_total_corrected_error += total_corrected_error
示例#26
0
def read_xsection_measurement_results(path_to_JSON, variable, bin_edges,
                                      category,
                                      channel,
                                      k_values,
                                      met_type='patType1CorrectedPFMet',
                                      met_uncertainties=None):
    '''
    Read the normalised cross section results of one category and channel
    and convert them into histograms.

    Parameters:
        path_to_JSON: base folder of the JSON result files
        variable: name of the variable, key into bin_edges
        bin_edges: dictionary of bin edges per variable
        category: 'central' or the name of a systematic variation
        channel: channel name, used as sub-folder and key into k_values
        k_values: dictionary of k values per channel; only accessed for the
            'combined' channel and the 'central' category
        met_type: MET type used in the file names
        met_uncertainties: categories that are MET uncertainties
            (default: none)

    Returns:
        Two dictionaries of histograms, both holding the 'measured' and
        'unfolded' results. For the central category the first additionally
        holds the different generators and the second the systematic shifts,
        and both hold graphs of the results with systematic errors.
    '''
    if met_uncertainties is None:
        met_uncertainties = []

    results_dir = path_to_JSON + '/xsection_measurement_results/' + channel

    # For HT the MET uncertainties (other than JES/JER) are read from the
    # central results - presumably because HT does not depend on MET.
    use_central = (category in met_uncertainties and variable == 'HT'
                   and 'JES' not in category and 'JER' not in category)
    source_category = 'central' if use_central else category
    filename = results_dir + '/' + source_category + \
        '/normalised_xsection_' + met_type + '.txt'

    if channel == 'combined':
        # results of the combined channel are not stored in a k-value folder
        filename = filename.replace('kv' + str(k_values[channel]), '')

    normalised_xsection_unfolded = read_data_from_JSON(filename)
    edges = bin_edges[variable]

    h_normalised_xsection = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['TTJet_measured'], edges)
    h_normalised_xsection_unfolded = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['TTJet_unfolded'], edges)

    histograms_normalised_xsection_different_generators = {
        'measured': h_normalised_xsection,
        'unfolded': h_normalised_xsection_unfolded}
    histograms_normalised_xsection_systematics_shifts = {
        'measured': h_normalised_xsection,
        'unfolded': h_normalised_xsection_unfolded}

    if category == 'central':
        # true distributions
        generators = ['MADGRAPH', 'POWHEG_PYTHIA', 'POWHEG_HERWIG', 'MCATNLO']
        shifts = ['matchingup', 'matchingdown', 'scaleup', 'scaledown']
        h_theory = {}
        for sample in generators + shifts:
            h_theory[sample] = value_error_tuplelist_to_hist(
                normalised_xsection_unfolded[sample], edges)

        for sample in generators:
            histograms_normalised_xsection_different_generators[sample] = h_theory[sample]
        # MADGRAPH is the reference for the shifts (same histogram object)
        for sample in ['MADGRAPH'] + shifts:
            histograms_normalised_xsection_systematics_shifts[sample] = h_theory[sample]

        file_template = results_dir + '/kv' + str(k_values[channel]) + '/' + \
            category + '/normalised_xsection_' + met_type
        if channel == 'combined':
            file_template = file_template.replace(
                'kv' + str(k_values[channel]), '')
        without_ttbar_theory = read_data_from_JSON(
            file_template + '_with_systematics_but_without_ttbar_theory_errors.txt')
        without_generator = read_data_from_JSON(
            file_template + '_with_systematics_but_without_generator_errors.txt')

        # a rootpy.Graph with asymmetric errors!
        h_without_ttbar_theory = value_errors_tuplelist_to_graph(
            without_ttbar_theory['TTJet_measured'], edges)
        h_without_ttbar_theory_unfolded = value_errors_tuplelist_to_graph(
            without_ttbar_theory['TTJet_unfolded'], edges)
        h_without_generator = value_errors_tuplelist_to_graph(
            without_generator['TTJet_measured'], edges)
        h_without_generator_unfolded = value_errors_tuplelist_to_graph(
            without_generator['TTJet_unfolded'], edges)

        # the generator comparison uses the errors without the generator
        # systematics, the shifts those without the ttbar theory systematics
        histograms_normalised_xsection_different_generators[
            'measured_with_systematics'] = h_without_generator
        histograms_normalised_xsection_different_generators[
            'unfolded_with_systematics'] = h_without_generator_unfolded

        histograms_normalised_xsection_systematics_shifts[
            'measured_with_systematics'] = h_without_ttbar_theory
        histograms_normalised_xsection_systematics_shifts[
            'unfolded_with_systematics'] = h_without_ttbar_theory_unfolded

    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts
def getControlRegionHistogramsFromFile(file):
	'''
	Build a Measurement from the JSON config in `file` and return its
	control region histograms (the cr_histograms attribute).
	'''
	return Measurement( read_data_from_JSON(file) ).cr_histograms
示例#28
0
def read_xsection_measurement_results(path_to_JSON,
                                      variable,
                                      bin_edges,
                                      category,
                                      channel,
                                      k_values,
                                      met_type='patType1CorrectedPFMet',
                                      met_uncertainties=None):
    '''
    Read the normalised cross section results of one category and channel
    and convert them into histograms.

    Parameters:
        path_to_JSON: base folder of the JSON result files
        variable: name of the variable, key into bin_edges
        bin_edges: dictionary of bin edges per variable
        category: 'central' or the name of a systematic variation
        channel: channel name, used as sub-folder and key into k_values
        k_values: dictionary of k values per channel; only accessed for the
            'combined' channel and the 'central' category
        met_type: MET type used in the file names
        met_uncertainties: categories that are MET uncertainties
            (default: none)

    Returns:
        Two dictionaries of histograms, both holding the 'measured' and
        'unfolded' results. For the central category the first additionally
        holds the different generators and the second the systematic shifts,
        and both hold graphs of the results with systematic errors.
    '''
    if met_uncertainties is None:
        met_uncertainties = []

    results_dir = path_to_JSON + '/xsection_measurement_results/' + channel

    # For HT the MET uncertainties (other than JES/JER) are read from the
    # central results - presumably because HT does not depend on MET.
    use_central = (category in met_uncertainties and variable == 'HT'
                   and 'JES' not in category and 'JER' not in category)
    source_category = 'central' if use_central else category
    filename = results_dir + '/' + source_category + \
        '/normalised_xsection_' + met_type + '.txt'

    if channel == 'combined':
        # results of the combined channel are not stored in a k-value folder
        filename = filename.replace('kv' + str(k_values[channel]), '')

    normalised_xsection_unfolded = read_data_from_JSON(filename)
    edges = bin_edges[variable]

    h_normalised_xsection = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['TTJet_measured'], edges)
    h_normalised_xsection_unfolded = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['TTJet_unfolded'], edges)

    histograms_normalised_xsection_different_generators = {
        'measured': h_normalised_xsection,
        'unfolded': h_normalised_xsection_unfolded
    }

    histograms_normalised_xsection_systematics_shifts = {
        'measured': h_normalised_xsection,
        'unfolded': h_normalised_xsection_unfolded
    }

    if category == 'central':
        # true distributions
        generators = ['MADGRAPH', 'POWHEG_PYTHIA', 'POWHEG_HERWIG', 'MCATNLO']
        shifts = ['matchingup', 'matchingdown', 'scaleup', 'scaledown']
        h_theory = {}
        for sample in generators + shifts:
            h_theory[sample] = value_error_tuplelist_to_hist(
                normalised_xsection_unfolded[sample], edges)

        for sample in generators:
            histograms_normalised_xsection_different_generators[
                sample] = h_theory[sample]
        # MADGRAPH is the reference for the shifts (same histogram object)
        for sample in ['MADGRAPH'] + shifts:
            histograms_normalised_xsection_systematics_shifts[
                sample] = h_theory[sample]

        file_template = results_dir + '/kv' + str(k_values[channel]) + '/' + \
            category + '/normalised_xsection_' + met_type
        if channel == 'combined':
            file_template = file_template.replace(
                'kv' + str(k_values[channel]), '')

        without_ttbar_theory = read_data_from_JSON(
            file_template +
            '_with_systematics_but_without_ttbar_theory_errors.txt')
        without_generator = read_data_from_JSON(
            file_template +
            '_with_systematics_but_without_generator_errors.txt')

        # a rootpy.Graph with asymmetric errors!
        h_without_ttbar_theory = value_errors_tuplelist_to_graph(
            without_ttbar_theory['TTJet_measured'], edges)
        h_without_ttbar_theory_unfolded = value_errors_tuplelist_to_graph(
            without_ttbar_theory['TTJet_unfolded'], edges)

        h_without_generator = value_errors_tuplelist_to_graph(
            without_generator['TTJet_measured'], edges)
        h_without_generator_unfolded = value_errors_tuplelist_to_graph(
            without_generator['TTJet_unfolded'], edges)

        # the generator comparison uses the errors without the generator
        # systematics, the shifts those without the ttbar theory systematics
        histograms_normalised_xsection_different_generators[
            'measured_with_systematics'] = h_without_generator
        histograms_normalised_xsection_different_generators[
            'unfolded_with_systematics'] = h_without_generator_unfolded

        histograms_normalised_xsection_systematics_shifts[
            'measured_with_systematics'] = h_without_ttbar_theory
        histograms_normalised_xsection_systematics_shifts[
            'unfolded_with_systematics'] = h_without_ttbar_theory_unfolded

    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts
def get_data_histogram( channel, variable, met_type ):
    '''
    Return the 'TTJet' entry of the central 13TeV fit results for the given
    channel, variable and MET type as a histogram binned in
    bin_edges[variable].
    '''
    path_parts = {'channel': channel, 'variable': variable, 'met_type':met_type}
    fit_result_file = 'data/M3_angle_bl/13TeV/%(variable)s/fit_results/central/fit_results_%(channel)s_%(met_type)s.txt' % path_parts
    ttjet_fit_results = read_data_from_JSON( fit_result_file )['TTJet']
    return value_error_tuplelist_to_hist( ttjet_fit_results, bin_edges[variable] )
    def fromJSON(JSON_file):
        '''
        Create a Measurement from a JSON file: the file content is read with
        read_data_from_JSON and handed to Measurement.fromDict.
        '''
        return Measurement.fromDict(read_data_from_JSON(JSON_file))
def _measured_projection(input_file, variable, channel, tag, load_fakes=False):
    '''
    Load the response matrix for one sample and project it onto its x axis.

    input_file  -- opened unfolding file passed on to get_unfold_histogram_tuple
    variable    -- name of the variable to load
    channel     -- channel name passed on to get_unfold_histogram_tuple
    tag         -- short sample identifier, used only to build a unique
                   name for the projected histogram
    load_fakes  -- forwarded to get_unfold_histogram_tuple

    Returns a tuple (measured, fakes): the rootpy-wrapped x projection of the
    response matrix and the fourth element returned by
    get_unfold_histogram_tuple.
    '''
    _, _, response, fakes = get_unfold_histogram_tuple(
        inputfile=input_file,
        variable=variable,
        channel=channel,
        centre_of_mass=13,
        load_fakes=load_fakes,
        visiblePS=True,
    )
    # ROOT documents that TH2::ProjectionX resets and refills an already
    # existing histogram with the requested name. Using the same name for
    # every sample would therefore make all projections share one histogram,
    # so each projection gets its own name.
    projection_name = "px_{channel}_{variable}_{tag}".format(
        channel=channel, variable=variable, tag=tag
    )
    measured = asrootpy(response.ProjectionX(projection_name, 1))
    return measured, fakes


def main():
    '''
    Compare the reweighted PowhegPythia distributions to data.

    For every variable in the 13 TeV configuration the x projection of the
    response matrix is taken for the central sample and for the pt/eta
    reweighted samples (up and down). The background-subtracted data are read
    from JSON and passed through removeFakes. All histograms are rebinned by
    a factor of two, normalised to unit integral and handed to
    compare_measurements, which saves the plot as pdf under
    plots/unfolding/reweighting_check.
    '''
    config = XSectionConfig(13)

    file_for_powhegPythia = File(config.unfolding_central, "read")
    # (legend label, projection tag, input file) for each reweighted sample
    reweighted_inputs = [
        ("PtReweighted Up", "pt_up", File(config.unfolding_ptreweight_up, "read")),
        ("PtReweighted Down", "pt_down", File(config.unfolding_ptreweight_down, "read")),
        ("EtaReweighted Up", "eta_up", File(config.unfolding_etareweight_up, "read")),
        ("EtaReweighted Down", "eta_down", File(config.unfolding_etareweight_down, "read")),
    ]
    file_for_data_template = "data/normalisation/background_subtraction/13TeV/{variable}/VisiblePS/central/normalisation_combined_patType1CorrectedPFMet.txt"

    for channel in ["combined"]:
        for variable in config.variables:
            # single-argument form prints identically on Python 2 and 3
            print(variable)

            # Get the central powheg pythia distribution (with fakes)
            measured_central, fakes_central = _measured_projection(
                file_for_powhegPythia, variable, channel, "central", load_fakes=True
            )
            models = {"Central": measured_central}

            # Get the reweighted powheg pythia distributions
            for label, tag, input_file in reweighted_inputs:
                measured, _ = _measured_projection(input_file, variable, channel, tag)
                models[label] = measured

            # Get the data input (data after background subtraction, and fake removal)
            file_for_data = file_for_data_template.format(variable=variable)
            data = read_data_from_JSON(file_for_data)["TTJet"]
            data = value_error_tuplelist_to_hist(data, reco_bin_edges_vis[variable])
            data = removeFakes(measured_central, fakes_central, data)

            # Plot the central and reweighted models against data
            hp = Histogram_properties()
            hp.name = "Reweighting_check_{channel}_{variable}_at_{com}TeV".format(
                channel=channel, variable=variable, com="13"
            )

            v_latex = latex_labels.variables_latex[variable]
            unit = ""
            if variable in ["HT", "ST", "MET", "WPT", "lepton_pt"]:
                unit = " [GeV]"
            hp.x_axis_title = v_latex + unit
            hp.y_axis_title = "Number of events"
            hp.title = "Reweighting check for {variable}".format(variable=v_latex)

            # Merge pairs of bins and normalise every distribution to unit
            # integral so that only the shapes are compared.
            for histogram in list(models.values()) + [data]:
                histogram.Rebin(2)
                histogram.Scale(1 / histogram.Integral())

            compare_measurements(
                models=models,
                measurements={"Data": data},
                show_measurement_errors=True,
                histogram_properties=hp,
                save_folder="plots/unfolding/reweighting_check",
                save_as=["pdf"],
            )
def read_xsection_measurement_results( category, channel ):
    '''
    Read the normalised cross-section results for one category and channel.

    The file names are built from the module-level path_to_JSON and method;
    the binning is chosen from the module-level phase_space and variable.

    Returns two dictionaries:
      1. histograms for the comparison of different generators
      2. histograms for the comparison of systematic shifts
    Both always contain 'measured' and 'unfolded'. For category 'central'
    they additionally contain the model distributions stored in the same
    file and, under 'measured_with_systematics' / 'unfolded_with_systematics',
    the objects built by value_errors_tuplelist_to_graph from the
    '_with_errors' file.
    '''
    global path_to_JSON, variable, met_type, phase_space, method

    file_template = '{path}/{category}/{name}_{channel}_{method}{suffix}.txt'
    common_fields = {
        'path': path_to_JSON,
        'category': category,
        'name': 'normalised_xsection',
        'channel': channel,
        'method': method,
    }

    def read_results( suffix ):
        # Log before reading so that a failing read still reports its file.
        filename = file_template.format( suffix = suffix, **common_fields )
        xsec_04_log.debug('Reading file {0}'.format(filename))
        return read_data_from_JSON( filename )

    normalised_xsection_unfolded = read_results( '' )

    # Only look up the binning that is actually needed for this phase space.
    if phase_space == 'VisiblePS':
        edges = bin_edges_vis[variable]
    else:
        edges = bin_edges_full[variable]

    def to_hist( key ):
        return value_error_tuplelist_to_hist( normalised_xsection_unfolded[key], edges )

    h_normalised_xsection = to_hist( 'TTJet_measured' )
    h_normalised_xsection_unfolded = to_hist( 'TTJet_unfolded' )

    histograms_normalised_xsection_different_generators = {
        'measured': h_normalised_xsection,
        'unfolded': h_normalised_xsection_unfolded,
    }
    histograms_normalised_xsection_systematics_shifts = {
        'measured': h_normalised_xsection,
        'unfolded': h_normalised_xsection_unfolded,
    }

    if category == 'central':
        # true distributions; the same powhegPythia8 histogram is shared by
        # both dictionaries
        h_normalised_xsection_powhegPythia8 = to_hist( 'powhegPythia8' )

        # NOTE: 'amcatnloHerwig' is currently not read
        histograms_normalised_xsection_different_generators.update( {
            'powhegPythia8': h_normalised_xsection_powhegPythia8,
            'amcatnloPythia8': to_hist( 'amcatnlo' ),
            'madgraphMLM': to_hist( 'madgraphMLM' ),
            'powhegHerwig': to_hist( 'powhegHerwig' ),
        } )

        # NOTE: 'scaleup' / 'scaledown' are currently not read
        histograms_normalised_xsection_systematics_shifts.update( {
            'powhegPythia8': h_normalised_xsection_powhegPythia8,
            'massdown': to_hist( 'massdown' ),
            'massup': to_hist( 'massup' ),
        } )

        # NOTE: the '_with_errors' file is used for the systematics entries;
        # the separate 'without generator' / 'without ttbar theory' error
        # files are currently not read.
        normalised_xsection_unfolded_with_errors = read_results( '_with_errors' )

        # a rootpy.Graph with asymmetric errors!
        h_normalised_xsection_unfolded_with_errors = value_errors_tuplelist_to_graph(
            normalised_xsection_unfolded_with_errors['TTJet_measured'],
            edges )
        h_normalised_xsection_unfolded_with_errors_unfolded = value_errors_tuplelist_to_graph(
            normalised_xsection_unfolded_with_errors['TTJet_unfolded'],
            edges )

        for histograms in ( histograms_normalised_xsection_different_generators,
                            histograms_normalised_xsection_systematics_shifts ):
            histograms['measured_with_systematics'] = h_normalised_xsection_unfolded_with_errors
            histograms['unfolded_with_systematics'] = h_normalised_xsection_unfolded_with_errors_unfolded

    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts
def main():
    '''
    Run the background-subtraction normalisation measurement.

    For the electron and muon channels and the variable selected on the
    command line:
    1 - find the measurement config files (JSON) for the chosen phase space
    2 - build a Measurement from each, calculate and save its normalisation
    3 - store the collected QCD transfer factors for the channel
    '''
    # config file template
    input_template = 'config/measurements/background_subtraction/{com}TeV/{ch}/{var}/{ps}/'
    output_folder_template = 'data/normalisation/background_subtraction/{com}TeV/{var}/{ps}/{cat}/'

    ps = 'VisiblePS' if args.visiblePS else 'FullPS'

    for ch in ['electron', 'muon']:
        for var in measurement_config.variables:
            # only the variable requested on the command line is processed
            if args.variable != var:
                continue

            # Folder holding all measurement configs for this channel/variable
            measurement_filepath = input_template.format(
                com = args.CoM, ch = ch, var = var, ps = ps,
            )
            measurement_files = get_files_in_path(measurement_filepath, file_ending='.json')

            qcd_transfer_factor = {}
            for config_file in sorted(measurement_files):
                # in test mode only the central measurement is run
                if args.test and 'central' not in config_file:
                    continue
                print('Processing file ' + config_file)
                sample = get_filename_without_extension(config_file)

                # Create Measurement Class using the JSON config
                measurement = Measurement(read_data_from_JSON(config_file))
                measurement.calculate_normalisation()
                measurement.save(ps)
                qcd_transfer_factor[sample] = [measurement.t_factor]

            output_folder = output_folder_template.format(
                com = args.CoM, var = var, ps = ps, cat = 'central',
            )
            store_transfer_factor(qcd_transfer_factor, output_folder, ch)
    return
示例#34
0
'''
Created on 23 Jan 2015

@author: phxlk
'''
from dps.utils.file_utilities import read_data_from_JSON

if __name__ == '__main__':
    # Sum the fit results over all bins for every sample in the 7TeV HT muon
    # fit-results file and print the per-sample totals.
    JSON_input_file = 'data/absolute_eta_M3_angle_bl/7TeV/HT/fit_results/central/fit_results_muon_patType1CorrectedPFMet.txt'
    normalisation = read_data_from_JSON(JSON_input_file)
    absolute_total_value = 0
    absolute_total_error = 0
    absolute_total_corrected_error = 0
    # items() and single-argument print() behave the same on Python 2 and 3
    for sample, fit_result in normalisation.items():
        print('Calculating total # events for sample "%s"' % sample)
        # loop over bins: each result is indexed as (value, error)
        total_events = sum(result[0] for result in fit_result)
        total_error = sum(result[1] for result in fit_result)
        # the quoted error is capped at the total number of events
        total_corrected_error = min(total_events, total_error)
        print('Total number of events %d += %d (%d)' % (total_events, total_corrected_error, total_error))
        absolute_total_value += total_events
        absolute_total_error += total_error
        # previously initialised but never accumulated
        absolute_total_corrected_error += total_corrected_error
        elif category == 'central_TTJet':
                electron_file = path_to_JSON + '/central/initial_normalisation_electron_' + met_type + '.txt'
                muon_file = path_to_JSON + '/central/initial_normalisation_muon_' + met_type + '.txt'            
        # elif category in met_uncertainties and not 'JES' in category and not 'JER' in category:
        #         electron_file = path_to_JSON + '/'+category+'/initial_normalisation_electron_' + met_type + '.txt'
        #         muon_file = path_to_JSON + '/'+category+'/initial_normalisation_muon_' + met_type + '.txt'
        elif category != 'central':
                electron_file = path_to_JSON + '/' + category + '/normalisation_electron_' + met_type + '.txt'
                muon_file = path_to_JSON + '/' + category + '/normalisation_muon_' + met_type + '.txt'    

        fit_results_electron = None
        fit_results_muon = None
        
        if category == 'Muon_up' or category == 'Muon_down':
            # fit_results_electron = read_data_from_JSON( path_to_JSON + '/central/initial_normalisation_electron_' + met_type + '.txt' )
            fit_results_electron = read_data_from_JSON( path_to_JSON + '/central/normalisation_electron_' + met_type + '.txt' )
            fit_results_muon = read_data_from_JSON( muon_file )
        elif category == 'Electron_up' or category == 'Electron_down':
            fit_results_electron = read_data_from_JSON( electron_file )
            # fit_results_muon = read_data_from_JSON( path_to_JSON + '/central/initial_normalisation_muon_' + met_type + '.txt' )
            fit_results_muon = read_data_from_JSON( path_to_JSON + '/central/normalisation_muon_' + met_type + '.txt' )
        else:
            fit_results_electron = read_data_from_JSON( electron_file )
            fit_results_muon = read_data_from_JSON( muon_file )
        fit_results_combined = combine_complex_results(fit_results_electron, fit_results_muon)
        TTJet_fit_results_electron = fit_results_electron['TTJet']
        TTJet_fit_results_muon = fit_results_muon['TTJet']
        TTJet_fit_results_combined = fit_results_combined['TTJet']

    #     # change back to original MET type for the unfolding
        met_type = translate_options[options.metType]
def get_fit_results( variable, channel ):
    '''
    Read the fit results of one variable and channel from JSON.

    The file location is assembled from the module-level path_to_JSON,
    category and met_type together with the given variable and channel.
    '''
    global path_to_JSON, category, met_type
    # NOTE(review): no separator is inserted between path_to_JSON and
    # variable - presumably path_to_JSON ends with '/'; verify against caller
    results_file = ''.join( [
        path_to_JSON, variable,
        '/fit_results/', category,
        '/fit_results_', channel, '_', met_type, '.txt',
    ] )
    return read_data_from_JSON( results_file )
示例#37
0
    for category in categories:
        #Setting up systematic MET for JES up/down samples
        met_type = translateOptions[options.metType]
        if category == 'JES_up':
            met_type += 'JetEnUp'
            if met_type == 'PFMETJetEnUp':
                met_type = 'patPFMetJetEnUp'
        elif category == 'JES_down':
            met_type += 'JetEnDown'
            if met_type == 'PFMETJetEnDown':
                met_type = 'patPFMetJetEnDown'

        #read fit results from JSON
        TTJet_fit_results_electron = read_data_from_JSON(
            path_to_JSON + '/' + variable + '/fit_results/' + category +
            '/fit_results_electron_' + met_type + '.txt')['TTJet']
        TTJet_fit_results_muon = read_data_from_JSON(path_to_JSON + '/' +
                                                     variable +
                                                     '/fit_results/' +
                                                     category +
                                                     '/fit_results_muon_' +
                                                     met_type +
                                                     '.txt')['TTJet']

        #change back to original MET type for the unfolding
        met_type = translateOptions[options.metType]
        #ad-hoc switch for PFMET -> patMETsPFlow
        if met_type == 'PFMET':
            met_type = 'patMETsPFlow'