Exemplo n.º 1
0
def make_data_mc_comparison_plot(
    histograms=[],
    histogram_lables=[],
    histogram_colors=[],
    histogram_properties=Histogram_properties(),
    data_index=0,
    save_folder='plots/',
    save_as=['pdf'],
    normalise=False,
    show_ratio=False,
    show_stat_errors_on_mc=False,
    draw_vertical_line=0,
    systematics_for_ratio=None,
    systematics_for_plot=None,
    histograms_to_compare=None,
):
    '''
    systematics_for_plot takes the same input as systematics_for_ratio. There may be some repition with reagrds to 
    mc_error and mc_relative_errors, but these only deal with a flat error up and down.
    '''
    save_folder = check_save_folder(save_folder)
    # make copies in order not to mess with existing histograms
    histograms_ = deepcopy(histograms)
    stack = HistStack()
    add_mc = stack.Add
    for index, histogram in enumerate(histograms_):
        label = histogram_lables[index]
        color = histogram_colors[index]

        histogram.SetTitle(label)
        if normalise:
            histogram.Sumw2()

        if not index == data_index:
            histogram.fillstyle = 'solid'
            histogram.fillcolor = color
            histogram.legendstyle = 'F'
            add_mc(histogram)

    data = histograms_[data_index]
    data.SetMarkerSize(CMS.data_marker_size)
    if normalise:
        n_events_data = data.Integral()
        n_events_mc = stack.Integral()
        data.Scale(1 / n_events_data)
        stack.Scale(1 / n_events_mc)

    # plot with matplotlib
    plt.figure(figsize=CMS.figsize, dpi=CMS.dpi, facecolor=CMS.facecolor)
    axes = None
    if show_ratio:
        ratio = data.Clone('ratio')
        sumHists = sum(stack.GetHists())
        for bin_i in range(1, sumHists.GetNbinsX()):
            sumHists.SetBinError(bin_i, 0)
        ratio.Divide(sum(stack.GetHists()))
        ratio.SetMarkerSize(3)
        gs = gridspec.GridSpec(2, 1, height_ratios=[5, 1])
        axes = plt.subplot(gs[0])
    else:
        axes = plt.axes()

    if histogram_properties.set_log_y:
        axes.set_yscale('log', nonposy="clip")
        axes.set_ylim(ymin=1e-2)

    if systematics_for_plot != None:
        plusErrors = [x + 1 for x in systematics_for_plot]
        minusErrors = [1 - x for x in systematics_for_plot]

        stack_lower = sum(stack.GetHists())
        stack_upper = stack_lower.Clone('upper')

        for bin_i in range(1, stack_lower.GetNbinsX() + 1):
            central_value = stack_lower.GetBinContent(bin_i)
            error_upper_bound = plusErrors[bin_i - 1] * central_value
            error_lower_bound = minusErrors[bin_i - 1] * central_value
            stack_upper.SetBinContent(bin_i, error_upper_bound)
            stack_lower.SetBinContent(bin_i, error_lower_bound)

        rplt.fill_between(
            stack_lower,
            stack_upper,
            axes,
            hatch='//',
            # facecolor = 'Black',
            facecolor='None',
            edgecolor='Grey',
            alpha=1.,
            linewidth=0.,
            zorder=len(histograms_) + 1)

    mc_error = histogram_properties.mc_error
    mc_relative_errors = histogram_properties.mc_relative_errors
    if mc_relative_errors:
        stack_lower = sum(stack.GetHists())
        stack_upper = stack_lower.Clone('upper')
        for bin_i in range(1, stack_lower.GetNbinsX()):
            central_value = stack_lower.GetBinContent(bin_i)
            relative_error = mc_relative_errors[bin_i - 1]

            error_upper_bound = central_value * (1 + relative_error)
            error_lower_bound = central_value * (1 - relative_error)

            stack_lower.SetBinContent(bin_i, error_upper_bound)
            stack_upper.SetBinContent(bin_i, error_lower_bound)
        rplt.fill_between(stack_upper,
                          stack_lower,
                          axes,
                          facecolor='0.75',
                          alpha=0.5,
                          hatch='/',
                          zorder=len(histograms_) + 1)
    else:
        if mc_error > 0:
            stack_lower = sum(stack.GetHists())
            stack_upper = stack_lower.Clone('upper')
            stack_lower.Scale(1 - mc_error)
            stack_upper.Scale(1 + mc_error)
            rplt.fill_between(stack_upper,
                              stack_lower,
                              axes,
                              facecolor='0.75',
                              alpha=0.5,
                              hatch='/',
                              zorder=len(histograms_) + 1)
        if not mc_error > 0 and show_stat_errors_on_mc:
            stack_lower = sum(stack.GetHists())
            mc_errors = list(stack_lower.yerravg())
            stack_upper = stack_lower.Clone('upper')
            for bin_i in range(1, stack_lower.GetNbinsX()):
                central_value = stack_lower.GetBinContent(bin_i)
                error = mc_errors[bin_i - 1]
                error_upper_bound = central_value + error
                error_lower_bound = central_value - error
                stack_lower.SetBinContent(bin_i, error_lower_bound)
                stack_upper.SetBinContent(bin_i, error_upper_bound)
            rplt.fill_between(stack_upper,
                              stack_lower,
                              axes,
                              facecolor='0.75',
                              alpha=0.5,
                              hatch='/',
                              zorder=len(histograms_) + 1)

    # a comment on zorder: the MC stack should be always at the very back (z = 1),
    # then the MC error (z = len(histograms_) + 1) and finally the data
    # (z = len(histograms_) + 2)
    rplt.hist(stack, stacked=True, axes=axes, zorder=1)
    rplt.errorbar(data,
                  emptybins=histogram_properties.emptybins,
                  axes=axes,
                  xerr=histogram_properties.xerr,
                  elinewidth=2,
                  capsize=10,
                  capthick=2,
                  zorder=len(histograms_) + 2)

    if histograms_to_compare:
        h_compare = {}
        for h, l, c in zip(histograms_to_compare['hists'],
                           histograms_to_compare['labels'],
                           histograms_to_compare['colours']):
            for histogram in histograms_:
                if histogram.GetTitle() not in [
                        histograms_to_compare['to_replace'], 'data'
                ]:
                    h += histogram
            h_compare[l] = [h, c]
            rplt.step(
                h,
                axes=axes,
                label=l,
                color=c,
                linewidth=4,
            )

    # put legend into the correct order (data is always first!)
    handles, labels = axes.get_legend_handles_labels()
    data_label_index = labels.index('data')
    data_handle = handles[data_label_index]
    labels.remove('data')
    handles.remove(data_handle)
    labels.insert(0, 'data')
    handles.insert(0, data_handle)
    if mc_error > 0 or (not mc_error > 0 and show_stat_errors_on_mc):
        p1 = Rectangle((0, 0), 1, 1, fc="0.75", alpha=0.5, hatch='/')
        handles.append(p1)
        labels.append(histogram_properties.mc_errors_label)

    l1 = axes.legend(handles,
                     labels,
                     numpoints=1,
                     frameon=histogram_properties.legend_color,
                     bbox_to_anchor=histogram_properties.legend_location,
                     bbox_transform=plt.gcf().transFigure,
                     prop=CMS.legend_properties,
                     ncol=histogram_properties.legend_columns)
    l1.set_zorder(102)

    set_labels(plt,
               histogram_properties,
               show_x_label=not show_ratio,
               axes=axes)

    x_limits = histogram_properties.x_limits
    y_limits = histogram_properties.y_limits

    if len(x_limits) >= 2:
        axes.set_xlim(xmin=x_limits[0], xmax=x_limits[-1])
    if len(y_limits) >= 2:
        axes.set_ylim(ymin=y_limits[0], ymax=y_limits[-1])
    else:
        y_max = get_best_max_y(
            histograms_, x_limits=x_limits) * histogram_properties.y_max_scale
        print("Chosen limits : ", 0, y_max)
        axes.set_ylim(ymin=0, ymax=y_max)
    if histogram_properties.set_log_y:
        if not len(y_limits) == 2:  # if not user set y-limits, set default
            axes.set_ylim(ymin=1e-1)

    #draw a red vertical line if needed:
    if draw_vertical_line != 0:
        plt.axvline(x=draw_vertical_line, color='red', linewidth=3)

    if show_ratio:
        plt.setp(axes.get_xticklabels(), visible=False)
        ax1 = plt.subplot(gs[1])
        ax1.minorticks_on()
        ax1.grid(True, 'major', linewidth=1)
        ax1.axhline(y=1, linewidth=1)
        set_labels(plt,
                   histogram_properties,
                   show_x_label=True,
                   show_title=False)
        plt.ylabel(r'$\frac{\mathrm{data}}{\mathrm{pred.}}$', CMS.y_axis_title)
        ax1.yaxis.set_label_coords(-0.115, 0.8)
        rplt.errorbar(ratio,
                      emptybins=histogram_properties.emptybins,
                      axes=ax1,
                      xerr=histogram_properties.xerr,
                      elinewidth=1.5,
                      capsize=5,
                      capthick=1.5)
        if histograms_to_compare:
            for l, h in h_compare.iteritems():
                r = data.Clone(l).Divide(h[0])
                rplt.step(
                    r,
                    axes=ax1,
                    label='',
                    colour=h[1],
                    linewidth=2,
                )

        if len(x_limits) >= 2:
            ax1.set_xlim(xmin=x_limits[0], xmax=x_limits[-1])
        if len(histogram_properties.ratio_y_limits) >= 2:
            ax1.set_ylim(ymin=histogram_properties.ratio_y_limits[0],
                         ymax=histogram_properties.ratio_y_limits[-1])

        # dynamic tick placement
        adjust_ratio_ticks(ax1.yaxis,
                           n_ticks=3,
                           y_limits=histogram_properties.ratio_y_limits)

        if histogram_properties.integerXVariable:
            ax1.tick_params(axis='x', which='minor', bottom='off', top='off')

        if systematics_for_ratio != None:
            plusErrors = [x + 1 for x in systematics_for_ratio]
            minusErrors = [1 - x for x in systematics_for_ratio]

            ratioPlusError = ratio.Clone('plus')
            ratioMinusError = ratio.Clone('minus')
            for bin_i in range(1, ratioPlusError.GetNbinsX() + 1):
                ratioPlusError.SetBinContent(bin_i, plusErrors[bin_i - 1])
                ratioMinusError.SetBinContent(bin_i, minusErrors[bin_i - 1])
            rplt.fill_between(
                ratioPlusError,
                ratioMinusError,
                axes,
                hatch='//',
                # facecolor = 'Black',
                facecolor='None',
                edgecolor='Grey',
                alpha=1.,
                linewidth=0.,
                # zorder = len(histograms_) + 1
                zorder=0)

    if CMS.tight_layout:
        plt.tight_layout()

    for save in save_as:
        if save == 'root':
            saveHistogramsToROOTFile(
                data, stack,
                save_folder + histogram_properties.name + '.' + save)
        else:
            plt.savefig(save_folder + histogram_properties.name + '.' + save)

    plt.close()
Exemplo n.º 2
0
        # need to set things like min, max, change to log, etc for hstack and soloHists
        normalizeTo = plots_path.get('normalize', plots_config.get('normalize', None))
        if normalizeTo is not None:
          dataScale = 0.
          if "unity" in normalizeTo:
            for hist in hstack:
              if (hist.integral() != 0):
                hist.scale(1.0/hist.integral())
            for hist in soloHists:
              if (hist.integral() != 0):
                hist.scale(1.0/hist.integral())
          elif "stack" in normalizeTo:
            for hist in soloHists:
              if (hist.integral() != 0):
                print(hist)
                print(hstack.Integral()/hist.integral())
                exit()
                hist.scale(hstack.Integral()/hist.integral())

          else:
            if normalizeTo not in [hist.title for hist in soloHists]: raise ValueError("Could not find %s as a solo hist for normalizing to." % normalizeTo)
            for hist in soloHists:
              if hist.title == normalizeTo: dataScale = hist.integral()
            mcScale = 0.
            for hist in hstack:
              mcScale += hist.integral()
            if mcScale != 0.:
              normalizeFactor = dataScale/mcScale
            else:
              normalizeFactor = 1.
            for hist in hstack: