示例#1
0
    def plot_features(self):
        """Plot feature histograms and correlation matrices for top vs. anti-top.

        For every entry of ``self.features2plot`` one histogram overlaying the
        top and the anti-top distribution is drawn with ``HepPlotter``:

        * entries prefixed with ``t_`` / ``tbar_`` are read from the paired
          dataframe columns ``t_<feature>`` and ``tbar_<feature>``;
        * every other entry is read from a single column, split into top and
          anti-top rows by comparing the ``target`` column with
          ``self.metadata['t_target']`` / ``self.metadata['tbar_target']``.

        A feature that maps to an already plotted base name is skipped.
        Afterwards the correlation matrix of ``self.features`` is drawn twice,
        once for the top rows and once for the anti-top rows.

        All figures are written below ``self.output``; nothing is returned.
        """
        self.msg_svc.INFO(
            "DL : Plotting features comparing top quarks and anti-quarks. ")

        target = self.df['target']
        top = self.df.loc[target == self.metadata['t_target']]
        tbar = self.df.loc[target == self.metadata['tbar_target']]

        # Sample key: basename of the input file without directory/extension.
        filename = self.metadata['file'].split('/')[-1].split('.')[0].rstrip(
            '\n')
        process_label = self.processlabel_args[filename]['label']

        seen = set()  # base feature names that already have a histogram
        for feature in self.features2plot:

            # Strip the object prefix; prefixed features live in dedicated
            # 't_<feature>' / 'tbar_<feature>' columns of the dataframe.
            if feature.startswith('t_'):
                feature = feature[2:]
                eventlevel = True
            elif feature.startswith('tbar_'):
                feature = feature[5:]
                eventlevel = True
            else:
                # single column; 'target' distinguishes top from anti-top
                eventlevel = False

            if feature in seen:
                continue
            seen.add(feature)

            x_label = self.text_dicts[feature]['label']
            if 'btag' in feature:
                # b-tagging labels carry a placeholder for the working point
                x_label = x_label.format(self.metadata['btag_wkpt'])

            hist = HepPlotter("histogram", 1)

            hist.ratio_plot = False
            hist.binning = self.text_dicts[feature]['bins']
            hist.stacked = False
            hist.logplot = False
            hist.x_label = x_label
            hist.y_label = "Events"
            hist.format = self.image_format
            hist.saveAs = self.output + "/hist_" + feature + "_" + self.date
            hist.ATLASlabel = 'top left'
            hist.ATLASlabelStatus = 'Simulation Internal'
            hist.numLegendColumns = 1
            hist.extra_text.Add(process_label, coords=[0.03, 0.80])

            hist.initialize()

            # NOTE(review): the scale factor (presumably a unit conversion --
            # confirm) is only applied to single-column features; the
            # event-level branch plots the raw columns, as before.
            multiply = 1.
            if feature.endswith('_m_ttbar') or feature == 'pt':
                multiply = 1e-3

            if eventlevel:
                top_values = self.df['t_' + feature]
                tbar_values = self.df['tbar_' + feature]
            else:
                top_values = top[feature].multiply(multiply)
                tbar_values = tbar[feature].multiply(multiply)

            hist.Add(top_values,
                     name=feature + '_top',
                     linecolor='r',
                     color='r',
                     draw='step',
                     label='Large-R Jet (top)')
            hist.Add(tbar_values,
                     name=feature + '_tbar',
                     linecolor='b',
                     color='b',
                     draw='step',
                     label='Large-R Jet (anti-top)')

            hist.execute()
            hist.savefig()

        ## Correlation Matrices of Features (top/antitop) ##
        corrmats = [top[self.features].corr(), tbar[self.features].corr()]
        names = ["top", "tbar"]
        namelabels = [r"t correlations", r"$\bar{\text{t}}$ correlations"]
        fontProperties = {'family': 'sans-serif'}
        opts = {'cmap': plt.get_cmap("bwr"), 'vmin': -1, 'vmax': +1}

        for name, namelabel, corrmat in zip(names, namelabels, corrmats):

            fig, ax = plt.subplots()

            heatmap1 = ax.pcolor(corrmat, **opts)
            cbar = plt.colorbar(heatmap1, ax=ax)

            cbar.ax.set_yticklabels(
                [i.get_text().strip('$') for i in cbar.ax.get_yticklabels()],
                **fontProperties)

            # Escape underscores so the labels survive LaTeX text rendering.
            labels = [i.replace('_', '\\_') for i in corrmat.columns.values]
            # shift location of ticks to center of the bins
            tick_positions = np.arange(len(labels)) + 0.5
            ax.set_xticks(tick_positions, minor=False)
            ax.set_yticks(tick_positions, minor=False)
            # 'fontdict' is passed by keyword: it is keyword-only in recent
            # matplotlib releases.
            ax.set_xticklabels(labels,
                               fontdict=fontProperties,
                               fontsize=18,
                               minor=False,
                               ha='right',
                               rotation=70)
            ax.set_yticklabels(labels,
                               fontdict=fontProperties,
                               fontsize=18,
                               minor=False)

            text_args = {
                'fontsize': 16,
                'ha': 'left',
                'va': 'bottom',
                'transform': ax.transAxes
            }

            ## ATLAS Label + Signal name
            ax.text(0.02, 1.00, r"\textbf{\textit{ATLAS}} Simulation Internal",
                    **text_args)
            ax.text(0.03, 0.93, "{0}, {1}".format(process_label, namelabel),
                    **text_args)

            ## Energy Label
            text_args['ha'] = 'right'
            ax.text(0.99, 1.00, r"$\sqrt{\text{s}}$ = 13 TeV", **text_args)

            # Save and close this specific figure rather than whichever
            # figure pyplot currently considers active.
            fig.savefig(self.output + "/correlations_{0}_{1}.{2}".format(
                name, self.date, self.image_format),
                        format=self.image_format,
                        dpi=300,
                        bbox_inches='tight')
            plt.close(fig)

        return
示例#2
0
    def plot_score(self):
        """Plot train/test DNN score distributions for every k-fold.

        For each fold ``i`` (parallel entries of ``self.train_data`` /
        ``self.test_data``) a normalised histogram with a Test/Train ratio
        panel is drawn from ``self.train_scores[i]`` and
        ``self.test_scores[i]``, split by label (``Y == 1`` is plotted as top,
        ``Y == 0`` as anti-top).

        Per fold, the ``self.percentile``-th percentile of the top test scores
        is used as threshold and the fraction of anti-top test scores at or
        above it is appended to ``self.rejections``.  Finally
        ``self.rejection`` is set to ``{'mean': ..., 'std': ...}`` of
        ``self.rejections``.

        NOTE(review): ``self.rejections`` is appended to, never reset here, so
        calling this method twice accumulates entries -- confirm that the
        caller initialises it.
        """
        # Sample key: basename of the input file without directory/extension.
        filename = self.metadata['file'].split('/')[-1].split('.')[0].rstrip(
            '\n')

        # Plot all k-fold cross-validation results.  The feature arrays are
        # kept in the zip so that the number of folds is unchanged.
        folds = zip(self.train_data['X'], self.train_data['Y'],
                    self.test_data['X'], self.test_data['Y'])
        for i, (_train_X, train_Y, _test_X, test_Y) in enumerate(folds):
            fold = str(i)

            hist = HepPlotter("histogram", 1)

            hist.ratio_plot = True
            hist.y_ratio_label = "Test/Train"
            hist.normed = True
            hist.binning = [0.05 * j for j in range(21)]
            hist.stacked = False
            hist.logplot = False
            hist.x_label = "DNN Score"
            hist.y_label = "Events"
            hist.format = self.image_format
            hist.label_size = 14
            hist.saveAs = self.output + "/hist_DNNscore_kfold{0}_{1}".format(
                i, self.date)
            hist.ATLASlabel = 'top left'
            hist.ATLASlabelStatus = 'Simulation Internal'
            hist.numLegendColumns = 1
            hist.extra_text.Add(self.processlabel_args[filename]['label'],
                                coords=[0.03, 0.80],
                                fontsize=14)

            hist.initialize()

            top_train_scores = self.train_scores[i][train_Y == 1]
            tbar_train_scores = self.train_scores[i][train_Y == 0]

            top_test_scores = self.test_scores[i][test_Y == 1]
            tbar_test_scores = self.test_scores[i][test_Y == 0]

            # (name key, legend label, colour, train scores, test scores)
            samples = [
                ('top', 'Large-R Jet (top)', 'r',
                 top_train_scores, top_test_scores),
                ('tbar', 'Large-R Jet (anti-top)', 'b',
                 tbar_train_scores, tbar_test_scores),
            ]

            ## Train: outlined histograms, denominators of the ratio
            for key, label, color, train_scores, _ in samples:
                hist.Add(train_scores,
                         name='score_' + key + '_train_' + fold,
                         linecolor=color,
                         color=color,
                         linewidth=2,
                         draw='step',
                         label=label + ' Train ' + fold,
                         ratio_den=True,
                         ratio_num=False,
                         ratio_partner='score_' + key + '_test_' + fold)

            ## Test: filled histograms, numerators of the ratio
            for key, label, color, _, test_scores in samples:
                hist.Add(test_scores,
                         name='score_' + key + '_test_' + fold,
                         linecolor=color,
                         color=color,
                         draw='stepfilled',
                         label=label + ' Test ' + fold,
                         alpha=0.5,
                         linewidth=0,
                         ratio_num=True,
                         ratio_den=False,
                         ratio_partner='score_' + key + '_train_' + fold)

            hist.execute()
            hist.savefig()

            ## Calculation of the rejection
            ## threshold = score at the configured percentile of true tops
            eff_value = np.percentile(top_test_scores, self.percentile)
            tbar_wrong = tbar_test_scores[tbar_test_scores >= eff_value]
            # NOTE(review): this is the fraction of anti-tops passing the
            # threshold; it raises ZeroDivisionError if a fold has no
            # anti-top test entries.
            rejection = len(tbar_wrong) * 1.0 / len(tbar_test_scores)
            self.rejections.append(rejection)

        self.rejection = {
            'mean': np.mean(self.rejections),
            'std': np.std(self.rejections)
        }

        return
示例#3
0
    def drawSyst(self, name=[], symmetrized=None, one_sided=False):
        """
        Draw single systematic with nominal

        The up (and, unless one-sided, down) variation stored in
        ``self.systData`` is overlaid on the nominal histogram with a
        Syst/Nom ratio panel; optional symmetrized variations are drawn as
        ``nominal +/- symmetrized``.  The figure is saved via ``HepPlotter``.

        @param name         name(s) for histogram: ``name[0]`` is the key of
                            the up variation (must end in "up" or "down"),
                            ``name[1]`` the key of the down variation (only
                            read when ``one_sided`` is False).  The default
                            list is only read, never mutated.
        @param symmetrized  Values from symmetrized uncertainties
        @param one_sided    Boolean for one sided systematic or not

        @raise ValueError   if ``name[0]`` ends in neither "up" nor "down"
        """
        first = name[0]
        if first.endswith("up"):
            systname = first[:-2]
        elif first.endswith("down"):
            systname = first[:-4]
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on 'systname'.
            raise ValueError(
                "drawSyst: systematic name must end with 'up' or 'down', "
                "got {0!r}".format(first))
        systname = systname.replace("_", "-")

        # Drop the analysis-specific prefix when present; names without it
        # are used as they are instead of raising IndexError.
        parts = systname.split("xleptonicT-mmerged-boostedcomb-")
        if len(parts) > 1:
            systname = parts[1]

        nominal = self.systData['nominal']
        h_nominal = nominal['data']  # data (histogram bins values)
        b_nominal = nominal['center']  # dummy values to get binning right

        hist = HepPlotter("histogram", 1)

        hist.ratio_plot = True  # plot a ratio of things [Data/MC]
        hist.ratio_type = "ratio"  # "ratio"
        hist.stacked = False  # stack plots
        hist.rebin = self.rebin
        hist.logplot = False  # plot on log scale
        hist.x_label = self.x_labels[self.variable]['label']
        hist.y_label = "Events"
        hist.extra_text = systname + '\n ' + self.sampleName
        hist.binning = nominal['bins']
        hist.numLegendColumns = 1
        hist.y_ratio_label = "Syst/Nom"
        hist.lumi = '14.7'  # in /fb
        hist.format = 'png'  # file format for saving image
        hist.saveAs = self.outpath + "h_syst_" + self.sampleName + "_" + systname  # save figure with unique name
        hist.CMSlabel = 'top left'  # 'top left', 'top right'; hack code for something else
        hist.CMSlabelStatus = 'Simulation Internal'  # ('Simulation')+'Internal' || 'Preliminary'

        hist.initialize()

        # Names of the variation histograms actually added; they become the
        # ratio partners of the nominal histogram.
        uncertainty_hists = []

        ## Regular uncertainties
        up = self.systData[first]['center']
        upData = self.systData[first]['data']
        hist.Add(up,
                 weights=upData,
                 name=systname + " UP",
                 label="UP",
                 linecolor='r',
                 color='r',
                 linestyle='dotted',
                 draw='step',
                 ratio_num=True,
                 ratio_den=False,
                 ratio_partner="nominal")
        uncertainty_hists.append(systname + " UP")
        if not one_sided:
            down = self.systData[name[1]]['center']
            downData = self.systData[name[1]]['data']
            hist.Add(down,
                     weights=downData,
                     name=systname + " DOWN",
                     label="DOWN",
                     linecolor='b',
                     color='b',
                     linestyle='dotted',
                     draw='step',
                     ratio_num=True,
                     ratio_den=False,
                     ratio_partner="nominal")
            uncertainty_hists.append(systname + " DOWN")

        ## Symmetrized uncertainties
        if symmetrized is not None:
            # - same binning as 'up' systematic
            hist.Add(up,
                     weights=h_nominal + symmetrized,
                     name=systname + " UP Symm.",
                     label="UP Symm.",
                     linecolor='r',
                     linestyle='solid',
                     draw='step',
                     color='r',
                     ratio_num=True,
                     ratio_den=False,
                     ratio_partner="nominal")
            uncertainty_hists.append(systname + " UP Symm.")
            hist.Add(up,
                     weights=h_nominal - symmetrized,
                     name=systname + " DOWN Symm.",
                     label="DOWN Symm.",
                     linecolor='b',
                     linestyle='solid',
                     draw='step',
                     color='b',
                     ratio_num=True,
                     ratio_den=False,
                     ratio_partner="nominal")
            uncertainty_hists.append(systname + " DOWN Symm.")

        ## nominal
        hist.Add(b_nominal,
                 weights=h_nominal,
                 name="nominal",
                 label="nominal",
                 linecolor='k',
                 draw='step',
                 linestyle='solid',
                 ratio_num=False,
                 ratio_den=True,
                 ratio_partner=uncertainty_hists)

        hist.execute()
        hist.savefig()  # save and close the figure

        return