def main(datadir: str, sample: str = "tW_DR"):
    """Print normalization and per-region migration differences between two samples.

    Looks up the ``{sample}_AFII`` file list (labelled "PP8" in the output)
    and the ``{sample}_PS`` file list (``{sample}_PS713`` when ``sample`` is
    ``"ttbar"``; labelled "PH7") via ``quick_files(datadir)``, selects
    opposite-sign events from each, and prints:

    * the relative difference of the summed ``weight_nominal`` values,
    * the factor that scales the PH7 sum to the PP8 sum,
    * for each of the regions 1j1b, 2j1b and 2j2b, the relative difference
      between the rescaled PH7 yield and the PP8 yield.

    Parameters
    ----------
    datadir : str
        Directory handed to ``quick_files``.
    sample : str
        Sample prefix used to build the ``quick_files`` keys.
    """
    catalog = quick_files(datadir)
    nominal_files = catalog[f"{sample}_AFII"]
    # ttbar uses a differently named parton-shower key than the other samples.
    shower_key = f"{sample}_PS713" if sample == "ttbar" else f"{sample}_PS"
    shower_files = catalog[shower_key]

    print("PP8 files:")
    pprint(nominal_files)
    print("PH7 files:")
    pprint(shower_files)

    needed_branches = ["OS", "reg1j1b", "reg2j1b", "reg2j2b"]
    nominal_df = iterative_selection(
        nominal_files,
        "(OS == True)",
        branches=list(needed_branches),
    )
    shower_df = iterative_selection(
        shower_files,
        "(OS == True)",
        branches=list(needed_branches),
    )

    nominal_total = nominal_df.weight_nominal.sum()
    shower_total = shower_df.weight_nominal.sum()

    overall_norm_unc = abs(shower_total - nominal_total) / nominal_total
    print("overall_norm_unc:", overall_norm_unc)

    # Rescale the alternative sample to the nominal total so that the
    # per-region comparison below is not affected by the overall difference.
    rescale = nominal_total / shower_total
    print("scale_fac:", rescale)

    regions = ("1j1b", "2j1b", "2j2b")
    nominal_yield = {
        tag: nominal_df.query(f"reg{tag}==True").weight_nominal.sum()
        for tag in regions
    }
    shower_yield = {
        tag: shower_df.query(f"reg{tag}==True").weight_nominal.sum() * rescale
        for tag in regions
    }
    migration = {
        tag: abs(shower_yield[tag] - nominal_yield[tag]) / nominal_yield[tag]
        for tag in regions
    }

    for tag in regions:
        print(f"mig_{tag}:", migration[tag])
def test_selection_strings():
    """ROOT-style and numexpr-style selection strings give equal dataframes.

    The same cut is written four ways (ROOT syntax with ``1`` and with
    ``true``; numexpr syntax with ``1`` and with ``True``) and the frame
    returned by ``iterative_selection`` for the first spelling is compared
    against the frames for the other three.
    """
    input_files = [
        str(test_file_root / name)
        for name in ("testfile1.root", "testfile3.root")
    ]

    root_sel1 = "OS == 1 && reg2j2b == 1 && mass_lep1jet1 < 155"
    root_sel2 = "OS == true && reg2j2b == true && mass_lep1jet1 < 155"
    nume_sel1 = "(OS == 1) & (reg2j2b == 1) & (mass_lep1jet1 < 155)"
    nume_sel2 = "(OS == True) & (reg2j2b == True) & (mass_lep1jet1 < 155)"

    # All four selections are run before any comparison is made.
    reference = iterative_selection(input_files, root_sel1)
    alternatives = [
        iterative_selection(input_files, selection)
        for selection in (root_sel2, nume_sel1, nume_sel2)
    ]

    for frame in alternatives:
        assert reference.equals(frame)
def test_drop_jet2():
    """After ``df.drop_jet2()`` no column with "jet2" in its name remains."""
    input_files = [
        str(test_file_root / name)
        for name in ("testfile1.root", "testfile3.root")
    ]
    df = iterative_selection(input_files, "(OS == True)")

    # Record the jet2 columns before dropping so there is something to check.
    jet2_columns = [name for name in df.columns if "jet2" in name]
    df.drop_jet2()

    assert all(name not in df.columns for name in jet2_columns)
def test_drop_avoid():
    """After ``df.drop_avoid()`` no column listed in ``AVOID_IN_CLF`` remains."""
    input_files = [
        str(test_file_root / name)
        for name in ("testfile1.root", "testfile2.root", "testfile3.root")
    ]
    df = iterative_selection(input_files, "(reg1j1b == True)")
    df.drop_avoid()

    remaining = set(df.columns)
    assert remaining.isdisjoint(set(AVOID_IN_CLF))
def test_exclude_avoids():
    """``exclude_avoids=True`` yields no ``AVOID_IN_CLF`` columns.

    Checked once with only ``exclude_avoids=True`` and once more with
    ``keep_category="kinematics"`` added.
    """
    input_files = [
        str(test_file_root / name)
        for name in ("testfile1.root", "testfile2.root", "testfile3.root")
    ]

    extra_options = (
        {},
        {"keep_category": "kinematics"},
    )
    for options in extra_options:
        df = iterative_selection(
            input_files,
            "(reg1j1b == True)",
            exclude_avoids=True,
            **options,
        )
        present = set(df.columns)
        forbidden = set(AVOID_IN_CLF)
        assert not (present & forbidden)
def test_selection_augmented():
    """The returned frame remembers its selection string.

    Verifies that ``df.selection_used`` equals the selection passed in and
    that ``selection_branches`` applied to it gives exactly the three
    variables named in the cut.
    """
    input_files = [
        str(test_file_root / name)
        for name in ("testfile1.root", "testfile3.root")
    ]
    selection = "(OS == True) & (reg1j1b == True) & (mass_lep1jet1 < 155)"
    df = iterative_selection(input_files, selection)

    expected_vars = {"OS", "reg1j1b", "mass_lep1jet1"}
    found_vars = set(selection_branches(df.selection_used))

    assert found_vars == expected_vars
    assert df.selection_used == selection