def main(datadir: str, sample: str = "tW_DR"):
    """Print PP8-vs-PH7 normalization and per-region differences for a sample.

    The "PP8" file list is looked up under the key ``f"{sample}_AFII"`` and
    the "PH7" file list under ``f"{sample}_PS"`` (``f"{sample}_PS713"`` when
    ``sample`` is ``"ttbar"``) in the mapping returned by
    ``quick_files(datadir)``.  Both are loaded with the ``"(OS == True)"``
    selection, and the following quantities are printed:

    * ``overall_norm_unc``: relative difference of the summed
      ``weight_nominal`` between the two samples, relative to PP8.
    * ``scale_fac``: the factor that scales the PH7 total onto the PP8 total.
    * ``mig_<region>``: for each of the regions 1j1b, 2j1b and 2j2b, the
      relative difference between the rescaled PH7 sum and the PP8 sum.

    Parameters
    ----------
    datadir
        Passed unchanged to ``quick_files``.
    sample
        Prefix of the keys used to pick the two file lists.
    """
    qf = quick_files(datadir)

    files_PP8 = qf[f"{sample}_AFII"]
    ph7_suffix = "PS713" if sample == "ttbar" else "PS"
    files_PH7 = qf[f"{sample}_{ph7_suffix}"]

    print("PP8 files:")
    pprint(files_PP8)
    print("PH7 files:")
    pprint(files_PH7)

    selection = "(OS == True)"
    regions = ("1j1b", "2j1b", "2j2b")
    branches = ["OS"] + [f"reg{region}" for region in regions]

    # Each call receives its own copy of the branch list, as in a literal.
    df_PP8 = iterative_selection(files_PP8, selection, branches=list(branches))
    df_PH7 = iterative_selection(files_PH7, selection, branches=list(branches))

    total_pp8 = df_PP8.weight_nominal.sum()
    total_ph7 = df_PH7.weight_nominal.sum()

    overall_norm_unc = abs(total_ph7 - total_pp8) / total_pp8
    print("overall_norm_unc:", overall_norm_unc)

    # Rescale PH7 to the PP8 total so only the region-to-region shape differs.
    scale_fac_for_PH7 = total_pp8 / total_ph7
    print("scale_fac:", scale_fac_for_PH7)

    def region_sum(frame, region):
        """Sum ``weight_nominal`` over rows flagged as being in ``region``."""
        return frame.query(f"reg{region}==True").weight_nominal.sum()

    pp8_sums = {region: region_sum(df_PP8, region) for region in regions}
    ph7_sums = {
        region: region_sum(df_PH7, region) * scale_fac_for_PH7
        for region in regions
    }
    migrations = {
        region: abs(ph7_sums[region] - pp8_sums[region]) / pp8_sums[region]
        for region in regions
    }

    for region, value in migrations.items():
        print(f"mig_{region}:", value)
示例#2
0
def test_selection_strings():
    """Check that four spellings of one selection give equal dataframes.

    The spellings are the ``&&``-joined form and the parenthesized
    ``&``-joined form, each written once with ``1`` and once with
    ``true``/``True`` for the boolean flags.
    """
    files = [
        str(test_file_root / name)
        for name in ("testfile1.root", "testfile3.root")
    ]
    reference_sel = "OS == 1 && reg2j2b == 1 && mass_lep1jet1 < 155"
    equivalent_sels = (
        "OS == true && reg2j2b == true && mass_lep1jet1 < 155",
        "(OS == 1) & (reg2j2b == 1) & (mass_lep1jet1 < 155)",
        "(OS == True) & (reg2j2b == True) & (mass_lep1jet1 < 155)",
    )

    reference = iterative_selection(files, reference_sel)
    others = [iterative_selection(files, sel) for sel in equivalent_sels]

    for other in others:
        assert reference.equals(other)
示例#3
0
def test_drop_jet2():
    """Check that ``drop_jet2`` removes every column whose name has "jet2"."""
    files = [
        str(test_file_root / name)
        for name in ("testfile1.root", "testfile3.root")
    ]
    df = iterative_selection(files, "(OS == True)")

    # Record the jet2 columns before the drop so they can be checked after.
    jet2_columns = [name for name in df.columns if "jet2" in name]
    df.drop_jet2()

    leftover = [name for name in jet2_columns if name in df.columns]
    assert not leftover
示例#4
0
def test_drop_avoid():
    """Check that no ``AVOID_IN_CLF`` column remains after ``drop_avoid``."""
    files = [
        str(test_file_root / f"testfile{index}.root")
        for index in (1, 2, 3)
    ]
    df = iterative_selection(files, "(reg1j1b == True)")
    df.drop_avoid()

    remaining = set(df.columns) & set(AVOID_IN_CLF)
    assert not remaining
示例#5
0
def test_exclude_avoids():
    """Check that ``exclude_avoids=True`` leaves no ``AVOID_IN_CLF`` columns.

    The check runs twice: once with no further options and once with
    ``keep_category="kinematics"`` added.
    """
    files = [
        str(test_file_root / f"testfile{index}.root")
        for index in (1, 2, 3)
    ]

    for extra_options in ({}, {"keep_category": "kinematics"}):
        df = iterative_selection(
            files,
            "(reg1j1b == True)",
            exclude_avoids=True,
            **extra_options,
        )
        overlap = set(df.columns) & set(AVOID_IN_CLF)
        assert not overlap
示例#6
0
def test_selection_augmented():
    """Check that the dataframe records the selection that produced it.

    ``selection_used`` must equal the selection string passed in, and
    ``selection_branches`` applied to it must give exactly the three branch
    names that the selection references.
    """
    selection = "(OS == True) & (reg1j1b == True) & (mass_lep1jet1 < 155)"
    files = [
        str(test_file_root / name)
        for name in ("testfile1.root", "testfile3.root")
    ]
    df = iterative_selection(files, selection)

    expected_branches = {"OS", "reg1j1b", "mass_lep1jet1"}
    assert set(selection_branches(df.selection_used)) == expected_branches
    assert df.selection_used == selection