Example #1
def template_analysis(environment_file, filelist, executor, compression):
    from coffea.processor import Runner
    from coffea.processor.test_items import NanoTestProcessor

    executor = executor(
        compression=compression,
        environment_file=environment_file,
        resources_mode="fixed",
        cores=2,
        memory=500,  # MB
        disk=1000,  # MB
        master_name="coffea_test",
        port=work_queue_port,  # Work Queue manager port, expected to be defined at module scope
        print_stdout=True,
    )

    run = Runner(executor)

    hists = run(filelist, "Events", NanoTestProcessor())

    print(hists)
    assert hists["cutflow"]["ZJets_pt"] == 18
    assert hists["cutflow"]["ZJets_mass"] == 6
    assert hists["cutflow"]["Data_pt"] == 84
    assert hists["cutflow"]["Data_mass"] == 66
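The keyword arguments above (environment_file, resources_mode, master_name, port) correspond to coffea's WorkQueueExecutor, so that is presumably the class passed in as executor. A minimal usage sketch, assuming work_queue_port is set at module scope and treating the file paths, environment tarball name, and compression level as placeholders:

from coffea.processor import WorkQueueExecutor

work_queue_port = 9123  # any free port for the Work Queue manager
filelist = {
    "ZJets": ["tests/samples/nano_dy.root"],      # placeholder sample paths
    "Data": ["tests/samples/nano_dimuon.root"],
}
template_analysis(
    environment_file="conda_env.tar.gz",  # hypothetical conda tarball
    filelist=filelist,
    executor=WorkQueueExecutor,
    compression=1,
)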
Example #2
def submit_job(client, parameters):
    mkdir(parameters["out_dir"])
    out_dir = f"{parameters['out_dir']}/"
    mkdir(out_dir)

    executor_args = {"client": client, "retries": 0}
    executor = DaskExecutor(**executor_args)
    processor_args = {
        "apply_to_output": partial(save_dask_pandas_to_parquet, out_dir=out_dir)
    }
    run = Runner(
        executor=executor,
        schema=DelphesSchema,
        chunksize=parameters["chunksize"],
        maxchunks=parameters["maxchunks"],
    )
    try:
        run(
            parameters["fileset"],
            "Delphes",
            processor_instance=DimuonProcessorDelphes(**processor_args),
        )
    except Exception as e:
        tb = traceback.format_exc()
        return "Failed: " + str(e) + " " + tb

    return "Success!"
Example #3
def submit_job(arg_set, parameters):
    mkdir(parameters["out_dir"])
    if parameters["pt_variations"] == ["nominal"]:
        out_dir = f"{parameters['out_dir']}/"
    else:
        out_dir = f"{parameters['out_dir']}_jec/"
    mkdir(out_dir)

    executor_args = {"client": parameters["client"], "retries": 0}
    processor_args = {
        "samp_info": parameters["samp_infos"],
        "do_timer": False,
        "do_btag_syst": False,
        "pt_variations": parameters["pt_variations"],
        "apply_to_output": partial(save_dask_pandas_to_parquet,
                                   out_dir=out_dir),
    }

    executor = DaskExecutor(**executor_args)
    run = Runner(
        executor=executor,
        schema=NanoAODSchema,
        chunksize=parameters["chunksize"],
        maxchunks=parameters["maxchunks"],
    )

    try:
        run(
            parameters["samp_infos"].fileset,
            "Events",
            processor_instance=DimuonProcessor(**processor_args),
        )

    except Exception as e:
        tb = traceback.format_exc()
        return "Failed: " + str(e) + " " + tb

    return "Success!"
    parameters = {"lumi": 3000000}
    client = dask.distributed.Client(processes=True,
                                     n_workers=1,
                                     threads_per_worker=1,
                                     memory_limit="2.9GB")
    print("Client created")

    file_name = "ggh_delphes.root"
    file_path = f"{os.getcwd()}/tests/samples/{file_name}"
    datasets = {"ggh_powheg": file_path}

    fileset = get_fileset(client, datasets, parameters)

    executor_args = {"client": client, "use_dataframes": True, "retries": 0}
    executor = DaskExecutor(**executor_args)
    run = Runner(executor=executor, schema=DelphesSchema, chunksize=10000)
    output = run(fileset,
                 "Delphes",
                 processor_instance=DimuonProcessorDelphes())

    # run() returns a lazy dask DataFrame because use_dataframes=True;
    # compute() materializes it as a pandas DataFrame
    df = output.compute()
    print(df)

    elapsed = round(time.time() - tick, 3)
    print(f"Finished everything in {elapsed} s.")

    dimuon_mass = df.loc[df.event == 20002, "dimuon_mass"].values[0]
    jj_mass = df.loc[df.event == 20011, "jj_mass"].values[0]
    assert df.shape == (86, 78)
    assert almost_equal(dimuon_mass, 124.3369651)
    assert almost_equal(jj_mass, 78.593476)
Example #5
    manager_host_port="localhost:{}".format(wq_port)
    # with a batch system, e.g., condor.
    # (If coffea is not installed at the execution site, then a conda
    # environment_file should be defined in the work_queue_executor_args.)
    # batch_type="condor", manager_name=wq_manager_name
)

workers.max_workers = 2
workers.min_workers = 1
workers.cores = 2
workers.memory = 1000  # MB.
workers.disk = 2000  # MB

with workers:
    # define the Runner instance
    run_fn = Runner(
        executor=executor,
        chunksize=100000,
        maxchunks=4,  # change this to None for a large run
    )
    # execute the analysis on the given dataset
    hists = run_fn(fileset, "Events", MyProcessor())

elapsed = time.time() - tstart

print(hists)
print(hists["mass"])

# (assert only valid when using maxchunks=4)
assert hists["sumw"]["DoubleMuon"] == 400224
Example #6
    samp_info = SamplesInfo(xrootd=False)
    samp_info.paths = dataset
    samp_info.year = "2018"
    samp_info.load("vbf_powheg", use_dask=False)
    samp_info.lumi_weights["vbf_powheg"] = 1.0
    print(samp_info.fileset)

    executor_args = {"client": client, "use_dataframes": True, "retries": 0}
    processor_args = {
        "samp_info": samp_info,
        "do_timer": False,
        "do_btag_syst": False
    }

    executor = DaskExecutor(**executor_args)
    run = Runner(executor=executor, schema=NanoAODSchema, chunksize=10000)
    out_df = run(
        samp_info.fileset,
        "Events",
        processor_instance=DimuonProcessor(**processor_args),
    )

    df = load_dataframe(client, parameters, inputs=out_df)
    out_hist = to_histograms(client, parameters, df=df)
    out_plot = plotter(client, parameters, hist_df=out_hist)

    elapsed = round(time.time() - tick, 3)
    print(f"Finished everything in {elapsed} s.")

    out_df = out_df.compute()
    dimuon_mass = out_df.loc[out_df.event == 2, "dimuon_mass"].values[0]