def main():
    """Time repeated LLH evaluations against a running LLH service.

    Command-line arguments:
        -c / --conf_file: JSON service configuration; its ``ctrl_addr`` entry
            is the control address the client connects to.
        -d / --data_file: pickle file holding a sequence of test events.

    One fixed event from the data file is evaluated ``N_ITERATIONS`` times and
    the total and per-evaluation wall-clock time are printed.
    """
    parser = argparse.ArgumentParser(
        description="Starts an LLH client, which then requests 3600 evals."
    )
    parser.add_argument(
        "-c", "--conf_file", type=str, help="service configuration file", required=True
    )
    # The data file is opened unconditionally below, so it has to be required;
    # otherwise a missing -d surfaces as an obscure TypeError from open(None).
    parser.add_argument(
        "-d", "--data_file", type=str, help="test data file", required=True
    )
    args = parser.parse_args()

    with open(args.conf_file) as f:
        params = json.load(f)

    client = LLHClient(ctrl_addr=params["ctrl_addr"], conf_timeout=20000)

    # NOTE: pickle.load can execute arbitrary code; only use trusted data files.
    with open(args.data_file, "rb") as f:
        # The event at index 8 is the (hard-coded) benchmark event.
        event = pickle.load(f)[8]

    hit_data = event["hits"]
    evt_data = event["total_charge"]
    theta = event["params"]

    llhs = []
    start = time.time()
    for _ in range(N_ITERATIONS):
        llhs.append(client.eval_llh(hit_data, evt_data, theta))
    delta = time.time() - start

    print(
        f"{N_ITERATIONS} evals took {delta*1000:.3f} ms"
        f" ({delta/N_ITERATIONS*1e3:.3f} ms per eval)"
    )
def __init__(self, ctrl_addr, conf_timeout, rng=None):
    """Create the LLH client and pick the random number generator.

    ``ctrl_addr`` and ``conf_timeout`` are forwarded to ``LLHClient``.
    When ``rng`` is None a fresh ``np.random.default_rng`` generator is
    created; otherwise the supplied object is stored as-is.
    """
    self._llh_client = LLHClient(ctrl_addr, conf_timeout)
    self._rng = np.random.default_rng(None) if rng is None else rng
def main():
    """Run a FreeDOM reconstruction job described by a JSON configuration file.

    The configuration (``-c / --conf_file``) is read for, among others:
    ``service_conf``, ``i3cols_dir`` (or ``test_events_file`` as a fallback),
    ``cuda_devices``, ``base_req_addr``, ``base_ctrl_addr``, ``n_workers``,
    ``n_evts`` (optional), ``init_range``, ``param_search_limits``,
    ``n_live_points``, ``conf_timeout``, ``spherical_opt_conf``,
    ``do_postfit``, ``truth_seed`` (optional), ``par_names`` and
    ``outfile_name`` (optional).

    Steps: load events, add ``hit_data``/``evt_data`` keys, start one LLH
    service process per configured GPU, fit the events in a worker pool,
    print a timing summary, pickle a summary dataframe, and finally ask the
    LLH services to shut down.
    """
    parser = argparse.ArgumentParser(description="runs a FreeDOM reconstruction job")
    parser.add_argument(
        "-c", "--conf_file", type=str, help="reco configuration file", required=True
    )
    args = parser.parse_args()

    with open(args.conf_file) as f:
        conf = json.load(f)

    service_conf = conf["service_conf"]

    # Decide on the event source by testing for the key explicitly. Catching
    # KeyError around the whole loading step would also swallow unrelated
    # KeyErrors (e.g. raised inside the data loader) and silently fall back
    # to the pkl file while still recording i3cols_dirname in the metadata.
    if "i3cols_dir" in conf:
        i3cols_dirname = conf["i3cols_dir"]
        print(f"Loading i3cols data from {i3cols_dirname}")
        include_doms = "domnet_file" in service_conf
        events = i3cols_dataloader.load_events(
            i3cols_dirname,
            recos={"retro": "retro_crs_prefit__median__neutrino"},
            include_doms=include_doms,
        )[0]
    else:
        i3cols_dirname = None
        print("i3cols_dir not specified. Looking for a test events pkl file")
        # NOTE: pickle.load can execute arbitrary code; only use trusted files.
        with open(conf["test_events_file"], "rb") as f:
            events = pickle.load(f)

    allowed_DOMs = np.load(
        pkg_resources.resource_filename("freedom", "resources/allowed_DOMs.npy")
    )

    # add hit_data, evt_data keys based on the networks being used
    n_hit_features = service_conf["n_hit_features"]
    for event in events:
        event["hit_data"] = event["hits"][:, :n_hit_features]
        if event["hit_data"].shape[1] < n_hit_features:
            # some networks were trained expecting more features than
            # are loaded in the most recent data loader functions
            n_additional_cols = n_hit_features - event["hit_data"].shape[1]
            n_hits = len(event["hit_data"])
            event["hit_data"] = np.concatenate(
                (event["hit_data"], np.zeros((n_hits, n_additional_cols))), axis=1
            )

        # adapt to structure created for ICU reco
        event["hit_data"] = [event["hit_data"]]

        if "domnet_file" in service_conf:
            event["evt_data"] = [event["doms"][allowed_DOMs]]
        else:
            event["evt_data"] = [event["total_charge"]]

    gpus = conf["cuda_devices"]
    n_gpus = len(gpus)
    req_addrs = [adjust_addr_string(conf["base_req_addr"], gpu) for gpu in gpus]
    ctrl_addrs = [adjust_addr_string(conf["base_ctrl_addr"], gpu) for gpu in gpus]

    print("starting LLH services...")
    procs = []
    for ctrl_addr, req_addr, gpu in zip(ctrl_addrs, req_addrs, gpus):
        proc = Process(
            target=start_service, args=(service_conf, ctrl_addr, req_addr, gpu)
        )
        proc.start()
        procs.append(proc)

    # wait for the LLH services to start by attempting to connect to them
    for ctrl_addr in ctrl_addrs:
        LLHClient(ctrl_addr=ctrl_addr, conf_timeout=60000)

    print("Services ready")

    # From here on the services are known to be responsive, so make sure they
    # are told to shut down even if fitting or saving fails; otherwise the
    # non-daemonic service processes would keep the parent alive at exit.
    try:
        # start the reco jobs
        pool_size = conf["n_workers"]
        evts_to_process = conf.get("n_evts", len(events))
        print(
            f"\nReconstructing {evts_to_process} events with {pool_size} workers and {n_gpus} gpus. Starting the jobs...\n"
        )

        # Restrict to the requested number of events before splitting, so the
        # per-worker chunks never add up to more than evts_to_process.
        events_to_fit = events[:evts_to_process]
        evts_per_proc = int(math.ceil(evts_to_process / pool_size))
        evt_splits = [
            events_to_fit[i * evts_per_proc : (i + 1) * evts_per_proc]
            for i in range(pool_size)
        ]

        # workers are assigned to GPUs (and thus services) round-robin
        worker_gpu_inds = np.arange(pool_size) % n_gpus

        init_range = np.array(conf["init_range"])
        param_search_limits = np.array(conf["param_search_limits"]).T
        n_live_points = conf["n_live_points"]
        conf_timeout = conf["conf_timeout"]
        sph_opt_kwargs = conf["spherical_opt_conf"]
        do_postfit = conf["do_postfit"]
        truth_seed = conf.get("truth_seed", False)

        # fit events partial that fixes common parameters
        fit_events_partial = functools.partial(
            fit_events,
            ctrl_addrs=ctrl_addrs,
            init_range=init_range,
            search_limits=param_search_limits,
            n_live_points=n_live_points,
            conf_timeout=conf_timeout,
            do_postfit=do_postfit,
            truth_seed=truth_seed,
            **sph_opt_kwargs,
        )

        start = time.time()
        with Pool(pool_size) as p:
            outs = p.starmap(fit_events_partial, zip(evt_splits, worker_gpu_inds))
        delta = time.time() - start
        print(f"reconstructing {evts_to_process} events took: {delta/60:.1f} minutes")

        # print summary results, save output file
        # (flatten the per-worker lists without the quadratic sum(..., []))
        all_outs = [out for worker_outs in outs for out in worker_outs]

        print("Timing summary:")
        total_calls = sum(out[0]["n_calls"] for out in all_outs)
        total_iters = sum(out[0]["nit"] for out in all_outs)
        print(f"{total_calls} total calls")
        time_per_call = delta / total_calls
        print(f"{total_iters} total iters")
        time_per_iter = delta / total_iters
        print(f"{total_calls/len(all_outs):.1f} calls per event")
        print(f"{time_per_call*1e6:.2f} us per call")
        print(f"{total_iters/len(all_outs):.1f} iters per event")
        print(f"{time_per_iter*1e6:.2f} us per iter")

        print("\nSaving summary dataframe\n")
        # build summary df
        df = summary_df.build_summary_df(all_outs, conf["par_names"])
        # store some metadata
        df.attrs["reco_conf"] = conf
        df.attrs["reco_time"] = delta
        if i3cols_dirname is not None:
            df.attrs["i3cols_dirname"] = i3cols_dirname

        # append datetime to the filename to avoid accidentally overwriting previous reco job's output
        time_str = datetime.datetime.now().strftime("%m_%d_%Y-%H_%M_%S")
        outf_name = conf.get("outfile_name", "reco_out")
        outf_name = f"{outf_name}_{time_str}.pkl"
        df.to_pickle(outf_name)
    finally:
        print("Killing the LLH services")
        for proc, ctrl_addr in zip(procs, ctrl_addrs):
            with zmq.Context.instance().socket(zmq.REQ) as ctrl_sock:
                ctrl_sock.connect(ctrl_addr)
                ctrl_sock.send_string("die")
                proc.join()

    print("Done")
class I3FreeDOMClient:
    """IceTray module acting as a FreeDOM client of a running LLHService."""

    def __init__(self, ctrl_addr, conf_timeout, rng=None):
        """Connect to the LLH service and choose the random generator.

        ``ctrl_addr`` and ``conf_timeout`` are forwarded to ``LLHClient``.
        A new ``np.random.default_rng`` generator is created when ``rng`` is
        None; otherwise the given object is used.
        """
        self._llh_client = LLHClient(ctrl_addr, conf_timeout)
        self._rng = np.random.default_rng(None) if rng is None else rng

    def __call__(
        self,
        frame,
        geo,
        reco_pulse_series_name,
        suffix="",
        init_range=DEFAULT_INIT_RANGE,
        search_limits=DEFAULT_SEARCH_LIMITS,
        n_live_points=DEFAULT_N_LIVE_POINTS,
        do_postfit=True,
        store_all=False,
        truth_seed=False,
        batch_size=DEFAULT_BATCH_SIZE,
        par_transforms=None,
        do_track_dllh=False,
        **crs_fit_kwargs,
    ):
        """Fit the event held in ``frame`` and write the results back to it.

        The event is loaded with ``i3frame_dataloader.load_event``, fitted via
        ``timed_fit`` and stored under the key prefix ``FreeDOM_{suffix}_``.
        If the event carries true parameters, the LLH at those parameters is
        added to the result as ``truth_LLH``. With ``do_track_dllh`` set, the
        additional ``zero_track_fit`` results are stored as well (these are
        run with the postfit disabled). Extra keyword arguments are passed
        through to the fit.
        """
        event = i3frame_dataloader.load_event(frame, geo, reco_pulse_series_name)

        fit_args = dict(
            event=event,
            clients=[self._llh_client],
            rng=self._rng,
            init_range=init_range,
            search_limits=search_limits,
            n_live_points=n_live_points,
            do_postfit=do_postfit,
            store_all=store_all,
            truth_seed=truth_seed,
            param_transforms=par_transforms,
            batch_size=batch_size,
            spherical_indices=DEFAULT_SPHERICAL_INDICES,
            max_iter=DEFAULT_MAX_ITER,
            **crs_fit_kwargs,
        )
        best_fit = timed_fit(**fit_args)

        # evaluate the LLH at the true parameters when they are available
        if event["params"] is not None:
            best_fit["truth_LLH"] = self._llh_client.eval_llh(
                event["hit_data"][0], event["evt_data"][0], event["params"]
            )

        key_prefix = f"FreeDOM_{suffix}_"
        store_fit_result(
            frame, key_prefix, best_fit, par_transforms, store_i3_particles=True
        )

        if not do_track_dllh:
            return

        # do not conduct postfit for zero_track fits
        zero_track_args = {**fit_args, "do_postfit": False}
        no_track_fit, energy_only_fit = zero_track_fit(best_fit, **zero_track_args)

        store_dllh(frame, key_prefix, best_fit, no_track_fit, energy_only_fit)
        store_fit_result(frame, key_prefix + "no_track_", no_track_fit, par_transforms)
        store_fit_result(frame, key_prefix + "E_only_", energy_only_fit, par_transforms)
def fit_events(
    events,
    index,
    ctrl_addrs,
    init_range=DEFAULT_INIT_RANGE,
    search_limits=DEFAULT_SEARCH_LIMITS,
    n_live_points=None,
    random_seed=None,
    conf_timeout=60000,
    do_postfit=False,
    store_all=False,
    truth_seed=False,
    seeds=None,
    param_transforms=None,
    fixed_params=None,
    initial_points=None,
    ICU=False,
    **sph_opt_kwargs,
):
    """fit a list of events

    see batch_opt_ret for param descriptions

    Parameters specific to this function
    ------------------------------------
    events : sequence of event mappings; each needs "hit_data" and "evt_data"
        entries indexable per client, and may carry "params" and "retro"
    index : position in ``ctrl_addrs`` of the service to use (non-ICU mode)
    ctrl_addrs : control addresses of the running LLH services
    seeds : optional per-event seeds; ``None`` means no seed for any event
    ICU : if True, connect to the first three services in ``ctrl_addrs``
        instead of the single one selected by ``index``

    Returns
    -------
    list of tuples ``(fit_res, true_param_llh, delta)``, with a fourth
    element ``retro_param_llh`` appended for events that have a "retro" key.
    ``true_param_llh`` is None when the true parameters are not available.
    """
    rng = np.random.default_rng(random_seed)

    if ICU:
        # use this for ICU reco
        clients = [
            LLHClient(ctrl_addr=ctrl_addrs[i], conf_timeout=conf_timeout)
            for i in range(3)
        ]
    else:
        clients = [LLHClient(ctrl_addr=ctrl_addrs[index], conf_timeout=conf_timeout)]

    # Identity test on the argument itself: np.all(seeds) == None reduces over
    # the seed *contents*, which is not a reliable "no seeds given" check.
    if seeds is None:
        seeds = [None] * len(events)

    def summed_llh(event, params):
        """Total LLH of ``params``: sum of each client's LLH on its own data."""
        return sum(
            client.eval_llh(event["hit_data"][i], event["evt_data"][i], params)
            for i, client in enumerate(clients)
        )

    outputs = []
    for j, event in enumerate(events):
        fit_res = timed_fit(
            event,
            clients,
            rng,
            init_range,
            search_limits,
            n_live_points=n_live_points,
            do_postfit=do_postfit,
            store_all=store_all,
            truth_seed=truth_seed,
            seed=seeds[j],
            param_transforms=param_transforms,
            fixed_params=fixed_params,
            initial_points=initial_points,
            **sph_opt_kwargs,
        )
        delta = fit_res["delta"]

        # Only the lookup of the true parameters is guarded, so a KeyError
        # raised elsewhere (e.g. inside eval_llh) is no longer mistaken for
        # "true params not available".
        try:
            true_params = event["params"]
        except KeyError:
            true_params = None
        true_param_llh = None if true_params is None else summed_llh(event, true_params)

        if "retro" in event.keys():
            # Evaluated the same way as the truth LLH: per-client data,
            # summed over clients (hit_data/evt_data are per-client lists).
            retro_param_llh = summed_llh(event, event["retro"])
            outputs.append((fit_res, true_param_llh, delta, retro_param_llh))
        else:
            outputs.append((fit_res, true_param_llh, delta))

    return outputs