def run():
    """Driver script: fit the configured model on each selected event and
    write one submission CSV per event.

    Reads CLI options via get_args(): out_path / in_path (submission dirs),
    test (bool: test set vs train_1), num / idx (round-robin event sharding).
    """
    print(datetime.datetime.now(), sys.argv[0], " begin")
    args = get_args()
    if (not os.path.exists(args.out_path)):
        os.makedirs(args.out_path)
    # Test set has 125 events and no truth; train_1 run uses a single event.
    if (args.test):
        print("calculation begin for test")
        path_to_input = os.path.join(path_to_trackml, "test")
        nevents = 125
        parts = ["hits"]
    else:
        print("calculation begin for train_1")
        path_to_input = os.path.join(path_to_trackml, "train_1")
        nevents = 1
        parts = ["hits", "truth"]
    for (i, data) in tqdm(enumerate(
            load_dataset(path_to_input, parts=parts, nevents=nevents)),
            total=nevents):
        # load_dataset yields (event_id, hits[, truth]) depending on `parts`.
        if (len(data) == 3):
            (event_id, hits, truth) = data
        elif (len(data) == 2):
            (event_id, hits) = data
        else:
            print(data)
            raise RuntimeError("not match")
        # Shard events across parallel jobs: process only i % num == idx.
        if (np.mod(i, args.num) == args.idx):
            print("go calc for event_id=", event_id)
        else:
            print("pass calc for event_id=", event_id)
            continue
        print("len(hits): ", len(hits))
        hits["event_id"] = event_id
        model = get_model(args)
        # Optionally seed the model with a previously computed submission.
        path_to_sub = get_path_to_submission(args.in_path, event_id)
        if (path_to_sub is None):
            print("submission set None")
        else:
            print("submission read from {0}".format(path_to_sub))
            model.set_submission(pd.read_csv(path_to_sub))
        model.fit(hits)
        submission = model.submission
        path_to_sub = get_path_to_submission(args.out_path, event_id)
        if (not os.path.exists(os.path.dirname(path_to_sub))):
            os.makedirs(os.path.dirname(path_to_sub))
        submission.to_csv(path_to_sub, index=None)
        # Score only when truth is available (train mode).
        if (len(data) == 3):
            score = score_event(truth, model.submission)
            print("score: ", score)
    print(datetime.datetime.now(), sys.argv[0], " end")
def run(filename):
    """Predict tracks for every test event with tuned UnrollingHelicesRt2
    parameters and write the concatenated submission to *filename*."""
    # Tuned DBSCAN feature weights and helix radius coefficients.
    weight_a1 = 0.90291
    weight_xy_rt = 0.010809
    weight_z1 = 0.357996
    weight_z2 = 0.229602
    model = models.UnrollingHelicesRt2(
        dbscan_features=["sina1", "cosa1", "z1", "z2", "x_rt", "y_rt"],
        dbscan_weight=[weight_a1, weight_a1, weight_z1, weight_z2,
                       weight_xy_rt, weight_xy_rt])
    model.coef_rt1 = 1.330075
    model.coef_rt2 = 1.92522
    model.niter = 150
    test_dir = os.path.join(path_to_trackml, "test")
    per_event_submissions = []
    for event_id, hits in load_dataset(test_dir, parts=["hits"]):
        labels = model.predict(hits)
        per_event_submissions.append(
            create_one_event_submission(event_id, hits, labels))
    pd.concat(per_event_submissions).to_csv(filename, index=None)
def run():
    """Extend a merged submission four times, then remove quadric outliers,
    printing the train_1 event score after every step."""
    print(datetime.datetime.now(), sys.argv[0], " begin")
    submission = pd.read_csv("01_merge.submission.csv")
    model = extension.RemoveOutliersByQuadric()
    path_to_input = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=1):
        # Steps 1-4: repeated track extension (was four copy-pasted blocks).
        for step in range(1, 5):
            submission = extension.extend(submission, hits)
            score = score_event(truth, submission)
            print("step%d, score: %0.5f" % (step, score))
        # Step 5: drop outlier hits via quadric fit.
        submission = model.run(submission, hits)
        score = score_event(truth, submission)
        print("step5, score: %0.5f" % (score))
    print(datetime.datetime.now(), sys.argv[0], " end")
def run():
    """Build initial track candidates for one train_1 event, merge them by
    length, and save the step-0 submission CSV."""
    print(datetime.datetime.now(), sys.argv[0], " begin")
    train_dir = os.path.join(path_to_trackml, "train_1")
    candidates_dir = "candidates0"
    istep = 0
    maker = make_candidates.UnrollingHelices(
        niter=150, output_dir=candidates_dir, eps0=0.0010)
    merger = merge.LengthMerge(candidates_output_dir=candidates_dir)
    for event_id, hits in load_dataset(train_dir, parts=["hits"],
                                       skip=0, nevents=1):
        print("len(hits): ", len(hits))
        print("make candidates")
        maker.run(event_id, hits)
        print("merge")
        merged = merger.run(event_id, hits)
        out_csv = "step{0}.submission.csv".format(istep)
        print("save subimission as {0}".format(out_csv))
        merged.to_csv(out_csv, index=None)
def run(filename):
    """Bayesian-optimize UnrollingHelicesRt2 weights / rt coefficients on one
    train_1 event and dump all sampled points plus the maximum to *filename*."""
    model = models.UnrollingHelicesRt2(
        dbscan_features=["sina1", "cosa1", "z1", "z2", "x_rt", "y_rt"],
        dbscan_weight=[1.0, 1.0, 0.75, 0.2, 0.05, 0.05])
    model.niter = 150
    train_dir = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(train_dir,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=1):

        def Fun4BO(w_a1, w_z1, w_z2, w_xy_rt, c_rt1, c_rt2):
            # Objective: event score for the given weight/coefficient set.
            model.dbscan_weight[:6] = [w_a1, w_a1, w_z1, w_z2, w_xy_rt, w_xy_rt]
            model.coef_rt1 = c_rt1
            model.coef_rt2 = c_rt2
            labels = model.predict(hits)
            sub = create_one_event_submission(event_id, hits, labels)
            return score_event(truth, sub)

        print("Bayesian Optimization")
        bounds = {"w_a1": (0.9, 1.2),
                  "w_z1": (0.3, 0.7),
                  "w_z2": (0.1, 0.4),
                  "w_xy_rt": (0.01, 0.2),
                  "c_rt1": (0.5, 1.5),
                  "c_rt2": (0.1, 5.0)}
        opt = BayesianOptimization(Fun4BO, bounds, verbose=True)
        opt.maximize(init_points=3, n_iter=100,
                     # acq="ucb",
                     kappa=2.576)
        # Collect every sampled parameter set, with the best point first.
        param_names = opt.res["max"]["max_params"].keys()
        sampled = opt.res["all"]["params"]
        n_sampled = len(sampled)
        table = {}
        for name in param_names:
            column = [opt.res["max"]["max_params"][name]]
            for k in range(n_sampled):
                column.append(sampled[k][name])
            table[name] = column
        table["value"] = [opt.res["max"]["max_val"]] + opt.res["all"]["values"]
        table["label"] = ["max"] + [str(k) for k in range(n_sampled)]
        pd.DataFrame(table).to_csv(filename, index=None)
def run():
    """Score the cumulative (full + good) step submissions for one train_1
    event, then try five extension passes on the final combination."""
    max_istep = 4
    full_submissions = [pd.read_csv("step{0}.submission.csv".format(s))
                        for s in range(max_istep + 1)]
    good_submissions = [pd.read_csv("step{0}_good.submission.csv".format(s))
                        for s in range(max_istep)]
    train_dir = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(train_dir,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=1):
        for step in range(max_istep + 1):
            # Step `step`'s full submission plus all earlier "good" tracks.
            combined = pd.concat([full_submissions[step]] +
                                 good_submissions[:step])
            score0 = score_event(truth, combined)
            print("step = {0}, score = {1}".format(step, score0))
            if step == max_istep:
                for pass_no in range(5):
                    combined = extension.extend(combined, hits)
                    score0 = score_event(truth, combined)
                    print("with extension = {0}, score = {1}".format(
                        pass_no + 1, score0))
def removeBlacklist(path='../input/', file='blacklist_training.zip'):
    """
    In the competition there are a number of particles that had non-physical
    behaviour. A blacklist was published of all non-physical particles.
    This function removes the particles that are on the blacklist from the
    input data set, writing cleaned copies as 'clean_<name>.zip'.

    Parameters
    ----------------------------------
    path : str or pathlib.Path
        path to the directory containing the zipped data files
    file : str or pathlib.Path
        the blacklist filename
    """
    files = get_zips(path)
    print(files)
    blackList_ids, blackList_data = get_blacklist(path, file)
    print(blackList_ids)
    # Iterate through each of the zip files
    for f in files:
        dir_name = os.path.splitext(f)[0]
        with zipfile.ZipFile(path + 'clean_' + f, mode='w',
                             compression=zipfile.ZIP_DEFLATED) as clean_zip:
            # Iterate through each event in the file
            for event_id, hits, cells, particles, truth in load_dataset(path + f):
                print("Event ID:", event_id)
                if event_id in blackList_ids:
                    print("This event contains blacklisted data")
                    bl_hits, bl_particles = blackList_data[
                        blackList_ids.index(event_id)]
                    # PERF FIX: the original dropped rows one hit / one
                    # particle at a time (O(n*m)); vectorized isin() filters
                    # the same rows in one pass per frame.
                    bad_hits = bl_hits['hit_id']
                    hits = hits[~hits['hit_id'].isin(bad_hits)]
                    cells = cells[~cells['hit_id'].isin(bad_hits)]
                    truth = truth[~truth['hit_id'].isin(bad_hits)]
                    bad_particles = bl_particles['particle_id']
                    particles = particles[
                        ~particles['particle_id'].isin(bad_particles)]
                    truth = truth[~truth['particle_id'].isin(bad_particles)]
                # BUG FIX: the original built entry names with '\event00000'
                # — '\e' is not an escape, so a literal backslash ended up in
                # the archive member name; zip entries must use '/'.
                prefix = dir_name + '/event00000' + str(event_id)
                for suffix, frame in (('-hits.csv', hits),
                                      ('-cells.csv', cells),
                                      ('-particles.csv', particles),
                                      ('-truth.csv', truth)):
                    clean_zip.writestr(prefix + suffix,
                                       frame.to_csv(index=False))
def run():
    """Run the UnrollingHelices scan over a single train_1 event."""
    scanner = scan.UnrollingHelices(niter=150)
    train_dir = os.path.join(path_to_trackml, "train_1")
    for event_id, hits in load_dataset(train_dir, parts=["hits"],
                                       skip=0, nevents=1):
        scanner.run(event_id, hits)
def run():
    """Extend the step-07 test submission over every test event and save the
    extended submission as 08_ext07_submission.csv."""
    # FIX: open the log through a context manager so it is closed even if
    # extension fails mid-run (original used bare open()/close()).
    with open("08.log", "w") as f:
        f.write("extention of 07 results.\n")
        path_to_input = os.path.join(path_to_trackml, "test")
        old_submission = pd.read_csv("07_test_UHBO_submission.csv")
        sys.stderr.write("load data\n")
        for event_id, hits in load_dataset(path_to_input, parts=["hits"]):
            # NOTE(review): the output file is rewritten every iteration with
            # the extension of the ORIGINAL submission against only this
            # event's hits — looks intentional for incremental runs; confirm.
            submission = extension.extend(old_submission, hits)
            submission.to_csv("08_ext07_submission.csv", index=None)
def run():
    """Length-merge the candidates for one train_1 event, save the merged
    submission, and print its score."""
    merger = merge.LengthMerge()
    train_dir = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(train_dir,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=1):
        merged = merger.run(event_id, hits)
        merged.to_csv("01_merge.submission.csv", index=None)
        print("score: %0.5f" % score_event(truth, merged))
def run(filename):
    """Bayesian-optimize the 8 DBSCAN feature weights (and iteration count)
    of UnrollingHelices on one train_1 event; write all sampled points plus
    the maximum to *filename*."""
    model = models.UnrollingHelices(
        use_outlier=False,
        dbscan_features=["sina1", "cosa1", "z1", "x1", "x2",
                         "x_y", "x_rt", "y_rt"],
        dbscan_weight=[1.0, 1.0, 0.75, 0.5, 0.5, 0.2, 0.2, 0.2])
    path_to_input = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=1):

        # BUG FIX: the original signature read "w_a1, w_z1 w_x1, ..." —
        # the missing comma was a SyntaxError.
        def Fun4BO(w_a1, w_z1, w_x1, w_x2, w_x_y, w_xy_rt, niter):
            model.dbscan_weight[0] = w_a1
            model.dbscan_weight[1] = w_a1
            model.dbscan_weight[2] = w_z1
            model.dbscan_weight[3] = w_x1
            model.dbscan_weight[4] = w_x2
            model.dbscan_weight[5] = w_x_y
            model.dbscan_weight[6] = w_xy_rt
            model.dbscan_weight[7] = w_xy_rt
            model.iter_size_helix = int(niter)
            labels = model.predict(hits)
            one_submission = create_one_event_submission(event_id, hits, labels)
            return score_event(truth, one_submission)

        print("Bayesian Optimization")
        # BUG FIX: the bound keys must match Fun4BO's parameter names —
        # BayesianOptimization passes them as keyword arguments; the original
        # "w1".."w6" keys would raise a TypeError on the first probe.
        opt = BayesianOptimization(Fun4BO,
                                   {"w_a1": (0.9, 1.2),
                                    "w_z1": (0.3, 0.8),
                                    "w_x1": (0.1, 0.6),
                                    "w_x2": (0.1, 0.6),
                                    "w_x_y": (0.1, 0.6),
                                    "w_xy_rt": (0.1, 0.6),
                                    "niter": (140, 190)},  # (140, 190)
                                   verbose=True)
        opt.maximize(init_points=3, n_iter=20, acq="ucb", kappa=2.576)
        # [string]
        labels = opt.res["max"]["max_params"].keys()
        # [dict(string, [float])]
        params = opt.res["all"]["params"]
        len_params = len(params)
        data_dic = {}
        for label in labels:
            val = [opt.res["max"]["max_params"][label]]
            for i in range(len_params):
                val.append(params[i][label])
            data_dic[label] = val
        data_dic["value"] = ([opt.res["max"]["max_val"]] +
                             opt.res["all"]["values"])
        data_dic["label"] = ["max"] + [str(x) for x in range(len_params)]
        df = pd.DataFrame(data_dic)
        # BUG FIX: DataFrame.to_csv has no `label` keyword; `index=None`
        # (as the sibling scripts use) is what was intended.
        df.to_csv(filename, index=None)
def create_submission():
    """Predict every test event in a 4-worker process pool and write the
    combined submission as a gzipped CSV."""
    jobs = []
    pool = mp.Pool(processes=4)
    try:
        for event_id, hits, cells in load_dataset(path_to_test,
                                                  parts=['hits', 'cells']):
            jobs.append(pool.apply_async(add_submission, args=(event_id, hits)))
        # Create submission file
        dataset_submissions = [job.get() for job in jobs]
    finally:
        # BUG FIX: the original never closed/joined the pool, leaking the
        # worker processes on every call.
        pool.close()
        pool.join()
    submission = pd.concat(dataset_submissions, axis=0)
    submission.to_csv('results/submission_predict2.csv.gz',
                      index=False, compression='gzip')
def run():
    """Quadric-merge previously generated candidates for one train_1 event,
    save the submission, and print its score."""
    merger = merge.QuadricMerge(
        candidates_output_dir="../09_UH_len/candidates")
    train_dir = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(train_dir,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=1):
        merged = merger.run(event_id, hits)
        merged.to_csv("01_merge.submission.csv", index=None)
        print("score: %0.5f" % score_event(truth, merged))
def run(model, test_or_train, path_to_out, nevents=None):
    """Run *model* over the test or train_1 dataset and write the combined
    submission to path_to_out/submission.csv.

    Parameters
    ----------
    model : object with a predict(hits) -> labels method
    test_or_train : "test" or "train_1"
    path_to_out : output directory (created if missing)
    nevents : optional event limit; only valid for "train_1"
    """
    if (test_or_train not in ["test", "train_1"]):
        sys.stderr.write(
            "Error. test_or_train must be \"test\" or \"train_1\"\n")
        sys.exit()
    if (test_or_train == "test" and (nevents is not None)):
        # BUG FIX: the original wrote sys.strerr.write, which would raise
        # AttributeError instead of reporting the error.
        sys.stderr.write("Error")
        sys.exit()
    path_to_input = os.path.join(path_to_trackml, test_or_train)
    os.makedirs(path_to_out, exist_ok=True)
    print("calculation begin : {0}".format(datetime.datetime.today()))
    dataset_submission = []
    if (test_or_train == "test"):
        for event_id, hits in load_dataset(path_to_input, parts=["hits"]):
            sys.stderr.write("processing event_id : {0}".format(event_id))
            labels = model.predict(hits)
            one_submission = create_one_event_submission(event_id, hits, labels)
            dataset_submission.append(one_submission)
    else:
        dataset_score = []
        for event_id, hits, truth in load_dataset(path_to_input,
                                                  parts=["hits", "truth"],
                                                  skip=0, nevents=nevents):
            sys.stderr.write("processing event_id : {0}".format(event_id))
            labels = model.predict(hits)
            one_submission = create_one_event_submission(event_id, hits, labels)
            dataset_submission.append(one_submission)
            score = score_event(truth, one_submission)
            dataset_score.append(score)
            print("Score for event %d:%.8f" % (event_id, score))
        print("Mean Score : %.8f" % (np.sum(dataset_score) /
                                     len(dataset_score)))
    submission = pd.concat(dataset_submission)
    submission.to_csv(os.path.join(path_to_out, "submission.csv"), index=None)
    print("calculation end : {0}".format(datetime.datetime.today()))
def acquire_vlm_module_dataset(skip, nevents, train_path):
    '''
    Create the dataset to predict the module id
    '''
    xs = []
    ys = []
    for event_id, hits, cells, particles, truth in load_dataset(
            train_path, skip=skip, nevents=nevents):
        # Features: raw hit coordinates; target: the hit's module id.
        xs.append(hits[['x', 'y', 'z']].values)
        ys.append(hits.module_id)
    return np.concatenate(xs, axis=0), np.concatenate(ys, axis=0)
def run():
    """Extend the submission from 02.csv once on one train_1 event, save the
    result as 09.csv, and print the event score."""
    train_dir = os.path.join(path_to_trackml, "train_1")
    nevents = 1
    old_submission = pd.read_csv("02.csv")
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(train_dir,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=nevents):
        extended = extension.extend(old_submission, hits)
        extended.to_csv("09.csv", index=None)
        print("")
        print("score: %0.5f" % score_event(truth, extended))
def run():
    """Inspect the particle vertex-z distribution for one train_1 event:
    print the fraction of particles with |vz| below 10/20/30 mm and save a
    histogram of vz to tmp.pdf."""
    print("script begin", datetime.datetime.now())
    path_to_input = os.path.join(path_to_trackml, "train_1")
    nevents = 1
    sys.stderr.write("load data\n")
    for event_id, hits, truth, particle in load_dataset(
            path_to_input, parts=["hits", "truth", "particles"],
            skip=0, nevents=nevents):
        z = particle["vz"].values
        for zz in [10, 20, 30]:
            print(zz, len(z[np.where(abs(z) < zz)]) / len(z))
        # FIX: removed the unused local z0 (the |z|<100 subset was computed
        # but never used; the histogram plots the full distribution).
        plt.hist(z, bins=100)
        plt.savefig("tmp.pdf")
def acquire_vlm_dataset(skip, nevents, train_path):
    '''
    Create the dataset to predict the volume and layer id
    '''
    xs = []
    ys = []
    for event_id, hits, cells, particles, truth in load_dataset(
            train_path, skip=skip, nevents=nevents):
        # Target encodes volume and layer jointly: volume_id*100 + layer_id.
        target = (hits.volume_id * 100) + (hits.layer_id)
        xs.append(hits[['x', 'y', 'z']].values)
        ys.append(target)
    return np.concatenate(xs, axis=0), np.concatenate(ys, axis=0)
def run():
    """Apply five extension passes to the merged submission on one train_1
    event, printing the score after each pass."""
    print(datetime.datetime.now(), sys.argv[0], " begin")
    submission = pd.read_csv("01_merge.submission.csv")
    train_dir = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(train_dir,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=1):
        for pass_no in range(1, 6):
            submission = extension.extend(submission, hits)
            print("step%d, score: %0.5f" %
                  (pass_no, score_event(truth, submission)))
    print(datetime.datetime.now(), sys.argv[0], " end")
def run():
    """Apply five extension passes to the 09.csv submission on one train_1
    event, saving each intermediate submission and logging its score."""
    # FIX: open the log through a context manager — the original bare open()
    # was never closed, so the log could be truncated on a crash.
    with open("10.log", "w") as f:
        f.write("extention many times\n")
        path_to_input = os.path.join(path_to_trackml, "train_1")
        nevents = 1
        submission = pd.read_csv("09.csv")
        sys.stderr.write("load data\n")
        for event_id, hits, truth in load_dataset(path_to_input,
                                                  parts=["hits", "truth"],
                                                  skip=0, nevents=nevents):
            for i in range(5):
                submission = extension.extend(submission, hits)
                submission.to_csv("10_{0}.csv".format(i), index=None)
                score = score_event(truth, submission)
                f.write("i={0} score={1}\n".format(i, score))
def run():
    """Fit-predict the UnrollingHelicesModel on one train_1 event and print
    its score."""
    print(datetime.datetime.now(), sys.argv[0], " begin")
    train_dir = os.path.join(path_to_trackml, "train_1")
    model = UnrollingHelicesModel()
    for event_id, hits, truth in load_dataset(train_dir,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=1):
        print("len(hits): ", len(hits))
        labels = model.fit_predict(hits)
        print("score: ", model.score(hits, truth))
    print(datetime.datetime.now(), sys.argv[0], " end")
def run(filename):
    """Bayesian-optimize four UnrollingHelicesRt2 hyperparameters (three
    weights and the iteration count) on one train_1 event; write all sampled
    points plus the maximum to *filename*."""
    model = models.UnrollingHelicesRt2()
    path_to_input = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=1):

        def Fun4BO(w1, w2, w3, niter):
            # Objective: event score for this weight / iteration setting.
            model.dbscan_weight[0] = w1
            model.dbscan_weight[1] = w1
            model.dbscan_weight[2] = w2
            model.dbscan_weight[3] = w3
            model.niter = int(niter)
            labels = model.predict(hits)
            one_submission = create_one_event_submission(event_id, hits, labels)
            return score_event(truth, one_submission)

        print("Bayesian Optimization")
        opt = BayesianOptimization(Fun4BO,
                                   {"w1": (0.9, 1.2),
                                    "w2": (0.3, 0.7),
                                    "w3": (0.1, 0.4),
                                    "niter": (140, 190)},  # (140, 190)
                                   verbose=True)
        opt.maximize(init_points=3, n_iter=100,
                     # acq="ucb",
                     kappa=2.576)
        # [string]
        labels = opt.res["max"]["max_params"].keys()
        # [dict(string, [float])]
        params = opt.res["all"]["params"]
        len_params = len(params)
        data_dic = {}
        for label in labels:
            val = [opt.res["max"]["max_params"][label]]
            for i in range(len_params):
                val.append(params[i][label])
            data_dic[label] = val
        data_dic["value"] = ([opt.res["max"]["max_val"]] +
                             opt.res["all"]["values"])
        data_dic["label"] = ["max"] + [str(x) for x in range(len_params)]
        df = pd.DataFrame(data_dic)
        # BUG FIX: DataFrame.to_csv has no `label` keyword (TypeError);
        # `index=None` — as the sibling scripts use — is what was intended.
        df.to_csv(filename, index=None)
def run_candidate():
    """Score the ZAScale model on a filtered slice (z > 500, 50 < rt < 100)
    of one train_1 event."""
    print("script begin", datetime.datetime.now())
    EPS = 1e-12
    model = models.ZAScale(djs=np.arange(-20, 20 + EPS, 10),
                           dis=np.arange(-0.003, 0.003 + EPS, 0.00025),
                           min_ncand=1)
    nevents = 1
    train_dir = os.path.join(path_to_trackml, "train_1")
    out_dir = "out_{0}".format(sys.argv[0].split(".")[0])
    os.makedirs(out_dir, exist_ok=True)
    loaded = []
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(train_dir,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=nevents):
        print("size(hits) : ", len(hits))
        loaded.append((event_id, hits, truth))
    sys.stderr.write("scan\n")
    for event_id, hits, truth in loaded:
        df = truth.merge(hits, on=['hit_id'], how='left').copy()
        # Restrict to the forward barrel slice used for this study.
        df["rt"] = np.sqrt(df['x'].values ** 2 + df['y'].values ** 2)
        df = df.loc[df.z > 500]
        df = df.loc[(df.rt > 50) & (df.rt < 100)]
        label = model.predict(df)
        stacked = np.column_stack(([int(event_id)] * len(df),
                                   df.hit_id.values, label))
        submission = pd.DataFrame(columns=['event_id', 'hit_id', 'track_id'],
                                  data=stacked).astype(int)
        score = score_event(df, submission)
        max_score = df.weight.sum()
        print("score: %0.5f (%0.5f)" % (score * max_score, score))
    print("script end", datetime.datetime.now())
def run_candidate():
    """Predict tracks with UnrollingHelicesShiftingZ on one train_1 event,
    write the submission to 02.csv, and print the weighted score."""
    print("script begin", datetime.datetime.now())
    EPS = 1e-12
    model = models.UnrollingHelicesShiftingZ(
        djs=[-20, -10, 0, 10, 20],
        dbscan_features=["sina1", "cosa1", "z1", "z2", "x_rt", "y_rt"],
        dbscan_weight=[0.9, 0.9, 0.35, 0.22, 0.01, 0.01],
        coef_rt1=1.33,
        coef_rt2=0.0,
        niter=150)
    nevents = 1
    train_dir = os.path.join(path_to_trackml, "train_1")
    out_dir = "out_{0}".format(sys.argv[0].split(".")[0])
    loaded = []
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(train_dir,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=nevents):
        print("size(hits) : ", len(hits))
        loaded.append((event_id, hits, truth))
    sys.stderr.write("scan\n")
    for event_id, hits, truth in loaded:
        df = truth.merge(hits, on=['hit_id'], how='left').copy()
        label = model.predict(df)
        stacked = np.column_stack(([int(event_id)] * len(df),
                                   df.hit_id.values, label))
        submission = pd.DataFrame(columns=['event_id', 'hit_id', 'track_id'],
                                  data=stacked).astype(int)
        submission.to_csv("02.csv", index=None)
        score = score_event(df, submission)
        max_score = df.weight.sum()
        print("score: %0.5f (%0.5f)" % (score * max_score, score))
    print("script end", datetime.datetime.now())
def run():
    """Iterative refinement step 4: split the previous step's submission into
    short ("outlier") and long ("good") tracks, then re-run candidate making
    and merging on only the hits belonging to short tracks.

    Reads  step3.submission.csv; writes step3_outlier.submission.csv,
    step3_good.submission.csv, and step4.submission.csv.
    """
    print(datetime.datetime.now(), sys.argv[0], " begin")
    path_to_input = os.path.join(path_to_trackml, "train_1")
    candidates_dir = "candidates1"
    # Tracks shorter than th_len hits are treated as outliers to redo.
    th_len = 3
    istep = 4
    candidates_maker = make_candidates.UnrollingHelices(
        niter=150, output_dir=candidates_dir, eps0=0.04)
    # eps0=0.02 => 0.51353324, 0.53356
    merger = merge.LengthMerge(candidates_output_dir=candidates_dir)
    for event_id, hits in load_dataset(path_to_input, parts=["hits"],
                                       skip=0, nevents=1):
        print("len(hits): ", len(hits))
        path_submission0 = "step{0}.submission.csv".format(istep - 1)
        print("read submission file from {0}".format(path_submission0))
        submission0 = pd.read_csv(path_submission0)
        print("calculate track score")
        # Per-hit score = length of the track the hit was assigned to.
        tscore = score_track.score_by_length(submission0, hits)
        outlier_mask = tscore < th_len
        # Hits on short tracks, re-joined with their coordinates.
        hits1 = submission0[outlier_mask].merge(hits, on="hit_id")
        print("len(hits1): ", len(hits1))
        print("# of outlier: ", len(submission0[outlier_mask]))
        print("# of good: ", len(submission0[~outlier_mask]))
        df = submission0[outlier_mask]
        df.to_csv("step{0}_outlier.submission.csv".format(istep - 1),
                  index=None)
        df = submission0[~outlier_mask]
        df.to_csv("step{0}_good.submission.csv".format(istep - 1), index=None)
        print("make candidates")
        # Rebuild candidates/merge using only the outlier hits.
        candidates_maker.run(event_id, hits1)
        print("merge")
        submission1 = merger.run(event_id, hits1)
        submission1.to_csv("step{0}.submission.csv".format(istep), index=None)
    print(datetime.datetime.now(), sys.argv[0], " end")
def run_candidate():
    """Predict tracks with a tuned UnrollingHelicesShiftingZ on one train_1
    event, write the submission to 05.csv, and print the weighted score."""
    print("script begin", datetime.datetime.now())
    EPS = 1e-12
    model = models.UnrollingHelicesShiftingZ(
        dbscan_features=["sina1", "cosa1", "z1", "z2",
                         "x_y", "x_r", "y_r", "rt_r"],
        dbscan_weight=[2.7474448671796874, 2.7474448671796874,
                       1.3649721713529086, 0.7034918842926337,
                       0.0005549122352940002, 0.023096034747190672,
                       0.04619756315527515, 0.2437077420144654],
        djs=[-20, -10, 0, 10, 20],
        niter=150,
        eps0=0.00975)
    nevents = 1
    train_dir = os.path.join(path_to_trackml, "train_1")
    out_dir = "out_{0}".format(sys.argv[0].split(".")[0])
    loaded = []
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(train_dir,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=nevents):
        print("size(hits) : ", len(hits))
        loaded.append((event_id, hits, truth))
    sys.stderr.write("scan\n")
    for event_id, hits, truth in loaded:
        df = truth.merge(hits, on=['hit_id'], how='left').copy()
        label = model.predict(df)
        stacked = np.column_stack(([int(event_id)] * len(df),
                                   df.hit_id.values, label))
        submission = pd.DataFrame(columns=['event_id', 'hit_id', 'track_id'],
                                  data=stacked).astype(int)
        submission.to_csv("05.csv", index=None)
        score = score_event(df, submission)
        max_score = df.weight.sum()
        print("score: %0.5f (%0.5f)" % (score * max_score, score))
    print("script end", datetime.datetime.now())
def create_massive_dataset(classifier, train_path, training_aperture=2,
                           angle_delta=2, k_min=50, skip=30, nevents=5,
                           keep_only_percentage=50,
                           extend_start_of_track=True,
                           extend_end_of_track=True):
    '''
    Create a dataset to be used for the classification of a hit to a given
    track. It produce a X and y data for both the start and end of a track

    classifier - layer detector classifier
    extend_start_of_track - boolean to declare the will to extend the start
                            of the tracks
    extend_end_of_track - boolean to declare the will to extend the end of
                          the tracks

    Returns four concatenated numpy arrays:
    (X_start, y_start, X_end, y_end) across all processed events.
    NOTE(review): keep_only_percentage is accepted but never used here —
    confirm whether it should be forwarded to a helper.
    '''
    X_start_massive = []
    y_start_massive = []
    X_end_massive = []
    y_end_massive = []
    p_timer = PrintTime()
    for event_id, hits, cells, particles, truth in load_dataset(
            train_path, skip=skip, nevents=nevents):
        print('---------- Event :', event_id, '----------')
        ''' Predict basic tracks for the event '''
        # Helix-unrolling clustering hyperparameters for the base prediction.
        rz_scale = [1.3, 1.4, 0.94, 0.273, 0.01]
        shifting = [('z_shift', 0)]
        lab = unroll_helix_clustering(hits, classifier, truth,
                                      shifting=shifting, func='hough',
                                      dz0=0.002, num_step=40, eps=0.008,
                                      additional_theta=[0],
                                      rz_scale=rz_scale, print_info=True,
                                      threshold_value_post=15)
        ''' Create the start & end dataset for the tracks just inferred '''
        extended_subm = create_one_event_submission(0, hits, lab)
        X_train_start, y_train_start, X_train_end, y_train_end = \
            extend_supervised_create_dataset(
                extended_subm, hits, truth,
                training_aperture=training_aperture,
                angle_delta=angle_delta, k_min=k_min,
                extend_start_of_track=extend_start_of_track,
                extend_end_of_track=extend_end_of_track)
        X_start_massive.append(X_train_start)
        y_start_massive.append(y_train_start)
        X_end_massive.append(X_train_end)
        y_end_massive.append(y_train_end)
        print('\t', p_timer.get_timer())
    return (np.concatenate(X_start_massive, axis=0),
            np.concatenate(y_start_massive, axis=0),
            np.concatenate(X_end_massive, axis=0),
            np.concatenate(y_end_massive, axis=0))
def main():
    """Benchmark the tracker over the dataset (up to ~100 events), printing
    per-event and aggregate accuracy/time statistics."""
    tracker = Model()
    time_spent = 0
    n_event = 0
    score_sum = 0
    for event_id, hits, cells, truth in load_dataset(
            PATH_TO_DATA, parts=['hits', 'cells', 'truth']):
        print("Runing event", event_id, "...", flush=True)
        # Time the prediction alone.
        t_start = get_clock()
        sub = tracker.predict_one_event(event_id, hits, cells=cells)
        t_end = get_clock()
        elapsed = t_end - t_start
        score = score_event(truth, sub)
        # Running aggregates.
        time_spent += elapsed
        score_sum += score
        n_event += 1
        time_per_event = time_spent / n_event
        score = score_sum / n_event
        print("event", event_id, "accuracy score :", score)
        print("event", event_id, 'time spent :', elapsed)
        print('total time spent:', time_spent)
        print("running speed : {:.3f} sec per event".format(
            time_spent / n_event))
        print("running score :", mixt_score(score, time_per_event))
        print('-----------------------------------', flush=True)
        if n_event > 100:
            break
    if n_event == 0:
        print("Warning: no event where found in the given directory.")
        exit()
    if time_spent <= 0:
        print("Warning : execution time <= 0. Something went wrong !")
    time_per_event = time_spent / n_event
    score = score_sum / n_event
    print("Accuracy mean :", score)
    print("Time per event :", time_per_event)
    print("Overall mixt score :", mixt_score(score, time_per_event))
def run_candidate():
    """Score the ZAScaleNFilter model over a fine z-shift / inverse-radius
    grid on one train_1 event."""
    print("script begin", datetime.datetime.now())
    EPS = 1e-12
    # sigma of z is 5.5 mm
    model = models.ZAScaleNFilter(
        djs=np.linspace(-2.25, 2.25 + EPS, 10),
        dis=np.linspace(-0.003, 0.003 + EPS, 25))
    # model = models.ZAScaleNFilter(djs=[-20, 0.0, 20],
    #                               dis=[0.0])
    nevents = 1
    train_dir = os.path.join(path_to_trackml, "train_1")
    out_dir = "out_{0}".format(sys.argv[0].split(".")[0])
    loaded = []
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(train_dir,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=nevents):
        print("size(hits) : ", len(hits))
        loaded.append((event_id, hits, truth))
    sys.stderr.write("scan\n")
    for event_id, hits, truth in loaded:
        df = truth.merge(hits, on=['hit_id'], how='left').copy()
        label = model.predict(df)
        stacked = np.column_stack(([int(event_id)] * len(df),
                                   df.hit_id.values, label))
        submission = pd.DataFrame(columns=['event_id', 'hit_id', 'track_id'],
                                  data=stacked).astype(int)
        score = score_event(df, submission)
        max_score = df.weight.sum()
        print("score: %0.5f (%0.5f)" % (score * max_score, score))
    print("script end", datetime.datetime.now())
def test(self, path='../data/test/', number_of_events=np.inf):
    """Load test data (125 events) and run tracking; returns a dict mapping
    event id to its list of found tracks."""
    processed = 0
    tracks = dict()
    for event, hits in load_dataset(path, parts=['hits']):
        hits = hits.set_index('hit_id')[self.input_columns]
        # normalize the hit coordinates
        hits.loc[:, 'x':'z'] = (hits.loc[:, 'x':'z'] /
                                TrackFinder.MaxDetectorSize)
        tracks[event] = list()
        # apply tracking algorithm until every hit is consumed
        while len(hits.index) > 0:
            self.find_track(hits, tracks[event])
        print(str(len(tracks[event])) + " tracks found in event " +
              str(event))
        processed = processed + 1
        if processed >= number_of_events:
            break
    return tracks