Example #1
def run():
    print(datetime.datetime.now(), sys.argv[0], " begin")

    args = get_args()

    os.makedirs(args.out_path, exist_ok=True)

    if (args.test):
        print("calculation begin for test")
        path_to_input = os.path.join(path_to_trackml, "test")
        nevents = 125
        parts = ["hits"]

    else:
        print("calculation begin for train_1")
        path_to_input = os.path.join(path_to_trackml, "train_1")
        nevents = 1
        parts = ["hits", "truth"]

    for (i, data) in tqdm(enumerate(
            load_dataset(path_to_input, parts=parts, nevents=nevents)),
                          total=nevents):

        if (len(data) == 3):
            (event_id, hits, truth) = data
        elif (len(data) == 2):
            (event_id, hits) = data
        else:
            print(data)
            raise RuntimeError("unexpected data length: %d" % len(data))

        if (np.mod(i, args.num) == args.idx):
            print("go calc for event_id=", event_id)
        else:
            print("pass calc for event_id=", event_id)
            continue

        print("len(hits): ", len(hits))
        hits["event_id"] = event_id

        model = get_model(args)
        path_to_sub = get_path_to_submission(args.in_path, event_id)
        if (path_to_sub is None):
            print("submission set None")
        else:
            print("submission read from  {0}".format(path_to_sub))
            model.set_submission(pd.read_csv(path_to_sub))
        model.fit(hits)
        submission = model.submission

        path_to_sub = get_path_to_submission(args.out_path, event_id)
        os.makedirs(os.path.dirname(path_to_sub), exist_ok=True)
        submission.to_csv(path_to_sub, index=None)
        if (len(data) == 3):
            score = score_event(truth, model.submission)
            print("score: ", score)

    print(datetime.datetime.now(), sys.argv[0], " end")
Example #2
def run(filename):
    w_a1 = 0.90291
    w_xy_rt = 0.010809
    w_z1 = 0.357996
    w_z2 = 0.229602
    c_rt1 = 1.330075
    c_rt2 = 1.92522

    model = models.UnrollingHelicesRt2(
        dbscan_features=["sina1", "cosa1", "z1", "z2", "x_rt", "y_rt"],
        dbscan_weight=[w_a1, w_a1, w_z1, w_z2, w_xy_rt, w_xy_rt])
    model.coef_rt1 = c_rt1
    model.coef_rt2 = c_rt2
    model.niter = 150
    path_to_input = os.path.join(path_to_trackml, "test")
    dataset_submission = []
    for event_id, hits in load_dataset(path_to_input, parts=["hits"]):

        labels = model.predict(hits)

        one_submission = create_one_event_submission(event_id, hits, labels)

        dataset_submission.append(one_submission)

    submission = pd.concat(dataset_submission)
    submission.to_csv(filename, index=None)
Example #3
def run():
    print(datetime.datetime.now(), sys.argv[0], " begin")
    submission = pd.read_csv("01_merge.submission.csv")
    model = extension.RemoveOutliersByQuadric()

    path_to_input = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=1):

        for i in range(4):
            submission = extension.extend(submission, hits)
            score = score_event(truth, submission)
            print("step%d, score: %0.5f" % (i + 1, score))

        submission = model.run(submission, hits)
        score = score_event(truth, submission)
        print("step5, score: %0.5f" % (score))

    print(datetime.datetime.now(), sys.argv[0], " end")
Example #4
def run():
    print(datetime.datetime.now(), sys.argv[0], " begin")

    path_to_input = os.path.join(path_to_trackml, "train_1")
    candidates_dir = "candidates0"
    istep = 0
    candidates_maker = make_candidates.UnrollingHelices(
        niter=150, output_dir=candidates_dir, eps0=0.0010)
    merger = merge.LengthMerge(candidates_output_dir=candidates_dir)

    for event_id, hits in load_dataset(path_to_input,
                                       parts=["hits"],
                                       skip=0,
                                       nevents=1):
        print("len(hits): ", len(hits))

        print("make candidates")
        candidates_maker.run(event_id, hits)

        print("merge")
        submission = merger.run(event_id, hits)

        csvfilename = "step{0}.submission.csv".format(istep)
        print("save subimission as {0}".format(csvfilename))
        submission.to_csv(csvfilename, index=None)
Example #5
def run(filename):
    model = models.UnrollingHelicesRt2(
        dbscan_features=["sina1", "cosa1", "z1", "z2", "x_rt", "y_rt"],
        dbscan_weight=[1.0, 1.0, 0.75, 0.2, 0.05, 0.05])
    model.niter = 150
    path_to_input = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=1):

        def Fun4BO(w_a1, w_z1, w_z2, w_xy_rt, c_rt1, c_rt2):
            model.dbscan_weight[0] = w_a1
            model.dbscan_weight[1] = w_a1
            model.dbscan_weight[2] = w_z1
            model.dbscan_weight[3] = w_z2
            model.dbscan_weight[4] = w_xy_rt
            model.dbscan_weight[5] = w_xy_rt
            model.coef_rt1 = c_rt1
            model.coef_rt2 = c_rt2
            labels = model.predict(hits)
            one_submission = create_one_event_submission(
                event_id, hits, labels)
            score = score_event(truth, one_submission)
            return score

        print("Bayesian Optimization")
        opt = BayesianOptimization(Fun4BO, {
            "w_a1": (0.9, 1.2),
            "w_z1": (0.3, 0.7),
            "w_z2": (0.1, 0.4),
            "w_xy_rt": (0.01, 0.2),
            "c_rt1": (0.5, 1.5),
            "c_rt2": (0.1, 5.0)
        },
                                   verbose=True)
        opt.maximize(
            init_points=3,
            n_iter=100,
            acq="ucb",
            kappa=2.576)

        # names of the optimized parameters
        labels = opt.res["max"]["max_params"].keys()
        # one dict of parameter values per BO evaluation
        params = opt.res["all"]["params"]
        len_params = len(params)

        data_dic = {}

        for label in labels:
            val = [opt.res["max"]["max_params"][label]]
            for i in range(len_params):
                val.append(params[i][label])
            data_dic[label] = val
        data_dic["value"] = [opt.res["max"]["max_val"]
                             ] + opt.res["all"]["values"]
        data_dic["label"] = ["max"] + [str(x) for x in range(len_params)]
        df = pd.DataFrame(data_dic)
        df.to_csv(filename, index=None)
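
# Hedged sketch (not part of the original script): the CSV written above has
# one row per optimization step plus a "max" row, so the best parameter set
# can be recovered later like this, given the same `filename`:
#   df = pd.read_csv(filename)
#   best = df[df["label"] == "max"].iloc[0]
#   print("best value:", best["value"])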
Example #6
def run():

    max_istep = 4

    full_submission_list = [
        pd.read_csv("step{0}.submission.csv".format(istep))
        for istep in range(max_istep + 1)
    ]
    good_submission_list = [
        pd.read_csv("step{0}_good.submission.csv".format(istep))
        for istep in range(max_istep)
    ]
    path_to_input = os.path.join(path_to_trackml, "train_1")

    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=1):
        for mi in range(max_istep + 1):
            submission_list = [full_submission_list[mi]] + [
                good_submission_list[i] for i in range(mi)
            ]
            submission = pd.concat(submission_list)
            score0 = score_event(truth, submission)
            print("step = {0}, score = {1}".format(mi, score0))

            if (mi == max_istep):
                for i in range(5):
                    submission = extension.extend(submission, hits)
                    score0 = score_event(truth, submission)
                    print("with extension = {0}, score = {1}".format(
                        i + 1, score0))
Example #7
def removeBlacklist(path='../input/', file='blacklist_training.zip'):
    """ In the competition there are a number of particles that had non-physical behaviour.
    A blacklist was published of all non-physical particles.  This function removes the particles that are on the
    blacklist from the input data set.

    Parameters
    ----------------------------------
    path : str or pathlib.Path
        path to the directory containing the zipped data files

    file : str or pathlib.Path
        the blacklist filename
    """

    files = get_zips(path)
    print(files)
    blackList_ids, blackList_data = get_blacklist(path, file)
    print(blackList_ids)
    # Iterate through each of the zip files
    for f in files:
        # Iterate through each event in the file
        dir_name = os.path.splitext(f)[0]
        with zipfile.ZipFile(path + 'clean_' + f,
                             mode='w',
                             compression=zipfile.ZIP_DEFLATED) as clean_zip:

            for event_id, hits, cells, particles, truth in load_dataset(path +
                                                                        f):
                print("Event ID:", event_id)
                if event_id in blackList_ids:
                    print("This event contains blacklisted data")
                    bl_hits, bl_particles = blackList_data[blackList_ids.index(
                        event_id)]
                    for hit in bl_hits['hit_id']:
                        hits.drop(hits[hits['hit_id'] == hit].index,
                                  inplace=True)
                        cells.drop(cells[cells['hit_id'] == hit].index,
                                   inplace=True)
                        truth.drop(truth[truth['hit_id'] == hit].index,
                                   inplace=True)
                    for particle in bl_particles['particle_id']:
                        particles.drop(particles[particles['particle_id'] ==
                                                 particle].index,
                                       inplace=True)
                        truth.drop(
                            truth[truth['particle_id'] == particle].index,
                            inplace=True)
                # zip archive member names use '/' separators; the original
                # '\event...' left a literal backslash in the entry name
                clean_zip.writestr(
                    dir_name + '/event00000' + str(event_id) + '-hits.csv',
                    hits.to_csv(index=False))
                clean_zip.writestr(
                    dir_name + '/event00000' + str(event_id) + '-cells.csv',
                    cells.to_csv(index=False))
                clean_zip.writestr(
                    dir_name + '/event00000' + str(event_id) +
                    '-particles.csv', particles.to_csv(index=False))
                clean_zip.writestr(
                    dir_name + '/event00000' + str(event_id) + '-truth.csv',
                    truth.to_csv(index=False))
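
# Hedged usage sketch (not part of the original): rewrite every training zip
# under ../input/ with blacklisted hits and particles removed, using the
# defaults assumed above.
if __name__ == '__main__':
    removeBlacklist(path='../input/', file='blacklist_training.zip')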
Example #8
def run():
    model = scan.UnrollingHelices(niter=150)

    path_to_input = os.path.join(path_to_trackml, "train_1")
    for event_id, hits in load_dataset(path_to_input,
                                       parts=["hits"],
                                       skip=0,
                                       nevents=1):
        model.run(event_id, hits)
Example #9
def run():
    f = open("08.log", "w")
    f.write("extention of 07 results.\n")
    path_to_input = os.path.join(path_to_trackml, "test")
    old_submission = pd.read_csv("07_test_UHBO_submission.csv")
    sys.stderr.write("load data\n")
    for event_id, hits in load_dataset(path_to_input, parts=["hits"]):
        submission = extension.extend(old_submission, hits)
        submission.to_csv("08_ext07_submission.csv", index=None)
    f.close()
Example #10
def run():
    model = merge.LengthMerge()

    path_to_input = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=1):
        submission = model.run(event_id, hits)
        submission.to_csv("01_merge.submission.csv", index=None)
        score = score_event(truth, submission)
        print("score: %0.5f" % (score))
Example #11
def run(filename):
    model = models.UnrollingHelices(
        use_outlier=False,
        dbscan_features=["sina1", "cosa1", "z1", "x1", "x2", "x_y", "x_rt",
                         "y_rt"],
        dbscan_weight=[1.0, 1.0, 0.75, 0.5, 0.5, 0.2, 0.2, 0.2])
    path_to_input = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(path_to_input, parts=["hits", "truth"],
                                              skip=0, nevents=1):

        def Fun4BO(w_a1, w_z1, w_x1, w_x2, w_x_y, w_xy_rt, niter):
            model.dbscan_weight[0] = w_a1
            model.dbscan_weight[1] = w_a1
            model.dbscan_weight[2] = w_z1
            model.dbscan_weight[3] = w_x1
            model.dbscan_weight[4] = w_x2
            model.dbscan_weight[5] = w_x_y
            model.dbscan_weight[6] = w_xy_rt
            model.dbscan_weight[7] = w_xy_rt
            model.iter_size_helix = int(niter)
            labels = model.predict(hits)
            one_submission = create_one_event_submission(event_id, hits, labels)
            score = score_event(truth, one_submission)
            return score

        print("Bayesian Optimization")
        # the bound keys must match Fun4BO's parameter names, otherwise
        # BayesianOptimization cannot call the target function
        opt = BayesianOptimization(Fun4BO,
                                   {"w_a1": (0.9, 1.2),
                                    "w_z1": (0.3, 0.8),
                                    "w_x1": (0.1, 0.6),
                                    "w_x2": (0.1, 0.6),
                                    "w_x_y": (0.1, 0.6),
                                    "w_xy_rt": (0.1, 0.6),
                                    "niter": (140, 190)},
                                   verbose=True)
        opt.maximize(init_points=3,
                     n_iter=20,
                     acq="ucb",
                     kappa=2.576)

        # names of the optimized parameters
        labels = opt.res["max"]["max_params"].keys()
        # one dict of parameter values per BO evaluation
        params = opt.res["all"]["params"]
        len_params = len(params)

        data_dic = {}

        for label in labels:
            val = [opt.res["max"]["max_params"][label]]
            for i in range(len_params):
                val.append(params[i][label])
            data_dic[label] = val
        data_dic["value"] = [opt.res["max"]["max_val"]] + opt.res["all"]["values"]
        data_dic["label"] = ["max"] + [str(x) for x in range(len_params)]
        df = pd.DataFrame(data_dic)
        df.to_csv(filename, index=None)
Example #12
def create_submission():
    dataset_submissions = []
    jobs = []
    pool = mp.Pool(processes=4)
    for event_id, hits, cells in load_dataset(path_to_test, parts=['hits', 'cells']):
        jobs.append(pool.apply_async(add_submission, args=(event_id, hits)))

    # Collect the per-event submissions and release the worker pool
    dataset_submissions = [job.get() for job in jobs]
    pool.close()
    pool.join()
    submission = pd.concat(dataset_submissions, axis=0)
    submission.to_csv('results/submission_predict2.csv.gz', index=False, compression='gzip')
Example #13
def run():
    model = merge.QuadricMerge(candidates_output_dir="../09_UH_len/candidates")

    path_to_input = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=1):
        submission = model.run(event_id, hits)
        submission.to_csv("01_merge.submission.csv", index=None)
        score = score_event(truth, submission)
        print("score: %0.5f" % (score))
Example #14
def run(model, test_or_train, path_to_out, nevents=None):
    if test_or_train not in ["test", "train_1"]:
        sys.stderr.write('Error. test_or_train must be "test" or "train_1"\n')
        sys.exit()
    if test_or_train == "test" and (nevents is not None):
        sys.stderr.write('Error. nevents must be None when test_or_train is "test"\n')
        sys.exit()

    path_to_input = os.path.join(path_to_trackml, test_or_train)

    os.makedirs(path_to_out, exist_ok=True)
    print("calculation begin : {0}".format(datetime.datetime.today()))

    dataset_submission = []
    if test_or_train == "test":
        for event_id, hits in load_dataset(path_to_input, parts=["hits"]):
            sys.stderr.write("processing event_id : {0}".format(event_id))
            labels = model.predict(hits)

            one_submission = create_one_event_submission(event_id, hits, labels)
            dataset_submission.append(one_submission)
    else:
        dataset_score = []
        for event_id, hits, truth in load_dataset(path_to_input, parts=["hits", "truth"],
                                                  skip=0, nevents=nevents):
            sys.stderr.write("processing event_id : {0}".format(event_id))
            labels = model.predict(hits)

            one_submission = create_one_event_submission(event_id, hits, labels)
            dataset_submission.append(one_submission)
            
            score = score_event(truth, one_submission)
            dataset_score.append(score)

            print("Score for event %d:%.8f" % (event_id, score))
        print("Mean Score : %.8f" % (np.sum(dataset_score)/len(dataset_score)))
        
    submission = pd.concat(dataset_submission)
    submission.to_csv(os.path.join(path_to_out, "submission.csv"), index=None)
    print("calculation end : {0}".format(datetime.datetime.today()))
Example #15
def acquire_vlm_module_dataset(skip, nevents, train_path):
    '''
    Create the dataset to predict the module id
    '''
    X_dataset = []
    y_dataset = []
    for event_id, hits, cells, particles, truth in load_dataset(
            train_path, skip=skip, nevents=nevents):
        vlm = hits.module_id
        X_dataset.append(hits[['x', 'y', 'z']].values)
        y_dataset.append(vlm)

    return np.concatenate(X_dataset, axis=0), np.concatenate(y_dataset, axis=0)
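
# Hedged usage sketch (an assumption, not from the original): fit a simple
# k-nearest-neighbours classifier mapping hit coordinates to module_id on the
# dataset built above; "../input/train_1" is a hypothetical data location.
if __name__ == '__main__':
    from sklearn.neighbors import KNeighborsClassifier
    X, y = acquire_vlm_module_dataset(skip=0, nevents=2,
                                      train_path="../input/train_1")
    clf = KNeighborsClassifier(n_neighbors=5).fit(X, y)
    print("training accuracy:", clf.score(X, y))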
Example #16
def run():
    path_to_input = os.path.join(path_to_trackml, "train_1")
    nevents = 1
    old_submission = pd.read_csv("02.csv")
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=nevents):
        submission = extension.extend(old_submission, hits)
        submission.to_csv("09.csv", index=None)
        score = score_event(truth, submission)
        print("")
        print("score: %0.5f" % (score))
Example #17
def run():
    print("script begin", datetime.datetime.now())
    path_to_input = os.path.join(path_to_trackml, "train_1")
    nevents = 1
    sys.stderr.write("load data\n")
    for event_id, hits, truth, particle in load_dataset(path_to_input, parts=["hits", "truth", "particles"],
                                                        skip=0, nevents=nevents):
        z = particle["vz"].values
        for zz in [10, 20, 30]:
            print(zz, len(z[np.abs(z) < zz]) / len(z))
        # restrict to |z| < 100 before plotting (z0 was computed but unused)
        z0 = z[np.where(abs(z) < 100.0)]
        plt.hist(z0, bins=100)

    plt.savefig("tmp.pdf")
Example #18
def acquire_vlm_dataset(skip, nevents, train_path):
    '''
    Create the dataset to predict the volume and layer id
    '''
    X_dataset = []
    y_dataset = []
    for event_id, hits, cells, particles, truth in load_dataset(
            train_path, skip=skip, nevents=nevents):

        vlm = (hits.volume_id * 100) + (hits.layer_id)
        X_dataset.append(hits[['x', 'y', 'z']].values)
        y_dataset.append(vlm)

    return np.concatenate(X_dataset, axis=0), np.concatenate(y_dataset, axis=0)
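
# Hedged sketch (not part of the original): the code above packs volume_id
# and layer_id into a single integer, so a classifier's prediction can be
# decoded with divmod, e.g. 804 -> volume 8, layer 4.
def decode_vlm(vlm):
    """Inverse of vlm = volume_id * 100 + layer_id (valid for layer_id < 100)."""
    volume_id, layer_id = divmod(int(vlm), 100)
    return volume_id, layer_id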
Example #19
def run():
    print(datetime.datetime.now(), sys.argv[0], " begin")
    submission = pd.read_csv("01_merge.submission.csv")

    path_to_input = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=1):
        for i in range(5):
            submission = extension.extend(submission, hits)
            score = score_event(truth, submission)
            print("step%d, score: %0.5f" % (i + 1, score))

    print(datetime.datetime.now(), sys.argv[0], " end")
Example #20
def run():
    f = open("10.log", "w")
    f.write("extention many times\n")
    path_to_input = os.path.join(path_to_trackml, "train_1")
    nevents = 1
    submission = pd.read_csv("09.csv")
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=nevents):
        for i in range(5):
            submission = extension.extend(submission, hits)
            submission.to_csv("10_{0}.csv".format(i), index=None)
            score = score_event(truth, submission)
            f.write("i={0} score={1}\n".format(i, score))
Example #21
def run():
    print(datetime.datetime.now(), sys.argv[0], " begin")

    path_to_input = os.path.join(path_to_trackml, "train_1")
    model = UnrollingHelicesModel()

    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=1):
        print("len(hits): ", len(hits))
        labels = model.fit_predict(hits)
        score = model.score(hits, truth)
        print("score: ", score)

    print(datetime.datetime.now(), sys.argv[0], " end")
Example #22
def run(filename):
    model = models.UnrollingHelicesRt2()    
    path_to_input = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(path_to_input, parts=["hits", "truth"],
                                              skip=0, nevents=1):

        def Fun4BO(w1, w2, w3, niter):
            model.dbscan_weight[0] = w1
            model.dbscan_weight[1] = w1
            model.dbscan_weight[2] = w2
            model.dbscan_weight[3] = w3
            model.niter = int(niter)
            labels = model.predict(hits)
            one_submission = create_one_event_submission(event_id, hits, labels)
            score = score_event(truth, one_submission)
            return score

        print("Bayesian Optimization")
        opt = BayesianOptimization(Fun4BO,
                                   {"w1": (0.9, 1.2),
                                    "w2": (0.3, 0.7),
                                    "w3": (0.1, 0.4),
                                    "niter": (140, 190)},
                                   verbose=True)
        opt.maximize(init_points=3,
                     n_iter=100,
                     acq="ucb",
                     kappa=2.576)

        # names of the optimized parameters
        labels = opt.res["max"]["max_params"].keys()
        # one dict of parameter values per BO evaluation
        params = opt.res["all"]["params"]
        len_params = len(params)

        data_dic = {}

        for label in labels:
            val = [opt.res["max"]["max_params"][label]]
            for i in range(len_params):
                val.append(params[i][label])
            data_dic[label] = val
        data_dic["value"] = [opt.res["max"]["max_val"]] + opt.res["all"]["values"]
        data_dic["label"] = ["max"] + [str(x) for x in range(len_params)]
        df = pd.DataFrame(data_dic)
        df.to_csv(filename, index=None)
Example #23
def run_candidate():
    print("script begin", datetime.datetime.now())
    EPS = 1e-12
    model = models.ZAScale(djs=np.arange(-20, 20 + EPS, 10),
                           dis=np.arange(-0.003, 0.003 + EPS, 0.00025),
                           min_ncand=1)
    nevents = 1
    path_to_input = os.path.join(path_to_trackml, "train_1")
    path_to_out = "out_{0}".format(sys.argv[0].split(".")[0])

    os.makedirs(path_to_out, exist_ok=True)

    event_id_list = []
    hits_list = []
    truth_list = []
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=nevents):
        print("size(hits) : ", len(hits))
        event_id_list.append(event_id)
        hits_list.append(hits)
        truth_list.append(truth)

    sys.stderr.write("scan\n")
    for (event_id, hits, truth) in zip(event_id_list, hits_list, truth_list):

        truth = truth.merge(hits, on=['hit_id'], how='left')
        dfh = truth.copy()
        dfh["rt"] = np.sqrt(dfh['x'].values**2 + dfh['y'].values**2)
        dfh = dfh.loc[dfh.z > 500]
        dfh = dfh.loc[(dfh.rt > 50) & (dfh.rt < 100)]

        label = model.predict(dfh)

        submission = pd.DataFrame(
            columns=['event_id', 'hit_id', 'track_id'],
            data=np.column_stack(([int(event_id)] * len(dfh),
                                  dfh.hit_id.values,
                                  label))).astype(int)
        score = score_event(dfh, submission)
        max_score = dfh.weight.sum()
        print("score: %0.5f  (%0.5f)" % (score * max_score, score))

    print("script end", datetime.datetime.now())
Example #24
def run_candidate():
    print("script begin", datetime.datetime.now())
    EPS = 1e-12
    model = models.UnrollingHelicesShiftingZ(
        djs=[-20, -10, 0, 10, 20],
        dbscan_features=["sina1", "cosa1", "z1", "z2", "x_rt", "y_rt"],
        dbscan_weight=[0.9, 0.9, 0.35, 0.22, 0.01, 0.01],
        coef_rt1=1.33,
        coef_rt2=0.0,
        niter=150)

    nevents = 1
    path_to_input = os.path.join(path_to_trackml, "train_1")
    path_to_out = "out_{0}".format(sys.argv[0].split(".")[0])

    event_id_list = []
    hits_list = []
    truth_list = []
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=nevents):
        print("size(hits) : ", len(hits))
        event_id_list.append(event_id)
        hits_list.append(hits)
        truth_list.append(truth)

    sys.stderr.write("scan\n")
    for (event_id, hits, truth) in zip(event_id_list, hits_list, truth_list):

        truth = truth.merge(hits, on=['hit_id'], how='left')
        dfh = truth.copy()
        label = model.predict(dfh)

        submission = pd.DataFrame(
            columns=['event_id', 'hit_id', 'track_id'],
            data=np.column_stack(([int(event_id)] * len(dfh),
                                  dfh.hit_id.values,
                                  label))).astype(int)
        submission.to_csv("02.csv", index=None)
        score = score_event(dfh, submission)
        max_score = dfh.weight.sum()
        print("score: %0.5f  (%0.5f)" % (score * max_score, score))

    print("script end", datetime.datetime.now())
Example #25
def run():
    print(datetime.datetime.now(), sys.argv[0], " begin")

    path_to_input = os.path.join(path_to_trackml, "train_1")
    candidates_dir = "candidates1"
    th_len = 3
    istep = 4
    candidates_maker = make_candidates.UnrollingHelices(
        niter=150, output_dir=candidates_dir, eps0=0.04)
    # eps0=0.02 => 0.51353324, 0.53356
    merger = merge.LengthMerge(candidates_output_dir=candidates_dir)

    for event_id, hits in load_dataset(path_to_input,
                                       parts=["hits"],
                                       skip=0,
                                       nevents=1):

        print("len(hits): ", len(hits))
        path_submission0 = "step{0}.submission.csv".format(istep - 1)
        print("read submission file from {0}".format(path_submission0))
        submission0 = pd.read_csv(path_submission0)

        print("calculate track score")
        tscore = score_track.score_by_length(submission0, hits)
        outlier_mask = tscore < th_len

        hits1 = submission0[outlier_mask].merge(hits, on="hit_id")
        print("len(hits1): ", len(hits1))

        print("# of outlier: ", len(submission0[outlier_mask]))
        print("# of good: ", len(submission0[~outlier_mask]))
        df = submission0[outlier_mask]
        df.to_csv("step{0}_outlier.submission.csv".format(istep - 1),
                  index=None)
        df = submission0[~outlier_mask]
        df.to_csv("step{0}_good.submission.csv".format(istep - 1), index=None)

        print("make candidates")
        candidates_maker.run(event_id, hits1)

        print("merge")
        submission1 = merger.run(event_id, hits1)
        submission1.to_csv("step{0}.submission.csv".format(istep), index=None)

    print(datetime.datetime.now(), sys.argv[0], " end")
Example #26
def run_candidate():
    print("script begin", datetime.datetime.now())
    EPS = 1e-12
    model = models.UnrollingHelicesShiftingZ(
        dbscan_features=["sina1", "cosa1", "z1", "z2", "x_y", "x_r", "y_r",
                         "rt_r"],
        dbscan_weight=[2.7474448671796874, 2.7474448671796874,
                       1.3649721713529086, 0.7034918842926337,
                       0.0005549122352940002, 0.023096034747190672,
                       0.04619756315527515, 0.2437077420144654],
        djs=[-20, -10, 0, 10, 20],
        niter=150,
        eps0=0.00975)

    nevents = 1
    path_to_input = os.path.join(path_to_trackml, "train_1")
    path_to_out = "out_{0}".format(sys.argv[0].split(".")[0])

    event_id_list = []
    hits_list = []
    truth_list = []
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=nevents):
        print("size(hits) : ", len(hits))
        event_id_list.append(event_id)
        hits_list.append(hits)
        truth_list.append(truth)

    sys.stderr.write("scan\n")
    for (event_id, hits, truth) in zip(event_id_list, hits_list, truth_list):

        truth = truth.merge(hits, on=['hit_id'], how='left')
        dfh = truth.copy()
        label = model.predict(dfh)

        submission = pd.DataFrame(
            columns=['event_id', 'hit_id', 'track_id'],
            data=np.column_stack(([int(event_id)] * len(dfh),
                                  dfh.hit_id.values,
                                  label))).astype(int)
        submission.to_csv("05.csv", index=None)
        score = score_event(dfh, submission)
        max_score = dfh.weight.sum()        
        print("score: %0.5f  (%0.5f)" % (score*max_score, score))

    print("script end", datetime.datetime.now())
Example #27
def create_massive_dataset(classifier, train_path, training_aperture=2,
                           angle_delta=2, k_min=50, skip=30, nevents=5,
                           keep_only_percentage=50,
                           extend_start_of_track=True,
                           extend_end_of_track=True):
    '''
    Create a dataset to be used for classifying whether a hit belongs to a
    given track. It produces X and y data for both the start and the end of
    a track.

    classifier - layer detector classifier
    extend_start_of_track - whether to extend the start of the tracks
    extend_end_of_track - whether to extend the end of the tracks
    '''
    X_start_massive = []
    y_start_massive = []
    X_end_massive = []
    y_end_massive = []
    
    p_timer = PrintTime()

    for event_id, hits, cells, particles, truth in load_dataset(train_path, skip=skip, nevents=nevents):
        print('---------- Event:', event_id, '----------')
        
        # Predict basic tracks for the event
        rz_scale = [1.3, 1.4, 0.94, 0.273, 0.01]
        shifting = [('z_shift', 0)]
        lab = unroll_helix_clustering(hits, classifier, truth,
                                      shifting=shifting, func='hough',
                                      dz0=0.002, num_step=40, eps=0.008,
                                      additional_theta=[0],
                                      rz_scale=rz_scale, print_info=True,
                                      threshold_value_post=15)

        # Create the start & end datasets for the tracks just inferred
        extended_subm = create_one_event_submission(0, hits, lab)
        X_train_start, y_train_start, X_train_end, y_train_end = \
            extend_supervised_create_dataset(
                extended_subm, hits, truth,
                training_aperture=training_aperture, angle_delta=angle_delta,
                k_min=k_min, extend_start_of_track=extend_start_of_track,
                extend_end_of_track=extend_end_of_track)
        X_start_massive.append(X_train_start)
        y_start_massive.append(y_train_start)
        
        X_end_massive.append(X_train_end)
        y_end_massive.append(y_train_end)

    print('\t',p_timer.get_timer())

    return np.concatenate(X_start_massive, axis=0), np.concatenate(y_start_massive, axis=0), np.concatenate(X_end_massive, axis=0), np.concatenate(y_end_massive, axis=0)
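
# Hedged usage sketch (an assumption, not from the original): build the
# start/end extension datasets and fit one classifier per track end;
# `layer_classifier` and the path are hypothetical stand-ins.
#   from sklearn.ensemble import RandomForestClassifier
#   X_s, y_s, X_e, y_e = create_massive_dataset(layer_classifier,
#                                               "../input/train_1",
#                                               skip=30, nevents=5)
#   start_clf = RandomForestClassifier(n_estimators=100).fit(X_s, y_s)
#   end_clf = RandomForestClassifier(n_estimators=100).fit(X_e, y_e)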
Example #28
def main():
    tracker = Model()
    time_spent = 0
    n_event = 0
    score_sum = 0
    for event_id, hits, cells, truth in load_dataset(
            PATH_TO_DATA, parts=['hits', 'cells', 'truth']):
        print("Runing event", event_id, "...", flush=True)
        # Make predictions
        t_start = get_clock()
        sub = tracker.predict_one_event(event_id, hits, cells=cells)
        t_end = get_clock()
        # Compute accuracy score
        score = score_event(truth, sub)
        # accumulate time, score, number of events
        time_spent += t_end - t_start
        score_sum += score
        n_event += 1
        time_per_event = time_spent / n_event
        # keep the running mean separate so the per-event print stays correct
        mean_score = score_sum / n_event
        # Print information
        print("event", event_id, "accuracy score :", score)
        print("event", event_id, 'time spent     :', t_end - t_start)
        print('total time spent:', time_spent)
        print("running speed   : {:.3f} sec per event".format(time_spent /
                                                              n_event))
        print("running score   :", mixt_score(mean_score, time_per_event))
        print('-----------------------------------', flush=True)
        if n_event > 100:
            break
    if n_event == 0:
        print("Warning: no events were found in the given directory.")
        exit()
    if time_spent <= 0:
        print("Warning: execution time <= 0. Something went wrong!")

    time_per_event = time_spent / n_event
    score = score_sum / n_event

    print("Accuracy mean      :", score)
    print("Time per event     :", time_per_event)
    print("Overall mixt score :", mixt_score(score, time_per_event))
Example #29
def run_candidate():
    print("script begin", datetime.datetime.now())
    EPS = 1e-12
    # sigma of z is 5.5 mm
    model = models.ZAScaleNFilter(djs=np.linspace(-2.25, 2.25 + EPS, 10),
                                  dis=np.linspace(-0.003, 0.003 + EPS, 25))
    # model = models.ZAScaleNFilter(djs=[-20, 0.0, 20],
    #                              dis=[0.0])
    nevents = 1
    path_to_input = os.path.join(path_to_trackml, "train_1")
    path_to_out = "out_{0}".format(sys.argv[0].split(".")[0])

    event_id_list = []
    hits_list = []
    truth_list = []
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=nevents):
        print("size(hits) : ", len(hits))
        event_id_list.append(event_id)
        hits_list.append(hits)
        truth_list.append(truth)

    sys.stderr.write("scan\n")
    for (event_id, hits, truth) in zip(event_id_list, hits_list, truth_list):

        truth = truth.merge(hits, on=['hit_id'], how='left')
        dfh = truth.copy()
        label = model.predict(dfh)

        submission = pd.DataFrame(
            columns=['event_id', 'hit_id', 'track_id'],
            data=np.column_stack(([int(event_id)] * len(dfh),
                                  dfh.hit_id.values,
                                  label))).astype(int)
        score = score_event(dfh, submission)
        max_score = dfh.weight.sum()
        print("score: %0.5f  (%0.5f)" % (score * max_score, score))

    print("script end", datetime.datetime.now())
Example #30
    def test(self, path='../data/test/', number_of_events=np.inf):
        "Loads all test data (125 events)"
        n_events = 0
        tracks = dict()
        for event, hits in load_dataset(path, parts=['hits']):
            hits = hits.set_index('hit_id')[self.input_columns]
            # normalize the hit coordinates
            hits.loc[:, 'x':'z'] = (hits.loc[:, 'x':'z'] /
                                    TrackFinder.MaxDetectorSize)
            tracks[event] = list()
            # apply tracking algorithm
            while len(hits.index) > 0:
                self.find_track(hits, tracks[event])
            print("{0} tracks found in event {1}".format(
                len(tracks[event]), event))

            n_events = n_events + 1
            if n_events >= number_of_events:
                break

        return tracks
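
# Hedged usage sketch (an assumption, not from the original): the method
# above belongs to a TrackFinder class whose constructor is not shown, so
# this call is illustrative only.
#   finder = TrackFinder()
#   tracks = finder.test(path='../data/test/', number_of_events=2)
#   print(sum(len(t) for t in tracks.values()), "tracks in total")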