Example #1
def run():

    max_istep = 4

    full_submission_list = [
        pd.read_csv("step{0}.submission.csv".format(istep))
        for istep in range(max_istep + 1)
    ]
    good_submission_list = [
        pd.read_csv("step{0}_good.submission.csv".format(istep))
        for istep in range(max_istep)
    ]
    path_to_input = os.path.join(path_to_trackml, "train_1")

    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=1):
        for mi in range(max_istep + 1):
            submission_list = [full_submission_list[mi]] + [
                good_submission_list[i] for i in range(mi)
            ]
            submission = pd.concat(submission_list)
            score0 = score_event(truth, submission)
            print("step = {0}, score = {1}".format(mi, score0))

            if (mi == max_istep):
                for i in range(5):
                    submission = extension.extend(submission, hits)
                    score0 = score_event(truth, submission)
                    print("with extension = {0}, score = {1}".format(
                        i + 1, score0))
Example #2
def run():
    print(datetime.datetime.now(), sys.argv[0], " begin")
    submission = pd.read_csv("01_merge.submission.csv")
    model = extension.RemoveOutliersByQuadric()

    path_to_input = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=1):

        # four successive track-extension passes, scoring after each
        for i in range(4):
            submission = extension.extend(submission, hits)
            score = score_event(truth, submission)
            print("step%d, score: %0.5f" % (i + 1, score))

        # final pass: remove outliers with the quadric model
        submission = model.run(submission, hits)
        score = score_event(truth, submission)
        print("step5, score: %0.5f" % (score))

    print(datetime.datetime.now(), sys.argv[0], " end")
Example #3
def run():
    print(datetime.datetime.now(), sys.argv[0], " begin")

    args = get_args()

    if (not os.path.exists(args.out_path)):
        os.makedirs(args.out_path)

    if (args.test):
        print("calculation begin for test")
        path_to_input = os.path.join(path_to_trackml, "test")
        nevents = 125
        parts = ["hits"]

    else:
        print("calculation begin for train_1")
        path_to_input = os.path.join(path_to_trackml, "train_1")
        nevents = 1
        parts = ["hits", "truth"]

    for (i, data) in tqdm(enumerate(
            load_dataset(path_to_input, parts=parts, nevents=nevents)),
                          total=nevents):

        if (len(data) == 3):
            (event_id, hits, truth) = data
        elif (len(data) == 2):
            (event_id, hits) = data
        else:
            print(data)
            raise RuntimeError("not match")

        if (np.mod(i, args.num) == args.idx):
            print("go calc for event_id=", event_id)
        else:
            print("pass calc for event_id=", event_id)
            continue

        print("len(hits): ", len(hits))
        hits["event_id"] = event_id

        model = get_model(args)
        path_to_sub = get_path_to_submission(args.in_path, event_id)
        if (path_to_sub is None):
            print("submission set to None")
        else:
            print("submission read from {0}".format(path_to_sub))
            model.set_submission(pd.read_csv(path_to_sub))
        model.fit(hits)
        submission = model.submission

        path_to_sub = get_path_to_submission(args.out_path, event_id)
        if (not os.path.exists(os.path.dirname(path_to_sub))):
            os.makedirs(os.path.dirname(path_to_sub))
        submission.to_csv(path_to_sub, index=None)
        if (len(data) == 3):
            score = score_event(truth, model.submission)
            print("score: ", score)

    print(datetime.datetime.now(), sys.argv[0], " end")
Example #4
def score_tracks(all_tracks, hits, truth):
    # this part takes most of the time
    # and needs improvement
    total_tracks = len(all_tracks)
    # logging.info("total tracks: {}".format(total_tracks))

    results = []
    for itrk, track in enumerate(all_tracks):
        results += [(x, itrk) for x in track]

    new_df = pd.DataFrame(results, columns=['hit_id', 'track_id'])
    new_df = new_df.drop_duplicates(subset='hit_id')

    df_sub = hits[['hit_id']]
    df_sub = df_sub.merge(new_df, on='hit_id', how='outer').fillna(total_tracks+1)
    matched = truth.merge(new_df, on='hit_id', how='inner')
    tot_truth_weight = np.sum(matched['weight'])

    ## subtract the weight of hits that belong to the same particle
    ## when the matched hits are no more than 50% of that particle's hits
    particle_ids = np.unique(matched['particle_id'])
    for p_id in particle_ids:
        pID_match = matched[matched['particle_id'] == p_id]
        if pID_match.shape[0] <= truth[truth['particle_id'] == p_id].shape[0]*0.5:
            tot_truth_weight -= np.sum(pID_match['weight'])

    return [score_event(truth, df_sub), tot_truth_weight]
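The per-particle loop above is the part flagged as slow. A minimal vectorized sketch of the same weight correction, assuming the `matched` and `truth` DataFrames built in `score_tracks`; the helper name `matched_weight_vectorized` is hypothetical:

def matched_weight_vectorized(matched, truth):
    # total weight of all matched hits
    tot_weight = matched['weight'].sum()
    # matched and true hit counts per particle, plus matched weight per particle
    n_matched = matched.groupby('particle_id')['hit_id'].size()
    n_truth = truth.groupby('particle_id')['hit_id'].size()
    w_matched = matched.groupby('particle_id')['weight'].sum()
    # particles matched on no more than 50% of their hits lose their weight
    bad = n_matched <= 0.5 * n_truth.reindex(n_matched.index)
    return tot_weight - w_matched[bad].sum()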
Example #5
def multiple_tracks_merge_by_layer(lab_list,
                                   hits,
                                   classifier,
                                   truth=None,
                                   return_result=True):
    '''
    Merge N sets of track labels by keeping, pairwise, the one with the
    higher number of hits belonging to different volume-layers.

    lab_list - list of predicted track ids for "hits"
    hits - "hits" file
    classifier - volume-layer classifier
    truth - "truth" file used to calculate the score
    return_result - if True return the score, otherwise return the merged tracks

    '''
    s_combo = lab_list[0]
    vlm_predicted = classifier.predict(hits[['x', 'y', 'z']])

    for lb in lab_list[1:]:
        ## Calculate the number of hits with different volume-layer in a track
        N1 = number_hits_different_module(s_combo, vlm_predicted)
        N2 = number_hits_different_module(lb, vlm_predicted)

        ## Merge lb and s_combo
        s_combo = choose_longest_track_by_layer(N2,
                                                N1,
                                                lb,
                                                s_combo,
                                                threshold_value=17)

    if return_result:
        return score_event(truth,
                           create_one_event_submission(0, hits, s_combo))
    else:
        return s_combo
Example #6
def find_clusters(min_points, max_radius, shift, phi_wraparound=False, plot_intermediate=False):
    global clustering, remaining_hits, n_clusters_found
    remaining_hits['phiCR'] = remaining_hits['phi'] - shift*remaining_hits['R']
    X = remaining_hits[['eta', 'phiCR']]

    eps = max_radius
    min_samp = min_points
    db = DBSCAN(eps=eps, min_samples=min_samp, metric='euclidean').fit(X)
    labels = db.labels_
    labels = [i+n_clusters_found if i!=-1 else -1 for i in labels]
    if max(labels)>-1:
        n_clusters_found = max(labels)+1
    remaining_hits['track_id'] = labels
    clustering.update(remaining_hits['track_id'])
    remaining_hits = remaining_hits[remaining_hits.track_id==-1]

    # plot currently found clusters
    if (plot_intermediate):
        hits['phiCR'] = hits['phi'] - shift*hits['R']
        fig = plt.figure(figsize=(20,7))
        ax = fig.add_subplot(111)
        clusters = np.unique(clustering['track_id'])
        for cluster in clusters:
            cluster_hit_ids = clustering[clustering['track_id'] == cluster]['hit_id'] # all hits in cluster
            t = hits[hits['hit_id'].isin(cluster_hit_ids)][['eta', 'phiCR']]
            if cluster != -1:
                ax.plot(t.phiCR, t.eta, '.-', ms=10)
        plt.show()

    # print score
    score = score_event(truth, clustering)
    print('track-ml custom metric score:', round(score, 4), '- %d hits remaining to match' % len(remaining_hits), '- %d clusters found' % n_clusters_found)
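find_clusters assumes the hit DataFrames already carry derived 'R', 'phi' and 'eta' columns. A minimal sketch of how those columns could be prepared from the raw x, y, z hit coordinates; the helper name add_cylindrical_features is hypothetical:

import numpy as np

def add_cylindrical_features(hits):
    # transverse radius and azimuthal angle in the x-y plane
    hits['R'] = np.sqrt(hits['x']**2 + hits['y']**2)
    hits['phi'] = np.arctan2(hits['y'], hits['x'])
    # pseudorapidity eta = arctanh(z / r), with r the full 3D radius
    r3 = np.sqrt(hits['x']**2 + hits['y']**2 + hits['z']**2)
    hits['eta'] = np.arctanh(hits['z'] / r3)
    return hits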
Example #7
def display_score(event_id, hits, labels, truth, message):
    if truth is not None:
        one_submission = create_one_event_submission(event_id, hits, labels)
        score = score_event(truth, one_submission)
        print(message + "%d: %.8f" % (event_id, score))
    else:
        print(message + '%d: no score available' % (event_id))
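Several of these examples rely on a create_one_event_submission helper that is not shown. A minimal sketch consistent with the inline pattern used in Examples #25-#28; the actual helper in each kernel may differ (some examples pass an array of hit ids instead of the hits DataFrame):

import numpy as np
import pandas as pd

def create_one_event_submission(event_id, hits, labels):
    # one row per hit: (event_id, hit_id, predicted track_id)
    sub_data = np.column_stack(([int(event_id)] * len(hits),
                                hits['hit_id'].values,
                                labels))
    return pd.DataFrame(data=sub_data,
                        columns=['event_id', 'hit_id', 'track_id']).astype(int)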
Example #8
 def get_score(self, hits, truth):
     tracks_pred = []
     while len(hits.index) > 0:
         self.find_track(hits, tracks_pred)
     submission = self.make_submission(tracks_pred)
     score = score_event(truth, submission)
     return score
Example #9
def foo(i):
    model = Clusterer()
    model.initialize(hits)

    hits_with_dz = preprocess_hits(hits, 0.055 * i)

    result = model.Hough_clustering(hits_with_dz,
                                    coef=c,
                                    epsilon=0.0048,
                                    min_samples=min_samples_in_cluster,
                                    n_loop=300,
                                    verbose=True)

    second = []
    for n_shuffle in range(10):

        np.random.shuffle(result)

        labels = range(result.shape[1])

        for k in [0]:
            # j indexes rows of result; the function argument i is kept
            # for the save path below
            for j in range(len(result)):
                labels = merge(labels, result[j], k)

            submission = create_one_event_submission(0, hits['hit_id'].values,
                                                     labels)
            print(score_event(truth, submission))

        second += [labels]

    result = np.array(second)
    labels = range(result.shape[1])

    for k in [0]:
        for j in range(len(result)):
            labels = merge(labels, result[j], k)

        submission = create_one_event_submission(0, hits['hit_id'].values,
                                                 labels)
        print(score_event(truth, submission))

    np.save('predicts/53/{}'.format(i), labels)
    return None
Example #10
 def Fun4BO(w1, w2, w3, niter):
     model.dbscan_weight[0] = w1
     model.dbscan_weight[1] = w1
     model.dbscan_weight[2] = w2
     model.dbscan_weight[3] = w3
     model.niter = int(niter)
     labels = model.predict(hits)
     one_submission = create_one_event_submission(event_id, hits, labels)
     score = score_event(truth, one_submission)
     return score
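A hedged usage sketch for an objective like Fun4BO with the bayes_opt package, following the call pattern shown in Example #13; the bounds below are placeholders, not tuned values:

from bayes_opt import BayesianOptimization

opt = BayesianOptimization(Fun4BO,
                           {"w1": (0.9, 1.2),    # placeholder bounds
                            "w2": (0.1, 0.8),
                            "w3": (0.1, 0.8),
                            "niter": (140, 190)},
                           verbose=True)
# maximize the TrackML score returned by Fun4BO
opt.maximize(init_points=3, n_iter=20, acq="ucb", kappa=2.576)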
Example #11
def GA_eval(weights):
    eps = weights[0]
    z_scale = weights[1]
    model = Clusterer(eps=eps)
    labels = model.predict(hits, rz_scale=z_scale)

    submission = create_one_event_submission(0, hits, labels)
    score = score_event(truth, submission)
    print('score: %f' % score)
    return 1-score
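Since GA_eval returns 1 - score, it can be handed directly to a minimizer. A minimal usage sketch with scipy's differential evolution (a genetic-style optimizer); the bounds for eps and z_scale are assumed placeholders:

from scipy.optimize import differential_evolution

# bounds for (eps, z_scale); placeholder ranges, not tuned values
bounds = [(0.003, 0.010), (0.8, 1.3)]
result = differential_evolution(GA_eval, bounds, maxiter=10, popsize=10)
print("best weights:", result.x, "best score:", 1 - result.fun)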
Example #12
def run():
    model = merge.LengthMerge()

    path_to_input = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(path_to_input, parts=["hits", "truth"],
                                       skip=0, nevents=1):
        submission = model.run(event_id, hits)
        submission.to_csv("01_merge.submission.csv" ,index=None)
        score = score_event(truth, submission)
        print("score: %0.5f" % (score))
Example #13
def run(filename):
    model = models.UnrollingHelices(use_outlier=False,
                                    dbscan_features = ["sina1", "cosa1", "z1", "x1", "x2", "x_y", "x_rt", "y_rt"],
                                    dbscan_weight   = [1.0,     1.0,     0.75, 0.5,  0.5,  0.2,   0.2,    0.2])
    path_to_input = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(path_to_input, parts=["hits", "truth"],
                                              skip=0, nevents=1):

        def Fun4BO(w_a1, w_z1, w_x1, w_x2, w_x_y, w_xy_rt, niter):
            model.dbscan_weight[0] = w_a1
            model.dbscan_weight[1] = w_a1
            model.dbscan_weight[2] = w_z1
            model.dbscan_weight[3] = w_x1
            model.dbscan_weight[4] = w_x2
            model.dbscan_weight[5] = w_x_y
            model.dbscan_weight[6] = w_xy_rt
            model.dbscan_weight[7] = w_xy_rt
            model.iter_size_helix = int(niter)
            labels = model.predict(hits)
            one_submission = create_one_event_submission(event_id, hits, labels)
            score = score_event(truth, one_submission)
            return score

        print("Bayesian Optimization")
        opt = BayesianOptimization(Fun4BO,
                                   {"w_a1": (0.9, 1.2),
                                    "w_z1": (0.3, 0.8),
                                    "w_x1": (0.1, 0.6),
                                    "w_x2": (0.1, 0.6),
                                    "w_x_y": (0.1, 0.6),
                                    "w_xy_rt": (0.1, 0.6),
                                    "niter": (140, 190)},
                                   verbose=True)
        opt.maximize(init_points = 3,
                     n_iter = 20,
                     acq = "ucb",
                     kappa = 2.576)

        # [string]
        labels = opt.res["max"]["max_params"].keys()
        # [dict(string, [float])]
        params = opt.res["all"]["params"]
        len_params = len(params)
    
        data_dic = {}

        for label in labels:
            val = [opt.res["max"]["max_params"][label]]
            for i in range(len_params):
                val.append(params[i][label])
            data_dic[label] = val
        data_dic["value"] = [opt.res["max"]["max_val"]] + opt.res["all"]["values"]
        data_dic["label"] = ["max"] + [str(x) for x in range(len_params)]
        df = pd.DataFrame(data_dic)
        df.to_csv(filename, index=None)
Example #14
 def compute_trackml_score(self,
                           final_tracks: List[TXplet],
                           submission=None) -> float:
     """
     :param final_tracks: a list of xplets representing tracks
     :param submission: (optional) a TrackML submission, see :py:meth:`~create_submission`
     :return: the trackml score (between 0 and 1)
     """
     if submission is None:
         submission = self.create_submission(final_tracks)
     return score_event(self.truth, submission)
Example #15
def score_graph_use_kcomponents(hits, G):
    can_trkx = nx.k_components(G)[1]
    n_candidates = len(can_trkx)
    results = []
    for itrk, tracks in enumerate(can_trkx):
        results += [(G.nodes[track]['hit_id'], itrk) for track in tracks]

    trk_df = pd.DataFrame(results, columns=['hit_id', 'track_id'])
    score = score_event(hits, trk_df)
    print("{} track candidates with score: {:.4f}".format(n_candidates, score))
    return trk_df, score
Example #16
def run():
    model = merge.QuadricMerge(candidates_output_dir="../09_UH_len/candidates")

    path_to_input = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=1):
        submission = model.run(event_id, hits)
        submission.to_csv("01_merge.submission.csv", index=None)
        score = score_event(truth, submission)
        print("score: %0.5f" % (score))
Example #17
 def Fun4BO(w_a1, w_z1, w_z2, w_xy_rt, niter):
     model.dbscan_weight[0] = w_a1
     model.dbscan_weight[1] = w_a1
     model.dbscan_weight[2] = w_z1
     model.dbscan_weight[3] = w_z2
     model.dbscan_weight[4] = w_xy_rt
     model.dbscan_weight[5] = w_xy_rt
     model.niter = int(niter)
     labels = model.predict(hits)
     one_submission = create_one_event_submission(event_id, hits, labels)
     score = score_event(truth, one_submission)
     return score
Example #18
 def Fun4BO(w_a1, w_z1, w_z2, w_xy_rt):
     model.dbscan_weight[0] = w_a1
     model.dbscan_weight[1] = w_a1
     model.dbscan_weight[2] = w_z1
     model.dbscan_weight[3] = w_z2
     model.dbscan_weight[4] = w_xy_rt
     model.dbscan_weight[5] = w_xy_rt
     score_list = []
     for (hits, truth) in zip(hits_list, truth_list):
         labels = model.predict(hits)
         one_submission = create_one_event_submission(event_id, hits, labels)
         score = score_event(truth, one_submission)
         score_list.append(score)
     return np.sum(score_list)/len(score_list)
Example #19
 def Fun4BO(w_a1, w_z1, w_z2, w_xy, w_xy_rt, c_r1, c_r2):
     model.dbscan_weight[0] = w_a1
     model.dbscan_weight[1] = w_a1
     model.dbscan_weight[2] = w_z1
     model.dbscan_weight[3] = w_z2
     model.dbscan_weight[4] = w_xy
     model.dbscan_weight[5] = w_xy_rt
     model.dbscan_weight[6] = w_xy_rt
     model.coef_rt1  = c_r1
     model.coef_rt2  = c_r2
     labels = model.predict(hits)
     one_submission = create_one_event_submission(event_id, hits, labels)
     score = score_event(truth, one_submission)
     return score
Example #20
def run():
    path_to_input = os.path.join(path_to_trackml, "train_1")
    nevents = 1
    old_submission = pd.read_csv("02.csv")
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=nevents):
        submission = extension.extend(old_submission, hits)
        submission.to_csv("09.csv", index=None)
        score = score_event(truth, submission)
        print("")
        print("score: %0.5f" % (score))
Example #21
def calc_steps(niter, eps0s, th_lens, num_exts, output_dir):

    if not isinstance(niter, list):
        niter = [niter] * len(eps0s)
    
    print("eps list: ", eps0s)
    print("th_len list: ", th_lens)
    print("num ext list: ", num_exts)
    
    path_to_input = os.path.join(path_to_trackml, "train_1")
    path_to_out = "out_" + sys.argv[0].split(".")[0]

    for event_id, hits, truth in load_dataset(path_to_input, parts=["hits", "truth"],
                                              skip=0, nevents=1):
        print("len(hits): ", len(hits))

        sub_out = None
        subs_good = []
        subs_out  = []        
        
        for i in range(len(eps0s)):
            sub_good, sub_out = calc_one(i+1, sub_out, event_id, hits, path_to_out,
                                         UnrollingHelices(niter=niter[i],
                                                          eps0=eps0s[i]),
                                         LengthMerge(),
                                         th_lens[i], num_exts[i] )
            subs_good.append(sub_good)
            subs_out.append( sub_out)

            submission = pd.concat(subs_good + [subs_out[-1]])
            total_score = score_event(truth, submission)
            print("step {0}, total_score:{1}".format(i+1, total_score))            

            truth_good = sub_good.merge(truth, on="hit_id")[truth.columns]
            score_good = score_event(truth_good, sub_good)
            print("step {0}, good_score: {1}".format(i+1, score_good))
    return total_score
Example #22
def run():
    print(datetime.datetime.now(), sys.argv[0], " begin")
    submission = pd.read_csv("01_merge.submission.csv")

    path_to_input = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=1):
        for i in range(5):
            submission = extension.extend(submission, hits)
            score = score_event(truth, submission)
            print("step%d, score: %0.5f" % (i + 1, score))

    print(datetime.datetime.now(), sys.argv[0], " end")
Example #23
 def Fun4BO(w_a1, w_z1, w_z2, w_xy_rt, c_rt1, c_rt2, eps0, step_eps):
     model.dbscan_weight[0] = w_a1
     model.dbscan_weight[1] = w_a1
     model.dbscan_weight[2] = w_z1
     model.dbscan_weight[3] = w_z2
     model.dbscan_weight[4] = w_xy_rt
     model.dbscan_weight[5] = w_xy_rt
     model.coef_rt1 = c_rt1
     model.coef_rt2 = c_rt2
     model.eps0 = eps0
     model.step_eps = step_eps
     labels = model.predict(hits)
     one_submission = create_one_event_submission(
         event_id, hits, labels)
     score = score_event(truth, one_submission)
     return score
Example #24
def run():
    f = open("10.log", "w")
    f.write("extention many times\n")
    path_to_input = os.path.join(path_to_trackml, "train_1")
    nevents = 1
    submission = pd.read_csv("09.csv")
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=nevents):
        for i in range(5):
            submission = extension.extend(submission, hits)
            submission.to_csv("10_{0}.csv".format(i), index=None)
            score = score_event(truth, submission)
            f.write("i={0} score={1}\n".format(i, score))
Example #25
def run_candidate():
    print("script begin", datetime.datetime.now())
    EPS = 1e-12
    model = models.UnrollingHelicesShiftingZ(
        djs=[-20, -10, 0, 10, 20],
        dbscan_features=["sina1", "cosa1", "z1", "z2", "x_rt", "y_rt"],
        dbscan_weight=[0.9, 0.9, 0.35, 0.22, 0.01, 0.01],
        coef_rt1=1.33,
        coef_rt2=0.0,
        niter=150)

    nevents = 1
    path_to_input = os.path.join(path_to_trackml, "train_1")
    path_to_out = "out_{0}".format(sys.argv[0].split(".")[0])

    event_id_list = []
    hits_list = []
    truth_list = []
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=nevents):
        print("size(hits) : ", len(hits))
        event_id_list.append(event_id)
        hits_list.append(hits)
        truth_list.append(truth)

    sys.stderr.write("scan\n")
    for (event_id, hits, truth) in zip(event_id_list, hits_list, truth_list):

        truth = truth.merge(hits, on=['hit_id'], how='left')
        dfh = truth.copy()
        label = model.predict(dfh)

        submission = pd.DataFrame(columns=['event_id', 'hit_id', 'track_id'],
                                  data=np.column_stack(([
                                      int(event_id),
                                  ] * len(dfh), dfh.hit_id.values,
                                                        label))).astype(int)
        submission.to_csv("02.csv", index=None)
        score = score_event(dfh, submission)
        max_score = dfh.weight.sum()
        print("score: %0.5f  (%0.5f)" % (score * max_score, score))

    print("script end", datetime.datetime.now())
Example #26
def run_candidate():
    print("script begin", datetime.datetime.now())
    EPS = 1e-12
    model = models.ZAScale(djs=np.arange(-20, 20 + EPS, 10),
                           dis=np.arange(-0.003, 0.003 + EPS, 0.00025),
                           min_ncand=1)
    nevents = 1
    path_to_input = os.path.join(path_to_trackml, "train_1")
    path_to_out = "out_{0}".format(sys.argv[0].split(".")[0])

    os.makedirs(path_to_out, exist_ok=True)

    event_id_list = []
    hits_list = []
    truth_list = []
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=nevents):
        print("size(hits) : ", len(hits))
        event_id_list.append(event_id)
        hits_list.append(hits)
        truth_list.append(truth)

    sys.stderr.write("scan\n")
    for (event_id, hits, truth) in zip(event_id_list, hits_list, truth_list):

        truth = truth.merge(hits, on=['hit_id'], how='left')
        dfh = truth.copy()
        dfh["rt"] = np.sqrt(dfh['x'].values**2 + dfh['y'].values**2)
        dfh = dfh.loc[dfh.z > 500]
        dfh = dfh.loc[(dfh.rt > 50) & (dfh.rt < 100)]

        label = model.predict(dfh)

        submission = pd.DataFrame(columns=['event_id', 'hit_id', 'track_id'],
                                  data=np.column_stack(([
                                      int(event_id),
                                  ] * len(dfh), dfh.hit_id.values,
                                                        label))).astype(int)
        score = score_event(dfh, submission)
        max_score = dfh.weight.sum()
        print("score: %0.5f  (%0.5f)" % (score * max_score, score))

    print("script end", datetime.datetime.now())
Example #27
def run_candidate():
    print("script begin", datetime.datetime.now())
    EPS = 1e-12
    model = models.UnrollingHelicesShiftingZ(
        dbscan_features = ["sina1", "cosa1", "z1", "z2", "x_y", "x_r", "y_r", "rt_r"],
        dbscan_weight   = [2.7474448671796874, 2.7474448671796874,
                           1.3649721713529086, 0.7034918842926337,
                           0.0005549122352940002, 0.023096034747190672,
                           0.04619756315527515, 0.2437077420144654],
        djs = [-20, -10, 0, 10, 20],
        niter = 150,
        eps0 = 0.00975)

    nevents = 1
    path_to_input = os.path.join(path_to_trackml, "train_1")
    path_to_out   = "out_{0}".format(sys.argv[0].split(".")[0])

    event_id_list = []
    hits_list = []
    truth_list = []
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(path_to_input, parts=["hits", "truth"],
                                              skip=0, nevents=nevents):
        print("size(hits) : ", len(hits))
        event_id_list.append(event_id)
        hits_list.append(hits)
        truth_list.append(truth)

    sys.stderr.write("scan\n")
    for (event_id, hits, truth) in zip(event_id_list, hits_list, truth_list):

        truth = truth.merge(hits,       on=['hit_id'],      how='left')
        dfh = truth.copy()
        label = model.predict(dfh)

        submission = pd.DataFrame(columns=['event_id', 'hit_id', 'track_id'],
                                  data=np.column_stack(([int(event_id),]*len(dfh),
                                                        dfh.hit_id.values,
                                                        label))).astype(int)
        submission.to_csv("05.csv", index=None)
        score = score_event(dfh, submission)
        max_score = dfh.weight.sum()        
        print("score: %0.5f  (%0.5f)" % (score*max_score, score))

    print("script end", datetime.datetime.now())
Example #28
    def Fun4BO(w_a1, w_z1, w_z2):
        model.dbscan_weight[0] = w_a1
        model.dbscan_weight[1] = w_a1
        model.dbscan_weight[2] = w_z1
        model.dbscan_weight[3] = w_z2

        sys.stderr.write("scan\n")
        score_list = []
        for (event_id, hits, truth) in zip(event_id_list, hits_list,
                                           truth_list):
            label = model.predict(hits)
            submission = pd.DataFrame(
                columns=['event_id', 'hit_id', 'track_id'],
                data=np.column_stack(([
                    int(event_id),
                ] * len(hits), hits.hit_id.values, label))).astype(int)
            score = score_event(truth, submission)
            score_list.append(score)
        return np.sum(score_list) / len(score_list)
Example #29
def run_dbscan():
    data_dir = '../input/train_1'

    event_ids = ['000001000']
    n_events = 0
    sum_score = 0
    for i, event_id in enumerate(event_ids):
        hits, cells, particles, truth = load_event(data_dir + '/event' +
                                                   event_id)
        labels = do_dbscan_predict(hits)
        submission = create_one_event_submission(0, hits['hit_id'].values,
                                                 labels)
        score = score_event(truth, submission)
        print('[%2d] score : %0.8f' % (i, score))
        sum_score += score
        n_events += 1

    print('--------------------------------------')
    print(sum_score / n_events)
Example #30
def run_candidate():
    print("script begin", datetime.datetime.now())
    EPS = 1e-12
    # sigma of z is 5.5 mm
    model = models.ZAScaleNFilter(djs=np.linspace(-2.25, 2.25 + EPS, 10),
                                  dis=np.linspace(-0.003, 0.003 + EPS, 25))
    # model = models.ZAScaleNFilter(djs=[-20, 0.0, 20],
    #                              dis=[0.0])
    nevents = 1
    path_to_input = os.path.join(path_to_trackml, "train_1")
    path_to_out = "out_{0}".format(sys.argv[0].split(".")[0])

    event_id_list = []
    hits_list = []
    truth_list = []
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=nevents):
        print("size(hits) : ", len(hits))
        event_id_list.append(event_id)
        hits_list.append(hits)
        truth_list.append(truth)

    sys.stderr.write("scan\n")
    for (event_id, hits, truth) in zip(event_id_list, hits_list, truth_list):

        truth = truth.merge(hits, on=['hit_id'], how='left')
        dfh = truth.copy()
        label = model.predict(dfh)

        submission = pd.DataFrame(columns=['event_id', 'hit_id', 'track_id'],
                                  data=np.column_stack(([
                                      int(event_id),
                                  ] * len(dfh), dfh.hit_id.values,
                                                        label))).astype(int)
        score = score_event(dfh, submission)
        max_score = dfh.weight.sum()
        print("score: %0.5f  (%0.5f)" % (score * max_score, score))

    print("script end", datetime.datetime.now())