Exemplo n.º 1
0
 def test_to_tf(self):
     """Check the object ``idxs_to_tf`` returns for the index list ``[0, 1]``.

     The returned object is expected to expose ``indices``, ``values`` and
     ``dense_shape``; each is converted to a NumPy array and compared
     element-wise with its expected value.
     """
     mapper = InteractionMapper('./resources/map')
     result = mapper.idxs_to_tf([0, 1])
     # (actual, expected, failure message) triples, verified in this order.
     checks = (
         (result.indices, [[0, 0], [1, 1]], "indices incorrect"),
         (result.values, [1, 1], "values incorrect"),
         (result.dense_shape, [2, mapper.interaction_class_cnt],
          "shape incorrect"),
     )
     for actual, expected, message in checks:
         np.testing.assert_array_equal(np.array(actual),
                                       np.array(expected),
                                       err_msg=message)
Exemplo n.º 2
0
    def test_knn_interaction_query(self):
        """Compare ConditionalIndex queries with a filtered InteractionIndex query.

        Both indices are built from the same mapper and the same vectors.
        A query restricted to condition 0 / condition 1 must return the
        first three keys of a large (k=100) unrestricted query that satisfy
        the matching predicate, and an unrestricted conditional query must
        equal the plain index query.
        """
        mapper = InteractionMapper("./resources/map")
        frame = pd.read_csv(
            "./resources/interaction_index/interaction_index.txt", header=None)
        for column in frame.columns:
            frame[column] = frame[column].astype(float)

        def is_long(key):
            return len(key) > 1

        def is_short(key):
            return len(key) <= 1

        # Position in this list is the condition number passed to the query.
        predicates = [is_long, is_short]
        conditional = ConditionalIndex(mapper, frame.values, predicates)
        plain = InteractionIndex(mapper, frame.values)

        # Keys of a wide unrestricted query, used as the reference ordering.
        reference_keys = plain.knn_interaction_query("a", k=100)[0]

        long_hits = conditional.knn_interaction_query("a", 0, k=3)[0]
        np.testing.assert_array_equal(
            long_hits, [key for key in reference_keys if len(key) > 1][:3])

        short_hits = conditional.knn_interaction_query("a", 1, k=3)[0]
        np.testing.assert_array_equal(
            short_hits, [key for key in reference_keys if len(key) <= 1][:3])

        unrestricted_hits = conditional.knn_interaction_query("a", k=3)[0]
        np.testing.assert_array_equal(
            unrestricted_hits, plain.knn_interaction_query("a", k=3)[0])
Exemplo n.º 3
0
    def test_knn_idx_query(self):
        """Query a two-vector index by idx 1 and expect "a" as the first hit."""
        mapper = InteractionMapper("./resources/map")
        vectors = [[0.0, 0.0, 0.0], [1.0, 2.0, 3.0]]
        index = InteractionIndex(mapper, vectors)

        hits = index.knn_idx_query(1)
        first_key = hits[0][0]

        self.assertTrue(first_key == "a")
Exemplo n.º 4
0
    def test_knn_interaction_query_exception(self):
        """Query with an empty interaction string and expect three empty parts.

        NOTE(review): the test name suggests the empty string triggers an
        internally handled error path - confirm against InteractionIndex.
        """
        mapper = InteractionMapper("./resources/map")
        vectors = [[0.0, 0.0, 0.0], [1.0, 2.0, 3.0]]
        index = InteractionIndex(mapper, vectors)

        outcome = index.knn_interaction_query("")

        # The first three components of the result must all be empty.
        for position in range(3):
            self.assertTrue(len(outcome[position]) == 0)
Exemplo n.º 5
0
 def test_constructor(self):
     """Check that the mapper loaded from the test resources has 50 classes."""
     expected = 50
     mapper = InteractionMapper('./resources/map')
     actual = mapper.interaction_class_cnt
     # On failure the message shows both numbers as "<actual>!=<expected>".
     failure_text = "{}!={}".format(actual, expected)
     self.assertTrue(actual == expected, msg=failure_text)
Exemplo n.º 6
0
    # "/home/chambroc/Desktop/RecoResults/ThreeInARow/day1/interaction_indexing",
    # "/home/chambroc/Desktop/RecoResults/ThreeInARow/day2/interaction_indexing",
    # "/home/chambroc/Desktop/RecoResults/ThreeInARow/day3/interaction_indexing",
    # "/home/chambroc/Desktop/RecoResults/ThreeInARow120/day1/interaction_indexing",
    "/home/chambroc/Desktop/RecoResults/ThreeInARow120/day2/interaction_indexing",
    "/home/chambroc/Desktop/RecoResults/ThreeInARow120/day3/interaction_indexing",
    # "/home/chambroc/Desktop/RecoResults/ThreeInARowMoreEvents/day1/interaction_indexing",
    "/home/chambroc/Desktop/RecoResults/ThreeInARowMoreEvents/day2/interaction_indexing",
    "/home/chambroc/Desktop/RecoResults/ThreeInARowMoreEvents/day3/interaction_indexing",
])
cf.method = 'ghtree'
print("building maps and indices......")
# One InteractionIndex per entry of cf.source_dirs, in the same order.
iis = []
# The loop variable is `source_dir`, not `dir`: at module level `dir` would
# shadow the builtin of the same name for everything that follows.
for source_dir in cf.source_dirs:
    print(source_dir)
    im = InteractionMapper(map_path=source_dir)
    print("...map ready")
    print("building index...")
    pd_df = pd.read_csv(source_dir + "/interaction_index.txt", header=None)
    # Force every column to float before handing the raw values to the index.
    for col in pd_df.columns:
        pd_df[col] = pd_df[col].astype(float)
    # Append in place instead of rebuilding the list on every iteration.
    iis.append(
        InteractionIndex(im, pd_df.values, method=cf.method, space=cf.space))
    print("...index ready")

# The class count is read from the first index only.
# NOTE(review): presumably all source directories share one mapping - confirm.
num_classes = iis[0].im.interaction_class_cnt
ewma_dt = EWMA(100)
ewma_frac = EWMA(10000)
cnt = CNT()
jaccard_cnt = CNT()
Exemplo n.º 7
0
cf = Config("/home/chambroc/Desktop/RecoResults/ThreeInARowMoreEvents/day3/interaction_indexing")
cf.method = "hnsw"

print("loading data...")
pd_df = pd.read_csv(cf.source_dir + "/interaction_index.txt", header=None)
# Force every column to float before the values are handed to the index.
for col in pd_df.columns:
    pd_df[col] = pd_df[col].astype(float)

print("building conditional index...")

# Key predicates, one per condition: keys containing "suche", keys containing
# "/p/", and keys containing neither of the two.
filter_funs = [
    lambda key: "suche" in key,
    lambda key: "/p/" in key,
    lambda key: "suche" not in key and "/p/" not in key,
]

multi_index = ConditionalIndex(
    InteractionMapper(map_path=cf.source_dir),
    pd_df.values,
    lambdas_of_key=filter_funs,
    method=cf.method,
    space=cf.space,
)
# NOTE: building an additional, unconditional InteractionIndex over the same
# values is currently disabled.

print("...index ready")

# Module-level state set up once the index exists.
ewma_dt = EWMA(100)
ewma_frac = EWMA(10000)
cnt = CNT()
num_classes = multi_index.im.interaction_class_cnt

class RecoResource(object):
    def on_get(self, req, resp):
Exemplo n.º 8
0
def run(config):
    """Train a network as described by ``config`` and write the run outputs.

    The function resets the default TensorFlow graph, builds the interaction
    mapper, network, data loaders and trainer, then feeds training batches
    until ``train_loader.epoch_cnt`` reaches ``config.epochs``. Whenever the
    train loader reports a new epoch, a test batch is evaluated, the
    embedding vectors are fetched, an ``InteractionIndex`` is built from them
    and metric plots are logged. Afterwards the timeline profile is written,
    the most recent index is stored at ``config.index_safe_path`` and a
    ``_SUCCESS`` marker file is created in ``config.output_run_dir``.

    If no epoch finished (so no index or metric profiler exists), the final
    metric logging, the index saving and the ``_SUCCESS`` marker are skipped
    instead of failing with an ``AttributeError``.

    Args:
        config: Run configuration object. Read here: the data and output
            paths, ``batch_size``, ``epochs``, ``continnue_previous_run`` and
            ``previous_successful_output_run_dir``; it is also passed on to
            the network, loaders, trainer and writers.

    Returns:
        None.
    """
    tf.reset_default_graph()
    cf = config
    um = InteractionMapper(cf.path_interaction_map)
    # Both stay None until the first completed epoch creates them.
    ii = None
    mp = None
    if cf.continnue_previous_run:
        # Warm start: reuse the embeddings written by the previous run.
        pd_df = pd.read_csv(cf.previous_successful_output_run_dir +
                            "/interaction_indexing/interaction_index.txt",
                            header=None)
        for col in pd_df.columns:
            pd_df[col] = pd_df[col].astype(np.float32)
        network = Network(cf, um, preheated_embeddings=pd_df.values)
    else:
        network = Network(cf, um)
    train_loader = ld.Loader(cf, um, cf.path_train_data)
    test_loader = ld.Loader(cf, um, cf.path_test_data)
    trainer = Trainer(cf, network)

    cf.make_dirs()
    tbw = TensorboardWriter(cf)
    # The context manager closes the session on exit, so no explicit close.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        log_txt = "\n\n".join([
            "Config: " + cf.to_string(),
            "Interaction mapper: " + um.to_string(),
            "Train Loader @start: " + train_loader.to_string(),
            "Test Loader @start: " + test_loader.to_string(),
        ])
        tbw.log_info(sess, log_txt)

        while train_loader.epoch_cnt < cf.epochs:
            # perf_counter is monotonic and high resolution, which suits
            # short elapsed-time measurements better than the wall clock.
            tb = time.perf_counter()
            batch_x, batch_y, target_distance = train_loader.get_next_batch(
                cf.batch_size)
            # X coordinate shared by every value logged for this step.
            x_label = (1000 * train_loader.event_cnt /
                       train_loader.tot_event_cnt + train_loader.epoch_cnt)

            dt_batching = time.perf_counter() - tb
            tt = time.perf_counter()
            tensorboard_log_entry = trainer.train(sess, batch_x, batch_y,
                                                  target_distance)
            dt_tensorflow = time.perf_counter() - tt
            dt_all = time.perf_counter() - tb
            events_per_sec_in_thousand = cf.batch_size / dt_all / 1000

            tbw.add_train_summary(tensorboard_log_entry, x_label)
            tbw.log_scalar(events_per_sec_in_thousand,
                           x_label,
                           tag="performance_metric: 1000 events per second")
            tbw.log_scalar(
                dt_tensorflow / dt_batching,
                x_label,
                tag=
                "performance_metric: delta time tensorflow / delta time batch processing"
            )

            if train_loader.new_epoch:
                # End-of-epoch evaluation on a test batch 100 times larger.
                batch_x, batch_y, target_distance = test_loader.get_next_batch(
                    cf.batch_size * 100, fake_factor=0)
                print("epochs: " + str(train_loader.epoch_cnt))
                print("trainer testing...")
                tensorboard_log_entry = trainer.test(sess, batch_x, batch_y,
                                                     target_distance)
                tbw.add_test_summary(tensorboard_log_entry, x_label)
                tbw.flush()

                print("calculating embedding...")
                embedding_vectors = trainer.get_interaction_embeddings(sess)
                print("calculating average normalization...")
                tbw.log_scalar(
                    np.average(np.linalg.norm(embedding_vectors, axis=1)),
                    x_label,
                    tag=
                    "evaluation_metric: average norm of embedding vectors (normalization condition will force it towards 1)"
                )
                print("building index...")
                ii = InteractionIndex(um, embedding_vectors)
                print("metric profiling...")
                mp = MetricProfiler(cf, sess, tbw, train_loader, um, ii)
                mp.log_plots(x_label)
                print("epoch done")

        print("final logging...")
        # No profiler exists when the loop ended before any epoch finished.
        if mp is not None:
            mp.log_results()
        print("write timeline profile...")
        with open(cf.timeline_profile_path, 'w') as f:
            f.write(trainer.chrome_trace())

        tbw.flush()

    if ii is None:
        # Without a completed epoch there is nothing to store, and the run
        # must not be advertised as successful.
        print("no epoch completed: no index saved, _SUCCESS not generated")
        return

    print("saving index...")
    ii.safe(cf.index_safe_path)

    Path(cf.output_run_dir + '/_SUCCESS').touch()
    print("success: _SUCCESS generated")