Example #1
import time


def predict(model, test_data, pd):
    # Evaluate `model` on `test_data` for each configured prediction type.
    start_time = time.time()
    for t in pd['predict_type']:
        evaluator = QuantitativeEvaluator(predict_type=t)
        evaluator.get_ranks(test_data, model)
        mrr, mr = evaluator.compute_mrr()
        print('Type:', evaluator.predict_type, 'mr:', mr, 'mrr:', mrr)
    print('Prediction done. Elapsed time:', round(time.time() - start_time))
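A minimal usage sketch for the `predict` function above, assuming a trained model and unpickled test data from the surrounding pipeline; the 'predict_type' key is the one the function reads, but its values here are placeholders:

# Hypothetical invocation; 'w', 'l', 't' are placeholder type codes, and
# `model`/`test_data` are assumed to come from the surrounding pipeline.
pd = {'predict_type': ['w', 'l', 't']}
predict(model, test_data, pd)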
Example #2
    def mr_predict(self):
        # Pickle files must be opened in binary mode under Python 3.
        test_data = pickle.load(open(self.pd['test_data_path'], 'rb'))
        predictor = pickle.load(open(self.pd['model_pickled_path'], 'rb'))
        # Refresh the predictor with the current node/context embeddings.
        predictor.update_vec_cvec(self.nt2vecs, self.nt2cvecs)

        start_time = time.time()

        for t in self.pd['predict_type']:
            evaluator = QuantitativeEvaluator(predict_type=t)
            if self.pd['new_test_method']:
                evaluator.get_ranks_from_test_graph(test_data, predictor,
                                                    self.g_test)
                mrr, mr = evaluator.compute_mrr()
                print('Type:{} mr: {}, mrr: {} '.format(
                    evaluator.predict_type, mr, mrr))
                mrr, mr = evaluator.compute_highest_mrr()
                print('Type:{} hmr: {}, hmrr: {} '.format(
                    evaluator.predict_type, mr, mrr))
            else:
                evaluator.get_ranks(test_data, predictor)
                # evaluator.get_ranks_with_output(test_data, predictor, config.result_pre+str(epoch)+t+'.txt')
                mrr, mr = evaluator.compute_mrr()
                print('Type:{} mr: {}, mrr: {} '.format(
                    evaluator.predict_type, mr, mrr))
        print("Prediction done, elapsed time {}s".format(time.time() -
                                                         start_time))
        if self.pd['perform_case_study']:
            self.run_case_study(predictor, self.pd)
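Example #2 reads several keys from `self.pd`. A hypothetical config dict covering all of them; the key names come from the snippet above, while every value is a placeholder:

# Illustrative config only; paths, type codes, and flags are made up.
pd = {
    'test_data_path': 'data/test.pkl',
    'model_pickled_path': 'data/crossmap.pkl',
    'predict_type': ['w', 'l', 't'],
    'new_test_method': False,
    'perform_case_study': False,
}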
Example #3
    def mr_predict(self, node_embed):
        # Pickle files must be opened in binary mode under Python 3.
        test_data = pickle.load(open(config.test_data, 'rb'))
        predictor = pickle.load(open(config.crossmap, 'rb'))
        predictor.read_embedding_tf(config, node_embed)

        start_time = time.time()
        for t in config.predict_type:
            evaluator = QuantitativeEvaluator(predict_type=t)
            evaluator.get_ranks(test_data, predictor)
            mrr, mr = evaluator.compute_mrr()
            print('Type:', evaluator.predict_type, 'mr:', mr, 'mrr:', mrr)
        print('Prediction done. Elapsed time:', round(time.time() - start_time))
Example #4
    def mr_predict(self, node_embed, context_embed, epoch):
        # Pickle files must be opened in binary mode under Python 3.
        test_data = pickle.load(open(config.test_data, 'rb'))
        predictor = pickle.load(open(config.crossmap, 'rb'))
        predictor.read_embedding_tf(config, node_embed, context_embed)

        start_time = time.time()
        for t in config.predict_type:
            evaluator = QuantitativeEvaluator(predict_type=t)
            evaluator.get_ranks_with_output(
                test_data, predictor,
                config.result_pre + str(epoch) + t + '.txt')
            mrr, mr = evaluator.compute_mrr()
            print('Type:{} mr: {}, mrr: {} '.format(evaluator.predict_type,
                                                    mr, mrr))
        print('Prediction done. Elapsed time: {}s'.format(
            round(time.time() - start_time)))
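Examples #3 and #4 are evidently per-epoch evaluation hooks that receive the current embeddings from a training loop. A minimal sketch of such a loop, where `trainer`, `train_one_epoch`, and `get_embeddings` are hypothetical stand-ins that do not appear in the snippets above:

# Hypothetical driver; only mr_predict's signature is taken from Example #4.
for epoch in range(num_epochs):
    train_one_epoch(model)
    node_embed, context_embed = get_embeddings(model)
    trainer.mr_predict(node_embed, context_embed, epoch)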
Example #5
import time
from collections import defaultdict

import numpy as np


def predict(model, test_data, pd):
    start_time = time.time()
    test_graph = CrossData(pd['node_dict'], pd['test_edges'])
    for t in pd['predict_type']:
        evaluator = QuantitativeEvaluator(predict_type=t)
        if pd['new_test_method']:
            evaluator.get_ranks_from_test_graph(test_data, model, test_graph)
            mrr, mr = evaluator.compute_highest_mrr()
            print('Type:{} hmr: {}, hmrr: {} '.format(evaluator.predict_type,
                                                      mr, mrr))
        else:
            evaluator.get_ranks(test_data, model)
            mrr, mr = evaluator.compute_mrr()
            print('Type:{} mr: {}, mrr: {} '.format(evaluator.predict_type, mr,
                                                    mrr))
    print('Prediction done. Elapsed time: {}'.format(
        round(time.time() - start_time)))


def train_and_evaluate(tweets, voca, model_type='embed'):
    # model_type is one of ['embed', 'nmf', 'count', 'prod2vec', 'prod2vec_o'].
    # `pd` is assumed to be a module-level config dict here.
    print('#########################')
    print('Model Type: ', model_type)
    print('#########################')
    start_time = time.time()
    evaluators = [
        QuantitativeEvaluator(predict_type=predict_type, fake_num=10)
        for predict_type in pd['predict_type']
    ]
    # Bucket tweets by timestamp; ts // 3600 yields hourly buckets,
    # despite the variable name `day`.
    day2batch = defaultdict(list)
    for tweet in tweets:
        day = tweet.ts // 3600
        day2batch[day].append(tweet)

    batches = [day2batch[d] for d in sorted(day2batch)]
    # Sample test batches from the second half of the stream; integer
    # division is needed because range() requires int bounds.
    test_batch_indices = np.random.choice(range(len(batches) // 2,
                                                len(batches)),
                                          pd['test_batch_num'],
                                          replace=False)

    model = EmbedPredictor(pd)
    print('#########################')
    print('Count Measure: ', pd['update_strategy'])
    print('#########################')

    # training_batch = []
    for i, batch in enumerate(batches):
        if i % 200 == 0:
            print('time:', time.time() - start_time)

        if i in test_batch_indices:
            print('results for batch', i)
            for evaluator in evaluators:
                evaluator.get_ranks(batch, model)
                mrr, mr = evaluator.compute_mrr()
                print(evaluator.predict_type, 'mr:', mr, 'mrr:', mrr)
        model.partial_fit(batch)

    for evaluator in evaluators:
        mrr, mr = evaluator.compute_mrr()
        print(evaluator.predict_type, 'mr:', mr, 'mrr:', mrr)

    print('Model training and evaluation done, elapsed time: ',
          round(time.time() - start_time))

    return model
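Every example above reports `mr` and `mrr` from `QuantitativeEvaluator.compute_mrr()`. For reference, a minimal sketch of how mean rank and mean reciprocal rank are conventionally computed from 1-based ranks; this illustrates the metrics themselves, not the evaluator's actual implementation:

def mean_rank_and_mrr(ranks):
    # `ranks` holds one 1-based rank per test instance; rank 1 means
    # the ground-truth item was ranked first.
    mr = sum(ranks) / len(ranks)
    mrr = sum(1.0 / r for r in ranks) / len(ranks)
    return mrr, mr

# Three instances ranked 1st, 4th, and 10th:
mrr, mr = mean_rank_and_mrr([1, 4, 10])
print('mr:', mr, 'mrr:', round(mrr, 3))  # mr: 5.0 mrr: 0.45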