def whole_evaluation(self):
    """Compare both recommender systems and save NDCG@k / MAP@k charts.

    Loads the five score columns of ``experiment_system_user`` for
    ``system_type`` 1 (plotted as "W2V") and ``system_type`` 2 (plotted as
    "RSTR"), prints the raw score lists and the metric curves returned by
    ``Evaluation``, and writes ``image/System_NDCG.png`` and
    ``image/System_MAP.png``.
    """
    metrics = Evaluation()

    def load_scores(sql):
        # Run the query and turn every fetched row into a 5-element list.
        self.cursor.execute(sql)
        return [[row[col] for col in range(5)]
                for row in self.cursor.fetchall()]

    w2v_scores = load_scores(
        "SELECT score1, score2, score3, score4, score5 From experiment_system_user Where system_type = 1"
    )
    rstr_scores = load_scores(
        "SELECT score1, score2, score3, score4, score5 From experiment_system_user Where system_type = 2"
    )

    print("w2v_scores:" + str(w2v_scores))
    print("length:" + str(len(w2v_scores)))
    print("rstr_scores:" + str(rstr_scores))
    print("length:" + str(len(rstr_scores)))

    w2v_ndcg = metrics.average_ndcg(w2v_scores)
    w2v_MAP = metrics.MAP(w2v_scores)
    rstr_ndcg = metrics.average_ndcg(rstr_scores)
    rstr_MAP = metrics.MAP(rstr_scores)

    # One entry per chart: banner printed before the curves, the two curves,
    # the figure title, the y-axis label and the output file.
    charts = (
        ("\nNDCG:", w2v_ndcg, rstr_ndcg,
         "Evaluate System Performance NDCG@k", "NDCG@k",
         'image/System_NDCG.png'),
        ("\nMAP:", w2v_MAP, rstr_MAP,
         "Evaluate System Performance MAP@k", "MAP@k",
         'image/System_MAP.png'),
    )
    for banner, w2v_curve, rstr_curve, title, y_label, target in charts:
        print(banner)
        print(w2v_curve)
        print(rstr_curve)
        plt.title(title)
        plt.xlabel("Top K Recommendation")
        plt.ylabel(y_label)
        # The x axis is k = 1..5, one point per score column.
        plt.plot(range(1, 6), w2v_curve, "-v", color='y', label="W2V")
        plt.plot(range(1, 6), rstr_curve, "-v", color='m', label="RSTR")
        plt.legend(loc="best")
        # save image
        plt.savefig(target)
        plt.close()
Пример #2
0
 def evaluate(self, args, data, cnn):
     """Rank the rows produced by ``cnn`` and report metrics in percent.

     For every batch the title and body ids are embedded, encoded by
     ``cnn``, and row 0 of the encoder output is scored (dot product)
     against every other row; presumably row 0 is the query and the rest
     are its candidates -- confirm against the data loader.

     Args:
         args: options object; only ``args.cuda`` is read, to decide
             whether the inputs are moved to the GPU.
         data: iterable of ``(idts, idbs, labels)``; ``idts``/``idbs`` are
             2-D id arrays and ``labels`` is indexable by an index array.
         cnn: callable taking ``(titles, bodies)`` and returning a 2-D
             tensor with one row per encoded item.

     Returns:
         Tuple ``(MAP, MRR, P@1, P@5)``, each multiplied by 100.
     """

     def embed(ids):
         # Look up the flattened ids, then restore the 2-D id layout with
         # the embedding dimension appended as the last axis.
         vectors = self.embedding.forward(ids.ravel())
         vectors = vectors.reshape(
             (ids.shape[0], ids.shape[1], self.embedding.n_d))
         tensor = Variable(torch.from_numpy(vectors)).float()
         return tensor.cuda() if args.cuda else tensor

     res = []
     for idts, idbs, labels in data:
         titles = embed(idts)
         bodies = embed(idbs)
         outputs = cnn(titles, bodies)
         pos = outputs[0].view(1, outputs[0].size(0))
         # squeeze(0) drops only the leading singleton axis. A bare
         # squeeze() would also collapse a single-candidate result to a
         # 0-d array and make the len() check below raise.
         scores = torch.mm(pos, outputs[1:].transpose(1, 0)).squeeze(0)
         # .cpu() returns the tensor itself when it already lives on the
         # CPU, so one code path serves both devices.
         scores = scores.data.cpu().numpy()
         assert len(scores) == len(labels)
         ranks = (-scores).argsort()  # indices by descending score
         ranked_labels = labels[ranks]
         res.append(ranked_labels)
     e = Evaluation(res)
     MAP = e.MAP() * 100
     MRR = e.MRR() * 100
     P1 = e.Precision(1) * 100
     P5 = e.Precision(5) * 100
     return MAP, MRR, P1, P5
Пример #3
0
    def evaluate(self, data, session):
        """Score and rank every query in ``data``, collecting per-query metrics.

        Args:
            data: iterable of ``(idts, idbs, labels, pid, qids)`` tuples.
                ``labels`` must be indexable by an index array and ``qids``
                convertible with ``np.array``.
            session: passed through unchanged to ``self.score_func``.

        Returns:
            An 8-tuple of parallel lists with one entry per query:
            ``(all_MAP, all_MRR, all_Pat1, all_Pat5, all_ranked_labels,
            all_ranked_ids, query_ids, all_ranked_scores)``.
            All lists are empty when ``data`` yields nothing.
        """
        eval_func = self.score_func
        all_ranked_labels = []
        all_ranked_ids = []
        all_ranked_scores = []
        query_ids = []
        all_MAP, all_MRR, all_Pat1, all_Pat5 = [], [], [], []
        for idts, idbs, labels, pid, qids in data:
            scores = eval_func(idts, idbs, session)
            assert len(scores) == len(labels)
            ranks = (-scores).argsort()  # indices by descending score
            ranked_scores = np.array(scores)[ranks]
            ranked_labels = labels[ranks]
            ranked_ids = np.array(qids)[ranks]
            query_ids.append(pid)
            all_ranked_labels.append(ranked_labels)
            all_ranked_ids.append(ranked_ids)
            all_ranked_scores.append(ranked_scores)
            # Metrics for this query alone, hence the single-element list.
            this_ev = Evaluation([ranked_labels])
            all_MAP.append(this_ev.MAP())
            all_MRR.append(this_ev.MRR())
            all_Pat1.append(this_ev.Precision(1))
            all_Pat5.append(this_ev.Precision(5))

        # Averaging an empty run would divide by zero, so the summary line is
        # only printed when at least one query was evaluated.
        if all_MAP:
            averages = tuple(
                sum(values) / len(values)
                for values in (all_MAP, all_MRR, all_Pat1, all_Pat5))
            # A single pre-formatted argument prints the same text under
            # Python 2 and Python 3 (the print statement is invalid on 3).
            print('average all ...  %s %s %s %s' % averages)
        return all_MAP, all_MRR, all_Pat1, all_Pat5, all_ranked_labels, all_ranked_ids, query_ids, all_ranked_scores
Пример #4
0
    def evaluate(self, data, sess):
        """Rank each batch by its scores and return metrics plus dev losses.

        ``data`` yields ``(idts, idbs, id_labels)``; scores come from
        ``self.eval_batch(idts, idbs, sess)``.  Returns
        ``(MAP, MRR, P1, P5, loss0, loss1, loss2)`` where the ranking
        metrics are the raw values returned by ``Evaluation``.
        """
        ranked_per_query = []
        label_batches = []
        score_batches = []

        for idts, idbs, id_labels in data:
            cur_scores = self.eval_batch(idts, idbs, sess)
            # The original author noted the expected length here is 20.
            assert len(id_labels) == len(cur_scores)

            label_batches.append(id_labels)
            score_batches.append(cur_scores)
            # Reorder the labels by descending score.
            ranked_per_query.append(id_labels[(-cur_scores).argsort()])

        metrics = Evaluation(ranked_per_query)
        MAP = metrics.MAP()
        MRR = metrics.MRR()
        P1 = metrics.Precision(1)
        P5 = metrics.Precision(5)

        # loss1 is computed by dev_entropy_loss when a non-zero mlp_dim is
        # configured, otherwise by devloss1.
        uses_mlp = 'mlp_dim' in self.args and self.args.mlp_dim != 0
        if uses_mlp:
            loss1 = dev_entropy_loss(label_batches, score_batches)
        else:
            loss1 = devloss1(label_batches, score_batches)
        loss0 = devloss0(label_batches, score_batches)
        loss2 = devloss2(label_batches, score_batches)
        return MAP, MRR, P1, P5, loss0, loss1, loss2
Пример #5
0
def evaluate(all_ranked_labels):
    """Return ``(MAP, MRR, P@1, P@5)`` in percent for the ranked label lists."""
    scorer = Evaluation(all_ranked_labels)
    raw_metrics = (
        scorer.MAP(),
        scorer.MRR(),
        scorer.Precision(1),
        scorer.Precision(5),
    )
    # Scale every metric by 100 before handing it back.
    return tuple(value * 100 for value in raw_metrics)
Пример #6
0
def evaluate(data, labels, model):
    """Evaluate ``model`` on ``data`` and print/return metrics in percent.

    The model is switched to evaluation mode via ``model.eval()``, the
    ranked results are obtained from ``compute_scores(data, labels, model)``
    and summarised by ``Evaluation``.

    Returns:
        Tuple ``(MAP, MRR, P@1, P@5)``, each multiplied by 100.
    """
    model.eval()
    res = compute_scores(data, labels, model)
    evaluation = Evaluation(res)
    MAP = evaluation.MAP() * 100
    MRR = evaluation.MRR() * 100
    P1 = evaluation.Precision(1) * 100
    P5 = evaluation.Precision(5) * 100
    # A single pre-formatted argument prints "MAP MRR P1 P5" identically
    # under Python 2 and Python 3; the bare print statement only parses on 2.
    print('%s %s %s %s' % (MAP, MRR, P1, P5))
    return MAP, MRR, P1, P5
Пример #7
0
 def evaluate(self, data, eval_func):
     """Rank each batch with ``eval_func`` and return metrics in percent.

     ``data`` yields ``(idts, idbs, labels)``; ``eval_func(idts, idbs)``
     must return one score per label.  Returns ``(MAP, MRR, P@1, P@5)``,
     each multiplied by 100.
     """
     ranked_batches = []
     for idts, idbs, labels in data:
         batch_scores = eval_func(idts, idbs)
         assert len(batch_scores) == len(labels)
         # Indices that sort the scores from highest to lowest.
         order = (-batch_scores).argsort()
         ranked_batches.append(labels[order])

     summary = Evaluation(ranked_batches)
     mean_ap = summary.MAP() * 100
     mean_rr = summary.MRR() * 100
     precision_at_1 = summary.Precision(1) * 100
     precision_at_5 = summary.Precision(5) * 100
     return mean_ap, mean_rr, precision_at_1, precision_at_5
    def evaluate(self, data, eval_func):
        """Rank candidates by a weighted average of several scores each.

        Each item of ``data`` is ``(idts, idbs, labels, weights)``; ``idbs``
        is not used here.  ``weights[0]`` holds one weight per query variant
        and every later entry of ``weights`` holds the weights of one
        candidate's variants.  For every query variant ``eval_func`` is
        called once, its flat result is cut into per-candidate chunks, and
        each candidate's final score is the average of all its chunks
        weighted by ``candidate_weight * query_weight``.

        Returns:
            Tuple ``(MAP, MRR, P@1, P@5)``, each multiplied by 100.
        """
        res = []
        for idts, idbs, labels, weights in data:
            qq_query_weights = weights[0]

            # individual_scores[j] / individual_scores_weights[j] accumulate
            # the raw scores and matching weights of candidate j.
            individual_scores = []
            individual_scores_weights = []

            # for every (original and generated) query question
            for i, qq_query_weight in enumerate(qq_query_weights):
                idts_t = idts.transpose()
                # Build a batch made of row i of the transposed ids followed
                # by every row after the first len(qq_query_weights) rows
                # (presumably query variant i plus all candidate variants --
                # TODO confirm the column layout of idts).
                idts_individual = np.array(
                    [idts_t[i]] + idts_t[len(qq_query_weights):].tolist(),
                    dtype=np.int32).transpose()
                # Score that batch; the result is consumed chunk by chunk below.
                scores_for_qq = eval_func(idts_individual)

                for j, cq_weights in enumerate(weights[1:]):
                    # First time candidate j is seen: create its accumulators.
                    if len(individual_scores) == j:
                        individual_scores.append([])
                        individual_scores_weights.append([])
                    # The next len(cq_weights) scores belong to candidate j.
                    individual_scores[j] += scores_for_qq[:len(cq_weights
                                                               )].tolist()
                    individual_scores_weights[j] += [
                        cq_weight * qq_query_weight for cq_weight in cq_weights
                    ]
                    # Drop the consumed chunk so the next candidate starts at 0.
                    scores_for_qq = scores_for_qq[len(cq_weights):]

            # Collapse each candidate's scores into one weighted average.
            scores = []
            for individual_scores_item, individual_scores_weights_item in zip(
                    individual_scores, individual_scores_weights):
                scores.append(
                    np.average(individual_scores_item,
                               weights=individual_scores_weights_item))

            assert len(scores) == len(labels)
            scores = np.array(scores)

            # Reorder the labels by descending score.
            ranks = (-scores).argsort()
            ranked_labels = labels[ranks]
            res.append(ranked_labels)
        e = Evaluation(res)
        MAP = e.MAP() * 100
        MRR = e.MRR() * 100
        P1 = e.Precision(1) * 100
        P5 = e.Precision(5) * 100
        return MAP, MRR, P1, P5
Пример #9
0
 def evaluate(self, data, eval_func):
     """Batch, score and rank every item of ``data``; metrics in percent.

     ``data`` yields ``(t, b, labels)``.  The pair ``(t, b)`` is turned into
     a batch with ``myio.create_one_batch`` using ``self.padding_id`` and
     only the first element of that batch is passed to ``eval_func``.
     Returns ``(MAP, MRR, P@1, P@5)``, each multiplied by 100.
     """
     ranked_batches = []
     for titles, bodies, labels in data:
         idts, _ = myio.create_one_batch(titles, bodies, self.padding_id)
         batch_scores = eval_func(idts)
         # NOTE: the score/label length check is intentionally not enforced
         # in this variant (it was commented out by the original author).
         descending = (-batch_scores).argsort()
         ranked_batches.append(labels[descending])

     report = Evaluation(ranked_batches)
     mean_ap = report.MAP() * 100
     mean_rr = report.MRR() * 100
     prec_1 = report.Precision(1) * 100
     prec_5 = report.Precision(5) * 100
     return mean_ap, mean_rr, prec_1, prec_5
Пример #10
0
 def on_test_epoch_end(self):
     """Print and log test accuracy and ranking metrics for the epoch.

     Accuracy comes from ``self.testaccuracy.compute()``; the ranking
     metrics are computed by ``Evaluation`` over ``self.eval_res`` and
     scaled by 100.  Returns ``(vacc, MAP, MRR, P1, P5)``.
     """
     print("Calculating test accuracy...")
     vacc = self.testaccuracy.compute()

     ranking = Evaluation(self.eval_res)
     MAP = ranking.MAP() * 100
     MRR = ranking.MRR() * 100
     P1 = ranking.Precision(1) * 100
     P5 = ranking.Precision(5) * 100

     print("Test accuracy:", vacc)
     print(MAP, MRR, P1, P5)

     # Log every value under its metric key, in a fixed order.
     logged = (
         ('test_acc_epoch', vacc),
         ('t_MAP', MAP),
         ('t_Mrr', MRR),
         ('t_p1', P1),
         ('t_p5', P5),
     )
     for key, value in logged:
         self.log(key, value)
     return vacc, MAP, MRR, P1, P5
 def individual_evaluation(self):
     """Plot NDCG@k and MAP@k of system 2 split by article relationship type.

     Every ``experiment_system_user`` row with ``system_type = 2`` is
     assigned to a group according to the ``relationship_type`` of its
     article ('1'..'6'); rows whose type matches none of these are ignored.
     The grouped score lists are printed, evaluated with ``Evaluation`` and
     plotted to ``image/Relationship_NDCG.png`` and
     ``image/Relationship_MAP.png``.
     """
     e = Evaluation()
     self.cursor.execute(
         "SELECT article_id, score1, score2, score3, score4, score5 From experiment_system_user Where system_type = 2"
     )

     kinship, romantic, friendship = [], [], []
     teacher_student, business, others = [], [], []
     # relationship_type value -> list collecting that group's score rows.
     group_by_type = {
         '1': kinship,
         '2': romantic,
         '3': friendship,
         '4': teacher_student,
         '5': business,
         '6': others,
     }

     for row in self.cursor.fetchall():
         # Look up the relationship type of the article this row rates.
         self.cursor.execute(
             "SELECT relationship_type From articles Where id = " +
             str(row[0]))
         relationship_type = self.cursor.fetchone()[0]
         print("article_id:" + str(row[0]) + " relationship_type:" +
               str(relationship_type))
         group = group_by_type.get(relationship_type)
         if group is not None:
             group.append([row[1], row[2], row[3], row[4], row[5]])

     # (printed heading, score rows, line colour, legend label) per group,
     # in the order used for printing, evaluation and plotting.
     groups = (
         ("kinship:", kinship, 'y', "kinship"),
         ("remantic:", romantic, 'm', "romantic relationship"),
         ("friendship:", friendship, 'g', "friendship"),
         ("teacher student:", teacher_student, 'b',
          "teacher student relationship"),
         ("business:", business, 'c', "business relationship"),
         ("others:", others, 'k', "others"),
     )

     for heading, rows, _, _ in groups:
         print(heading + str(rows))
         print("length:" + str(len(rows)))

     # All NDCG curves are computed before any MAP curve.
     ndcg_curves = [e.average_ndcg(rows) for _, rows, _, _ in groups]
     map_curves = [e.MAP(rows) for _, rows, _, _ in groups]

     charts = (
         ("Evaluate Different Relationship Performance NDCG@k", "NDCG@k",
          ndcg_curves, 'image/Relationship_NDCG.png'),
         ("Evaluate Different Relationship Performance MAP@k", "MAP@k",
          map_curves, 'image/Relationship_MAP.png'),
     )
     for title, y_label, curves, target in charts:
         plt.title(title)
         plt.xlabel("Top K Recommendation")
         plt.ylabel(y_label)
         for (_, _, colour, legend), curve in zip(groups, curves):
             plt.plot(range(1, 6), curve, "-v", color=colour, label=legend)
         plt.legend(loc="best")
         # save image
         plt.savefig(target)
         plt.close()
Пример #12
0
    def evaluate_z(self,
                   data,
                   data_raw,
                   ids_corpus,
                   zeval_func,
                   dump_path=None):
        """Evaluate ranking quality together with the selection mask ``z``.

        ``data[i]`` is ``(idts, labels)`` and ``data_raw[i]`` is
        ``(pid, qids, _)``.  ``zeval_func(idts)`` returns ``(scores, p1, z)``.
        When ``dump_path`` is given, the query and the three best-scored
        candidates of every item are written there with their rationale.

        Returns:
            ``(MAP, MRR, P1, P5, mean_p1, title_portion)`` where the first
            four are in percent, ``mean_p1`` is the sum of ``p1`` divided by
            ``len(data)`` and ``title_portion`` is the share of the summed
            ``z`` values that falls on title positions.
        """
        args = self.args
        padding_id = self.padding_id
        dumping = dump_path is not None

        p1_total = 0.0
        title_mass = 0.0
        selected_mass = 0.0
        ranked_batches = []
        dump_rows = []

        for idx, (idts, labels) in enumerate(data):
            pid, qids, _ = data_raw[idx]
            scores, p1, z = zeval_func(idts)
            assert len(scores) == len(labels)
            ranks = (-scores).argsort()  # indices by descending score
            ranked_batches.append(labels[ranks])
            p1_total += p1

            # Walk the columns of idts / z alongside their question ids.
            for wids_i, z_i, question_id in zip(idts.T, z.T, [pid] + qids):
                # z values of the non-padding positions only.
                kept = [
                    zv for wid, zv in zip(wids_i, z_i) if wid != padding_id
                ]
                title, body = ids_corpus[question_id]
                title_len = len(title)
                # The title span is taken from the front when args.merge is 1
                # or the question id is even, otherwise from the back.
                if args.merge == 1 or question_id % 2 == 0:
                    title_part = kept[:title_len]
                else:
                    title_part = kept[-title_len:]
                title_mass += sum(title_part)
                selected_mass += sum(kept)

            if dumping:
                dump_rows.append(("Query: ", idts[:, 0], z[:, 0], pid))
                for top in ranks[:3]:
                    heading = "Retrieved: {}  label={}".format(
                        scores[top], labels[top])
                    # Column 0 is the query, so candidate `top` is column
                    # top + 1.
                    dump_rows.append(
                        (heading, idts[:, top + 1], z[:, top + 1], qids[top]))

        if dumping:
            embedding_layer = self.embedding_layer
            padding = "<padding>"
            with open(dump_path, "w") as fout:
                for heading, wordids, z_col, question_id in dump_rows:
                    words = embedding_layer.map_to_words(wordids)
                    fout.write(heading + "\tID: {}\n".format(question_id))
                    fout.write("    " +
                               " ".join(w for w in words if w != padding) +
                               "\n")
                    fout.write("------------\n")
                    fout.write("Rationale:\n")
                    # Words whose z value is not 1 are masked with "__".
                    fout.write("    " + " ".join(
                        w if zv == 1 else "__"
                        for w, zv in zip(words, z_col)
                        if w != padding) + "\n")
                    fout.write("\n\n")

        summary = Evaluation(ranked_batches)
        MAP = summary.MAP() * 100
        MRR = summary.MRR() * 100
        P1 = summary.Precision(1) * 100
        P5 = summary.Precision(5) * 100
        # The 1e-8 keeps the division defined when nothing was selected.
        return MAP, MRR, P1, P5, p1_total / len(data), title_mass / (
            selected_mass + 1e-8)