Example #1
def validate(valid_loader, model):
    model.eval()
    recalls10 = []
    mrrs10 = []
    recalls20 = []
    mrrs20 = []
    recalls50 = []
    mrrs50 = []
    with torch.no_grad():
        for seq, target, lens in tqdm(valid_loader,
                                      total=len(valid_loader),
                                      miniters=1000):
            seq = seq.to(device)
            target = target.to(device)
            outputs = model(seq, lens)
            logits = F.softmax(outputs, dim=1)
            recall10, mrr10 = metric.evaluate(logits, target, k=10)
            recall20, mrr20 = metric.evaluate(logits, target, k=20)
            recall50, mrr50 = metric.evaluate(logits, target, k=50)

            recalls10.append(recall10)
            mrrs10.append(mrr10)
            recalls20.append(recall20)
            mrrs20.append(mrr20)
            recalls50.append(recall50)
            mrrs50.append(mrr50)

    mean_recall10 = np.mean(recalls10)
    mean_mrr10 = np.mean(mrrs10)
    mean_recall20 = np.mean(recalls20)
    mean_mrr20 = np.mean(mrrs20)
    mean_recall50 = np.mean(recalls50)
    mean_mrr50 = np.mean(mrrs50)

    return mean_recall10, mean_mrr10, mean_recall20, mean_mrr20, mean_recall50, mean_mrr50
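
The validation loops in this listing (here and in Examples #6 and #7; Example #17's variant also returns NDCG) all lean on a metric.evaluate(logits, target, k) helper that is never shown. A minimal sketch of what such a helper typically computes for session-based recommendation, assuming logits of shape (batch, n_items) and a 1-D tensor of target item indices; this is an illustration, not the original module:

import torch

def evaluate(logits, target, k=20):
    # indices of the k highest-scoring items per row: (batch, k)
    _, topk_idx = logits.topk(k, dim=1)
    # hits[i, j] is True where row i's target appears at rank j
    hits = topk_idx == target.view(-1, 1)
    # Recall@k: fraction of rows whose target made the top k
    recall = hits.any(dim=1).float().mean().item()
    # MRR@k: reciprocal rank of the hit, zero when the target is missed
    ranks = hits.float().argmax(dim=1) + 1
    mrr = (hits.any(dim=1).float() / ranks.float()).mean().item()
    return recall, mrr
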
Example #2
    def evaluate(self):

        # prefer the second test/result pair when one is configured
        if self.test_file2:
            test_file, result_file = self.test_file2, self.result_file2
        else:
            test_file, result_file = self.test_file, self.result_file

        # gold labels: the first whitespace-separated token of every line
        with open(test_file) as in_file:
            gold_data = [int(line.strip().split()[0]) for line in in_file]

        # classifier output: same layout, after skipping the header row
        with open(result_file) as in_file:
            classifier_data = [
                int(line.strip().split()[0])
                for line in in_file.readlines()[1:]
            ]

        p, r, f, auc = evaluate(np.asarray(gold_data),
                                np.asarray(classifier_data))

        return (p, r, f, auc)
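
Both branches above, like combined_result in Example #15, assume whitespace-separated files: the gold file carries an integer label as the first token of every line, and the classifier output carries a header row followed by a prediction and per-class scores. A hypothetical illustration of the two layouts (all values invented):

# gold file (test_file)        # classifier output (result_file)
# 1  ...                       # -1 1              <- header row: class labels
# -1 ...                       # -1 0.83 0.17      <- prediction, per-class scores
# 1  ...                       # 1  0.42 0.58
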
Example #3
def predict(config):
    pprint.pprint(config)
    if config.model == 'bs.ngram_beam_search':
        model_func = bs.ngram_beam_search
    elif config.model == 's2s':
        model_func = s2s.query
    else:
        raise NotImplementedError()

    source = load_data(config.test_data_source)
    target = load_data(config.test_data_target)
    predictions = []
    ground_truth = []
    index = 0
    for truth, data in zip(target, source):
        if index % 10000 == 0 and index > 0:
            # periodic progress report on the examples processed so far
            print(index)
            pprint.pprint(metric.evaluate(ground_truth, predictions, config.k))
        index += 1
        data = data.split('|')
        (context, pinyin) = data
        context = "".join(context.split())
        pinyin = pu.segment_with_hint(metric.normalize_text(pinyin))
        context = "".join(context.split())
        prediction = model_func(context.strip(), pinyin)
        #print(prediction)
        #print(truth)
        predictions.append(prediction)
        ground_truth.append(truth)
    pprint.pprint(metric.evaluate(ground_truth, predictions, config.k))
Example #4
File: model.py  Project: MS0147/1234
 def validate(self):
     '''
     Validate by AUC value.
     :return: (roc_auc, best_threshold, best_f1)
     '''
     y_, y_pred = self.predict(self.dataloader["val"])
     rocprc, rocauc, best_th, best_f1 = evaluate(y_, y_pred)
     return rocauc, best_th, best_f1
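
Examples #4, #5, #8, and #14 share an evaluate(y_true, y_score) helper returning (auc_prc, auc_roc, best_threshold, best_f1). A minimal sketch of such a helper on top of scikit-learn, offered as an assumption about its behavior rather than the project's actual code:

import numpy as np
from sklearn.metrics import (average_precision_score, precision_recall_curve,
                             roc_auc_score)

def evaluate(y_true, y_score):
    auc_prc = average_precision_score(y_true, y_score)  # area under the PR curve
    auc_roc = roc_auc_score(y_true, y_score)            # area under the ROC curve
    # sweep candidate thresholds and keep the one that maximizes F1
    precision, recall, thresholds = precision_recall_curve(y_true, y_score)
    f1 = 2 * precision * recall / (precision + recall + 1e-12)
    best = int(np.argmax(f1[:-1]))  # the last PR point has no threshold
    return auc_prc, auc_roc, thresholds[best], f1[best]
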
Example #5
File: model.py  Project: MS0147/1234
    def test_type(self):
        self.G.eval()
        self.D.eval()
        res_th = self.opt.threshold
        save_dir = os.path.join(self.outf, self.model, self.dataset, "test",
                                str(self.opt.folder))

        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        y_N, y_pred_N = self.predict(self.dataloader["test_N"], scale=False)
        y_S, y_pred_S = self.predict(self.dataloader["test_S"], scale=False)
        y_V, y_pred_V = self.predict(self.dataloader["test_V"], scale=False)
        y_F, y_pred_F = self.predict(self.dataloader["test_F"], scale=False)
        y_Q, y_pred_Q = self.predict(self.dataloader["test_Q"], scale=False)
        over_all = np.concatenate(
            [y_pred_N, y_pred_S, y_pred_V, y_pred_F, y_pred_Q])
        over_all_gt = np.concatenate([y_N, y_S, y_V, y_F, y_Q])
        min_score, max_score = np.min(over_all), np.max(over_all)
        A_res = {"S": y_pred_S, "V": y_pred_V, "F": y_pred_F, "Q": y_pred_Q}
        self.analysisRes(y_pred_N, A_res, min_score, max_score, res_th,
                         save_dir)

        # save figures for interpretability
        # self.predictForRight(self.dataloader["test_N"], save_dir=os.path.join(save_dir, "N"))
        for label in ("S", "V", "F", "Q"):
            self.predict_for_right(self.dataloader["test_" + label],
                                   min_score,
                                   max_score,
                                   res_th,
                                   save_dir=os.path.join(save_dir, label))
        aucprc, aucroc, best_th, best_f1 = evaluate(
            over_all_gt, (over_all - min_score) / (max_score - min_score))
        print("#############################")
        print("########  Result  ###########")
        print("ap:{}".format(aucprc))
        print("auc:{}".format(aucroc))
        print("best th:{} --> best f1:{}".format(best_th, best_f1))

        with open(os.path.join(save_dir, "res-record.txt"), 'w') as f:
            f.write("auc_prc:{}\n".format(aucprc))
            f.write("auc_roc:{}\n".format(aucroc))
            f.write("best th:{} --> best f1:{}".format(best_th, best_f1))
Example #6
def validate(valid_loader, model):
    model.eval()
    recalls = []
    mrrs = []
    with torch.no_grad():
        for seq, target, lens in tqdm(valid_loader):
            seq = seq.to(device)
            target = target.to(device)
            outputs = model(seq, lens)
            logits = F.softmax(outputs, dim=1)
            recall, mrr = metric.evaluate(logits, target, k=args.topk)
            recalls.append(recall)
            mrrs.append(mrr)

    mean_recall = np.mean(recalls)
    mean_mrr = np.mean(mrrs)
    return mean_recall, mean_mrr
Example #7
def validate(valid_loader, model):
    model.eval()
    recalls = []
    mrrs = []
    with torch.no_grad():
        for seq, target, lens in valid_loader:
            seq = seq.to(device)
            target = target.to(device)
            lens = torch.tensor(lens).to(device)
            scores = model.full_sort_predict(seq, lens)
            logits = F.softmax(scores, dim=1)
            recall, mrr = metric.evaluate(logits, target, k=topk)
            recalls.append(recall)
            mrrs.append(mrr)

    mean_recall = np.mean(recalls)
    mean_mrr = np.mean(mrrs)
    return mean_recall, mean_mrr
Example #8
    def test_score_dist(self):
        self.load()
        self.G.eval()
        self.D.eval()
        y_, y_pred = self.predict(self.dataloader["test"])
        f = plt.figure()
        ax = f.add_subplot(111)
        X1 = []
        X2 = []
        for gt, sc in zip(y_, y_pred):
            if gt == 0:
                X1.append(sc)
            else:
                X2.append(sc)

        _, bins, _ = ax.hist(X1,
                             bins=55,
                             range=[0, 1],
                             density=True,
                             alpha=0.3,
                             color='r',
                             label="walk")
        _ = ax.hist(X2,
                    bins=bins,
                    alpha=0.3,
                    density=True,
                    color='b',
                    label="others")
        ax.set_yticks([])
        ax.set_xticks(np.arange(0, 1.2, 0.2))
        ax.legend()
        f.savefig(os.path.join("./output", "dist.pdf"))
        auc_prc, roc_auc, best_threshold, best_f1 = evaluate(y_, y_pred)
        print("ap:{}".format(auc_prc))
        print("auc:{}".format(roc_auc))
        print("best threshold:{}  ==> F1:{}".format(best_threshold, best_f1))
Example #9
File: FPMC.py  Project: biayangQI/FPMC-1
    def learnSBPR_FPMC(self,
                       log_path_txt,
                       tr_data,
                       te_data=None,
                       n_epoch=10,
                       neg_batch_size=10,
                       eval_per_epoch=True,
                       Ks=[10, 20, 50],
                       Ks_auc=[100]):

        best_result_recall = [0] * len(Ks)
        best_result_mrr = [0] * len(Ks)
        best_result_ndcg = [0] * len(Ks)
        best_result_auc = [0] * len(Ks_auc)

        best_epoch_auc = [0] * len(Ks_auc)
        best_epoch_ndcg = [0] * len(Ks)
        best_epoch_recall = [0] * len(Ks)
        best_epoch_mrr = [0] * len(Ks)

        for epoch in range(n_epoch):
            self.learn_epoch(tr_data, neg_batch_size=neg_batch_size)

            if eval_per_epoch:
                scores, ground_truth = self.predict(tr_data)
                recall, mrr, ndcg, auc = evaluate(scores=scores,
                                                  ground_truth=ground_truth,
                                                  Ks=Ks,
                                                  Ks_auc=Ks_auc)

                # print train data result
                file_write(log_path_txt,
                           f'Current Train Data Result Epoch {epoch}:')
                print_result(log_path_txt, 'Recall', recall, Ks)
                print_result(log_path_txt, 'MRR', mrr, Ks)
                print_result(log_path_txt, 'NDCG', ndcg, Ks)
                print_auc_result(log_path_txt, 'AUC', auc, Ks_auc)

            # start test
            scores, ground_truth = self.predict(te_data)
            recall, mrr, ndcg, auc = evaluate(scores=scores,
                                              ground_truth=ground_truth,
                                              Ks=Ks,
                                              Ks_auc=Ks_auc)

            for i, topk in enumerate(Ks):
                if recall[i] >= best_result_recall[i]:
                    best_result_recall[i] = recall[i]
                    best_epoch_recall[i] = epoch
                if mrr[i] >= best_result_mrr[i]:
                    best_result_mrr[i] = mrr[i]
                    best_epoch_mrr[i] = epoch
                if ndcg[i] >= best_result_ndcg[i]:
                    best_result_ndcg[i] = ndcg[i]
                    best_epoch_ndcg[i] = epoch
            for i, k in enumerate(Ks_auc):
                if auc[i] >= best_result_auc[i]:
                    best_result_auc[i] = auc[i]
                    best_epoch_auc[i] = epoch

            # print test result
            cprint(log_path_txt, f'Current Test Data Result Epoch {epoch}:')
            print_result(log_path_txt, 'Recall', recall, Ks)
            print_result(log_path_txt, 'MRR', mrr, Ks)
            print_result(log_path_txt, 'NDCG', ndcg, Ks)
            print_auc_result(log_path_txt, 'AUC', auc, Ks_auc)

            file_write(log_path_txt, 'Best Result: ')
            file_write(log_path_txt, f'best_epoch_recall: {best_epoch_recall}')
            file_write(log_path_txt, f'best_epoch_mrr: {best_epoch_mrr}')
            file_write(log_path_txt, f'best_epoch_ndcg: {best_epoch_ndcg}')
            file_write(log_path_txt, f'best_epoch_auc: {best_epoch_auc}')

            print_result(log_path_txt, 'Recall', best_result_recall, Ks)
            print_result(log_path_txt, 'MRR', best_result_mrr, Ks)
            print_result(log_path_txt, 'NDCG', best_result_ndcg, Ks)
            print_auc_result(log_path_txt, 'AUC', best_result_auc, Ks_auc)

        return None
Example #10
def train_test():
    '''
    load config
    '''
    rel_level = 2
    max_q_len_consider = 10
    max_d_len_consider = 1000
    if args.config is not None:
        model_config = json.load(open(args.config))
        print('model config: {}'.format(model_config))
    '''
    load word vector
    '''
    w2v_file = os.path.join(args.data_dir, 'w2v')
    vocab_file = os.path.join(args.data_dir, 'vocab')
    print('loading word vector ...')
    wv = WordVector(filepath=w2v_file)
    vocab = Vocab(filepath=vocab_file, file_format=args.format)
    print('vocab size: {}, word vector dim: {}'.format(wv.vocab_size, wv.dim))
    if not args.tfrecord:
        '''
        load data (placeholder)
        '''
        train_file = os.path.join(args.data_dir,
                                  'train.prep.{}'.format(args.paradigm))
        test_file = os.path.join(args.data_dir,
                                 'test.prep.{}'.format(args.paradigm))
        test_file_judge = os.path.join(args.data_dir, 'test.prep.pointwise')
        doc_file = os.path.join(args.data_dir, 'docs.prep')
        if args.format == 'ir':
            query_file = os.path.join(args.data_dir, 'query.prep')
        print('loading query doc content ...')
        doc_raw = load_prep_file(doc_file, file_format=args.format)
        if args.format == 'ir':
            query_raw = load_prep_file(query_file, file_format=args.format)
        else:
            query_raw = doc_raw
        print('truncate long document')
        d_long_count = 0
        avg_doc_len, avg_truncate_doc_len = 0, 0
        truncate_len = max(max_q_len_consider, max_d_len_consider)
        for d in doc_raw:
            avg_doc_len += len(doc_raw[d])
            if len(doc_raw[d]) > truncate_len:
                d_long_count += 1
                doc_raw[d] = doc_raw[d][:truncate_len]
                avg_truncate_doc_len += truncate_len
            else:
                avg_truncate_doc_len += len(doc_raw[d])
        avg_doc_len = avg_doc_len / len(doc_raw)
        avg_truncate_doc_len = avg_truncate_doc_len / len(doc_raw)
        print(
            'total doc: {}, long doc: {}, average len: {}, average truncate len: {}'
            .format(len(doc_raw), d_long_count, avg_doc_len,
                    avg_truncate_doc_len))
        max_q_len = min(max_q_len_consider,
                        max([len(query_raw[q]) for q in query_raw]))
        max_d_len = min(max_d_len_consider,
                        max([len(doc_raw[d]) for d in doc_raw]))
        print('data assemble with max_q_len: {}, max_d_len: {} ...'.format(
            max_q_len, max_d_len))

        def relevance_mapper(r):
            if r < 0:
                return 0
            if r >= rel_level:
                return rel_level - 1
            return r

        train_X, train_y, batcher = data_assemble(
            train_file,
            query_raw,
            doc_raw,
            max_q_len,
            max_d_len,
            relevance_mapper=relevance_mapper)
        '''
        doc_len_list = []
        for q_x in train_X:
            for d in q_x['qd_size']:
                doc_len_list.append(d[1])
        doc_len_list = np.array(doc_len_list, dtype=np.int32)
        doc_len_list = [min(max_jump_offset ** 2 / d, max_jump_offset) for d in doc_len_list]
        plt.hist(doc_len_list, bins=max_jump_offset)
        plt.xlim(xmin=0, xmax=max_jump_offset)
        plt.xlabel('preserve number')
        plt.ylabel('number')
        plt.show()
        '''
        test_X, test_y, _ = data_assemble(test_file,
                                          query_raw,
                                          doc_raw,
                                          max_q_len,
                                          max_d_len,
                                          relevance_mapper=relevance_mapper)
        if args.paradigm == 'pairwise':
            test_X_judge, test_y_judge, _ = data_assemble(
                test_file_judge,
                query_raw,
                doc_raw,
                max_q_len,
                max_d_len,
                relevance_mapper=relevance_mapper)
        else:
            test_X_judge, test_y_judge = test_X, test_y
        print('number of training samples: {}'.format(
            sum([len(x['query']) for x in train_X])))
        '''
        load judge file
        '''
        test_qd_judge = load_judge_file(test_file_judge,
                                        file_format=args.format,
                                        reverse=args.reverse)
        for q in test_qd_judge:
            for d in test_qd_judge[q]:
                test_qd_judge[q][d] = relevance_mapper(test_qd_judge[q][d])
    else:
        '''
        load data (tfrecord)
        '''
        max_q_len = max_q_len_consider
        max_d_len = max_d_len_consider
        batcher = None
    '''
    train and test the model
    '''
    model_config_ = {
        'max_q_len': max_q_len,
        'max_d_len': max_d_len,
        'max_jump_step': 100,
        'word_vector': wv.get_vectors(normalize=True),
        'oov_word_vector': None,
        'vocab': vocab,
        'word_vector_trainable': False,
        'use_pad_word': True,
        'interaction': 'dot',
        'glimpse': 'all_next_hard',
        'glimpse_fix_size': 10,
        'min_density': -1,
        'use_ratio': False,
        'min_jump_offset': 3,
        'jump': 'min_density_hard',
        'represent': 'interaction_cnn_hard',
        'separate': False,
        'aggregate': 'max',
        'rnn_size': 16,
        'max_jump_offset': 50,
        'max_jump_offset2': max_q_len,
        'rel_level': rel_level,
        'loss_func': 'classification',
        'keep_prob': 1.0,
        'paradigm': args.paradigm,
        'learning_rate': 0.0002,
        'random_seed': SEED,
        'n_epochs': 30,
        'batch_size': 256,
        'batch_num': 400,
        'batcher': batcher,
        'verbose': 1,
        'save_epochs': 1,
        'reuse_model': args.reuse_model_path,
        'save_model': args.save_model_path,
        'summary_path': args.tf_summary_path,
        'tfrecord': args.tfrecord,
    }
    if args.config is not None:
        model_config_.update(model_config)
    rri = RRI(**model_config_)
    if not args.tfrecord:
        #train_X, train_y, test_X, test_y = train_X[:2560], train_y[:2560], test_X[:2560], test_y[:2560]
        print('train query: {}, test query: {}'.format(len(train_X),
                                                       len(test_X)))
        for e in rri.fit_iterable(train_X, train_y):
            start = time.time()
            loss, acc = rri.test(test_X, test_y)
            if args.format == 'ir':
                ranks, _ = rri.decision_function(test_X_judge)
                scores = evaluate(ranks, test_qd_judge, metric=ndcg, top_k=20)
                avg_score = np.mean(list(scores.values()))
            elif args.format == 'text':
                avg_score = float('nan')  # NaN keeps the format string below from failing on None
            print('\t{:>7}:{:>5.3f}:{:>5.3f}:{:>5.3f}'.format(
                'test_{:>3.1f}'.format((time.time() - start) / 60), loss, acc,
                avg_score),
                  end='',
                  flush=True)
    else:
        for e in rri.fit_iterable_tfrecord(
                'data/bing/test.prep.pairwise.tfrecord-???-of-???'):
            print(e)
            input()
Example #11
File: main.py  Project: RenQis/MFCF
    R_test5 = np.array(dataset5[1])
    #print(R_train1,R_test1)
    K = 19 # number of latent features
    # initialize P and Q with random values
    P = np.random.rand(R_train1.shape[0], K)
    Q = np.random.rand(R_train1.shape[1], K)
    print(R_test1.shape[0],R_test1.shape[1])

    print('***************************************')
    print('NMF1 started!!')
    print('***************************************')

    P1, Q1 = nmf1.matrix_factorization(R_train1, P, Q, K)
    R_pred1 = np.dot(P1, Q1.T)
    #print(R_pred1)
    precision1, recall1, fvalue1, rmse1 = evaluate(R_test1, R_pred1)
    P2, Q2 = nmf1.matrix_factorization(R_train2, P, Q, K)
    R_pred2 = np.dot(P2, Q2.T)
    precision2, recall2, fvalue2, rmse2 = evaluate(R_test2, R_pred2)
    P3, Q3 = nmf1.matrix_factorization(R_train3, P, Q, K)
    R_pred3 = np.dot(P3, Q3.T)
    precision3, recall3, fvalue3, rmse3 = evaluate(R_test3, R_pred3)
    P4, Q4 = nmf1.matrix_factorization(R_train4, P, Q, K)
    R_pred4 = np.dot(P4, Q4.T)
    precision4, recall4, fvalue4, rmse4 = evaluate(R_test4, R_pred4)
    P5, Q5 = nmf1.matrix_factorization(R_train5, P, Q, K)
    R_pred5 = np.dot(P5, Q5.T)
    precision5, recall5, fvalue5, rmse5 = evaluate(R_test5, R_pred5)

    # 5-fold cross-validation results
    print('**************precision***************')
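
Examples #11 and #13 call evaluate(R_test, R_pred) on a reconstructed rating matrix. A rough sketch of what such a function might compute, assuming RMSE is taken over observed test entries only and that ratings at or above a threshold (3.0 here, an assumption) count as relevant:

import numpy as np

def evaluate(R_test, R_pred, threshold=3.0):
    observed = R_test > 0                    # score only the rated entries
    err = R_test[observed] - R_pred[observed]
    rmse = np.sqrt(np.mean(err ** 2))
    actual = R_test[observed] >= threshold   # relevant in the ground truth
    predicted = R_pred[observed] >= threshold
    tp = np.sum(actual & predicted)
    precision = tp / max(np.sum(predicted), 1)
    recall = tp / max(np.sum(actual), 1)
    fvalue = 2 * precision * recall / max(precision + recall, 1e-12)
    return precision, recall, fvalue, rmse
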
Example #12
def main():
    import csv
    import time
    from scipy.stats import rankdata

    from dataloader import Data

    # ds = Data(root_dir="./data/ta_feng/")
    # # train, test, valid = ds.get_data(neg=0)
    # train, test, valid = ds.get_data(neg=2)

    # # Load network
    # n_usr = len(ds.usrset)
    # n_itm = len(ds.itemset)
    # k = 32
    # d=k
    # h=2
    # gamma=[1,1,1,1]
    # alpha=0.0

    # lr=0.0001
    # momentum=0
    # weight_decay=0.01

    # epochs=21
    # neg=2

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # model = ABFM(n_usr, n_itm, k).to(device=device)
    # model = FABFM(n_usr, n_itm, k, d, h, gamma, alpha).to(device=device)

    with open("./path_for_metric") as f:
        paths = [line.strip() for line in f]
    # paths = paths[35:36]
    paths = paths[41:]

    cnt = 0
    for i, path in enumerate(paths):
        print(f"{path:-^60}")

        # choice= "train"
        choice = "test"
        metric = True
        name = get_name(path)
        model, train, test, l_train, l_test, n_usr, n_itm = load_model(
            path, device, model_name=name, choice=choice)
        if metric:
            from metric import evaluate

            if choice == "train":
                result, r_result, diversity, auc = evaluate(train, l_train, model, n_usr, n_itm, \
                                                        device, C=100, beta=5, n_rank=10, fAUC=True)
                print(f"Data      : {choice}\n" \
                      f"HLU       : {result}\n" \
                      f"R@10      : {r_result}\n" \
                      f"Diversity : {diversity}\n" \
                      f"AUC       : {auc}\n" \
                       "{:-^60}".format(""), flush=True)
            elif choice == "test":
                result, r_result, diversity, auc = evaluate(test, l_test, model, n_usr, n_itm, \
                                                       device, C=100, beta=5, n_rank=10, fAUC=True)
                print(f"Data      : {choice}\n" \
                      f"HLU       : {result}\n" \
                      f"R@10      : {r_result}\n" \
                      f"Diversity : {diversity}\n" \
                      f"AUC       : {auc}\n" \
                       "{:-^60}".format(""), flush=True)
        else:
            criterion = nn.BCEWithLogitsLoss()
            if choice == "train":
                data = train
            elif choice == "test":
                data = test
            cnt = 0
            for i in data:
                if cnt == 50: break
                with torch.no_grad():
                    x, label = i[0].to(device), i[1].to(device)
                    print(f"\nLabel : {label.item()}")
                    # print(f"fm output : {model.fm(x, debug=True).item():>8.5f}")
                    print(f"fm output : {model.fm(x, debug=True).item()}")
                    print(
                        f"sig out   : {torch.sigmoid(model(x, label, pmi=1)).item()}"
                    )
                    if label == -1:
                        # print(f"Loss      : {criterion(torch.sigmoid(model(x, label, pmi=1)), label+1).item():>8.5f}")
                        print(
                            f"Loss      : {criterion(torch.sigmoid(model(x, label, pmi=1)), label+1).item()}"
                        )
                    else:
                        # print(f"Loss      : {criterion(torch.sigmoid(model(x, label, pmi=1)), label).item():>8.5f}")
                        print(
                            f"Loss      : {criterion(torch.sigmoid(model(x, label, pmi=1)), label).item()}"
                        )
                    # target_idx = (x[n:n+m]==1).nonzero()
                    # y = model.rank_list(x).cpu().numpy()
                    # rank = rankdata(-y, method="min")[target_idx]
                    # print("Rank      :", rank)
                cnt += 1
Example #13
File: main.py  Project: yuuuuuki/RecSys
    R_train5 = np.array(dataset5[0])
    R_test5 = np.array(dataset5[1])

    K = 19 # number of latent features

    # initialize P and Q with random values
    P = np.random.rand(R_train1.shape[0], K)
    Q = np.random.rand(R_train1.shape[1], K)

    print('***************************************')
    print('NMF1 started!!')
    print('***************************************')

    P1, Q1 = nmf1.matrix_factorization(R_train1, P, Q, K)
    R_pred1 = np.dot(P1, Q1.T)
    precision1, recall1, fvalue1, rmse1 = evaluate(R_test1, R_pred1)
    P2, Q2 = nmf1.matrix_factorization(R_train2, P, Q, K)
    R_pred2 = np.dot(P2, Q2.T)
    precision2, recall2, fvalue2, rmse2 = evaluate(R_test2, R_pred2)
    P3, Q3 = nmf1.matrix_factorization(R_train3, P, Q, K)
    R_pred3 = np.dot(P3, Q3.T)
    precision3, recall3, fvalue3, rmse3 = evaluate(R_test3, R_pred3)
    P4, Q4 = nmf1.matrix_factorization(R_train4, P, Q, K)
    R_pred4 = np.dot(P4, Q4.T)
    precision4, recall4, fvalue4, rmse4 = evaluate(R_test4, R_pred4)
    P5, Q5 = nmf1.matrix_factorization(R_train5, P, Q, K)
    R_pred5 = np.dot(P5, Q5.T)
    precision5, recall5, fvalue5, rmse5 = evaluate(R_test5, R_pred5)

    # 5-fold cross-validation results
    print('**************precision***************')
Example #14
 def validate(self):
     y_, y_pred = self.predict(self.dataloader["test"])
     rocprc, rocauc, best_th, best_f1 = evaluate(y_, y_pred)
     return rocauc, best_th, best_f1
Example #15
def combined_result(gold_file,
                    classifier_result1,
                    classifier_result2,
                    method='sum'):
    with open(classifier_result1) as infile:
        data = infile.readlines()
    label1 = data[0].strip().split()  # header row with class labels
    result1 = [line.strip().split() for line in data[1:]]

    with open(classifier_result2) as infile:
        data = infile.readlines()
    label2 = data[0].strip().split()
    result2 = [line.strip().split() for line in data[1:]]

    with open(gold_file) as infile:
        gold_data = [int(line.strip().split()[0]) for line in infile]

    if len(result1) != len(result2):
        raise ValueError('the two result lists must have the same length!')

    if label1[1] != label2[1]:
        raise ValueError('the two classifiers were not trained with consistent labels')

    result = []
    if method == 'max' or method is None:
        for i in range(len(result1)):
            temp = [
                float(result1[i][1]),
                float(result1[i][2]),
                float(result2[i][1]),
                float(result2[i][2])
            ]
            index = temp.index(max(temp))

            if label1[1] == '-1':
                if index == 0 or index == 2:
                    result.append(-1)
                else:
                    result.append(1)
            else:
                if index == 0 or index == 2:
                    result.append(1)
                else:
                    result.append(-1)

    elif method == 'product':
        for i in range(len(result1)):
            if float(result1[i][1]) * float(result2[i][1]) >= float(
                    result1[i][2]) * float(result2[i][2]):
                result.append(1)
            else:
                result.append(-1)

    elif method == 'min':
        for i in range(len(result1)):
            temp = [
                min([float(result1[i][1]),
                     float(result2[i][1])]),
                min([float(result1[i][2]),
                     float(result2[i][2])])
            ]
            index = temp.index(max(temp))
            if index == 0:
                result.append(1)
            else:
                result.append(-1)

    elif method == 'sum':
        for i in range(len(result1)):
            temp = [
                float(result1[i][1]) + float(result2[i][1]),
                float(result1[i][2]) + float(result2[i][2])
            ]
            index = temp.index(max(temp))
            if index == 0:
                result.append(1)
            else:
                result.append(-1)
    p, r, f, auc = evaluate(np.asarray(gold_data), np.asarray(result))

    return (p, r, f, auc)
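
A hypothetical call, assuming two classifier output files in the layout read above (the file names are illustrative):

p, r, f, auc = combined_result('test.gold',
                               'svm.result',
                               'maxent.result',
                               method='sum')
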
Example #16
def main():

    # tensorboard writer
    writer = SummaryWriter(settings.TENSORBOARD_DIR)
    # makedir snapshot
    makedir(settings.CHECKPOINT_DIR)

    # enable cudnn
    torch.backends.cudnn.enabled = True

    # create segmentor network
    model = Segmentor(pretrained=settings.PRETRAINED,
                      num_classes=settings.NUM_CLASSES,
                      modality=settings.MODALITY)

    model.train()
    model.cuda()

    torch.backends.cudnn.benchmark = True

    # dataset and dataloader
    dataset = TrainDataset()
    dataloader = data.DataLoader(dataset,
                                 batch_size=settings.BATCH_SIZE,
                                 shuffle=True,
                                 num_workers=settings.NUM_WORKERS,
                                 pin_memory=True,
                                 drop_last=True)

    dataloader_iter = enumerate(dataloader)

    # optimizer for generator network (segmentor)
    optim = SGD(model.optim_parameters(settings.LR),
                lr=settings.LR,
                momentum=settings.LR_MOMENTUM,
                weight_decay=settings.WEIGHT_DECAY)

    # losses
    ce_loss = CrossEntropyLoss2d(
        ignore_index=settings.IGNORE_LABEL)  # to use for segmentor

    # upsampling for the network output
    upsample = nn.Upsample(size=(settings.CROP_SIZE, settings.CROP_SIZE),
                           mode='bilinear',
                           align_corners=True)

    # confusion matrix to track metrics such as mIoU during training
    conf_mat = np.zeros((settings.NUM_CLASSES, settings.NUM_CLASSES))

    for i_iter in range(settings.MAX_ITER):

        # initialize losses
        loss_G_seg_value = 0

        # clear optim gradients and adjust learning rates
        optim.zero_grad()

        lr_poly_scheduler(optim, settings.LR, settings.LR_DECAY_ITER, i_iter,
                          settings.MAX_ITER, settings.LR_POLY_POWER)

        ####### train generator #######

        # get the batch of data; restart the iterator once it is exhausted
        try:
            _, batch = next(dataloader_iter)
        except StopIteration:
            dataloader_iter = enumerate(dataloader)
            _, batch = next(dataloader_iter)

        images, depths, labels = batch
        images = images.cuda()
        depths = depths.cuda()
        labels = labels.cuda()

        # get a mask where an element is True for every pixel with the ignore_label value
        ignore_mask = (labels == settings.IGNORE_LABEL)
        target_mask = torch.logical_not(ignore_mask)
        target_mask = target_mask.unsqueeze(dim=1)

        # get the output of generator
        if settings.MODALITY == 'rgb':
            predict = upsample(model(images))
        elif settings.MODALITY == 'middle':
            predict = upsample(model(images, depths))
        else:
            raise ValueError('unsupported modality: {}'.format(settings.MODALITY))

        # calculate cross-entropy loss
        loss_G_seg = ce_loss(predict, labels)

        # accumulate loss, backward and store value
        loss_G_seg.backward()

        loss_G_seg_value += loss_G_seg.data.cpu().numpy()
        ####### end of train generator #######

        optim.step()

        # get pred and gt to compute confusion matrix
        seg_pred = np.argmax(predict.detach().cpu().numpy(), axis=1)
        seg_gt = labels.cpu().numpy().copy()

        seg_pred = seg_pred[target_mask.squeeze(dim=1).cpu().numpy()]
        seg_gt = seg_gt[target_mask.squeeze(dim=1).cpu().numpy()]

        conf_mat += confusion_matrix(seg_gt,
                                     seg_pred,
                                     labels=np.arange(settings.NUM_CLASSES))

        ####### log ########
        if i_iter % (settings.TRAIN_SIZE // settings.BATCH_SIZE) == 0 and i_iter != 0:
            metrics = evaluate(conf_mat)
            writer.add_scalar('Pixel Accuracy/Train', metrics['pAcc'], i_iter)
            writer.add_scalar('Mean Accuracy/Train', metrics['mAcc'], i_iter)
            writer.add_scalar('mIoU/Train', metrics['mIoU'], i_iter)
            writer.add_scalar('fwavacc/Train', metrics['fIoU'], i_iter)
            conf_mat = np.zeros_like(conf_mat)

        writer.add_scalar('Loss_G_SEG/Train', loss_G_seg_value, i_iter)
        writer.add_scalar('learning_rate_G/Train', optim.param_groups[0]['lr'],
                          i_iter)

        print("iter = {:6d}/{:6d},\t loss_seg = {:.3f}".format(
            i_iter, settings.MAX_ITER, loss_G_seg_value))

        with open(settings.LOG_FILE, "a") as f:
            output_log = '{:6d},\t {:.8f}\n'.format(i_iter, loss_G_seg_value)
            f.write(output_log)

        # taking snapshot
        if i_iter >= settings.MAX_ITER:
            print('saving the final model ...')
            torch.save(
                model.state_dict(),
                osp.join(settings.CHECKPOINT_DIR,
                         'CHECKPOINT_' + str(settings.MAX_ITER) + '.pt'))
            break

        if i_iter % settings.SAVE_EVERY == 0 and i_iter != 0:
            print('taking snapshot ...')
            torch.save(
                model.state_dict(),
                osp.join(settings.CHECKPOINT_DIR,
                         'CHECKPOINT_' + str(i_iter) + '.pt'))
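
The evaluate(conf_mat) used for logging above returns a dict with 'pAcc', 'mAcc', 'mIoU', and 'fIoU'. A minimal sketch of the standard confusion-matrix formulas behind those keys, as an assumption rather than the project's actual implementation:

import numpy as np

def evaluate(conf_mat):
    tp = np.diag(conf_mat)
    gt_per_class = conf_mat.sum(axis=1)      # ground-truth pixels per class
    pred_per_class = conf_mat.sum(axis=0)    # predicted pixels per class
    union = gt_per_class + pred_per_class - tp
    iou = tp / np.maximum(union, 1)
    freq = gt_per_class / conf_mat.sum()
    return {
        'pAcc': tp.sum() / conf_mat.sum(),                  # pixel accuracy
        'mAcc': np.mean(tp / np.maximum(gt_per_class, 1)),  # mean class accuracy
        'mIoU': np.mean(iou),                               # mean IoU
        'fIoU': (freq * iou).sum(),                         # frequency-weighted IoU
    }
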
Example #17
def main():
    print('Loading data...')
    train, valid, test = load_data(args.dataset_path, valid_portion=args.valid_portion)
    
    train_data = RecSysDataset(train)
    valid_data = RecSysDataset(valid)
    test_data = RecSysDataset(test)
    train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True, collate_fn=collate_fn)
    valid_loader = DataLoader(valid_data, batch_size=args.batch_size, shuffle=False, collate_fn=collate_fn)
    test_loader = DataLoader(test_data, batch_size=args.batch_size, shuffle=False, collate_fn=collate_fn)

    n_items = 37484  # total number of items in the dataset

    if args.test:
        for i in range(5):
            results = np.zeros((3, 3))
            model = NARM(n_items, args.hidden_size, args.embed_dim, args.batch_size).to(device)
            optimizer = optim.Adam(model.parameters(), args.lr)
            criterion = nn.CrossEntropyLoss()
            scheduler = StepLR(optimizer, step_size=args.lr_dc_step, gamma=args.lr_dc)
            for epoch in tqdm(range(args.epoch)):
                # train for one epoch
                scheduler.step(epoch=epoch)
                trainForEpoch(train_loader, model, optimizer, epoch, args.epoch, criterion, log_aggr=1000)
            model.eval()
            recalls5, recalls10, recalls20 = [], [], []
            mrrs5, mrrs10, mrrs20 = [], [], []
            ndcgs5, ndcgs10, ndcgs20 = [], [], []
            with torch.no_grad():
                for seq, target, lens in tqdm(valid_loader):
                    seq = seq.to(device)
                    target = target.to(device)
                    outputs = model(seq, lens)
                    logits = F.softmax(outputs, dim=1)
                    recall5, mrr5, ndcg5 = metric.evaluate(logits, target, k=5)
                    recall10, mrr10, ndcg10 = metric.evaluate(logits, target, k=10)
                    recall20, mrr20, ndcg20 = metric.evaluate(logits, target, k=20)
                    recalls5.append(recall5)
                    mrrs5.append(mrr5)
                    ndcgs5.append(ndcg5)
                    recalls10.append(recall10)
                    mrrs10.append(mrr10)
                    ndcgs10.append(ndcg10)
                    recalls20.append(recall20)
                    mrrs20.append(mrr20)
                    ndcgs20.append(ndcg20)


            results[0, 0] = np.mean(recalls5)
            results[0, 1] = np.mean(mrrs5)
            results[0, 2] = np.mean(ndcgs5)
            results[1, 0] = np.mean(recalls10)
            results[1, 1] = np.mean(mrrs10)
            results[1, 2] = np.mean(ndcgs10)
            results[2, 0] = np.mean(recalls20)
            results[2, 1] = np.mean(mrrs20)
            results[2, 2] = np.mean(ndcgs20)

            with open('recsys19/test_performances_on.txt', 'a') as f:
                f.write( str(results) + '\n')

    model = NARM(n_items, args.hidden_size, args.embed_dim, args.batch_size).to(device)
    optimizer = optim.Adam(model.parameters(), args.lr)
    criterion = nn.CrossEntropyLoss()
    scheduler = StepLR(optimizer, step_size=args.lr_dc_step, gamma=args.lr_dc)

    for epoch in tqdm(range(args.epoch)):
        # train for one epoch
        scheduler.step(epoch=epoch)
        trainForEpoch(train_loader, model, optimizer, epoch, args.epoch, criterion, log_aggr=1000)

        recall, mrr, ndcg = validate(test_loader, model)
        print('Epoch {} validation: Recall@{}: {:.4f}, MRR@{}: {:.4f}, NDCG@{}: {:.4f} \n'.format(epoch, args.topk, recall, args.topk, mrr, args.topk, ndcg))

        # store best loss and save a model checkpoint
        ckpt_dict = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }

        torch.save(ckpt_dict, 'latest_checkpoint.pth.tar')
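
A hypothetical way to restore the checkpoint saved above for later evaluation, assuming the model and optimizer are constructed exactly as in the snippet:

ckpt = torch.load('latest_checkpoint.pth.tar')
model.load_state_dict(ckpt['state_dict'])
optimizer.load_state_dict(ckpt['optimizer'])
start_epoch = ckpt['epoch']
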