Example #1
    def on_validation_epoch_end(self, trainer, pl_module):
        # skip the metric during the sanity-check pass
        if trainer.running_sanity_check:
            return

        epoch = trainer.current_epoch
        if epoch % self.period == 0:
            # use the frozen encoder as a feature extractor
            # (get_representations is a bound method, called per batch below)
            encoder = pl_module.get_representations

            # encode the full training split without augmentation
            train_features = []
            train_target = []
            with torch.no_grad():
                for batch, target in trainer.datamodule.train_dataloader(transform=None):
                    input_, seq_len = batch
                    train_features.append(encoder(input_.to(pl_module.device), seq_len).cpu())
                    train_target.append(torch.Tensor(target))

            train_features = torch.cat(train_features, 0)
            train_target = torch.cat(train_target, 0)

            # encode the validation split the same way
            val_features = []
            val_target = []
            with torch.no_grad():
                for batch, target in trainer.datamodule.val_dataloader(transform=None):
                    input_, seq_len = batch
                    val_features.append(encoder(input_.to(pl_module.device), seq_len).cpu())
                    val_target.append(torch.Tensor(target))
            
            val_features = torch.cat(val_features, 0)
            val_target = torch.cat(val_target, 0)
            
            # 1-NN accuracy on the frozen features
            knn_acc = modules.knn(train_features.numpy(), val_features.numpy(),
                                  train_target.numpy(), val_target.numpy(), nn=1)
            
            # log both to the logger and to callback_metrics so checkpoint
            # callbacks can monitor 'knn_acc'
            trainer.logger.log_metrics({'knn_acc': knn_acc}, step=trainer.current_epoch)
            trainer.logger_connector.callback_metrics.update({'knn_acc': knn_acc})
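The modules.knn helper called above is not shown on this page. A minimal sketch consistent with the call signature used here (train/val features and targets as NumPy arrays, nn neighbours, returning a scalar accuracy), built on scikit-learn; the actual implementation may differ:

import numpy as np
from sklearn.neighbors import KNeighborsClassifier

def knn(train_X, val_X, train_y, val_y, nn=1):
    # fit a k-NN classifier on the frozen training features and report
    # top-1 accuracy on the validation features
    clf = KNeighborsClassifier(n_neighbors=nn)
    clf.fit(train_X, train_y)
    return float(np.mean(clf.predict(val_X) == val_y))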
Example #2
def lincls(args, model):

    # extract dataset features with the model
    dataset_type = vars(datasets)[args.dataset]
    data_args = args.__dict__  # reuse the CLI args as datamodule kwargs
    data_args.update({'train_transforms': None, 'val_transforms': None})  # no augmentation
    datamodule = dataset_type(**data_args)

    train_loader = datamodule.train_dataloader(shuffle=False, drop_last=False)
    val_loader = datamodule.val_dataloader()

    encoder = model.get_encoder().cuda()
    encoder.eval()

    train_features = []
    train_target = []
    with torch.no_grad():
        for batch, target in train_loader:
            input_, seq_len = batch
            train_features.append(encoder(input_.cuda(), seq_len).cpu())
            train_target.append(torch.Tensor(target))

    train_features = torch.cat(train_features, 0)
    train_target = torch.cat(train_target, 0)

    val_features = []
    val_target = []
    with torch.no_grad():
        for batch, target in val_loader:
            input_, seq_len = batch
            val_features.append(encoder(input_.cuda(), seq_len).cpu())
            val_target.append(torch.Tensor(target))

    val_features = torch.cat(val_features, 0)
    val_target = torch.cat(val_target, 0)

    batch_size = 512 if 'NTU' in args.dataset else 128
    datamodule = datasets.FeatureDataModule(train_features,
                                            train_target,
                                            val_features,
                                            val_target,
                                            num_workers=4,
                                            batch_size=batch_size)

    model_checkpoint = pl.callbacks.ModelCheckpoint(
        filepath=os.path.join(args.exp_dir, 'lincls'),
        save_top_k=1,
        mode='max',
        monitor='val_acc1_agg',
        period=1)

    trainer = pl.Trainer(max_epochs=70,
                         weights_summary=None,
                         gpus=1,
                         checkpoint_callback=model_checkpoint,
                         progress_bar_refresh_rate=0)

    model = modules.LinearClassifierMod(input_dim=args.hidden_dim,
                                        n_label=datamodule.num_classes,
                                        learning_rate=2,
                                        momentum=0.9,
                                        weight_decay=0,
                                        epochs=70,
                                        lr_decay_rate=0.01)

    trainer.fit(model, datamodule)

    # pick the saved 'lincls' checkpoint from disk instead of relying on
    # trainer.checkpoint_callback.best_model_path; sort so the index is
    # deterministic (os.listdir order is arbitrary)
    ckpts = sorted(
        filter(lambda x: 'lincls' in x and '.ckpt' in x,
               os.listdir(args.exp_dir)))
    best_ckpt = ckpts[-1] if len(ckpts) == 1 else ckpts[-2]
    best_ckpt = os.path.join(args.exp_dir, best_ckpt)
    print(best_ckpt)
    best_model = modules.LinearClassifierMod.load_from_checkpoint(
        checkpoint_path=best_ckpt)

    result_dict = {}

    lincls_result = trainer.test(model=best_model, datamodule=datamodule)[0]

    ##########################################################################################
    # # baseline
    # classifier = best_model.classifier
    # # val_loader = datamodule.val_dataloader()
    # val_logits_trials = []

    # val_logits = []
    # val_targets = []
    # with torch.no_grad():
    #     for batch, target in val_loader:
    #         input_, seq_len = batch
    #         val_logits.append(classifier(encoder(input_.cuda(), seq_len)).cpu())
    #         val_targets.append(torch.Tensor(target))
    #         print(target[-1])

    # base_val_logits = torch.cat(val_logits, 0)
    # val_targets = torch.cat(val_targets, 0)

    # print('logit', base_val_logits[0])

    # acc1, acc5 = precision_at_k(base_val_logits, val_targets, top_k=(1, 5))

    # print('acc', acc1, acc5)
    # # results['baseline'] = np.array([acc1, acc5])[None,:]
    # # base_val_logits = base_val_logits.numpy()
    ##########################################################################################

    result_dict.update(lincls_result)

    # complementary metric: 1-NN accuracy on the frozen features
    knn_acc = modules.knn(train_features.numpy(),
                          val_features.numpy(),
                          train_target.numpy(),
                          val_target.numpy(),
                          nn=1)
    result_dict['knn_acc'] = knn_acc

    return result_dict
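datasets.FeatureDataModule is referenced but not shown. A plausible minimal sketch, assuming it is a LightningDataModule that serves the precomputed feature tensors to the linear probe (the class internals, including the num_classes attribute, are assumptions):

import pytorch_lightning as pl
from torch.utils.data import DataLoader, TensorDataset

class FeatureDataModule(pl.LightningDataModule):
    def __init__(self, train_features, train_target, val_features, val_target,
                 num_workers=4, batch_size=128):
        super().__init__()
        # cross-entropy training needs integer class targets
        self.train_set = TensorDataset(train_features, train_target.long())
        self.val_set = TensorDataset(val_features, val_target.long())
        self.num_workers = num_workers
        self.batch_size = batch_size
        self.num_classes = int(train_target.max().item()) + 1

    def train_dataloader(self):
        return DataLoader(self.train_set, batch_size=self.batch_size,
                          shuffle=True, num_workers=self.num_workers)

    def val_dataloader(self):
        return DataLoader(self.val_set, batch_size=self.batch_size,
                          num_workers=self.num_workers)

    def test_dataloader(self):
        return self.val_dataloader()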
Example #3
result = []
for t in range(T):
    for tr_index, v_index in skf.split(X, y):
        # training data
        X_tr = X[tr_index, :]
        y_tr = y[tr_index]
        # validation data
        X_val = X[v_index, :]
        y_val = y[v_index]
        # standardize with training-split statistics
        (X_tr_norm, X_val_norm) = utils.normalizing(X_tr, X_val)
        # kNN on all features, sweeping weights and number of neighbours
        result_L = []
        for w in weights:
            for n in N_K:
                result_l = modules.knn(n, w, X_tr_norm, y_tr, X_val_norm,
                                       y_val)
                result_L.append(result_l)
        result.append(result_L)

# aggregate errors across folds and repetitions; columns 2 and 3 of each
# result entry hold the train and validation errors
results = np.array(result)
mean_Etrain = np.mean(results[:, :, 2], axis=0)
var_Etrain = np.var(results[:, :, 2], axis=0)
mean_Eval = np.mean(results[:, :, 3], axis=0)
var_Eval = np.var(results[:, :, 3], axis=0)

# configurations are ordered weights-major: indices 0-8 are 'uniform',
# 9-17 are 'distance'
best_idx_uniform = np.argmin(mean_Eval[0:9])
best_idx_distance = 9 + np.argmin(mean_Eval[9:18])
best_n_uniform = N_K[best_idx_uniform]
best_n_distance = N_K[best_idx_distance - 9]
print("KNN_uniform best k : " + str(best_n_uniform) + " " +
      str(mean_Etrain[best_idx_uniform]) + " " +
      str(mean_Eval[best_idx_uniform]))
print("KNN_distance best k : " + str(best_n_distance) + " " +
      str(mean_Etrain[best_idx_distance]) + " " +
      str(mean_Eval[best_idx_distance]))
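Neither utils.normalizing nor this six-argument modules.knn variant appears on the page. A minimal sketch consistent with how they are called; it assumes a regression task with mean-squared error, matching the "nonlinear regression" framing in Example #5 (for classification, KNeighborsClassifier and a misclassification rate would be used instead):

import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler

def normalizing(X_train, X_val):
    # standardize using statistics from the training split only
    scaler = StandardScaler().fit(X_train)
    return scaler.transform(X_train), scaler.transform(X_val)

def knn(k, weights, X_tr, y_tr, X_val, y_val):
    # returns predictions plus train/validation errors, so entries 2 and 3
    # match the result[:, :, 2] / result[:, :, 3] indexing above
    model = KNeighborsRegressor(n_neighbors=k, weights=weights)
    model.fit(X_tr, y_tr)
    y_train_pred = model.predict(X_tr)
    y_val_pred = model.predict(X_val)
    error_train = float(np.mean((y_train_pred - y_tr) ** 2))
    error_val = float(np.mean((y_val_pred - y_val) ** 2))
    return (y_train_pred, y_val_pred, error_train, error_val)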
Example #4
def lincls(args, model):

    # extract dataset features with the model
    dataset_type = vars(datasets)[args.dataset]
    data_args = args.__dict__  # reuse the CLI args as datamodule kwargs
    data_args.update({'train_transforms': None, 'val_transforms': None})  # no augmentation
    datamodule = dataset_type(**data_args)
    
    train_loader = datamodule.train_dataloader(shuffle=False, drop_last=False)
    val_loader = datamodule.val_dataloader()

    encoder = model.get_encoder().cuda()
    encoder.eval()

    train_features = []
    train_target = []
    with torch.no_grad():
        for batch, target in train_loader:
            input_, seq_len = batch 
            train_features.append(encoder(input_.cuda(), seq_len).cpu())
            train_target.append(torch.Tensor(target))

    train_features = torch.cat(train_features, 0)
    train_target = torch.cat(train_target, 0)

    val_features = []
    val_target = []
    with torch.no_grad():
        for batch, target in val_loader:
            input_, seq_len = batch 
            val_features.append(encoder(input_.cuda(), seq_len).cpu())
            val_target.append(torch.Tensor(target))
    
    val_features = torch.cat(val_features, 0)
    val_target = torch.cat(val_target, 0)
    
    batch_size = 512 if 'NTU' in args.dataset else 128

    datamodule = datasets.FeatureDataModule(train_features, train_target,
                                            val_features, val_target,
                                            num_workers=4,
                                            batch_size=batch_size)
    
    model_checkpoint = pl.callbacks.ModelCheckpoint(save_top_k=1,
                                                    mode='max',
                                                    monitor='val_acc1_agg',
                                                    period=1)

    trainer = pl.Trainer(max_epochs=70,
                         weights_summary=None,
                         gpus=1,
                         checkpoint_callback=model_checkpoint,
                         progress_bar_refresh_rate=0)
    
    model = modules.LinearClassifierMod(input_dim=args.hidden_dim,
                                        n_label=datamodule.num_classes,
                                        learning_rate=2,
                                        momentum=0.9,
                                        weight_decay=0,
                                        epochs=70,
                                        lr_decay_rate=0.01)

    trainer.fit(model, datamodule)
    
    best_ckpt = trainer.checkpoint_callback.best_model_path
    best_model = modules.LinearClassifierMod.load_from_checkpoint(checkpoint_path=best_ckpt)
        
    result_dict = {}

    lincls_result = trainer.test(model=best_model, datamodule=datamodule)[0]

    result_dict.update(lincls_result)
    
    # use a larger neighbourhood for the NTU datasets
    nn_ = 9 if 'NTU' in args.dataset else 1
    knn_acc = modules.knn(train_features.numpy(), val_features.numpy(),
                          train_target.numpy(), val_target.numpy(), nn=nn_)
    result_dict['knn_acc'] = knn_acc

    return result_dict
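modules.LinearClassifierMod is also not shown. A hedged sketch of a linear probe consistent with the constructor arguments above; the step definitions, the cosine schedule, and the metric names are assumptions (val_acc1_agg must be logged somewhere for the ModelCheckpoint above to monitor it):

import torch
import torch.nn.functional as F
import pytorch_lightning as pl

class LinearClassifierMod(pl.LightningModule):
    def __init__(self, input_dim, n_label, learning_rate=2, momentum=0.9,
                 weight_decay=0, epochs=70, lr_decay_rate=0.01):
        super().__init__()
        self.save_hyperparameters()
        self.classifier = torch.nn.Linear(input_dim, n_label)

    def training_step(self, batch, batch_idx):
        x, y = batch
        return F.cross_entropy(self.classifier(x), y)

    def validation_step(self, batch, batch_idx):
        x, y = batch
        acc1 = (self.classifier(x).argmax(dim=1) == y).float().mean()
        self.log('val_acc1_agg', acc1)  # monitored by the checkpoint callback

    def test_step(self, batch, batch_idx):
        x, y = batch
        acc1 = (self.classifier(x).argmax(dim=1) == y).float().mean()
        self.log('test_acc1', acc1)

    def configure_optimizers(self):
        opt = torch.optim.SGD(self.parameters(),
                              lr=self.hparams.learning_rate,
                              momentum=self.hparams.momentum,
                              weight_decay=self.hparams.weight_decay)
        # decay the learning rate toward lr * lr_decay_rate over training
        sched = torch.optim.lr_scheduler.CosineAnnealingLR(
            opt, T_max=self.hparams.epochs,
            eta_min=self.hparams.learning_rate * self.hparams.lr_decay_rate)
        return [opt], [sched]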
Example #5
# learning curves: train/test error against log10 of the swept parameter L
plt.plot(np.log10(L), etrain, 'g', label="train")
plt.plot(np.log10(L), etest, 'b', label="test")
plt.legend()
plt.show()
'''
------------------------------------------------
nonlinear regression
'''
# normalize the pretraining data for the nonlinear models
(Dpt_tr_norm, Dpt_test_norm) = utils.normalizing(Dpt_tr, Dpt_test)

# Baseline kNN with distance weights, k = 1 to 9
# (the 'uniform' sweep is analogous)
for k in range(1, 10):
    (y_train_pred, y_test_pred, error_train,
     error_test) = modules.knn(k, 'distance', Dpt_tr_norm, ypt_tr,
                               Dpt_test_norm, ypt_test)
    print("----")
    print(k)
    print(error_train, error_test)
'''
using ABM model (tree based)
'''
# CART: sweep min_impurity_decrease over [0.001, 0.1, 1, 10, 100, 1000] and plot
# to find the region that does not overfit; then try similar leaf counts,
# plot, and choose the simplest model within +-1 std of Etest
min_impurity_decrease = 10
max_leaf_nodes = 90
result = modules.cart(max_leaf_nodes, min_impurity_decrease, Dpt_tr_norm,
                      ypt_tr, Dpt_test_norm, ypt_test)
print("-----")
print(max_leaf_nodes)
print(result[0])
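modules.cart is likewise undefined on this page. A minimal sketch matching the call above, assuming a scikit-learn decision tree and that the returned tuple starts with the held-out error (so result[0] is the test error); both assumptions may differ from the real helper:

import numpy as np
from sklearn.tree import DecisionTreeRegressor

def cart(max_leaf_nodes, min_impurity_decrease, X_tr, y_tr, X_test, y_test):
    # CART with complexity controlled by leaf count and impurity threshold
    tree = DecisionTreeRegressor(max_leaf_nodes=max_leaf_nodes,
                                 min_impurity_decrease=min_impurity_decrease)
    tree.fit(X_tr, y_tr)
    error_train = float(np.mean((tree.predict(X_tr) - y_tr) ** 2))
    error_test = float(np.mean((tree.predict(X_test) - y_test) ** 2))
    return (error_test, error_train)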