Example #1
    def __init__(
        self,
        log_interval=10,
        lr=1e-5,
        use_cuda=False,
        verbose=0,
        log_tensorboard=False,
        path="rnd_model/",
    ):
        self.predictor = predictor_generator()
        self.target = target_generator()
        for param in self.target.parameters():
            param.requires_grad = False
        self.target.eval()

        self.log_interval = log_interval
        self.optimizer = torch.optim.Adam(self.predictor.parameters(), lr=lr)
        self.loss_function = torch.nn.MSELoss(reduction='mean')

        self.device = torch.device('cuda' if use_cuda else 'cpu')
        self.target.to(self.device)
        self.predictor.to(self.device)

        self.running_stats = RunningMeanStd()

        self.verbose = verbose
        self.writer = SummaryWriter() if log_tensorboard else None
        self.n_iter = 0

        self.save_path = path
        Path(path).mkdir(parents=True, exist_ok=True)

        self.early_stopping = EarlyStopping(save_dir=self.save_path)
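Example #1 constructs a RunningMeanStd tracker whose definition is not included. A minimal sketch, assuming the usual Welford-style (parallel-variance) update over NumPy batches, in the spirit of the OpenAI Baselines utility of the same name:

import numpy as np

class RunningMeanStd:
    """Tracks a running mean and variance across batches (parallel-variance update)."""
    def __init__(self, shape=()):
        self.mean = np.zeros(shape, dtype=np.float64)
        self.var = np.ones(shape, dtype=np.float64)
        self.count = 1e-4  # small prior count avoids division by zero

    def update(self, x):
        batch_mean, batch_var, batch_count = np.mean(x, axis=0), np.var(x, axis=0), x.shape[0]
        delta = batch_mean - self.mean
        tot_count = self.count + batch_count
        new_mean = self.mean + delta * batch_count / tot_count
        m2 = (self.var * self.count + batch_var * batch_count
              + delta ** 2 * self.count * batch_count / tot_count)
        self.mean, self.var, self.count = new_mean, m2 / tot_count, tot_count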
Example #2
def train(xtrain, ytrain, xvalid, yvalid, hidden_d, layers, dropout,
          learning_rate, n_epoch, pic_name, batch_size, device):
    def setup_seed(seed):
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        np.random.seed(seed)
        random.seed(seed)
        torch.backends.cudnn.deterministic = True

    def plot_loss(train_loss, valid_loss):
        plt.figure(figsize=(20, 10))
        plt.plot(train_loss, 'b', label='train_loss')
        plt.plot(valid_loss, 'r', label='valid_loss')
        plt.legend()
        # plt.show()
        plt.savefig(RESULT_SAVE_PATH + pic_name + '.jpg')

    train_dataset = TensorDataset(xtrain, ytrain)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True)

    setup_seed(0)
    model = Model(input_dim=xtrain.shape[-1],
                  hidden_dim=hidden_d,
                  n_layer=layers,
                  drop_out=dropout).to(device)
    criterion = torch.nn.L1Loss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, eps=1e-4)
    early_stopping = EarlyStopping(patience=50, verbose=True)

    train_loss = []
    valid_loss = []
    for epoch in range(n_epoch):
        train_loss_tmp = 0
        for step, (batch_x, batch_y) in enumerate(train_loader):
            prediction = model(batch_x)
            loss = criterion(prediction, batch_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss_tmp += loss.item()
        train_loss.append(train_loss_tmp / (step + 1))

        model.eval()
        with torch.no_grad():
            valid_output = model(xvalid)
            valid_loss_data = criterion(valid_output, yvalid)
        scheduler.step(valid_loss_data)
        valid_loss.append(valid_loss_data.item())
        print('EPOCH: %d, TRAINING LOSS: %f, VALIDATION LOSS: %f' %
              (epoch, train_loss_tmp / (step + 1), valid_loss_data))
        early_stopping(valid_loss_data, model)
        if early_stopping.early_stop:
            print('Early stopped.')
            break
        model.train()
    plot_loss(train_loss, valid_loss)
    model.load_state_dict(torch.load('checkpoint.pt'))
    return model
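Every snippet in this collection relies on an EarlyStopping helper whose definition is not shown; constructor arguments (patience, delta, save paths, trace functions) differ from project to project. A minimal sketch in the style of the widely used pytorchtools implementation, which tracks the best validation loss, counts checks without improvement, and saves the best weights to the 'checkpoint.pt' file that Example #2 reloads:

import numpy as np
import torch

class EarlyStopping:
    """Stops training when the validation loss has not improved for `patience` calls."""
    def __init__(self, patience=7, verbose=False, delta=0.0, path='checkpoint.pt'):
        self.patience, self.verbose, self.delta, self.path = patience, verbose, delta, path
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.inf

    def __call__(self, val_loss, model):
        score = -val_loss
        if self.best_score is None or score > self.best_score + self.delta:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0
        else:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True

    def save_checkpoint(self, val_loss, model):
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

The other examples pass project-specific extras (model_type, save_mode, runname, trace_func, per-epoch paths); those are variations on the same counter-and-checkpoint pattern, differing mainly in how the checkpoint file is named.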
Example #3
def model_train(model, data, optimizer, criterion, epochs=NUM_EPOCHS, patience=20):
    model.train()
    model_type = 'pointwise'
    scores = []
    train_losses = []
    valid_losses = []
    valid_indexes = []
    ndcg_list = []

    X = torch.tensor(data.train.feature_matrix, dtype=torch.float, requires_grad=False)  # gets input
    Y = torch.tensor(data.train.label_vector, requires_grad=False)  # gets correct output
    validation_data = torch.tensor(data.validation.feature_matrix, dtype=torch.float,
                                   requires_grad=False)  # validation input
    validation_target = torch.tensor(data.validation.label_vector, requires_grad=False)  # validation correct output

    # initialize the early_stopping object
    early_stopping = EarlyStopping(model_type, patience=patience, verbose=True, delta=0.0001)

    for epoch in tqdm(range(epochs), position=0, leave=True):
        model.train()  # re-enter train mode (switched to eval below for validation)
        optimizer.zero_grad()  # set gradients to zero
        y_pred = model(X)  # predict labels
        loss = criterion(y_pred, Y)  # calculate loss
        loss.backward()  # backpropagate loss
        optimizer.step()  # update weights
        train_losses.append(loss.item())  # append loss to list to plot

        print("validation ndcg at epoch " + str(epoch))
        model.eval()
        validation_y_pred = model(validation_data)
        validation_scores, validation_indexes = softmax_highest_score(validation_y_pred)
        scores.append(validation_scores)
        # calculate the loss
        valid_loss = criterion(validation_y_pred, validation_target)
        # record validation loss
        valid_losses.append(valid_loss.item())
        valid_indexes.append(validation_indexes)
        results = eval.evaluate(data.validation, validation_scores, print_results=False)
        ndcg_list.append(results['ndcg']['mean'])
        # print('ndcg: ', results["ndcg"])
        print("Epoch {} - train loss: {} - validation loss: {}".format(epoch, loss.item(),
                                                                       valid_loss.item()))  # print loss

        if epoch % 5 == 0:  # print performance of model on validation data
            epoch_len = len(str(epochs))
            print_msg = (f'[{epoch:>{epoch_len}}/{epochs:>{epoch_len}}] ' +
                         f'train_loss: {loss.item():.5f} ' +
                         f'valid_loss: {valid_loss.item():.5f}')
            print(print_msg)

        # early_stopping checks if validation loss has decreased
        early_stopping(valid_loss.item(), model)

        if early_stopping.early_stop:
            print("Early stopping")
            break

    # load the last checkpoint with the best model
    model.load_state_dict(torch.load('models/{}_checkpoint.pt'.format(model_type)))

    return model, optimizer, scores, train_losses, valid_losses, ndcg_list, validation_indexes
Example #4
def main():
    # training (test_train) -----------
    # print('[INFO] start training ')
    # train_losses, eval_losses, eval_r2s=[], [], []
    train_losses, eval_losses = [], []
    early_stopping = EarlyStopping(patience=PATIENCE, verbose=True)
    for epoch in range(NUM_EPOCH):
        print('[INFO] start training ')
        model.train()  # enable batch normalization and dropout
        train_loss = 0.0
        #step_loss=0.0
        for step, (_, train_tongue, train_label) in enumerate(train_loader):
            train_tongue, train_label = Variable(
                train_tongue).cuda(), Variable(train_label).cuda()
            optimizer.zero_grad()  # reset gradients to zero
            output = model(train_tongue)
            loss = loss_func(output, train_label)
            loss.backward()  # backpropagation
            optimizer.step()  # update parameters
            train_loss += float(loss.item() * train_tongue.size(0))
            # print('Epoch:[%d/%d], Step:[%d/%d], Step loss: %.4f' % (epoch + 1, NUM_EPOCH, step + 1, len(train_datasets) // BATCH_SIZE, loss.item()))
            if step % 100 == 99:
                print('Epoch:[%d/%d], Step:[%d/%d], Step loss: %.4f' %
                      (epoch + 1, NUM_EPOCH, step + 1,
                       len(train_datasets) // BATCH_SIZE, loss.item()))
                #print('Epoch:[%d/%d], Step:[%d/%d], Average step loss:%.4f' % (epoch + 1, NUM_EPOCH, step + 1, len(train_datasets) // BATCH_SIZE, step_loss/50))
        train_losses.append(train_loss / len(train_datasets))
        print(
            '=====> Epoch:', epoch + 1, ' | Average epoch train loss: %.4f' %
            (train_loss / len(train_datasets)))

        adjust_lr(optimizer, epoch)

        #eval-----------
        print('[INFO] start evaluation')
        model.eval()  # disable batch normalization and dropout
        with torch.no_grad():
            # eval_loss,eval_r2 = 0.0, 0.0
            eval_loss = 0.0
            for step, (_, test_tongue, test_label) in enumerate(eval_loader):
                test_tongue, test_label = Variable(
                    test_tongue).cuda(), Variable(test_label).cuda()
                output = model(test_tongue)
                loss = loss_func(output, test_label)
                eval_loss += float(loss.item() * test_tongue.size(0))
            eval_losses.append(eval_loss / len(eval_datasets))
            print(
                '=====> Epoch:', epoch + 1,
                ' | Average epoch eval loss: %.4f ' %
                (eval_loss / len(eval_datasets)))
            #print('=====> Epoch:',epoch+1, ' | Average epoch test loss:%.4f ' % (eval_loss/len(test_datasets)), '| average r2 :%.4f ' % (eval_r2/len(test_datasets)))
            print('[INFO] evaluation complete')

        # early_stopping(train_loss/len(train_datasets),model)
        early_stopping(eval_loss / len(eval_datasets), model)
        if early_stopping.early_stop:
            print('[INFO] early stop')
            break

    return train_losses, eval_losses
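Example #4 calls an adjust_lr helper that is not part of the snippet. A minimal sketch, assuming a plain step-decay schedule; the base rate, decay factor, and step size below are illustrative placeholders, not values from the original project:

def adjust_lr(optimizer, epoch, base_lr=1e-3, decay=0.1, step=30):
    """Step decay: scale the base learning rate by `decay` every `step` epochs."""
    lr = base_lr * (decay ** (epoch // step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr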
Example #5
def fit_siamese(train_loader,
                val_loader,
                model,
                loss_fn,
                optimizer,
                scheduler,
                patience,
                n_epochs,
                cuda,
                log_interval,
                metrics=[],
                start_epoch=0):
    """
    Loaders, model, loss function and metrics should work together for a given task,
    i.e. The model should be able to process data output of loaders,
    loss function should process target output of loaders and outputs from the model

    Examples: Classification: batch loader, classification model, NLL loss, accuracy metric
    Siamese network: Siamese loader, siamese model, contrastive loss
    Online triplet learning: batch loader, embedding model, online triplet loss
    """
    for epoch in range(0, start_epoch):
        scheduler.step()
    early_stopping = EarlyStopping(patience=patience, verbose=True)
    for epoch in range(start_epoch, n_epochs):
        scheduler.step()

        # Train stage
        train_loss, metrics = train_siamese(train_loader, model, loss_fn,
                                            optimizer, cuda, log_interval,
                                            metrics)
        Parameters.epoch += 1

        message = 'Epoch: {}/{}. Train set: Average loss: {:.4f}'.format(
            epoch + 1, n_epochs, train_loss)
        for metric in metrics:
            message += '\t{}: {}'.format(metric.name(), metric.value())

        val_loss, metrics = test_siamese(val_loader, model, loss_fn, cuda,
                                         metrics)
        val_loss /= len(val_loader)

        early_stopping(val_loss, model)

        for param in model.parameters():
            print(param.data)

        if early_stopping.early_stop:
            print("Early stopping")
            break

        message += '\nEpoch: {}/{}. Validation set: Average loss: {:.4f}'.format(
            epoch + 1, n_epochs, val_loss)
        for metric in metrics:
            message += '\t{}: {}'.format(metric.name(), metric.value())

        print(message)
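The docstring in Example #5 pairs a siamese model with a contrastive loss. A minimal sketch of such a loss_fn, assuming the standard margin-based formulation (Hadsell et al., 2006) rather than the exact loss used by the original project:

import torch
import torch.nn.functional as F

class ContrastiveLoss(torch.nn.Module):
    """Pulls embeddings of matching pairs together and pushes non-matching pairs
    apart until they are at least `margin` away."""
    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def forward(self, emb1, emb2, target):
        # target == 1 for matching pairs, 0 for non-matching pairs
        dist = F.pairwise_distance(emb1, emb2)
        loss_similar = target.float() * dist.pow(2)
        loss_dissimilar = (1 - target.float()) * F.relu(self.margin - dist).pow(2)
        return 0.5 * (loss_similar + loss_dissimilar).mean()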
Example #6
def train(data,
          mine_net,
          mine_net_optim,
          resp=0,
          cond=1,
          batch_size=100,
          iter_num=int(1e+4),
          log_freq=int(1e+3),
          avg_freq=int(1e+2),
          verbose=True,
          patience=20):
    # data is x or y
    result = list()
    ma_et = 1.

    #Early Stopping
    train_losses = []
    valid_losses = []
    avg_train_losses = []
    avg_valid_losses = []

    earlyStop = EarlyStopping(patience=patience, verbose=True)
    trainData, validData = create_dataset(data, batch_size)
    for i in range(iter_num):
        #get train data
        batchTrain = sample_batch(trainData, resp, cond, batch_size=batch_size)
        mi_lb, ma_et = learn_mine(batchTrain, mine_net, mine_net_optim, ma_et)
        result.append(mi_lb.detach().cpu().numpy())
        train_losses.append(result[-1].item())
        if verbose and (i + 1) % (log_freq) == 0:
            print(result[-1])

        batchValid = sample_batch(validData, resp, cond, batch_size=batch_size)
        mi_lb_valid = valid_mine(batchValid, mine_net)
        valid_losses.append(mi_lb_valid.item())

        if (i + 1) % (avg_freq) == 0:
            train_loss = np.average(train_losses)
            valid_loss = np.average(valid_losses)
            avg_train_losses.append(train_loss)
            avg_valid_losses.append(valid_loss)

            print_msg = "[{0}/{1}] train_loss: {2} valid_loss: {3}".format(
                i, iter_num, train_loss, valid_loss)
            print(print_msg)

            train_losses = []
            valid_losses = []

            earlyStop(valid_loss, mine_net)
            if (earlyStop.early_stop):
                print("Early stopping")
                break

    mine_net.load_state_dict(torch.load('checkpoint.pt'))
    return mine_net, avg_train_losses, avg_valid_losses
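Example #6 relies on learn_mine, valid_mine and sample_batch, which are not shown. A minimal sketch of the usual MINE update, assuming the Donsker-Varadhan lower bound with the running average ma_et used to bias-correct the gradient of the log term (Belghazi et al., 2018); the batch is assumed to be a (joint, marginal) pair of arrays:

import torch

def mutual_information(joint, marginal, mine_net):
    t = mine_net(joint)
    et = torch.exp(mine_net(marginal))
    mi_lb = torch.mean(t) - torch.log(torch.mean(et))  # Donsker-Varadhan lower bound
    return mi_lb, t, et

def learn_mine(batch, mine_net, mine_net_optim, ma_et, ma_rate=0.01):
    joint, marginal = (torch.as_tensor(b, dtype=torch.float) for b in batch)
    mi_lb, t, et = mutual_information(joint, marginal, mine_net)
    ma_et = (1 - ma_rate) * ma_et + ma_rate * torch.mean(et).item()
    # bias-corrected surrogate: the gradient of log(mean(et)) is approximated by mean(et) / ma_et
    loss = -(torch.mean(t) - torch.mean(et) / ma_et)
    mine_net_optim.zero_grad()
    loss.backward()
    mine_net_optim.step()
    return mi_lb, ma_et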
Example #7
def train(model, stock_ids):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.MSELoss()
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               mode='min',
                                               factor=0.5,
                                               patience=patience,
                                               verbose=verbose,
                                               cooldown=1,
                                               min_lr=min_lr,
                                               eps=min_lr)

    earlyStop = EarlyStopping(model_name, models_folder, patience=10)
    X_train, X_test, y_train, y_test = load_train_data(stock_ids)
    pbar = tqdm(range(0, max_epoch))
    clean_models(model_name, models_folder)

    for epoch in pbar:
        model.train()
        # forward + backward + optimize
        steps = y_train.shape[1] // len(predict_columns)
        dataset = BasicDataset(X_train, y_train)
        dataloader = DataLoader(dataset,
                                batch_size=10240,
                                shuffle=True,
                                num_workers=0)

        total_train_loss = []
        for _, items in enumerate(dataloader):
            optimizer.zero_grad()
            train_outputs = model(items[0], steps)

            train_loss = criterion(train_outputs, items[1])
            train_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5, norm_type=2)
            optimizer.step()
            total_train_loss.append(train_loss.detach())

        train_loss = torch.mean(torch.stack(total_train_loss))

        with torch.no_grad():
            model.eval()
            outputs = model(X_test, steps)
            validate_loss = criterion(outputs, y_test)

        if epoch % 100 == 99:
            earlyStop(validate_loss, model)
            if earlyStop.early_stop:
                break

        scheduler.step(train_loss)
        pbar.set_description("{0:.6f}, {1:.6f}".format(train_loss,
                                                       validate_loss))

    return model
Example #8
    def update_model(self, HL_replay_buffer, logger):
        early_stopper = EarlyStopping(patience=7)
        split = 10.0
        state_norm = utils.normalization(HL_replay_buffer.obses,
                                         self.all_mean_var[0],
                                         self.all_mean_var[1])
        action_norm = utils.normalization(HL_replay_buffer.actions,
                                          self.all_mean_var[2],
                                          self.all_mean_var[3])
        delta_state_norm = utils.normalization(HL_replay_buffer.next_obses,
                                               self.all_mean_var[4],
                                               self.all_mean_var[5])
        train_capacity = int(HL_replay_buffer.capacity * (split - 1) / split)
        test_idxs = np.arange(-int(HL_replay_buffer.capacity / split), 0)

        state_test = torch.as_tensor(state_norm[test_idxs],
                                     device=self.device).float()
        action_test = torch.as_tensor(action_norm[test_idxs],
                                      device=self.device).float()
        delta_state_test = torch.as_tensor(delta_state_norm[test_idxs],
                                           device=self.device).float()

        for i in range(self.model_update_steps):
            self.update_step += 1
            idxs = np.random.randint(0, train_capacity, size=self.batch_size)
            # idxs = np.random.randint(0, 1100, size=self.batch_size)

            state = torch.as_tensor(state_norm[idxs],
                                    device=self.device).float()
            action = torch.as_tensor(action_norm[idxs],
                                     device=self.device).float()
            delta_state = torch.as_tensor(delta_state_norm[idxs],
                                          device=self.device).float()

            pred_delta_state = self.forward_model(state, action)
            model_loss = F.mse_loss(pred_delta_state, delta_state)
            self.model_optimizer.zero_grad()
            model_loss.backward()
            self.model_optimizer.step()

            logger.log('train/model_loss', model_loss)
            logger.dump(self.update_step)

            if (i + 1) % 100 == 0:
                pred_delta_state = self.forward_model(state_test, action_test)
                model_loss = F.mse_loss(pred_delta_state, delta_state_test)
                logger.log('train/val_loss', model_loss)
                logger.dump(self.update_step)
                early_stopper(model_loss)

            if early_stopper.early_stop:
                break

        self.save_data('.')
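Example #8 standardizes the replay-buffer arrays with utils.normalization, passing stored mean/variance pairs. A minimal sketch, assuming plain standardization with precomputed per-dimension statistics (the helper in the original utils module may differ):

import numpy as np

def normalization(x, mean, var, eps=1e-8):
    """Standardize x with precomputed per-dimension mean and variance."""
    return (x - mean) / np.sqrt(var + eps)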
Example #9
    def train_model(self, last_hidden, last_rnn, train_loader, valid_loader):
        if last_rnn is not None:
            rnn_local = last_rnn
        else:
            rnn_local = GRUNet(len(forecast_strategy.factors_list)).to(self.device)
        optimizer_local = torch.optim.Adam(rnn_local.parameters(), lr=self.LR)  # optimize all RNN parameters
        loss = nn.MSELoss().to(self.device)
        early_stopping = EarlyStopping(patience=20, verbose=True, trace_func=logging.info)
        rnn_local, h_state = self.train_rnn(early_stopping, last_hidden, loss, optimizer_local, rnn_local,
                                            train_loader, valid_loader)
        return early_stopping, h_state, loss, rnn_local
Example #10
def main():
    train_losses, eval_losses=[], []
    train_lips_losses,train_tongue_losses,train_lipstongue_losses=[], [], []
    early_stopping=EarlyStopping(patience=PATIENCE,verbose=True)
    for epoch in range(NUM_EPOCH):
        print('[INFO] start training ')
        autoencoder.train()
        train_loss, train_lips_loss, train_tongue_loss, train_lipstongue_loss=0.0, 0.0, 0.0, 0.0
        for step, (train_lips, train_tongue, train_label) in enumerate(train_loader):
            train_lips, train_tongue, train_label = Variable(train_lips).cuda(), Variable(train_tongue).cuda(), Variable(train_label).cuda()
            optimizer.zero_grad()
            output, output_lips, output_tongue = autoencoder(train_lips,train_tongue)
            loss_lips=loss_func1(output_lips,train_lips)
            loss_tongue=loss_func2(output_tongue,train_tongue)
            loss_lipstongue=loss_func3(output,train_label)
            loss=loss_lipstongue+loss_lips+loss_tongue
            loss.backward()        
            optimizer.step()
            train_loss += float(loss.item()*train_lips.size(0))
            train_lips_loss += float(loss_lips.item()*train_lips.size(0))
            train_tongue_loss += float(loss_tongue.item()*train_lips.size(0))
            train_lipstongue_loss += float(loss_lipstongue.item()*train_lips.size(0))
            if step%100==99:
                print('Epoch:[%d/%d], Step:[%d/%d], Step loss: %.4f' % (epoch + 1, NUM_EPOCH, step + 1, len(train_datasets) // BATCH_SIZE, loss.item()))
        train_losses.append(train_loss/len(train_datasets))
        train_lips_losses.append(train_lips_loss/len(train_datasets))
        train_tongue_losses.append(train_tongue_loss/len(train_datasets))
        train_lipstongue_losses.append(train_lipstongue_loss/len(train_datasets))
        print('=====> Epoch:',epoch+1, ' | Average epoch train loss total: %.4f' % (train_loss/len(train_datasets)))
        
        print('[INFO] start evaluation')
        autoencoder.eval()
        with torch.no_grad():
            eval_loss=0.0
            for step,(test_lips, test_tongue, test_label) in enumerate(eval_loader):
                test_lips, test_tongue, test_label = Variable(test_lips).cuda(), Variable(test_tongue).cuda(), Variable(test_label).cuda()
                output, output_lips, output_tongue = autoencoder(test_lips,test_tongue)
                loss_lips=loss_func1(output_lips,test_lips)
                loss_tongue=loss_func2(output_tongue,test_tongue)
                loss_lipstongue=loss_func3(output,test_label)
                loss=loss_lipstongue+loss_lips+loss_tongue
                eval_loss += float(loss.item()*test_lips.size(0))
            eval_losses.append(eval_loss/len(eval_datasets))
            print('=====> Epoch:',epoch+1, ' | Average epoch eval loss: %.4f ' % (eval_loss/len(eval_datasets)))
            print('[INFO] evaluation complete')

        # early_stopping(train_loss/len(train_datasets),autoencoder)
        early_stopping(eval_loss / len(eval_datasets), autoencoder)
        if early_stopping.early_stop:
            print('[INFO] early stop')
            break

        # torch.save(encoder.state_dict(),'./autoencoder.pth')
    return train_losses, eval_losses, train_lips_losses, train_tongue_losses, train_lipstongue_losses
Example #11
def train(model, optimizer, criterion, lr_scheduler, train_loader,
          valid_loader, test_loader, config):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    early_stopping = EarlyStopping(patience=5, verbose=True)
    valid_losses = []

    print(
        f"Number of mini-batches: {len(train_loader)} for batch_size {BATCH_SIZE}"
    )
    for epoch in range(20):
        running_loss = 0.0
        model.train()
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data[0].to(device), data[1].to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if i % 200 == 199:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 200))
                running_loss = 0.0
                test(model, test_loader)

        if config["use_lr_decay"]:
            print(
                f"Decreasing learning rate to {lr_scheduler.get_lr()}, i.e. {config['lr_decay_rate']**(epoch+1)*100}%"
            )
            lr_scheduler.step()
        torch.save(model.state_dict(), f"model_epoch{epoch}.h5")
        wandb.save(f"model_epoch{epoch}.h5")

        model.eval()
        valid_losses = []  # reset each epoch so early stopping sees only this epoch's average
        for data in valid_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            output = model(inputs)
            loss = criterion(output, labels)
            valid_losses.append(loss.item())

        valid_loss = np.average(valid_losses)
        early_stopping(valid_loss, model)

        if early_stopping.early_stop:
            print("Early stopping")
            break

    print('Finished training.')
    test(model, test_loader, compute_confusion_matrix=True)
Example #12
    def train_model(self):
        dur = []
        valid_losses = []
        avg_train_losses = []
        avg_valid_losses = []
        epoch_train_loss = []
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-2)
        early_stopping = EarlyStopping(patience=self.params["patience"],
                                       verbose=True)
        for epoch in range(self.params["n_epochs"]):
            if epoch >= 3:
                t0 = time.time()

            self.train()
            logits = self()
            logp = F.log_softmax(logits, 1)
            loss = F.nll_loss(logp[self.train_mask],
                              self.labels[self.train_mask])
            epoch_train_loss.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if epoch >= 3:
                dur.append(time.time() - t0)

            acc = calculate_accuracy(*self.evaluate())
            epoch_train_loss_mean = np.mean(epoch_train_loss)
            print(
                "Epoch {:05d} | loss {:.4f} | Test Acc {:.4f} | Time(s) {:.4f}"
                .format(epoch, loss.item(), acc, np.mean(dur)))

            with torch.no_grad():
                logits = self()
                logp = F.log_softmax(logits, 1)
                loss_val = F.nll_loss(logp[self.test_mask],
                                      self.labels[self.test_mask])
                valid_losses.append(loss_val.item())

            valid_loss = np.average(valid_losses)
            epoch_len = len(str(self.params["n_epochs"]))

            train_losses = []
            valid_losses = []

            early_stopping(valid_loss, self)

            if early_stopping.early_stop:
                print("Early stopping")
                break

        return self, avg_train_losses, avg_valid_losses
Example #13
    def __init__(self,
                 model: nn.Module,
                 train_dataloader: DataLoader,
                 valid_dataloader,
                 optimizer: Optimizer,
                 loss: AbstractLoss,
                 early_stopping_patience=7,
                 model_backup_destination="./",
                 resume=False,
                 gradient_clipping_value=None):
        self.model: nn.Module = model
        self.train_dataloader: DataLoader = train_dataloader
        self.valid_dataloader: DataLoader = valid_dataloader
        self.optimizer: Optimizer = optimizer
        # Loss used for benchmarking against other runs, in case the loss function used for backprop changes
        self.benchmark_MSE_loss: AbstractLoss = BatchSegmentMSELoss()
        # Custom loss is used for backpropagating
        self.custom_loss: AbstractLoss = loss
        self.gradient_clipping_value = gradient_clipping_value
        self.model_backup_destination = self._get_backup_destination(
            model_backup_destination, model, train_dataloader, optimizer, loss)
        self.early_stopper = EarlyStopping(
            patience=early_stopping_patience,
            verbose=True,
            destination_path=self.model_backup_destination)

        if resume:
            CometLogger.print("Resuming the training of {}".format(
                self.model_backup_destination))
            CometLogger.print(
                "Overriding the Model and Optimizer's state dictionaries with the checkpoint's dicts"
            )
            self.model.load_state_dict(
                self.early_stopper.load_model_checkpoint())
            self.optimizer.load_state_dict(
                self.early_stopper.load_optimizer_checkpoint())
Example #14
def train(model, stock_id):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.MSELoss()
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               mode='min',
                                               factor=0.5,
                                               patience=patience,
                                               verbose=verbose,
                                               cooldown=1,
                                               min_lr=min_lr,
                                               eps=1e-05)

    X_train, X_test, y_train, y_test = load_train_data(stock_id)
    pbar = tqdm(range(0, max_epoch))
    earlyStop = EarlyStopping(get_model_name(stock_id),
                              models_folder,
                              patience=4,
                              delta=0.00001)
    clean_models(get_model_name(stock_id), models_folder)

    for epoch in pbar:
        optimizer.zero_grad()

        # forward + backward + optimize
        train_outputs = model(X_train)
        train_loss = criterion(train_outputs, y_train)
        train_loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 10, norm_type=2)
        optimizer.step()

        with torch.no_grad():
            outputs = model(X_test)
            validate_loss = criterion(outputs, y_test)

        if epoch % 100 == 99:
            earlyStop(validate_loss, model)
            if earlyStop.early_stop:
                break

        pbar.set_description("{0}:{1:.6f}, {2:.6f}".format(
            stock_id, train_loss, validate_loss))
        scheduler.step(validate_loss)

    return model
Example #15
def train_model(args, model, training_data, validation_data):
    logger.info("training model")
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    loss_history = []
    loss_val_history = []
    f1_history = []
    f1_val_history = []
    # initialize the early_stopping object

    early_stopping = EarlyStopping(verbose=False,
                                   patience=args.patience,
                                   epsilon=args.epsilon)

    for i in range(1, args.num_epochs + 1):
        loss, loss_val, f1, f1_val = train_epoch(i, model, training_data,
                                                 optimizer, args,
                                                 validation_data)
        loss_history.append(loss)
        loss_val_history.append(loss_val)
        f1_history.append(f1)
        f1_val_history.append(f1_val)
        print("Epoch # %d" % i)
        print("Loss : Training- %.5f" % loss, ", Validation- %.5f" % loss_val)
        #print("Validation loss in epoch %d is:" % i, loss_val)
        print("F1 score : Training- %.5f" % f1, ", Validation- %.5f" % f1_val)
        #print("Validation f1 in epoch %d is:" % i, f1_val)

        # early stopping
        early_stopping(loss_val, model)

        if early_stopping.early_stop:
            print("Early stopping")
            break

    if args.plot_loss:
        stats = {
            'loss_history': loss_history,
            'loss_val_history': loss_val_history
        }
        plot_loss(stats)
        stats = {'f1_history': f1_history, 'f1_val_history': f1_val_history}
        plot_f1(stats)
    if args.is_save_model:
        save_model(args, model)
Example #16
def multi_train(model,
                train_loader,
                validate_loader,
                train_batch_size=16,
                validate_batch_size=32,
                patience=3,
                epoches=30,
                lr=1e-5,
                weight=None):
    # for storing individual gene mse loss
    temp_lis = []
    # for storing total mse loss during training
    train_loss_lis = []
    # for storing total mse loss during validate
    validate_loss_lis = []
    # for storing individual mse loss during validation
    val_loss_ind_lis = []

    # define early stopping
    early_stopping = EarlyStopping(patience=patience, verbose=False)

    for i in range(epoches):

        train_loss = train(model,
                           train_loader,
                           train_batch_size,
                           lr=lr,
                           weight=weight)
        train_loss_lis.append(train_loss)

        val_loss, val_loss_ind = validate(model, validate_loader,
                                          validate_batch_size)
        validate_loss_lis.append(val_loss)
        val_loss_ind_lis.append(val_loss_ind)

        early_stopping(val_loss, model)

        if early_stopping.early_stop:
            print("Early stopping")
            break

    return train_loss_lis, validate_loss_lis, val_loss_ind_lis
Example #17
def main_worker(epochs,best_model_spec, checkpoints_files,args,device):
    global ARCH_NAME


    ARCH_NAME = best_model_spec["model_name"]

    data_transforms = transforms.Compose([ToTensorRescale()])

    train_dataset    = CatDogsDataset(CAT_TRAIN_PATH, DOG_TRAIN_PATH, transform=data_transforms)
    val_dataset      = CatDogsDataset(CAT_VAL_PATH, DOG_VAL_PATH, transform=data_transforms)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE,shuffle=True)
    val_dataloader   = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)


    model_name = best_model_spec["model_name"]
    start_epoch = 0
    model, optimizer = build_model(model_name,best_model_spec)
    model.to(device)
    losses_dict= {'train': {}, 'test': {}, 'accuracy': {}}
    criterion = nn.NLLLoss()

    if LOAD_CHECKPOINT:
        model, start_epoch, optimizer, losses_dict = load_checkpoint(model,optimizer,checkpoints_files[-1])

    early_stopping = EarlyStopping(patience=4, verbose=True)

    for e in range(start_epoch,epochs):
        print("{} out of {}".format(e+1, epochs))
        time.sleep(1)
        model, train_loss = train(train_dataloader, model, criterion, optimizer, epochs,device)
        model, test_loss, test_accuracy = validate(val_dataloader, model, criterion,device)
        current_metrics = [e,train_loss, test_loss,test_accuracy]
        losses_dict["train"][e] = train_loss
        losses_dict["test"][e] = test_loss
        losses_dict["accuracy"][e] = test_accuracy
        early_stopping(test_loss, model)
        if early_stopping.early_stop:
            break
        if e % 2 == 0:
            checkpoints_files = save_checkpoint(model,optimizer, current_metrics, checkpoints_files, losses_dict)
    
    return checkpoints_files
Example #18
    def objective(params, epochs=1500):

        global ITERATION
        ITERATION += 1
        
        params['loss_weights'] = loss_weights
        params['num_classes'] = len(loss_weights)
        params['rnn_hid_size'] = int(params['rnn_hid_size'])
        
        data_train, label_count = data_loader.get_loader(filename=datapath, indices=train_idx, batch_size=batch_size)
        data_val = data_loader.get_loader(filename=datapath, indices=val_idx, batch_size=batch_size)  

        model = myrits.Model()
        print(params)
        model.set_params(**params)
        model = model.cuda()
        optimizer = optim.Adam(model.parameters(), lr=model.lr)
        early_stopping = EarlyStopping(patience=20, verbose=True, save_mode=1, runname='run_{}'.format(ITERATION), save_path=args.savepath)
        
        start = timer()
        
        val_loss = float('Inf')
        accuracy = 0.0
        for epoch in range(1, epochs+1):
            time_glob = time.time()
            
            train(model, data_train, optimizer, epoch)
            stop_early, val_loss = evaluate(model, data_val, epoch, early_stopping, ITERATION)

            time_ep = time.time() - time_glob
            print('Epoch time {}'.format(time_ep))

            if stop_early:
                break
        run_time = timer() - start
        of_connection = open(out_file, 'a')
        writer = csv.writer(of_connection)
        writer.writerow([val_loss, params, epoch, ITERATION, run_time])
        of_connection.close()
        return {'loss': val_loss, 'params': params, 'iteration': ITERATION, 'train_time': run_time, 'status': STATUS_OK}
Example #19
    def train(self, n_epochs=5):
        self.change_model()
        self.model = self.model.to(self.device)
        train_losses = []
        val_losses = []
        pth = './gz2hub_checkpoints/gz2hubcheckpoint'
        early_stopping = EarlyStopping(patience=10, verbose=True, path=pth)
        print('Training beginning')
        for epoch in range(n_epochs):
            train_loss = self.train_phase(tr='train')
            train_losses.append(train_loss)
            print("Epoch: {} Train Loss: {}".format(epoch + 1, train_loss))

            val_loss = self.train_phase(tr='val')
            self.scheduler.step(val_loss)
            val_losses.append(val_loss)
            print("Epoch: {} Val Loss: {}".format(epoch + 1, val_loss))

            early_stopping(val_loss, self.model, epoch)
            if early_stopping.early_stop:
                ep = epoch - 10
                self.model.load_state_dict(
                    torch.load(
                        './gz2hub_checkpoints/gz2hubcheckpoint{}.pt'.format(
                            ep)))
                print("Early stopping")
                break

        pickle.dump(train_losses, open('./losses/gz2hub_train', 'wb'))
        pickle.dump(val_losses, open('./losses/gz2hub_val', 'wb'))
        torch.save(self.model, self.savePath)
        print('Model saved: ' + self.savePath)
        plt.plot(train_losses, label='Training loss')
        plt.plot(val_losses, label='Validation loss')
        plt.legend(frameon=False)
        plt.show()
        print("Training complete")
        return None
Example #20
    def train(self, n_epochs=5):
        self.model = self.model.to(self.device)
        train_losses = []
        val_losses = []
        pth = './gz2_checkpoints/gz2checkpoint'
        early_stopping = EarlyStopping(patience=5, verbose=True, path=pth)
        print("Training beginning")
        for epoch in range(n_epochs):
            train_loss = self.train_phase(tr='train')
            train_losses.append(train_loss)
            print("[TST] Epoch: {} Train Loss: {}".format(
                epoch + 1, train_loss))
            torch.cuda.empty_cache()

            val_loss = self.train_phase(tr='val')
            val_losses.append(val_loss)
            print("[TST] Epoch: {} Val Loss: {}".format(epoch + 1, val_loss))
            early_stopping(val_loss, self.model, epoch)
            if early_stopping.early_stop:
                ep = epoch - 5  # patience is 5 here, so the best checkpoint is 5 epochs back
                self.model.load_state_dict(
                    torch.load(
                        './gz2_checkpoints/gz2checkpoint{}.pt'.format(ep)))
                print("Early stopping")
                break
        pickle.dump(train_losses, open('./losses/gz2_train', 'wb'))
        pickle.dump(val_losses, open('./losses/gz2_val', 'wb'))
        torch.save(self.model, self.savePath)
        print('Model saved: ' + self.savePath)
        plt.plot(train_losses, label='Training loss')
        plt.plot(val_losses, label='Validation loss')
        plt.legend(frameon=False)
        plt.show()
        print("Training complete, evaluation on unseen set beginning")
        unseen_loss = self.unseen_phase()
        print('Unseen loss: {}'.format(unseen_loss))
        return None
Example #21
def validation(epoch):
    global log, best_loss, best_acc
    train_loader, val_loader= tumor_dataset()
    net.eval()
    val_loss=0
    correct=0
    total=0
    early_stop = EarlyStopping(patience=10,verbose=True)
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(val_loader):

            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            val_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
        if best_acc<100. * correct / total:
            best_acc=100. * correct / total
            print_log('Update best acc : {:<5.3f}'.format(best_acc),log)
        if (val_loss / (batch_idx + 1)) < best_loss:
            best_loss = (val_loss / (batch_idx + 1))
            print_log('Save best model | Loss : {}| Acc : {}'.format(val_loss / (batch_idx + 1), 100. * correct / total), log)
            torch.save(net, './{}/{}_model.pth'.format(log_folder, args.teacher_backbone))
            torch.save(net.state_dict(), './{}/{}_weight.pth'.format(log_folder, args.teacher_backbone))
        print_log('Teacher Val    |Batch_idx:{:<3d}|Val Loss  :{:<8.3f}|Val Acc:{:<8.3f}'.format(batch_idx, (val_loss / (batch_idx + 1)), 100. * correct / total),log)
        writer.add_scalar('val/loss', (val_loss / (batch_idx + 1)), epoch)
        writer.add_scalar('val/acc', (100. * correct / total), epoch)
        early_stop(val_loss, net)
        if early_stop.early_stop:
            print_log("Early stop", log)
            writer.close()
            log.close()
Example #22
File: train.py  Project: HiitLee/SALNet
    def train(self, model_file, pretrain_file, get_loss_CNN, get_loss_Attn_LSTM, evalute_CNN_SSL, pseudo_labeling,evalute_Attn_LSTM,evalute_CNN,evalute_Attn_LSTM_SSL, generating_lexiocn, data_parallel=False):
     
        """ Train Loop """
        self.model.train() # train mode
        self.load3(model_file, pretrain_file)
        
        
        self.model2.train() # train mode
        model = self.model.to(self.device)
        model2 = self.model2.to(self.device)
        t =  self.kkk
        
        if(self.dataName == 'IMDB'):
            rnn_save_name = "./IMDB_model_save/checkpoint_RNN"+str(t)+".pt"
            cnn_save_name = "./IMDB_model_save/checkpoint_CNN"+str(t)+".pt"
            result_name = "./result/result_IMDB.txt"
            pseudo_name = "./result/pseudo_train_set_IMDB.txt"
        elif(self.dataName == "AGNews"):
            rnn_save_name = "./AGNews_model_save/checkpoint_RNN"+str(t)+".pt"
            cnn_save_name = "./AGNews_model_save/checkpoint_CNN"+str(t)+".pt"
            result_name = "./result/result_AGNews.txt"
            pseudo_name = "./result/pseudo_train_set_AGNews.txt"
        elif(self.dataName == "DBpedia"):
            rnn_save_name = "./DBpedia_model_save/checkpoint_RNN"+str(t)+".pt"
            cnn_save_name = "./DBpedia_model_save/checkpoint_CNN"+str(t)+".pt"
            result_name = "./result/result_DBpedia.txt"
            pseudo_name = "./result/pseudo_train_set_DBpedia.txt"
        elif(self.dataName == "yahoo"):
            rnn_save_name = "./yahoo_model_save/checkpoint_RNN"+str(t)+".pt"
            cnn_save_name = "./yahoo_model_save/checkpoint_CNN"+str(t)+".pt"
            result_name = "./result/result_yahoo.txt"
            pseudo_name = "./result/pseudo_train_set_yahoo.txt"

        
        
        num_a=0
        global_step = 0 # global iteration steps regardless of epochs
        global_step3 = 0

        before = -50
        curTemp=0
        print("self.cfg.n_epochs#:", self.cfg.n_epochs)
        ddf = open(result_name,'a', encoding='UTF8')
        ddf.write("############################################"+str(t)+": ramdom_samplimg###########################################"+'\n')
        ddf.close()
        
        ddf = open(pseudo_name,'a', encoding='UTF8')
        ddf.write("############################################"+str(t)+": ramdom_samplimg###########################################"+'\n')
        ddf.close()
                
        for e in range(self.cfg.n_epochs):
            if(e==0):
                temp=987654321
                early_stopping = EarlyStopping(patience=10, verbose=True)
                valid_losses = []
                
                while(1):
                    self.optimizer = optim.optim4GPU(self.cfg, model, len(self.data_iter3_b))
                    global_step = 0 # global iteration steps regardless of epochs
                    global_step3 = 0
                    loss_sum = 0. # the sum of iteration losses to get average loss in every epoch
                    iter_bar = tqdm(self.data_iter3_b, desc='Iter (loss=X.XXX)')
                    model.train()
                    for i, batch in enumerate(iter_bar):
                        batch = [t.to(self.device) for t in batch]

                        self.optimizer.zero_grad()
                        loss = get_loss_CNN(model, batch, global_step).mean() # mean() for Data Parallelism
                        loss.backward()
                        self.optimizer.step()

                        global_step += 1
                        loss_sum += loss.item()
                        iter_bar.set_description('Iter (loss=%5.3f)'%loss.item())



                    print('Epoch %d/%d : Average Loss %5.3f'%(e+1, self.cfg.n_epochs, loss_sum/(i+1)))
                    model.eval()# evaluation mode

                    loss_sum = 0.
                    global_step3 = 0
                    iter_bar_dev = tqdm(self.dataset_dev_b, desc='Iter (loss=X.XXX)')
                    self.optimizer = optim.optim4GPU(self.cfg, model, len(self.dataset_dev_b))
            
                    for i, batch in enumerate(iter_bar_dev):
                        batch = [t.to(self.device) for t in batch]
                        loss = get_loss_CNN(model, batch,global_step3).mean() # mean() for Data Parallelism
                        valid_losses.append(loss.item())
                        global_step3 += 1
                        loss_sum += loss.item()
                        iter_bar_dev.set_description('Iter (loss=%5.3f)'%loss.item())



                    print('Epoch %d/%d : Average Loss %5.3f'%(e+1, self.cfg.n_epochs, loss_sum/(i+1)))

                    valid_loss = np.average(valid_losses)
                    loss_min=early_stopping(valid_loss, model,"./model_save/checkpoint_BERT_real.pt")
                    valid_losses = []

                    if early_stopping.early_stop:
                        print("Early stopping")
                        break

 
                        
                model.load_state_dict(torch.load("./model_save/checkpoint_BERT_real.pt"))
                print("Early stopping")
                model.eval()# evaluation mode
                
                p=[]
                l=[]
                p3=[]
                p2=[]
                iter_bar = tqdm(self.data_iter2_b, desc='Iter (f1-score=X.XXX)')
                for batch in iter_bar:
                    batch = [t.to(self.device) for t in batch]
                    with torch.no_grad(): # evaluation without gradient calculation
                        label_id, y_pred1 = evalute_CNN(model, batch) # accuracy to print
                        softmax = nn.Softmax(dim=1)
                        y_pred3 = softmax(y_pred1)
                        #print("y_pred3#:", y_pred3)
                        y_pred33, y_pred1 = torch.max(y_pred3, 1)
                        print(y_pred1)
                        p2.append(np.ndarray.flatten(y_pred3[:, 1].data.cpu().numpy()))
                        p.append(np.ndarray.flatten(y_pred1.data.cpu().numpy()))
                        l.append(np.ndarray.flatten(label_id.data.cpu().numpy()))
                    result2  = 0
                    iter_bar.set_description('Iter(roc=%5.3f)'%result2)
                p2 = [item for sublist in p2 for item in sublist]
                p = [item for sublist in p for item in sublist]
                l = [item for sublist in l for item in sublist]
                p=np.array(p)
                l=np.array(l)
                F1score = f1_score(l,p,average='micro')
                accur = accuracy_score(l,p)
                ddf = open(result_name,'a', encoding='UTF8')
                ddf.write(str(t)+": "+ str(num_a)+"aucr: "+str(accur)+"f1-score: "+str(F1score)+'\n')
                ddf.close()
                num_a+=1
                
  
                temp=987654321
                early_stopping = EarlyStopping(patience=30, verbose=True)
                valid_losses = []
                while(1):
                    model2.train()
                    loss_sum = 0
                    global_step3 = 0
                    iter_bar3 = tqdm(self.data_iter3, desc='Iter (loss=X.XXX)')
                    for i, batch in enumerate(iter_bar3):
                        batch = [t.to(self.device) for t in batch]
                        loss = get_loss_Attn_LSTM(model2, batch, global_step3).mean() # mean() for Data Parallelism
                        self.optimizer2.zero_grad()
                        loss.backward()
                        self.optimizer2.step()
                        global_step3 += 1
                        loss_sum += loss.item()
                        iter_bar3.set_description('Iter (loss=%5.3f)'%loss.item())

                        if global_step3 % self.cfg.save_steps == 0: # save
                            self.save(global_step3)

                        if self.cfg.total_steps and self.cfg.total_steps < global_step3:
                            print('Epoch %d/%d : Average Loss %5.3f'%(e+1, self.cfg.n_epochs, loss_sum/(i+1)))
                            print('The Total Steps have been reached.')
                            self.save(global_step3) # save and finish when global_steps reach total_steps
                            return
                        
                    print('Epoch %d/%d : Average Loss %5.3f'%(e+1, self.cfg.n_epochs, loss_sum/(i+1)))
                    model2.eval()
                    loss_sum = 0.
                    global_step3 = 0
                    iter_bar_dev = tqdm(self.dataset_dev, desc='Iter (loss=X.XXX)')
                    for i, batch in enumerate(iter_bar_dev):
                        batch = [t.to(self.device) for t in batch]
                        loss = get_loss_Attn_LSTM(model2, batch, global_step3).mean() # mean() for Data Parallelism
                        valid_losses.append(loss.item())
                        global_step3 += 1
                        loss_sum += loss.item()
                        iter_bar_dev.set_description('Iter (loss=%5.3f)'%loss.item())

                        if global_step3 % self.cfg.save_steps == 0: # save
                            self.save(global_step3)

                        if self.cfg.total_steps and self.cfg.total_steps < global_step3:
                            print('Epoch %d/%d : Average Loss %5.3f'%(e+1, self.cfg.n_epochs, loss_sum/(i+1)))
                            print('The Total Steps have been reached.')
                            self.save(global_step3) # save and finish when global_steps reach total_steps
                            return

                    print('Epoch %d/%d : Average Loss %5.3f'%(e+1, self.cfg.n_epochs, loss_sum/(i+1)))
                    valid_loss = np.average(valid_losses)
                    loss_min=early_stopping(valid_loss, model2,"./model_save/checkpoint_LSTM_real.pt")
                    valid_losses = []
                    if early_stopping.early_stop:
                        print("Early stopping")
                        break

                
                model2.eval()
                p=[]
                l=[]
                p3=[]
                iter_bar4 = tqdm(self.data_iter2, desc='Iter (f1-score=X.XXX)')
                global_step3=0
                for batch in iter_bar4:
                    batch = [t.to(self.device) for t in batch]
                    with torch.no_grad(): # evaluation without gradient calculation
                        label_id, y_pred1 = evalute_Attn_LSTM(model2, batch, global_step3,len(iter_bar4))# accuracy to print
                        _, y_pred3 = y_pred1.max(1)
                        global_step3+=1
                        p2=[]
                        l2=[]
                        for i in range(0,len(y_pred3)):
                            p3.append(np.ndarray.flatten(y_pred3[i].data.cpu().numpy()))
                            l.append(np.ndarray.flatten(label_id[i].data.cpu().numpy()))
                            p2.append(np.ndarray.flatten(y_pred3[i].data.cpu().numpy()))
                            l2.append(np.ndarray.flatten(label_id[i].data.cpu().numpy()))
                    p2 = [item for sublist in p2 for item in sublist]
                    l2 = [item for sublist in l2 for item in sublist]
                    result2  = f1_score(l2, p2,average='micro')
                    iter_bar4.set_description('Iter(roc=%5.3f)'%result2)
                p3 = [item for sublist in p3 for item in sublist]
                l = [item for sublist in l for item in sublist]
                p=np.array(p)
                l=np.array(l)
                results2  = accuracy_score(l, p3)
                F1score = f1_score(l,p3,average='micro')
                ddf = open(result_name,'a', encoding='UTF8')
                ddf.write(str(t)+": "+str(num_a)+"aucr: "+str(results2)+"f1-score: "+str(F1score)+'\n')
                ddf.close()
                num_a+=1
                

                
            elif(e%2==1):
                global_step1 = 0
                model2.eval()
                labell=[]
                iter_bar = tqdm(self.data_iter, desc='Iter (loss=X.XXX)')
                for batch in iter_bar:
                    batch = [t.to(self.device) for t in batch]
                    with torch.no_grad(): # evaluation without gradient calculation
                        label_id, y_pred1 = generating_lexiocn(model2, batch,global_step1,len(iter_bar),e) # accuracy to print
                        global_step1+=1
                        
                        
                        
                        
                global_step1 = 0
                model.eval()
                labell=[]
                iter_bar = tqdm(self.data_iter_b, desc='Iter (loss=X.XXX)')
                for batch in iter_bar:
                    batch = [t.to(self.device) for t in batch]
                    with torch.no_grad(): # evaluation without gradient calculation
                        label_id, y_pred1 = evalute_CNN_SSL(model, batch,global_step1) # accuracy to print
                        global_step1+=1
                        
                        
                
                global_step1 = 0

                model2.eval()
                sen = []
                labell=[]
                iter_bar = tqdm(self.data_iter, desc='Iter (loss=X.XXX)')
                for batch in iter_bar:
                    batch = [t.to(self.device) for t in batch]
                    with torch.no_grad(): # evaluation without gradient calculation
                        label_id, y_pred1,result_label,result3,data_temp, data_temp_b, data_iter_temp_na, data_iter_temp_na_b = pseudo_labeling( model2,batch,global_step1,len(iter_bar),e) # accuracy to print
                        global_step1+=1
        
                self.data_iter_temp = data_temp
                self.data_iter_temp_b = data_temp_b
                self.data_iter = data_iter_temp_na
                self.data_iter_b = data_iter_temp_na_b
                #print(result3)
                num_good=0
                num_label=0
                num_label1=0
                ddf = open(pseudo_name,'a', encoding='UTF8')
                
                for i in range(0, len(result3)):
                    sen.append(result3[i])
                
                num_label=0
                num_label1=0
                num_good = 0
                for i in range(0, len(result3)):
                    if(result3[i] != -1):
                        num_good +=1
                        if(result3[i] == result_label[i]):
                            num_label+=1
                
                ddf.write(str(t)+"  " +"number of good :"+str(num_good)+" ")
                ddf.write("number of label :"+str(num_label)+" ")
                ddf.write("\n")
                ddf.close()
                print("num_good#:", num_good)
                print("before#:", before)
                if(num_good  < self.stopNum):
                    curTemp+=1
                else:
                    curTemp=0
                if(curTemp>=2):
                    break
          

                    

            elif(e%2==0 ):
                self.model.train() # train mode
                self.load3(model_file, pretrain_file)
                model = self.model.to(self.device)
        
                b=0
                early_stopping = EarlyStopping(patience=1, verbose=True)
                valid_losses = []
                bb=987654321
                
                
                while(1):
                    self.optimizer = optim.optim4GPU(self.cfg, model, len(self.data_iter_temp_b))
                    iter_bar = tqdm(self.data_iter_temp_b, desc='Iter (loss=X.XXX)')
                    model.train()
                    global_step = 0
                    global_step3 = 0
                    loss_sum = 0.
                    valid_losses2 = []
                    for i, batch in enumerate(iter_bar):
                        batch = [t.to(self.device) for t in batch]
                        self.optimizer.zero_grad()
                        loss = get_loss_CNN(model, batch, global_step).mean() # mean() for Data Parallelism
                        valid_losses2.append(loss.item())
                        loss.backward()
                        self.optimizer.step()
                        global_step += 1
                        loss_sum += loss.item()
                        iter_bar.set_description('Iter (loss=%5.3f)'%loss.item())

                    print('Epoch %d/%d : Average Loss %5.3f'%(e+1, self.cfg.n_epochs, loss_sum/(i+1)))
                    valid_loss2 = np.average(valid_losses2)
                    bb = min(bb, valid_loss2.item())  # track the lowest average training loss so far
                               
                    valid_losses2 = []
                    model.eval()  # evaluation mode
                    loss_sum = 0.
                    global_step3 = 0
                    iter_bar_dev = tqdm(self.dataset_dev_b, desc='Iter (loss=X.XXX)')

                    for i, batch in enumerate(iter_bar_dev):
                        batch = [t.to(self.device) for t in batch]
                        with torch.no_grad():  # no gradients are needed for the dev-set loss
                            loss = get_loss_CNN(model, batch, global_step3).mean()  # mean() for Data Parallelism
                        valid_losses.append(loss.item())
                        global_step3 += 1
                        loss_sum += loss.item()
                        iter_bar_dev.set_description('Iter (loss=%5.3f)' % loss.item())

                    print('Epoch %d/%d : Average Loss %5.3f' % (e + 1, self.cfg.n_epochs, loss_sum / (i + 1)))

                    valid_loss = np.average(valid_losses)
                    loss_min = early_stopping(valid_loss, model, cnn_save_name)
                    valid_losses = []

                    if early_stopping.early_stop:
                        print("Early stopping")
                        break
   
                model.load_state_dict(torch.load(cnn_save_name))
                model.eval()# evaluation mode
                self.model.eval()# evaluation mode
                
                p=[]
                l=[]
                p3=[]
                p2=[]
                iter_bar = tqdm(self.data_iter2_b, desc='Iter (f1-score=X.XXX)')
                for batch in iter_bar:
                    batch = [t.to(self.device) for t in batch]
                    with torch.no_grad(): # evaluation without gradient calculation
                        label_id, y_pred1 = evalute_CNN(model, batch)  # logits for this batch
                        softmax = nn.Softmax(dim=1)
                        y_pred3 = softmax(y_pred1)
                        y_pred33, y_pred1 = torch.max(y_pred3, 1)
                        p2.append(np.ndarray.flatten(y_pred3[:, 1].data.cpu().numpy()))
                        p.append(np.ndarray.flatten(y_pred1.data.cpu().numpy()))
                        l.append(np.ndarray.flatten(label_id.data.cpu().numpy()))
                    result2 = 0  # placeholder; no running metric is computed per batch
                    iter_bar.set_description('Iter(roc=%5.3f)' % result2)
                p2 = [item for sublist in p2 for item in sublist]
                p = [item for sublist in p for item in sublist]
                l = [item for sublist in l for item in sublist]
                p = np.array(p)
                l = np.array(l)
                F1score = f1_score(l, p, average='micro')
                accur = accuracy_score(l, p)

                ddf = open(result_name, 'a', encoding='UTF8')
                ddf.write(str(t) + ": " + str(num_a) + " accuracy: " + str(accur) + " f1-score: " + str(F1score) + '\n')
                ddf.close()
                num_a+=1
               
     
                valid_losses = []
                early_stopping = EarlyStopping(patience=10, verbose=True)
                while True:
                    model2.train()
                    loss_sum = 0
                    global_step3 = 0
                    iter_bar3 = tqdm(self.data_iter_temp, desc='Iter (loss=X.XXX)')
                    for i, batch in enumerate(iter_bar3):
                        batch = [t.to(self.device) for t in batch]
                        loss = get_loss_Attn_LSTM(model2, batch, global_step3).mean() # mean() for Data Parallelism
                        self.optimizer2.zero_grad()
                        loss.backward()
                        self.optimizer2.step()
                        global_step3 += 1
                        loss_sum += loss.item()
                        iter_bar3.set_description('Iter (loss=%5.3f)'%loss.item())

                     
                        
                    print('Epoch %d/%d : Average Loss %5.3f'%(e+1, self.cfg.n_epochs, loss_sum/(i+1)))
                    model2.eval()
                    loss_sum = 0.
                    global_step3 = 0
                    iter_bar_dev = tqdm(self.dataset_dev, desc='Iter (loss=X.XXX)')
                    for i, batch in enumerate(iter_bar_dev):
                        batch = [t.to(self.device) for t in batch]
                        with torch.no_grad():  # no gradients are needed for the dev-set loss
                            loss = get_loss_Attn_LSTM(model2, batch, global_step3).mean()  # mean() for Data Parallelism
                        valid_losses.append(loss.item())
                        global_step3 += 1
                        loss_sum += loss.item()
                        iter_bar_dev.set_description('Iter (loss=%5.3f)' % loss.item())

                       

                    print('Epoch %d/%d : Average Loss %5.3f'%(e+1, self.cfg.n_epochs, loss_sum/(i+1)))
                    valid_loss = np.average(valid_losses)
                    loss_min = early_stopping(valid_loss, model2, rnn_save_name)
                    valid_losses = []

                    if early_stopping.early_stop:
                        print("Early stopping")
                        break

                model2.load_state_dict(torch.load(rnn_save_name))   
                model2.eval()
                p=[]
                l=[]
                p3=[]
                
                iter_bar4 = tqdm(self.data_iter2, desc='Iter (f1-score=X.XXX)')
                for batch in iter_bar4:
                    batch = [t.to(self.device) for t in batch]
                    with torch.no_grad():
                        label_id, y_pred1 = evalute_Attn_LSTM_SSL(model2, batch)
                        _, y_pred3 = y_pred1.max(1)
                        p2 = []
                        l2 = []

                        for i in range(0, len(y_pred3)):
                            p3.append(np.ndarray.flatten(y_pred3[i].data.cpu().numpy()))
                            l.append(np.ndarray.flatten(label_id[i].data.cpu().numpy()))
                            p2.append(np.ndarray.flatten(y_pred3[i].data.cpu().numpy()))
                            l2.append(np.ndarray.flatten(label_id[i].data.cpu().numpy()))
                    p2 = [item for sublist in p2 for item in sublist]
                    l2 = [item for sublist in l2 for item in sublist]

                    result2 = f1_score(l2, p2, average='micro')  # running f1 over the current batch
                    iter_bar4.set_description('Iter(f1=%5.3f)' % result2)
                p3 = [item for sublist in p3 for item in sublist]
                l = [item for sublist in l for item in sublist]
                p3 = np.array(p3)
                l = np.array(l)
                results2 = accuracy_score(l, p3)
                F1score = f1_score(l, p3, average='micro')
                ddf = open(result_name, 'a', encoding='UTF8')
                ddf.write(str(t) + ": " + str(num_a) + " accuracy: " + str(results2) + " f1-score: " + str(F1score) + '\n')
                ddf.close()
                num_a+=1
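Both branches above repeat the same retraining pattern around EarlyStopping: retrain on the current pseudo-labelled split until the dev loss stops improving, then reload the best checkpoint before evaluating. The following is only a condensed sketch of that pattern, not code from the snippet; compute_loss, train_iter, dev_iter, optimizer and save_name are placeholder names.

import numpy as np
import torch

early_stopping = EarlyStopping(patience=1, verbose=True)
while True:
    model.train()
    for batch in train_iter:
        loss = compute_loss(model, batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        dev_loss = np.average([compute_loss(model, b).item() for b in dev_iter])

    # the snippet's EarlyStopping takes the save path as a third argument
    early_stopping(dev_loss, model, save_name)
    if early_stopping.early_stop:
        break

model.load_state_dict(torch.load(save_name))  # restore the best checkpoint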
print_step = 250
# save_steps = print_step
if not eval_model:

    write_train_para(writer, config)
    logger.info('------Training START--------')
    running_avg_loss, running_avg_rl_loss = 0, 0
    sum_total_reward = 0
    step = 0
    step = load_step + step
    start_ep = int(load_step / save_steps)

    # initialize the early_stopping object
    early_stopping = EarlyStopping(config,
                                   logger,
                                   vocab,
                                   loggerName,
                                   patience=3,
                                   verbose=True)
    try:
        for epoch in range((start_ep + 1), config.max_epochs + 1):
            for batch in train_loader:
                step += 1
                loss_st = time.time()
                inner_c, package = get_package(batch)
                if inner_c: continue
                parallel_model.module.train()
                mle_loss, pred_probs = train_one(package)
                if config.train_rl:
                    rl_loss, batch_reward = train_one_rl(package, batch)

                    if step % print_step == 0:
Example #24
0
def train_free():
    # Scale and initialize the parameters
    best_prec1 = 0
    configs.TRAIN.epochs = int(
        math.ceil(configs.TRAIN.epochs / configs.ADV.n_repeats))
    configs.ADV.fgsm_step /= configs.DATA.max_color_value
    configs.ADV.clip_eps /= configs.DATA.max_color_value

    # Create output folder
    if not os.path.isdir(os.path.join('trained_models', configs.output_name)):
        os.makedirs(os.path.join('trained_models', configs.output_name))

    # Log the config details
    logger.info(pad_str(' ARGUMENTS '))
    for k, v in configs.items():
        print('{}: {}'.format(k, v))
    logger.info(pad_str(''))

    # Create the model
    if configs.pretrained:
        print("=> using pre-trained model '{}'".format(configs.TRAIN.arch))
        model = models.__dict__[configs.TRAIN.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(configs.TRAIN.arch))
        model = models.__dict__[configs.TRAIN.arch]()

    # Wrap the model into DataParallel
    model = torch.nn.DataParallel(model).cuda()

    # Criterion:
    criterion = nn.CrossEntropyLoss().cuda()

    # Optimizer:
    optimizer = torch.optim.SGD(model.parameters(),
                                configs.TRAIN.lr,
                                momentum=configs.TRAIN.momentum,
                                weight_decay=configs.TRAIN.weight_decay)

    # Resume if a valid checkpoint path is provided
    if configs.resume:
        if os.path.isfile(configs.resume):
            print("=> loading checkpoint '{}'".format(configs.resume))
            checkpoint = torch.load(configs.resume)
            configs.TRAIN.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                configs.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(configs.resume))

    # setup data loader
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(configs.DATA.cifar10_mean,
                             configs.DATA.cifar10_std)
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(configs.DATA.cifar10_mean,
                             configs.DATA.cifar10_std)
    ])
    train_dataset = torchvision.datasets.CIFAR10(root='../data',
                                                 train=True,
                                                 download=True,
                                                 transform=transform_train)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=configs.DATA.batch_size,
        shuffle=True,
        num_workers=configs.DATA.workers,
        pin_memory=True,
        sampler=None)

    testset = torchvision.datasets.CIFAR10(root='../data',
                                           train=False,
                                           download=True,
                                           transform=transform_test)
    val_loader = torch.utils.data.DataLoader(
        testset,
        batch_size=configs.DATA.batch_size,
        shuffle=False,
        num_workers=configs.DATA.workers,
        pin_memory=True)

    # If in evaluate mode: perform validation on PGD attacks as well as clean samples
    if configs.evaluate:
        logger.info(pad_str(' Performing PGD Attacks '))
        for pgd_param in configs.ADV.pgd_attack:
            validate_pgd(val_loader, model, criterion, pgd_param[0],
                         pgd_param[1], configs, logger)
        validate(val_loader, model, criterion, configs, logger)
        return

    early_stopping = EarlyStopping(patience=15, verbose=True)

    for epoch in range(configs.TRAIN.start_epoch, configs.TRAIN.epochs):
        adjust_learning_rate(configs.TRAIN.lr, optimizer, epoch,
                             configs.ADV.n_repeats)

        # train for one epoch
        do_train_free(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1, stopped, early_stopping = validate(val_loader, model, criterion,
                                                  configs, logger,
                                                  early_stopping)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': configs.TRAIN.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best, os.path.join('trained_models', configs.output_name))
        if stopped:
            break

    # Automatically perform PGD Attacks at the end of training
    logger.info(pad_str(' Performing PGD Attacks '))
    for pgd_param in configs.ADV.pgd_attack:
        validate_pgd(val_loader, model, criterion, pgd_param[0], pgd_param[1],
                     configs, logger)
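In this example, validate() is also expected to drive the EarlyStopping object and hand back a stop flag (note the unpacking prec1, stopped, early_stopping = validate(...)). The repository's actual implementation is not shown here; the sketch below is only one plausible wrapper with that signature, and the metric computation inside it is illustrative.

import torch

def validate(val_loader, model, criterion, configs, logger, early_stopping=None):
    # evaluate top-1 accuracy and average loss on the clean validation set
    model.eval()
    total_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for images, targets in val_loader:
            images, targets = images.cuda(), targets.cuda()
            outputs = model(images)
            total_loss += criterion(outputs, targets).item() * images.size(0)
            correct += (outputs.argmax(dim=1) == targets).sum().item()
            total += images.size(0)
    prec1 = 100.0 * correct / total
    val_loss = total_loss / total

    stopped = False
    if early_stopping is not None:
        early_stopping(val_loss, model)  # checkpoints when the validation loss improves
        stopped = early_stopping.early_stop
    return prec1, stopped, early_stopping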
Example #25
0
experiment.log_parameters(parameters)
experiment.set_name(opts.exp_name)

models_path = "models/"
use_gpu = parameters['use_gpu']

mapping_file = 'models/mapping.pkl'

name = parameters['name']
model_name = models_path + name  #get_name(parameters)

if not os.path.exists(models_path):
    os.makedirs(models_path)

early_stopping = EarlyStopping(patience=20, verbose=True, path=model_name)

lower = parameters['lower']
zeros = parameters['zeros']
tag_scheme = parameters['tag_scheme']

train_sentences = loader.load_sentences(opts.train, lower, zeros)
dev_sentences = loader.load_sentences(opts.dev, lower, zeros)
test_sentences = loader.load_sentences(opts.test, lower, zeros)
test_train_sentences = loader.load_sentences(opts.test_train, lower, zeros)

update_tag_scheme(train_sentences, tag_scheme)
update_tag_scheme(dev_sentences, tag_scheme)
update_tag_scheme(test_sentences, tag_scheme)
update_tag_scheme(test_train_sentences, tag_scheme)
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    
    
    global_info = []
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    early_stopping = EarlyStopping(patience=25, verbose=True)
    for epoch in range(num_epochs):
        local_info = []
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode
                if epoch > 0:
                    # step the LR scheduler on the previous epoch's validation loss
                    scheduler.step(val_loss)
            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            if phase == 'val':
                val_loss = running_loss / dataset_sizes['val']
            epoch_acc = running_corrects.double() / dataset_sizes[phase]
            # record the loss and accuracy for this phase (train first, then val)
            local_info.append(epoch_loss)
            local_info.append(epoch_acc.cpu().numpy())

                

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))


            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
        lr_get = get_lr(optimizer)
        print("Current learning rate : {:.8f}".format(lr_get))
        global_info.append(local_info)
        # the phase loop always ends on 'val', so epoch_loss here is the validation loss
        early_stopping(epoch_loss, model)

        if early_stopping.early_stop:
            print("Early stopping")
            break
        
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    
    # load best model weights
    model.load_state_dict(best_model_wts)
    data = pd.DataFrame(global_info, columns = ['train_loss', 'train_acc', 'val_loss', 'val_acc'])
    data.to_csv('./csv_save/googlenet_raf.csv', header=True, index=True)
    return model
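The train_model() above relies on module-level dataloaders, dataset_sizes, device, get_lr and pandas (pd), none of which are defined in the snippet. Below is a hedged sketch of a matching call site; the concrete model, class count and hyperparameters are illustrative assumptions rather than values taken from the original script, and the pretrained=True form follows the older torchvision API used in Example #24.

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchvision import models

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def get_lr(optimizer):
    # one plausible definition: report the LR of the first parameter group
    return optimizer.param_groups[0]['lr']

# aux_logits=False keeps the forward pass returning a plain logits tensor in train mode
model = models.googlenet(pretrained=True, aux_logits=False)
model.fc = nn.Linear(model.fc.in_features, 7)  # e.g. 7 expression classes (assumption)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# train_model() calls scheduler.step(val_loss), so a plateau scheduler fits
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)

# dataloaders and dataset_sizes must be dicts keyed by 'train' and 'val'
# model = train_model(model, criterion, optimizer, scheduler, num_epochs=25)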
Example #27
0
gradient_clipping_value = 0
var_len = True

lstm = LSTM(dataset,
            input_size,
            hidden_size,
            num_layers,
            batch_size=batch_size,
            dropout=dropout)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)

valid_acc_history = []
use_cuda = False

early_stopping = EarlyStopping(patience=3, verbose=True, delta=0)

for epoch in range(num_epochs):

    print('Epoch:', epoch)
    train_loss_avg = 0

    idx = np.array(np.random.permutation(range(Ntrain)))
    idx_torch = torch.LongTensor(idx)
    train_data = torch.index_select(train_data, 0, idx_torch)
    train_labels = torch.index_select(train_labels, 0, idx_torch)
    Lentrain = Lentrain[idx]

    for i in range(int(np.ceil(Ntrain / batch_size))):  # ceil so a final partial batch is also visited

        if (batch_size * (i + 1)) <= Ntrain:
Example #28
0
def train_model(model, batch_size, patience, n_epochs):
    # to track the training loss as the model trains
    train_losses = []
    # to track the validation loss as the model trains
    valid_losses = []
    # to track the average training loss per epoch as the model trains
    avg_train_losses = []
    # to track the average validation loss per epoch as the model trains
    avg_valid_losses = []

    # initialize the early_stopping object
    early_stopping = EarlyStopping(patience=patience, verbose=True)

    for epoch in range(1, n_epochs + 1):

        ###################
        # train the model #
        ###################
        model.train()  # prep model for training
        hidden = model.init_hidden(batch_size)
        for batch, (data, target) in enumerate(train_loader):
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output, hidden = model(Variable(data).float(), hidden)
            # calculate the loss
            loss = criterion(output, Variable(target).view(-1))
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward(retain_graph=True)
            # perform a single optimization step (parameter update)
            optimizer.step()
            # record training loss
            train_losses.append(loss.item())

        ######################
        # validate the model #
        ######################
        model.eval()  # prep model for evaluation
        with torch.no_grad():  # no gradient tracking is needed for validation
            for data, target in valid_loader:
                # forward pass: compute predicted outputs by passing inputs to the model
                output, hidden = model(Variable(data).float(), hidden)
                # calculate the loss
                loss = criterion(output, Variable(target).view(-1))
                # record validation loss
                valid_losses.append(loss.item())

        # print training/validation statistics
        # calculate average loss over an epoch
        train_loss = np.average(train_losses)
        valid_loss = np.average(valid_losses)
        avg_train_losses.append(train_loss)
        avg_valid_losses.append(valid_loss)

        epoch_len = len(str(n_epochs))

        print_msg = (f'[{epoch:>{epoch_len}}/{n_epochs:>{epoch_len}}] ' +
                     f'train_loss: {train_loss:.5f} ' +
                     f'valid_loss: {valid_loss:.5f}')

        print(print_msg)
        file.write(print_msg)
        file.write('***************')
        # clear lists to track next epoch
        train_losses = []
        valid_losses = []

        # early_stopping needs the validation loss to check if it has decreased,
        # and if it has, it will make a checkpoint of the current model
        early_stopping(valid_loss, model)

        if early_stopping.early_stop:
            print("Early stopping")
            break

    # load the last checkpoint with the best model
    model.load_state_dict(torch.load('checkpoint.pt'))

    return model, avg_train_losses, avg_valid_losses
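Several of these snippets assume a pytorchtools-style EarlyStopping helper: constructed with patience/verbose/delta (and sometimes a path), called as early_stopping(val_loss, model), exposing an early_stop flag and writing the best weights to checkpoint.pt. A minimal sketch compatible with that call pattern is given below; the class the snippets actually import may differ in its details.

import numpy as np
import torch


class EarlyStopping:
    """Stop training when the validation loss has not improved for `patience` epochs."""

    def __init__(self, patience=7, verbose=False, delta=0.0, path='checkpoint.pt'):
        self.patience = patience   # how many epochs to wait after the last improvement
        self.verbose = verbose
        self.delta = delta         # minimum decrease that counts as an improvement
        self.path = path           # where the best weights are written
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.inf

    def __call__(self, val_loss, model):
        score = -val_loss          # higher score == lower loss
        if self.best_score is None or score > self.best_score + self.delta:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0
        else:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True

    def save_checkpoint(self, val_loss, model):
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss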
def main(args):

    dataset_name = args.dataset
    model_name = args.model
    n_inner_iter = args.adaptation_steps
    batch_size = args.batch_size
    save_model_file = args.save_model_file
    load_model_file = args.load_model_file
    lower_trial = args.lower_trial
    upper_trial = args.upper_trial
    is_test = args.is_test
    stopping_patience = args.stopping_patience
    epochs = args.epochs
    fast_lr = args.learning_rate
    slow_lr = args.meta_learning_rate
    noise_level = args.noise_level
    noise_type = args.noise_type
    resume = args.resume

    first_order = False
    inner_loop_grad_clip = 20
    task_size = 50
    output_dim = 1
    checkpoint_freq = 10
    horizon = 10
    ##test

    meta_info = {
        "POLLUTION": [5, 50, 14],
        "HR": [32, 50, 13],
        "BATTERY": [20, 50, 3]
    }

    assert model_name in ("FCN", "LSTM"), "Model was not correctly specified"
    assert dataset_name in ("POLLUTION", "HR", "BATTERY")

    window_size, task_size, input_dim = meta_info[dataset_name]

    grid = [0., noise_level]
    output_directory = "output/"

    train_data_ML = pickle.load(
        open(
            "../../Data/TRAIN-" + dataset_name + "-W" + str(window_size) +
            "-T" + str(task_size) + "-ML.pickle", "rb"))
    validation_data_ML = pickle.load(
        open(
            "../../Data/VAL-" + dataset_name + "-W" + str(window_size) + "-T" +
            str(task_size) + "-ML.pickle", "rb"))
    test_data_ML = pickle.load(
        open(
            "../../Data/TEST-" + dataset_name + "-W" + str(window_size) +
            "-T" + str(task_size) + "-ML.pickle", "rb"))

    for trial in range(lower_trial, upper_trial):

        output_directory = "../../Models/" + dataset_name + "_" + model_name + "_MAML/" + str(
            trial) + "/"
        save_model_file_ = output_directory + save_model_file
        save_model_file_encoder = output_directory + "encoder_" + save_model_file
        load_model_file_ = output_directory + load_model_file
        checkpoint_file = output_directory + "checkpoint_" + save_model_file.split(
            ".")[0]

        try:
            os.mkdir(output_directory)
        except OSError as error:
            print(error)

        with open(output_directory + "/results2.txt", "a+") as f:
            f.write("Learning rate :%f \n" % fast_lr)
            f.write("Meta-learning rate: %f \n" % slow_lr)
            f.write("Adaptation steps: %f \n" % n_inner_iter)
            f.write("Noise level: %f \n" % noise_level)

        if model_name == "LSTM":
            model = LSTMModel(batch_size=batch_size,
                              seq_len=window_size,
                              input_dim=input_dim,
                              n_layers=2,
                              hidden_dim=120,
                              output_dim=output_dim)
            model2 = LinearModel(120, 1)
        optimizer = torch.optim.Adam(list(model.parameters()) +
                                     list(model2.parameters()),
                                     lr=slow_lr)
        loss_func = mae
        #loss_func = nn.SmoothL1Loss()
        #loss_func = nn.MSELoss()
        initial_epoch = 0

        #torch.backends.cudnn.enabled = False

        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        meta_learner = MetaLearner(model2, optimizer, fast_lr, loss_func,
                                   first_order, n_inner_iter,
                                   inner_loop_grad_clip, device)
        model.to(device)

        early_stopping = EarlyStopping(patience=stopping_patience,
                                       model_file=save_model_file_encoder,
                                       verbose=True)
        early_stopping2 = EarlyStopping(patience=stopping_patience,
                                        model_file=save_model_file_,
                                        verbose=True)

        if resume:
            checkpoint = torch.load(checkpoint_file)
            model.load_state_dict(checkpoint["model"])
            meta_learner.load_state_dict(checkpoint["meta_learner"])
            initial_epoch = checkpoint["epoch"]
            best_score = checkpoint["best_score"]
            counter = checkpoint["counter_stopping"]

            early_stopping.best_score = best_score
            early_stopping2.best_score = best_score

            early_stopping.counter = counter
            early_stopping2.counter = counter

        total_tasks, task_size, window_size, input_dim = train_data_ML.x.shape
        accum_mean = 0.0

        for epoch in range(initial_epoch, epochs):

            model.zero_grad()
            meta_learner._model.zero_grad()

            #train
            batch_idx = np.random.randint(0, total_tasks - 1, batch_size)

            #for batch_idx in range(0, total_tasks-1, batch_size):

            x_spt, y_spt = train_data_ML[batch_idx]
            x_qry, y_qry = train_data_ML[batch_idx + 1]

            x_spt, y_spt = to_torch(x_spt), to_torch(y_spt)
            x_qry = to_torch(x_qry)
            y_qry = to_torch(y_qry)

            # data augmentation
            epsilon = grid[np.random.randint(0, len(grid))]

            if noise_type == "additive":
                y_spt = y_spt + epsilon
                y_qry = y_qry + epsilon
            else:
                y_spt = y_spt * (1 + epsilon)
                y_qry = y_qry * (1 + epsilon)

            train_tasks = [
                Task(model.encoder(x_spt[i]), y_spt[i])
                for i in range(x_spt.shape[0])
            ]
            val_tasks = [
                Task(model.encoder(x_qry[i]), y_qry[i])
                for i in range(x_qry.shape[0])
            ]

            adapted_params = meta_learner.adapt(train_tasks)
            mean_loss = meta_learner.step(adapted_params,
                                          val_tasks,
                                          is_training=True)
            #accum_mean += mean_loss.cpu().detach().numpy()

            #progressBar(batch_idx, total_tasks, 100)

            #print(accum_mean/(batch_idx+1))

            #test

            val_error = test(validation_data_ML, meta_learner, model, device,
                             noise_level)
            test_error = test(test_data_ML, meta_learner, model, device, 0.0)
            print("Epoch:", epoch)
            print("Val error:", val_error)
            print("Test error:", test_error)

            early_stopping(val_error, model)
            early_stopping2(val_error, meta_learner)

            #checkpointing
            if epoch % checkpoint_freq == 0:
                torch.save(
                    {
                        "epoch": epoch,
                        "model": model.state_dict(),
                        "meta_learner": meta_learner.state_dict(),
                        "best_score": early_stopping2.best_score,
                        "counter_stopping": early_stopping2.counter
                    }, checkpoint_file)

            if early_stopping.early_stop:
                print("Early stopping")
                break

        model.load_state_dict(torch.load(save_model_file_encoder))
        model2.load_state_dict(
            torch.load(save_model_file_)["model_state_dict"])
        meta_learner = MetaLearner(model2, optimizer, fast_lr, loss_func,
                                   first_order, n_inner_iter,
                                   inner_loop_grad_clip, device)

        validation_error = test(validation_data_ML,
                                meta_learner,
                                model,
                                device,
                                noise_level=0.0)
        test_error = test(test_data_ML,
                          meta_learner,
                          model,
                          device,
                          noise_level=0.0)

        validation_error_h1 = test(validation_data_ML,
                                   meta_learner,
                                   model,
                                   device,
                                   noise_level=0.0,
                                   horizon=1)
        test_error_h1 = test(test_data_ML,
                             meta_learner,
                             model,
                             device,
                             noise_level=0.0,
                             horizon=1)

        model.load_state_dict(torch.load(save_model_file_encoder))
        model2.load_state_dict(
            torch.load(save_model_file_)["model_state_dict"])
        meta_learner2 = MetaLearner(model2, optimizer, fast_lr, loss_func,
                                    first_order, 0, inner_loop_grad_clip,
                                    device)

        validation_error_h0 = test(validation_data_ML,
                                   meta_learner2,
                                   model,
                                   device,
                                   noise_level=0.0,
                                   horizon=1)
        test_error_h0 = test(test_data_ML,
                             meta_learner2,
                             model,
                             device,
                             noise_level=0.0,
                             horizon=1)

        model.load_state_dict(torch.load(save_model_file_encoder))
        model2.load_state_dict(
            torch.load(save_model_file_)["model_state_dict"])
        meta_learner2 = MetaLearner(model2, optimizer, fast_lr, loss_func,
                                    first_order, n_inner_iter,
                                    inner_loop_grad_clip, device)
        validation_error_mae = test(validation_data_ML, meta_learner2, model,
                                    device, 0.0)
        test_error_mae = test(test_data_ML, meta_learner2, model, device, 0.0)
        print("test_error_mae", test_error_mae)

        with open(output_directory + "/results2.txt", "a+") as f:
            f.write("Test error: %f \n" % test_error)
            f.write("Validation error: %f \n" % validation_error)
            f.write("Test error h1: %f \n" % test_error_h1)
            f.write("Validation error h1: %f \n" % validation_error_h1)
            f.write("Test error h0: %f \n" % test_error_h0)
            f.write("Validation error h0: %f \n" % validation_error_h0)
            f.write("Test error mae: %f \n" % test_error_mae)
            f.write("Validation error mae: %f \n" % validation_error_mae)

        print(test_error)
        print(validation_error)
Example #30
0
def train(args):

    torch.manual_seed(123)
    torch.cuda.manual_seed(123)
    np.random.seed(123)
    random.seed(123)
    torch.backends.cudnn.enabled = False
    torch.backends.cudnn.deterministic = True

    console = Console()

    opt1 = optim.AdamW(
        params=args.model.parameters(),
        lr=args.learning_rate,
        weight_decay=args.weight_decay,
    )

    if args.use_count:
        pass
    else:
        # criterion1 = nn.BCEWithLogitsLoss(reduction='mean')  # input: logit, target \in {0, 1}.
        criterion1 = nn.MSELoss(reduction='mean')

    writer = SummaryWriter(f'./runs/{args.experiment}')
    early_stopping = EarlyStopping(patience=10,
                                   verbose=False,
                                   path=f'./parameter/{args.experiment}.pth')

    steps_per_epoch = len(args.train_loader)
    for epoch in range(1, args.epochs + 1):

        total_loss = 0
        args.model.train()

        with tqdm(total=steps_per_epoch, leave=False,
                  dynamic_ncols=True) as pbar:
            for i, batch in enumerate(args.train_loader):

                x = batch['input'].to(args.device)
                y = batch['target'].to(args.device)

                opt1.zero_grad()

                pred = args.model(x)

                recon_loss = criterion1(pred, y)
                recon_loss.backward()
                opt1.step()

                # opt2.zero_grad()
                # pred, z_fake = args.model(x)
                # z_real = Normal(loc = torch.zeros_like(z_fake), scale=1).sample()
                # c_fake_loss = criterion2(args.model.C(z_fake), y_fake)
                # c_real_loss = criterion2(args.model.C(z_real), y_real)
                # c_loss = 0.5 * (c_fake_loss + c_real_loss)
                # c_loss.backward()
                # nn.utils.clip_grad_norm_(args.model.C.parameters(), 1.)
                # opt2.step()

                pbar.update(1)

                # train_G_adv_loss += g_adv_loss.item()
                # train_G_recon_loss += recon_loss.item()
                # train_C_fake_loss += c_fake_loss.item()
                # train_C_real_loss += c_real_loss.item()

                total_loss += recon_loss.item()  # accumulate as a Python float so the graph is not retained

            # avg_adv_loss = train_G_adv_loss / steps_per_epoch
            # avg_recon_loss = train_G_recon_loss / steps_per_epoch
            avg_recon_loss = total_loss / steps_per_epoch
            # avg_fake_loss = train_C_fake_loss / steps_per_epoch
            # avg_real_loss = train_C_real_loss / steps_per_epoch

            # early_stopping(avg_recon_loss, args.model)
            early_stopping(avg_recon_loss, args.model)

            if early_stopping.early_stop:
                print('Early stopping')
                break

            console.print(f"Train [{epoch:>04}]/[{args.epochs:>04}]: ",
                          end='',
                          style="Bold Cyan")
            # console.print(f"adv_loss:{avg_adv_loss:.4f}", sep=' | ', style='Bold Blue')
            console.print(f"recon_loss:{avg_recon_loss:.4f}",
                          sep=' | ',
                          style='Bold Blue')
            # console.print(f"fake_loss:{avg_fake_loss:.4f}", sep=' | ', style='Bold Blue')
            # console.print(f"real_loss:{avg_real_loss:.4f}", sep=' | ', style='Bold Blue')

            # writer.add_scalar(tag='adv_loss', scalar_value=avg_adv_loss, global_step=epoch)
            writer.add_scalar(tag='recon_loss',
                              scalar_value=avg_recon_loss,
                              global_step=epoch)
            # writer.add_scalar(tag='fake_loss', scalar_value=avg_fake_loss, global_step=epoch)
            # writer.add_scalar(tag='real_loss', scalar_value=avg_real_loss, global_step=epoch)

            if epoch % 10 == 0:
                torch.save(
                    args.model.state_dict(),
                    os.path.join(r"D:\프로젝트\메타플레이\jinsoo\parameter",  # raw string: absolute Windows path
                                 f"{args.experiment}_epoch_{epoch:04d}.pt"))
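A hedged usage sketch: once training stops, the best weights written by EarlyStopping to ./parameter/{experiment}.pth can be restored for evaluation. The snippet below is illustrative only; args.valid_loader is an assumed attribute (the original script only shows args.train_loader), and it presumes EarlyStopping saved a plain state_dict.

import torch
import torch.nn as nn

def evaluate(args):
    # restore the best checkpoint written by EarlyStopping during train()
    state = torch.load(f'./parameter/{args.experiment}.pth', map_location=args.device)
    args.model.load_state_dict(state)
    args.model.eval()

    criterion = nn.MSELoss(reduction='mean')
    total_loss, n_batches = 0.0, 0
    with torch.no_grad():
        for batch in args.valid_loader:  # assumed attribute, analogous to args.train_loader
            x = batch['input'].to(args.device)
            y = batch['target'].to(args.device)
            total_loss += criterion(args.model(x), y).item()
            n_batches += 1
    return total_loss / n_batches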