def train(Ps, dataloader, encoder, decoder, criterion, optimizer):
    r"""
    Performs one epoch's training.

    :param Ps: dict of hyperparameters (uses "alpha_c" and "grad_clip")
    :param dataloader: DataLoader for training data
    :param encoder: encoder model
    :param decoder: decoder model
    :param criterion: loss layer
    :param optimizer: optimizer to update the encoder's and decoder's weights (if fine-tuning)
    """
    encoder.train()
    decoder.train()

    loss_total = 0.0
    acc_total = 0
    acc_ref = 0

    for i, (imgs, caps, caplens) in enumerate(dataloader, 1):
        # Forward prop
        imgs = encoder(imgs)
        scores, caps_sorted, decode_lengths, alphas, sort_idx = decoder(imgs, caps, caplens)

        # Since we decoded starting with <start>, the targets are all words after <start>, up to <end>
        targets = caps_sorted[:, 1:]

        # Remove timesteps that we didn't decode at, or are pads
        # pack_padded_sequence is an easy trick to do this
        scores = torch.nn.utils.rnn.pack_padded_sequence(scores, decode_lengths, batch_first=True).data
        targets = torch.nn.utils.rnn.pack_padded_sequence(targets, decode_lengths, batch_first=True).data

        # Calculate loss
        loss = criterion(scores, targets)

        # Add doubly stochastic attention regularization
        loss += Ps["alpha_c"] * ((1. - alphas.sum(dim=1)) ** 2).mean()

        # Back prop
        optimizer.zero_grad()
        loss.backward()

        # Clip gradients
        if Ps["grad_clip"] is not None:
            clip_gradient(optimizer, Ps["grad_clip"])

        # Update weights
        optimizer.step()

        # Accumulate accuracy
        top5 = accuracy(scores, targets, 5)
        acc_total += top5[0]
        acc_ref += top5[1]

        # Accumulate loss
        loss_total += loss.item()

    loss_final = loss_total / i
    acc_final = acc_total / acc_ref * 100.
    return loss_final, acc_final
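# Hedged sketch (assumptions, not taken from the original repo): minimal
# versions of the helpers that train() above relies on, written only to match
# how they are called there -- accuracy() returning a (num_correct, num_targets)
# pair over the packed scores, and clip_gradient() clamping gradients in place.
import torch


def accuracy(scores, targets, k):
    """Return (#targets whose label is in the top-k scores, #targets)."""
    _, ind = scores.topk(k, dim=1, largest=True, sorted=True)
    correct = ind.eq(targets.view(-1, 1).expand_as(ind))
    return correct.any(dim=1).float().sum().item(), targets.size(0)


def clip_gradient(optimizer, grad_clip):
    """Clamp every gradient seen by the optimizer to [-grad_clip, grad_clip]."""
    for group in optimizer.param_groups:
        for param in group['params']:
            if param.grad is not None:
                param.grad.data.clamp_(-grad_clip, grad_clip)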
def validate(val_loader, model, criterion):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            if config.gpu is not None:
                input = input.cuda(config.gpu, non_blocking=True)
                target = target.cuda(config.gpu, non_blocking=True)

            # compute output
            output = model(input)
            loss_input = [output, target]
            if config.experiments[0] == 'fisher':
                loss_input.append(model)
            loss = criterion(*loss_input)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(prec1[0], input.size(0))
            top5.update(prec5[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            logger.val_batch_log(top1.avg, losses.avg)

            if i % config.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, top1=top1, top5=top5))

        print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(
            top1=top1, top5=top5))

    return top1.avg
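# Hedged sketch (assumption): AverageMeter and the top-k accuracy helper that
# the classification validate()/train() snippets rely on. These follow the
# well-known pattern from the official PyTorch ImageNet example; the original
# repo's own versions may differ in detail.
import torch


class AverageMeter(object):
    """Tracks the latest value and a running average."""

    def __init__(self):
        self.val = 0.0
        self.avg = 0.0
        self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Precision@k for each k in topk, as percentages (1-element tensors)."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res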
def validate(Ps, dataloader, encoder, decoder, criterion):
    r"""
    Performs one epoch's validation.

    :param Ps: dict of hyperparameters (uses "alpha_c")
    :param dataloader: DataLoader for validation data
    :param encoder: encoder model
    :param decoder: decoder model
    :param criterion: loss layer
    """
    encoder.eval()
    decoder.eval()

    loss_total = 0.0
    acc_total = 0
    acc_ref = 0

    with torch.no_grad():
        for i, (imgs, caps, caplens, allcaps) in enumerate(dataloader, 1):
            # Forward prop
            imgs = encoder(imgs)
            scores, caps_sorted, decode_lengths, alphas, sort_idx = decoder(imgs, caps, caplens)

            # Since we decoded starting with <start>, the targets are all words after <start>, up to <end>
            targets = caps_sorted[:, 1:]

            # Remove timesteps that we didn't decode at, or are pads
            # pack_padded_sequence is an easy trick to do this
            scores_copy = scores.clone()  # unpacked copy of the scores (not used in this loop)
            scores = torch.nn.utils.rnn.pack_padded_sequence(scores, decode_lengths, batch_first=True).data
            targets = torch.nn.utils.rnn.pack_padded_sequence(targets, decode_lengths, batch_first=True).data

            # Calculate loss
            loss = criterion(scores, targets)

            # Add doubly stochastic attention regularization
            loss += Ps["alpha_c"] * ((1. - alphas.sum(dim=1)) ** 2).mean()

            # Accumulate accuracy
            top5 = accuracy(scores, targets, 5)
            acc_total += top5[0]
            acc_ref += top5[1]

            # Accumulate loss
            loss_total += loss.item()

    loss_final = loss_total / i
    acc_final = acc_total / acc_ref * 100.
    return loss_final, acc_final
def kfold(X, Y, k, clf):
    X = np.array(X)
    Y = np.array(Y).reshape(-1, 1)
    indexes = np.arange(X.shape[0])

    # Build k boolean masks, one per fold; the last fold absorbs any remainder
    slices = []
    aux1 = 0
    aux2 = int(X.shape[0] / k)
    for i in range(k):
        if i == k - 1:
            _slice = np.isin(indexes, indexes[aux1:])
        else:
            _slice = np.isin(indexes, indexes[aux1:(aux1 + aux2)])
        slices.append(_slice)
        aux1 += aux2

    # Train on the complement of each fold and evaluate on the fold itself
    accuracies = []
    for s in slices:
        x_train = X[~s]
        y_train = Y[~s]
        x_test = X[s]
        y_test = Y[s]

        clf.fit(x_train, y_train)
        y_predict = clf.predict(x_test)
        accuracies.append(accuracy(y_test, y_predict))

    return np.mean(accuracies)
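# Hedged usage sketch for kfold() above. The accuracy(y_true, y_pred) helper it
# calls is not shown in this file, so a minimal assumed version is given here,
# and the classifier/data in the example are placeholders.
import numpy as np


def accuracy(y_true, y_pred):
    """Fraction of predictions that exactly match the labels (assumed helper)."""
    y_true = np.asarray(y_true).reshape(-1)
    y_pred = np.asarray(y_pred).reshape(-1)
    return float(np.mean(y_true == y_pred))


def _kfold_usage_example():
    """Illustrative only: 5-fold cross-validation with a scikit-learn tree."""
    from sklearn.tree import DecisionTreeClassifier
    X = [[0, 0], [0, 1], [1, 0], [1, 1]] * 10  # toy features
    Y = [0, 1, 1, 0] * 10                      # toy labels
    return kfold(X, Y, k=5, clf=DecisionTreeClassifier())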
def train(self, X, y, cv, test_set, cv_y, test_y):
    self.clear()
    col = self.config['input_size']
    cv = np.reshape(cv, (len(cv) // col, col))
    test_set = np.reshape(test_set, (len(test_set) // col, col))

    X, mu, sigma = normalize(X)

    # save X, mu and sigma
    np.savetxt('X.csv', X, delimiter=',')
    np.savetxt('mu.csv', mu, delimiter=',')
    np.savetxt('sigma.csv', sigma, delimiter=',')

    self.nn_params = optimize.fmin_cg(f, self.nn_params, args=(self, X, y),
                                      maxiter=50, fprime=fprime)

    # save nn parameters
    hidden_size = self.config['hidden_size']
    input_size = self.config['input_size']
    num_labels = self.config['num_labels']
    theta1 = self.nn_params[:(hidden_size * (input_size + 1))].reshape(
        (hidden_size, input_size + 1))
    theta2 = self.nn_params[(hidden_size * (input_size + 1)):].reshape(
        (num_labels, hidden_size + 1))
    np.savetxt('Theta1.csv', theta1, delimiter=',')
    np.savetxt('Theta2.csv', theta2, delimiter=',')

    # test the model on the training set
    p = predict1(theta1, theta2, X)
    _accuracy = accuracy(p, y)
    l_fscores = list_of_fscores(p, y, num_labels)
    fscore = total_fscore(l_fscores)

    # test the model on the cross-validation set (reusing the training mu/sigma)
    X1, mu1, sigma1 = normalize(np.array(cv), mu, sigma)
    p_cv = predict1(theta1, theta2, X1)
    accuracy_cv = accuracy(p_cv, cv_y)
    l_fscores_cv = list_of_fscores(p_cv, cv_y, num_labels)
    fscores_cv = total_fscore(l_fscores_cv)

    # test the model on the test set (reusing the training mu/sigma)
    X_test, mu_test, sigma_test = normalize(np.array(test_set), mu, sigma)
    p_test = predict1(theta1, theta2, X_test)
    accuracy_test = accuracy(p_test, test_y)
    l_fscores_test = list_of_fscores(p_test, test_y, num_labels)
    fscore_test = total_fscore(l_fscores_test)

    print('Results:')
    print('f-score / accuracy on cross-validation set:')
    print(fscores_cv, ' ', accuracy_cv)
    print('f-score on test set: ', fscore_test)
    print('accuracy on test set: ', accuracy_test)

    return self.nn_cfx(X, y, self.nn_params), self.nn_params, fscores_cv, fscore_test
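# Hedged sketch (assumption): normalize() as called in train() above appears to
# z-score the features, computing mu/sigma from the data when they are omitted
# and reusing the supplied (training-set) statistics for the cv/test sets.
import numpy as np


def normalize(X, mu=None, sigma=None):
    """Z-score normalization; reuses provided mu/sigma when given."""
    X = np.asarray(X, dtype=float)
    if mu is None:
        mu = X.mean(axis=0)
    if sigma is None:
        sigma = X.std(axis=0)
        sigma = np.where(sigma == 0, 1.0, sigma)  # guard against constant columns
    return (X - mu) / sigma, mu, sigma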
def train(train_loaders, model, criterion, optimizer, epoch, scheduled_actions=None):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, batches in enumerate(zip(*train_loaders)):
        for b_id, (input, target) in enumerate(batches):
            # measure data loading time
            n_iter = (epoch * len(batches)) + i
            data_time.update(time.time() - end)

            if config.gpu is not None:
                input = input.cuda(config.gpu, non_blocking=True)
                target = target.cuda(config.gpu, non_blocking=True)

            # compute output
            output = model(input)
            loss_input = [output, target]
            if config.experiments[0] == 'fisher':
                loss_input.append(model)
            loss = criterion(*loss_input)
            if config.experiments[0] == 'fisher':
                criterion.swap_task()

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output, target, topk=(1, 5))

            # batches from the secondary loader are only logged, not trained on
            if b_id == 1:
                logger.val_batch_log(prec1, loss, fisher=True)
                if i % 10 == 0 and i < 30:
                    pass
                    # make_full_embedding(model, n_iter)
                continue

            losses.update(loss.item(), input.size(0))
            top1.update(prec1[0], input.size(0))
            top5.update(prec5[0], input.size(0))

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            clip_and_track_grad(model, config)
            # clip_grad_value_(model.parameters(), config.grad_clip)
            optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            logger.train_batch_log(model, top1.avg, losses.avg)

        if i % config.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch, i, len(train_loaders[0]), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1, top5=top5))

        if scheduled_actions:
            next(scheduled_actions)
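# Hedged sketch (assumption): clip_and_track_grad() used in train() above is a
# repo-specific helper that is not shown. This assumed version clips gradients
# by value (config.grad_clip, mirroring the commented-out clip_grad_value_ call)
# and returns the pre-clip global gradient norm so callers could log it.
import torch
from torch.nn.utils import clip_grad_value_


def clip_and_track_grad(model, config):
    """Clip gradients by value and return the pre-clip global grad norm."""
    grads = [p.grad.detach() for p in model.parameters() if p.grad is not None]
    total_norm = 0.0
    if grads:
        total_norm = torch.norm(torch.stack([g.norm(2) for g in grads]), 2).item()
    if getattr(config, 'grad_clip', None) is not None:
        clip_grad_value_(model.parameters(), config.grad_clip)
    return total_norm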