def train(trainloader, model, criterion, optimizer, epoch, cuda=False,
          compute_step_variance=False):
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # measure data loading time
        data_time.update(time.time() - end)

        # `async` was renamed to `non_blocking` in PyTorch;
        # see https://github.com/quark0/darts/pull/25
        if cuda:
            inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        progress_str = 'Loss: %.3f | Acc: %.3f%% (%d/%d)' \
            % (losses.avg, top1.avg, top1.sum, top1.count)
        progress_bar(batch_idx, len(trainloader), progress_str)

        iteration = epoch * len(trainloader) + batch_idx
        track.metric(iteration=iteration, epoch=epoch,
                     avg_train_loss=losses.avg,
                     avg_train_acc=top1.avg,
                     cur_train_loss=loss.item(),
                     cur_train_acc=prec1.item())
    return (losses.avg, top1.avg)
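# The `AverageMeter` and `accuracy` helpers used throughout this file are not
# defined here. A minimal sketch, assuming the standard implementations from
# the PyTorch ImageNet example (an assumption, not this repo's exact code):

class AverageMeter(object):
    """Tracks the running value, sum, count, and average of a scalar metric."""

    def __init__(self):
        self.val, self.sum, self.count, self.avg = 0.0, 0.0, 0, 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Computes top-k precision (as a percentage) for each k in `topk`."""
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    return [correct[:k].reshape(-1).float().sum(0).mul_(100.0 / batch_size)
            for k in topk]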
def tsne_embeddings(vecs, train_iters, batch_size, perplexity=30, cuda=False):
    track.debug("[track]\tComputing image densities PMF")
    densities = _compute_densities(vecs, perplexity=perplexity)
    i, j = np.indices(densities.shape)
    i = i.ravel()
    j = j.ravel()

    track.debug("[track]\tTraining the TSNE embedding")
    tsne = TSNE(len(densities), 2, 2)  # visualize in 2d
    tsne_train_wrapper = Wrapper(tsne, batchsize=batch_size, cuda=cuda)
    for k in range(train_iters):
        # plot progress
        progress_bar(k, train_iters)
        tsne_train_wrapper.fit(densities, i, j)
    return tsne.logits.weight.detach().cpu().numpy()
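# Hypothetical usage of `tsne_embeddings`: embed N activation vectors in 2-D.
# `_compute_densities`, `TSNE`, and `Wrapper` come from the parametric-t-SNE
# helpers imported elsewhere in this file; the shapes below are assumptions.
def _example_tsne_usage():
    import numpy as np
    vecs = np.random.randn(512, 64).astype(np.float32)  # 512 vectors, 64-D
    embedding = tsne_embeddings(vecs, train_iters=1000, batch_size=512)
    # expected: one 2-D point per input vector, i.e. shape (512, 2)
    print(embedding.shape)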
def test(testloader, model, criterion, epoch, cuda=False):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    # torch.no_grad() disables autograd for the whole loop; the deprecated
    # `Variable(..., volatile=True)` wrappers are unnecessary here.
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            # measure data loading time
            data_time.update(time.time() - end)

            if cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(prec1.item(), inputs.size(0))
            top5.update(prec5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            progress_str = 'Loss: %.3f | Acc: %.3f%% (%d/%d)' \
                % (losses.avg, top1.avg, top1.sum, top1.count)
            progress_bar(batch_idx, len(testloader), progress_str)

    track.metric(iteration=0, epoch=epoch,
                 avg_test_loss=losses.avg,
                 avg_test_acc=top1.avg)
    return (losses.avg, top1.avg)
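# A sketch of a typical driver tying the classification `train` and `test`
# above together. The dataset, model, and hyperparameters here are stand-ins,
# not this repo's defaults.
def _example_training_run(num_epochs=10):
    import torch
    import torch.nn as nn
    import torchvision
    import torchvision.transforms as transforms

    cuda = torch.cuda.is_available()
    transform = transforms.ToTensor()
    trainset = torchvision.datasets.CIFAR10('./data', train=True,
                                            download=True, transform=transform)
    testset = torchvision.datasets.CIFAR10('./data', train=False,
                                           download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
                                              shuffle=True)
    testloader = torch.utils.data.DataLoader(testset, batch_size=128)

    model = torchvision.models.resnet18(num_classes=10)
    if cuda:
        model = model.cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

    for epoch in range(num_epochs):
        train(trainloader, model, criterion, optimizer, epoch, cuda=cuda)
        test(testloader, model, criterion, epoch, cuda=cuda)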
def train(trainloader, model, optimizer, epoch, cuda=True):
    # switch to train mode
    model.train()

    hyperparameters = model.hp
    mode = model.mode
    if isinstance(model, nn.DataParallel):
        inp_dim = model.module.inp_dim
        pw_ph = model.module.pw_ph
        cx_cy = model.module.cx_cy
        stride = model.module.stride
    else:
        inp_dim = model.inp_dim
        pw_ph = model.pw_ph
        cx_cy = model.cx_cy
        stride = model.stride
    if cuda:
        pw_ph = pw_ph.cuda()
        cx_cy = cx_cy.cuda()
        stride = stride.cuda()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    avg_loss = AverageMeter()
    avg_iou = AverageMeter()
    avg_conf = AverageMeter()
    avg_no_conf = AverageMeter()
    avg_pos = AverageMeter()
    avg_neg = AverageMeter()
    end = time.time()
    break_flag = 0

    if mode['show_temp_summary']:
        writer = SummaryWriter(os.path.join(track.trial_dir(), 'temp_vis/'))

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # measure data loading time
        data_time.update(time.time() - end)
        if cuda:
            inputs = inputs.cuda()

        # compute output
        raw_pred = model(inputs, torch.cuda.is_available())
        true_pred = util.transform(raw_pred.clone().detach(),
                                   pw_ph, cx_cy, stride)
        iou_list = util.get_iou_list(true_pred, targets,
                                     hyperparameters, inp_dim)
        resp_raw_pred, resp_cx_cy, resp_pw_ph, resp_stride, no_obj = \
            util.build_tensors(raw_pred, iou_list, pw_ph, cx_cy, stride,
                               hyperparameters)
        stats = helper.get_progress_stats(true_pred, no_obj, iou_list, targets)
        if hyperparameters['wasserstein']:
            no_obj = util.get_wasserstein_matrices(raw_pred, iou_list, inp_dim)
        try:
            loss = util.yolo_loss(resp_raw_pred, targets, no_obj, resp_pw_ph,
                                  resp_cx_cy, resp_stride, inp_dim,
                                  hyperparameters)
        except RuntimeError:
            print('bayes opt failed')
            break_flag = 1
            break

        # measure accuracy and record loss
        avg_loss.update(loss.item())
        avg_iou.update(stats['iou'])
        avg_conf.update(stats['pos_conf'])
        avg_no_conf.update(stats['neg_conf'])
        avg_pos.update(stats['pos_class'])
        avg_neg.update(stats['neg_class'])

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if mode['show_output']:
            # plot progress
            progress_str = ('Loss: %.4f | AvIoU: %.3f | AvPConf: %.3f | '
                            'AvNConf: %.5f | AvClass: %.3f | AvNClass: %.5f'
                            % (loss.item(), stats['iou'], stats['pos_conf'],
                               stats['neg_conf'], stats['pos_class'],
                               stats['neg_class']))
            progress_bar(batch_idx, len(trainloader), progress_str)

        iteration = epoch * len(trainloader) + batch_idx
        if mode['show_temp_summary']:
            writer.add_scalar('AvLoss/train', avg_loss.avg, iteration)
            writer.add_scalar('AvIoU/train', avg_iou.avg, iteration)
            writer.add_scalar('AvPConf/train', avg_conf.avg, iteration)
            writer.add_scalar('AvNConf/train', avg_no_conf.avg, iteration)
            writer.add_scalar('AvClass/train', avg_pos.avg, iteration)
            writer.add_scalar('AvNClass/train', avg_neg.avg, iteration)

        track.metric(iteration=iteration, epoch=epoch,
                     avg_train_loss=avg_loss.avg,
                     avg_train_iou=avg_iou.avg,
                     avg_train_conf=avg_conf.avg,
                     avg_train_neg_conf=avg_no_conf.avg,
                     avg_train_pos=avg_pos.avg,
                     avg_train_neg=avg_neg.avg)

    outcome = {
        'avg_loss': avg_loss.avg,
        'avg_iou': avg_iou.avg,
        'avg_pos': avg_pos.avg,
        'avg_neg': avg_neg.avg,
        'avg_conf': avg_conf.avg,
        'avg_no_conf': avg_no_conf.avg,
        'broken': break_flag,
    }
    return outcome
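# Hypothetical driver for the YOLO `train` above: the `broken` flag in the
# returned `outcome` marks an epoch whose `util.yolo_loss` call raised a
# RuntimeError, so a hyperparameter-search loop can abandon that trial early.
# `trainloader`, `model`, and `optimizer` are assumed to come from the caller.
def _example_yolo_epochs(trainloader, model, optimizer, num_epochs):
    for epoch in range(num_epochs):
        outcome = train(trainloader, model, optimizer, epoch, cuda=True)
        if outcome['broken']:
            track.debug('trial aborted: yolo_loss raised at epoch %d' % epoch)
            break
        track.debug('epoch %d: loss=%.4f iou=%.3f'
                    % (epoch, outcome['avg_loss'], outcome['avg_iou']))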
def run(ensemble, proj_df, dataroot='./data',
        batch_size=128, cuda=False,
        class_ind=0, num_batches=4, tsne_train_iters=4000,
        **kwargs):
    """ let's do some dimensionality reduction """
    track.debug("[tsne] starting experiment with class %d" % class_ind)
    trainloader, testloader = build_single_class_dataset(
        'cifar10',
        class_ind=class_ind,
        dataroot=dataroot,
        batch_size=batch_size,
        eval_batch_size=batch_size,
        num_workers=2)

    # per-model stores of activations gathered from the train loader
    train_activations = {}
    labels = []

    track.debug("[tsne] starting forward passes")
    ensemble.models = ensemble.models[0::4]  # plot every 4 epochs for now
    for model_ind, model in enumerate(ensemble.models):
        # plot progress
        progress_bar(model_ind, len(ensemble.models))
        model_activations = []
        # this hook will aggregate a list of model outputs in `model_activations`
        model.linear.register_forward_pre_hook(
            _create_preactivation_hook(model_activations))
        with torch.no_grad():
            for inputs, _ in islice(trainloader, 0, num_batches):
                model(inputs)
        train_activations[model_ind] = torch.cat(model_activations)
        labels.extend([model_ind] * len(train_activations[model_ind]))

    # now, we have all activations for all models! we can do tsne
    track.debug("[tsne] forward pass done! starting stacking + embedding")
    all_train_activations = torch.cat(
        [vec for vec in train_activations.values()])
    embedding = tsne_embeddings(all_train_activations, tsne_train_iters,
                                batch_size=len(all_train_activations),
                                cuda=cuda)

    f = plt.figure()
    # create labels for the models by iteration
    y = np.array(labels)
    plt.scatter(embedding[:, 0], embedding[:, 1], c=y * 1.0 / y.max())

    # plot the model means too: each model owns one contiguous block of
    # `num_model_vecs` embedded vectors, so average each block
    model_means = []
    num_model_vecs = len(list(train_activations.values())[0])
    for start in range(0, len(embedding), num_model_vecs):
        model_means.append(embedding[start:start + num_model_vecs].mean(axis=0))
    model_means = np.array(model_means)
    ys = np.arange(len(model_means)) / float(len(model_means))
    plt.scatter(model_means[:, 0], model_means[:, 1],
                c=ys, s=100, linewidth=2, edgecolors='black', marker='D')
    plt.axis('off')
    plt.savefig('/Users/noah/Dev/SGLD/embeddings.png', bbox_inches='tight')
    plt.close(f)
    track.debug("[tsne] done! saved to embeddings.png")
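# `run` relies on a `_create_preactivation_hook` helper that is not shown in
# this file. A minimal sketch of what it presumably does, given that its
# output list is later passed to `torch.cat`: a PyTorch forward pre-hook is
# called as hook(module, inputs) with `inputs` a tuple of positional inputs,
# so recording `inputs[0]` captures the pre-activations of `model.linear`.
def _create_preactivation_hook(storage):
    def hook(module, inputs):
        # detach so no autograd graph is kept alive across batches
        storage.append(inputs[0].detach().cpu())
    return hook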
def test(testloader, model, criterion, epoch, cuda=False, metric=True,
         criterion_has_labels=True, compute_acc=True):
    """Evaluates `model` on `testloader`.

    criterion: a torch.nn loss instance.
    criterion_has_labels (bool): if True, the criterion is called as
        criterion(outputs, labels); otherwise, just criterion(outputs).

    Returns (test_loss, test_acc) if compute_acc is True;
    otherwise, returns test_loss alone.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            # measure data loading time
            data_time.update(time.time() - end)

            if cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            # compute output
            outputs = model(inputs)
            if criterion_has_labels:
                loss = criterion(outputs, targets)
            else:
                loss = criterion(outputs)

            # measure accuracy and record loss
            losses.update(loss.item(), inputs.size(0))
            if compute_acc:
                prec1, prec5 = accuracy(outputs.data, targets.data,
                                        topk=(1, 5))
                top1.update(prec1.item(), inputs.size(0))
                top5.update(prec5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            if compute_acc:
                progress_str = 'Loss: %.3f | Acc: %.3f%% (%d/%d)' \
                    % (losses.avg, top1.avg, top1.sum, top1.count)
            else:
                progress_str = 'Loss: %.3f (%d/%d)' \
                    % (losses.avg, batch_idx * inputs.size(0), losses.count)
            progress_bar(batch_idx, len(testloader), progress_str)

    if metric:
        track.metric(iteration=0, epoch=epoch,
                     avg_test_loss=losses.avg,
                     avg_test_acc=top1.avg)
    if compute_acc:
        return (losses.avg, top1.avg)
    return losses.avg
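# The `criterion_has_labels` / `compute_acc` switches let this same loop
# evaluate label-free objectives. A hedged sketch: scoring a model by the
# mean predictive entropy of its outputs (this criterion is illustrative,
# not part of this repo).
def _example_label_free_eval(testloader, model):
    import torch.nn.functional as F

    def mean_entropy(outputs):
        # average entropy of the softmax distribution over classes
        log_p = F.log_softmax(outputs, dim=1)
        return -(log_p.exp() * log_p).sum(dim=1).mean()

    return test(testloader, model, mean_entropy, epoch=0, metric=False,
                criterion_has_labels=False, compute_acc=False)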
def train(trainloader, model, criterion, optimizer, epoch, cuda=False,
          num_chunks=4):
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    for batch_idx, (all_inputs, all_targets) in enumerate(trainloader):
        # measure data loading time
        data_time.update(time.time() - end)

        # do mini-mini-batching for large batch sizes
        xs = all_inputs.chunk(num_chunks)
        ys = all_targets.chunk(num_chunks)
        optimizer.zero_grad()
        batch_prec1 = 0.0
        batch_loss = 0.0
        for (inputs, targets) in zip(xs, ys):
            if cuda:
                # `async` is a reserved word in Python 3.7+; use `non_blocking`
                inputs = inputs.cuda()
                targets = targets.cuda(non_blocking=True)

            # compute output
            outputs = model(inputs)
            mini_loss = criterion(outputs, targets) / num_chunks
            batch_loss += mini_loss.item()
            mini_loss.backward()

            # measure accuracy and record loss
            prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
            batch_prec1 += prec1.item() / num_chunks
            losses.update(num_chunks * mini_loss.item(), inputs.size(0))
            top1.update(prec1.item(), inputs.size(0))
            top5.update(prec5.item(), inputs.size(0))

        # apply the accumulated gradients (this optimizer's step takes the
        # current epoch as an argument)
        optimizer.step(epoch)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        progress_str = 'Loss: %.3f | Acc: %.3f%% (%d/%d)' \
            % (losses.avg, top1.avg, top1.sum, top1.count)
        progress_bar(batch_idx, len(trainloader), progress_str)

        iteration = epoch * len(trainloader) + batch_idx
        track.metric(iteration=iteration, epoch=epoch,
                     avg_train_loss=losses.avg,
                     avg_train_acc=top1.avg,
                     cur_train_loss=batch_loss,
                     cur_train_acc=batch_prec1)
    return (losses.avg, top1.avg)
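# The chunked loop above is plain gradient accumulation: because each
# `mini_loss` is divided by `num_chunks` before `.backward()`, the summed
# gradients equal the gradient of the full-batch mean loss (for a
# mean-reduced criterion and equal-sized chunks). A quick sanity check of
# that equivalence on a toy linear model:
def _example_accumulation_equivalence():
    import torch
    import torch.nn as nn

    torch.manual_seed(0)
    model = nn.Linear(10, 2)
    criterion = nn.CrossEntropyLoss()  # reduction='mean' by default
    x, y = torch.randn(8, 10), torch.randint(0, 2, (8,))

    # full-batch gradient
    model.zero_grad()
    criterion(model(x), y).backward()
    full_grad = model.weight.grad.clone()

    # accumulated gradient over 4 equal chunks
    model.zero_grad()
    for xc, yc in zip(x.chunk(4), y.chunk(4)):
        (criterion(model(xc), yc) / 4).backward()

    print(torch.allclose(full_grad, model.weight.grad, atol=1e-6))  # True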