def evaluate(loader, model, criterion):
    """Evaluate the model on the dataset served by ``loader``.

    Args:
        loader: iterable of batches; each batch is indexed with the keys
            ``'image'`` and ``'target'``.
        model: network called on the image batch to produce class scores.
        criterion: loss callable invoked as ``criterion(scores, target)``.

    Returns:
        tuple: ``(losses.avg, accuracies.avg)``. Both meters are updated
        with the batch size as weight, so a smaller final batch does not
        count as much as a full one.
    """
    losses = AverageMeter()
    accuracies = AverageMeter()
    model.eval()  # put model to evaluation mode

    with torch.no_grad():
        for data in loader:
            x = data['image'].to(device=device, dtype=dtype)  # move to device, e.g. GPU
            y = data['target'].to(device=device, dtype=torch.long)
            batch_size = x.size(0)

            scores = model(x)
            loss = criterion(scores, y)
            # DEBUG logging.info('Val loss = %.4f' % loss.item())

            # Predicted class = arg-max over dimension 1 of the scores.
            _, preds = scores.max(1)
            accuracy = (y == preds).float().mean()

            losses.update(loss.item(), batch_size)
            # ``accuracy`` is already a mean over the batch; weight it by
            # the batch size exactly like the loss so both averages are
            # per-sample rather than per-batch.
            accuracies.update(accuracy.item(), batch_size)
    return losses.avg, accuracies.avg
def class_inference(dataloader, exp_dir, model, n_classes, batch_size,
                    print_freq=10, score=False, class_nms=None,
                    tile_predict=False, gpu=False):
    """Perform class inference on the dataset and save per-image outputs.

    Args:
        dataloader: iterable of batches; index 0 holds the image ids,
            index 1 the images and (when scoring) index 2 the targets.
        exp_dir: experiment directory; outputs go to ``<exp_dir>/npy``.
        model: network to run; moved to CUDA when ``gpu`` is true.
        n_classes: number of class channels; the first ``n_classes``
            channels of the target are used as ground truth.
        batch_size: unused here; kept for interface compatibility.
        print_freq: print timing (and scores) every ``print_freq`` batches.
        score: when true, accumulate metrics with ``runningScore``.
        class_nms: class names forwarded to ``runningScore``.
        tile_predict: when true, call ``model.tile_predict`` instead of
            ``model``.
        gpu: when true, run model and images on CUDA.
    """
    batch_time = AverageMeter()
    if gpu:
        model = model.cuda()
    # switch to evaluate mode
    model.eval()

    if score:
        score_metrics = runningScore(n_classes, class_nms)

    # All batches write into the same directory, so create it once up front.
    outdir = '{}/npy'.format(exp_dir)
    os.makedirs(outdir, exist_ok=True)

    end = time.time()
    for i, vals in enumerate(dataloader):
        image_ids = vals[0].numpy()
        img = vals[1]
        if gpu:
            img = img.cuda()

        with torch.no_grad():
            if tile_predict:
                output = model.tile_predict(img, n_classes)
            else:
                output = model(img)
            # NOTE(review): sigmoid is applied to both branches here; confirm
            # that ``tile_predict`` returns raw logits as well.
            output = torch.sigmoid(output)  # F.sigmoid is deprecated

        if score:
            # we will need ground truth to score
            target = vals[2]
            class_mask = target[:, :n_classes, :, :]
            score_metrics.update(output, class_mask)

        for k, image_id in enumerate(image_ids):
            save(output[k], outdir, str(image_id), suffix='class')

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            print('Val: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.
                  format(i, len(dataloader), batch_time=batch_time))
            if score:
                # Do not bind the result to ``score``: that name is the
                # boolean flag controlling whether scoring happens at all.
                score_metrics.get_scores()
                score_metrics.print_stat()

    if score:
        score_metrics.get_scores()
        score_metrics.print_stat()
def offset_inference(dataloader, exp_dir, model, offset_list, batch_size,
                     print_freq=10, score=False, gpu=False):
    """Perform offset inference on the dataset and save per-image outputs.

    Args:
        dataloader: iterable of batches; index 0 holds the image ids,
            index 1 the images and (when scoring) index 2 the targets.
        exp_dir: experiment directory; outputs go to ``<exp_dir>/npy``.
        model: network to run; moved to CUDA when ``gpu`` is true.
        offset_list: offsets forwarded to ``offsetIoU``; its length selects
            how many trailing target channels are used as ground truth.
        batch_size: unused here; kept for interface compatibility.
        print_freq: print timing (and scores) every ``print_freq`` batches.
        score: when true, accumulate metrics with ``offsetIoU``.
        gpu: when true, run model and images on CUDA.
    """
    batch_time = AverageMeter()
    n_offsets = len(offset_list)
    if gpu:
        model = model.cuda()
    # switch to evaluate mode
    model.eval()

    if score:
        offset_metrics = offsetIoU(offset_list)

    # All batches write into the same directory, so create it once up front.
    outdir = '{}/npy'.format(exp_dir)
    os.makedirs(outdir, exist_ok=True)

    end = time.time()
    for i, vals in enumerate(dataloader):
        image_ids = vals[0].numpy()
        img = vals[1]
        if gpu:
            img = img.cuda()

        with torch.no_grad():
            output = torch.sigmoid(model(img))  # F.sigmoid is deprecated

        if score:
            # The last ``n_offsets`` target channels are the ground truth.
            target = vals[2]
            bound_mask = target[:, -n_offsets:, :, :]
            offset_metrics.update(output, bound_mask)

        for k, image_id in enumerate(image_ids):
            save(output[k], outdir, str(image_id), suffix='offset')

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            print('Val: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.
                  format(i, len(dataloader), batch_time=batch_time))
            if score:
                offset_metrics.get_scores()
                offset_metrics.print_stat()

    if score:
        offset_metrics.get_scores()
        offset_metrics.print_stat()
def validate(val_loader, model, embedding, criterion, epoch):
    """Run one validation pass of the embedding + relation model.

    Each batch is split into support and query sets, embedded, paired
    (every query against every class) and scored by ``model``. Loss and
    accuracy are logged per batch; a prediction figure is logged for the
    first batch only.

    Args:
        val_loader: iterable of ``(x, y)`` batches.
        model: network scoring concatenated support/query features.
        embedding: feature extractor applied to support and query images.
        criterion: loss invoked as ``criterion(y_pred, y_one_hot)``.
        epoch: current epoch; ``len(val_loader) * (epoch - 1)`` is used as
            the global-step offset for logging.

    Returns:
        tuple: ``(losses.avg, accuracies.avg)``.
    """
    losses = AverageMeter()
    accuracies = AverageMeter()
    num_class = args.classes_per_it_val
    num_support = args.num_support_val
    num_query = args.num_query_val
    total_epoch = len(val_loader) * (epoch - 1)

    model.eval()
    embedding.eval()

    # Validation never back-propagates, so skip building the autograd graph.
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            x, y = data[0].to(device), data[1].to(device)
            x_support, x_query, y_support, y_query = split_support_query_set(
                x, y, num_class, num_support, num_query)

            support_vector = embedding(x_support)
            query_vector = embedding(x_query)

            # Sum the support features of each class, then tile them so that
            # every query can be paired with every class.
            _size = support_vector.size()
            support_vector = support_vector.view(
                num_class, num_support, _size[1], _size[2], _size[3]).sum(dim=1)
            support_vector = support_vector.repeat(num_class * num_query, 1, 1, 1)
            # Repeat each query feature ``num_class`` times, consecutively.
            query_vector = torch.stack(
                [q for q in query_vector for _ in range(num_class)])

            _concat = torch.cat((support_vector, query_vector), dim=1)
            y_pred = model(_concat).view(-1, num_class)

            y_one_hot = torch.zeros(num_query * num_class,
                                    num_class).to(device).scatter_(
                                        1, y_query.unsqueeze(1), 1)
            loss = criterion(y_pred, y_one_hot)
            losses.update(loss.item(), y_pred.size(0))

            y_hat = y_pred.argmax(1)
            accuracy = y_hat.eq(y_query).float().mean()
            accuracies.update(accuracy)

            if i == 0:
                writer.add_figure('y_prediction vs. y/Val',
                                  plot_classes_preds(y_hat, y_pred,
                                                     [x_support, x_query],
                                                     [y_support, y_query],
                                                     num_class, num_support,
                                                     num_query),
                                  global_step=total_epoch)
            writer.add_scalar("Loss/Val", loss.item(), total_epoch + i)
            writer.add_scalar("Acc/Val", accuracy, total_epoch + i)

    return losses.avg, accuracies.avg
def train(train_loader, model, embedding, model_optimizer, embed_optimizer, criterion, epoch):
    """Train the embedding and relation networks for one pass over the loader.

    For every batch the support features of each class are summed, paired
    with every query feature, scored by ``model`` and compared against a
    one-hot target. Both optimizers are stepped after each batch.

    Returns:
        The ``avg`` attribute of the loss meter.
    """
    loss_meter = AverageMeter()
    n_way = args.classes_per_it_tr
    n_shot = args.num_support_tr
    n_query = args.num_query_tr
    step_offset = len(train_loader) * (epoch - 1)

    model.train()
    embedding.train()

    for step, batch in enumerate(train_loader):
        x, y = batch[0].to(device), batch[1].to(device)
        x_support, x_query, y_support, y_query = split_support_query_set(
            x, y, n_way, n_shot, n_query)

        support_feat = embedding(x_support)
        query_feat = embedding(x_query)

        # Collapse the shots of each class by summation, then tile.
        feat_shape = support_feat.size()
        class_feat = support_feat.view(
            n_way, n_shot, feat_shape[1], feat_shape[2], feat_shape[3])
        class_feat = class_feat.sum(dim=1)
        class_feat = class_feat.repeat(n_way * n_query, 1, 1, 1)

        # Each query feature appears ``n_way`` times in a row.
        repeated_queries = []
        for single_query in query_feat:
            for _ in range(n_way):
                repeated_queries.append(single_query)
        query_feat = torch.stack(repeated_queries)

        pairs = torch.cat((class_feat, query_feat), dim=1)
        y_pred = model(pairs).view(-1, n_way)

        zeros = torch.zeros(n_query * n_way, n_way).to(device)
        y_one_hot = zeros.scatter_(1, y_query.unsqueeze(1), 1)

        loss = criterion(y_pred, y_one_hot)
        loss_meter.update(loss.item(), y_pred.size(0))

        model_optimizer.zero_grad()
        embed_optimizer.zero_grad()
        loss.backward()
        model_optimizer.step()
        embed_optimizer.step()

        # A prediction figure is logged for the first batch only.
        if step == 0:
            y_hat = y_pred.argmax(1)
            figure = plot_classes_preds(y_hat, y_pred,
                                        [x_support, x_query],
                                        [y_support, y_query],
                                        n_way, n_shot, n_query)
            writer.add_figure('y_prediction vs. y/Train', figure,
                              global_step=step_offset)
        writer.add_scalar("Loss/Train", loss.item(), step_offset + step)

    return loss_meter.avg
def train(train_loader, model, criterion, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        train_loader: iterable of ``(input, target)`` batches
        model: torch.nn.Module
        criterion: loss function called as ``criterion(output, target)``
        optimizer: optimizer stepped once per batch
        epoch: current epoch (used for logging only)

    Returns:
        dict: ``{'loss': <average loss>, 'acc': <average top-1 precision>}``
    """
    step_timer = AverageMeter()
    load_timer = AverageMeter()
    loss_meter = AverageMeter()
    top1_meter = AverageMeter()

    model.train()  # training mode

    n_batches = len(train_loader)
    tick = time.time()
    for batch_ind, (images, labels) in enumerate(train_loader):
        # time spent waiting for the loader
        load_timer.update(time.time() - tick)

        images = images.cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)
        n_samples = images.size(0)

        # forward pass
        output = model(images)
        loss = criterion(output, labels)

        # bookkeeping happens before the parameter update
        prec1 = calc_accuracy(output, labels, topk=(1,))
        loss_meter.update(loss.item(), n_samples)
        top1_meter.update(prec1[0].item(), n_samples)

        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # total time for this batch
        step_timer.update(time.time() - tick)
        tick = time.time()

        if batch_ind % args.log_interval != 0:
            continue
        message = ('Epoch: [{0}][{1}/{2}]\t'
                   'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                   'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                   'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                   'Prec@1 {top1.val:.3f} ({top1.avg:.3f})')
        print(message.format(epoch, batch_ind, n_batches,
                             batch_time=step_timer, data_time=load_timer,
                             loss=loss_meter, top1=top1_meter))

    print(' * Prec@1 {top1.avg:.3f}'.format(top1=top1_meter))
    return {'loss': loss_meter.avg, 'acc': top1_meter.avg}
def predicate(data_loader, feature_extractor, output_path=None):
    """Run the extractor's model over ``data_loader`` and collect layer outputs.

    After every forward pass the tensors in
    ``feature_extractor.target_outputs`` are copied to NumPy and appended
    to a per-layer list. When ``output_path`` is given, the lists are
    concatenated along axis 0 and written with ``np.savez_compressed``.

    Args:
        data_loader: iterable of ``(input, _)`` batches.
        feature_extractor: object exposing ``model`` and a
            ``target_outputs`` mapping of layer -> tensor.
        output_path: destination for the compressed archive, or ``None``
            to skip saving.
    """
    batch_time = AverageMeter()
    model = feature_extractor.model
    outputs_dict = dict()

    # switch to evaluate mode
    model.eval()
    with torch.no_grad():
        toc = time.time()
        for batch_ind, (images, _) in enumerate(data_loader):
            images = images.cuda(non_blocking=True)

            # forward to get intermediate outputs
            _ = model(images)

            # synchronize so that everything is calculated
            torch.cuda.synchronize()

            for target_layer, target_output in feature_extractor.target_outputs.items():
                # Inputs live on the GPU, so the captured tensors may too;
                # ``.numpy()`` only works on CPU tensors. ``.cpu()`` is a
                # no-op for tensors that are already on the CPU.
                outputs_dict.setdefault(target_layer, []).append(
                    target_output.detach().cpu().numpy())

            # measure elapsed time
            batch_time.update(time.time() - toc)
            toc = time.time()

            if batch_ind % args.log_interval == 0:
                print('Predicate: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.
                      format(batch_ind, len(data_loader), batch_time=batch_time))

    if output_path is not None:
        # Merge the per-batch arrays of each layer into one array.
        stacked = {layer: np.concatenate(chunks, 0)
                   for layer, chunks in outputs_dict.items()}
        np.savez_compressed(output_path, **stacked)
def validate(val_loader, model):
    """Measure top-1 accuracy of ``model`` on ``val_loader``.

    Args:
        val_loader: iterable of ``(input, target)`` batches
        model: nn.Module

    Returns:
        number: average top-1 accuracy held by the meter
    """
    step_timer = AverageMeter()
    top1_meter = AverageMeter()
    n_batches = len(val_loader)

    model.eval()  # evaluation mode

    with torch.no_grad():
        tick = time.time()
        for batch_ind, (images, labels) in enumerate(val_loader):
            images = images.cuda(non_blocking=True)
            labels = labels.cuda(non_blocking=True)

            # forward pass and accuracy bookkeeping
            prec1 = calc_accuracy(model(images), labels, topk=(1, ))
            top1_meter.update(prec1[0].item(), images.size(0))

            # total time for this batch
            step_timer.update(time.time() - tick)
            tick = time.time()

            if batch_ind % args.log_interval != 0:
                continue
            message = ('Test: [{0}/{1}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t')
            print(message.format(batch_ind, n_batches,
                                 batch_time=step_timer, top1=top1_meter))

    return top1_meter.avg
def validate(val_loader, model, criterion, epoch):
    """Evaluate ``model`` on ``val_loader`` and log per-batch loss/accuracy.

    Args:
        val_loader: iterable of ``(x, y)`` batches.
        model: network called as ``model(x)``.
        criterion: callable invoked as ``criterion(y_pred, y, num_support)``
            and returning ``(loss, acc1)``.
        epoch: current epoch; ``len(val_loader) * (epoch - 1)`` is used as
            the global-step offset for logging.

    Returns:
        tuple: ``(losses.avg, top1.avg)``.
    """
    losses = AverageMeter()
    top1 = AverageMeter()
    num_support = args.num_support_val
    total_epoch = len(val_loader) * (epoch - 1)

    # switch to evaluate mode
    model.eval()

    # No gradients are needed for validation; avoid building the graph.
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            x, y = data[0].to(device), data[1].to(device)

            y_pred = model(x)
            loss, acc1 = criterion(y_pred, y, num_support)

            losses.update(loss.item(), x.size(0))
            top1.update(acc1.item(), x.size(0))

            writer.add_scalar("Loss/Val", loss.item(), total_epoch + i)
            writer.add_scalar("Acc/Val", acc1.item(), total_epoch + i)

    return losses.avg, top1.avg
def train(train_loader, model, optimizer, criterion, epoch):
    """Train ``model`` for one pass over ``train_loader`` with gradient clipping.

    The model is called as ``model(x, y)`` and returns both predictions
    and the targets to compare them with. A prediction figure is logged
    for the first batch; the loss is logged for every batch.

    Returns:
        The ``avg`` attribute of the loss meter.
    """
    loss_meter = AverageMeter()
    step_offset = len(train_loader) * (epoch - 1)

    model.train()
    model.custom_train()

    for step, batch in enumerate(train_loader):
        x, raw_y = batch[0].to(device), batch[1].to(device)

        y_pred, y = model(x, raw_y)
        loss = criterion(y_pred, y)
        loss_meter.update(loss.item(), y_pred.size(0))

        optimizer.zero_grad()
        loss.backward()
        # Clip the gradient norm to 1 before the update.
        clip_grad_norm_(model.parameters(), 1)
        optimizer.step()

        if step == 0:
            n_way = args.classes_per_it_tr
            n_shot = args.num_support_tr
            n_query = args.num_query_tr
            x_support, x_query, y_support, y_query = split_support_query_set(
                x, raw_y, n_way, n_shot, n_query)
            y_hat = y_pred.argmax(1)
            figure = plot_classes_preds(y_hat, y_pred,
                                        [x_support, x_query],
                                        [y_support, y_query],
                                        n_way, n_shot, n_query)
            writer.add_figure('y_prediction vs. y/Train', figure,
                              global_step=step_offset)

        writer.add_scalar("Loss/Train", loss.item(), step_offset + step)

    return loss_meter.avg
def validate(val_loader, model, criterion, epoch):
    """Evaluate ``model`` on ``val_loader`` and log per-batch loss/accuracy.

    The model is called as ``model(x, y)`` and returns both predictions
    and the targets to compare them with. A prediction figure is logged
    for the first batch only.

    Args:
        val_loader: iterable of ``(x, y)`` batches.
        model: network exposing ``custom_eval()``.
        criterion: loss invoked as ``criterion(y_pred, y)``.
        epoch: current epoch; ``len(val_loader) * (epoch - 1)`` is used as
            the global-step offset for logging.

    Returns:
        tuple: ``(losses.avg, accuracies.avg)``.
    """
    losses = AverageMeter()
    accuracies = AverageMeter()
    total_epoch = len(val_loader) * (epoch - 1)

    model.eval()
    model.custom_eval()

    # No gradients are needed for validation; avoid building the graph.
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            x, _y = data[0].to(device), data[1].to(device)

            y_pred, y = model(x, _y)
            loss = criterion(y_pred, y)
            y_hat = y_pred.argmax(dim=1)
            acc = y_hat.eq(y).float().mean()

            losses.update(loss.item(), y_pred.size(0))
            accuracies.update(acc.item(), y_pred.size(0))

            if i == 0:
                num_class = args.classes_per_it_val
                num_support = args.num_support_val
                num_query = args.num_query_val
                x_support, x_query, y_support, y_query = split_support_query_set(
                    x, _y, num_class, num_support, num_query)
                writer.add_figure('y_prediction vs. y/Val',
                                  plot_classes_preds(y_hat, y_pred,
                                                     [x_support, x_query],
                                                     [y_support, y_query],
                                                     num_class, num_support,
                                                     num_query),
                                  global_step=total_epoch)

            writer.add_scalar("Loss/Val", loss.item(), total_epoch + i)
            writer.add_scalar("Acc/Val", acc.item(), total_epoch + i)

    return losses.avg, accuracies.avg
def validate(val_loader, model, criterion, args):
    """Evaluate a video classifier, averaging clip scores per video.

    The model output is reshaped to
    ``(-1, args.clip_per_video, args.num_class)`` and averaged over the
    clip dimension, so each video gets a single prediction.

    Args:
        val_loader: iterable of ``(video, audio, target)`` batches; the
            audio element is not used.
        model: network called as ``model(video)``.
        criterion: loss invoked as ``criterion(output, target)``.
        args: namespace providing ``gpu``, ``clip_per_video``,
            ``num_class`` and ``print_freq``.

    Returns:
        tuple: ``(losses.avg, top1.avg, top5.avg)``.

    Raises:
        AssertionError: if the clips grouped into one video do not all
            carry the same label.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(val_loader),
                             [batch_time, losses, top1, top5],
                             prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (video, _audio, target) in enumerate(val_loader):
            if args.gpu is not None:
                video = video.cuda(args.gpu, non_blocking=True)
                target = target.cuda(args.gpu, non_blocking=True)

            # compute output and average the scores of each video's clips
            output = model(video)
            output = output.view(-1, args.clip_per_video, args.num_class)
            target = target.view(-1, args.clip_per_video)
            output = torch.mean(output, dim=1)

            # Make sure all clips of a row belong to the same video. One
            # vectorized comparison replaces a Python-level ``all()`` over
            # tensor elements repeated for every clip column.
            assert bool((target == target[:, :1]).all()), \
                'clips grouped into one video have different labels'
            target = target[:, 0]

            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), video.size(0))
            top1.update(acc1[0], video.size(0))
            top5.update(acc5[0], video.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.display(i)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1,
                                                                    top5=top5))

    return losses.avg, top1.avg, top5.avg
def train(train_loader, model, optimizer, criterion, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    The criterion is called as ``criterion(y_pred, y, num_support)`` and
    returns ``(loss, acc1)``; both values are logged for every batch.

    Returns:
        The ``avg`` attribute of the loss meter.
    """
    loss_meter = AverageMeter()
    n_shot = args.num_support_tr
    step_offset = len(train_loader) * (epoch - 1)

    model.train()  # training mode

    for step, batch in enumerate(train_loader):
        x, y = batch[0].to(device), batch[1].to(device)

        loss, acc1 = criterion(model(x), y, n_shot)
        loss_meter.update(loss.item(), x.size(0))

        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        global_step = step_offset + step
        writer.add_scalar("Loss/Train", loss.item(), global_step)
        writer.add_scalar("Acc/Train", acc1.item(), global_step)

    return loss_meter.avg
def train(self):
    """Train the Siamese network with per-epoch validation and checkpoints.

    Builds the data loaders, model, optimizer and one-cycle schedule
    (optionally resuming from a checkpoint), then alternates a training
    pass and a validation pass per epoch. Training stops early when the
    validation accuracy has not improved for more than
    ``self.config.train_patience`` consecutive epochs. A checkpoint is
    saved when the epoch is the best so far, every fifth epoch, and on the
    final epoch.
    """
    # Dataloader
    train_loader, valid_loader = get_train_validation_loader(
        self.config.data_dir, self.config.batch_size, self.config.num_train,
        self.config.augment, self.config.way, self.config.valid_trials,
        self.config.shuffle, self.config.seed, self.config.num_workers,
        self.config.pin_memory)

    # Model, Optimizer, criterion
    model = SiameseNet()
    if self.config.optimizer == "SGD":
        optimizer = optim.SGD(model.parameters(), lr=self.config.lr)
    else:
        optimizer = optim.Adam(model.parameters())
    criterion = torch.nn.BCEWithLogitsLoss()

    if self.config.use_gpu:
        model.cuda()

    # Load check point
    if self.config.resume:
        start_epoch, best_epoch, best_valid_acc, model_state, optim_state = \
            self.load_checkpoint(best=False)
        model.load_state_dict(model_state)
        optimizer.load_state_dict(optim_state)
        one_cycle = OneCyclePolicy(optimizer,
                                   num_steps=self.config.epochs - start_epoch,
                                   lr_range=(self.config.lr, 1e-1),
                                   momentum_range=(0.85, 0.95))
    else:
        best_epoch = 0
        start_epoch = 0
        best_valid_acc = 0
        one_cycle = OneCyclePolicy(optimizer, num_steps=self.config.epochs,
                                   lr_range=(self.config.lr, 1e-1),
                                   momentum_range=(0.85, 0.95))

    # create tensorboard summary and add model structure.
    writer = SummaryWriter(os.path.join(self.config.logs_dir, 'logs'),
                           filename_suffix=self.config.num_model)

    # The writer is closed in ``finally`` so the early-stopping return and
    # any exception release it as well.
    try:
        # NOTE(review): the fetched batch is not used below; kept because
        # fetching it may advance loader/RNG state - confirm before removing.
        im1, im2, _ = next(iter(valid_loader))
        writer.add_graph(model, [torch.rand((1, 1, 105, 105)).to(self.device),
                                 torch.rand(1, 1, 105, 105).to(self.device)])

        counter = 0
        num_train = len(train_loader)
        num_valid = len(valid_loader)
        print(
            f"[*] Train on {len(train_loader.dataset)} sample pairs, validate on {valid_loader.dataset.trials} trials")

        # Train & Validation
        main_pbar = tqdm(range(start_epoch, self.config.epochs),
                         initial=start_epoch, position=0,
                         total=self.config.epochs, desc="Process")
        for epoch in main_pbar:
            train_losses = AverageMeter()
            valid_losses = AverageMeter()

            # TRAIN
            model.train()
            train_pbar = tqdm(enumerate(train_loader), total=num_train,
                              desc="Train", position=1, leave=False)
            for i, (x1, x2, y) in train_pbar:
                if self.config.use_gpu:
                    x1, x2, y = x1.to(self.device), x2.to(self.device), y.to(self.device)
                out = model(x1, x2)
                loss = criterion(out, y.unsqueeze(1))

                # compute gradients and update
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # store batch statistics
                train_losses.update(loss.item(), x1.shape[0])

                # log loss
                writer.add_scalar("Loss/Train", train_losses.val,
                                  epoch * len(train_loader) + i)
                train_pbar.set_postfix_str(f"loss: {train_losses.val:0.3f}")
            # The schedule is stepped once per epoch.
            one_cycle.step()

            # VALIDATION
            model.eval()
            valid_acc = 0
            correct_sum = 0
            valid_pbar = tqdm(enumerate(valid_loader), total=num_valid,
                              desc="Valid", position=1, leave=False)
            with torch.no_grad():
                for i, (x1, x2, y) in valid_pbar:
                    if self.config.use_gpu:
                        x1, x2, y = x1.to(self.device), x2.to(self.device), y.to(self.device)

                    # compute log probabilities
                    out = model(x1, x2)
                    loss = criterion(out, y.unsqueeze(1))

                    # A batch counts as correct when the highest score is at
                    # flat index 0 (presumably each batch is one trial with
                    # the matching pair first - verify against the loader).
                    y_pred = torch.sigmoid(out)
                    y_pred = torch.argmax(y_pred)
                    if y_pred == 0:
                        correct_sum += 1

                    # store batch statistics
                    valid_losses.update(loss.item(), x1.shape[0])

                    # compute acc and log
                    valid_acc = correct_sum / num_valid
                    writer.add_scalar("Loss/Valid", valid_losses.val,
                                      epoch * len(valid_loader) + i)
                    valid_pbar.set_postfix_str(f"accuracy: {valid_acc:0.3f}")
            writer.add_scalar("Acc/Valid", valid_acc, epoch)

            # check for improvement
            if valid_acc > best_valid_acc:
                is_best = True
                best_valid_acc = valid_acc
                best_epoch = epoch
                counter = 0
            else:
                is_best = False
                counter += 1

            # checkpoint the model
            if counter > self.config.train_patience:
                print("[!] No improvement in a while, stopping training.")
                return

            # ``epoch`` runs up to ``epochs - 1``; comparing against
            # ``epochs`` would never match and the last epoch could go
            # unsaved.
            is_last_epoch = epoch == self.config.epochs - 1
            if is_best or epoch % 5 == 0 or is_last_epoch:
                self.save_checkpoint(
                    {
                        'epoch': epoch,
                        'model_state': model.state_dict(),
                        'optim_state': optimizer.state_dict(),
                        'best_valid_acc': best_valid_acc,
                        'best_epoch': best_epoch,
                    }, is_best
                )
            main_pbar.set_postfix_str(f"best acc: {best_valid_acc:.3f} best epoch: {best_epoch} ")

            tqdm.write(
                f"[{epoch}] train loss: {train_losses.avg:.3f} - valid loss: {valid_losses.avg:.3f} - valid acc: {valid_acc:.3f} {'[BEST]' if is_best else ''}")
    finally:
        # release resources
        writer.close()
def train(train_loader, augmentation_gpu, criterion, G_criterion, netG, netD, optimizer_g, optimizer_d, epoch, args, writer):
    """Run one adversarial training epoch of generator and discriminator.

    For each batch the generator produces a fake query from ``video[0]``,
    the discriminator returns query features plus contrastive
    logits/targets, the generator is updated on ``-100 * G_criterion`` and
    the discriminator on ``criterion(output, target)``.

    Args:
        train_loader: iterable of ``(video, audio)`` batches where
            ``video`` holds two views; the audio element is not used.
        augmentation_gpu: callable applied to both views.
        criterion: loss for the discriminator's contrastive output.
        G_criterion: loss comparing fake and real query features.
        netG, netD: generator and discriminator networks.
        optimizer_g, optimizer_d: their optimizers.
        epoch: current epoch (progress prefix and global step).
        args: namespace providing ``gpu`` and ``print_freq``.
        writer: summary writer, or ``None`` to disable scalar logging.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses_g = AverageMeter('Loss_G', ':.4f')
    losses_d = AverageMeter('Loss_D', ':.4f')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses_g, losses_d, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    netG.train()
    netD.train()

    end = time.time()
    # exp(generator loss) of the previous step, fed to the discriminator.
    loss_g_tmp = 1
    for i, (video, _audio) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            video[0] = video[0].cuda(args.gpu, non_blocking=True)
            video[1] = video[1].cuda(args.gpu, non_blocking=True)
        # NOTE(review): augmentation is applied regardless of ``args.gpu``
        # here; confirm this matches the intended nesting.
        video[0] = augmentation_gpu(video[0])
        video[1] = augmentation_gpu(video[1])

        im_q_fake = netG(video[0])
        q_fake, q_real, output, target = netD(im_q_fake, im_q=video[0],
                                              im_k=video[1],
                                              loss_gan=loss_g_tmp)

        # --- generator update: the discriminator needs no gradients ---
        set_requires_grad([netD], False)
        optimizer_g.zero_grad()
        loss_g = -100 * G_criterion(q_fake, q_real)
        loss_g_tmp = math.exp(loss_g.item())
        # The graph is retained because ``loss_d`` is back-propagated
        # through the same forward pass below.
        loss_g.backward(retain_graph=True)
        optimizer_g.step()

        # --- discriminator update ---
        set_requires_grad([netD], True)
        optimizer_d.zero_grad()
        loss_d = criterion(output, target)
        loss_d.backward()
        optimizer_d.step()

        # acc1/acc5 are (K+1)-way contrast classifier accuracy
        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses_g.update(loss_g.item(), video[0].size(0))
        losses_d.update(loss_d.item(), video[0].size(0))
        top1.update(acc1[0], video[0].size(0))
        top5.update(acc5[0], video[0].size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)
            if writer is not None:
                total_iter = i + epoch * len(train_loader)
                # Each loss gets its own tag; logging both under one tag
                # would put two different values on the same step.
                writer.add_scalar('moco_train/loss_d', loss_d, total_iter)
                writer.add_scalar('moco_train/loss_g', loss_g, total_iter)
                writer.add_scalar('moco_train/acc1', acc1, total_iter)
                writer.add_scalar('moco_train/acc5', acc5, total_iter)
                writer.add_scalar('moco_train_avg/loss_g', losses_g.avg, total_iter)
                writer.add_scalar('moco_train_avg/loss_d', losses_d.avg, total_iter)
                writer.add_scalar('moco_train_avg/acc1', top1.avg, total_iter)
                writer.add_scalar('moco_train_avg/acc5', top5.avg, total_iter)
def validate(val_loader, model, criterion, log_every=1):
    """Evaluate ``model`` on ``val_loader`` and return the average accuracy.

    Loss, F1, precision, recall and pixel accuracy are tracked per batch
    and shown through the progress meter every ``log_every`` batches.
    Gradients are disabled, but the model is deliberately left in train
    mode (see the comment below).

    Returns:
        The ``avg`` attribute of the accuracy meter.
    """
    timer = AverageMeter('Time', ':6.3f')
    loss_meter = AverageMeter('Loss', ':.4e')
    acc_meter = AverageMeter('Acc', ':6.4f')
    f1_meter = AverageMeter('F1', ':6.4f')
    prec_meter = AverageMeter('Prec', ':6.4f')
    rec_meter = AverageMeter('Recall', ':6.4f')
    meters = [timer, loss_meter, acc_meter, f1_meter, prec_meter, rec_meter]
    progress = ProgressMeter(len(val_loader), meters, prefix='Test: ')

    # model.eval() evaluate mode highly decreases performance
    model.train()

    with torch.no_grad():
        tick = time.time()
        for batch_no, (samples, targets) in enumerate(val_loader):
            # move data to gpu (or cpu if device is unavailable)
            samples = [sample.to(device) for sample in samples]
            targets = targets.squeeze(1).long().to(device)
            n_samples = targets.size(0)

            # forward pass and loss
            output = model(samples)
            loss_meter.update(criterion(output, targets).item(), n_samples)

            # F1 / precision / recall are tracked with the default weight
            f_score, (precision_val, recall_val) = f1_score(output, targets.float())
            f1_meter.update(f_score)
            prec_meter.update(precision_val)
            rec_meter.update(recall_val)

            # pixel accuracy
            acc_meter.update(pixel_accuracy(output, targets), n_samples)

            # total time for this batch
            timer.update(time.time() - tick)
            tick = time.time()

            if batch_no % log_every == 0:
                progress.display(batch_no)

    return acc_meter.avg
def train(train_loader: DataLoader, model: SegnetConvLSTM, criterion, optimizer, epoch, log_every=1):
    """Train ``model`` for one pass over ``train_loader``.

    Every batch is a list of sample tensors plus a target tensor. After
    the optimizer step the detached output is used to update the loss,
    pixel-accuracy, F1, precision and recall meters; diagnostics are
    printed every ``log_every`` batches.

    Returns:
        tuple: ``(average loss, average accuracy, average F1)`` as held by
        the respective meters.
    """
    batch_time = AverageMeter('BatchTime', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    acc = AverageMeter('Acc', ':6.4f')
    f1 = AverageMeter('F1', ':6.4f')
    prec = AverageMeter('Prec', ':6.4f')
    rec = AverageMeter('Recall', ':6.4f')
    meters = [batch_time, data_time, losses, acc, f1, prec, rec]
    progress = ProgressMeter(len(train_loader), meters,
                             prefix="Epoch: [{}]".format(epoch))

    model.train()  # training mode

    tick = time.time()
    for batch_no, (frames, labels) in enumerate(train_loader):
        # time spent waiting for the loader
        data_time.update(time.time() - tick)

        # move data to gpu (or cpu if device is unavailable)
        frames = [frame.to(device) for frame in frames]
        labels = labels.long().to(device)
        # drop dimension 1 of the targets before computing the loss
        labels = labels.squeeze(1)

        # forward pass; the loss executes Sigmoid inside (efficiently)
        output = model(frames)
        loss = criterion(output, labels)

        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # metrics are computed on a detached output so no graph is kept
        output = output.detach()
        n_samples = labels.size(0)
        losses.update(loss.item(), n_samples)

        labels = labels.float()
        accuracy = pixel_accuracy(output, labels)
        acc.update(accuracy, n_samples)
        f, (p, r) = f1_score(output, labels)
        f1.update(f)
        prec.update(p)
        rec.update(r)

        # total time for this batch
        batch_time.update(time.time() - tick)
        tick = time.time()

        if batch_no % log_every != 0:
            continue
        print("Output min", output.min().item(),
              "Output (softmax-ed) sum:", (output > 0.).float().sum().item(),
              "Output max:", torch.max(output).item())
        print("Targets sum:", labels.sum())
        print("Base acc:{} - base prec: {}- base recall: {}- base f1: {}".
              format(pixel_accuracy(output, labels), p, r, f))
        progress.display(batch_no)

    return losses.avg, acc.avg, f1.avg
def train(train_loader, model, criterion, optimizer, epoch, args, writer):
    """Train a classifier for one pass over ``train_loader``.

    Batches are ``(video, audio, target)``; the audio element is not used.
    Loss and top-1/top-5 accuracy are tracked per batch and, every
    ``args.print_freq`` batches, displayed and (when ``writer`` is given)
    written as scalars.
    """
    step_timer = AverageMeter('Time', ':6.3f')
    load_timer = AverageMeter('Data', ':6.3f')
    loss_meter = AverageMeter('Loss', ':.4e')
    top1_meter = AverageMeter('Acc@1', ':6.2f')
    top5_meter = AverageMeter('Acc@5', ':6.2f')
    n_batches = len(train_loader)
    progress = ProgressMeter(
        n_batches,
        [step_timer, load_timer, loss_meter, top1_meter, top5_meter],
        prefix="Epoch: [{}]".format(epoch))

    # Switch to eval mode:
    # Under the protocol of linear classification on frozen features/models,
    # it is not legitimate to change any part of the pre-trained model.
    # BatchNorm in train mode may revise running mean/std (even if it
    # receives no gradient), which are part of the model parameters too.
    model.eval()

    tick = time.time()
    for i, (video, _audio, target) in enumerate(train_loader):
        # time spent waiting for the loader
        load_timer.update(time.time() - tick)

        if args.gpu is not None:
            video = video.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)
        n_samples = video.size(0)

        # forward pass
        output = model(video)
        loss = criterion(output, target)

        # bookkeeping happens before the parameter update
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        loss_meter.update(loss.item(), n_samples)
        top1_meter.update(acc1[0], n_samples)
        top5_meter.update(acc5[0], n_samples)

        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # total time for this batch
        step_timer.update(time.time() - tick)
        tick = time.time()

        if i % args.print_freq != 0:
            continue
        progress.display(i)
        if writer is None:
            continue

        total_iter = i + epoch * n_batches
        scalars = (
            ('lincls_train/loss', loss),
            ('lincls_train/acc1', acc1),
            ('lincls_train/acc5', acc5),
            ('lincls_train_avg/lr', optimizer.param_groups[0]['lr']),
            ('lincls_train_avg/loss', loss_meter.avg),
            ('lincls_train_avg/acc1', top1_meter.avg),
            ('lincls_train_avg/acc5', top5_meter.avg),
        )
        for tag, value in scalars:
            writer.add_scalar(tag, value, total_iter)
def main():
    """Train and evaluate the segmentation network on the indoor3d data.

    Parses the command line, loads every HDF5 file listed in
    ``indoor3d_sem_seg_hdf5_data/all_files.txt``, holds out the rooms of
    ``--test_area`` for evaluation and trains on the rest. Per-batch
    training statistics and per-epoch evaluation statistics are printed
    and appended to log files under a timestamped directory; a checkpoint
    is written every fifth epoch.
    """
    parser = argparse.ArgumentParser(description='Voxelnet for semantic')
    parser.add_argument('--lr', default=0.001, type=float,
                        help='Initial learning rate')
    # ``type=int`` is required: without it a value given on the command
    # line arrives as a string and breaks ``range()`` / integer division.
    parser.add_argument('--epochs', default=100, type=int, help='epochs')
    parser.add_argument('--batchsize', default=4, type=int, help='batch size')
    parser.add_argument('--weight_file', default='', help='weights to load')
    parser.add_argument(
        '--test_area', type=int, default=5,
        help='Which area to use for test, option: 1-6 [default: 5]')
    parser.add_argument('--num_point', type=int, default=4096,
                        help='Point number [default: 4096]')
    args = parser.parse_args()

    NUM_POINT = args.num_point
    BATCH_SIZE = args.batchsize
    lr = args.lr
    num_classes = 13  # width of the per-point score vector used below

    ALL_FILES = getDataFiles('indoor3d_sem_seg_hdf5_data/all_files.txt')
    with open('indoor3d_sem_seg_hdf5_data/room_filelist.txt') as room_file:
        room_filelist = [line.rstrip() for line in room_file]

    # Load ALL data
    data_batch_list = []
    label_batch_list = []
    for h5_filename in ALL_FILES:
        data_batch, label_batch = loadDataFile(h5_filename)
        data_batch_list.append(data_batch)
        label_batch_list.append(label_batch)
    data_batches = np.concatenate(data_batch_list, 0)
    label_batches = np.concatenate(label_batch_list, 0)
    print(data_batches.shape)
    print(label_batches.shape)

    # Rooms whose name contains the test area are held out for evaluation.
    test_area = 'Area_' + str(args.test_area)
    train_idxs = []
    test_idxs = []
    for i, room_name in enumerate(room_filelist):
        if test_area in room_name:
            test_idxs.append(i)
        else:
            train_idxs.append(i)

    train_data = data_batches[train_idxs, ...]
    train_label = label_batches[train_idxs].astype(np.int64)
    test_data = data_batches[test_idxs, ...]
    test_label = label_batches[test_idxs].astype(np.int64)
    print(train_data.shape, train_label.shape)
    print(test_data.shape, test_label.shape)

    time_string = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    log_dir = os.path.join('log_ptn/train', test_area + '_' + time_string)
    os.makedirs(log_dir, exist_ok=True)
    checkpoint_dir = os.path.join(log_dir, 'checkpoint')
    os.makedirs(checkpoint_dir, exist_ok=True)

    start_epoch = 0
    epochs = args.epochs

    model = get_model()
    model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr)
    criterion = nn.CrossEntropyLoss().cuda()

    if args.weight_file != '':
        pre_trained_model = torch.load(args.weight_file)
        start_epoch = pre_trained_model['epoch']
        model_state = model.state_dict()
        model_state.update(pre_trained_model['state_dict'])
        model.load_state_dict(model_state)

    def forward_batch(features, labels):
        """Run one NumPy batch; return ``(loss tensor, top-1 precision)``."""
        inputs = torch.from_numpy(np.expand_dims(features, axis=1)).cuda()
        inputs = torch.transpose(inputs, 3, 1)
        target = torch.from_numpy(labels).cuda().view(-1)
        output = model(inputs)
        output_reshaped = output.permute(0, 3, 2,
                                         1).contiguous().view(-1, num_classes)
        loss = criterion(output_reshaped, target)
        prec1 = accuracy(output_reshaped.data, target.data, topk=(1, ))
        # ``.item()`` gives a plain float whatever the 1-element shape is,
        # so the ``:.3f`` format specs below always work.
        return loss, prec1[0].item()

    global_counter = 0
    for epoch in range(start_epoch, epochs):
        adjust_learning_rate(optimizer, global_counter, BATCH_SIZE, lr)

        # ---- training ----
        losses = AverageMeter()
        top1 = AverageMeter()
        model.train()

        train_data_shuffled, train_label_shuffled, _ = shuffle_data(
            train_data[:, 0:NUM_POINT, :], train_label)

        num_batches = train_data_shuffled.shape[0] // BATCH_SIZE
        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = (batch_idx + 1) * BATCH_SIZE

            loss, prec1 = forward_batch(
                train_data_shuffled[start_idx:end_idx, :, :],
                train_label_shuffled[start_idx:end_idx])
            losses.update(loss.item(), BATCH_SIZE)
            top1.update(prec1, BATCH_SIZE)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # NOTE(review): logging and the step counter are treated as
            # per-batch here (the message uses ``batch_idx``); confirm.
            print('Epoch: [{0}][{1}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      epoch, batch_idx, loss=losses, top1=top1))

            with open(os.path.join(log_dir, 'train_log.txt'), 'a') as f:
                f.write('Epoch: [{0}][{1}]\t'
                        'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                        'Prec@1 {top1.val:.3f} ({top1.avg:.3f}) \n'.format(
                            epoch, batch_idx, loss=losses, top1=top1))

            global_counter += 1

        # ---- evaluation on the held-out area ----
        losses = AverageMeter()
        top1 = AverageMeter()
        model.eval()

        num_batches = test_data.shape[0] // BATCH_SIZE
        # No back-propagation here, so skip building the autograd graph.
        with torch.no_grad():
            for batch_idx in range(num_batches):
                start_idx = batch_idx * BATCH_SIZE
                end_idx = (batch_idx + 1) * BATCH_SIZE

                loss, prec1 = forward_batch(
                    test_data[start_idx:end_idx, :, :],
                    test_label[start_idx:end_idx])
                losses.update(loss.item(), BATCH_SIZE)
                top1.update(prec1, BATCH_SIZE)

        print('Epoch {} Val Loss {:.3f} Val Acc {:.3f} \t'.format(
            epoch, losses.avg, top1.avg))

        with open(os.path.join(log_dir, 'test_log.txt'), 'a') as f:
            f.write('Epoch: [{0}]\t'
                    'Loss {loss.avg:.4f} \t'
                    'Prec@1 {top1.avg:.3f} \n'.format(epoch,
                                                      loss=losses,
                                                      top1=top1))

        if epoch % 5 == 0:
            torch.save(
                {
                    'epoch': epoch + 1,
                    'args': args,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict()
                },
                os.path.join(checkpoint_dir,
                             'checkpoint_' + str(epoch) + '.pth.tar'))
def evaluate(room_path, out_data_label_filename, out_gt_label_filename):
    """Run the segmentation model over one room and dump per-point results.

    The room is split into blocks of ``NUM_POINT`` points by
    ``room2blocks_wrapper_normalized`` and pushed through the module-level
    ``model`` in batches of ``BATCH_SIZE`` blocks. Only full batches are
    processed: blocks past ``(num_blocks // BATCH_SIZE) * BATCH_SIZE`` are
    skipped and do not appear in the output files or the returned counts.

    Args:
        room_path: Path to the room array; read with ``np.load`` and also
            handed to ``room2blocks_wrapper_normalized``. Its basename minus
            the last four characters names the optional OBJ dumps.
        out_data_label_filename: Output text file, one line per point:
            ``x y z r g b score predicted_label``.
        out_gt_label_filename: Output text file, one ground-truth label per
            line, in the same point order.

    Returns:
        Tuple ``(total_correct, total_seen)``: the number of correctly
        labelled points and the number of points evaluated.

    Side effects:
        Writes the two label files, and, when ``args.visu`` is set,
        ``<room>_pred.obj`` and ``<room>_gt.obj`` inside ``DUMP_DIR``.
        Prints the overall accuracy (``nan`` if no full batch was available).
    """
    # Function-scope import: the file's import section is not visible from
    # this block, so the dependency is kept local.
    from contextlib import ExitStack

    current_data, current_label = room2blocks_wrapper_normalized(
        room_path, NUM_POINT)
    current_data = current_data[:, 0:NUM_POINT, :].astype(np.float32)
    current_label = np.squeeze(current_label).astype(np.int64)

    # Room extent along the first three columns of the raw room array; used
    # below to scale block columns 6..8 (presumably room-normalised xyz --
    # TODO confirm against room2blocks_wrapper_normalized).
    room_points = np.load(room_path)
    max_room_x = room_points[:, 0].max()
    max_room_y = room_points[:, 1].max()
    max_room_z = room_points[:, 2].max()

    num_batches = current_data.shape[0] // BATCH_SIZE
    visu = args.visu

    total_correct = 0
    total_seen = 0
    losses = AverageMeter()
    top1 = AverageMeter()
    model.eval()

    # ExitStack guarantees every opened file is closed even if inference or
    # a write raises part-way through the room.
    with ExitStack() as stack:
        fout = fout_gt = None
        if visu:
            room_name = os.path.basename(room_path)[:-4]
            fout = stack.enter_context(
                open(os.path.join(DUMP_DIR, room_name + '_pred.obj'), 'w'))
            fout_gt = stack.enter_context(
                open(os.path.join(DUMP_DIR, room_name + '_gt.obj'), 'w'))
        fout_data_label = stack.enter_context(
            open(out_data_label_filename, 'w'))
        fout_gt_label = stack.enter_context(open(out_gt_label_filename, 'w'))

        for batch_idx in range(num_batches):
            start_idx = batch_idx * BATCH_SIZE
            end_idx = start_idx + BATCH_SIZE
            batch_labels = current_label[start_idx:end_idx]
            feature = np.expand_dims(current_data[start_idx:end_idx], axis=1)

            # Pure inference: no autograd graph is needed.
            with torch.no_grad():
                inputs = torch.transpose(
                    torch.from_numpy(feature).cuda(), 3, 1)
                target = torch.from_numpy(batch_labels).cuda().view(-1)
                output = model(inputs)
                output_reshaped = output.permute(
                    0, 3, 2, 1).contiguous().view(-1, 13)
                loss = criterion(output_reshaped, target)
                prec1 = accuracy(output_reshaped, target, topk=(1, ))

            # .item()/float() work for both 0-dim and one-element results,
            # unlike the legacy ``loss.data[0]`` indexing.
            losses.update(loss.item(), BATCH_SIZE)
            top1.update(float(prec1[0]), BATCH_SIZE)

            # Single device-to-host copy, reused for labels and scores.
            scores = output_reshaped.detach().cpu().numpy()
            pred_label = np.reshape(
                np.argmax(scores, axis=1), (BATCH_SIZE, -1))
            pred_val = np.reshape(scores, (BATCH_SIZE, -1, 13))

            # Save prediction labels to OBJ file
            for b in range(BATCH_SIZE):
                # Work on a copy so the rescaling below does not write back
                # into current_data.
                pts = current_data[start_idx + b].copy()
                gt = batch_labels[b]
                pts[:, 6] *= max_room_x
                pts[:, 7] *= max_room_y
                pts[:, 8] *= max_room_z
                pts[:, 3:6] *= 255.0
                pred = pred_label[b]
                for i in range(NUM_POINT):
                    if visu:
                        color = g_label2color[pred[i]]
                        color_gt = g_label2color[gt[i]]
                        fout.write('v %f %f %f %d %d %d\n' %
                                   (pts[i, 6], pts[i, 7], pts[i, 8],
                                    color[0], color[1], color[2]))
                        fout_gt.write('v %f %f %f %d %d %d\n' %
                                      (pts[i, 6], pts[i, 7], pts[i, 8],
                                       color_gt[0], color_gt[1], color_gt[2]))
                    fout_data_label.write(
                        '%f %f %f %d %d %d %f %d\n' %
                        (pts[i, 6], pts[i, 7], pts[i, 8], pts[i, 3],
                         pts[i, 4], pts[i, 5], pred_val[b, i, pred[i]],
                         pred[i]))
                    fout_gt_label.write('%d\n' % (gt[i]))

            total_correct += np.sum(pred_label == batch_labels)
            total_seen += BATCH_SIZE * NUM_POINT

    # Guard against rooms that yield fewer than BATCH_SIZE blocks.
    eval_acc = total_correct / float(total_seen) if total_seen else float('nan')
    print('eval accuracy: %f' % eval_acc)
    return total_correct, total_seen