def evaluate(model, criterion, data_loader, device):
    model.eval()
    metric_logger = MetricLogger(delimiter=" ")
    header = 'Test:'
    with torch.no_grad():
        for video, target in metric_logger.log_every(data_loader, 100, header):
            start_time = time.time()
            video = video.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            output = model(video)
            time_diff = time.time() - start_time
            print("Predicting on a video of shape {} took {} seconds".format(
                video.shape, time_diff))
            print("target shape {}".format(target.shape))
            print("target {}".format(target))
            loss = criterion(output, target)
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = video.shape[0]
            metric_logger.update(loss=loss.item())
            metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
            metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)

    print(' * Clip Acc@1 {top1.global_avg:.3f} Clip Acc@5 {top5.global_avg:.3f}'
          .format(top1=metric_logger.acc1, top5=metric_logger.acc5))
    return metric_logger.acc1.global_avg
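
# The snippets in this file call an `accuracy` helper for top-k accuracy that is
# not defined here. Below is a minimal sketch, assuming it follows the common
# accuracy(output, target, topk) convention and returns percentages as tensors
# (so that .item() works on the results); the actual helper may differ.
import torch


def accuracy(output, target, topk=(1,)):
    """Compute the top-k accuracy (in percent) of `output` logits against `target`."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # indices of the k highest-scoring classes, shape (maxk, batch_size)
        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res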
def train_one_epoch(model, optimizer, lr_scheduler, data_loader, epoch,
                    print_freq, checkpoint_fn=None):
    model.train()
    metric_logger = MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value}'))
    metric_logger.add_meter('batch/s', SmoothedValue(window_size=10, fmt='{value:.3f}'))

    header = 'Epoch: [{}]'.format(epoch)
    for step, batched_inputs in enumerate(
            metric_logger.log_every(data_loader, print_freq, header)):
        start_time = time.time()
        loss = model(batched_inputs)

        if checkpoint_fn is not None and np.random.random() < 0.005:
            checkpoint_fn()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        metric_logger.update(loss=loss.item(), lr=optimizer.param_groups[0]["lr"])
        metric_logger.meters['batch/s'].update(time.time() - start_time)
        lr_scheduler.step()

    if checkpoint_fn is not None:
        checkpoint_fn()
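
# Hypothetical usage sketch (the names below are assumptions, not part of the
# snippet above): `checkpoint_fn` is treated as a zero-argument callable, so a
# small closure that saves training state is enough to get both the random
# mid-epoch checkpoints and the guaranteed end-of-epoch one.
import torch


def save_checkpoint(model, optimizer, lr_scheduler, path="checkpoint.pth"):
    # persist everything needed to resume training
    torch.save({"model": model.state_dict(),
                "optimizer": optimizer.state_dict(),
                "lr_scheduler": lr_scheduler.state_dict()}, path)

# train_one_epoch(model, optimizer, lr_scheduler, data_loader, epoch=0,
#                 print_freq=20,
#                 checkpoint_fn=lambda: save_checkpoint(model, optimizer, lr_scheduler))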
def train_linear_one_epoch(train_loader, model, criterion, optimizer, config,
                           epoch, device):
    log_header = 'EPOCH {}'.format(epoch + 1)
    losses = AverageMeter('Loss', fmt=':.4f')
    top1 = AverageMeter('Top1', fmt=':4.2f')
    top5 = AverageMeter('Top5', fmt=':4.2f')
    lr = AverageMeter('Lr', fmt=":.4f")
    metric_logger = MetricLogger(delimeter=" | ")
    metric_logger.add_meter(losses)
    metric_logger.add_meter(top1)
    metric_logger.add_meter(top5)
    metric_logger.add_meter(lr)

    for step, (img, target) in enumerate(
            metric_logger.log_every(train_loader, config.system.print_freq,
                                    log_header)):
        img = img.to(device)
        target = target.to(device)

        logit = model(img)
        loss = criterion(logit, target)
        acc1, acc5 = accuracy(logit, target, topk=(1, 5))
        lr_ = optimizer.param_groups[0]['lr']

        metric_logger.update(Loss=loss.detach().cpu().item(),
                             Top1=acc1.detach().cpu().item(),
                             Top5=acc5.detach().cpu().item(),
                             Lr=lr_)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
def evaluate(model, epoch, criterion, data_loader, device, writer):
    model.eval()
    metric_logger = MetricLogger(delimiter=" ")
    header = 'Test:'
    cntr = 0
    running_accuracy = 0.0
    with torch.no_grad():
        for video, target in metric_logger.log_every(data_loader, 100, header):
            video = video.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            output = model(video)
            loss = criterion(output, target)

            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = video.shape[0]

            running_accuracy += acc1.item()
            if cntr % 10 == 9:
                # average accuracy over the accumulated mini-batches
                writer.add_scalar('validation accuracy', running_accuracy / 10,
                                  epoch * len(data_loader) + cntr)
                running_accuracy = 0.0
            cntr += 1

            metric_logger.update(loss=loss.item())
            metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
            metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)

    print(' * Clip Acc@1 {top1.global_avg:.3f} Clip Acc@5 {top5.global_avg:.3f}'
          .format(top1=metric_logger.acc1, top5=metric_logger.acc5))
    return metric_logger.acc1.global_avg
def train_one_epoch(train_loader, model, criterion, optimizer, writer, epoch,
                    total_step, config):
    log_header = 'EPOCH {}'.format(epoch)
    losses = AverageMeter('Loss', fmt=':.4f')
    if config.method != 'byol':
        top1 = AverageMeter('Acc1', fmt=':4.2f')
        top5 = AverageMeter('Acc5', fmt=':4.2f')
    lr = AverageMeter('Lr', fmt=":.6f")
    metric_logger = MetricLogger(delimeter=" | ")
    metric_logger.add_meter(losses)
    if config.method != 'byol':
        metric_logger.add_meter(top1)
        metric_logger.add_meter(top5)
    metric_logger.add_meter(lr)

    # ce = nn.CrossEntropyLoss().cuda(config.system.gpu)
    # num_steps_per_epoch = int(len(train_loader.dataset) // config.train.batch_size)
    # global_step = num_steps_per_epoch * epoch

    for step, (images, _) in enumerate(
            metric_logger.log_every(train_loader, config.system.print_freq,
                                    log_header)):
        total_step.val += 1
        if config.system.gpu is not None:
            images[0] = images[0].cuda(config.system.gpu, non_blocking=True)
            images[1] = images[1].cuda(config.system.gpu, non_blocking=True)

        # [pos, neg]
        # output = model(view_1=images[0], view_2=images[1])
        # loss, logits, targets = criterion(output)
        if config.method != 'byol':
            logits, targets, logits_original = model(view_1=images[0],
                                                     view_2=images[1])
            loss = criterion(logits, targets)
            acc1, acc5 = accuracy(logits_original, targets, topk=(1, 5))
        else:
            loss_pre = model(view_1=images[0], view_2=images[1])
            loss = loss_pre.mean()

        lr_ = optimizer.param_groups[0]['lr']
        if config.method != 'byol':
            metric_logger.update(Loss=loss.detach().cpu().item(),
                                 Acc1=acc1.detach().cpu().item(),
                                 Acc5=acc5.detach().cpu().item(),
                                 Lr=lr_)
        else:
            metric_logger.update(Loss=loss.detach().cpu().item(), Lr=lr_)

        writer.add_scalar('loss', loss.detach().cpu().item(), total_step.val)
        if config.method != 'byol':
            writer.add_scalar('top1', acc1.detach().cpu().item(), total_step.val)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    model.train()
    metric_logger = MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0:
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(data_loader) - 1)
        lr_scheduler = warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = list(image.to(device) for image in images)
        # images = list(np.array(img) for img in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        loss_value = losses_reduced.item()

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    return metric_logger
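
# `warmup_lr_scheduler` is referenced above but not defined in these snippets.
# A minimal sketch, assuming the usual behaviour of a linear warmup whose factor
# ramps from `warmup_factor` to 1 over `warmup_iters` scheduler steps:
import torch


def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
    def f(x):
        # x counts scheduler steps; after the warmup window the factor stays at 1
        if x >= warmup_iters:
            return 1
        alpha = float(x) / warmup_iters
        return warmup_factor * (1 - alpha) + alpha

    return torch.optim.lr_scheduler.LambdaLR(optimizer, f)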
def evaluate(model, data_loader, device):
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = MetricLogger(delimiter=" ")
    header = 'Test:'

    coco = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)

    for images, targets in metric_logger.log_every(data_loader, 100, header):
        images = list(img.to(device) for img in images)

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(images)
        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        res = {
            target["image_id"].item(): output
            for target, output in zip(targets, outputs)
        }
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    torch.set_num_threads(n_threads)
    return coco_evaluator
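
# `_get_iou_types` is used above but not shown. A plausible sketch, assuming the
# COCO IoU types are chosen from the class of the (possibly DDP-wrapped) detection
# model; the real helper may cover more cases.
import torch
import torchvision


def _get_iou_types(model):
    model_without_ddp = model
    if isinstance(model, torch.nn.parallel.DistributedDataParallel):
        model_without_ddp = model.module
    iou_types = ["bbox"]
    if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN):
        iou_types.append("segm")
    if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN):
        iou_types.append("keypoints")
    return iou_types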
def train_one_epoch(model, criterion, optimizer, lr_scheduler, data_loader,
                    device, epoch, print_freq, writer):
    model.train()
    metric_logger = MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value}'))
    metric_logger.add_meter('clips/s', SmoothedValue(window_size=10, fmt='{value:.3f}'))

    running_loss = 0.0
    running_accuracy = 0.0
    header = 'Epoch: [{}]'.format(epoch)
    cntr = 0
    for video, target in metric_logger.log_every(data_loader, print_freq, header):
        start_time = time.time()
        video, target = video.to(device), target.to(device)
        output = model(video)
        loss = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        batch_size = video.shape[0]

        running_loss += loss.item()
        running_accuracy += acc1.item()
        if cntr % 10 == 9:
            # average loss and accuracy over the accumulated mini-batches
            writer.add_scalar('training loss', running_loss / 10,
                              epoch * len(data_loader) + cntr)
            writer.add_scalar('learning rate', optimizer.param_groups[0]["lr"],
                              epoch * len(data_loader) + cntr)
            writer.add_scalar('accuracy', running_accuracy / 10,
                              epoch * len(data_loader) + cntr)
            running_loss = 0.0
            running_accuracy = 0.0
        cntr = cntr + 1

        metric_logger.update(loss=loss.item(), lr=optimizer.param_groups[0]["lr"])
        metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
        metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)
        metric_logger.meters['clips/s'].update(batch_size / (time.time() - start_time))
        lr_scheduler.step()
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(params, lr=lr, momentum=momentum, weight_decay=weight_decay)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

# train
for epoch in range(num_epochs):
    metric_logger = MetricLogger(delimiter=' ')
    metric_logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    model.train()
    for images, targets in metric_logger.log_every(train_data_loader, print_freq, header):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        loss_value = losses_reduced.item()

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    lr_scheduler.step()
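
# Optional follow-up, assuming a held-out `val_data_loader` exists (it is not part
# of the snippet above): the COCO `evaluate` helper defined earlier can be called
# at the end of each epoch, right after lr_scheduler.step(), e.g.
#
#     evaluate(model, val_data_loader, device=device)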