def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0:
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(data_loader) - 1)
        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters,
                                                 warmup_factor)

    for images, targets in metric_logger.log_every(data_loader, print_freq,
                                                   header):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        loss_value = losses_reduced.item()

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
        sys.stdout.flush()

    return metric_logger
def evaluate(model, criterion, data_loader, device, print_freq=100, log_suffix=""):
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = f"Test: {log_suffix}"

    num_processed_samples = 0
    with torch.inference_mode():
        for image, target in metric_logger.log_every(data_loader, print_freq,
                                                     header):
            image = image.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            output = model(image)
            loss = criterion(output, target)

            acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = image.shape[0]
            metric_logger.update(loss=loss.item())
            metric_logger.meters["acc1"].update(acc1.item(), n=batch_size)
            metric_logger.meters["acc5"].update(acc5.item(), n=batch_size)
            num_processed_samples += batch_size

    # gather the stats from all processes
    num_processed_samples = utils.reduce_across_processes(num_processed_samples)
    if (hasattr(data_loader.dataset, "__len__")
            and len(data_loader.dataset) != num_processed_samples
            and torch.distributed.get_rank() == 0):
        # See FIXME above
        warnings.warn(
            f"It looks like the dataset has {len(data_loader.dataset)} samples, but {num_processed_samples} "
            "samples were used for the validation, which might bias the results. "
            "Try adjusting the batch size and / or the world size. "
            "Setting the world size to 1 is always a safe bet.")

    metric_logger.synchronize_between_processes()

    print(f"{header} Acc@1 {metric_logger.acc1.global_avg:.3f} Acc@5 {metric_logger.acc5.global_avg:.3f}")
    return metric_logger.acc1.global_avg
def evaluate(model, data_loader, device):
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Test:'

    coco = get_coco_api_from_dataset(data_loader.dataset)
    print("[*] Successfully get coco api from dataset")
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)

    for image, targets in metric_logger.log_every(data_loader, 100, header):
        image = list(img.to(device) for img in image)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(image)

        outputs = [{k: v.to(cpu_device) for k, v in t.items()}
                   for t in outputs]
        model_time = time.time() - model_time

        res = {
            target["image_id"].item(): output
            for target, output in zip(targets, outputs)
        }
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time,
                             evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    torch.set_num_threads(n_threads)
    return coco_evaluator
def extract_features(model, data_loader):
    metric_logger = utils.MetricLogger(delimiter=" ")
    features = None
    for samples, index in metric_logger.log_every(data_loader, 10):
        samples = samples.cuda(non_blocking=True)
        index = index.cuda(non_blocking=True)
        feats = model(samples.float()).clone()

        # init storage feature matrix
        if dist.get_rank() == 0 and features is None:
            features = torch.zeros(len(data_loader.dataset), feats.shape[-1])
            if args.use_cuda:
                features = features.cuda(non_blocking=True)
            print(f"Storing features into tensor of shape {features.shape}")

        # get indexes from all processes
        y_all = torch.empty(dist.get_world_size(),
                            index.size(0),
                            dtype=index.dtype,
                            device=index.device)
        y_l = list(y_all.unbind(0))
        y_all_reduce = torch.distributed.all_gather(y_l, index, async_op=True)
        y_all_reduce.wait()
        index_all = torch.cat(y_l)

        # share features between processes
        feats_all = torch.empty(
            dist.get_world_size(),
            feats.size(0),
            feats.size(1),
            dtype=feats.dtype,
            device=feats.device,
        )
        output_l = list(feats_all.unbind(0))
        output_all_reduce = torch.distributed.all_gather(output_l,
                                                         feats,
                                                         async_op=True)
        output_all_reduce.wait()

        # update storage feature matrix
        if dist.get_rank() == 0:
            if args.use_cuda:
                features.index_copy_(0, index_all, torch.cat(output_l))
            else:
                features.index_copy_(0, index_all.cpu(),
                                     torch.cat(output_l).cpu())
    return features
def train_one_epoch(epoch, model, data_loader, optimizer, device,
                    lr_update=None, lr_scheduler=None, print_freq=100):
    model.train()

    # logger
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    print('----' * 20)
    print("[Epoch {}] The number of batches: {}".format(epoch, len(data_loader)))

    for idx, (images, targets) in enumerate(
            metric_logger.log_every(data_loader, print_freq, header)):
        # get data
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # forward
        loss_dict = model(images, targets)
        losses = loss_dict['loss_classifier'] + loss_dict['loss_box_reg'] \
            + loss_dict['loss_mask'] + loss_dict['loss_objectness'] \
            + loss_dict['loss_rpn_box_reg']

        # backprop
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        # sum loss
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        loss_value = losses_reduced.item()
        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        # logging
        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

        # learning rate scheduler
        curr_itr = idx + epoch * len(data_loader) + 1
        if lr_scheduler is not None and lr_update is not None:
            if curr_itr % lr_update == 0:
                print("+++ LR Update !")
                lr_scheduler.step()
def train_one_epoch(model, criterion, optimizer, data_loader, lr_scheduler,
                    device, epoch, print_freq):
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr',
                            utils.SmoothedValue(window_size=1, fmt='{value}'))
    header = 'Epoch: [{}]'.format(epoch)
    for data, target in metric_logger.log_every(data_loader, print_freq,
                                                header):
        global n_iter
        n_iter = n_iter + 1
        optimizer.zero_grad()
        target = target.to(device)
        output = model(data)
        loss = criterion(output, target)
        loss = loss.mean()

        # visualization
        segmap = torch.argmax(output['out'], dim=1)

        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        metric_logger.update(loss=loss.item(),
                             lr=optimizer.param_groups[0]["lr"])

        if n_iter % args.print_freq == 0:
            if args.tensorboard and utils.is_main_process():
                args.writer.add_scalar('SupLoss', loss.item(), n_iter)
                if n_iter % (args.print_freq * 100) == 0:
                    grid = torchvision.utils.make_grid(data[:1])
                    grid = (grid - grid.min()) / (grid.max() - grid.min() + 1e-5)
                    args.writer.add_image('sup images', grid, n_iter)
                    segmap = args.colormap[segmap[0].detach().cpu().numpy()]
                    segmap = segmap / 255.
                    args.writer.add_image('sup segmaps',
                                          segmap.transpose((2, 0, 1)), n_iter)
def evaluate(model, data_loader, device, num_classes):
    model.eval()
    confmat = utils.ConfusionMatrix(num_classes)
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Test:'
    with torch.no_grad():
        for image, target in metric_logger.log_every(data_loader, 100, header):
            image, target = image.to(device), target.to(device)
            output = model(image)
            output = {"out": output[0], "aux": output[1]}
            output = output['out']

            confmat.update(target.flatten(), output.argmax(1).flatten())

        confmat.reduce_from_all_processes()

    return confmat
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)
    print_freq = 10

    lr_scheduler = None
    if epoch == 0:
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(train_loader) - 1)
        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters,
                                                 warmup_factor)

    i = 1
    for img, box in train_loader:
        imgs = list(image.to(device) for image in img)
        boxs = [{k: v.to(device) for k, v in t.items()} for t in box]
        i += 1

        loss_dict = model(imgs, boxs)
        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        loss_value = losses_reduced.item()
        print(loss_value)
        print('-' * 50)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
        # print(loss)
        if i > 10:
            return
def train_one_epoch(model, criterion, optimizer, data_loader, lr_scheduler,
                    device, epoch, print_freq):
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr',
                            utils.SmoothedValue(window_size=1, fmt='{value}'))
    header = 'Epoch: [{}]'.format(epoch)
    for image, target in metric_logger.log_every(data_loader, print_freq,
                                                 header):
        image, target = image.to(device), target.to(device)
        output = model(image)
        loss = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        lr_scheduler.step()

        metric_logger.update(loss=loss.item(),
                             lr=optimizer.param_groups[0]["lr"])
def evaluate(model, data_loader, device):
    iou_types = ["bbox"]
    coco = get_coco_api_from_dataset(data_loader.dataset)
    n_threads = torch.get_num_threads()
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = "Test:"
    model.to(device)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)
    to_tensor = torchvision.transforms.ToTensor()

    for image, targets in metric_logger.log_every(data_loader, 100, header):
        image = list(to_tensor(img).to(device) for img in image)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(image)

        outputs = [{k: v.to(cpu_device) for k, v in t.items()}
                   for t in outputs]
        model_time = time.time() - model_time

        res = {
            target["image_id"].item(): output
            for target, output in zip(targets, outputs)
        }
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time,
                             evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    torch.set_num_threads(n_threads)
    return coco_evaluator
def train(model, linear_classifier, optimizer, loader, epoch, n, avgpool):
    linear_classifier.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)
    for (inp, target) in metric_logger.log_every(loader, 20, header):
        # move to gpu
        inp = inp.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        # forward
        with torch.no_grad():
            if "vit" in args.arch:
                intermediate_output = model.get_intermediate_layers(inp, n)
                output = torch.cat([x[:, 0] for x in intermediate_output],
                                   dim=-1)
                if avgpool:
                    output = torch.cat(
                        (output.unsqueeze(-1),
                         torch.mean(intermediate_output[-1][:, 1:],
                                    dim=1).unsqueeze(-1)),
                        dim=-1)
                    output = output.reshape(output.shape[0], -1)
            else:
                output = model(inp)
        output = linear_classifier(output)

        # compute cross entropy loss
        loss = nn.CrossEntropyLoss()(output, target)

        # compute the gradients
        optimizer.zero_grad()
        loss.backward()

        # step
        optimizer.step()

        # log
        torch.cuda.synchronize()
        metric_logger.update(loss=loss.item())
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
def evaluate(model, criterion, data_loader, device, print_freq=100):
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ", device=device)
    header = 'Test:'
    step_count = 0
    with torch.no_grad():
        for image, target in metric_logger.log_every(data_loader, print_freq,
                                                     header):
            image = image.to(device, non_blocking=True)
            if args.channels_last:
                image = image.contiguous(memory_format=torch.channels_last)
                if args.run_lazy_mode:
                    # This mark_step is added so that the lazy kernel can
                    # create and evaluate the graph to infer the resulting
                    # tensor as channels_last
                    import habana_frameworks.torch.core as htcore
                    htcore.mark_step()
            target = target.to(device, non_blocking=True)
            output = model(image)
            loss = criterion(output, target)

            acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = image.shape[0]
            loss_cpu = loss.to('cpu').detach()
            metric_logger.update(loss=loss_cpu.item())
            metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
            metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)
            step_count = step_count + 1
            if step_count >= args.num_eval_steps:
                break

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()

    # Return from here if the evaluation phase did not go through any iterations
    # (e.g. the dataset is so small that there is only one eval batch, and it was
    # skipped in the data loader due to drop_last=True).
    if len(metric_logger.meters) == 0:
        return

    print(' * Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f}'.format(
        top1=metric_logger.acc1, top5=metric_logger.acc5))
    return metric_logger.acc1.global_avg
def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch,
                    print_freq, apex=False):
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ", device=device)
    metric_logger.add_meter('lr',
                            utils.SmoothedValue(window_size=1, fmt='{value}'))
    metric_logger.add_meter('img/s',
                            utils.SmoothedValue(window_size=10, fmt='{value}'))

    header = 'Epoch: [{}]'.format(epoch)
    step_count = 0
    for image, target in metric_logger.log_every(data_loader, print_freq,
                                                 header):
        if args.distributed:
            utils.barrier()

        start_time = time.time()
        image, target = image.to(device, non_blocking=True), target.to(
            device, non_blocking=True)
        if args.channels_last:
            image = image.contiguous(memory_format=torch.channels_last)

        loss_cpu, output_cpu = train_model(model, criterion, optimizer, image,
                                           target, apex)

        acc1, acc5 = utils.accuracy(output_cpu, target, topk=(1, 5))
        batch_size = image.shape[0]
        # Bring the loss tensor back to CPU before printing. Certainly needed if running on Habana.
        metric_logger.update(loss=loss_cpu,
                             lr=optimizer.param_groups[0]["lr"])
        metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
        metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)
        metric_logger.meters['img/s'].update(batch_size /
                                             (time.time() - start_time))
        step_count = step_count + 1
        if step_count >= args.num_train_steps:
            break
def evaluate(model, criterion, data_loader, device):
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = "Test:"
    num_processed_samples = 0
    with torch.inference_mode():
        for video, target in metric_logger.log_every(data_loader, 100, header):
            video = video.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            output = model(video)
            loss = criterion(output, target)

            acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = video.shape[0]
            metric_logger.update(loss=loss.item())
            metric_logger.meters["acc1"].update(acc1.item(), n=batch_size)
            metric_logger.meters["acc5"].update(acc5.item(), n=batch_size)
            num_processed_samples += batch_size

    # gather the stats from all processes
    num_processed_samples = utils.reduce_across_processes(num_processed_samples)

    if isinstance(data_loader.sampler, DistributedSampler):
        # Get the len of UniformClipSampler inside DistributedSampler
        num_data_from_sampler = len(data_loader.sampler.dataset)
    else:
        num_data_from_sampler = len(data_loader.sampler)

    if (hasattr(data_loader.dataset, "__len__")
            and num_data_from_sampler != num_processed_samples
            and torch.distributed.get_rank() == 0):
        # See FIXME above
        warnings.warn(
            f"It looks like the sampler has {num_data_from_sampler} samples, but {num_processed_samples} "
            "samples were used for the validation, which might bias the results. "
            "Try adjusting the batch size and / or the world size. "
            "Setting the world size to 1 is always a safe bet.")

    metric_logger.synchronize_between_processes()

    print(" * Clip Acc@1 {top1.global_avg:.3f} Clip Acc@5 {top5.global_avg:.3f}".format(
        top1=metric_logger.acc1, top5=metric_logger.acc5))
    return metric_logger.acc1.global_avg
def train_one_epoch(model, criterion, optimizer, lr_scheduler, data_loader,
                    device, epoch, print_freq, scaler=None):
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter("lr",
                            utils.SmoothedValue(window_size=1, fmt="{value}"))
    metric_logger.add_meter(
        "clips/s", utils.SmoothedValue(window_size=10, fmt="{value:.3f}"))

    header = f"Epoch: [{epoch}]"
    for video, target in metric_logger.log_every(data_loader, print_freq,
                                                 header):
        start_time = time.time()
        video, target = video.to(device), target.to(device)
        with torch.cuda.amp.autocast(enabled=scaler is not None):
            output = model(video)
            loss = criterion(output, target)

        optimizer.zero_grad()
        if scaler is not None:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
        batch_size = video.shape[0]
        metric_logger.update(loss=loss.item(),
                             lr=optimizer.param_groups[0]["lr"])
        metric_logger.meters["acc1"].update(acc1.item(), n=batch_size)
        metric_logger.meters["acc5"].update(acc5.item(), n=batch_size)
        metric_logger.meters["clips/s"].update(batch_size /
                                               (time.time() - start_time))
        lr_scheduler.step()
def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch,
                    print_freq, apex=False, model_ema=None):
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr',
                            utils.SmoothedValue(window_size=1, fmt='{value}'))
    metric_logger.add_meter('img/s',
                            utils.SmoothedValue(window_size=10, fmt='{value}'))

    header = 'Epoch: [{}]'.format(epoch)
    for image, target in metric_logger.log_every(data_loader, print_freq,
                                                 header):
        start_time = time.time()
        image, target = image.to(device), target.to(device)
        output = model(image)
        loss = criterion(output, target)

        optimizer.zero_grad()
        if apex:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        optimizer.step()

        acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
        batch_size = image.shape[0]
        metric_logger.update(loss=loss.item(),
                             lr=optimizer.param_groups[0]["lr"])
        metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
        metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)
        metric_logger.meters['img/s'].update(batch_size /
                                             (time.time() - start_time))

        if model_ema:
            model_ema.update_parameters(model)
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    model.train()
    mse_loss = MSELoss()
    l1_loss = L1Loss()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    lr_scheduler = None
    if epoch == 0:
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(data_loader) - 1)
        lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters,
                                                 warmup_factor)

    for images, distorted_bounding_box, ground_truth_box in metric_logger.log_every(
            data_loader, print_freq, header):
        images = images.to(device)
        distorted_bounding_box = distorted_bounding_box.to(device)
        ground_truth_box = ground_truth_box.to(device)

        prediction = model(images, distorted_bounding_box)
        directly_predicted_box, guided_predicted_box = prediction

        # Both outputs should produce the same result: the correct bounding box.
        difference = l1_loss(directly_predicted_box, guided_predicted_box)
        # Both the first and the second head should be accurate w.r.t. the ground truth.
        loss_1 = mse_loss(directly_predicted_box, ground_truth_box)
        loss_2 = mse_loss(guided_predicted_box, ground_truth_box)
        # Optimize for all of these criteria.
        loss = loss_1 + loss_2 + difference

        if not math.isfinite(loss):
            print("Loss is {}, stopping training".format(loss))
            sys.exit(1)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=loss)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
def train_teacher_model(model, labeled_dataset, optimizer, scheduler=None,
                        train_ratio=0.7, batch_size=4, device='cpu',
                        max_epochs=100, print_freq=10, save_path=None,
                        checkpoint=None):
    model.to(device)
    metric_logger = utils.MetricLogger(delimiter=" ")
    last_loss = 1e9
    cur_epoch = 0
    if checkpoint is not None:
        print("loading checkpoint:" + checkpoint)
        model, optimizer, scheduler, cur_epoch = load_checkpoint(
            model, optimizer, scheduler, device, checkpoint)

    train_dataset, vld_dataset = split_dataset(labeled_dataset, train_ratio)
    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              shuffle=True, collate_fn=collate_fn)
    vld_loader = DataLoader(vld_dataset, batch_size=batch_size,
                            shuffle=False, collate_fn=collate_fn)

    for epoch in range(cur_epoch, max_epochs):
        print("epoch {} / {}".format(epoch + 1, max_epochs))
        train_one_epoch(model, optimizer, train_loader, device, epoch,
                        print_freq)
        loss = evaluate(model, vld_loader, device, epoch, print_freq)
        if loss < last_loss and save_path is not None:
            save_checkpoint(model, optimizer, scheduler, epoch + 1, device,
                            save_path)
            last_loss = loss
        if scheduler is not None:
            scheduler.step()
def evaluate_mobilenet_ssd(model, data_loader, device):
    model.eval()
    predictor = create_mobilenetv1_ssd_predictor(model,
                                                 nms_method='hard',
                                                 device=device)
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Test:'
    jdict = []
    imgIds = []
    for images, gt_boxes, gt_labels, image_ids in metric_logger.log_every(
            data_loader, 50, header):
        imgIds.extend(image_ids)
        torch.cuda.synchronize()
        model_time = time.time()
        for image, img_id in zip(images, image_ids):
            boxes, labels, probs = predictor.predict(image)
            for box, label, prob in zip(boxes, labels, probs):
                box = xyxy2xywh_ssd(box)  # xywh
                jdict.append({
                    "image_id": img_id,
                    "category_id": int(label),
                    "bbox": box.cpu().numpy().tolist(),
                    "score": float(prob)
                })

        # Evaluate one batch
        model_time = time.time() - model_time
        evaluator_time = 0
        metric_logger.update(model_time=model_time,
                             evaluator_time=evaluator_time)

    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)

    # COCO eval
    # annotation_path = 'VOC2012.json'
    cocoEval = coco_eval_json('VOC2012.json', jdict, imgIds)
    return cocoEval
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    for images, targets in metric_logger.log_every(data_loader, print_freq,
                                                   header):
        # for images, targets in data_loader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        # loss in original paper
        # losses = loss_dict['loss_classifier'] + loss_dict['loss_box_reg']
        # losses_reduced = loss_dict_reduced['loss_classifier'] + loss_dict_reduced['loss_box_reg']

        loss_value = losses_reduced.item()

        optimizer.zero_grad()
        losses.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 2)
        optimizer.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

        if device == 'cuda':
            torch.cuda.empty_cache()

        del images
        del targets
        del losses_reduced
        del losses
        del loss_dict
        del loss_dict_reduced
def evaluate(model, loss_fn, data_loader):
    """Test dlrm model

    Args:
        model (dlrm):
        loss_fn (torch.nn.Module): Loss function
        data_loader (torch.utils.data.DataLoader):
    """
    # Test batch size could be big, make sure it prints
    default_print_freq = max(524288 * 100 // FLAGS.test_batch_size, 1)
    print_freq = default_print_freq if FLAGS.print_freq is None else FLAGS.print_freq

    steps_per_epoch = len(data_loader)
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'loss', utils.SmoothedValue(window_size=print_freq, fmt='{avg:.4f}'))
    metric_logger.add_meter(
        'step_time', utils.SmoothedValue(window_size=1, fmt='{avg:.4f}'))

    with torch.no_grad():
        # ROC could be computed per batch and AUC then aggregated globally, but that
        # code is not available here, so pack all the outputs and labels together to
        # compute AUC. y_true and y_score naming follows sklearn.
        y_true = []
        y_score = []
        stop_time = time()
        for step, (numerical_features, categorical_features,
                   click) in enumerate(data_loader):
            output = model(numerical_features, categorical_features).squeeze()
            loss = loss_fn(output, click)

            y_true.append(click)
            y_score.append(output)

            metric_logger.update(loss=loss.item())
            if step % print_freq == 0:
                metric_logger.update(step_time=(time() - stop_time) / print_freq)
                stop_time = time()
                metric_logger.print(header=f"Test: [{step}/{steps_per_epoch}]")

        auc = metrics.roc_auc_score(torch.cat(y_true),
                                    torch.sigmoid(torch.cat(y_score)))

    return metric_logger.loss.global_avg, auc
def evaluate(model, data_loader, device, epoch, print_freq):
    # test overfitting
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Validation: [{}]'.format(epoch)
    sum_loss = []
    with torch.no_grad():
        for images, targets in metric_logger.log_every(data_loader,
                                                       print_freq, header):
            # for images, targets in data_loader:
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()}
                       for t in targets]

            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

            # reduce losses over all GPUs for logging purposes
            loss_dict_reduced = utils.reduce_dict(loss_dict)
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())

            # loss in original paper
            # losses_reduced = loss_dict_reduced['loss_classifier'] + loss_dict_reduced['loss_box_reg']
            # losses = loss_dict['loss_classifier'] + loss_dict['loss_box_reg']

            if math.isfinite(losses.item()):
                sum_loss.append(losses.item())

            loss_value = losses_reduced.item()
            metric_logger.update(loss=losses_reduced, **loss_dict_reduced)

            if device == 'cuda':
                torch.cuda.empty_cache()

            del images
            del targets
            del losses_reduced
            del losses
            del loss_dict
            del loss_dict_reduced
            # break

    sum_loss = np.sum(sum_loss)
    return sum_loss
def evaluate_bin_yolo_2014(model, data_loader, device, bin_folder):
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Test:'
    jdict = []
    jdict_not_resized = []  # To compare with HW
    imgIds = []
    for imgs, _, paths, shapes in metric_logger.log_every(
            data_loader, 10, header):
        image_ids = [
            int(Path(image_path).stem.split('_')[-1]) for image_path in paths
        ]
        imgIds.extend(image_ids)

        # convert bin files to tensors
        imgs_tensor = torch.tensor([], dtype=torch.uint8)
        for i, img_id in enumerate(image_ids):
            path = os.path.join(bin_folder, str(img_id) + '.bin')
            with open(path, 'rb') as f:
                img_from_file = np.fromfile(f, np.uint8)
            img_from_file = np.reshape(img_from_file, imgs[i].shape)
            img_T = torch.tensor(img_from_file).unsqueeze(0)
            imgs_tensor = torch.cat((imgs_tensor, img_T), 0)

        # Evaluate one batch
        model_time, evaluator_time = eval_yolo_2014_batch(
            jdict, jdict_not_resized, model, imgs_tensor, image_ids, shapes,
            device)
        metric_logger.update(model_time=model_time,
                             evaluator_time=evaluator_time)

    # imgIds = [int(Path(x).stem.split('_')[-1]) for x in data_loader.dataset.img_files]

    # gather the stats from all processes
    # with open('jdict_not_resized.json', 'w') as f:
    #     json.dump(jdict_not_resized, f)
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)

    # COCO eval
    cocoEval = coco_eval_json(annotation_path, jdict, imgIds)
    return cocoEval
def preprocess_and_save_bin_yolo_2014(model, data_loader, device):
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Test:'
    shape_dict = {}
    for imgs, targets, paths, shapes in metric_logger.log_every(
            data_loader, 10, header):
        image_id = int(Path(paths[0]).stem.split('_')[-1])
        filePath = os.path.join(args.output_dir, str(image_id) + '.bin')
        transformed_np_img = imgs[0].cpu().numpy()
        transformed_np_img.tofile(filePath)
        shape_dict[str(image_id)] = [transformed_np_img.shape[1:], shapes]

    # save the collected image shapes for later evaluation
    jsonPath = os.path.join(args.output_dir, 'images_shape.json')
    with open(jsonPath, 'w') as fp:
        json.dump(shape_dict, fp)
def train_one_epoch(model, criterion, optimizer, data_loader, lr_scheduler,
                    device, epoch, writer, print_freq):
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr',
                            utils.SmoothedValue(window_size=1, fmt='{value}'))
    header = 'Epoch: [{}]'.format(epoch)
    for image, target in metric_logger.log_every(data_loader, print_freq,
                                                 header):
        image, target = image.to(device), target.to(device)
        output = model(image)
        loss, iou = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        lr_scheduler.step()

        metric_logger.update(loss=loss.item(),
                             lr=optimizer.param_groups[0]["lr"],
                             iou=iou)

        if random.random() < 0.15:
            writer.add_image(
                'input/train',
                torchvision.utils.make_grid([
                    torchvision.utils.make_grid(image),
                    torchvision.utils.make_grid(target),
                    torchvision.utils.make_grid(output['out'].data,
                                                normalize=True)
                ], nrow=1), epoch)
            writer.add_scalar('loss/train', loss.item(), epoch)
            writer.add_scalar('lr/train', optimizer.param_groups[0]["lr"],
                              epoch)
            writer.add_scalar('iou/train', iou, epoch)
def evaluate_unlabeled(model, data_loader, device):
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Test:'

    pred_scores = {}
    for image, targets in metric_logger.log_every(data_loader, 100, header):
        image = list(img.to(device) for img in image)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(image)
        for i in range(len(targets)):
            img_id = targets[i]['image_id'].item()
            pred_scores[img_id] = outputs[i]['vanilla_scores'].cpu().numpy()

        outputs = [{k: v.to(cpu_device) for k, v in t.items()}
                   for t in outputs]
        model_time = time.time() - model_time
        metric_logger.update(model_time=model_time)
        i += 1

    return pred_scores
def evaluate(model, criterion, data_loader, device, print_freq=100,
             dgPruner=None, output_dir=''):
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Test:'
    with torch.no_grad():
        if dgPruner:
            dgPruner.dump_growth_stat(output_dir, 1000)
            dgPruner.dump_sparsity_stat(model, output_dir, 1000)

        for image, target in metric_logger.log_every(data_loader, print_freq,
                                                     header):
            image = image.to(device)
            target = target.to(device)
            output = model(image)
            loss = criterion(output, target)

            acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = image.shape[0]
            metric_logger.update(loss=loss.item())
            metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
            metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()

    print(' * Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f}'.format(
        top1=metric_logger.acc1, top5=metric_logger.acc5))

    metrics = OrderedDict([('loss', metric_logger.loss),
                           ('top1', metric_logger.acc1),
                           ('top5', metric_logger.acc5)])
    return metrics
def validate_network(val_loader, model, linear_classifier, n, avgpool):
    linear_classifier.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Test:'
    for inp, target in metric_logger.log_every(val_loader, 20, header):
        # move to gpu
        inp = inp.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        # forward
        with torch.no_grad():
            intermediate_output = model.get_intermediate_layers(inp, n)
            output = [x[:, 0] for x in intermediate_output]
            if avgpool:
                output.append(
                    torch.mean(intermediate_output[-1][:, 1:], dim=1))
            output = torch.cat(output, dim=-1)
        output = linear_classifier(output)
        loss = nn.CrossEntropyLoss()(output, target)

        if linear_classifier.module.num_labels >= 5:
            acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
        else:
            acc1, = utils.accuracy(output, target, topk=(1,))

        batch_size = inp.shape[0]
        metric_logger.update(loss=loss.item())
        metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
        if linear_classifier.module.num_labels >= 5:
            metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)

    if linear_classifier.module.num_labels >= 5:
        print(
            '* Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f} loss {losses.global_avg:.3f}'
            .format(top1=metric_logger.acc1,
                    top5=metric_logger.acc5,
                    losses=metric_logger.loss))
    else:
        print('* Acc@1 {top1.global_avg:.3f} loss {losses.global_avg:.3f}'.
              format(top1=metric_logger.acc1, losses=metric_logger.loss))
    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
def evaluate(model, data_loader, device, num_classes):
    model.eval()
    confmat = utils.ConfusionMatrix(num_classes)
    metric_logger = utils.MetricLogger(delimiter=" ")
    class_iou_image = list()
    img_list = list()
    target_list = list()
    prediction_list = list()
    header = "Evaluate:"
    with torch.inference_mode():
        for image, target in metric_logger.log_every(data_loader, 100, header):
            image, target = image.to(device), target.to(device)
            confmat_image = utils.ConfusionMatrix(num_classes)
            output = model(image)
            output = output["out"]

            inv_normalize = transforms.Normalize(
                mean=(-0.485, -0.456, -0.406),
                std=(1 / 0.229, 1 / 0.224, 1 / 0.225))
            img_npy = inv_normalize(image[0], target)[0].cpu().detach().numpy()
            target_npy = target.cpu().detach().numpy()
            prediction_npy = output.cpu().detach().numpy()
            img_list.append(img_npy)
            target_list.append(target_npy)
            prediction_list.append(prediction_npy)

            confmat.update(target.flatten(), output.argmax(1).flatten())
            confmat_image.update(target.flatten(), output.argmax(1).flatten())
            class_iou_image.append(confmat_image.get_class_iou())
            confmat_image.reduce_from_all_processes()

        confmat.reduce_from_all_processes()

    return confmat, class_iou_image, img_list, target_list, prediction_list
def train_one_epoch(model, criterion, data_loader, optimizer, epoch, max_epoch):
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Epoch: [{}]/[{}]'.format(epoch, max_epoch - 1)
    print_freq = 20

    for batch in metric_logger.log_every(data_loader, print_freq, header):
        images = batch['img'].cuda()
        labels = batch['label'].cuda()

        logits = model(images)
        loss = criterion(logits, labels)
        # print(loss.detach().cpu().numpy())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        metric_logger.update(loss=loss.item())

    print("stats:", metric_logger)
    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}