def generate(model, post_processor, data_loader, device, verb_classes, missing_category_id):
    """Run HOI inference over *data_loader* and build V-COCO-style detection dicts.

    For every predicted human-object interaction, emits a dict holding the
    person box, per-verb role boxes (object box + score), and per-verb agent
    scores. Returns the flat list of all detection dicts.

    NOTE(review): assumes `post_processor` output items carry 'hoi_prediction'
    and 'predictions' keys shaped as indexed lists — confirm against the
    post-processor implementation.
    """
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Generate:'
    detections = []
    for samples, targets in metric_logger.log_every(data_loader, 10, header):
        samples = samples.to(device)
        outputs = model(samples)
        # Original (pre-resize) image sizes are needed to map boxes back.
        orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0)
        results = post_processor(outputs, orig_target_sizes)
        for img_results, img_targets in zip(results, targets):
            for hoi in img_results['hoi_prediction']:
                detection = {
                    'image_id': img_targets['img_id'],
                    'person_box': img_results['predictions'][hoi['subject_id']]['bbox'].tolist()
                }
                # A "missing" object category means the interaction has no
                # localized object; V-COCO encodes that as a NaN box.
                if img_results['predictions'][hoi['object_id']]['category_id'] == missing_category_id:
                    object_box = [np.nan, np.nan, np.nan, np.nan]
                else:
                    object_box = img_results['predictions'][hoi['object_id']]['bbox'].tolist()
                # Track the max role score per multi-role verb (cut/hit/eat
                # each have both _obj and _instr roles); the agent score is
                # the best of the two.
                cut_agent = 0
                hit_agent = 0
                eat_agent = 0
                for idx, score in zip(hoi['category_id'], hoi['score']):
                    verb_class = verb_classes[idx]
                    score = score.item()
                    if len(verb_class.split('_')) == 1:
                        # Verb with no role suffix: agent score only.
                        detection['{}_agent'.format(verb_class)] = score
                    elif 'cut_' in verb_class:
                        detection[verb_class] = object_box + [score]
                        cut_agent = score if score > cut_agent else cut_agent
                    elif 'hit_' in verb_class:
                        detection[verb_class] = object_box + [score]
                        hit_agent = score if score > hit_agent else hit_agent
                    elif 'eat_' in verb_class:
                        detection[verb_class] = object_box + [score]
                        eat_agent = score if score > eat_agent else eat_agent
                    else:
                        # Other role-suffixed verbs: role entry plus an agent
                        # score keyed by the bare verb name.
                        detection[verb_class] = object_box + [score]
                        detection['{}_agent'.format(
                            verb_class.replace('_obj', '').replace('_instr', ''))] = score
                detection['cut_agent'] = cut_agent
                detection['hit_agent'] = hit_agent
                detection['eat_agent'] = eat_agent
                detections.append(detection)
    return detections
def evaluate(model, criterion, dataset, visualizer, output_dir, args):
    """Evaluate *model* on *dataset* (Paddle dygraph), logging weighted losses.

    Runs under `dg.no_grad()`; periodically plots predictions through
    *visualizer*. Returns a dict of globally averaged meter values.

    Fix: the visualization trigger was `visualize_freq % count == 0`, which
    fires at every divisor of visualize_freq (batches 1, 2, 4, 5, ...) rather
    than every visualize_freq-th batch; inverted to `count % visualize_freq`.
    """
    with dg.no_grad():
        model.eval()
        metric_logger = utils.MetricLogger(args, delimiter=" ")
        metric_logger.add_meter(
            "class_error", utils.SmoothedValue(window_size=1, fmt="{value:.2f}"))
        header = "Test"
        print_freq = 10
        visualize_freq = 100 * print_freq
        count = 0
        for samples, targets in metric_logger.log_every(dataset, print_freq, header):
            outputs = model(samples)
            loss_dict = criterion(outputs, targets)
            weight_dict = criterion.weight_dict
            # Total loss = weighted sum over losses that have a weight entry.
            losses = sum(loss_dict[k] * weight_dict[k]
                         for k in loss_dict.keys() if k in weight_dict)
            losses = losses / args.batch_size
            metric_logger.update(loss=losses.numpy(), **loss_dict)
            metric_logger.update(class_error=loss_dict["class_error"])
            count += 1
            # Visualize every `visualize_freq` batches.
            if count % visualize_freq == 0:
                visualizer.plot_results(samples, outputs, targets)
        print("Averaged stats:", metric_logger)
        return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
def evaluate(model, criterion, postprocessors, data_loader, device, output_dir):
    """Validation pass for the template/search tracking model.

    Batches are re-packed into nested tensors, forwarded through the model,
    and the criterion's losses are accumulated (scaled and unscaled) in a
    metric logger. Returns the globally averaged meter values.
    """
    model.eval()
    criterion.eval()
    logger = utils.MetricLogger(delimiter=" ")
    header = 'Val:'
    for template_samples, search_samples, template_masks, search_masks, targets in logger.log_every(data_loader, 10, header):
        template_batch = utils.nested_tensor_from_tensor_list(
            template_samples, template_masks).to(device)
        search_batch = utils.nested_tensor_from_tensor_list(
            search_samples, search_masks).to(device)
        targets = [{key: val.to(device) for key, val in t.items()} for t in targets]

        outputs = model(search_batch, template_batch)
        loss_dict = criterion(outputs, targets)
        weights = criterion.weight_dict

        # reduce losses over all GPUs for logging purposes
        reduced = utils.reduce_dict(loss_dict)
        scaled = {}
        for name, value in reduced.items():
            if name in weights:
                scaled[name] = value * weights[name]
        unscaled = {f'{name}_unscaled': value for name, value in reduced.items()}
        logger.update(loss=sum(scaled.values()), **scaled, **unscaled)

    # gather the stats from all processes
    logger.synchronize_between_processes()
    print("Averaged validation stats:", logger)
    # do post processing if necessary for further testing
    stats = {name: meter.global_avg for name, meter in logger.meters.items()}
    return stats
def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module,
                    data_loader: Iterable, optimizer: torch.optim.Optimizer,
                    device: torch.device, epoch: int, max_norm: float = 0):
    """Train for one epoch; substitutes the previous batch when a batch has
    zero ground-truth boxes (some criteria cannot handle empty targets).

    Fix: if the very FIRST batch had zero GT boxes, `keep_for_debug` was still
    None and `targets, samples = keep_for_debug` raised a TypeError; such a
    batch is now skipped instead.

    Returns a dict of globally averaged meter values.
    """
    model.train()
    criterion.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    metric_logger.add_meter('class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    header = 'Epoch: [{}]'.format(epoch)
    print_freq = 300
    keep_for_debug = None  # last successfully processed (targets, samples)
    for samples, targets in metric_logger.log_every(data_loader, print_freq, header):
        samples = samples.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        # Count ground-truth boxes across the whole batch.
        num = sum(len(t['boxes']) for t in targets)
        if num == 0:
            print('met 0 gt!!!')
            if keep_for_debug is None:
                # No earlier batch to fall back on yet; skip this empty batch.
                continue
            targets, samples = keep_for_debug
        outputs = model(samples)
        loss_dict = criterion(outputs, targets)
        weight_dict = criterion.weight_dict
        losses = sum(loss_dict[k] * weight_dict[k]
                     for k in loss_dict.keys() if k in weight_dict)
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_unscaled = {f'{k}_unscaled': v
                                      for k, v in loss_dict_reduced.items()}
        loss_dict_reduced_scaled = {k: v * weight_dict[k]
                                    for k, v in loss_dict_reduced.items()
                                    if k in weight_dict}
        losses_reduced_scaled = sum(loss_dict_reduced_scaled.values())
        loss_value = losses_reduced_scaled.item()
        if not math.isfinite(loss_value):
            # Diverged (NaN/inf): dump the reduced losses and abort.
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)
        optimizer.zero_grad()
        losses.backward()
        if max_norm > 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        optimizer.step()
        metric_logger.update(loss=loss_value,
                             **loss_dict_reduced_scaled,
                             **loss_dict_reduced_unscaled)
        metric_logger.update(class_error=loss_dict_reduced['class_error'])
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
        keep_for_debug = (targets, samples)
    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
def train_one_epoch(model: torch.nn.Module, dataloader: Iterable,
                    optimizer: torch.optim.Optimizer, device: torch.device,
                    epoch: int):
    """Train the body-pose model for one epoch and return averaged stats.

    Hand/head crops are disabled here (passed as None); only full images and
    body crops are fed to the model.

    Fix: `data_iter.next()` is the Python-2 iterator protocol and raises
    AttributeError on plain Python-3 iterators; replaced with the builtin
    `next(data_iter)`. The no-op `{f'{k}': v ...}` re-keying of the reduced
    loss dict was also removed.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)
    print_freq = 100
    data_iter = iter(dataloader)
    # Iterate by index so the logger drives the loop; pull batches manually.
    for _ in metric_logger.log_every(range(len(dataloader)), print_freq, header):
        batch = next(data_iter)
        full_imgs_list, body_imgs, body_targets = batch
        # Hand/head branches are unused in this training configuration.
        hand_imgs, hand_targets = None, None
        head_imgs, head_targets = None, None
        full_imgs = to_image_list(full_imgs_list)
        if full_imgs is not None:
            full_imgs = full_imgs.to(device=device)
        body_imgs = body_imgs.to(device=device)
        body_targets = [target.to(device) for target in body_targets]
        output = model(body_imgs, body_targets,
                       hand_imgs=hand_imgs, hand_targets=hand_targets,
                       head_imgs=head_imgs, head_targets=head_targets,
                       full_imgs=full_imgs, device=device)
        loss_dict = output['losses']
        # Unweighted sum of all reported losses.
        losses = sum(loss_dict[k] for k in loss_dict)
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss_dict_reduced.values())
        loss_value = losses_reduced.item()
        metric_logger.update(loss=loss_value, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module,
                    data_loader: Iterable, optimizer: torch.optim.Optimizer,
                    device: torch.device, epoch: int, max_norm: float = 0):
    """Train for one epoch using a CUDA data prefetcher.

    The prefetcher overlaps host-to-device copies with compute: the next
    (samples, targets) pair is fetched at the END of each iteration, so the
    loop iterates over indices rather than the loader itself. The model here
    returns a (outputs, pre_outputs) pair that the criterion consumes.
    Returns a dict of globally averaged meter values.
    """
    model.train()
    criterion.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    metric_logger.add_meter('class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    metric_logger.add_meter('grad_norm', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    header = 'Epoch: [{}]'.format(epoch)
    print_freq = 10
    prefetcher = data_prefetcher(data_loader, device, prefetch=True)
    # Prime the pipeline with the first batch.
    samples, targets = prefetcher.next()
    # for samples, targets in metric_logger.log_every(data_loader, print_freq, header):
    for _ in metric_logger.log_every(range(len(data_loader)), print_freq, header):
        outputs, pre_outputs = model(samples)
        loss_dict = criterion(outputs, targets, pre_outputs)
        weight_dict = criterion.weight_dict
        losses = sum(loss_dict[k] * weight_dict[k]
                     for k in loss_dict.keys() if k in weight_dict)
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_unscaled = {f'{k}_unscaled': v
                                      for k, v in loss_dict_reduced.items()}
        loss_dict_reduced_scaled = {k: v * weight_dict[k]
                                    for k, v in loss_dict_reduced.items()
                                    if k in weight_dict}
        losses_reduced_scaled = sum(loss_dict_reduced_scaled.values())
        loss_value = losses_reduced_scaled.item()
        if not math.isfinite(loss_value):
            # Diverged (NaN/inf): dump losses and abort the run.
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)
        optimizer.zero_grad()
        losses.backward()
        if max_norm > 0:
            grad_total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        else:
            # No clipping requested: just measure the gradient norm for logging.
            grad_total_norm = utils.get_total_grad_norm(model.parameters(), max_norm)
        optimizer.step()
        metric_logger.update(loss=loss_value,
                             **loss_dict_reduced_scaled,
                             **loss_dict_reduced_unscaled)
        metric_logger.update(class_error=loss_dict_reduced['class_error'])
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
        metric_logger.update(grad_norm=grad_total_norm)
        # Kick off the async fetch of the next batch.
        samples, targets = prefetcher.next()
    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
def evaluate(model, criterion, postprocessors, data_loader, device, args):
    """Evaluate temporal action detection on THUMOS14.

    Maps sorted annotation video names to integer ids, feeds each clip through
    the model, logs reduced losses, and accumulates post-processed detections
    into a Thumos14Evaluator.

    Returns (thumos_evaluator, loss_dict).
    NOTE(review): the returned `loss_dict` is the *last batch's* (un-reduced)
    loss dict, not an epoch average — confirm callers expect that.
    """
    print(colored('evaluate', 'red'))
    model.eval()
    criterion.eval()
    metric_logger = utils.MetricLogger(delimiter=' ')
    # Stage 3 criteria don't report class_error, so only add the meter otherwise.
    if args.stage != 3:
        metric_logger.add_meter(
            'class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    header = 'Test:'
    thumos_evaluator = Thumos14Evaluator()
    # Deterministic video-id mapping: sorted annotation keys -> index.
    video_pool = list(load_json(args.annotation_path).keys())
    video_pool.sort()
    video_dict = {i: video_pool[i] for i in range(len(video_pool))}
    for vid_name_list, locations, samples, targets, num_frames, base, s_e_scores in metric_logger.log_every(
            data_loader, 10, header):
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        outputs = model(locations, samples, s_e_scores)
        loss_dict = criterion(outputs, targets)
        weight_dict = criterion.weight_dict
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_scaled = {
            k: v * weight_dict[k]
            for k, v in loss_dict_reduced.items() if k in weight_dict
        }
        loss_dict_reduced_unscaled = {
            f'{k}_unscaled': v
            for k, v in loss_dict_reduced.items()
        }
        metric_logger.update(loss=sum(loss_dict_reduced_scaled.values()),
                             **loss_dict_reduced_scaled,
                             **loss_dict_reduced_unscaled)
        if args.stage != 3:
            metric_logger.update(class_error=loss_dict_reduced['class_error'])
        # Convert predictions to absolute temporal segments per video.
        results = postprocessors['bbox'](outputs, num_frames, base)
        for target, output in zip(targets, results):
            vid = video_dict[target['video_id'].item()]
            thumos_evaluator.update(vid, output)
    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    thumos_evaluator.synchronize_between_processes()
    print('Averaged stats:', metric_logger)
    return thumos_evaluator, loss_dict
def evaluate_hoi(dataset_file, model, postprocessors, data_loader,
                 subject_category_id, device):
    """Evaluate HOI detection on HICO-DET or V-COCO and return evaluator stats.

    Predictions and ground truths are all-gathered across processes, then
    de-duplicated by image id (distributed samplers may pad/replicate images).

    Fixes: the byte-identical 'hico' / 'hico_second' branches are merged; an
    unrecognized `dataset_file` now raises ValueError instead of failing later
    with a NameError on `evaluator`; de-dup membership testing uses a set
    instead of O(n) scans of a numpy array.
    """
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Test:'
    preds = []
    gts = []
    for samples, targets in metric_logger.log_every(data_loader, 10, header):
        samples = samples.to(device)
        outputs = model(samples)
        orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0)
        results = postprocessors['hoi'](outputs, orig_target_sizes)
        preds.extend(
            list(itertools.chain.from_iterable(utils.all_gather(results))))
        # For avoiding a runtime error, the copy is used
        gts.extend(
            list(
                itertools.chain.from_iterable(
                    utils.all_gather(copy.deepcopy(targets)))))
    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    # Keep only the first occurrence of every image id (drop duplicates
    # introduced by the distributed sampler).
    img_ids = [img_gts['id'] for img_gts in gts]
    _, indices = np.unique(img_ids, return_index=True)
    keep = set(indices.tolist())
    preds = [img_preds for i, img_preds in enumerate(preds) if i in keep]
    gts = [img_gts for i, img_gts in enumerate(gts) if i in keep]
    if dataset_file in ('hico', 'hico_second'):
        evaluator = HICOEvaluator(preds, gts, subject_category_id,
                                  data_loader.dataset.rare_triplets,
                                  data_loader.dataset.non_rare_triplets,
                                  data_loader.dataset.correct_mat)
    elif dataset_file == 'vcoco':
        evaluator = VCOCOEvaluator(preds, gts, subject_category_id,
                                   data_loader.dataset.correct_mat)
    else:
        raise ValueError('unsupported dataset_file: {}'.format(dataset_file))
    stats = evaluator.evaluate()
    return stats
def train_one_epoch(model, criterion, dataset, optimizer, visualizer, epoch,
                    max_norm, args):
    """Train for one epoch (Paddle dygraph) with periodic visualization.

    Fix: the visualization trigger was `visualize_freq % count == 0`, which
    fires at every divisor of visualize_freq (batches 1, 2, 4, 5, ...) rather
    than every visualize_freq-th batch; inverted to `count % visualize_freq`.

    Returns a dict of globally averaged meter values.
    """
    model.train()
    metric_logger = utils.MetricLogger(args, delimiter=" ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    metric_logger.add_meter(
        'class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    header = 'Epoch: [{}]'.format(epoch)
    print_freq = 10
    visualize_freq = 100 * print_freq
    count = 0
    for samples, targets in metric_logger.log_every(dataset, print_freq, header):
        outputs = model(samples)
        loss_dict = criterion(outputs, targets)
        weight_dict = criterion.weight_dict
        # Weighted sum over losses that have a weight entry.
        losses = sum(loss_dict[k] * weight_dict[k]
                     for k in loss_dict.keys() if k in weight_dict)
        losses = losses / args.batch_size
        if not math.isfinite(losses.numpy()):
            # Diverged (NaN/inf): dump losses and abort the run.
            print("Loss is {}, stopping training".format(losses.numpy()))
            print(loss_dict)
            sys.exit(1)
        losses.backward()
        optimizer.minimize(losses)
        optimizer.clear_gradients()
        metric_logger.update(loss=losses.numpy(), **loss_dict)
        metric_logger.update(class_error=loss_dict["class_error"])
        metric_logger.update(lr=optimizer.current_step_lr())
        count += 1
        # Visualize every `visualize_freq` batches.
        if count % visualize_freq == 0:
            visualizer.plot_results(samples, outputs, targets)
    print("Averaged stats:", metric_logger)
    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
def evaluate(model, data_loader, device, epoch, experiment=None):
    """Validation pass for the latent video-prediction model.

    For each batch: encode the conditional frames, solve for the optimal
    latent against the target frame (detached, no gradient), decode the
    predicted frame, and log the resulting free-energy terms. Averaged stats
    are returned, and the average total loss is optionally reported to the
    experiment tracker.
    """
    model.eval()
    logger = utils.MetricLogger(delimiter=" ")
    header = "Test:"
    print_freq = 10
    for batch in logger.log_every(data_loader, print_freq, header):
        cond = batch["conditional_frames"].to(device)
        ptp = batch["PTP"].to(device)
        target = batch["target_frame"].to(device)

        encoded = model.encode_frames(cond)
        # The optimal latent is treated as a constant during evaluation.
        latent = model.compute_optimal_latent(encoded.detach(), ptp, target).detach()
        latent.requires_grad = False

        hidden = model.hidden_predictor(encoded, ptp, latent)
        prediction = model.frame_decoder(hidden)
        energies = model.compute_energies(prediction, target)
        logger.update(loss=energies["total"], **energies)

    # Gather the stats from all processes
    logger.synchronize_between_processes()
    print("Averaged stats:", logger)
    avg_stats = {name: meter.global_avg for name, meter in logger.meters.items()}
    if experiment is not None:
        experiment.log_metrics({f"val_avg_loss": avg_stats["total"]}, step=epoch)
    return avg_stats
def evaluate(model, criterion, postprocessors, data_loader, base_ds, device,
             output_dir, log_step=0):
    """Evaluate a DETR-style detector on COCO with optional wandb visualization.

    Runs bbox (and optionally segm/panoptic) post-processing into CocoEvaluator /
    PanopticEvaluator, and — every 50th `log_step` (and step 0), on the main
    process only — captures backbone features and attention maps via forward
    hooks to render wandb images for up to 15 sampled images (tracked in the
    module-level LOG_IDX list, which persists across calls).

    Returns (stats dict, coco_evaluator).
    """
    model.eval()
    criterion.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    header = 'Test:'
    iou_types = tuple(k for k in ('segm', 'bbox') if k in postprocessors.keys())
    coco_evaluator = CocoEvaluator(base_ds, iou_types)
    panoptic_evaluator = None
    if 'panoptic' in postprocessors.keys():
        panoptic_evaluator = PanopticEvaluator(
            data_loader.dataset.ann_file,
            data_loader.dataset.ann_folder,
            output_dir=os.path.join(output_dir, "panoptic_eval"),
        )
    dataset = data_loader.dataset
    # category id -> human-readable class name, for visualization labels.
    classes = {
        cat["id"]: cat["name"]
        for cat in dataset.coco.dataset["categories"]
    }
    wandb_imgs = {"images": [], "self_attention": [], "attention": []}
    # Log every 50 steps and in step 0
    log_this = output_dir and utils.is_main_process() and (
        (log_step + 1) % 50 == 0 or log_step == 0)
    conv_features, enc_attn_weights, dec_attn_weights = [], [], []
    for samples, targets in metric_logger.log_every(data_loader, 10, header):
        samples = samples.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        # Decide whether to visualize this image: once 15 image ids have been
        # collected in LOG_IDX, only re-log those; before that, sample images
        # with >3 labels at 30% probability.
        log_image = False
        if log_this:
            if len(LOG_IDX) == 15:
                if targets[0]["image_id"] in LOG_IDX:
                    log_image = True
            elif random.random() < 0.3 and len(
                    targets[0]["labels"].tolist()) > 3:
                LOG_IDX.append(targets[0]["image_id"])
                log_image = True
        if log_image:
            # Taken from https://colab.research.google.com/github/facebookresearch/detr/blob/colab/notebooks/detr_attention.ipynb
            hooks = [
                model.module.backbone[-2].register_forward_hook(
                    lambda self, input, output: conv_features.append(output)),
                model.module.transformer.encoder.layers[-1].self_attn.
                register_forward_hook(lambda self, input, output:
                                      enc_attn_weights.append(output[1])),
                model.module.transformer.decoder.layers[-1].multihead_attn.
                register_forward_hook(lambda self, input, output:
                                      dec_attn_weights.append(output[1])),
            ]
        outputs = model(samples)
        loss_dict = criterion(outputs, targets)
        weight_dict = criterion.weight_dict
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_scaled = {
            k: v * weight_dict[k]
            for k, v in loss_dict_reduced.items() if k in weight_dict
        }
        loss_dict_reduced_unscaled = {
            f'{k}_unscaled': v
            for k, v in loss_dict_reduced.items()
        }
        metric_logger.update(loss=sum(loss_dict_reduced_scaled.values()),
                             **loss_dict_reduced_scaled,
                             **loss_dict_reduced_unscaled)
        metric_logger.update(class_error=loss_dict_reduced['class_error'])
        orig_target_sizes = torch.stack([t["orig_size"] for t in targets],
                                        dim=0)
        results = postprocessors['bbox'](outputs, orig_target_sizes)
        # Gather images to log to wandb
        if log_image:
            # get the HxW shape of the feature maps of the CNN
            f_map = conv_features[-1]['0'].tensors.cpu()
            shape = f_map.shape[-2:]
            sattn = enc_attn_weights[-1][0].reshape(shape + shape).cpu()
            dec_att = dec_attn_weights[-1].cpu()
            target = targets[0]
            logits = outputs["pred_logits"][0]
            boxes = outputs["pred_boxes"][0]
            pred = {"pred_logits": logits, "pred_boxes": boxes}
            name = dataset.coco.imgs[target["image_id"].item()]["file_name"]
            path = os.path.join(dataset.root, name)
            img, self_attention, att_map = create_wandb_img(
                classes, path, target, pred, sattn, f_map, dec_att)
            wandb_imgs["images"].append(img)
            wandb_imgs["self_attention"].append(self_attention)
            wandb_imgs["attention"].append(att_map)
            # Free memory
            del conv_features[-1]
            del enc_attn_weights[-1]
            for hook in hooks:
                hook.remove()
        if 'segm' in postprocessors.keys():
            target_sizes = torch.stack([t["size"] for t in targets], dim=0)
            results = postprocessors['segm'](results, outputs,
                                             orig_target_sizes, target_sizes)
        res = {
            target['image_id'].item(): output
            for target, output in zip(targets, results)
        }
        if coco_evaluator is not None:
            coco_evaluator.update(res)
        if panoptic_evaluator is not None:
            res_pano = postprocessors["panoptic"](outputs, target_sizes,
                                                  orig_target_sizes)
            for i, target in enumerate(targets):
                image_id = target["image_id"].item()
                file_name = f"{image_id:012d}.png"
                res_pano[i]["image_id"] = image_id
                res_pano[i]["file_name"] = file_name
            panoptic_evaluator.update(res_pano)
    # Log all images to wandb
    if log_this:
        wandb.log({"Images": wandb_imgs["images"]}, step=log_step)
        wandb.log({"Self Attention": wandb_imgs["self_attention"]},
                  step=log_step)
        wandb.log({"Attention": wandb_imgs["attention"]}, step=log_step)
    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    if coco_evaluator is not None:
        coco_evaluator.synchronize_between_processes()
    if panoptic_evaluator is not None:
        panoptic_evaluator.synchronize_between_processes()
    # accumulate predictions from all images
    if coco_evaluator is not None:
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
    panoptic_res = None
    if panoptic_evaluator is not None:
        panoptic_res = panoptic_evaluator.summarize()
    stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()}
    if coco_evaluator is not None:
        if 'bbox' in postprocessors.keys():
            stats['coco_eval_bbox'] = coco_evaluator.coco_eval[
                'bbox'].stats.tolist()
        if 'segm' in postprocessors.keys():
            stats['coco_eval_masks'] = coco_evaluator.coco_eval[
                'segm'].stats.tolist()
    if panoptic_res is not None:
        stats['PQ_all'] = panoptic_res["All"]
        stats['PQ_th'] = panoptic_res["Things"]
        stats['PQ_st'] = panoptic_res["Stuff"]
    return stats, coco_evaluator
def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module,
                    scaler: amp.GradScaler, data_loader: Iterable,
                    optimizer: torch.optim.Optimizer, device: torch.device,
                    epoch: int, max_norm: float = 0):
    """Train for one epoch with optional mixed precision (torch.cuda.amp).

    The forward pass runs under autocast; when AMP is enabled, model outputs
    are cast back to fp32 before the criterion for numerically stable loss
    computation. Backward/step go through the GradScaler (with unscaling
    before gradient clipping). Returns a dict of globally averaged meters.
    """
    model.train()
    criterion.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    metric_logger.add_meter(
        'class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    header = 'Epoch: [{}]'.format(epoch)
    print_freq = 10
    for samples, targets in metric_logger.log_every(data_loader, print_freq,
                                                    header):
        # import ipdb; ipdb.set_trace()
        samples = samples.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        # outputs = model(samples)
        with amp.autocast(enabled=scaler.is_enabled()):
            outputs = model(samples)
        # Cast fp16 outputs back to fp32 so the loss is computed in full
        # precision when AMP is on.
        outputs = to_fp32(outputs) if scaler.is_enabled() else outputs
        loss_dict = criterion(outputs, targets)
        weight_dict = criterion.weight_dict
        losses = sum(loss_dict[k] * weight_dict[k]
                     for k in loss_dict.keys() if k in weight_dict)
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_unscaled = {
            f'{k}_unscaled': v
            for k, v in loss_dict_reduced.items()
        }
        loss_dict_reduced_scaled = {
            k: v * weight_dict[k]
            for k, v in loss_dict_reduced.items() if k in weight_dict
        }
        losses_reduced_scaled = sum(loss_dict_reduced_scaled.values())
        loss_value = losses_reduced_scaled.item()
        if not math.isfinite(loss_value):
            # Diverged (NaN/inf): dump losses and abort the run.
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)
        optimizer.zero_grad()
        # losses.backward()
        scaler.scale(losses).backward()
        if max_norm > 0:
            # Gradients must be unscaled before clipping by norm.
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        # optimizer.step()
        scaler.step(optimizer)
        scaler.update()
        metric_logger.update(loss=loss_value,
                             **loss_dict_reduced_scaled,
                             **loss_dict_reduced_unscaled)
        metric_logger.update(class_error=loss_dict_reduced['class_error'])
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
def train_one_epoch(model, criterion, postprocessors, data_loader, optimizer,
                    device, epoch, max_norm, args):
    """Train the LETR line detector for one epoch.

    With `args.LETRpost` the model itself runs the coarse stage and returns
    (outputs, origin_indices) that the criterion needs; otherwise it is a
    plain forward pass. CUDA OOM during the forward is turned into a clean
    process exit. Returns a dict of globally averaged meter values.
    """
    model.train()
    criterion.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)
    print_freq = 10
    counter = 0
    torch.cuda.empty_cache()
    for samples, targets in metric_logger.log_every(data_loader, print_freq,
                                                    header):
        samples = samples.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        try:
            if args.LETRpost:
                # Post-processing variant: the model also consumes the
                # postprocessors/criterion and returns matching indices.
                outputs, origin_indices = model(samples, postprocessors,
                                                targets, criterion)
                loss_dict = criterion(outputs, targets, origin_indices)
            else:
                outputs = model(samples)
                loss_dict = criterion(outputs, targets)
        except RuntimeError as e:
            # Exit cleanly on CUDA OOM; re-raise anything else.
            if "out of memory" in str(e):
                sys.exit('Out Of Memory')
            else:
                raise e
        weight_dict = criterion.weight_dict
        losses = sum(loss_dict[k] * weight_dict[k]
                     for k in loss_dict.keys() if k in weight_dict)
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_unscaled = {
            f'{k}_unscaled': v
            for k, v in loss_dict_reduced.items()
        }
        loss_dict_reduced_scaled = {
            k: v * weight_dict[k]
            for k, v in loss_dict_reduced.items() if k in weight_dict
        }
        losses_reduced_scaled = sum(loss_dict_reduced_scaled.values())
        loss_value = losses_reduced_scaled.item()
        if not math.isfinite(loss_value):
            # Diverged (NaN/inf): dump losses and abort the run.
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)
        optimizer.zero_grad()
        losses.backward()
        if max_norm > 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        optimizer.step()
        metric_logger.update(loss=loss_value,
                             **loss_dict_reduced_scaled,
                             **loss_dict_reduced_unscaled)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
def evaluate(model, criterion, postprocessors, data_loader, base_ds, device,
             output_dir, args):
    """Evaluate the LETR line detector; optionally export benchmark .npz files.

    Logs reduced losses for every batch. When `args.benchmark` is set
    (batch size must be 1), predicted lines are rescaled to a 128-normalized
    yx coordinate frame, sorted by descending score, and saved as
    `{lines, score}` npz files named after the original image — under a
    york or wireframe benchmark folder depending on `args.coco_path`.

    Returns a dict of globally averaged meter values.
    """
    model.eval()
    criterion.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Test:'
    # Map COCO image ids back to original file-name stems for npz naming.
    id_to_img = {}
    f = open(
        os.path.join(args.coco_path, "annotations",
                     "lines_{}2017.json".format(args.dataset)))
    data = json.load(f)
    for d in data['images']:
        id_to_img[d['id']] = d['file_name'].split('.')[0]
    counter = 0
    num_images = 0
    for samples, targets in metric_logger.log_every(data_loader, 10, header):
        samples = samples.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        if args.LETRpost:
            outputs, origin_indices = model(samples, postprocessors, targets,
                                            criterion)
            loss_dict = criterion(outputs, targets, origin_indices)
        else:
            outputs = model(samples)
            loss_dict = criterion(outputs, targets)
        weight_dict = criterion.weight_dict
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_scaled = {
            k: v * weight_dict[k]
            for k, v in loss_dict_reduced.items() if k in weight_dict
        }
        loss_dict_reduced_unscaled = {
            f'{k}_unscaled': v
            for k, v in loss_dict_reduced.items()
        }
        metric_logger.update(loss=sum(loss_dict_reduced_scaled.values()),
                             **loss_dict_reduced_scaled,
                             **loss_dict_reduced_unscaled)
        if args.benchmark:
            orig_target_sizes = torch.stack([t["orig_size"] for t in targets],
                                            dim=0)
            results = postprocessors['line'](outputs, orig_target_sizes,
                                             "prediction")
            pred_logits = outputs['pred_logits']
            bz = pred_logits.shape[0]
            # Benchmark export assumes batch size 1 (one npz per image).
            assert bz == 1
            query = pred_logits.shape[1]
            rst = results[0]['lines']
            pred_lines = rst.view(query, 2, 2)
            pred_lines = pred_lines.flip([-1])  # this is yxyx format
            h, w = targets[0]['orig_size'].tolist()
            # Rescale endpoints into a 128-normalized coordinate frame.
            pred_lines[:, :, 0] = pred_lines[:, :, 0] * (128)
            pred_lines[:, :, 0] = pred_lines[:, :, 0] / h
            pred_lines[:, :, 1] = pred_lines[:, :, 1] * (128)
            pred_lines[:, :, 1] = pred_lines[:, :, 1] / w
            score = results[0]['scores'].cpu().numpy()
            line = pred_lines.cpu().numpy()
            # Sort lines by descending confidence.
            score_idx = np.argsort(-score)
            line = line[score_idx]
            score = score[score_idx]
            os.makedirs(args.output_dir + '/benchmark', exist_ok=True)
            if 'data/york_processed' in args.coco_path:
                append_path = '/benchmark/benchmark_york_' + args.append_word
                os.makedirs(args.output_dir + append_path, exist_ok=True)
                checkpoint_path = args.output_dir + append_path + '/{}.npz'
                curr_img_id = targets[0]['image_id'].tolist()[0]
                np.savez(checkpoint_path.format(id_to_img[curr_img_id]), **{
                    'lines': line,
                    'score': score
                })
            elif 'data/wireframe_processed' in args.coco_path:
                append_path = '/benchmark/benchmark_val_' + args.append_word
                os.makedirs(args.output_dir + append_path, exist_ok=True)
                checkpoint_path = args.output_dir + append_path + '/{:08d}.npz'
                curr_img_id = targets[0]['image_id'].tolist()[0]
                np.savez(checkpoint_path.format(int(id_to_img[curr_img_id])),
                         **{
                             'lines': line,
                             'score': score
                         })
            else:
                # Only the york/wireframe benchmark layouts are supported.
                assert False
            # NOTE(review): placement of this counter relative to the
            # benchmark branch is ambiguous in the original formatting;
            # it is unused elsewhere in this function.
            num_images += 1
    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    # accumulate predictions from all images
    stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()}
    return stats
def test_wider(model, criterion, postprocessors, dset, data_loader, device,
               output_dir):
    """Run face detection over WIDER FACE and write per-image result files.

    For each image, writes `<output_dir>/wider/<scene>/<image>.txt` in the
    official WIDER submission format: image stem, face count, then one
    `x y w h score` line per detection with label 0 (face).

    NOTE(review): `num_faces = len(labels) - sum(labels).item()` counts
    label==0 entries only if labels are strictly {0, 1} — confirm the
    postprocessor's label encoding.
    """
    model.eval()
    criterion.eval()
    output_dir = Path(output_dir) / "wider"
    output_dir.mkdir(parents=True, exist_ok=True)
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        "class_error", utils.SmoothedValue(window_size=1, fmt="{value:.2f}")
    )
    header = "Test:"
    for samples, targets in metric_logger.log_every(data_loader, 10, header):
        samples = samples.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        outputs = model(samples, targets)
        loss_dict = criterion(outputs, targets)
        weight_dict = criterion.weight_dict
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_scaled = {
            k: v * weight_dict[k]
            for k, v in loss_dict_reduced.items()
            if k in weight_dict
        }
        loss_dict_reduced_unscaled = {
            f"{k}_unscaled": v for k, v in loss_dict_reduced.items()
        }
        metric_logger.update(
            loss=sum(loss_dict_reduced_scaled.values()),
            **loss_dict_reduced_scaled,
            **loss_dict_reduced_unscaled,
        )
        metric_logger.update(class_error=loss_dict_reduced["class_error"])
        orig_target_sizes = torch.stack([t["orig_size"] for t in targets],
                                        dim=0)
        results = postprocessors["bbox"](outputs, orig_target_sizes)
        res = {
            target["image_id"].item(): output
            for target, output in zip(targets, results)
        }
        image_ids = [target["image_id"].item() for target in targets]
        for i in range(len(image_ids)):
            image_id = image_ids[i]
            # WIDER layout: results grouped into per-scene folders.
            scene_folder = output_dir / dset.data[image_id]["img_path"].split("/")[0]
            scene_folder.mkdir(parents=True, exist_ok=True)
            file_name = (
                dset.data[image_id]["img_path"].split("/")[1].replace("jpg", "txt")
            )
            labels = res[image_id]["labels"]
            scores = res[image_id]["scores"]
            bboxes = res[image_id]["boxes"]
            # Number of detections with label 0 (face).
            num_faces = len(labels) - sum(labels).item()
            with open(scene_folder / file_name, "w") as f:
                f.write(file_name.split(".")[0] + "\n")
                f.write(str(num_faces) + "\n")
                for j in range(len(labels)):
                    if labels[j] == 0:
                        bb = bboxes[j]
                        sc = scores[j]
                        # Convert xyxy box to `x y w h score`.
                        d = "{0} {1} {2} {3} {4}\n".format(
                            int(bb[0].item()),
                            int(bb[1].item()),
                            int(bb[2].item() - bb[0].item()),
                            int(bb[3].item() - bb[1].item()),
                            sc.item(),
                        )
                        f.write(d)
def evaluate(args, model, criterion, postprocessors, dataloader, support_data_loader, base_ds, device, type='all'):
    """Few-shot detection evaluation: extract per-category support codes, then
    run COCO-style evaluation over ``dataloader``.

    The support set is sampled ``number_of_supports`` times; the category codes
    from all samples are averaged before inference.  ``type`` selects which
    category subset ('all', 'base', or 'novel') the evaluator scores.
    Returns ``(stats, evaluator)``.

    NOTE(review): the parameter name ``type`` shadows the builtin.
    """
    model.eval()
    criterion.eval()
    # First: Obtain Category Codes for All Categories to Detect
    support_iter = iter(support_data_loader)
    all_category_codes_final = []
    print("Extracting support category codes...")
    number_of_supports = 100  # This is the number of support images to use for each category. Need be large enough.
    for i in range(number_of_supports):
        try:
            support_images, support_class_ids, support_targets = next(support_iter)
        # NOTE(review): bare except — should be `except StopIteration:`; as
        # written it also masks real errors raised inside the data loader.
        except:
            # Loader exhausted: restart it and keep sampling.
            support_iter = iter(support_data_loader)
            support_images, support_class_ids, support_targets = next(support_iter)
        # Drop the leading batch dimension (support loader yields batch size 1).
        support_images = [support_image.squeeze(0) for support_image in support_images]
        support_class_ids = support_class_ids.squeeze(0).to(device)
        support_targets = [{k: v.squeeze(0) for k, v in t.items()} for t in support_targets]
        num_classes = support_class_ids.shape[0]
        # Process the support classes in episodes of args.episode_size.
        num_episode = math.ceil(num_classes / args.episode_size)
        category_codes_final = []
        support_class_ids_final = []
        # NOTE(review): this inner `i` shadows the outer sampling index `i`.
        for i in range(num_episode):
            if (args.episode_size * (i + 1)) <= num_classes:
                # Full episode: take the next episode_size support images.
                support_images_ = utils.nested_tensor_from_tensor_list(
                    support_images[(args.episode_size * i): (args.episode_size * (i + 1))]
                ).to(device)
                support_targets_ = [
                    {k: v.to(device) for k, v in t.items()}
                    for t in support_targets[(args.episode_size * i): (args.episode_size * (i + 1))]
                ]
                support_class_ids_ = support_class_ids[(args.episode_size * i): (args.episode_size* (i + 1))]
            else:
                # Ragged tail: re-use the last episode_size entries so every
                # episode has a fixed size (some classes are scored twice).
                support_images_ = utils.nested_tensor_from_tensor_list(
                    support_images[-args.episode_size:]
                ).to(device)
                support_targets_ = [
                    {k: v.to(device) for k, v in t.items()}
                    for t in support_targets[-args.episode_size:]
                ]
                support_class_ids_ = support_class_ids[-args.episode_size:]
            if isinstance(model, torch.nn.parallel.DistributedDataParallel):
                category_code = model.module.compute_category_codes(support_images_, support_targets_)
            else:
                category_code = model.compute_category_codes(support_images_, support_targets_)
            category_code = torch.stack(category_code, dim=0)  # (num_enc_layer, args.total_num_support, d)
            category_codes_final.append(category_code)
            support_class_ids_final.append(support_class_ids_)
        # NOTE(review): support_class_ids_final is rebuilt every outer
        # iteration, so only the ids of the LAST support sample survive the
        # loop — presumably the class ordering is identical across samples;
        # verify against the support loader.
        support_class_ids_final = torch.cat(support_class_ids_final, dim=0)
        category_codes_final = torch.cat(category_codes_final, dim=1)  # (num_enc_layer, num_episode x args.total_num_support, d)
        all_category_codes_final.append(category_codes_final)
    if args.num_feature_levels == 1:
        # Average the category codes over all support samplings.
        all_category_codes_final = torch.stack(all_category_codes_final, dim=0)  # (number_of_supports, num_enc_layer, num_episode x args.total_num_support, d)
        all_category_codes_final = torch.mean(all_category_codes_final, 0, keepdims=False)
        all_category_codes_final = list(torch.unbind(all_category_codes_final, dim=0))
    elif args.num_feature_levels == 4:
        raise NotImplementedError
    else:
        raise NotImplementedError
    print("Completed extracting category codes. Start Inference...")
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    header = 'Test:'
    iou_types = tuple(k for k in ('bbox',) if k in postprocessors.keys())
    evaluator = DetectionEvaluator(base_ds, iou_types)
    # Restrict the evaluated category ids to the requested split.
    if type == 'all':
        pass  # To evaluate all categories, no need to change params of the evaluator
    elif type == 'base':
        if args.dataset_file == 'coco_base':
            evaluator.coco_eval['bbox'].params.catIds = coco_base_class_ids
        elif args.dataset_file == 'voc_base1':
            evaluator.coco_eval['bbox'].params.catIds = voc_base1_class_ids
        elif args.dataset_file == 'voc_base2':
            evaluator.coco_eval['bbox'].params.catIds = voc_base2_class_ids
        elif args.dataset_file == 'voc_base3':
            evaluator.coco_eval['bbox'].params.catIds = voc_base3_class_ids
        else:
            raise ValueError
    elif type == 'novel':
        if args.dataset_file == 'coco_base' or args.dataset_file == 'coco':
            evaluator.coco_eval['bbox'].params.catIds = coco_novel_class_ids
        elif args.dataset_file == 'voc_base1':
            evaluator.coco_eval['bbox'].params.catIds = voc_novel1_class_ids
        elif args.dataset_file == 'voc_base2':
            evaluator.coco_eval['bbox'].params.catIds = voc_novel2_class_ids
        elif args.dataset_file == 'voc_base3':
            evaluator.coco_eval['bbox'].params.catIds = voc_novel3_class_ids
        else:
            raise ValueError
    else:
        raise ValueError("Type must be 'all', 'base' or 'novel'!")
    print_freq = 50
    for samples, targets in metric_logger.log_every(dataloader, print_freq, header):
        samples = samples.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        # Query inference conditioned on the averaged category codes.
        outputs = model(samples, targets=targets, supp_class_ids=support_class_ids_final, category_codes=all_category_codes_final)
        loss_dict = criterion(outputs)
        weight_dict = criterion.weight_dict
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_scaled = {k: v * weight_dict[k] for k, v in loss_dict_reduced.items() if k in weight_dict}
        loss_dict_reduced_unscaled = {f'{k}_unscaled': v for k, v in loss_dict_reduced.items()}
        metric_logger.update(loss=sum(loss_dict_reduced_scaled.values()),
                             **loss_dict_reduced_scaled,
                             **loss_dict_reduced_unscaled)
        metric_logger.update(class_error=loss_dict_reduced['class_error'])
        orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0)
        results = postprocessors['bbox'](outputs, orig_target_sizes)
        res = {target['image_id'].item(): output for target, output in zip(targets, results)}
        if evaluator is not None:
            evaluator.update(res)
    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    if evaluator is not None:
        evaluator.synchronize_between_processes()
    # accumulate predictions from all images
    if evaluator is not None:
        if type == 'all':
            print("\n\n\n\n * ALL Categories:")
        elif type == 'base':
            print("\n\n\n\n * Base Categories:")
        elif type == 'novel':
            print("\n\n\n\n * Novel Categories:")
        else:
            raise ValueError("Type must be 'all', 'base' or 'novel'!")
        evaluator.accumulate()
        evaluator.summarize()
    stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()}
    if evaluator is not None:
        if 'bbox' in postprocessors.keys():
            stats['coco_eval_bbox'] = evaluator.coco_eval['bbox'].stats.tolist()
    # Explicitly release large tensors before clearing the CUDA cache.
    # NOTE(review): these `del`s raise NameError if either loop above ran
    # zero iterations (empty loader) — consider guarding.
    del support_images
    del support_class_ids
    del support_targets
    del samples
    del targets
    del outputs
    del weight_dict
    del loss_dict
    del loss_dict_reduced
    del loss_dict_reduced_scaled
    del loss_dict_reduced_unscaled
    del category_code
    del category_codes_final
    del all_category_codes_final
    del orig_target_sizes
    del res
    del results
    torch.cuda.empty_cache()
    return stats, evaluator
def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module,
                    data_loader: Iterable, optimizer: torch.optim.Optimizer,
                    device: torch.device, epoch: int, args, postprocessors=None):
    """Train a temporal detection model for one epoch.

    Each batch yields video metadata plus start/end score maps; the model is
    called as ``model(locations, samples, s_e_scores)``.  When ``args.stage``
    is 3 the class-error meter is skipped (presumably that stage has no
    classification head — verify against the model definition).

    Returns:
        tuple: (dict of globally-averaged meter values, the loss dict of the
        LAST batch — note it is not an epoch average).
    """
    model.train()
    criterion.train()
    metric_logger = utils.MetricLogger(delimiter=' ')
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    if args.stage != 3:
        metric_logger.add_meter(
            'class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    header = 'Epoch: [{}]'.format(epoch)
    print_freq = 10
    max_norm = args.clip_max_norm
    for vid_name_list, locations, samples, targets, num_frames, base, s_e_scores \
            in metric_logger.log_every(data_loader, print_freq, header):
        samples = samples.to(device)
        s_e_scores = s_e_scores.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        outputs = model(locations, samples, s_e_scores)
        loss_dict = criterion(outputs, targets)
        weight_dict = criterion.weight_dict
        # Weighted sum of the individual loss terms; keys absent from
        # weight_dict are logged but do not contribute to the gradient.
        losses = sum(loss_dict[k] * weight_dict[k] for k in loss_dict.keys()
                     if k in weight_dict)
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_unscaled = {
            f'{k}_unscaled': v for k, v in loss_dict_reduced.items()
        }
        loss_dict_reduced_scaled = {
            k: v * weight_dict[k]
            for k, v in loss_dict_reduced.items() if k in weight_dict
        }
        losses_reduced_scaled = sum(loss_dict_reduced_scaled.values())
        loss_value = losses_reduced_scaled.item()
        # A non-finite loss means training has diverged; abort rather than
        # poison the model weights with NaN gradients.
        if not math.isfinite(loss_value):
            print('Loss is {}, stopping training'.format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)
        optimizer.zero_grad()
        losses.backward()
        if max_norm > 0:
            # Clip the global gradient norm to guard against explosions.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        optimizer.step()
        metric_logger.update(loss=loss_value,
                             **loss_dict_reduced_scaled,
                             **loss_dict_reduced_unscaled)
        if args.stage != 3:
            metric_logger.update(class_error=loss_dict_reduced['class_error'])
        metric_logger.update(lr=optimizer.param_groups[0]['lr'])
    # Synchronise meters across processes before reporting epoch averages.
    metric_logger.synchronize_between_processes()
    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}, loss_dict
def train_one_epoch(
    model: nn.Module,
    data_loader: Iterable,
    optimizer: optim.Optimizer,
    batch_repeat_step: int,
    device: torch.device,
    epoch: int,
    max_norm: float = 0,
    experiment=None,
):
    """Train a latent-variable frame-prediction model for one epoch.

    For each batch the optimal latent is computed once (detached, treated as a
    fixed target), then the predictor is optimised ``batch_repeat_step`` times
    against it.

    Args:
        model: exposes encode_frames / compute_optimal_latent /
            hidden_predictor / frame_decoder / compute_energies.
        data_loader: yields dicts with "conditional_frames", "PTP",
            "target_frame".
        optimizer: optimizer over model parameters.
        batch_repeat_step: number of gradient steps per batch.
        device: device to move batch tensors to.
        epoch: current epoch index (for logging only).
        max_norm: gradient-norm clip threshold; 0 disables clipping.
        experiment: optional experiment tracker with log_metrics().

    Returns:
        dict: globally-averaged meter values for the epoch.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    # Fix: fmt was "{value:6f}" (minimum field WIDTH 6); "{value:.6f}" gives
    # six decimal places, matching every other lr meter in this file.
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt="{value:.6f}"))
    header = f"Epoch: [{epoch}]"
    print_freq = 10
    for batch in metric_logger.log_every(data_loader, print_freq, header):
        conditional_frames = batch["conditional_frames"].to(device)
        ptp = batch["PTP"].to(device)
        target_frame = batch["target_frame"].to(device)
        # Compute the optimal latent once per batch; detach so it acts as a
        # constant target during the repeated optimisation steps below.
        encoded_frames = model.encode_frames(conditional_frames).detach()
        latent = model.compute_optimal_latent(
            encoded_frames, ptp, target_frame).detach()
        latent.requires_grad = False
        for _ in range(batch_repeat_step):
            encoded_frames = model.encode_frames(conditional_frames)
            predicted_hidden = model.hidden_predictor(encoded_frames, ptp, latent)
            predicted_frame = model.frame_decoder(predicted_hidden)
            free_energies = model.compute_energies(predicted_frame, target_frame)
            total_loss = free_energies["total"]
            # Fix: extract the Python scalar once; math.isfinite on the raw
            # tensor relied on implicit 0-dim conversion.
            loss_value = total_loss.item()
            if not math.isfinite(loss_value):
                print(f"Loss is {loss_value}, stopping training")
                sys.exit(1)
            optimizer.zero_grad()
            total_loss.backward()
            if max_norm > 0:
                nn.utils.clip_grad_norm_(model.parameters(), max_norm)
            optimizer.step()
            # Fix: log the detached scalar instead of the live `total_loss`
            # tensor — storing the tensor in the meter kept the whole autograd
            # graph of the step alive.
            metric_logger.update(loss=loss_value, **free_energies)
            metric_logger.update(lr=optimizer.param_groups[0]["lr"])
    # Gather the stats from all processes.
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    avg_stats = {
        k: meter.global_avg for k, meter in metric_logger.meters.items()
    }
    if experiment is not None:
        # (was an f-string with no placeholders — plain literal is equivalent)
        experiment.log_metrics({"train_avg_loss": avg_stats["total"]}, step=epoch)
    return avg_stats
def evaluate(model, criterion, postprocessors, data_loader, base_ds, device,
             output_dir):
    """COCO-style evaluation loop (bbox / segm / panoptic).

    Runs the model over ``data_loader``, logs reduced losses, feeds
    postprocessed predictions to the COCO (and optionally panoptic)
    evaluators, and returns ``(stats, coco_evaluator)``.
    """
    model.eval()
    criterion.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    header = 'Test:'
    print_freq = 50
    iou_types = tuple(k for k in ('segm', 'bbox') if k in postprocessors.keys())
    coco_evaluator = CocoEvaluator(base_ds, iou_types)
    # coco_evaluator.coco_eval[iou_types[0]].params.iouThrs = [0, 0.1, 0.5, 0.75]
    panoptic_evaluator = None
    if 'panoptic' in postprocessors.keys():
        panoptic_evaluator = PanopticEvaluator(
            data_loader.dataset.ann_file,
            data_loader.dataset.ann_folder,
            output_dir=os.path.join(output_dir, "panoptic_eval"),
        )
    for samples, targets in metric_logger.log_every(data_loader, print_freq,
                                                    header):
        # visualize_batches(samples, targets)
        samples = samples.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        outputs = model(samples)
        loss_dict = criterion(outputs, targets)
        weight_dict = criterion.weight_dict
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_scaled = {
            k: v * weight_dict[k]
            for k, v in loss_dict_reduced.items() if k in weight_dict
        }
        loss_dict_reduced_unscaled = {
            f'{k}_unscaled': v for k, v in loss_dict_reduced.items()
        }
        metric_logger.update(loss=sum(loss_dict_reduced_scaled.values()),
                             **loss_dict_reduced_scaled,
                             **loss_dict_reduced_unscaled)
        metric_logger.update(class_error=loss_dict_reduced['class_error'])
        #orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0)
        # NOTE(review): instead of the usual (h, w) per image, this builds a
        # SQUARE size (max_side, max_side) for every target — presumably the
        # postprocessor here expects padded-square coordinates; confirm before
        # reusing this loop elsewhere.
        orig_target_sizes = torch.stack(
            [t["orig_size"].max() for t in targets],
            dim=0).repeat(2, 1).transpose(1, 0)
        results = postprocessors['bbox'](outputs, orig_target_sizes)
        # for i, t in enumerate(targets):
        #     targets[i]["boxes"] = box_cxcywh_to_xyxy(t["boxes"] * orig_target_sizes[i].float().repeat(2)).long()
        #     print(targets[i]["boxes"])
        # visualize_result(targets, results, threshold=0.7,
        #     save_path=os.path.expanduser("~/Pictures/20200720_coco_val2017"))
        if 'segm' in postprocessors.keys():
            target_sizes = torch.stack([t["size"] for t in targets], dim=0)
            results = postprocessors['segm'](results, outputs,
                                             orig_target_sizes, target_sizes)
        res = {
            target['image_id'].item(): output
            for target, output in zip(targets, results)
        }
        if coco_evaluator is not None:
            coco_evaluator.update(res)
        if panoptic_evaluator is not None:
            res_pano = postprocessors["panoptic"](outputs, target_sizes,
                                                  orig_target_sizes)
            for i, target in enumerate(targets):
                image_id = target["image_id"].item()
                # Panoptic API expects zero-padded 12-digit png names.
                file_name = f"{image_id:012d}.png"
                res_pano[i]["image_id"] = image_id
                res_pano[i]["file_name"] = file_name
            panoptic_evaluator.update(res_pano)
    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    if coco_evaluator is not None:
        coco_evaluator.synchronize_between_processes()
    if panoptic_evaluator is not None:
        panoptic_evaluator.synchronize_between_processes()
    # accumulate predictions from all images
    if coco_evaluator is not None:
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
    panoptic_res = None
    if panoptic_evaluator is not None:
        panoptic_res = panoptic_evaluator.summarize()
    stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()}
    if coco_evaluator is not None:
        if 'bbox' in postprocessors.keys():
            stats['coco_eval_bbox'] = coco_evaluator.coco_eval[
                'bbox'].stats.tolist()
        if 'segm' in postprocessors.keys():
            stats['coco_eval_masks'] = coco_evaluator.coco_eval[
                'segm'].stats.tolist()
    if panoptic_res is not None:
        stats['PQ_all'] = panoptic_res["All"]
        stats['PQ_th'] = panoptic_res["Things"]
        stats['PQ_st'] = panoptic_res["Stuff"]
    return stats, coco_evaluator
def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module,
                    data_loader: Iterable, optimizer: torch.optim.Optimizer,
                    device: torch.device, epoch: int, max_norm: float = 0,
                    writer=None, args=None):
    """Train the spine-detection model for one epoch.

    Besides the usual loss/backward/step loop, this also runs
    ``spine_evaluation`` on every sample to accumulate FN/FP/TP counts and
    mean in-distance, writes them to the TensorBoard ``writer``, and every
    50 epochs (and on the final epoch) plots prediction images.

    Returns:
        dict: globally-averaged meter values for the epoch.
    """
    model.train()
    criterion.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    metric_logger.add_meter(
        'class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    header = 'Epoch: [{}]'.format(epoch)
    print_freq = 10
    losses_items = []
    # Per-epoch detection tallies: false negatives, false positives,
    # true positives, per-match distances, and target counts.
    FNs, FPs, TPs, AVGs, TAR = [], [], [], [], []
    # NOTE(review): iterates the loader directly (metric_logger.log_every is
    # commented out), so the periodic progress log is replaced by the
    # explicit per-iteration print below.
    # for samples, targets in metric_logger.log_every(data_loader, print_freq, header):
    for i, (samples, targets, info) in enumerate(data_loader):
        samples = samples.to(device)
        targets = [t.to(device) for t in targets]
        outputs = model(samples)
        # import numpy as np
        # couples = []
        # for x in np.arange(1/10, 1, 1/5):
        #     for y in np.arange(1/12, 1, 1/6):
        #         couples.append(torch.tensor([x, y]))
        # outputs['pred_boxes'][0] = torch.cat(couples).view(-1, 2)
        loss_dict, indices = criterion(outputs, targets)
        # Periodically dump prediction visualisations to TensorBoard.
        if epoch % 50 == 0 or epoch == (args.epochs - 1):
            step = (epoch * len(data_loader) + i) * args.batch_size
            plot_images(writer, step, samples, outputs, targets, indices,
                        epoch, i, tag='train', folder=args.comment)
        for d in range(len(samples)):
            # targets[d][:, 1:3] — presumably the (x, y) centre columns of the
            # target tensor; verify against the dataset layout.
            FN, FP, TP, in_dist = spine_evaluation(outputs['pred_boxes'][d],
                                                   outputs['pred_logits'][d],
                                                   targets[d][:, 1:3], info[d],
                                                   args)
            FNs.append(FN)
            FPs.append(FP)
            TPs.append(TP)
            TAR.append(len(targets[d]))
            AVGs.append(in_dist)
        weight_dict = criterion.weight_dict
        losses = sum(loss_dict[k] * weight_dict[k] for k in loss_dict.keys()
                     if k in weight_dict).float()
        # Warn once per epoch about loss keys that carry no weight.
        not_used_keys = [
            k for k in loss_dict.keys() if k not in weight_dict.keys()
        ]
        if len(not_used_keys) > 0 and i == 0:
            print(
                f'[WARNING] these keys are not used to calculate the loss: {not_used_keys}'
            )
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_unscaled = {
            f'{k}_unscaled': v for k, v in loss_dict_reduced.items()
        }
        loss_dict_reduced_scaled = {
            k: v * weight_dict[k]
            for k, v in loss_dict_reduced.items() if k in weight_dict
        }
        losses_reduced_scaled = sum(loss_dict_reduced_scaled.values())
        loss_value = losses_reduced_scaled.item()
        losses_items.append(loss_value)
        # Per-iteration console log: current loss, running mean, and the
        # individual loss components plus the first patient id of the batch.
        print(
            f"{epoch:03d}_{i:03d} loss_value: {loss_value:.04f} mean {mean(losses_items):.04f} loss_centers {loss_dict['loss_centers'].item():.04f} loss_bce {loss_dict['loss_bce'].item():.04f} loss_spine_l1 {loss_dict['loss_spine_l1'].item():.04f} id: {info[0]['patient_id']}"
        )
        # Abort on divergence before corrupting the weights.
        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)
        optimizer.zero_grad()
        losses.backward()
        if max_norm > 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        optimizer.step()
        metric_logger.update(loss=loss_value,
                             **loss_dict_reduced_scaled,
                             **loss_dict_reduced_unscaled)
        metric_logger.update(class_error=loss_dict_reduced['class_error'])
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
    # Epoch-level detection metrics, normalised by the total target count.
    writer.add_scalar('train_metric/FN', sum(FNs) / sum(TAR),
                      global_step=epoch)
    writer.add_scalar('train_metric/FP', sum(FPs) / sum(TAR),
                      global_step=epoch)
    writer.add_scalar('train_metric/TP', sum(TPs) / sum(TAR),
                      global_step=epoch)
    if len(torch.cat(AVGs)) > 0:
        writer.add_scalar('train_metric/avg_dist',
                          torch.cat(AVGs).mean(),
                          global_step=epoch)
    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
def evaluate(model, criterion, postprocessors, data_loader, base_ds, device,
             output_dir, epoch, writer=None, args=None):
    """Evaluate the spine-detection model: losses, FN/FP/TP metrics, and
    periodic prediction plots written to TensorBoard.

    The COCO evaluator is disabled (``coco_evaluator = None``); only the
    custom ``spine_evaluation`` metrics are produced.  Returns
    ``(stats, coco_evaluator)`` where the evaluator is always ``None``.
    """
    model.eval()
    criterion.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    header = 'Test:'
    iou_types = tuple(k for k in ('segm', 'bbox') if k in postprocessors.keys())
    # coco_evaluator = CocoEvaluator(base_ds, iou_types)
    coco_evaluator = None
    # coco_evaluator.coco_eval[iou_types[0]].params.iouThrs = [0, 0.1, 0.5, 0.75]
    panoptic_evaluator = None
    # Detection tallies mirroring train_one_epoch.
    FNs, FPs, TPs, AVGs, TAR = [], [], [], [], []
    # for samples, targets in metric_logger.log_every(data_loader, 10, header):
    for i, (samples, targets, info) in enumerate(data_loader):
        samples = samples.to(device)
        targets = [t.to(device) for t in targets]
        outputs = model(samples)
        loss_dict, indices = criterion(outputs, targets)
        weight_dict = criterion.weight_dict
        if epoch % 50 == 0 or epoch == (args.epochs - 1):
            step = (epoch * len(data_loader) + i) * args.batch_size
            plot_images(writer, step, samples, outputs, targets, indices,
                        epoch, i, tag='test', folder=args.comment)
        for d in range(len(samples)):
            FN, FP, TP, in_dist = spine_evaluation(outputs['pred_boxes'][d],
                                                   outputs['pred_logits'][d],
                                                   targets[d][:, 1:3], info[d],
                                                   args)
            FNs.append(FN)
            FPs.append(FP)
            TPs.append(TP)
            TAR.append(len(targets[d]))
            AVGs.append(in_dist)
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_scaled = {
            k: v * weight_dict[k]
            for k, v in loss_dict_reduced.items() if k in weight_dict
        }
        loss_dict_reduced_unscaled = {
            f'{k}_unscaled': v for k, v in loss_dict_reduced.items()
        }
        metric_logger.update(loss=sum(loss_dict_reduced_scaled.values()),
                             **loss_dict_reduced_scaled,
                             **loss_dict_reduced_unscaled)
        metric_logger.update(class_error=loss_dict_reduced['class_error'])
        # orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0)
        # results = postprocessors['bbox'](outputs, orig_target_sizes)
        # if 'segm' in postprocessors.keys():
        #     target_sizes = torch.stack([t["size"] for t in targets], dim=0)
        #     results = postprocessors['segm'](results, outputs, orig_target_sizes, target_sizes)
        # res = {target['image_id'].item(): output for target, output in zip(targets, results)}
        # NOTE(review): both branches below are dead (both evaluators are
        # None).  If they are ever re-enabled, `res` and `target_sizes` are
        # undefined here because the postprocessing lines above are commented
        # out — they would raise NameError.
        if coco_evaluator is not None:
            coco_evaluator.update(res)
        if panoptic_evaluator is not None:
            res_pano = postprocessors["panoptic"](outputs, target_sizes,
                                                  orig_target_sizes)
            # NOTE(review): this inner `i` shadows the batch index `i`.
            for i, target in enumerate(targets):
                image_id = target["image_id"].item()
                file_name = f"{image_id:012d}.png"
                res_pano[i]["image_id"] = image_id
                res_pano[i]["file_name"] = file_name
            panoptic_evaluator.update(res_pano)
    # Epoch-level detection metrics, normalised by the total target count.
    writer.add_scalar('test_metric/FN', sum(FNs) / sum(TAR),
                      global_step=epoch)
    writer.add_scalar('test_metric/FP', sum(FPs) / sum(TAR),
                      global_step=epoch)
    writer.add_scalar('test_metric/TP', sum(TPs) / sum(TAR),
                      global_step=epoch)
    if len(torch.cat(AVGs)) > 0:
        writer.add_scalar('test_metric/avg_dist',
                          torch.cat(AVGs).mean(),
                          global_step=epoch)
    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    if coco_evaluator is not None:
        coco_evaluator.synchronize_between_processes()
    if panoptic_evaluator is not None:
        panoptic_evaluator.synchronize_between_processes()
    # accumulate predictions from all images
    if coco_evaluator is not None:
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
    panoptic_res = None
    if panoptic_evaluator is not None:
        panoptic_res = panoptic_evaluator.summarize()
    stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()}
    if coco_evaluator is not None:
        if 'bbox' in postprocessors.keys():
            stats['coco_eval_bbox'] = coco_evaluator.coco_eval[
                'bbox'].stats.tolist()
        if 'segm' in postprocessors.keys():
            stats['coco_eval_masks'] = coco_evaluator.coco_eval[
                'segm'].stats.tolist()
    if panoptic_res is not None:
        stats['PQ_all'] = panoptic_res["All"]
        stats['PQ_th'] = panoptic_res["Things"]
        stats['PQ_st'] = panoptic_res["Stuff"]
    return stats, coco_evaluator
def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module,
                    data_loader: Iterable, optimizer: torch.optim.Optimizer,
                    device: torch.device, epoch: int, max_norm: float = 0):
    """Standard DETR training loop for a single epoch.

    Returns:
        dict: globally-averaged meter values for the epoch.
    """
    model.train()
    criterion.train()
    # MetricLogger (detr/util/misc.py) handles log output.  It keeps a
    # defaultdict of historical values as SmoothedValue objects (also in
    # detr/util/misc.py); each SmoothedValue stores a history window of the
    # given window_size (1 means no history — each new value overwrites the
    # old), supports formatted output, and implements medians, means, and
    # cross-process synchronisation.  Besides storing SmoothedValues per key,
    # MetricLogger's key feature is log_every: a generator that yields one
    # batch, suspends while the training iteration runs, then resumes to
    # collect statistics before yielding the next batch, repeating until the
    # epoch is exhausted.  This pattern is rare in other projects but worth
    # trying if you want something fresh.
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    metric_logger.add_meter(
        'class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    header = 'Epoch: [{}]'.format(epoch)
    print_freq = 10
    for samples, targets in metric_logger.log_every(data_loader, print_freq,
                                                    header):
        samples = samples.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        outputs = model(samples)
        loss_dict = criterion(outputs, targets)
        weight_dict = criterion.weight_dict
        # Weighted sum of the component losses (classification, box
        # regression, etc.).
        losses = sum(loss_dict[k] * weight_dict[k] for k in loss_dict.keys()
                     if k in weight_dict)
        # reduce losses over all GPUs for logging purposes
        # Under distributed training, losses are synchronised across
        # processes after computation — by default the sum divided by the
        # number of processes.
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_unscaled = {
            f'{k}_unscaled': v for k, v in loss_dict_reduced.items()
        }
        loss_dict_reduced_scaled = {
            k: v * weight_dict[k]
            for k, v in loss_dict_reduced.items() if k in weight_dict
        }
        losses_reduced_scaled = sum(loss_dict_reduced_scaled.values())
        loss_value = losses_reduced_scaled.item()
        # If the loss is no longer finite the gradients have exploded, so
        # training is stopped immediately.
        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)
        # To avoid gradient explosion the gradients are clipped during
        # training.  There are many clipping schemes (e.g. clipping raw
        # values); here the gradient NORM is truncated: by default the L2
        # norm of all parameter gradients is compared with max_norm, and if
        # it is larger, all gradients are scaled down proportionally.
        optimizer.zero_grad()
        losses.backward()
        if max_norm > 0:
            # Truncate the gradient norm (L2 by default).
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        optimizer.step()
        metric_logger.update(loss=loss_value,
                             **loss_dict_reduced_scaled,
                             **loss_dict_reduced_unscaled)
        metric_logger.update(class_error=loss_dict_reduced['class_error'])
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
    # gather the stats from all processes
    # Synchronise the MetricLogger statistics across processes and return
    # their historical means.  global_avg is a @property of SmoothedValue:
    # e.g. if the loss was computed n times during the epoch, the global
    # average is the cross-process-synchronised total divided by n.
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
def evaluate(model, criterion, postprocessors, data_loader, base_ds, device,
             output_dir):
    """COCO-style evaluation loop (bbox / segm / panoptic).

    Runs the model over ``data_loader``, logs reduced losses, feeds
    postprocessed predictions to the COCO (and optionally panoptic)
    evaluators, and returns ``(stats, coco_evaluator)``.

    Fix: removed a leftover live ``import ipdb; ipdb.set_trace()`` breakpoint
    that halted every evaluation run waiting for debugger input; also removed
    stale commented-out debug dumps.
    """
    model.eval()
    criterion.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    header = 'Test:'
    iou_types = tuple(k for k in ('segm', 'bbox') if k in postprocessors.keys())
    coco_evaluator = CocoEvaluator(base_ds, iou_types)
    # coco_evaluator.coco_eval[iou_types[0]].params.iouThrs = [0, 0.1, 0.5, 0.75]
    panoptic_evaluator = None
    if 'panoptic' in postprocessors.keys():
        panoptic_evaluator = PanopticEvaluator(
            data_loader.dataset.ann_file,
            data_loader.dataset.ann_folder,
            output_dir=os.path.join(output_dir, "panoptic_eval"),
        )
    for samples, targets in metric_logger.log_every(data_loader, 10, header):
        samples = samples.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        # outputs holds 'pred_logits' (B, num_queries, num_classes+1),
        # 'pred_boxes' (B, num_queries, 4), and per-decoder-layer
        # 'aux_outputs'.
        outputs = model(samples)
        loss_dict = criterion(outputs, targets)
        weight_dict = criterion.weight_dict
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_scaled = {
            k: v * weight_dict[k]
            for k, v in loss_dict_reduced.items() if k in weight_dict
        }
        loss_dict_reduced_unscaled = {
            f'{k}_unscaled': v for k, v in loss_dict_reduced.items()
        }
        metric_logger.update(loss=sum(loss_dict_reduced_scaled.values()),
                             **loss_dict_reduced_scaled,
                             **loss_dict_reduced_unscaled)
        metric_logger.update(class_error=loss_dict_reduced['class_error'])
        # Rescale normalised predictions back to original image coordinates.
        orig_target_sizes = torch.stack([t["orig_size"] for t in targets],
                                        dim=0)
        results = postprocessors['bbox'](outputs, orig_target_sizes)
        if 'segm' in postprocessors.keys():
            target_sizes = torch.stack([t["size"] for t in targets], dim=0)
            results = postprocessors['segm'](results, outputs,
                                             orig_target_sizes, target_sizes)
        res = {
            target['image_id'].item(): output
            for target, output in zip(targets, results)
        }
        if coco_evaluator is not None:
            coco_evaluator.update(res)
        if panoptic_evaluator is not None:
            res_pano = postprocessors["panoptic"](outputs, target_sizes,
                                                  orig_target_sizes)
            for i, target in enumerate(targets):
                image_id = target["image_id"].item()
                # Panoptic API expects zero-padded 12-digit png names.
                file_name = f"{image_id:012d}.png"
                res_pano[i]["image_id"] = image_id
                res_pano[i]["file_name"] = file_name
            panoptic_evaluator.update(res_pano)
    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    if coco_evaluator is not None:
        coco_evaluator.synchronize_between_processes()
    if panoptic_evaluator is not None:
        panoptic_evaluator.synchronize_between_processes()
    # accumulate predictions from all images
    if coco_evaluator is not None:
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
    panoptic_res = None
    if panoptic_evaluator is not None:
        panoptic_res = panoptic_evaluator.summarize()
    stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()}
    if coco_evaluator is not None:
        if 'bbox' in postprocessors.keys():
            stats['coco_eval_bbox'] = coco_evaluator.coco_eval[
                'bbox'].stats.tolist()
        if 'segm' in postprocessors.keys():
            stats['coco_eval_masks'] = coco_evaluator.coco_eval[
                'segm'].stats.tolist()
    if panoptic_res is not None:
        stats['PQ_all'] = panoptic_res["All"]
        stats['PQ_th'] = panoptic_res["Things"]
        stats['PQ_st'] = panoptic_res["Stuff"]
    return stats, coco_evaluator
def evaluate(model, criterion, postprocessors, data_loader, base_ds, device,
             output_dir, tracker=None):
    """COCO-style evaluation with optional per-frame tracking.

    When ``tracker`` is given, inference must run with batch size 1 and every
    target must carry a ``frame_id``; the tracker is re-initialised whenever
    frame_id == 1.  Returns ``(stats, coco_evaluator, res_tracks)`` where
    res_tracks maps image_id -> track result (empty dict when no tracker).
    """
    model.eval()
    criterion.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    header = 'Test:'
    iou_types = tuple(k for k in ('segm', 'bbox') if k in postprocessors.keys())
    coco_evaluator = CocoEvaluator(base_ds, iou_types)
    # coco_evaluator.coco_eval[iou_types[0]].params.iouThrs = [0, 0.1, 0.5, 0.75]
    panoptic_evaluator = None
    if 'panoptic' in postprocessors.keys():
        panoptic_evaluator = PanopticEvaluator(
            data_loader.dataset.ann_file,
            data_loader.dataset.ann_folder,
            output_dir=os.path.join(output_dir, "panoptic_eval"),
        )
    res_tracks = dict()
    # NOTE(review): pre_embed is only ever reset to None here and below; it is
    # never read — looks like a leftover from a previous tracking interface.
    pre_embed = None
    for samples, targets in metric_logger.log_every(data_loader, 10, header):
        # pre process for track.
        if tracker is not None:
            assert samples.tensors.shape[
                0] == 1, "Now only support inference of batchsize 1."
            frame_id = targets[0].get("frame_id", None)
            assert frame_id is not None
            frame_id = frame_id.item()
            # frame_id == 1 marks the first frame of a new sequence: reset
            # all track state before processing it.
            if frame_id == 1:
                tracker.reset_all()
                pre_embed = None
        samples = samples.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        outputs = model(samples)
        loss_dict = criterion(outputs, targets)
        weight_dict = criterion.weight_dict
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_scaled = {
            k: v * weight_dict[k]
            for k, v in loss_dict_reduced.items() if k in weight_dict
        }
        loss_dict_reduced_unscaled = {
            f'{k}_unscaled': v for k, v in loss_dict_reduced.items()
        }
        metric_logger.update(loss=sum(loss_dict_reduced_scaled.values()),
                             **loss_dict_reduced_scaled,
                             **loss_dict_reduced_unscaled)
        metric_logger.update(class_error=loss_dict_reduced['class_error'])
        orig_target_sizes = torch.stack([t["orig_size"] for t in targets],
                                        dim=0)
        results = postprocessors['bbox'](outputs, orig_target_sizes)
        if 'segm' in postprocessors.keys():
            target_sizes = torch.stack([t["size"] for t in targets], dim=0)
            results = postprocessors['segm'](results, outputs,
                                             orig_target_sizes, target_sizes)
        res = {
            target['image_id'].item(): output
            for target, output in zip(targets, results)
        }
        # post process for track.
        if tracker is not None:
            # First frame seeds new tracks; later frames associate
            # detections with existing tracks.
            if frame_id == 1:
                res_track = tracker.init_track(results[0])
            else:
                res_track = tracker.step(results[0])
            res_tracks[targets[0]['image_id'].item()] = res_track
        if coco_evaluator is not None:
            coco_evaluator.update(res)
        if panoptic_evaluator is not None:
            res_pano = postprocessors["panoptic"](outputs, target_sizes,
                                                  orig_target_sizes)
            for i, target in enumerate(targets):
                image_id = target["image_id"].item()
                # Panoptic API expects zero-padded 12-digit png names.
                file_name = f"{image_id:012d}.png"
                res_pano[i]["image_id"] = image_id
                res_pano[i]["file_name"] = file_name
            panoptic_evaluator.update(res_pano)
    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    if coco_evaluator is not None:
        coco_evaluator.synchronize_between_processes()
    if panoptic_evaluator is not None:
        panoptic_evaluator.synchronize_between_processes()
    # accumulate predictions from all images
    if coco_evaluator is not None:
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
    panoptic_res = None
    if panoptic_evaluator is not None:
        panoptic_res = panoptic_evaluator.summarize()
    stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()}
    if coco_evaluator is not None:
        if 'bbox' in postprocessors.keys():
            stats['coco_eval_bbox'] = coco_evaluator.coco_eval[
                'bbox'].stats.tolist()
        if 'segm' in postprocessors.keys():
            stats['coco_eval_masks'] = coco_evaluator.coco_eval[
                'segm'].stats.tolist()
    if panoptic_res is not None:
        stats['PQ_all'] = panoptic_res["All"]
        stats['PQ_th'] = panoptic_res["Things"]
        stats['PQ_st'] = panoptic_res["Stuff"]
    return stats, coco_evaluator, res_tracks
def evaluate(model, criterion, postprocessors, data_loader, base_ds, device, output_dir, epoch=None):
    """Evaluate the model and additionally dump per-image visualizations.

    Besides the standard COCO/panoptic evaluation, every batch's target and
    predicted boxes are plotted to ``./vis/`` and collected into a pickle
    (``./vis/saved_result_epoch_{epoch}.pkl``) when ``epoch`` is given.

    NOTE(review): this module defines several functions named ``evaluate``;
    later definitions shadow earlier ones at import time — confirm intended.

    Returns:
        (stats, coco_evaluator) — averaged meter values (plus COCO / PQ
        summaries when available) and the populated evaluator.
    """
    model.eval()
    criterion.eval()

    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    header = 'Test:'

    iou_types = tuple(k for k in ('segm', 'bbox') if k in postprocessors.keys())
    coco_evaluator = CocoEvaluator(base_ds, iou_types)
    # coco_evaluator.coco_eval[iou_types[0]].params.iouThrs = [0, 0.1, 0.5, 0.75]

    panoptic_evaluator = None
    if 'panoptic' in postprocessors.keys():
        panoptic_evaluator = PanopticEvaluator(
            data_loader.dataset.ann_file,
            data_loader.dataset.ann_folder,
            output_dir=os.path.join(output_dir, "panoptic_eval"),
        )

    saved_pickle = {}
    for index, (samples, targets) in enumerate(
            metric_logger.log_every(data_loader, 10, header)):
        samples = samples.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        outputs = model(samples)

        # Visualization: only the first element of the batch is plotted, so this
        # assumes batch size 1 — TODO confirm against the eval dataloader.
        # Class id 5 is treated as the "no object" class here; queries predicting
        # it are dropped from the plot — presumably matches num_classes, verify.
        logits = torch.argmax(outputs['pred_logits'][0], axis=1)
        p_boxes = outputs['pred_boxes'][0][logits != 5].cpu()
        names = logits[logits != 5].cpu().numpy()
        plotit(samples.tensors[0].cpu(), targets[0]['boxes'].cpu(),
               targets[0]['boxes'].shape[0] * ['tgt'],
               "./vis/target/" + str(index) + ".jpg")
        plotit(samples.tensors[0].cpu(), p_boxes, names,
               "./vis/vis_output/" + str(index) + ".jpg")
        saved_dict = {
            "target_bbox": targets[0]['boxes'].cpu().numpy(),
            "pred_bbox": p_boxes.numpy(),
            "target_label": targets[0]['labels'].cpu().numpy(),
            "pred_label": names,
            "image_id": targets[0]['image_id'].cpu().numpy()
        }
        saved_pickle[index] = saved_dict

        loss_dict = criterion(outputs, targets)
        weight_dict = criterion.weight_dict

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_scaled = {
            k: v * weight_dict[k]
            for k, v in loss_dict_reduced.items() if k in weight_dict
        }
        loss_dict_reduced_unscaled = {
            f'{k}_unscaled': v
            for k, v in loss_dict_reduced.items()
        }
        metric_logger.update(loss=sum(loss_dict_reduced_scaled.values()),
                             **loss_dict_reduced_scaled,
                             **loss_dict_reduced_unscaled)
        metric_logger.update(class_error=loss_dict_reduced['class_error'])

        orig_target_sizes = torch.stack([t["orig_size"] for t in targets],
                                        dim=0)
        results = postprocessors['bbox'](outputs, orig_target_sizes)
        if 'segm' in postprocessors.keys():
            target_sizes = torch.stack([t["size"] for t in targets], dim=0)
            results = postprocessors['segm'](results, outputs,
                                             orig_target_sizes, target_sizes)
        res = {
            target['image_id'].item(): output
            for target, output in zip(targets, results)
        }
        if coco_evaluator is not None:
            coco_evaluator.update(res)

        if panoptic_evaluator is not None:
            res_pano = postprocessors["panoptic"](outputs, target_sizes,
                                                  orig_target_sizes)
            for i, target in enumerate(targets):
                image_id = target["image_id"].item()
                file_name = f"{image_id:012d}.png"
                res_pano[i]["image_id"] = image_id
                res_pano[i]["file_name"] = file_name
            panoptic_evaluator.update(res_pano)

    # Identity comparison with None (was `epoch != None`).
    if epoch is not None:
        saved_result_dir = f"./vis/saved_result_epoch_{epoch}.pkl"
        with open(saved_result_dir, "wb") as files:
            pickle.dump(saved_pickle, files)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    if coco_evaluator is not None:
        coco_evaluator.synchronize_between_processes()
    if panoptic_evaluator is not None:
        panoptic_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    if coco_evaluator is not None:
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
    panoptic_res = None
    if panoptic_evaluator is not None:
        panoptic_res = panoptic_evaluator.summarize()

    stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()}
    if coco_evaluator is not None:
        if 'bbox' in postprocessors.keys():
            stats['coco_eval_bbox'] = coco_evaluator.coco_eval[
                'bbox'].stats.tolist()
        if 'segm' in postprocessors.keys():
            stats['coco_eval_masks'] = coco_evaluator.coco_eval[
                'segm'].stats.tolist()
    if panoptic_res is not None:
        stats['PQ_all'] = panoptic_res["All"]
        stats['PQ_th'] = panoptic_res["Things"]
        stats['PQ_st'] = panoptic_res["Stuff"]
    return stats, coco_evaluator
def evaluate_nvdata(model, criterion, postprocessors, data_loader, base_ds, device, output_dir):
    """COCO-style bbox/segm evaluation over ``data_loader`` (no panoptic path).

    Logs criterion losses per batch, feeds postprocessed detections to a
    ``CocoEvaluator``, and returns the averaged meter values (with COCO summary
    stats when available) together with the populated evaluator.
    """
    model.eval()
    criterion.eval()

    logger = utils.MetricLogger(delimiter=" ")
    logger.add_meter('class_error',
                     utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    header = 'Test:'

    iou_types = tuple(k for k in ('segm', 'bbox') if k in postprocessors.keys())
    coco_evaluator = CocoEvaluator(base_ds, iou_types)

    for samples, targets in logger.log_every(data_loader, 10, header):
        samples = samples.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        outputs = model(samples)
        loss_dict = criterion(outputs, targets)
        weight_dict = criterion.weight_dict

        # Reduce losses over all GPUs so the log reflects the global averages.
        reduced = utils.reduce_dict(loss_dict)
        scaled = {}
        for key, value in reduced.items():
            if key in weight_dict:
                scaled[key] = value * weight_dict[key]
        unscaled = {f'{k}_unscaled': v for k, v in reduced.items()}
        logger.update(loss=sum(scaled.values()), **scaled, **unscaled)
        logger.update(class_error=reduced['class_error'])

        orig_target_sizes = torch.stack([t["orig_size"] for t in targets],
                                        dim=0)
        results = postprocessors['bbox'](outputs, orig_target_sizes)
        if 'segm' in postprocessors.keys():
            target_sizes = torch.stack([t["size"] for t in targets], dim=0)
            results = postprocessors['segm'](results, outputs,
                                             orig_target_sizes, target_sizes)

        image_ids = (t['image_id'].item() for t in targets)
        res = dict(zip(image_ids, results))
        if coco_evaluator is not None:
            coco_evaluator.update(res)

    # Gather the stats from all processes.
    logger.synchronize_between_processes()
    print("Averaged stats:", logger)
    if coco_evaluator is not None:
        coco_evaluator.synchronize_between_processes()
        # Accumulate predictions from all images.
        coco_evaluator.accumulate()
        coco_evaluator.summarize()

    panoptic_res = None
    stats = {k: meter.global_avg for k, meter in logger.meters.items()}
    if coco_evaluator is not None:
        if 'bbox' in postprocessors.keys():
            stats['coco_eval_bbox'] = coco_evaluator.coco_eval['bbox'].stats.tolist()
        if 'segm' in postprocessors.keys():
            stats['coco_eval_masks'] = coco_evaluator.coco_eval['segm'].stats.tolist()
    # panoptic_res is always None in this variant; guard kept for parity with
    # the other evaluate() implementations.
    if panoptic_res is not None:
        stats['PQ_all'] = panoptic_res["All"]
        stats['PQ_th'] = panoptic_res["Things"]
        stats['PQ_st'] = panoptic_res["Stuff"]
    return stats, coco_evaluator
def train_one_epoch(args, model: torch.nn.Module, criterion: torch.nn.Module,
                    dataloader: Iterable, optimizer: torch.optim.Optimizer,
                    device: torch.device, epoch: int, max_norm: float = 0):
    """Train for one epoch on an episodic (few-shot) dataloader.

    Each batch carries query samples plus support images/class-ids/targets;
    ``sample_support_categories`` sub-samples the support set and filters the
    query targets accordingly before the forward pass.

    Args:
        max_norm: gradient clipping threshold; 0 disables clipping (the total
            gradient norm is still computed for logging).

    Returns:
        dict mapping meter name -> global average for the epoch.
    """
    model.train()
    criterion.train()

    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    metric_logger.add_meter('class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    metric_logger.add_meter('grad_norm', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    header = 'Epoch: [{}]'.format(epoch)
    print_freq = 50

    for samples, targets, support_images, support_class_ids, support_targets in metric_logger.log_every(dataloader, print_freq, header):
        # * Sample Support Categories;
        # * Filters Targets (only keep GTs within support categories);
        # * Samples Support Images and Targets
        targets, support_images, support_class_ids, support_targets = \
            sample_support_categories(args, targets, support_images, support_class_ids, support_targets)

        samples = samples.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        support_images = support_images.to(device)
        support_class_ids = support_class_ids.to(device)
        support_targets = [{k: v.to(device) for k, v in t.items()} for t in support_targets]

        outputs = model(samples,
                        targets=targets,
                        supp_samples=support_images,
                        supp_class_ids=support_class_ids,
                        supp_targets=support_targets)
        loss_dict = criterion(outputs)
        weight_dict = criterion.weight_dict
        losses = sum(loss_dict[k] * weight_dict[k] for k in loss_dict.keys() if k in weight_dict)

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_unscaled = {f'{k}_unscaled': v for k, v in loss_dict_reduced.items()}
        loss_dict_reduced_scaled = {k: v * weight_dict[k] for k, v in loss_dict_reduced.items() if k in weight_dict}
        losses_reduced_scaled = sum(loss_dict_reduced_scaled.values())
        loss_value = losses_reduced_scaled.item()

        # math.isfinite() rejects +/-inf as well as NaN, so report "not finite"
        # (the old message claimed NaN even for an infinite loss).
        if not math.isfinite(loss_value):
            print("Loss is not finite - {}. \nTraining terminated unexpectedly.\n".format(loss_value))
            print("loss dict:")
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        if max_norm > 0:
            grad_total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        else:
            # No clipping: just measure the total norm for the logs.
            grad_total_norm = utils.get_total_grad_norm(model.parameters(), max_norm)
        optimizer.step()

        metric_logger.update(loss=loss_value, **loss_dict_reduced_scaled, **loss_dict_reduced_unscaled)
        metric_logger.update(class_error=loss_dict_reduced['class_error'])
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
        metric_logger.update(grad_norm=grad_total_norm)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    # The original trailing `del` block was removed: the names were loop-locals
    # freed on return anyway, and deleting them raised NameError whenever the
    # dataloader yielded no batches.
    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
def extract_feature(model, criterion, postprocessors, data_loader, base_ds, device, output_dir):
    """Run the model in feature-extraction mode and dump per-image HDF5 files.

    For every image, writes ``output_dir/{image_id:09d}.hdf5`` containing the
    encoder features, the raw per-query predicted scores/labels, and the
    postprocessed ("estimated") scores/labels/boxes. Losses are still computed
    and logged for monitoring; nothing is returned.
    """
    model.eval()
    criterion.eval()

    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        'class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}'))
    header = 'Test:'

    iou_types = tuple(k for k in ('segm', 'bbox') if k in postprocessors.keys())
    # NOTE(review): coco_evaluator is built but never updated or summarized in
    # this function — presumably leftover from the evaluate() template.
    coco_evaluator = CocoEvaluator(base_ds, iou_types)
    # coco_evaluator.coco_eval[iou_types[0]].params.iouThrs = [0, 0.1, 0.5, 0.75]

    for samples, targets in metric_logger.log_every(data_loader, 10, header):
        samples = samples.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # feature_extraction=True — assumes the model exposes
        # outputs['encoder_features'] in this mode; TODO confirm.
        outputs = model(samples, feature_extraction=True)
        loss_dict = criterion(outputs, targets)
        weight_dict = criterion.weight_dict

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_scaled = {
            k: v * weight_dict[k]
            for k, v in loss_dict_reduced.items() if k in weight_dict
        }
        loss_dict_reduced_unscaled = {
            f'{k}_unscaled': v
            for k, v in loss_dict_reduced.items()
        }
        metric_logger.update(loss=sum(loss_dict_reduced_scaled.values()),
                             **loss_dict_reduced_scaled,
                             **loss_dict_reduced_unscaled)
        metric_logger.update(class_error=loss_dict_reduced['class_error'])

        orig_target_sizes = torch.stack([t["orig_size"] for t in targets],
                                        dim=0)
        results = postprocessors['bbox'](outputs, orig_target_sizes)
        # res = {target['image_id'].item(): output for target, output in zip(targets, results)}

        # One HDF5 file per image in the batch.
        for index, target in enumerate(targets):
            features_file_path = os.path.join(
                output_dir, '{:09d}.hdf5'.format(target['image_id'].item()))
            with h5py.File(features_file_path, 'w') as wf:
                wf.create_dataset(
                    'features',
                    data=outputs['encoder_features'][index].cpu().numpy())
                # Raw per-query predictions: max(-1) gives (score, label) pairs.
                predicted = wf.create_group('predicted')
                predicted.create_dataset('scores',
                                         data=outputs['pred_logits']
                                         [index].max(-1)[0].cpu().numpy())
                predicted.create_dataset('labels',
                                         data=outputs['pred_logits']
                                         [index].max(-1)[1].cpu().numpy())
                # Postprocessed detections in original-image coordinates.
                estimated = wf.create_group('estimated')
                estimated.create_dataset(
                    'scores', data=results[index]['scores'].cpu().numpy())
                estimated.create_dataset(
                    'labels', data=results[index]['labels'].cpu().numpy())
                wf.create_dataset('bboxes',
                                  data=results[index]['boxes'].cpu().numpy())

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    return
def evaluate(
    model,
    criterion,
    postprocessors,
    data_loader,
    base_ds,
    device,
    output_dir,
    dset_file="coco",
):
    """Evaluate on either COCO (``dset_file="coco"``) or MOT17 data.

    For COCO, detections are accumulated into a ``CocoEvaluator``; for MOT17,
    per-image boxes/scores are collected and scored via ``base_ds.print_eval``.
    Panoptic evaluation runs additionally when a "panoptic" postprocessor is
    present.

    NOTE(review): this module defines several functions named ``evaluate``;
    later definitions shadow earlier ones at import time — confirm intended.

    Returns:
        (stats, coco_evaluator) — averaged meter values plus dataset-specific
        summary entries; ``coco_evaluator`` is None for non-COCO datasets.
    """
    model.eval()
    criterion.eval()

    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter(
        "class_error", utils.SmoothedValue(window_size=1, fmt="{value:.2f}")
    )
    header = "Test:"

    iou_types = tuple(k for k in ("segm", "bbox") if k in postprocessors.keys())
    coco_evaluator = None
    if dset_file == "coco":
        coco_evaluator = CocoEvaluator(base_ds, iou_types)
    if dset_file == "MOT17":
        mot_res = {}
    print("DSET Eval", dset_file)
    # coco_evaluator.coco_eval[iou_types[0]].params.iouThrs = [0, 0.1, 0.5, 0.75]

    panoptic_evaluator = None
    if "panoptic" in postprocessors.keys():
        panoptic_evaluator = PanopticEvaluator(
            data_loader.dataset.ann_file,
            data_loader.dataset.ann_folder,
            output_dir=os.path.join(output_dir, "panoptic_eval"),
        )

    for samples, targets in metric_logger.log_every(data_loader, 10, header):
        samples = samples.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        outputs = model(samples, targets)
        loss_dict = criterion(outputs, targets)
        weight_dict = criterion.weight_dict

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_scaled = {
            k: v * weight_dict[k]
            for k, v in loss_dict_reduced.items()
            if k in weight_dict
        }
        loss_dict_reduced_unscaled = {
            f"{k}_unscaled": v for k, v in loss_dict_reduced.items()
        }
        metric_logger.update(
            loss=sum(loss_dict_reduced_scaled.values()),
            **loss_dict_reduced_scaled,
            **loss_dict_reduced_unscaled,
        )
        metric_logger.update(class_error=loss_dict_reduced["class_error"])

        orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0)
        results = postprocessors["bbox"](outputs, orig_target_sizes)
        if "segm" in postprocessors.keys():
            target_sizes = torch.stack([t["size"] for t in targets], dim=0)
            results = postprocessors["segm"](
                results, outputs, orig_target_sizes, target_sizes
            )
        res = {
            target["image_id"].item(): output
            for target, output in zip(targets, results)
        }

        if dset_file == "MOT17":
            # BUG FIX: 'scores' previously stored the bound method
            # `output["scores"].cpu` (no call), so the MOT evaluation received a
            # method object instead of a tensor.
            mot_res.update(
                {
                    target["image_id"].item(): {
                        "boxes": output["boxes"].cpu(),
                        "scores": output["scores"].cpu(),
                    }
                    for target, output in zip(targets, results)
                }
            )

        if coco_evaluator is not None:
            coco_evaluator.update(res)

        if panoptic_evaluator is not None:
            # NOTE(review): target_sizes is only bound when 'segm' ran above —
            # panoptic without segm would raise NameError here; confirm.
            res_pano = postprocessors["panoptic"](
                outputs, target_sizes, orig_target_sizes
            )
            for i, target in enumerate(targets):
                image_id = target["image_id"].item()
                file_name = f"{image_id:012d}.png"
                res_pano[i]["image_id"] = image_id
                res_pano[i]["file_name"] = file_name
            panoptic_evaluator.update(res_pano)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    if coco_evaluator is not None:
        coco_evaluator.synchronize_between_processes()
    if panoptic_evaluator is not None:
        panoptic_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    if coco_evaluator is not None:
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
    panoptic_res = None
    if panoptic_evaluator is not None:
        panoptic_res = panoptic_evaluator.summarize()

    stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()}
    if dset_file == "MOT17":
        print("\n \n ### FINAL MOT STATS: ### \n \t")
        stats.update(base_ds.print_eval(mot_res))
        print("\n\n")
    if coco_evaluator is not None:
        if "bbox" in postprocessors.keys():
            stats["coco_eval_bbox"] = coco_evaluator.coco_eval["bbox"].stats.tolist()
        if "segm" in postprocessors.keys():
            stats["coco_eval_masks"] = coco_evaluator.coco_eval["segm"].stats.tolist()
    if panoptic_res is not None:
        stats["PQ_all"] = panoptic_res["All"]
        stats["PQ_th"] = panoptic_res["Things"]
        stats["PQ_st"] = panoptic_res["Stuff"]
    return stats, coco_evaluator