def main(args):
    try:
        if not os.path.isfile(args.checkpoint):
            print('ERROR: Checkpoint file "%s" not found' % args.checkpoint)
            print('Maybe you forgot to download pretrained models? Try running:')
            print('bash scripts/download_models.sh')
            return

        if not os.path.isdir(args.output_dir):
            print('Output directory "%s" does not exist; creating it' % args.output_dir)
            os.makedirs(args.output_dir)

        if args.device == 'cpu':
            device = torch.device('cpu')
        elif args.device == 'gpu':
            device = torch.device('cuda:0')
            if not torch.cuda.is_available():
                print('WARNING: CUDA not available; falling back to CPU')
                device = torch.device('cpu')

        # Load the model, with a bit of care in case there are no GPUs
        map_location = 'cpu' if device == torch.device('cpu') else None
        checkpoint = torch.load(args.checkpoint, map_location=map_location)
        model = Sg2ImModel(**checkpoint['model_kwargs'])
        model.load_state_dict(checkpoint['model_state'])
        model.eval()
        model.to(device)

        # Load the scene graphs; here args.scene_graphs_json is already a parsed list
        scene_graphs = args.scene_graphs_json
        # with open(args.scene_graphs_json, 'r') as f:
        #     scene_graphs = json.load(f)
        print('Loaded graph!')

        # Run the model forward
        with torch.no_grad():
            imgs, boxes_pred, masks_pred, _ = model.forward_json(scene_graphs)
        imgs = imagenet_deprocess_batch(imgs)

        # Save the generated images
        for i in range(imgs.shape[0]):
            img_np = imgs[i].numpy().transpose(1, 2, 0)
            img_path = os.path.join(args.output_dir, 'img' + args.id + '.png')
            imwrite(img_path, img_np)

        print('Drawing now!')
        # Draw the scene graphs
        if args.draw_scene_graphs == 1:
            for i, sg in enumerate(scene_graphs):
                sg_img = vis.draw_scene_graph(sg['objects'], sg['relationships'])
                sg_img_path = os.path.join(args.output_dir, 'sg%s_%06d.png' % (args.id, i))
                imwrite(sg_img_path, sg_img)
        return True
    except Exception:
        # except (): caught nothing; catch Exception so failures return False
        return False
def main(args):
    if not os.path.isfile(args.checkpoint):
        print('ERROR: Checkpoint file "%s" not found' % args.checkpoint)
        print('Maybe you forgot to download pretrained models? Try running:')
        print('bash scripts/download_models.sh')
        return

    if not os.path.isdir(args.output_dir):
        print('Output directory "%s" does not exist; creating it' % args.output_dir)
        os.makedirs(args.output_dir)

    if args.device == 'cpu':
        device = torch.device('cpu')
    elif args.device == 'gpu':
        device = torch.device('cuda:0')
        if not torch.cuda.is_available():
            print('WARNING: CUDA not available; falling back to CPU')
            device = torch.device('cpu')

    # Load the model, with a bit of care in case there are no GPUs
    map_location = 'cpu' if device == torch.device('cpu') else None
    checkpoint = torch.load(args.checkpoint, map_location=map_location)
    model = Sg2ImModel(**checkpoint['model_kwargs'])
    model.load_state_dict(checkpoint['model_state'], strict=False)
    model.eval()
    model.to(device)

    # Load the scene graphs
    with open(args.scene_graphs_json, 'r') as f:
        scene_graphs = json.load(f)

    # Run the model forward
    with torch.no_grad():
        (imgs, boxes_pred, masks_pred, objs, layout, layout_boxes_t,
         layout_masks, obj_to_img, sg_context_pred, _, _) = model.forward_json(scene_graphs)
    imgs = imagenet_deprocess_batch(imgs)
    layout_boxes = layout_boxes_t.numpy()

    import numpy as np  # ideally at module scope

    # Save the generated images
    np_imgs = []
    for i in range(imgs.shape[0]):
        img_np = (imgs[i].numpy().transpose(1, 2, 0) * 255.0).astype(np.uint8)
        img_path = os.path.join(args.output_dir, 'img%06d.png' % i)
        imwrite(img_path, img_np)
        np_imgs.append(img_np)

    # Draw the scene graphs
    if args.draw_scene_graphs == 1:
        for i, sg in enumerate(scene_graphs):
            sg_img = vis.draw_scene_graph(sg['objects'], sg['relationships'])
            sg_img_path = os.path.join(args.output_dir, 'sg%06d.png' % i)
            imwrite(sg_img_path, sg_img)
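# The main() variants above consume args.checkpoint, args.output_dir, args.device,
# args.scene_graphs_json, and args.draw_scene_graphs. A minimal argparse sketch
# that would supply them is below; the default values are illustrative
# assumptions, not necessarily the repository's actual defaults.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint', default='sg2im-models/vg64.pt')
parser.add_argument('--scene_graphs_json', default='scene_graphs/figure_6_sheep.json')
parser.add_argument('--output_dir', default='outputs')
parser.add_argument('--draw_scene_graphs', type=int, default=0)
parser.add_argument('--device', default='gpu', choices=['cpu', 'gpu'])

if __name__ == '__main__':
    main(parser.parse_args())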
def generate_img(objs, triples, model):
    '''Takes a scene graph (objs, triples); returns the generated image batch.'''
    O = objs.size(0)
    obj_to_img = torch.LongTensor(O).fill_(0)  # all objects belong to image 0
    obj_to_img = obj_to_img.cuda()
    objs = objs.cuda()
    triples = triples.cuda()
    with torch.no_grad():
        # boxes_gt=model_boxes, masks_gt=model_masks could also be passed here
        model_out = model(objs, triples, obj_to_img)
        imgs, boxes_pred, masks_pred, predicate_scores = model_out
    imgs = imagenet_deprocess_batch(imgs)
    return imgs
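# Hypothetical usage sketch for generate_img(). The object and predicate indices
# below are placeholders; real values must come from model.vocab for a loaded
# checkpoint. Each triple is (subject_position, predicate_index, object_position),
# where subject/object positions index into objs.
objs = torch.LongTensor([1, 5, 3])         # e.g. three objects in one scene
triples = torch.LongTensor([[0, 2, 1],     # obj 0 --pred 2--> obj 1
                            [1, 4, 2]])    # obj 1 --pred 4--> obj 2
imgs = generate_img(objs, triples, model)  # model: a loaded, eval-mode Sg2ImModel
imwrite('generated.png', imgs[0].numpy().transpose(1, 2, 0))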
def check_model(args, t, loader, model, logger=None, log_tag='', write_images=False):
    float_dtype = torch.cuda.FloatTensor
    long_dtype = torch.cuda.LongTensor
    num_samples = 0
    all_losses = defaultdict(list)
    total_iou = 0
    total_boxes = 0
    with torch.no_grad():
        for batch in loader:
            batch = [tensor.cuda() for tensor in batch]
            masks = None
            if len(batch) == 6:
                imgs, objs, boxes, triples, obj_to_img, triple_to_img = batch
            elif len(batch) == 7:
                imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img = batch
            predicates = triples[:, 1]

            # Run the model as it has been run during training
            model_masks = masks
            model_out = model(objs, triples, obj_to_img,
                              boxes_gt=boxes, masks_gt=model_masks)
            (imgs_pred, boxes_pred, masks_pred, objs_vec, layout, layout_boxes,
             layout_masks, obj_to_img, sg_context_pred, sg_context_pred_d,
             predicate_scores) = model_out

            skip_pixel_loss = False
            skip_perceptual_loss = False
            total_loss, losses = calculate_model_losses(
                args, skip_pixel_loss, model, imgs, imgs_pred,
                boxes, boxes_pred, masks, masks_pred,
                predicates, predicate_scores, skip_perceptual_loss)

            total_iou += jaccard(boxes_pred, boxes)
            total_boxes += boxes_pred.size(0)

            for loss_name, loss_val in losses.items():
                all_losses[loss_name].append(loss_val)
            num_samples += imgs.size(0)
            if num_samples >= args.num_val_samples:
                break

        samples = {}
        samples['gt_img'] = imgs

        model_out = model(objs, triples, obj_to_img, boxes_gt=boxes, masks_gt=masks)
        samples['gt_box_gt_mask'] = model_out[0]

        model_out = model(objs, triples, obj_to_img, boxes_gt=boxes)
        samples['gt_box_pred_mask'] = model_out[0]

        model_out = model(objs, triples, obj_to_img)
        samples['pred_box_pred_mask'] = model_out[0]

        for k, v in samples.items():
            samples[k] = imagenet_deprocess_batch(v)

        if logger is not None and write_images:
            # 3. Log ground truth and predicted images
            with torch.no_grad():
                p_imgs = samples['gt_box_gt_mask'].detach()
                gt_imgs = samples['gt_img'].detach()
                p_gbox_pmsk_img = samples['gt_box_pred_mask']
                p_test_imgs = samples['pred_box_pred_mask']

                np_gt_imgs = [gt.cpu().numpy().transpose(1, 2, 0) for gt in gt_imgs]
                np_pred_imgs = [pred.cpu().numpy().transpose(1, 2, 0) for pred in p_imgs]
                np_gbox_pmsk_imgs = [pred.cpu().numpy().transpose(1, 2, 0)
                                     for pred in p_gbox_pmsk_img]
                np_test_pred_imgs = [pred.cpu().numpy().transpose(1, 2, 0)
                                     for pred in p_test_imgs]
                np_all_imgs = []
                for gt_img, gtb_gtm_img, gtb_pm_img, pred_img in zip(
                        np_gt_imgs, np_pred_imgs, np_gbox_pmsk_imgs, np_test_pred_imgs):
                    np_all_imgs.append((gt_img * 255.0).astype(np.uint8))
                    np_all_imgs.append((gtb_gtm_img * 255.0).astype(np.uint8))
                    np_all_imgs.append((gtb_pm_img * 255.0).astype(np.uint8))
                    np_all_imgs.append((pred_img * 255.0).astype(np.uint8))
                logger.image_summary(log_tag, np_all_imgs, t)

    mean_losses = {k: np.mean(v) for k, v in all_losses.items()}
    avg_iou = total_iou / total_boxes

    masks_to_store = masks
    if masks_to_store is not None:
        masks_to_store = masks_to_store.data.cpu().clone()
    masks_pred_to_store = masks_pred
    if masks_pred_to_store is not None:
        masks_pred_to_store = masks_pred_to_store.data.cpu().clone()

    batch_data = {
        'objs': objs.detach().cpu().clone(),
        'boxes_gt': boxes.detach().cpu().clone(),
        'masks_gt': masks_to_store,
        'triples': triples.detach().cpu().clone(),
        'obj_to_img': obj_to_img.detach().cpu().clone(),
        'triple_to_img': triple_to_img.detach().cpu().clone(),
        'boxes_pred': boxes_pred.detach().cpu().clone(),
        'masks_pred': masks_pred_to_store,
    }
    out = [mean_losses, samples, batch_data, avg_iou]
    return tuple(out)
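# check_model() accumulates total_iou via jaccard(boxes_pred, boxes), a helper not
# shown in this file. A plausible sketch, assuming boxes are (N, 4) tensors in
# (x0, y0, x1, y1) format within a [0, 1] coordinate system and that the helper
# returns the summed IoU over the N box pairs (so that
# avg_iou = total_iou / total_boxes gives the mean):
def jaccard_sketch(boxes_a, boxes_b):
    # intersection rectangle per box pair
    x0 = torch.max(boxes_a[:, 0], boxes_b[:, 0])
    y0 = torch.max(boxes_a[:, 1], boxes_b[:, 1])
    x1 = torch.min(boxes_a[:, 2], boxes_b[:, 2])
    y1 = torch.min(boxes_a[:, 3], boxes_b[:, 3])
    inter = (x1 - x0).clamp(min=0) * (y1 - y0).clamp(min=0)
    area_a = (boxes_a[:, 2] - boxes_a[:, 0]) * (boxes_a[:, 3] - boxes_a[:, 1])
    area_b = (boxes_b[:, 2] - boxes_b[:, 0]) * (boxes_b[:, 3] - boxes_b[:, 1])
    union = area_a + area_b - inter
    return (inter / union).sum()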
def check_model(args, t, loader, model, logger=None, log_tag='', write_images=False):
    # CPU variant: use torch.FloatTensor / torch.LongTensor instead of CUDA dtypes
    float_dtype = torch.FloatTensor
    long_dtype = torch.LongTensor
    num_samples = 0
    all_losses = defaultdict(list)
    total_iou = 0
    total_boxes = 0

    if not os.path.isdir(args.output_dir):
        os.mkdir(args.output_dir)
        print('Created %s' % args.output_dir)
    img_dir = os.path.join(args.output_dir, 'img_dir')
    if not os.path.isdir(img_dir):
        os.mkdir(img_dir)
        print('Created %s' % img_dir)

    t = 0
    t1 = 0

    # relationship (triplet) database
    triplet_db = dict()

    # iterate over all batches of images
    with torch.no_grad():
        for batch in loader:
            batch = [tensor for tensor in batch]  # CPU: no .cuda() here
            masks = None
            if len(batch) == 6:
                imgs, objs, boxes, triples, obj_to_img, triple_to_img = batch
            elif len(batch) == 7:
                imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img = batch

            # Run the model as it has been run during training.
            # layout_boxes are the GT boxes; masks_pred and layout_masks
            # (GT masks) are None because this is VG.
            model_masks = masks
            model_out = model(objs, triples, obj_to_img,
                              boxes_gt=boxes, masks_gt=model_masks)
            (imgs_pred, boxes_pred, masks_pred, objs_vec, layout, layout_boxes,
             layout_masks, obj_to_img, sg_context_pred, sg_context_pred_d,
             predicate_scores, obj_embeddings, pred_embeddings) = model_out

            # The only model output used here (for now) is the obj/pred
            # embeddings; GT bounding boxes go into the database for patch
            # extraction. detach() removes the outputs from the autograd graph.
            obj_embeddings = obj_embeddings.detach()
            pred_embeddings = pred_embeddings.detach()

            num_batch_samples = imgs.size(0)
            num_samples += num_batch_samples
            if num_samples >= args.num_val_samples:
                break

            super_boxes = []
            # record all triplets, per image, in a batch
            file_path = os.path.join(img_dir, 'all_batch_triplets.txt')
            f = open(file_path, 'w')
            for i in range(0, num_batch_samples):
                print('Processing image', i + 1, 'of batch size', args.batch_size)
                f.write('---------- image ' + str(i) + '----------\n')

                # From the batch we need objs, triples, triple_to_img, and
                # obj_to_img (their indices tie triplets to images); from the
                # model, obj_embed and pred_embed.
                # Find all triple indices for image i.
                tr_index = np.where(triple_to_img.numpy() == i)
                tr_img = triples.numpy()[tr_index, :]
                tr_img = np.squeeze(tr_img, axis=0)

                # s, o index into the "objs" array (yielding 'object_idx' for
                # vocab['object_idx_to_name']); p is used as-is (yielding
                # 'pred_idx' for vocab['pred_idx_to_name']).
                s, p, o = np.squeeze(np.split(tr_img, 3, axis=1))

                # iterate over all triplets in the image to form
                # (subject, predicate, object) tuples
                relationship_data = []
                num_triples = len(tr_img)
                for n in range(0, num_triples):
                    subj_index = s[n]
                    subj = np.array(model.vocab['object_idx_to_name'])[objs[subj_index]]
                    pred = np.array(model.vocab['pred_idx_to_name'])[p[n]]
                    obj_index = o[n]
                    obj = np.array(model.vocab['object_idx_to_name'])[objs[obj_index]]
                    triplet = tuple([subj, pred, obj])
                    relationship_data += [triplet]
                    f.write('(' + db_utils.tuple_to_string(triplet) + ')\n')

                    # GT bounding boxes: (x0, y0, x1, y1) format, in a [0, 1]
                    # coordinate system, taken from "boxes" (one per object in
                    # "objs") via subj_index and obj_index.
                    subj_bbox = boxes[subj_index].numpy().tolist()  # list(...) won't work here
                    obj_bbox = boxes[obj_index].numpy().tolist()
                    print(triplet, subj_bbox, obj_bbox)

                    # SG GCNN embeddings to be used for search
                    # (the nth triplet corresponds to the nth embedding)
                    subj_embed = obj_embeddings[subj_index].numpy().tolist()
                    pred_embed = pred_embeddings[n].numpy().tolist()
                    obj_embed = obj_embeddings[obj_index].numpy().tolist()
                    pooled_embed = subj_embed + pred_embed + obj_embed

                    # add the relationship to the database
                    relationship = dict()
                    relationship['subject'] = subj
                    relationship['predicate'] = pred
                    relationship['object'] = obj
                    relationship['subject_bbox'] = subj_bbox
                    relationship['object_bbox'] = obj_bbox

                    # super box: the union of the subject and object boxes
                    min_x = np.min([subj_bbox[0], obj_bbox[0]])
                    min_y = np.min([subj_bbox[1], obj_bbox[1]])
                    max_x = np.max([subj_bbox[2], obj_bbox[2]])
                    max_y = np.max([subj_bbox[3], obj_bbox[3]])
                    relationship['super_bbox'] = [min_x, min_y, max_x, max_y]
                    super_boxes += [relationship['super_bbox']]

                    relationship['embed'] = pooled_embed

                    # key on the string form consistently; the original checked
                    # membership with the tuple but stored under the string key
                    key = db_utils.tuple_to_string(triplet)
                    if key not in triplet_db:
                        triplet_db[key] = [relationship]
                    else:
                        triplet_db[key] += [relationship]

                print('---------------------------------')
                print('------- end of processing for image --------------------------')

            # process batch images by visualizing triplets on all of them
            f.close()

            # measure IoU as a basic metric for bbox prediction
            total_iou += jaccard(boxes_pred, boxes)
            total_boxes += boxes_pred.size(0)

            imgs = imgs.detach()
            boxes_pred = boxes_pred.detach()

            # deprocess (denormalize) images
            samples = {}
            samples['gt_imgs'] = imgs
            for k, v in samples.items():
                samples[k] = imagenet_deprocess_batch(v)

            # GT images
            np_imgs = [gt.cpu().numpy().transpose(1, 2, 0) for gt in imgs]

            # visualize predicted boxes on images (the output image is always
            # 64x64 given how the current model is trained)
            pred_overlaid_images = vis.overlay_boxes(
                np_imgs, model.vocab, objs_vec, boxes_pred, obj_to_img, W=256, H=256)
            # visualize GT boxes on images
            overlaid_images = vis.overlay_boxes(
                np_imgs, model.vocab, objs_vec, boxes, obj_to_img, W=256, H=256)

            # triples to image
            print(triple_to_img)
            print(torch.tensor(super_boxes))

            # visualize superboxes with the object boxes underneath
            norm_overlaid_images = [i / 255.0 for i in overlaid_images]
            sb_overlaid_images = vis.overlay_boxes(
                norm_overlaid_images, model.vocab, objs_vec,
                torch.tensor(super_boxes), triple_to_img,
                W=256, H=256, drawText=False, drawSuperbox=True)

            print("---- saving first GT image of batch -----")
            img_gt = np_imgs[0]  # can visualize [0-1] or [0-255] color scaling
            imwrite('./test_GT_img_vg.png', img_gt)

            print("---- saving first overlay image of batch -----")
            imwrite('./test_overlay_img_vg.png', overlaid_images[0])

            print("---- saving first superbox overlay image of batch -----")
            imwrite('./test_sb_overlay_img_vg.png', sb_overlaid_images[0])

            print("---- saving batch images -----")
            t = 0
            for gt_img, pred_overlaid_img, overlaid_img, sb_overlaid_img in zip(
                    np_imgs, pred_overlaid_images, overlaid_images, sb_overlaid_images):
                img_path = os.path.join(img_dir, '%06d_gt_img.png' % t)
                imwrite(img_path, gt_img)
                img_path = os.path.join(img_dir, '%06d_pred_bbox.png' % t)
                imwrite(img_path, pred_overlaid_img)
                img_path = os.path.join(img_dir, '%06d_gt_bbox_img.png' % t)
                imwrite(img_path, overlaid_img)
                img_path = os.path.join(img_dir, '%06d_gt_superbox_img.png' % t)
                imwrite(img_path, sb_overlaid_img)
                t += 1

            # write the database to a JSON file
            db_utils.write_to_JSON(triplet_db, "vg_test_db.json")

    masks_to_store = masks
    if masks_to_store is not None:
        masks_to_store = masks_to_store.data.cpu().clone()
    masks_pred_to_store = masks_pred
    if masks_pred_to_store is not None:
        masks_pred_to_store = masks_pred_to_store.data.cpu().clone()

    out = [samples]

    avg_iou = total_iou / total_boxes
    print('average bbox IoU = ', avg_iou.numpy())
    return tuple(out)
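# The triplet database above relies on two db_utils helpers that are external to
# this file. Minimal sketches of what they plausibly do, assuming
# tuple_to_string() joins the (subject, predicate, object) strings and
# write_to_JSON() dumps the dict; the real implementations may differ:
def tuple_to_string_sketch(triplet):
    # ('man', 'riding', 'horse') -> 'man,riding,horse'
    return ','.join(str(t) for t in triplet)

def write_to_JSON_sketch(db, path):
    import json
    with open(path, 'w') as fp:
        json.dump(db, fp)  # every stored value must already be JSON-serializable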
def check_model(args, t_2, loader, model, device, logger=None, log_tag='', write_images=False):
    if device == torch.device('cpu'):
        float_dtype = torch.FloatTensor
        long_dtype = torch.LongTensor
    else:
        float_dtype = torch.cuda.FloatTensor
        long_dtype = torch.cuda.LongTensor
    num_samples = 0
    all_losses = defaultdict(list)
    total_iou = 0
    total_boxes = 0

    draw_scene_graph = True
    # indices of images whose scene graphs looked sane on manual inspection
    sane_sg_im_list = [
        0, 4, 9, 18, 22, 30, 32, 42, 54, 58, 63, 66, 69, 74, 76, 80, 82, 88,
        96, 100, 101, 107, 111, 116, 136, 147, 168, 170, 175, 195, 196, 204,
        211, 214, 218, 234, 241, 244, 246, 261, 262, 268, 288, 291, 313, 339,
        343, 369, 374, 376, 402, 407, 410, 422, 425, 431, 440, 455, 461, 463,
        465, 467, 468, 471, 474, 489, 493
    ]

    if not os.path.isdir(args.output_dir):
        os.mkdir(args.output_dir)
        print('Created %s' % args.output_dir)
    img_dir = os.path.join(args.output_dir, 'img_dir')
    if not os.path.isdir(img_dir):
        os.mkdir(img_dir)
        print('Created %s' % img_dir)

    t = 0
    t1 = 0

    with torch.no_grad():
        for batch in loader:
            batch = [tensor for tensor in batch]  # CPU: no .cuda() here
            masks = None
            if len(batch) == 6:
                imgs, objs, boxes, triples, obj_to_img, triple_to_img = batch
            elif len(batch) == 7:
                imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img = batch
            predicates = triples[:, 1]
            objs = objs.detach()
            triples = triples.detach()

            # Run the model as it has been run during training
            model_masks = masks
            model_out = model(objs, triples, obj_to_img,
                              boxes_gt=boxes, masks_gt=model_masks)
            (imgs_pred, boxes_pred, masks_pred, objs_vec, layout, layout_boxes,
             layout_masks, obj_to_img, sg_context_pred, sg_context_pred_d,
             predicate_scores) = model_out

            num_samples += imgs.size(0)
            if num_samples >= args.num_val_samples:
                break

            samples = {}
            samples['gt_img'] = imgs

            total_iou += jaccard(boxes_pred, boxes)
            total_boxes += boxes_pred.size(0)

            if write_images:
                model_out = model(objs, triples, obj_to_img, boxes_gt=boxes, masks_gt=masks)
                samples['gt_box_gt_mask'] = model_out[0]
                model_out = model(objs, triples, obj_to_img, boxes_gt=boxes)
                samples['gt_box_pred_mask'] = model_out[0]
                model_out = model(objs, triples, obj_to_img)
                samples['pred_box_pred_mask'] = model_out[0]

                layout_preds = {}
                layout_preds['pred_boxes'] = model_out[5]
                layout_preds['pred_masks'] = model_out[6]

                for k, v in samples.items():
                    samples[k] = imagenet_deprocess_batch(v)

                # 3. Log ground truth and predicted images
                with torch.no_grad():
                    gt_imgs = samples['gt_img'].detach()
                    p_gbox_pmsk_img = samples['gt_box_pred_mask'].detach()
                    p_test_imgs = samples['pred_box_pred_mask'].detach()
                    p_test_boxes = layout_preds['pred_boxes']
                    p_test_masks = layout_preds['pred_masks']

                    np_gt_imgs = [gt.cpu().numpy().transpose(1, 2, 0) for gt in gt_imgs]
                    np_gbox_pmsk_imgs = [pred.cpu().numpy().transpose(1, 2, 0)
                                         for pred in p_gbox_pmsk_img]
                    np_test_pred_imgs = [pred.cpu().numpy().transpose(1, 2, 0)
                                         for pred in p_test_imgs]
                    pred_layout_boxes = p_test_boxes
                    pred_layout_masks = p_test_masks
                    np_all_imgs = []

                    # Overlay predicted layout boxes on the predicted images
                    pred_layout_boxes_t = pred_layout_boxes.detach()
                    overlaid_images = vis.overlay_boxes(
                        np_test_pred_imgs, model.vocab, objs_vec,
                        pred_layout_boxes_t, obj_to_img, W=64, H=64)

                    for gt_img, gtb_pm_img, pred_img, overlaid in zip(
                            np_gt_imgs, np_gbox_pmsk_imgs, np_test_pred_imgs,
                            overlaid_images):
                        img_path = os.path.join(img_dir, '%06d_gt_img.png' % t)
                        imwrite(img_path, gt_img)
                        img_path = os.path.join(img_dir, '%06d_gtb_pm_img.png' % t)
                        imwrite(img_path, gtb_pm_img)
                        img_path = os.path.join(img_dir, '%06d_pred_img.png' % t)
                        imwrite(img_path, pred_img)
                        overlaid_path = os.path.join(img_dir, '%06d_overlaid.png' % t)
                        imwrite(overlaid_path, overlaid)
                        t = t + 1
                    print(t)

                # Draw scene graphs
                tot_obj = 0
                for b_t in range(imgs.size(0)):
                    sg_objs = objs[obj_to_img == b_t]
                    sg_rels = triples[triple_to_img == b_t]
                    if draw_scene_graph:
                        sg_img = vis.draw_scene_graph_temp(sg_objs, sg_rels, tot_obj,
                                                           vocab=model.vocab)
                        sg_img_path = os.path.join(img_dir, '%06d_sg.png' % t1)
                        imwrite(sg_img_path, sg_img)
                    if t1 in sane_sg_im_list:
                        vis.draw_scene_graph_json(t1, sg_objs, sg_rels, tot_obj,
                                                  vocab=model.vocab)
                    tot_obj = tot_obj + len(sg_objs)
                    t1 = t1 + 1

    masks_to_store = masks
    if masks_to_store is not None:
        masks_to_store = masks_to_store.data.cpu().clone()
    masks_pred_to_store = masks_pred
    if masks_pred_to_store is not None:
        masks_pred_to_store = masks_pred_to_store.data.cpu().clone()

    batch_data = {
        'objs': objs.detach().cpu().clone(),
        'boxes_gt': boxes.detach().cpu().clone(),
        'masks_gt': masks_to_store,
        'triples': triples.detach().cpu().clone(),
        'obj_to_img': obj_to_img.detach().cpu().clone(),
        'triple_to_img': triple_to_img.detach().cpu().clone(),
        'boxes_pred': boxes_pred.detach().cpu().clone(),
        'masks_pred': masks_pred_to_store,
    }
    out = [samples]

    avg_iou = total_iou / total_boxes
    print(avg_iou.numpy())
    return tuple(out)
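# Every variant calls imagenet_deprocess_batch() before saving images. A sketch
# of what it presumably does, assuming model outputs are ImageNet-normalized CHW
# float tensors and the result is a [0, 1] float batch on the CPU (the real
# helper may instead rescale to uint8):
IMAGENET_MEAN = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
IMAGENET_STD = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

def imagenet_deprocess_batch_sketch(imgs):
    imgs = imgs.detach().cpu()
    # undo per-channel normalization, then clamp into displayable range
    return (imgs * IMAGENET_STD + IMAGENET_MEAN).clamp(0, 1)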
def main(args):
    if not os.path.isfile(args.checkpoint):
        print('ERROR: Checkpoint file "%s" not found' % args.checkpoint)
        print('Maybe you forgot to download pretrained models? Try running:')
        print('bash scripts/download_models.sh')
        return

    if not os.path.isdir(args.output_dir):
        print('Output directory "%s" does not exist; creating it' % args.output_dir)
        os.makedirs(args.output_dir)

    if args.device == 'cpu':
        device = torch.device('cpu')
    elif args.device == 'gpu':
        device = torch.device('cuda:0')
        if not torch.cuda.is_available():
            print('WARNING: CUDA not available; falling back to CPU')
            device = torch.device('cpu')

    # Load the model, with a bit of care in case there are no GPUs
    map_location = 'cpu' if device == torch.device('cpu') else None
    checkpoint = torch.load(args.checkpoint, map_location=map_location)
    model = Sg2ImModel(**checkpoint['model_kwargs'])
    model.load_state_dict(checkpoint['model_state'])
    model.eval()
    model.to(device)

    # Load the scene graphs, one JSON file per graph
    SCENE_GRAPH_DIR = args.scene_graph_dir
    scene_graphs = []
    for filename in os.listdir(SCENE_GRAPH_DIR):
        print("opening file: {}".format(filename))
        with open(os.path.join(SCENE_GRAPH_DIR, filename), 'r') as f:
            sg = json.load(f)
        scene_graphs.append(sg)

    for sg_idx, sg in enumerate(scene_graphs):
        # Run the model forward
        with torch.no_grad():
            try:
                imgs, boxes_pred, masks_pred, _ = model.forward_json(sg)
            except ValueError as err:
                print("ValueError: {}".format(err))
                continue
        imgs = imagenet_deprocess_batch(imgs)

        # Save the generated images
        for i in range(imgs.shape[0]):
            img_np = imgs[i].numpy().transpose(1, 2, 0)
            img_path = os.path.join(args.output_dir, 'img%06d.png' % sg_idx)
            imwrite(img_path, img_np)

        # Draw the scene graph (the original iterated over the dict's keys,
        # which would fail; sg itself is a single scene graph here)
        if args.draw_scene_graphs == 1:
            sg_img = vis.draw_scene_graph(sg['objects'], sg['relationships'])
            sg_img_path = os.path.join(args.output_dir, 'sg%06d.png' % sg_idx)
            imwrite(sg_img_path, sg_img)
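# The directory-based main() above expects one JSON scene graph per file. Based
# on the keys the code reads ('objects', 'relationships'), a minimal input file
# might look like the following; the exact relationship encoding, with triples
# of [subject_index, predicate_string, object_index], is an assumption:
example_scene_graph = {
    "objects": ["sky", "grass", "sheep"],
    "relationships": [
        [0, "above", 1],       # sky above grass
        [2, "standing on", 1]  # sheep standing on grass
    ]
}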
def check_model(args, t, loader, model, logger=None, log_tag='', write_images=False):
    # CPU variant of the dtypes
    float_dtype = torch.FloatTensor
    long_dtype = torch.LongTensor
    num_samples = 0
    all_losses = defaultdict(list)
    total_iou = 0
    total_boxes = 0

    if not os.path.isdir(args.output_dir):
        os.mkdir(args.output_dir)
        print('Created %s' % args.output_dir)
    img_dir = os.path.join(args.output_dir, 'img_dir')
    if not os.path.isdir(img_dir):
        os.mkdir(img_dir)
        print('Created %s' % img_dir)

    t = 0
    t1 = 0

    with torch.no_grad():
        for batch in loader:
            batch = [tensor for tensor in batch]  # CPU: no .cuda() here
            masks = None
            if len(batch) == 6:
                imgs, objs, boxes, triples, obj_to_img, triple_to_img = batch
            elif len(batch) == 7:
                imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img = batch
            predicates = triples[:, 1]

            # pdb.set_trace()  # debugging breakpoint, disabled so the loop runs

            objs = objs.detach()
            triples = triples.detach()

            # Run the model as it has been run during training
            model_masks = masks
            model_out = model(objs, triples, obj_to_img,
                              boxes_gt=boxes, masks_gt=model_masks)
            (imgs_pred, boxes_pred, masks_pred, objs_vec, layout, layout_boxes,
             layout_masks, obj_to_img, sg_context_pred, sg_context_pred_d,
             predicate_scores) = model_out

            num_samples += imgs.size(0)
            if num_samples >= args.num_val_samples:
                break

            samples = {}
            samples['gt_img'] = imgs

            model_out = model(objs, triples, obj_to_img, boxes_gt=boxes, masks_gt=masks)
            samples['gt_box_gt_mask'] = model_out[0]
            model_out = model(objs, triples, obj_to_img, boxes_gt=boxes)
            samples['gt_box_pred_mask'] = model_out[0]
            model_out = model(objs, triples, obj_to_img)
            samples['pred_box_pred_mask'] = model_out[0]

            layout_preds = {}
            layout_preds['pred_boxes'] = model_out[5]
            layout_preds['pred_masks'] = model_out[6]

            for k, v in samples.items():
                samples[k] = imagenet_deprocess_batch(v)

            if write_images:
                # 3. Log ground truth and predicted images
                with torch.no_grad():
                    gt_imgs = samples['gt_img'].detach()
                    p_gbox_pmsk_img = samples['gt_box_pred_mask'].detach()
                    p_test_imgs = samples['pred_box_pred_mask'].detach()
                    p_test_boxes = layout_preds['pred_boxes']
                    p_test_masks = layout_preds['pred_masks']

                    np_gt_imgs = [gt.cpu().numpy().transpose(1, 2, 0) for gt in gt_imgs]
                    np_gbox_pmsk_imgs = [pred.cpu().numpy().transpose(1, 2, 0)
                                         for pred in p_gbox_pmsk_img]
                    np_test_pred_imgs = [pred.cpu().numpy().transpose(1, 2, 0)
                                         for pred in p_test_imgs]
                    pred_layout_boxes = p_test_boxes
                    pred_layout_masks = p_test_masks
                    np_all_imgs = []

                    # Overlay predicted layout boxes on the predicted images
                    pred_layout_boxes_t = pred_layout_boxes.detach()
                    overlaid_images = vis.overlay_boxes(
                        np_test_pred_imgs, model.vocab, objs_vec,
                        pred_layout_boxes_t, obj_to_img, W=64, H=64)

                    for gt_img, gtb_pm_img, pred_img, overlaid in zip(
                            np_gt_imgs, np_gbox_pmsk_imgs, np_test_pred_imgs,
                            overlaid_images):
                        img_path = os.path.join(img_dir, '%06d_gt_img.png' % t)
                        imwrite(img_path, gt_img)
                        img_path = os.path.join(img_dir, '%06d_gtb_pm_img.png' % t)
                        imwrite(img_path, gtb_pm_img)
                        img_path = os.path.join(img_dir, '%06d_pred_img.png' % t)
                        imwrite(img_path, pred_img)
                        overlaid_path = os.path.join(img_dir, '%06d_overlaid.png' % t)
                        imwrite(overlaid_path, overlaid)
                        t = t + 1

            total_iou += jaccard(boxes_pred, boxes)
            total_boxes += boxes_pred.size(0)

            # Draw scene graphs
            tot_obj = 0
            for b_t in range(imgs.size(0)):
                sg_objs = objs[obj_to_img == b_t]
                sg_rels = triples[triple_to_img == b_t]
                sg_img = vis.draw_scene_graph_temp(sg_objs, sg_rels, tot_obj,
                                                   vocab=model.vocab)
                sg_img_path = os.path.join(img_dir, '%06d_sg.png' % t1)
                imwrite(sg_img_path, sg_img)
                tot_obj = tot_obj + len(sg_objs)
                t1 = t1 + 1

    masks_to_store = masks
    if masks_to_store is not None:
        masks_to_store = masks_to_store.data.cpu().clone()
    masks_pred_to_store = masks_pred
    if masks_pred_to_store is not None:
        masks_pred_to_store = masks_pred_to_store.data.cpu().clone()

    batch_data = {
        'objs': objs.detach().cpu().clone(),
        'boxes_gt': boxes.detach().cpu().clone(),
        'masks_gt': masks_to_store,
        'triples': triples.detach().cpu().clone(),
        'obj_to_img': obj_to_img.detach().cpu().clone(),
        'triple_to_img': triple_to_img.detach().cpu().clone(),
        'boxes_pred': boxes_pred.detach().cpu().clone(),
        'masks_pred': masks_pred_to_store,
    }
    out = [samples]

    avg_iou = total_iou / total_boxes
    print('avg iou:', avg_iou.numpy())
    return tuple(out)
def check_model(args, t, loader, model, log_tag='', write_images=False):
    if torch.cuda.is_available():
        float_dtype = torch.cuda.FloatTensor
        long_dtype = torch.cuda.LongTensor
    else:
        float_dtype = torch.FloatTensor
        long_dtype = torch.LongTensor
    num_samples = 0
    all_losses = defaultdict(list)
    total_iou = 0
    total_boxes = 0

    if not os.path.isdir(args.output_dir):
        os.mkdir(args.output_dir)
        print('Created %s' % args.output_dir)
    img_dir = os.path.join(args.output_dir, 'img_dir')
    if not os.path.isdir(img_dir):
        os.mkdir(img_dir)
        print('Created %s' % img_dir)

    t = 0

    # relationship (triplet) database
    triplet_db = dict()

    # iterate over all batches of images
    with torch.no_grad():
        for batch in loader:
            if torch.cuda.is_available():
                batch = [tensor.cuda() for tensor in batch]
            else:
                batch = [tensor for tensor in batch]
            masks = None
            if len(batch) == 6:  # VG
                imgs, objs, boxes, triples, obj_to_img, triple_to_img = batch
            elif len(batch) == 10:  # COCO
                # (earlier COCO loaders returned 8- or 9-tuples without obj_contours)
                (imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img,
                 triplet_masks, triplet_contours, obj_contours) = batch
            predicates = triples[:, 1]

            # Run the model as it has been run during training
            model_masks = masks
            model_out = model(objs, triples, obj_to_img,
                              boxes_gt=boxes, masks_gt=model_masks)
            (imgs_pred, boxes_pred, masks_pred, objs_vec, layout, layout_boxes,
             layout_masks, obj_to_img, sg_context_pred, sg_context_pred_d,
             predicate_scores, obj_embeddings, pred_embeddings,
             triple_boxes_pred, triple_boxes_gt, triplet_masks_pred,
             triplet_contours_pred, obj_contours_pred) = model_out

            # To get layout boxes/masks from predicted boxes instead, run the
            # model again without GT boxes:
            # model_out = model(objs, triples, obj_to_img)
            # layout_boxes, layout_masks = model_out[5], model_out[6]

            # If object contours are predicted, derive bounding boxes from them;
            # if GT boxes were passed in, layout_masks correspond to GT boxes.
            if obj_contours_pred is not None and boxes_pred is None:
                boxes_pred = min_max_bbox_fr_contours(obj_contours_pred)

            if 0:  # debug visualization of contours, boxes, and masks
                import matplotlib.pyplot as plt
                cc = obj_contours.clone().view(-1, 12, 2)
                cp = obj_contours_pred.view(-1, 12, 2).clone().detach()
                cp = cp.cpu().numpy()
                bb = boxes[0].view(2, 2)
                bbp = boxes_pred.clone().detach()
                bbp = bbp[0].view(2, 2)
                fig, ax = plt.subplots()
                # ax.imshow(masks[0])  # without the mask, the origin is the lower-left corner
                ax.scatter(cc[0, :, 0], cc[0, :, 1], linewidth=0.5)
                ax.scatter(cp[0, :, 0], cp[0, :, 1], linewidth=0.5)
                ax.scatter(bb[:, 0], bb[:, 1], linewidth=1.0, marker="x")
                ax.scatter(bbp[:, 0], bbp[:, 1], linewidth=1.0, marker="x")
                plt.show()

                # display GT and predicted masks side by side
                masks_pred = masks_pred.detach()
                np_masks_pred = [mask.cpu().numpy() for mask in masks_pred]
                fig = plt.figure()
                ax1 = fig.add_subplot(1, 2, 1)
                ax1.imshow(masks[0])
                ax2 = fig.add_subplot(1, 2, 2)
                ax2.imshow(np_masks_pred[0])
                plt.show()
                # pdb.set_trace()  # debugging breakpoint

            num_batch_samples = imgs.size(0)
            num_samples += num_batch_samples
            if num_samples >= args.num_val_samples:
                break

            super_boxes = []
            # open a file to record all triplets, per image, in a batch
            file_path = os.path.join(img_dir, 'all_batch_triplets.txt')
            f = open(file_path, 'w')

            ### embedding extraction below ###
            for i in range(0, num_batch_samples):
                print('Processing image', i + 1, 'of batch size', args.batch_size)
                f.write('---------- image ' + str(i) + '----------\n')

                # From the batch we need objs, triples, triple_to_img, and
                # obj_to_img (their indices tie triplets to images); from the
                # model, obj_embed and pred_embed.
                # Find all triple indices for image i.
                # TODO: clean up code so it is numpy() equivalent in all places
                tr_index = np.where(triple_to_img.cpu().numpy() == i)
                tr_img = triples.cpu().numpy()[tr_index, :]
                tr_img = np.squeeze(tr_img, axis=0)

                # 8-point triple boxes
                np_triple_boxes_gt = np.array(triple_boxes_gt).astype(float)
                tr_img_boxes = np_triple_boxes_gt[tr_index]
                assert len(tr_img) == len(tr_img_boxes)

                # s, o index into the "objs" array (yielding 'object_idx' for
                # vocab['object_idx_to_name']); p is used as-is (yielding
                # 'pred_idx' for vocab['pred_idx_to_name']).
                s, p, o = np.squeeze(np.split(tr_img, 3, axis=1))

                # iterate over all triplets in the image to form
                # (subject, predicate, object) tuples; each triple carries
                # per-triple information that must be extracted here
                relationship_data = []
                num_triples = len(tr_img)
                for n in range(0, num_triples):
                    subj_index = s[n]
                    subj = np.array(model.vocab['object_idx_to_name'])[objs[subj_index]]
                    pred = np.array(model.vocab['pred_idx_to_name'])[p[n]]
                    obj_index = o[n]
                    obj = np.array(model.vocab['object_idx_to_name'])[objs[obj_index]]
                    triplet = tuple([subj, pred, obj])
                    relationship_data += [triplet]
                    print(triplet)
                    f.write('(' + db_utils.tuple_to_string(triplet) + ')\n')

                    # GT bounding boxes: (x0, y0, x1, y1) format, in a [0, 1]
                    # coordinate system. The 8-point triple box holds the subject
                    # box in the first four entries and the object box in the
                    # last four. (The original sliced 0:5, which grabs five
                    # values; 0:4 matches the stated format.)
                    subj_bbox = tr_img_boxes[n, 0:4]
                    obj_bbox = tr_img_boxes[n, 4:8]
                    print(triplet, subj_bbox, obj_bbox)

                    # SG GCNN embeddings to be used for search
                    # (the nth triplet corresponds to the nth embedding)
                    subj_embed = obj_embeddings[subj_index].cpu().numpy().tolist()
                    pred_embed = pred_embeddings[n].cpu().numpy().tolist()
                    obj_embed = obj_embeddings[obj_index].cpu().numpy().tolist()
                    pooled_embed = subj_embed + pred_embed + obj_embed

                    # add the relationship to the database
                    relationship = dict()
                    relationship['subject'] = subj
                    relationship['predicate'] = pred
                    relationship['object'] = obj
                    # JSON can't serialize np.array(), so store lists
                    relationship['subject_bbox'] = subj_bbox.tolist()
                    relationship['object_bbox'] = obj_bbox.tolist()

                    # super box: the union of the subject and object boxes
                    min_x = np.min([subj_bbox[0], obj_bbox[0]])
                    min_y = np.min([subj_bbox[1], obj_bbox[1]])
                    max_x = np.max([subj_bbox[2], obj_bbox[2]])
                    max_y = np.max([subj_bbox[3], obj_bbox[3]])
                    relationship['super_bbox'] = [min_x, min_y, max_x, max_y]
                    super_boxes += [relationship['super_bbox']]

                    relationship['embed'] = pooled_embed

                    # key on the string form consistently; the original checked
                    # membership with the tuple but stored under the string key
                    key = db_utils.tuple_to_string(triplet)
                    if key not in triplet_db:
                        triplet_db[key] = [relationship]
                    else:
                        triplet_db[key] += [relationship]

                print('---------------------------------')
                print('------- end of processing for image --------------------------')

            # process batch images by visualizing triplets on all of them
            f.close()

            # measure IoU as a basic metric for bbox prediction
            total_iou += jaccard(boxes_pred, boxes)
            total_boxes += boxes_pred.size(0)

            imgs = imgs.detach()
            triplet_masks = triplet_masks.detach()
            if triplet_masks_pred is not None:
                triplet_masks_pred = triplet_masks_pred.detach()
            else:
                triplet_masks_pred = triplet_masks
            boxes_pred = boxes_pred.detach()

            # deprocess (denormalize) images
            samples = {}
            samples['gt_imgs'] = imgs
            for k, v in samples.items():
                samples[k] = imagenet_deprocess_batch(v)

            # GT images
            np_imgs = [gt.cpu().numpy().transpose(1, 2, 0) for gt in imgs]
            np_triplet_masks = [mask.cpu().numpy() for mask in triplet_masks]
            np_triplet_masks_pred = [mask.cpu().numpy() for mask in triplet_masks_pred]
            # object masks (predicted, GT, layout)
            np_masks_pred = [mask.cpu().numpy() for mask in masks_pred]
            np_masks = [mask.cpu().numpy() for mask in model_masks]
            np_layout_masks = [mask.cpu().numpy() for mask in layout_masks]

            # visualize predicted boxes on images (the output image is always
            # 64x64 given how the current model is trained)
            pred_overlaid_images = vis.overlay_boxes(
                np_imgs, model.vocab, objs_vec, boxes_pred, obj_to_img, W=256, H=256)

            # predicted layouts with bounding boxes (layout_boxes may be the GT
            # boxes; layout_boxes = boxes_pred is also possible)
            layouts = vis.debug_layout_mask(
                model.vocab, objs, boxes_pred, layout_masks, obj_to_img, W=256, H=256)

            # visualize GT boxes on images
            overlaid_images = vis.overlay_boxes(
                np_imgs, model.vocab, objs_vec, boxes, obj_to_img, W=256, H=256)

            # superboxes with the object boxes underneath (disabled)
            # norm_overlaid_images = [i / 255.0 for i in overlaid_images]
            # sb_overlaid_images = vis.overlay_boxes(
            #     norm_overlaid_images, model.vocab, objs_vec,
            #     torch.tensor(super_boxes), triple_to_img,
            #     W=256, H=256, drawText=False, drawSuperbox=True)

            import matplotlib.pyplot as plt  # ideally at module scope

            print("---- saving first GT image of batch -----")
            img_gt = np_imgs[0]  # can visualize [0-1] or [0-255] color scaling
            imwrite('./test_GT_img_coco.png', img_gt)

            print("---- saving first overlay image of batch -----")
            imwrite('./test_overlay_img_coco.png', overlaid_images[0])

            print("---- saving first layout image of batch -----")
            imwrite('./test_layout_img_coco.png', layouts[0])

            # pdb.set_trace()  # debugging breakpoint, disabled so the loop runs

            # visualize predicted object contours against the GT singleton mask
            for c in range(len(obj_contours)):
                fig, ax = plt.subplots()
                ax.imshow(np_imgs[0])
                oc = obj_contours[c].view(12, 2) * 256.0
                ocp = obj_contours_pred[c].view(12, 2) * 256.0
                ax.scatter(oc[:, 0], oc[:, 1], linewidth=0.5)    # order was switched in coco_cont.py
                ax.scatter(ocp[:, 0], ocp[:, 1], linewidth=0.5)  # order was switched in coco_cont.py
                plt.show()

            print("---- saving batch images -----")
            if write_images:
                t = 0
                for gt_img, pred_overlaid_img, overlaid_img, layout_img in zip(
                        np_imgs, pred_overlaid_images, overlaid_images, layouts):
                    img_path = os.path.join(img_dir, '%06d_gt_img.png' % t)
                    imwrite(img_path, gt_img)
                    img_path = os.path.join(img_dir, '%06d_pred_bbox.png' % t)
                    imwrite(img_path, pred_overlaid_img)
                    img_path = os.path.join(img_dir, '%06d_gt_bbox_img.png' % t)
                    imwrite(img_path, overlaid_img)
                    img_path = os.path.join(img_dir, '%06d_layout.png' % t)
                    imwrite(img_path, layout_img)
                    t += 1

            # write the database to a JSON file
            db_utils.write_to_JSON(triplet_db, "coco_test_db.json")

    masks_to_store = masks
    if masks_to_store is not None:
        masks_to_store = masks_to_store.data.cpu().clone()
    masks_pred_to_store = masks_pred
    if masks_pred_to_store is not None:
        masks_pred_to_store = masks_pred_to_store.data.cpu().clone()

    out = []

    avg_iou = total_iou / total_boxes
    print('average bbox IoU = ', avg_iou.cpu().numpy())
    return tuple(out)
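# The COCO variant above derives boxes from predicted object contours via
# min_max_bbox_fr_contours(), which is external to this file. A sketch under the
# assumptions visible in the code (contours are flattened (N, 24) tensors of 12
# (x, y) points in a [0, 1] coordinate system):
def min_max_bbox_fr_contours_sketch(contours):
    pts = contours.view(-1, 12, 2)
    x0, _ = pts[:, :, 0].min(dim=1)
    y0, _ = pts[:, :, 1].min(dim=1)
    x1, _ = pts[:, :, 0].max(dim=1)
    y1, _ = pts[:, :, 1].max(dim=1)
    # (N, 4) boxes in (x0, y0, x1, y1) format, matching the rest of the pipeline
    return torch.stack([x0, y0, x1, y1], dim=1)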