def _work_cpu(process_id, model, dataset, args):
    """Refine CAMs into semantic-segmentation label maps on CPU (one worker).

    Each worker processes its own shard (``dataset[process_id]``): for every
    image it loads the precomputed CAM, propagates it along the edges
    predicted by ``model``, thresholds against a constant background score,
    and writes the resulting label map as a PNG to ``args.sem_seg_out_dir``.
    Images whose output PNG already exists are skipped, so runs are resumable.

    Args:
        process_id: Worker index; selects the shard and decides which worker
            prints coarse progress ticks.
        model: Network returning ``(edge, dp)`` for an image tensor; called
            under ``torch.no_grad()``.
        dataset: Split dataset indexable by worker id.
        args: Namespace providing ``sem_seg_out_dir``, ``cam_out_dir``,
            ``beta``, ``exp_times``, ``sem_seg_bg_thres``, ``num_workers``.
    """
    databin = dataset[process_id]
    data_loader = DataLoader(databin, shuffle=False, num_workers=1,
                             pin_memory=False)
    # One progress tick per quarter of the shard; max(1, ...) guards the
    # modulo against shards with fewer than 4 items (division by zero).
    quarter = max(1, len(databin) // 4)

    with torch.no_grad():
        for step, pack in tqdm(enumerate(data_loader), total=len(databin)):
            img_name = pack['name'][0]
            path = os.path.join(args.sem_seg_out_dir, img_name + '.png')
            if os.path.exists(path):
                continue  # already produced by a previous run
            try:
                orig_img_size = np.asarray(pack['size'])

                edge, dp = model(pack['img'][0])

                cam_dict = np.load(
                    os.path.join(args.cam_out_dir, img_name + '.npy'),
                    allow_pickle=True).item()
                cams = cam_dict['cam']
                # Shift class keys by one and prepend a 0 so that index 0 of
                # the padded score volume maps to the background label.
                keys = np.pad(cam_dict['keys'] + 1, (1, 0), mode='constant')

                rw = indexing.propagate_to_edge(
                    cams, edge, beta=args.beta, exp_times=args.exp_times,
                    radius=5)

                # Upsample back toward the original resolution and crop any
                # padding beyond the true image size.
                rw_up = F.interpolate(
                    rw, scale_factor=4, mode='bilinear',
                    align_corners=False)[
                        ..., 0, :orig_img_size[0], :orig_img_size[1]]
                rw_up = rw_up / torch.max(rw_up)

                # Prepend a constant background channel, then argmax over
                # channels to pick a label per pixel.
                rw_up_bg = F.pad(rw_up, (0, 0, 0, 0, 1, 0),
                                 value=args.sem_seg_bg_thres)
                rw_pred = torch.argmax(rw_up_bg, dim=0).cpu().numpy()
                rw_pred = keys[rw_pred]

                os.makedirs(os.path.dirname(path), exist_ok=True)
                imageio.imsave(path, rw_pred.astype(np.uint8))

                # Only the last worker reports progress, to avoid interleaved
                # output from multiple processes.
                if process_id == args.num_workers - 1 and step % quarter == 0:
                    print("%d " % ((5 * step + 1) // quarter), end='')
            except Exception as e:
                # Best-effort batch job: report the failing image and keep
                # processing the rest of the shard.
                print(e, img_name)
def _work_gpu(process_id, model, dataset, args):
    """Refine CAMs into instance-segmentation results on one GPU per worker.

    Each worker is pinned to CUDA device ``process_id`` and processes its own
    shard (``dataset[process_id]``): it predicts edges and a displacement
    field, clusters displacement centroids into candidate instances, splits
    the CAM scores per instance, propagates them along edges, and saves the
    detected instances as ``<name>.npy`` in ``args.ins_seg_out_dir``. Images
    whose output file already exists are skipped, so runs are resumable.

    Args:
        process_id: Worker index; also the CUDA device ordinal.
        model: Network returning ``(edge, dp)`` for an image tensor; moved to
            the worker's GPU and called under ``torch.no_grad()``.
        dataset: Split dataset indexable by worker id.
        args: Namespace providing ``ins_seg_out_dir``, ``cam_out_dir``,
            ``beta``, ``exp_times``, ``ins_seg_bg_thres``, ``num_workers``.
    """
    n_gpus = torch.cuda.device_count()
    databin = dataset[process_id]
    data_loader = DataLoader(databin, shuffle=False,
                             num_workers=args.num_workers // n_gpus,
                             pin_memory=False)
    # One progress tick per quarter of the shard; max(1, ...) guards the
    # modulo against shards with fewer than 4 items (division by zero).
    quarter = max(1, len(databin) // 4)

    with torch.no_grad(), cuda.device(process_id):
        model.cuda()
        for step, pack in tqdm(enumerate(data_loader), total=len(databin)):
            img_name = pack['name'][0]
            path = os.path.join(args.ins_seg_out_dir, img_name + '.npy')
            if os.path.exists(path):
                continue  # already produced by a previous run
            os.makedirs(os.path.dirname(path), exist_ok=True)
            size = np.asarray(pack['size'])

            edge, dp = model(pack['img'][0].cuda(non_blocking=True))
            dp = dp.cpu().numpy()

            cam_dict = np.load(
                os.path.join(args.cam_out_dir, img_name + '.npy'),
                allow_pickle=True).item()
            cams = cam_dict['cam'].cuda()
            keys = cam_dict['keys']

            # Cluster the displacement field into candidate instance masks,
            # then split CAM scores per instance.
            centroids = find_centroids_with_refinement(dp)
            instance_map = cluster_centroids(centroids, dp)
            instance_cam = separte_score_by_mask(cams, instance_map)

            rw = indexing.propagate_to_edge(
                instance_cam, edge, beta=args.beta, exp_times=args.exp_times,
                radius=5)

            # Upsample back toward the original resolution and crop any
            # padding beyond the true image size.
            rw_up = F.interpolate(
                rw, scale_factor=4, mode='bilinear',
                align_corners=False)[:, 0, :size[0], :size[1]]
            rw_up = rw_up / torch.max(rw_up)

            # Prepend a constant background channel before the argmax.
            rw_up_bg = F.pad(rw_up, (0, 0, 0, 0, 1, 0),
                             value=args.ins_seg_bg_thres)

            num_classes = len(keys)
            num_instances = instance_map.shape[0]

            # One-hot encode the per-pixel winner; [1:] drops the background
            # channel so only instance/class channels remain.
            instance_shape = torch.argmax(rw_up_bg, 0).cpu().numpy()
            instance_shape = pyutils.to_one_hot(
                instance_shape,
                maximum_val=num_instances * num_classes + 1)[1:]
            instance_class_id = np.repeat(keys, num_instances)

            detected = detect_instance(
                rw_up.cpu().numpy(), instance_shape, instance_class_id,
                max_fragment_size=size[0] * size[1] * 0.01)
            np.save(path, detected)

            # Only the last worker reports progress, to avoid interleaved
            # output from multiple processes.
            if process_id == n_gpus - 1 and step % quarter == 0:
                print("%d " % ((5 * step + 1) // quarter), end='')