def detect_instance(score_map, mask, class_id, max_fragment_size=0):
    """Convert pixel-wise instance masks into detection form.

    ``score_map``, ``mask`` and ``class_id`` are iterated in lockstep. Every
    mask that contains at least one foreground pixel is split into connected
    components (``skimage.measure.label`` with ``connectivity=1``), and each
    component becomes one detection.

    Args:
        score_map: per-instance score maps, one per entry of ``mask``.
        mask: per-instance binary masks.
        class_id: class label for each entry of ``mask``.
        max_fragment_size: components whose pixel sum is below this value are
            still emitted, but with a score of 0.

    Returns:
        dict with keys ``'score'``, ``'mask'`` and ``'class'``; each value is
        stacked along a new leading axis with one entry per detection. When
        there are no detections the arrays have a leading dimension of 0
        instead of ``np.stack`` raising ``ValueError`` on an empty list.
    """
    pred_score = []
    pred_label = []
    pred_mask = []

    for ag_score, ag_mask, ag_class in zip(score_map, mask, class_id):
        if np.sum(ag_mask) < 1:
            # Nothing was assigned to this instance slot.
            continue

        # Connected components analysis. The first one-hot channel is dropped;
        # presumably it is the background label (0) -- confirm against
        # pyutils.to_one_hot.
        labelled = skimage.measure.label(ag_mask, connectivity=1, background=0)
        segments = pyutils.to_one_hot(labelled)[1:]

        for seg_mask in segments:
            if np.sum(seg_mask) < max_fragment_size:
                # Small fragments are kept but ranked last.
                pred_score.append(0)
            else:
                pred_score.append(np.max(ag_score * seg_mask))
            pred_label.append(ag_class)
            pred_mask.append(seg_mask)

    if not pred_mask:
        # np.stack([]) raises ValueError; return well-formed empty results so
        # callers can still save / iterate over an image with no instances.
        mask_arr = np.asarray(mask)
        return {
            'score': np.zeros((0,), dtype=np.float32),
            'mask': np.zeros((0,) + mask_arr.shape[1:], dtype=mask_arr.dtype),
            'class': np.zeros((0,), dtype=np.asarray(class_id).dtype),
        }

    return {
        'score': np.stack(pred_score, 0),
        'mask': np.stack(pred_mask, 0),
        'class': np.stack(pred_label, 0),
    }
def _work(process_id, model, dataset, args):
    """Generate and save instance-segmentation labels for one dataset shard.

    For every image in ``dataset[process_id]`` the model's edge and
    displacement outputs are combined with the stored CAMs to produce
    per-instance score maps, which are converted to detections by
    ``detect_instance`` and written to
    ``<args.ins_seg_out_dir>/<img_name>.npy``. Images whose output file
    already exists are skipped.

    Args:
        process_id: index of this worker; selects the dataset shard and the
            CUDA device.
        model: network returning ``(edge, dp)`` for an image tensor.
        dataset: indexable collection of per-worker datasets.
        args: namespace providing ``num_workers``, ``ins_seg_out_dir``,
            ``cam_out_dir``, ``beta``, ``exp_times`` and
            ``ins_seg_bg_thres``.
    """
    n_gpus = torch.cuda.device_count()
    databin = dataset[process_id]
    data_loader = DataLoader(databin, shuffle=False,
                             num_workers=args.num_workers // n_gpus,
                             pin_memory=False)

    # Progress is reported roughly every 5% of the shard. Clamp the step to 1
    # so shards with fewer than 20 items do not divide/modulo by zero.
    progress_step = max(len(databin) // 20, 1)

    with torch.no_grad(), cuda.device(process_id):
        model.cuda()

        for step, pack in enumerate(data_loader):
            img_name = pack['name'][0]
            out_path = os.path.join(args.ins_seg_out_dir, img_name + '.npy')
            if os.path.exists(out_path):
                # Already processed (e.g. by an earlier, interrupted run).
                continue

            size = np.asarray(pack['size'])

            edge, dp = model(pack['img'][0].cuda(non_blocking=True))
            dp = dp.cpu().numpy()

            cam_dict = np.load(args.cam_out_dir + '/' + img_name + '.npy',
                               allow_pickle=True).item()
            cams = cam_dict['cam'].cuda()
            keys = cam_dict['keys']

            # Split the class activation maps into per-instance maps using
            # the clusters derived from the displacement field.
            centroids = find_centroids_with_refinement(dp)
            instance_map = cluster_centroids(centroids, dp)
            instance_cam = separte_score_by_mask(cams, instance_map)

            rw = indexing.propagate_to_edge(instance_cam, edge,
                                            beta=args.beta,
                                            exp_times=args.exp_times,
                                            radius=5)

            # Upsample x4, crop to the original image size and normalise by
            # the global maximum.
            rw_up = F.interpolate(
                rw, scale_factor=4, mode='bilinear',
                align_corners=False)[:, 0, :size[0], :size[1]]
            rw_up = rw_up / torch.max(rw_up)

            # Prepend a constant channel at index 0 so that argmax selects it
            # wherever no instance score exceeds the background threshold.
            rw_up_bg = F.pad(rw_up, (0, 0, 0, 0, 1, 0),
                             value=args.ins_seg_bg_thres)

            num_classes = len(keys)
            num_instances = instance_map.shape[0]

            instance_shape = torch.argmax(rw_up_bg, 0).cpu().numpy()
            instance_shape = pyutils.to_one_hot(
                instance_shape,
                maximum_val=num_instances * num_classes + 1)[1:]
            # Each class key is repeated once per instance slot.
            instance_class_id = np.repeat(keys, num_instances)

            detected = detect_instance(
                rw_up.cpu().numpy(), instance_shape, instance_class_id,
                max_fragment_size=size[0] * size[1] * 0.01)

            np.save(out_path, detected)

            if process_id == n_gpus - 1 and step % progress_step == 0:
                print("%d " % ((5 * step + 1) // progress_step), end='')
def cluster_centroids(centroids, displacement, thres=2.5):
    """Assign every pixel to a cluster of centroids and one-hot encode it.

    Pixels whose displacement magnitude is below ``thres`` are grouped into
    connected regions (``connectivity=1``). Each pixel then takes the region
    label found at the location given by ``centroids``.

    Args:
        centroids: indexable pair; ``centroids[0] * width + centroids[1]`` is
            used as a flat index into the label image.
        displacement: indexable pair of 2-D arrays (entries 0 and 1 are used).
        thres: threshold for grouping centroids (see supp).

    Returns:
        Result of ``pyutils.to_one_hot`` applied to the range-compressed
        cluster map.
    """
    first, second = displacement[0], displacement[1]
    strength = np.sqrt(second ** 2 + first ** 2)
    n_rows, n_cols = strength.shape

    # Connected regions of weak displacement.
    components = skimage.measure.label(
        strength < thres, connectivity=1, background=0)

    # Look up, for every pixel, the region label at its centroid position.
    flat_index = centroids[0] * n_cols + centroids[1]
    per_pixel_cluster = components.reshape(-1)[flat_index]

    # Shift labels by one before compressing them to a contiguous range.
    compact = imutils.compress_range(
        per_pixel_cluster.reshape(n_rows, n_cols) + 1)
    return pyutils.to_one_hot(compact)
def cluster_centroids(centroids, displacement, thres):
    """Assign every pixel to a cluster of centroids and one-hot encode it.

    Pixels whose displacement magnitude is below ``thres`` are grouped into
    4-connected regions. Each pixel then takes the region label found at the
    location given by ``centroids``.

    Args:
        centroids: indexable pair; ``centroids[0] * width + centroids[1]`` is
            used as a flat index into the label image.
        displacement: indexable pair of 2-D arrays (entries 0 and 1 are used).
        thres: displacement-magnitude threshold below which pixels are
            treated as belonging to a centroid region.

    Returns:
        Result of ``pyutils.to_one_hot`` applied to the range-compressed
        cluster map.
    """
    dp_strength = np.sqrt(displacement[1] ** 2 + displacement[0] ** 2)
    height, width = dp_strength.shape

    weak_dp_region = dp_strength < thres
    # ``neighbors=4`` was deprecated and later removed from scikit-image;
    # ``connectivity=1`` is the equivalent 4-neighbourhood for 2-D input.
    dp_label = skimage.measure.label(
        weak_dp_region, connectivity=1, background=0)
    dp_label_1d = dp_label.reshape(-1)

    # Look up, for every pixel, the region label at its centroid position.
    centroids_1d = centroids[0] * width + centroids[1]
    clusters_1d = dp_label_1d[centroids_1d]

    # Shift labels by one before compressing them to a contiguous range.
    cluster_map = imutils.compress_range(
        clusters_1d.reshape(height, width) + 1)
    return pyutils.to_one_hot(cluster_map)
def _work(process_id, model, dataset, args):
    """Generate and save instance-segmentation labels for one dataset shard.

    For every image in ``dataset[process_id]``: run the model to obtain an
    edge map and a displacement field, split the stored CAMs into
    per-instance maps, propagate them with a transition matrix built from
    edge-derived affinities, and convert the result into a list of
    detections saved to ``<args.ins_seg_out_dir>/<img_name>.npy``.

    Args:
        process_id: index of this worker; selects the dataset shard and the
            CUDA device.
        model: network returning ``(edge, dp)`` for an image tensor and an
            output-settings dict.
        dataset: indexable collection of per-worker datasets.
        args: namespace providing ``num_workers``, ``cam_dir``, ``u``,
            ``beta``, ``t``, ``ins_seg_bg_thres`` and ``ins_seg_out_dir``.
    """
    n_gpus = torch.cuda.device_count()
    databin = dataset[process_id]
    data_loader = DataLoader(databin, shuffle=False,
                             num_workers=args.num_workers // n_gpus,
                             pin_memory=False)

    # Progress is reported roughly every 5% of the shard. Clamp the step to 1
    # so shards with fewer than 20 items do not divide/modulo by zero.
    progress_step = max(len(databin) // 20, 1)

    with torch.no_grad(), cuda.device(process_id):
        model.cuda()

        for step, pack in enumerate(data_loader):
            img_name = pack['name'][0]
            orig_img_size = np.asarray(pack['size'])
            strided_size = imutils.get_strided_size(orig_img_size, 4)

            out_setting = {"flip": True}
            img_o = pack['img'][0][0]

            edge, dp = model(img_o.cuda(non_blocking=True), out_setting)
            edge = torch.sigmoid(edge)
            dp = dp.cpu().numpy()

            cam_dict = np.load(args.cam_dir + '/' + img_name + '.npy',
                               allow_pickle=True).item()
            cams = cam_dict['cam'].cuda()
            # Class keys are shifted by one and a leading 0 is prepended;
            # only keys[1:] (the shifted keys) is used further down.
            keys = np.pad(cam_dict['keys'] + 1, (1, 0), mode='constant')
            # Suppress CAM responses where the edge probability is high.
            cams_edgerm = cams * (1 - edge)

            # Split every class CAM into one map per centroid cluster.
            centroids = find_centroids_with_refinement(dp, args.u)
            instance_map = cluster_centroids(centroids, dp, thres=2.5)
            instance_map_expanded = torch.from_numpy(
                np.expand_dims(instance_map, 0).astype(np.float32))
            instance_cam = (torch.unsqueeze(cams_edgerm, 1)
                            * instance_map_expanded.cuda())
            instance_cam = instance_cam.view(
                instance_cam.size(0) * instance_cam.size(1),
                strided_size[0], strided_size[1])

            # Build a dense affinity matrix on a padded grid, then crop the
            # padding (5 px left/right/bottom) back off both pixel axes.
            edge_padded = F.pad(edge, (5, 5, 0, 5), mode='constant', value=1.0)
            path_index = adv_indexing.PathIndex(
                radius=5,
                default_size=(strided_size[0] + 5, strided_size[1] + 10))
            sparse_aff = adv_indexing.edge_to_affinity(
                torch.unsqueeze(edge_padded, 0),
                path_index.default_path_indices)
            dense_aff = affinity_sparse2dense(
                sparse_aff,
                path_index.default_src_indices,
                path_index.default_dst_indices,
                (strided_size[0] + 5) * (strided_size[1] + 10))
            dense_aff = dense_aff.view(
                strided_size[0] + 5, strided_size[1] + 10,
                strided_size[0] + 5, -1)[:-5, 5:-5, :-5, 5:-5]
            dense_aff = dense_aff.reshape(
                strided_size[0] * strided_size[1], -1)
            trans_mat = to_transition_matrix(dense_aff, beta=args.beta,
                                             times=args.t)

            # Propagate the instance CAMs, upsample x4, crop to the original
            # image size and normalise by the global maximum.
            rw = torch.matmul(
                instance_cam.view(-1, strided_size[0] * strided_size[1]),
                trans_mat)
            rw = rw.view(rw.size(0), 1, strided_size[0], strided_size[1])
            rw_up = F.interpolate(
                rw, scale_factor=4, mode='bilinear',
                align_corners=False)[:, 0,
                                     :orig_img_size[0], :orig_img_size[1]]
            rw_up_norm = rw_up / torch.max(rw_up)

            # Prepend a constant channel at index 0 so that argmax selects it
            # wherever no instance score exceeds the background threshold.
            rw_up_norm_bg = F.pad(rw_up_norm, (0, 0, 0, 0, 1, 0),
                                  value=args.ins_seg_bg_thres)

            num_classes = len(cam_dict['keys'])
            num_instances = instance_map.shape[0]

            instance_shape = torch.argmax(rw_up_norm_bg, 0).cpu().numpy()
            instance_shape_1hot = pyutils.to_one_hot(
                instance_shape,
                maximum_val=num_instances * num_classes + 1)[1:]
            # Each (shifted) class key is repeated once per instance slot.
            cam_keys_expanded = np.repeat(keys[1:], num_instances)

            max_fragment_size = orig_img_size[0] * orig_img_size[1] * 0.01

            # divide parts - remove fragments - save each instance
            pred_score = []
            pred_label = []
            pred_mask = []
            for rw_score, ins_mask, cls_lab in zip(rw_up_norm.cpu().numpy(),
                                                   instance_shape_1hot,
                                                   cam_keys_expanded):
                if np.sum(ins_mask) < 1:
                    continue

                # ``neighbors=4`` was removed from scikit-image;
                # ``connectivity=1`` is the 2-D equivalent.
                segments = pyutils.to_one_hot(
                    skimage.measure.label(ins_mask, connectivity=1,
                                          background=0))[1:]
                seg_size = np.sum(segments, (1, 2))

                if np.max(seg_size) < max_fragment_size:
                    # Every component is small: keep the mask as a single
                    # instance rather than discarding it entirely.
                    pred_score.append(np.max(rw_score * ins_mask))
                    pred_label.append(cls_lab)
                    pred_mask.append(ins_mask)
                    continue

                for s in segments:
                    if np.sum(s) < max_fragment_size:
                        # Drop small fragments next to a large component.
                        continue
                    cropped_msc = rw_score * s
                    pred_score.append(np.max(cropped_msc))
                    pred_label.append(cls_lab)
                    pred_mask.append(s)

            if pred_mask:
                out = {
                    'score': np.stack(pred_score, 0),
                    'mask': np.stack(pred_mask, 0),
                    'class': np.stack(pred_label, 0),
                }
            else:
                # np.stack([]) raises ValueError; when every pixel is
                # background, save well-formed empty arrays instead of
                # crashing the worker.
                masks_arr = np.asarray(instance_shape_1hot)
                out = {
                    'score': np.zeros((0,), dtype=np.float32),
                    'mask': np.zeros((0,) + masks_arr.shape[1:],
                                     dtype=masks_arr.dtype),
                    'class': np.zeros((0,), dtype=cam_keys_expanded.dtype),
                }

            np.save(os.path.join(args.ins_seg_out_dir, img_name + '.npy'),
                    out)

            if process_id == n_gpus - 1 and step % progress_step == 0:
                print("%d " % ((5 * step + 1) // progress_step), end='')