def get_all_anchors_FPN(stride, sizes):
    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    # are centered on stride / 2, have (approximate) sqrt areas of the specified
    # sizes, and aspect ratios as given.
    cell_anchors = generate_anchors(
        stride,
        scales=np.array([sizes], dtype=float) / stride,
        ratios=np.array(config.ANCHOR_RATIOS, dtype=float))
    # anchors are intbox here.
    # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)

    fpn_max_size = 32 * np.ceil(config.MAX_SIZE / 32)
    field_size = int(np.ceil(fpn_max_size / float(stride)))
    # field_size = config.MAX_SIZE // stride
    shifts = np.arange(0, field_size) * stride
    shift_x, shift_y = np.meshgrid(shifts, shifts)
    shift_x = shift_x.flatten()
    shift_y = shift_y.flatten()
    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()
    # Kx4, K = field_size * field_size
    K = shifts.shape[0]

    A = cell_anchors.shape[0]
    field_of_anchors = (
        cell_anchors.reshape((1, A, 4)) +
        shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    field_of_anchors = field_of_anchors.reshape((field_size, field_size, A, 4))
    # FSxFSxAx4
    assert np.all(field_of_anchors == field_of_anchors.astype('int32'))
    field_of_anchors = field_of_anchors.astype('float32')
    field_of_anchors[:, :, :, [2, 3]] += 1
    return field_of_anchors
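# --- Illustrative aside (not part of any snippet in this collection) ---
# A minimal, self-contained NumPy sketch of the shift-and-broadcast trick that
# get_all_anchors_FPN above (and the get_all_anchors variants below) rely on:
# A "cell" anchors centered near the origin are replicated over a KxK grid of
# feature-map positions by broadcasting a (K, 1, 4) shift array against a
# (1, A, 4) anchor array. All values below are made up so the shapes are easy
# to inspect.
import numpy as np

stride = 16
field_size = 4                                   # tiny grid for illustration
cell_anchors = np.array([[-8., -8., 8., 8.],     # A = 2 hypothetical (x1, y1, x2, y2) boxes
                         [-16., -8., 16., 8.]])

shifts = np.arange(field_size) * stride
shift_x, shift_y = np.meshgrid(shifts, shifts)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).T     # (K, 4), K = field_size ** 2

A, K = cell_anchors.shape[0], shifts.shape[0]
field = cell_anchors.reshape(1, A, 4) + shifts.reshape(K, 1, 4)  # broadcast to (K, A, 4)
field = field.reshape(field_size, field_size, A, 4)
print(field.shape)        # (4, 4, 2, 4)
print(field[1, 2, 0])     # anchor 0 shifted to grid cell (row 1, col 2): [24.  8. 40. 24.]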
def __init__(self, db, video_names, data_dir, z_transforms, x_transforms, training=True):
    self.video_names = video_names
    self.data_dir = data_dir
    self.z_transforms = z_transforms
    self.x_transforms = x_transforms
    meta_data_path = os.path.join(data_dir, 'meta_data.pkl')
    self.meta_data = pickle.load(open(meta_data_path, 'rb'))
    self.meta_data = {x[0]: x[1] for x in self.meta_data}
    # filter trajectories with length less than 2
    for key in self.meta_data.keys():
        trajs = self.meta_data[key]
        for trkid in list(trajs.keys()):
            if len(trajs[trkid]) < 2:
                del trajs[trkid]

    self.txn = db.begin(write=False)
    self.num = len(self.video_names) if config.pairs_per_video_per_epoch is None or not training \
        else config.pairs_per_video_per_epoch * len(self.video_names)

    # data augmentation
    self.max_stretch = config.scale_resize
    self.max_translate = config.max_translate
    self.random_crop_size = config.instance_size
    self.center_crop_size = config.exemplar_size

    self.training = training

    # valid_scope = 2 * config.valid_scope + 1
    self.anchors = generate_anchors(config.total_stride, config.anchor_base_size,
                                    config.anchor_scales, config.anchor_ratios,
                                    config.response_map_size)
def __init__(self, cf):
    super(ProposalLayer, self).__init__()
    self.cf = cf
    self._feat_stride = self.cf.feat_stride
    self._anchors = torch.from_numpy(
        generate_anchors(scales=np.array(self.cf.anchor_scales),
                         ratios=np.array(self.cf.anchor_ratios))).float()
    self._num_anchors = self._anchors.size(0)
def __init__(self, cf):
    super(AnchorTargetLayer, self).__init__()
    self.cf = cf
    self._feat_stride = self.cf.feat_stride
    self._scales = self.cf.anchor_scales
    self._anchors = torch.from_numpy(
        generate_anchors(scales=np.array(self.cf.anchor_scales),
                         ratios=np.array(self.cf.anchor_ratios))).float()
    self._num_anchors = self._anchors.size(0)

    # allow boxes to sit over the edge by a small amount
    self._allowed_border = 0  # default is 0
def get_all_anchors(stride=config.ANCHOR_STRIDE, sizes=config.ANCHOR_SIZES):
    """
    Get all anchors in the largest possible image, shifted, floatbox

    Args:
        stride (int): the stride of anchors.
        sizes (tuple[int]): the sizes (sqrt area) of anchors

    Returns:
        anchors: SxSxNUM_ANCHORx4, where S == ceil(MAX_SIZE/STRIDE), floatbox
        The layout in the NUM_ANCHOR dim is NUM_RATIO x NUM_SIZE.
    """
    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    # are centered on stride / 2, have (approximate) sqrt areas of the specified
    # sizes, and aspect ratios as given.
    cell_anchors = generate_anchors(
        stride,
        scales=np.array(sizes, dtype=float) / stride,
        ratios=np.array(config.ANCHOR_RATIOS, dtype=float))
    # anchors are intbox here.
    # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)

    max_size = config.MAX_SIZE
    if config.MODE_FPN:
        # TODO setting this in config is perhaps better
        size_mult = config.FPN_RESOLUTION_REQUIREMENT * 1.
        max_size = np.ceil(max_size / size_mult) * size_mult
    field_size = int(np.ceil(max_size / stride))
    shifts = np.arange(0, field_size) * stride
    shift_x, shift_y = np.meshgrid(shifts, shifts)
    shift_x = shift_x.flatten()
    shift_y = shift_y.flatten()
    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()
    # Kx4, K = field_size * field_size
    K = shifts.shape[0]

    A = cell_anchors.shape[0]
    field_of_anchors = (
        cell_anchors.reshape((1, A, 4)) +
        shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    field_of_anchors = field_of_anchors.reshape((field_size, field_size, A, 4))
    # FSxFSxAx4
    assert np.all(field_of_anchors == field_of_anchors.astype('int32'))
    field_of_anchors = field_of_anchors.astype('float32')
    field_of_anchors[:, :, :, [2, 3]] += 1
    return field_of_anchors
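# --- Illustrative aside (not from the snippet above) ---
# Hedged usage sketch: downstream code typically slices the precomputed SxSxAx4 field of
# anchors down to the feature-map size of a concrete image and flattens it to (-1, 4)
# before IoU matching. The array below is only a stand-in for get_all_anchors(); the
# shapes are illustrative assumptions, not values taken from config.
import numpy as np

all_anchors = np.zeros((84, 84, 15, 4), dtype=np.float32)  # stand-in field, 15 anchors per cell
featuremap_height, featuremap_width = 50, 38               # e.g. an 800 x 608 image at stride 16
cropped = all_anchors[:featuremap_height, :featuremap_width, :, :]
flat_anchors = cropped.reshape(-1, 4)                      # (H * W * A, 4), ready for matching
print(flat_anchors.shape)                                  # (28500, 4)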
def get_sniper_all_anchors(stride=None, sizes=None):
    """
    Get all anchors in the largest possible image, shifted, floatbox

    Args:
        stride (int): the stride of anchors.
        sizes (tuple[int]): the sizes (sqrt area) of anchors

    Returns:
        anchors: SxSxNUM_ANCHORx4, where S == ceil(MAX_SIZE/STRIDE), floatbox
        The layout in the NUM_ANCHOR dim is NUM_RATIO x NUM_SIZE.
    """
    if stride is None:
        stride = cfg.RPN.ANCHOR_STRIDE
    if sizes is None:
        sizes = cfg.RPN.ANCHOR_SIZES
    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    # are centered on stride / 2, have (approximate) sqrt areas of the specified
    # sizes, and aspect ratios as given.
    cell_anchors = generate_anchors(
        stride,
        scales=np.array(sizes, dtype=float) / stride,
        ratios=np.array(cfg.RPN.ANCHOR_RATIOS, dtype=float))
    # anchors are intbox here.
    # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)

    # max_size = 0
    max_size = cfg.SNIPER.CHIP_SIZE
    field_size = int(np.ceil(max_size / stride))
    shifts = np.arange(0, field_size) * stride
    shift_x, shift_y = np.meshgrid(shifts, shifts)
    shift_x = shift_x.flatten()
    shift_y = shift_y.flatten()
    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()
    # Kx4, K = field_size * field_size
    K = shifts.shape[0]

    A = cell_anchors.shape[0]
    field_of_anchors = (
        cell_anchors.reshape((1, A, 4)) +
        shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    field_of_anchors = field_of_anchors.reshape((field_size, field_size, A, 4))
    # FSxFSxAx4
    # A lot of rounding happens inside the anchor code anyway
    # assert np.all(field_of_anchors == field_of_anchors.astype('int32'))
    field_of_anchors = field_of_anchors.astype('float32')
    field_of_anchors[:, :, :, [2, 3]] += 1
    return field_of_anchors
def get_all_anchors(stride=None, sizes=None):
    """
    Get all anchors in the largest possible image, shifted, floatbox

    Args:
        stride (int): the stride of anchors.
        sizes (tuple[int]): the sizes (sqrt area) of anchors

    Returns:
        anchors: SxSxNUM_ANCHORx4, where S == ceil(MAX_SIZE/STRIDE), floatbox
        The layout in the NUM_ANCHOR dim is NUM_RATIO x NUM_SIZE.
    """
    if stride is None:
        stride = cfg.RPN.ANCHOR_STRIDE
    if sizes is None:
        sizes = cfg.RPN.ANCHOR_SIZES
    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    # are centered on stride / 2, have (approximate) sqrt areas of the specified
    # sizes, and aspect ratios as given.
    cell_anchors = generate_anchors(
        stride,
        scales=np.array(sizes, dtype=float) / stride,
        ratios=np.array(cfg.RPN.ANCHOR_RATIOS, dtype=float))
    # anchors are intbox here.
    # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)

    max_size = cfg.PREPROC.MAX_SIZE
    field_size = int(np.ceil(max_size / stride))
    shifts = np.arange(0, field_size) * stride
    shift_x, shift_y = np.meshgrid(shifts, shifts)
    shift_x = shift_x.flatten()
    shift_y = shift_y.flatten()
    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()
    # Kx4, K = field_size * field_size
    K = shifts.shape[0]

    A = cell_anchors.shape[0]
    field_of_anchors = (
        cell_anchors.reshape((1, A, 4)) +
        shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    field_of_anchors = field_of_anchors.reshape((field_size, field_size, A, 4))
    # FSxFSxAx4
    # A lot of rounding happens inside the anchor code anyway
    # assert np.all(field_of_anchors == field_of_anchors.astype('int32'))
    field_of_anchors = field_of_anchors.astype('float32')
    field_of_anchors[:, :, :, [2, 3]] += 1
    return field_of_anchors
def get_all_anchors(stride=config.ANCHOR_STRIDE, sizes=config.ANCHOR_SIZES):
    """
    Get all anchors in the largest possible image, shifted, floatbox

    Args:
        stride (int): the stride of anchors.
        sizes (tuple[int]): the sizes (sqrt area) of anchors

    Returns:
        anchors: SxSxNUM_ANCHORx4, where S == ceil(MAX_SIZE/STRIDE), floatbox
        The layout in the NUM_ANCHOR dim is NUM_RATIO x NUM_SIZE.
    """
    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    # are centered on stride / 2, have (approximate) sqrt areas of the specified
    # sizes, and aspect ratios as given.
    cell_anchors = generate_anchors(
        stride,
        scales=np.array(sizes, dtype=float) / stride,
        ratios=np.array(config.ANCHOR_RATIOS, dtype=float))
    # anchors are intbox here.
    # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)

    max_size = config.MAX_SIZE
    if config.MODE_FPN:
        # TODO setting this in config is perhaps better
        size_mult = config.FPN_RESOLUTION_REQUIREMENT * 1.
        max_size = np.ceil(max_size / size_mult) * size_mult
    field_size = int(np.ceil(max_size / stride))
    shifts = np.arange(0, field_size) * stride
    shift_x, shift_y = np.meshgrid(shifts, shifts)
    shift_x = shift_x.flatten()
    shift_y = shift_y.flatten()
    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()
    # Kx4, K = field_size * field_size
    K = shifts.shape[0]

    A = cell_anchors.shape[0]
    field_of_anchors = (
        cell_anchors.reshape((1, A, 4)) +
        shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    field_of_anchors = field_of_anchors.reshape((field_size, field_size, A, 4))
    # FSxFSxAx4
    assert np.all(field_of_anchors == field_of_anchors.astype('int32'))
    field_of_anchors = field_of_anchors.astype('float32')
    field_of_anchors[:, :, :, [2, 3]] += 1
    return field_of_anchors
def anchor_compose(self, height, width):
    anchors = generate_anchors(ratios=np.array(self.anchor_ratios),
                               scales=np.array(self.anchor_scales))
    num_anchor = anchors.shape[0]
    shift_x = np.arange(0, width) * self.feat_stride
    shift_y = np.arange(0, height) * self.feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    k = shifts.shape[0]
    # width changes faster, so here it is H, W, C
    anchors = anchors.reshape((1, num_anchor, 4)) + \
        shifts.reshape((1, k, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((k * num_anchor, 4)).astype(np.float32, copy=False)
    return torch.from_numpy(anchors).cuda()
def __init__(self, spatial_scale=0.0625, train=False,
             rpn_pre_nms_top_n=None, rpn_post_nms_top_n=None,
             rpn_nms_thresh=None, rpn_min_size=0,
             anchor_sizes=(32, 64, 128, 256, 512),
             anchor_aspect_ratios=(0.5, 1, 2)):
    super(GenerateProposals, self).__init__()
    self._anchors = generate_anchors(sizes=anchor_sizes,
                                     aspect_ratios=anchor_aspect_ratios,
                                     stride=1. / spatial_scale)
    self._num_anchors = self._anchors.shape[0]
    self._spatial_scale = spatial_scale
    self._train = train
    self.rpn_pre_nms_top_n = rpn_pre_nms_top_n if rpn_pre_nms_top_n is not None else (12000 if train else 6000)
    self.rpn_post_nms_top_n = rpn_post_nms_top_n if rpn_post_nms_top_n is not None else (2000 if train else 1000)
    self.rpn_nms_thresh = rpn_nms_thresh if rpn_nms_thresh is not None else 0.7
    self.rpn_min_size = rpn_min_size if rpn_min_size is not None else 0
def get_all_anchors(stride=config.ANCHOR_STRIDE, sizes=config.ANCHOR_SIZES,
                    ratios=config.ANCHOR_RATIOS):
    """
    Get all anchors in the largest possible image, shifted, floatbox

    Returns:
        anchors: SxSxNUM_ANCHORx4, where S == MAX_SIZE//STRIDE, floatbox
        The layout in the NUM_ANCHOR dim is NUM_RATIO x NUM_SCALE.
    """
    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    # are centered on stride / 2, have (approximate) sqrt areas of the specified
    # sizes, and aspect ratios as given.
    cell_anchors = generate_anchors(
        stride,
        scales=np.array(sizes, dtype=float) / stride,
        ratios=np.array(ratios, dtype=float))
    # anchors are intbox here.
    # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)

    field_size = config.MAX_SIZE // stride
    shifts = np.arange(0, field_size) * stride
    shift_x, shift_y = np.meshgrid(shifts, shifts)
    shift_x = shift_x.flatten()
    shift_y = shift_y.flatten()
    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()
    # Kx4, K = field_size * field_size
    K = shifts.shape[0]

    A = cell_anchors.shape[0]
    field_of_anchors = (
        cell_anchors.reshape((1, A, 4)) +
        shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    field_of_anchors = field_of_anchors.reshape((field_size, field_size, A, 4))
    # FSxFSxAx4
    # assert np.all(field_of_anchors == field_of_anchors.astype('int32'))
    field_of_anchors = field_of_anchors.astype('float32')
    field_of_anchors[:, :, :, [2, 3]] += 1
    return field_of_anchors
def get_all_anchors(stride=config.ANCHOR_STRIDE, sizes=config.ANCHOR_SIZES):
    """
    Get all anchors in the largest possible image, shifted, floatbox

    Returns:
        anchors: SxSxNUM_ANCHORx4, where S == MAX_SIZE//STRIDE, floatbox
        The layout in the NUM_ANCHOR dim is NUM_RATIO x NUM_SCALE.
    """
    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    # are centered on stride / 2, have (approximate) sqrt areas of the specified
    # sizes, and aspect ratios as given.
    cell_anchors = generate_anchors(
        stride,
        scales=np.array(sizes, dtype=float) / stride,
        ratios=np.array(config.ANCHOR_RATIOS, dtype=float))
    # anchors are intbox here.
    # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)

    field_size = config.MAX_SIZE // stride
    shifts = np.arange(0, field_size) * stride
    shift_x, shift_y = np.meshgrid(shifts, shifts)
    shift_x = shift_x.flatten()
    shift_y = shift_y.flatten()
    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()
    # Kx4, K = field_size * field_size
    K = shifts.shape[0]

    A = cell_anchors.shape[0]
    field_of_anchors = (
        cell_anchors.reshape((1, A, 4)) +
        shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    field_of_anchors = field_of_anchors.reshape((field_size, field_size, A, 4))
    # FSxFSxAx4
    assert np.all(field_of_anchors == field_of_anchors.astype('int32'))
    field_of_anchors = field_of_anchors.astype('float32')
    field_of_anchors[:, :, :, [2, 3]] += 1
    return field_of_anchors
def __init__(self, spatial_scale=0.0625, train=False,
             rpn_pre_nms_top_n=None, rpn_post_nms_top_n=None,
             rpn_nms_thresh=None, rpn_min_size=0,
             anchor_sizes=(32, 64, 128, 256, 512),
             anchor_aspect_ratios=(0.5, 1, 2)):
    super(GenerateProposals, self).__init__()
    self._anchors = generate_anchors(sizes=anchor_sizes,
                                     aspect_ratios=anchor_aspect_ratios,
                                     stride=1. / spatial_scale)
    self._num_anchors = self._anchors.shape[0]
    self._spatial_scale = spatial_scale
    self._train = train
    self.rpn_pre_nms_top_n = rpn_pre_nms_top_n if rpn_pre_nms_top_n is not None else (
        12000 if train else 6000)
    self.rpn_post_nms_top_n = rpn_post_nms_top_n if rpn_post_nms_top_n is not None else (
        2000 if train else 1000)
    self.rpn_nms_thresh = rpn_nms_thresh if rpn_nms_thresh is not None else 0.7
    self.rpn_min_size = rpn_min_size if rpn_min_size is not None else 0
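# --- Illustrative aside (not from the snippets above) ---
# Quick arithmetic sketch of the defaults used by the two GenerateProposals constructors:
# spatial_scale = 0.0625 corresponds to an anchor stride of 1 / 0.0625 = 16, and 5 sizes
# x 3 aspect ratios give 15 anchors per feature-map cell, so an H x W feature map yields
# H * W * 15 candidate boxes before clipping, score sorting and NMS.
spatial_scale = 0.0625
stride = 1.0 / spatial_scale                                        # 16.0
anchors_per_cell = len((32, 64, 128, 256, 512)) * len((0.5, 1, 2))  # 15
feat_h, feat_w = 64, 48                                             # hypothetical feature-map size
print(stride, anchors_per_cell, feat_h * feat_w * anchors_per_cell)  # 16.0 15 46080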
def forward(self, cls_scores, bbox_deltas, gt_boxes, device):
    """
    process proposals from the RPN
    :param bbox_deltas: [N x 4K x H x W]
    :param cls_scores: [N x 2K x H x W] of scores, not probabilities
    :param gt_boxes: [M x 4] [x1, y1, x2, y2]
    :return:
    """
    """
    Algorithm
    1) get all center points
    2) make all anchors using center points
    3) apply bbox_deltas
    4) calculate IoUs
    5) find positive labels
    6) find negative labels
    7) sample down the negative labels
    8) calculate losses
    """
    # ensure center and original anchors have been precomputed
    if self.feat_stride is None:
        self.feat_stride = round(self.image_size / float(cls_scores.size(3)))
    if self.anchors is None:
        self.anchors = generate_anchors(self.feat_stride, cls_scores.size(3),
                                        self.ratios, self.scales).to(device)

    N, _, H, W = cls_scores.shape
    cls_scores = cls_scores.permute(0, 2, 3, 1)
    # apply bbox deltas, but first reshape to (batch, H, W, 4K)
    bbox_deltas = bbox_deltas.permute(0, 2, 3, 1)
    # reshape again to match anchors (N, H, W, K, 4)
    bbox_deltas = bbox_deltas.reshape(N, H, W, -1, 4)
    _anchors = self.anchors.float()
    regions = _anchors + bbox_deltas

    # now we clip the boxes to the image
    regions = regions.view(N, -1, 4, H, W).permute(0, 3, 4, 1, 2)
    # reshape to [batch x L x 4]
    regions = regions.reshape(N, -1, 4)
    # filter the cross-boundary boxes in training
    for i in range(N):
        regions[i, :, :] = cross_boundary(regions[i, :, :], self.image_size,
                                          device, remove=False)
    # we need anchors to be [L x 4]
    _anchors = _anchors.reshape(-1, 4)

    # get matches / losses per batch
    cls_scores = cls_scores.reshape(N, -1, 1)
    tot_cls_loss = 0.0
    tot_bbox_loss = 0.0
    tot_fg = 0.0
    tot_bg = 0.0
    for i in range(N):
        matches = match(regions[i, :, :], gt_boxes[i][:, :4].squeeze(0),
                        self.upper, self.lower, device)
        # filter out anchors that are neither positive nor negative
        pos_mask = matches >= 0
        pos_inds = pos_mask.nonzero()
        neg_mask = matches == NEGATIVE
        neg_inds = neg_mask.nonzero()

        # sample 256 anchors
        pos_inds = pos_inds.reshape(-1)
        npos = min(pos_inds.size(0), 128)
        pos_inds_perm = torch.randperm(pos_inds.size(0))[:npos]
        pos_inds = pos_inds[pos_inds_perm]
        bg_num = self.sample_num - npos
        bg_num = min(60, bg_num)
        perm = torch.randperm(neg_inds.size(0))
        # index into neg_inds so we keep actual anchor indices, not permutation positions
        sample_neg_inds = neg_inds[perm[:bg_num]].reshape(-1)

        gt_cls = torch.cat((torch.ones(pos_inds.size(0)),
                            torch.zeros(sample_neg_inds.size(0)))).to(device)
        # grab cls_scores for each sampled anchor
        pred_cls = torch.cat((cls_scores[i, pos_inds],
                              cls_scores[i, sample_neg_inds])).to(device).squeeze()
        # TODO avoid this reshape edge case
        gt_cls = gt_cls.reshape(-1)
        pred_cls = pred_cls.reshape(-1)
        cls_loss = self.cls_loss(pred_cls, gt_cls.reshape(-1))
        if cls_loss != cls_loss:
            print(f"pred_cls: {pred_cls}")
            print(f"gt_cls: {gt_cls}")

        # we only do bbox regression on positive targets
        # get and reshape matches
        gt_indxs = matches[pos_inds].long()
        sample_gt_bbox = gt_boxes[i][:, gt_indxs, :]
        sample_gt_bbox = sample_gt_bbox.reshape(-1, 4)
        sample_pred_bbox = regions[i, pos_inds, :]
        sample_roi_bbox = _anchors[pos_inds, :]
        norm = torch.tensor(N).float()
        bbox_loss = self.bbox_loss(sample_pred_bbox, sample_gt_bbox,
                                   sample_roi_bbox, norm)

        tot_cls_loss = tot_cls_loss + cls_loss
        tot_bbox_loss = tot_bbox_loss + bbox_loss
        tot_fg += npos
        tot_bg += bg_num

    return tot_cls_loss, tot_bbox_loss, tot_bg, tot_fg, pred_cls.mean()
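# --- Illustrative aside (not from the snippet above) ---
# The forward() above hands raw boxes (predicted, ground-truth, anchor) to self.bbox_loss.
# Many RPN implementations instead regress the standard Faster R-CNN box deltas between an
# anchor and its matched ground-truth box. A minimal, self-contained sketch of that
# encoding follows; it is an assumption that this repo's bbox_loss does something
# equivalent internally, and the helper name is hypothetical.
import torch

def encode_deltas(anchors: torch.Tensor, gt: torch.Tensor) -> torch.Tensor:
    """anchors, gt: (N, 4) boxes in (x1, y1, x2, y2); returns (N, 4) deltas (tx, ty, tw, th)."""
    aw = anchors[:, 2] - anchors[:, 0]
    ah = anchors[:, 3] - anchors[:, 1]
    ax = anchors[:, 0] + 0.5 * aw
    ay = anchors[:, 1] + 0.5 * ah
    gw = gt[:, 2] - gt[:, 0]
    gh = gt[:, 3] - gt[:, 1]
    gx = gt[:, 0] + 0.5 * gw
    gy = gt[:, 1] + 0.5 * gh
    tx = (gx - ax) / aw          # center offsets, normalized by anchor size
    ty = (gy - ay) / ah
    tw = torch.log(gw / aw)      # log-space size ratios
    th = torch.log(gh / ah)
    return torch.stack((tx, ty, tw, th), dim=1)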
import sys

from utils.generate_anchors import generate_anchors
from PIL import Image
from PIL import ImageDraw

r = 1
image_size = 500
map_size = 16
feat_stride = float(image_size) / map_size
ratios = [1.0, 2.0, 0.5]
scales = [32, 64, 128]

base_image = Image.new("RGB", (image_size, image_size), color="#FFF")
centers, anchors = generate_anchors(feat_stride, map_size, ratios, scales,
                                    output_centers=True)
draw = ImageDraw.Draw(base_image)
centers = centers.reshape(-1, 2)
print(anchors)

# draw every anchor center as a small red dot
for x, y in centers.astype(int):
    draw.ellipse((x - r, y - r, x + r, y + r), fill="#f00")

# draw the 9 anchors (3 ratios x 3 scales) centered on feature-map cell (6, 6)
i, j = 6, 6
for idx in range(9):
    x, y, h, w = anchors[i, j, idx]
    x1 = x - w / 2
    y1 = y - h / 2
    x2 = x + w / 2
    y2 = y + h / 2
    draw.rectangle((x1, y1, x2, y2), outline="#00f")

# display the assembled visualization (could equally be saved with base_image.save(...))
base_image.show()
def get_all_anchors(stride=None, sizes=None, tile=True):
    """
    Get all anchors in the largest possible image, shifted, floatbox

    Args:
        stride (int): the stride of anchors.
        sizes (tuple[int]): the sizes (sqrt area) of anchors

    Returns:
        anchors: SxSxNUM_ANCHORx4, where S == ceil(MAX_SIZE/STRIDE), floatbox
        The layout in the NUM_ANCHOR dim is NUM_RATIO x NUM_SIZE.
    """
    if stride is None:
        stride = cfg.RPN.ANCHOR_STRIDE
    if sizes is None:
        sizes = cfg.RPN.ANCHOR_SIZES
    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    # are centered on stride / 2, have (approximate) sqrt areas of the specified
    # sizes, and aspect ratios as given.
    if not cfg.RPN.UNQUANTIZED_ANCHOR:
        cell_anchors = generate_anchors(
            stride,
            scales=np.array(sizes, dtype=float) / stride,
            ratios=np.array(cfg.RPN.ANCHOR_RATIOS, dtype=float))
    else:
        anchors = []
        ratios = np.array(cfg.RPN.ANCHOR_RATIOS, dtype=float)
        for sz in sizes:
            for ratio in ratios:
                w = np.sqrt(sz * sz / ratio)
                h = ratio * w
                anchors.append([-w, -h, w, h])
        cell_anchors = np.asarray(anchors) * 0.5
    # anchors are intbox here.
    # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)

    if tile:
        max_size = cfg.PREPROC.MAX_SIZE
        field_size = int(np.ceil(max_size / stride))
        if not cfg.RPN.UNQUANTIZED_ANCHOR:
            shifts = np.arange(0, field_size) * stride
        else:
            shifts = (np.arange(0, field_size) * stride).astype("float32")
        shift_x, shift_y = np.meshgrid(shifts, shifts)
        shift_x = shift_x.flatten()
        shift_y = shift_y.flatten()
        shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()
        # Kx4, K = field_size * field_size
        K = shifts.shape[0]

        A = cell_anchors.shape[0]
        # the tiling is the same in both the quantized and the unquantized case
        field_of_anchors = (
            cell_anchors.reshape((1, A, 4)) +
            shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
        field_of_anchors = field_of_anchors.reshape((field_size, field_size, A, 4))
        # FSxFSxAx4
        # A lot of rounding happens inside the anchor code anyway
        # assert np.all(field_of_anchors == field_of_anchors.astype('int32'))
        field_of_anchors = field_of_anchors.astype('float32')
        if not cfg.RPN.UNQUANTIZED_ANCHOR:
            field_of_anchors[:, :, :, [2, 3]] += 1
        return field_of_anchors
    else:
        cell_anchors = cell_anchors.astype('float32')
        cell_anchors[:, [2, 3]] += 1
        return cell_anchors
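# --- Illustrative aside (not from the snippet above) ---
# A quick numeric illustration of what the UNQUANTIZED_ANCHOR branch changes: the analytic
# branch keeps w = sqrt(size^2 / ratio) and h = ratio * w as floats, so the sqrt-area is
# exact, whereas rounding widths/heights to integers (as quantized anchor code typically
# does) makes areas and aspect ratios only approximate. The rounding below is a stand-in
# for that effect, not a re-implementation of generate_anchors.
import numpy as np

size, ratio = 32, 0.5
w = np.sqrt(size * size / ratio)            # ~45.25
h = ratio * w                               # ~22.63, so w * h == 1024 == size ** 2 exactly
unquantized = np.array([-w, -h, w, h]) * 0.5
quantized = np.round([-w, -h, w, h]) * 0.5  # integer-rounded width/height
q_area = (quantized[2] - quantized[0]) * (quantized[3] - quantized[1])
print(unquantized)                          # exact sqrt-area-32 box
print(quantized, q_area)                    # area drifts to 1035 after rounding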
def forward(self, cls_scores, bbox_deltas, device):
    """
    process proposals from the RPN
    :param bbox_deltas: [N x 4K x H x W]
    :param cls_scores: [N x 2K x H x W] of scores, not probabilities
    :return:
    """
    """
    Algorithm
    1) get all center points
    2) make all anchors using center points
    3) apply bbox_deltas
    4) clip boxes to image
    5) filter small boxes
    6) pre-NMS filtering by score
    7) NMS filtering
    8) post-NMS filtering by score
    """
    # ensure center and original anchors have been precomputed
    if self.feat_stride is None:
        self.feat_stride = round(self.image_size / float(cls_scores.size(3)))
    if self.anchors is None:
        self.anchors = generate_anchors(self.feat_stride, cls_scores.size(3),
                                        self.ratios, self.scales).to(device)

    N, _, H, W = cls_scores.shape
    cls_scores = cls_scores.permute(0, 2, 3, 1)
    # apply bbox deltas, but first reshape to (N, H, W, 4K)
    bbox_deltas = bbox_deltas.permute(0, 2, 3, 1)
    # reshape again to match anchors
    bbox_deltas = bbox_deltas.reshape(N, H, W, -1, 4)
    _anchors = self.anchors.float()
    regions = _anchors + bbox_deltas

    # now we clip the boxes to the image
    # now we can grab the pre-NMS regions
    # first we reshape the tensors to be N x K and N x K x 4
    cls_scores = cls_scores.permute(0, 3, 1, 2).reshape(N, -1)
    regions = regions.view(N, -1, 4, H, W).permute(0, 3, 4, 1, 2)
    regions = regions.reshape(N, -1, 4)
    for i in range(N):
        regions[i, :, :] = cross_boundary(regions[i, :, :], self.image_size,
                                          device, remove=False)

    pre_nms = min(self.NMS_PRE, cls_scores.size(1))
    _, sort_order = cls_scores.topk(pre_nms, dim=1)
    slices_scores = []
    slices_regions = []
    for i in range(N):
        slice_idxs = sort_order[i, :]
        slice_score = cls_scores[i, slice_idxs]
        slice_region = regions[i, slice_idxs, :]
        slices_regions.append(slice_region)
        slices_scores.append(slice_score)
    cls_scores = torch.stack(slices_scores, dim=0)
    regions = torch.stack(slices_regions, dim=0)

    output = cls_scores.new(N, self.NMS_POST, 5)
    # TODO implement padding here
    for i in range(N):
        keep_idx = nms(regions[i, :, :], cls_scores[i, :], self.threshold)
        keep_idx = keep_idx[:self.NMS_POST]
        # boxes go in columns 1:5, the score for the same batch element in column 0
        output[i, :, 1:] = pad_tensor(regions[i, keep_idx, :], (self.NMS_POST, 4))
        output[i, :, 0] = pad_tensor(cls_scores[i, keep_idx].unsqueeze(1),
                                     (self.NMS_POST, 1)).squeeze()
    return output
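# --- Illustrative aside (not from the snippet above) ---
# A stand-alone sketch of the pre-NMS top-k -> NMS -> post-NMS truncation flow used in the
# forward() above, written against torchvision.ops.nms so it runs without the repo's custom
# nms/pad_tensor helpers. Thresholds, box values, and top-k sizes are illustrative only.
import torch
from torchvision.ops import nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [50., 50., 60., 60.],
                      [52., 52., 61., 61.]])
scores = torch.tensor([0.9, 0.8, 0.7, 0.6])

pre_nms_topk, post_nms_topk, iou_thresh = 4, 2, 0.5
k = min(pre_nms_topk, scores.numel())
top_scores, order = scores.topk(k)                      # pre-NMS filtering by score
keep = nms(boxes[order], top_scores, iou_thresh)        # indices into the top-k set, sorted by score
keep = keep[:post_nms_topk]                             # post-NMS truncation
proposals = boxes[order][keep]
print(proposals)        # the two highest-scoring, mutually non-overlapping boxes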
def train_NuSeT(self):
    """Train the model, return test loss.

    Args:
        network (dict): the parameters of the network
    return:
        accuracy (float)
    """
    # Get the training parameters
    learning_rate = self.params['lr']
    optimizer = self.params['optimizer']
    num_epoch = self.params['epochs']
    bbox_min_score = self.params['min_score']
    nms_thresh = self.params['nms_threshold']
    normalization_method = self.params['normalization_method']

    # Load the data
    # x_train, y_train: training images and corresponding labels
    # x_val, y_val: validation images and corresponding labels
    # w_train, w_val: training and validation weight matrices for U-Net
    # bbox_train, bbox_val: bounding box coordinates for train and validation dataset
    x_train, x_val, y_train, y_val, w_train, w_val, bbox_train, bbox_val = load_data_train(
        self, normalization_method)

    # pred_dict and pred_dict_final save all the temp variables
    pred_dict_final = {}

    # tensor placeholders for training images with labels
    train_initial = tf.placeholder(dtype=tf.float32, shape=[1, None, None, 1])
    labels = tf.placeholder(dtype=tf.float32, shape=[1, None, None, 1])

    # tensor placeholders for weight matrices and ground truth bounding boxes
    edge_weights = tf.placeholder(dtype=tf.float32, shape=[1, None, None, 1])
    gt_boxes = tf.placeholder(dtype=tf.float32, shape=[None, 5])

    input_shape = tf.shape(train_initial)
    input_height = input_shape[1]
    input_width = input_shape[2]
    im_shape = tf.cast([input_height, input_width], tf.float32)

    # number of classes to be classified; for our case this equals 2
    # (foreground and background)
    nb_classes = 2

    # feed the initial image to U-Net, we expect 2 outputs:
    # 1. feat_map of shape (1, input_height/16, input_width/16, 1024), which will be passed
    #    to the region proposal network
    # 2. final_logits of shape (1, input_height, input_width, 2), which is the prediction from U-Net
    with tf.variable_scope('model_U-Net') as scope:
        final_logits, feat_map = UNET(nb_classes, train_initial)

    # final_logits has 2 channels for foreground/background softmax scores,
    # so we take the prediction with the larger score for each pixel
    pred_masks = tf.argmax(final_logits, axis=3)
    pred_masks = tf.reshape(pred_masks, [input_height, input_width])
    pred_masks = tf.to_float(pred_masks)

    # Dynamic anchor base size calculated from median cell lengths
    base_size = anchor_size(tf.reshape(labels, [input_height, input_width]))
    # scales and ratios are used to generate different anchors
    scales = np.array([0.5, 1, 2])
    ratios = np.array([0.125, 0.25, 0.5, 1, 2, 4, 8])
    # stride controls how sparsely we place anchors across the image;
    # stride = 16 means to place an anchor every 16 pixels on the original image
    stride = 16

    # Generate the anchor reference with respect to the original image
    ref_anchors = generate_anchors_reference(base_size, ratios, scales)
    num_ref_anchors = scales.shape[0] * ratios.shape[0]

    feat_height = input_height / stride
    feat_width = input_width / stride

    # Generate all the anchors based on ref_anchors
    all_anchors = generate_anchors(ref_anchors, stride, [feat_height, feat_width])
    num_anchors = all_anchors.shape[0]

    with tf.variable_scope('model_RPN') as scope:
        prediction_dict = RPN(feat_map, num_ref_anchors)

    # Get the tensors from the dict
    rpn_cls_prob = prediction_dict['rpn_cls_prob']
    rpn_bbox_pred = prediction_dict['rpn_bbox_pred']

    proposal_prediction = RPNProposal(rpn_cls_prob, rpn_bbox_pred, all_anchors,
                                      im_shape, nms_thresh)

    pred_dict_final['all_anchors'] = tf.cast(all_anchors, tf.float32)
    pred_dict_final['gt_bboxes'] = gt_boxes
    prediction_dict['proposals'] = proposal_prediction['proposals']
    prediction_dict['scores'] = proposal_prediction['scores']

    # When training we use a separate module to calculate the target
    # values we want to output.
    (rpn_cls_target, rpn_bbox_target, rpn_max_overlap) = RPNTarget(
        all_anchors, num_anchors, gt_boxes, im_shape)

    prediction_dict['rpn_cls_target'] = rpn_cls_target
    prediction_dict['rpn_bbox_target'] = rpn_bbox_target

    pred_dict_final['rpn_prediction'] = prediction_dict
    scores = pred_dict_final['rpn_prediction']['scores']
    proposals = pred_dict_final['rpn_prediction']['proposals']

    pred_masks_watershed = tf.to_float(
        marker_watershed(scores, proposals, pred_masks, min_score=bbox_min_score))

    # Loss is defined as RPN loss (class loss + bounding box loss) +
    # segmentation loss (default is the sum of soft dice and cross-entropy)
    rpn_loss = RPNLoss(prediction_dict)
    RPN_loss = rpn_loss['rpn_cls_loss'] + rpn_loss['rpn_reg_loss']
    SEG_loss = segmentation_loss(final_logits, pred_masks_watershed, labels,
                                 edge_weights, mode='COMBO')
    final_loss = RPN_loss + SEG_loss
    # If training with just U-Net, only include the segmentation loss:
    # final_loss = SEG_loss

    # Metrics are pixel accuracy, mean IU, mean accuracy, root mean squared error
    metrics, metrics_op = compute_metrics(pred_masks, labels)
    pred_dict_final['unet_mask'] = pred_masks

    # get the optimizer
    gen_train_op = optimizer_fun(optimizer, final_loss, learning_rate=learning_rate)

    # start point for training, and end point for graph
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    num_batches = len(x_train)
    num_batches_val = len(x_val)
    saver = tf.train.Saver()

    if normalization_method == 'wn':
        self.training_results.set('Start whole image Norm. training ...')
        self.window.update()
    if normalization_method == 'fg':
        self.training_results.set('Start Foreground Norm. training ...')
        self.window.update()

    # training image indexes will be shuffled at every epoch during training
    idx = np.arange(num_batches)
    best_IU = 0
    for iteration in range(0, num_epoch):
        # The batch pointer to validation data
        j = 0
        sess.run(tf.local_variables_initializer())
        if iteration == num_epoch - 1 and normalization_method == 'wn':
            self.whole_norm_y_pred = []

        # shuffle the sequence of the training data for the current epoch
        np.random.shuffle(idx)
        for i in tqdm(range(0, num_batches)):
            self.train_progress_var.set(i / num_batches * 100)
            self.window.update()

            # Generate the batch data from training data and training label
            batch_data = x_train[idx[i]]
            batch_data_shape = batch_data.shape
            batch_data = np.reshape(
                batch_data, [1, batch_data_shape[0], batch_data_shape[1], 1])
            batch_label = np.reshape(
                y_train[idx[i]], [1, batch_data_shape[0], batch_data_shape[1], 1])
            batch_edge = np.reshape(
                w_train[idx[i]], [1, batch_data_shape[0], batch_data_shape[1], 1])
            batch_bbox = bbox_train[idx[i]]

            # Skip if this batch does not contain any object (bounding box is null)
            if batch_bbox.size > 0:
                # Here include the optimizer to actually perform learning
                sess.run([gen_train_op],
                         feed_dict={
                             train_initial: batch_data,
                             gt_boxes: batch_bbox,
                             labels: batch_label,
                             edge_weights: batch_edge
                         })

            # Only calculate the accuracy and loss after the training epoch
            if i == num_batches - 1:
                while j < num_batches_val:
                    # Generate the batch data from val data and val label
                    batch_data = x_val[j]
                    batch_data_shape = batch_data.shape
                    batch_data = np.reshape(
                        batch_data, [1, batch_data_shape[0], batch_data_shape[1], 1])
                    batch_label = np.reshape(
                        y_val[j], [1, batch_data_shape[0], batch_data_shape[1], 1])
                    batch_edge = np.reshape(
                        w_val[j], [1, batch_data_shape[0], batch_data_shape[1], 1])
                    batch_bbox = bbox_val[j]

                    # At the end of whole image normalization training,
                    # cache the predictions
                    if iteration == num_epoch - 1 and normalization_method == 'wn':
                        self.whole_norm_y_pred.append(
                            sess.run(pred_masks,
                                     feed_dict={
                                         train_initial: batch_data,
                                         gt_boxes: batch_bbox,
                                         labels: batch_label,
                                         edge_weights: batch_edge
                                     }))

                    if batch_bbox.size > 0:
                        # Here get the accuracy and loss for each batch in the validation cycle
                        loss_temp, rpnloss_temp, segloss_temp = sess.run(
                            [final_loss, rpn_loss, SEG_loss],
                            feed_dict={
                                train_initial: batch_data,
                                gt_boxes: batch_bbox,
                                labels: batch_label,
                                edge_weights: batch_edge
                            })
                        sess.run([metrics_op],
                                 feed_dict={
                                     train_initial: batch_data,
                                     gt_boxes: batch_bbox,
                                     labels: batch_label,
                                     edge_weights: batch_edge
                                 })
                        if j == num_batches_val - 1:
                            metrics_all = sess.run(metrics,
                                                   feed_dict={
                                                       train_initial: batch_data,
                                                       gt_boxes: batch_bbox,
                                                       labels: batch_label,
                                                       edge_weights: batch_edge
                                                   })
                            _mean_IU = metrics_all['global']['mean_IU']
                            _pixel_accuracy = metrics_all['global']['pixel_accuracy']
                            _f1 = 2 * _mean_IU / (1 + _mean_IU)
                            _rmse = metrics_all['global']['rmse']

                        # Get moving average of metrics and losses
                        if j == 0:
                            loss_total = loss_temp
                            cls_loss = rpnloss_temp['rpn_cls_loss']
                            reg_loss = rpnloss_temp['rpn_reg_loss']
                            seg_loss = segloss_temp
                        else:
                            loss_total = (1 - 1 / (j + 1)) * loss_total + 1 / (j + 1) * loss_temp
                            cls_loss = (1 - 1 / (j + 1)) * cls_loss + 1 / (j + 1) * rpnloss_temp['rpn_cls_loss']
                            reg_loss = (1 - 1 / (j + 1)) * reg_loss + 1 / (j + 1) * rpnloss_temp['rpn_reg_loss']
                            seg_loss = (1 - 1 / (j + 1)) * seg_loss + 1 / (j + 1) * segloss_temp

                    j = j + 1

                print('Epoch: %d - loss: %.2f - cls_loss: %.2f - reg_loss: %.2f - '
                      'seg_loss: %.2f - mean_IU: %.4f - f1: %.4f - pixel_accuracy: %.4f'
                      % (iteration, loss_total, cls_loss, reg_loss, seg_loss,
                         _mean_IU, _f1, _pixel_accuracy))

                self.training_results.set('Epoch ' + str(iteration) + ', loss ' +
                                          '{0:.2f}'.format(loss_total) + ', mean IU ' +
                                          '{0:.2f}'.format(_mean_IU))
                self.window.update()

        # Keep track of the best model in the last 10 epochs and use that as the best model
        if iteration >= num_epoch - 10 and normalization_method == 'wn' and _mean_IU > best_IU:
            best_IU = _mean_IU
            saver.save(sess, './Network/whole_norm.ckpt')
        if iteration >= num_epoch - 10 and normalization_method == 'fg' and _mean_IU > best_IU:
            best_IU = _mean_IU
            saver.save(sess, './Network/foreground.ckpt')

    sess.close()
def test(params, self):
    """Predict masks for all images in a given directory, and save them

    Args:
        params (dict): the parameters of the network
    """
    # Get the testing parameters
    perform_watershed = params['watershed']
    bbox_min_score = params['min_score']
    nms_thresh = params['nms_threshold']
    postProcess = params['postProcess']
    resize_scale = params['scale_ratio']

    # Load the data
    # x_test, y_test: test images and corresponding labels
    x_id, x_test = load_data_test(self.batch_seg_path)

    # pred_dict and pred_dict_final save all the temp variables
    pred_dict_final = {}

    train_initial = tf.placeholder(dtype=tf.float32, shape=[1, None, None, 1])
    input_shape = tf.shape(train_initial)
    input_height = input_shape[1]
    input_width = input_shape[2]
    im_shape = tf.cast([input_height, input_width], tf.float32)

    # number of classes to be classified; for our case this equals 2
    # (foreground and background)
    nb_classes = 2

    # feed the initial image to U-Net, we expect 2 outputs:
    # 1. feat_map of shape (?, hf, wf, 1024), which will be passed to the
    #    region proposal network
    # 2. final_logits of shape (?, h, w, 2), which is the prediction from U-Net
    with tf.variable_scope('model_U-Net') as scope:
        final_logits, feat_map = UNET(nb_classes, train_initial)

    # final_logits has 2 channels for foreground/background softmax scores,
    # so we take the prediction with the larger score for each pixel
    pred_masks = tf.argmax(final_logits, axis=3)
    pred_masks = tf.reshape(pred_masks, [input_height, input_width])
    pred_masks = tf.to_float(pred_masks)

    # Dynamic anchor base size calculated from median cell lengths
    base_size = anchor_size(tf.reshape(pred_masks, [input_height, input_width]))
    # scales and ratios are used to generate different anchors
    scales = np.array([0.5, 1, 2])
    ratios = np.array([0.125, 0.25, 0.5, 1, 2, 4, 8])
    # stride controls how sparsely we place anchors across the image;
    # stride = 16 means to place an anchor every 16 pixels on the original image
    stride = 16

    # Generate the anchor reference with respect to the original image
    ref_anchors = generate_anchors_reference(base_size, ratios, scales)
    num_ref_anchors = scales.shape[0] * ratios.shape[0]

    feat_height = input_height / stride
    feat_width = input_width / stride

    # Generate all the anchors based on ref_anchors
    all_anchors = generate_anchors(ref_anchors, stride, [feat_height, feat_width])
    num_anchors = all_anchors.shape[0]

    with tf.variable_scope('model_RPN') as scope:
        prediction_dict = RPN(feat_map, num_ref_anchors)

    # Get the tensors from the dict
    rpn_cls_prob = prediction_dict['rpn_cls_prob']
    rpn_bbox_pred = prediction_dict['rpn_bbox_pred']

    proposal_prediction = RPNProposal(rpn_cls_prob, rpn_bbox_pred, all_anchors,
                                      im_shape, nms_thresh)

    pred_dict_final['all_anchors'] = tf.cast(all_anchors, tf.float32)
    prediction_dict['proposals'] = proposal_prediction['proposals']
    prediction_dict['scores'] = proposal_prediction['scores']

    pred_dict_final['rpn_prediction'] = prediction_dict
    scores = pred_dict_final['rpn_prediction']['scores']
    proposals = pred_dict_final['rpn_prediction']['proposals']

    pred_masks_watershed = tf.to_float(
        marker_watershed(scores, proposals, pred_masks, min_score=bbox_min_score))

    # start point for testing, and end point for graph
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    num_batches_test = len(x_test)
    saver = tf.train.Saver()
    masks1 = []

    # Restore the whole-image normalization model from the trained network
    saver.restore(sess, './Network/whole_norm.ckpt')
    sess.run(tf.local_variables_initializer())

    for j in tqdm(range(0, num_batches_test)):
        # whole image normalization
        batch_data = x_test[j]
        batch_data_shape = batch_data.shape
        image = np.reshape(batch_data, [batch_data_shape[0], batch_data_shape[1]])

        if resize_scale != 1:
            image = rescale(image, self.params['scale_ratio'], anti_aliasing=True)

        # Clip the height and width to be 16-fold
        imheight, imwidth = image.shape
        imheight = imheight // 16 * 16
        imwidth = imwidth // 16 * 16
        image = image[:imheight, :imwidth]

        image_normalized_wn = whole_image_norm(image)
        image_normalized_wn = np.reshape(image_normalized_wn, [1, imheight, imwidth, 1])
        masks = sess.run(pred_masks, feed_dict={train_initial: image_normalized_wn})

        self.progress_var.set(j / 2 / num_batches_test * 100)
        self.window.update()

        # First pass, get the coarse masks, and normalize the image on masks
        masks1.append(masks)

    # Restore the foreground normalization model from the trained network
    saver.restore(sess, './Network/foreground.ckpt')
    sess.run(tf.local_variables_initializer())

    for j in tqdm(range(0, num_batches_test)):
        batch_data = x_test[j]
        batch_data_shape = batch_data.shape
        image = np.reshape(batch_data, [batch_data_shape[0], batch_data_shape[1]])

        if resize_scale != 1:
            image = rescale(image, self.params['scale_ratio'])

        # Clip the height and width to be 16-fold
        imheight, imwidth = image.shape
        imheight = imheight // 16 * 16
        imwidth = imwidth // 16 * 16
        image = image[:imheight, :imwidth]

        # Final pass, foreground normalization to get final masks
        image_normalized_fg = foreground_norm(image, masks1[j])
        image_normalized_fg = np.reshape(image_normalized_fg, [1, imheight, imwidth, 1])

        # If adding watershed, we save the watershed masks separately
        if perform_watershed == 'yes':
            masks_watershed = sess.run(pred_masks_watershed,
                                       feed_dict={train_initial: image_normalized_fg})
            if postProcess == 'yes':
                masks_watershed = clean_image(masks_watershed)
            # Revert the scale to original display
            if resize_scale != 1:
                masks_watershed = rescale(masks_watershed, 1 / self.params['scale_ratio'])
            I8 = (((masks_watershed - masks_watershed.min()) /
                   (masks_watershed.max() - masks_watershed.min())) * 255).astype(np.uint8)
            img = Image.fromarray(I8)
            img.save(self.batch_seg_path + x_id[j] + '_masks_watershed.png')
        else:
            masks = sess.run(pred_masks, feed_dict={train_initial: image_normalized_fg})
            if postProcess == 'yes':
                masks = clean_image(masks)
            # enable these 2 lines if you want to see the detection result
            # image_pil = draw_top_nms_proposals(pred_dict, batch_data, min_score=bbox_min_score, draw_gt=False)
            # image_pil.save(str(j) + '_pred.png')
            # Revert the scale to original display
            if resize_scale != 1:
                masks = rescale(masks, 1 / self.params['scale_ratio'])
            I8 = (((masks - masks.min()) / (masks.max() - masks.min())) * 255).astype(np.uint8)
            img = Image.fromarray(I8)
            img.save(self.batch_seg_path + x_id[j] + '_masks.png')

        self.progress_var.set(50 + j / 2 / num_batches_test * 100)
        self.window.update()

    sess.close()
def test_single_img(params, x_test):
    """Input the image, return the segmented mask

    Args:
        params (dict): the parameters of the network
        x_test: the input image as a numpy array
    """
    # Get the testing parameters
    perform_watershed = params['watershed']
    bbox_min_score = params['min_score']
    nms_thresh = params['nms_threshold']
    postProcess = params['postProcess']

    # pred_dict and pred_dict_final save all the temp variables
    pred_dict_final = {}

    train_initial = tf.placeholder(dtype=tf.float32, shape=[1, None, None, 1])
    input_shape = tf.shape(train_initial)
    input_height = input_shape[1]
    input_width = input_shape[2]
    im_shape = tf.cast([input_height, input_width], tf.float32)

    # number of classes to be classified; for our case this equals 2
    # (foreground and background)
    nb_classes = 2

    # feed the initial image to U-Net, we expect 2 outputs:
    # 1. feat_map of shape (?, 32, 32, 1024), which will be passed to the
    #    region proposal network
    # 2. final_logits of shape (?, 512, 512, 2), which is the prediction from U-Net
    with tf.variable_scope('model_U-Net') as scope:
        final_logits, feat_map = UNET(nb_classes, train_initial)

    # final_logits has 2 channels for foreground/background softmax scores,
    # so we take the prediction with the larger score for each pixel
    pred_masks = tf.argmax(final_logits, axis=3)
    pred_masks = tf.reshape(pred_masks, [input_height, input_width])
    pred_masks = tf.to_float(pred_masks)

    # Dynamic anchor base size calculated from median cell lengths
    base_size = anchor_size(tf.reshape(pred_masks, [input_height, input_width]))
    # scales and ratios are used to generate different anchors
    scales = np.array([0.5, 1, 2])
    ratios = np.array([0.125, 0.25, 0.5, 1, 2, 4, 8])
    # stride controls how sparsely we place anchors across the image;
    # stride = 16 means to place an anchor every 16 pixels on the original image
    stride = 16

    # Generate the anchor reference with respect to the original image
    ref_anchors = generate_anchors_reference(base_size, ratios, scales)
    num_ref_anchors = scales.shape[0] * ratios.shape[0]

    feat_height = input_height / stride
    feat_width = input_width / stride

    # Generate all the anchors based on ref_anchors
    all_anchors = generate_anchors(ref_anchors, stride, [feat_height, feat_width])
    num_anchors = all_anchors.shape[0]

    with tf.variable_scope('model_RPN') as scope:
        prediction_dict = RPN(feat_map, num_ref_anchors)

    # Get the tensors from the dict
    rpn_cls_prob = prediction_dict['rpn_cls_prob']
    rpn_bbox_pred = prediction_dict['rpn_bbox_pred']

    proposal_prediction = RPNProposal(rpn_cls_prob, rpn_bbox_pred, all_anchors,
                                      im_shape, nms_thresh)

    pred_dict_final['all_anchors'] = tf.cast(all_anchors, tf.float32)
    prediction_dict['proposals'] = proposal_prediction['proposals']
    prediction_dict['scores'] = proposal_prediction['scores']

    pred_dict_final['rpn_prediction'] = prediction_dict
    scores = pred_dict_final['rpn_prediction']['scores']
    proposals = pred_dict_final['rpn_prediction']['proposals']

    pred_masks_watershed = tf.to_float(
        marker_watershed(scores, proposals, pred_masks, min_score=bbox_min_score))

    # start point for testing, and end point for graph
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    num_batches_test = len(x_test)
    saver = tf.train.Saver()
    masks1 = []

    # Restore the whole-image normalization model from the trained network
    saver.restore(sess, './Network/whole_norm.ckpt')
    # saver.restore(sess, './Network/whole_norm_weights_fluorescent/' + str(3) + '.ckpt')
    sess.run(tf.local_variables_initializer())

    for j in tqdm(range(0, num_batches_test)):
        # whole image normalization
        batch_data = x_test[j]
        batch_data_shape = batch_data.shape
        image_normalized_wn = whole_image_norm(batch_data)
        image_normalized_wn = np.reshape(
            image_normalized_wn, [1, batch_data_shape[0], batch_data_shape[1], 1])
        masks = sess.run(pred_masks, feed_dict={train_initial: image_normalized_wn})

        # First pass, get the coarse masks, and normalize the image on masks
        masks1.append(masks)

    # Restore the foreground normalization model from the trained network
    saver.restore(sess, './Network/foreground.ckpt')
    # saver.restore(sess, './Network/fg_norm_weights_fluorescent/' + str(30) + '.ckpt')
    sess.run(tf.local_variables_initializer())

    for j in tqdm(range(0, num_batches_test)):
        batch_data = x_test[j]
        batch_data_shape = batch_data.shape
        image = np.reshape(batch_data, [batch_data_shape[0], batch_data_shape[1]])

        # Final pass, foreground normalization to get final masks
        image_normalized_fg = foreground_norm(image, masks1[j])
        image_normalized_fg = np.reshape(
            image_normalized_fg, [1, batch_data_shape[0], batch_data_shape[1], 1])

        # If adding watershed, we return the watershed masks instead
        if perform_watershed == 'yes':
            masks = sess.run(pred_masks_watershed,
                             feed_dict={train_initial: image_normalized_fg})
            if postProcess == 'yes':
                masks = clean_image(masks)
        else:
            masks = sess.run(pred_masks, feed_dict={train_initial: image_normalized_fg})
            if postProcess == 'yes':
                masks = clean_image(masks)

    sess.close()
    return masks