def test_return_var_tuple(self):
    """
    pseudocode:
    if True:
        return 1, True
    else:
        return 3, 2
    """

    def true_func():
        return layers.fill_constant(shape=[1, 2], dtype='int32',
                                    value=1), layers.fill_constant(
                                        shape=[2, 3], dtype='bool', value=True)

    def false_func():
        return layers.fill_constant(shape=[3, 4], dtype='float32',
                                    value=3), layers.fill_constant(
                                        shape=[4, 5], dtype='int64', value=2)

    main_program = Program()
    startup_program = Program()
    with program_guard(main_program, startup_program):
        pred = layers.fill_constant(shape=[1], dtype='bool', value=True)
        out = layers.cond(pred, true_func, false_func)
        # out is a tuple containing 2 tensors

    place = fluid.CUDAPlace(
        0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = fluid.Executor(place)
    ret = exe.run(main_program, fetch_list=out)
    self.assertTrue(
        np.allclose(np.asarray(ret[0]), np.full((1, 2), 1, np.int32)))
    self.assertTrue(
        np.allclose(np.asarray(ret[1]), np.full((2, 3), True, bool)))
def test_pass_and_modify_var(self):
    """
    pseudocode:
    for i in range(5):
        a = 7
        if i % 2 == 0:
            a = a * (i + 1)
        else:
            a = a - (i - 1)
    """

    def true_func(a, i):
        a = a * (i + 1)
        return a

    def false_func(a, i):
        a = a - (i - 1)
        return a

    main_program = Program()
    startup_program = Program()
    with program_guard(main_program, startup_program):
        a = layers.fill_constant(shape=[3, 2, 1], dtype='int32', value=7)
        i = fluid.data(name="i", shape=[1], dtype='int32')
        pred = ((i % 2) == 0)
        a = layers.cond(pred, lambda: true_func(a, i),
                        lambda: false_func(a, i))

    place = fluid.CUDAPlace(
        0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = fluid.Executor(place)
    for feed_i in range(5):
        expected_a = 7 * (feed_i + 1) if feed_i % 2 == 0 else 8 - feed_i
        ret = exe.run(main_program,
                      feed={'i': np.full((1), feed_i, np.int32)},
                      fetch_list=[a])
        self.assertTrue(
            np.allclose(np.asarray(ret),
                        np.full((3, 2, 1), expected_a, np.int32)))
def test_extremely_simple_net_with_op_in_condition(self):
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        a = fluid.layers.fill_constant(shape=[1],
                                       dtype='float32',
                                       value=1.23)
        a.stop_gradient = False
        b = fluid.layers.fill_constant(shape=[1],
                                       dtype='float32',
                                       value=1.25)
        b.stop_gradient = False
        out = layers.cond(a - b < -1.0, lambda: a, lambda: b)
    append_backward(out)

    place = fluid.CUDAPlace(
        0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = fluid.Executor(place)
    ret = exe.run(main_program, fetch_list=[out, a.grad_name, b.grad_name])
    # Note: fill_constant can lose precision, so only assertEqual against
    # values that are exactly representable in floating point.
    self.assertEqual(ret[0][0], 1.25)
    self.assertEqual(ret[1][0], 0.0)
    self.assertEqual(ret[2][0], 1.0)
def build_program(self):

    def true_func():
        return layers.fill_constant(shape=[1, 2], dtype='int32',
                                    value=1), layers.fill_constant(
                                        shape=[2, 3], dtype='bool', value=True)

    def false_func():
        return layers.fill_constant(shape=[3, 4], dtype='float32',
                                    value=3), layers.fill_constant(
                                        shape=[4, 5], dtype='int64', value=2)

    main_program = Program()
    startup_program = Program()
    with program_guard(main_program, startup_program):
        x = layers.fill_constant(shape=[1], dtype='float32', value=0.1)
        y = layers.fill_constant(shape=[1], dtype='float32', value=0.23)
        pred = layers.less_than(x, y)
        out = layers.cond(pred, true_func, false_func)
        # out is a tuple containing 2 tensors
    return main_program, startup_program, out
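# A hedged usage sketch (this helper is hypothetical, not part of the original
# test class): execute the program returned by build_program above. Since
# 0.1 < 0.23, the true branch runs, so the fetched tensors are (1, 2) int32
# ones and (2, 3) bool True.
def run_build_program_sketch(self):
    main_program, startup_program, out = self.build_program()
    exe = fluid.Executor(fluid.CPUPlace())
    return exe.run(main_program, fetch_list=out)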
def branch(i, img, label):
    return layers.cond(
        (i % 2) == 0,
        lambda: simple_fc_net_with_inputs(img, label, class_num=10),
        lambda: batchnorm_fc_with_inputs(img, label, class_num=10))
def cond_func(i, img, label):
    predicate = ((i % 2) == 0)
    return layers.cond(
        predicate,
        lambda: simple_fc_net_with_inputs(img, label, class_num=10),
        lambda: batchnorm_fc_with_inputs(img, label, class_num=10))
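# A hedged sketch (the shapes are assumptions modeled on the MNIST-style
# helper nets referenced above) of wiring a cond_func like the one above into
# a static program:
main_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(main_program, startup_program):
    i = fluid.data(name="i", shape=[1], dtype='int32')
    img = fluid.data(name="image", shape=[-1, 784], dtype='float32')
    label = fluid.data(name="label", shape=[-1, 1], dtype='int64')
    loss = cond_func(i, img, label)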
def greater_equal_branch(i, a):
    return layers.cond(i < 8.0,
                       lambda: layers.elementwise_mul(a, a),
                       lambda: layers.elementwise_div(a, a))
def less_than_branch(i, a):
    return layers.cond(i >= 3.0,
                       lambda: layers.elementwise_add(a, a),
                       lambda: layers.elementwise_sub(a, a))
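# A hedged sketch of the outer condition that dispatches between the two
# nested-cond branches above (reconstructed from context; the driver code,
# including the choice a = 2.0 * i, is an assumption):
main_program = Program()
startup_program = Program()
with program_guard(main_program, startup_program):
    i = fluid.data(name="i", shape=[1], dtype='float32')
    a = 2.0 * i
    out = layers.cond(i < 5.0, lambda: less_than_branch(i, a),
                      lambda: greater_equal_branch(i, a))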
def fastnms(all_pred_boxes, all_pred_scores, resize_shape, origin_shape,
            conf_thresh, nms_thresh, keep_top_k, nms_top_k, use_yolo_box):
    '''
    :param all_pred_boxes:  [batch_size, -1, 4]
    :param all_pred_scores: [batch_size, -1, 80]
    :param resize_shape:    [batch_size, 2]
    :param origin_shape:    [batch_size, 2]
    '''
    conf_preds = P.transpose(all_pred_scores, perm=[0, 2, 1])  # [1, 80, -1]
    cur_scores = conf_preds[0]  # [80, -1]
    conf_scores = P.reduce_max(cur_scores, dim=0)  # [-1, ]

    # If keep is [None] and is then used in gather(), the error
    # "cudaGetLastError invalid configuration argument errno: 9" occurs.
    # To avoid this, keep must never be [None], so whenever it would be,
    # assign it the dummy index [[0]] instead.
    keep = P.where(conf_scores > conf_thresh)

    def exist_objs_1(keep):
        return keep

    def no_objs_1():
        keep_extra = P.zeros((1, 1), 'int64')
        return keep_extra

    keep = P.cond(P.shape(keep)[0] == 0, no_objs_1, lambda: exist_objs_1(keep))
    scores = P.gather(all_pred_scores[0], keep)
    scores = P.transpose(scores, perm=[1, 0])
    boxes = P.gather(all_pred_boxes[0], keep)

    boxes, scores, classes = fast_nms(boxes, scores, conf_thresh, nms_thresh,
                                      keep_top_k, nms_top_k)

    # Filter by score once more. As noted above, a box is kept as long as its
    # highest class score exceeds the threshold, but in the score matrix that
    # box is duplicated 80 times, one copy per class. Copies whose
    # (non-maximal) class score falls below the threshold must be filtered out.
    # fastnms therefore has this quirk: if a box's top class score exceeds the
    # threshold and one of its other class scores does too, both copies may
    # survive, yielding two boxes with identical xywh.
    keep = P.where(scores > conf_thresh)

    def exist_objs_2(keep, boxes, classes, scores):
        boxes = P.gather(boxes, keep)
        classes = P.gather(classes, keep)
        scores = P.gather(scores, keep)
        return boxes, classes, scores

    def no_objs_2(boxes, classes, scores):
        keep = P.zeros((1, 1), 'int64')
        boxes = P.gather(boxes, keep)
        classes = P.gather(classes, keep)
        scores = P.gather(scores, keep)
        scores -= 2.0  # deliberately negative so the Python side filters them out
        return boxes, classes, scores

    boxes, classes, scores = P.cond(
        P.shape(keep)[0] == 0, lambda: no_objs_2(boxes, classes, scores),
        lambda: exist_objs_2(keep, boxes, classes, scores))

    # Convert to top-left / bottom-right corner coordinates.
    boxes = P.concat(
        [boxes[:, :2] - boxes[:, 2:] * 0.5, boxes[:, :2] + boxes[:, 2:] * 0.5],
        axis=-1)

    # Rescale to the original image size.
    resize_shape_f = P.cast(resize_shape, 'float32')
    origin_shape_f = P.cast(origin_shape, 'float32')
    if use_yolo_box:
        scale = origin_shape_f
    else:
        scale = origin_shape_f / resize_shape_f
    scale = P.expand(scale, [1, 2])
    # Only valid for batch_size == 1: dim 0 of scale is the batch size, while
    # dim 0 of boxes is the number of objects predicted for this image.
    boxes *= scale

    # Batch size first.
    boxes = P.reshape(boxes, (1, -1, 4), name='boxes')
    scores = P.reshape(scores, (1, -1), name='scores')
    classes = P.reshape(classes, (1, -1), name='classes')
    return [boxes, scores, classes]
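# The empty-result guard used twice above, shown in isolation (the helper name
# is hypothetical; `scores` and `conf_thresh` stand in for any score tensor
# and threshold): where() may return a zero-row tensor, and feeding that into
# gather() crashes on GPU, so cond() substitutes a dummy index whenever
# nothing passes the filter.
def keep_or_dummy(scores, conf_thresh):
    keep = P.where(scores > conf_thresh)
    return P.cond(
        P.shape(keep)[0] == 0,
        lambda: P.zeros((1, 1), 'int64'),  # dummy index, filtered out later
        lambda: keep)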
def begin_localsgd():
    layers.cond(step - last_step == k_steps, communicate_avg_loss)
def minimize_impl(self,
                  loss,
                  startup_program=None,
                  parameter_list=None,
                  no_grad_set=None):
    minimized = self.inner_opt.minimize(loss,
                                        startup_program=startup_program)

    init_k_steps = self.user_defined_strategy.adaptive_localsgd_configs[
        'init_k_steps']
    begin_step_value = self.user_defined_strategy.adaptive_localsgd_configs[
        'begin_step']

    if startup_program is None:
        startup_program = default_startup_program()
    main_block = loss.block

    self.nrings = 2
    collective_helper = CollectiveHelper(self.role_maker, self.nrings)
    collective_helper.update_startup_program(startup_program)
    p2s = self.create_snapshot_vars(startup_program)
    self.init_snapshot_vars(startup_program, p2s)

    p2s = self.create_snapshot_vars(main_block.program)
    with program_guard(main_block.program, startup_program):
        step = layers.autoincreased_step_counter(begin=1)

        k_steps = layers.create_global_var(name="k_steps",
                                           shape=[1],
                                           value=int(init_k_steps),
                                           dtype='int64',
                                           persistable=True)
        begin_step = layers.create_global_var(name="begin_step",
                                              shape=[1],
                                              value=int(begin_step_value),
                                              dtype='int64',
                                              persistable=True)
        last_step = layers.create_global_var(name="last_step",
                                             shape=[1],
                                             value=int(0),
                                             dtype='int64',
                                             persistable=True)
        avg_loss = layers.create_global_var(name="avg_loss",
                                            shape=[1],
                                            value=float(0),
                                            dtype=loss.dtype,
                                            persistable=True)
        lr_0 = layers.create_global_var(name="lr_0",
                                        shape=[1],
                                        value=float(0),
                                        dtype='float32',
                                        persistable=True)
        loss_0 = layers.create_global_var(name="loss_0",
                                          shape=[1],
                                          value=float(0),
                                          dtype='float32',
                                          persistable=True)

        global_lr = self.inner_opt._global_learning_rate()

        def initialize():
            self._generate_avg_loss(main_block, loss, avg_loss)
            layers.assign(avg_loss, loss_0)
            layers.assign(global_lr, lr_0)

        layers.cond(step == 1, initialize)

        def communicate():
            sub_block = default_main_program().current_block()
            ring_id = -1
            for param, snapshot in p2s:
                sub_block.append_op(type='elementwise_sub',
                                    inputs={
                                        'X': [snapshot],
                                        'Y': [param]
                                    },
                                    outputs={'Out': [param]},
                                    attrs={OP_ROLE_KEY: OpRole.Optimize})
                sub_block.append_op(type='c_sync_calc_stream',
                                    inputs={'X': param},
                                    outputs={'Out': param},
                                    attrs={OP_ROLE_KEY: OpRole.Optimize})
                ring_id = (ring_id + 1) % self.nrings
                sub_block.append_op(type='c_allreduce_sum',
                                    inputs={'X': [param]},
                                    outputs={'Out': [param]},
                                    attrs={
                                        'ring_id': ring_id,
                                        OP_ROLE_KEY: OpRole.Optimize
                                    })

            for ring_id in range(self.nrings):
                sub_block.append_op(type='c_sync_comm_stream',
                                    inputs={'X': param},
                                    outputs={'Out': param},
                                    attrs={
                                        'ring_id': ring_id,
                                        OP_ROLE_KEY: OpRole.Optimize
                                    })

            for param, snapshot in p2s:
                sub_block.append_op(type='scale',
                                    inputs={'X': [param]},
                                    outputs={'Out': [param]},
                                    attrs={
                                        'scale':
                                        1.0 / self.role_maker._worker_num(),
                                        OP_ROLE_KEY: OpRole.Optimize
                                    })
                sub_block.append_op(type='elementwise_sub',
                                    inputs={
                                        'X': [snapshot],
                                        'Y': [param]
                                    },
                                    outputs={'Out': [param]},
                                    attrs={OP_ROLE_KEY: OpRole.Optimize})
                sub_block.append_op(type='assign',
                                    inputs={'X': [param]},
                                    outputs={'Out': [snapshot]},
                                    attrs={OP_ROLE_KEY: OpRole.Optimize})
            layers.assign(step, last_step)

        def communicate_avg_loss():
            communicate()
            self._generate_avg_loss(main_block, loss, avg_loss)
            next_local_steps = layers.cast(layers.ceil(
                layers.sqrt(lr_0 * avg_loss / (global_lr * loss_0) *
                            float(init_k_steps))),
                                           dtype='int64')
            max_local_steps = layers.fill_constant(shape=[1],
                                                   dtype='int64',
                                                   value=16)
            min_local_steps = layers.fill_constant(shape=[1],
                                                   dtype='int64',
                                                   value=1)
            next_local_steps = layers.elementwise_min(next_local_steps,
                                                      max_local_steps)
            next_local_steps = layers.elementwise_max(next_local_steps,
                                                      min_local_steps)
            layers.assign(next_local_steps, k_steps)

        def begin_localsgd():
            layers.cond(step - last_step == k_steps, communicate_avg_loss)

        layers.cond(step > begin_step, begin_localsgd, communicate)
    return minimized
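# Design note on the adaptive variant above: parameters are averaged every
# k_steps local steps, and each synchronization re-derives the next interval as
# ceil(sqrt(init_k_steps * (lr_0 * avg_loss) / (global_lr * loss_0))), clipped
# to [1, 16]. The interval scales with the square root of the current
# loss/learning-rate ratio relative to its value at step 1, so when the
# learning rate decays faster than the loss, workers synchronize less often.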
def _create_cond_block_and_update_optimizer(
        main_program, cond_var, new_params_to_grads: List[Tuple[Any, Any]],
        param_to_gradient_merge: Dict[str, Any], optimize_ops_desc: List[Any],
        k_steps, avg):

    def true_apply_gradient():
        cur_block_idx = main_program.current_block_idx
        cur_block = main_program.current_block()

        # cur_block's forward_block & backward_block is itself
        cur_block._set_forward_block_idx(cur_block_idx)
        op_maker = core.op_proto_and_checker_maker

        if avg:
            for param, new_grad in new_params_to_grads:
                # grad /= k_steps
                cur_block.append_op(type='scale',
                                    inputs={'X': new_grad},
                                    outputs={'Out': new_grad},
                                    attrs={
                                        'scale': 1.0 / k_steps,
                                        'bias': 0.0,
                                        'bias_after_scale': False
                                    })
                new_grad.op._set_attr(op_maker.kOpRoleAttrName(),
                                      op_maker.OpRole.Optimize)

        # append optimizer ops
        for op_desc in optimize_ops_desc:
            new_op_desc = cur_block.desc.append_op()
            new_op_desc.copy_from(op_desc)

            # update input/output: rename via the name -> gradient-merge-var
            # map (the (param, grad) tuple list cannot be indexed by name)
            for input_name in new_op_desc.input_arg_names():
                if input_name in param_to_gradient_merge:
                    new_op_desc._rename_input(
                        input_name, param_to_gradient_merge[input_name])

            for output_name in new_op_desc.output_arg_names():
                if output_name in param_to_gradient_merge:
                    new_op_desc._rename_output(
                        output_name, param_to_gradient_merge[output_name])

            # remove op_role_var
            if new_op_desc.has_attr(op_maker.kOpRoleVarAttrName()):
                new_op_desc.remove_attr(op_maker.kOpRoleVarAttrName())

            # redirect the op's Grad input to the gradient-merge variable
            if core.grad_var_suffix() in new_op_desc.input_arg_names():
                grad_value = new_op_desc.input("Grad")[0]
                # TODO FIXME(xym) support fp16
                grad_merge_value = grad_value + '@GradientMerge'
                new_op_desc.set_input("Grad", [grad_merge_value])

        main_program.global_block()._sync_with_cpp()
        cur_block._sync_with_cpp()

        # clear gradient_merge_vars
        for param, new_grad in new_params_to_grads:
            layers.fill_constant(shape=new_grad.shape,
                                 dtype=new_grad.dtype,
                                 value=0.0,
                                 out=new_grad)
            new_grad.op._set_attr(op_maker.kOpRoleAttrName(),
                                  op_maker.OpRole.Optimize)

    layers.cond(cond_var, true_fn=true_apply_gradient, false_fn=None)
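# Note on layers.cond with false_fn=None, as used above: only the true branch
# is built, the callable must return None, and the branch exists purely for
# its side effects on the enclosing program. A minimal self-contained sketch
# (variable names are illustrative, not from the original pass):
with program_guard(Program(), Program()):
    acc_var = layers.fill_constant(shape=[1], dtype='float32', value=5.0)
    flag = layers.fill_constant(shape=[1], dtype='bool', value=True)

    def reset_acc():
        # overwrite the accumulator in place; returns None
        layers.fill_constant(shape=[1], dtype='float32', value=0.0,
                             out=acc_var)

    layers.cond(flag, true_fn=reset_acc, false_fn=None)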
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid.executor import Executor
from paddle.fluid.framework import Program, program_guard


# Question: what if true_func or false_func needs input arguments when it runs?
def true_func():
    return layers.fill_constant(shape=[1, 2], dtype='int32', value=1), \
           layers.fill_constant(shape=[2, 3], dtype='bool', value=True)


def false_func():
    return layers.fill_constant(shape=[3, 4], dtype='float32', value=3), \
           layers.fill_constant(shape=[4, 5], dtype='int64', value=2)


main_program = Program()
startup_program = Program()
with program_guard(main_program, startup_program):
    x = layers.fill_constant(shape=[1], dtype='float32', value=0.1)
    y = layers.fill_constant(shape=[1], dtype='float32', value=0.23)
    pred = layers.less_than(x, y)
    out = layers.cond(pred, true_func, false_func)

place = fluid.CUDAPlace(
    0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace()
exe = fluid.Executor(place)
ret = exe.run(main_program, fetch_list=out)
print(ret)
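# Since 0.1 < 0.23, pred is True and true_func is taken, so print(ret) shows a
# (1, 2) int32 tensor of ones and a (2, 3) bool tensor of True. As for the
# question above: cond calls true_func/false_func with no arguments, so inputs
# are passed by closing over them with lambdas, e.g.
#     layers.cond(pred, lambda: true_func(a, i), lambda: false_func(a, i))
# as test_pass_and_modify_var does earlier in this section.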
def cond_func_simple_net_at_false(i, img, label):
    return layers.cond(i < 5, lambda: layers.mean(img),
                       lambda: branch(i, img, label))
def minimize_impl(self,
                  loss,
                  startup_program=None,
                  parameter_list=None,
                  no_grad_set=None):
    minimized = self.inner_opt.minimize(loss,
                                        startup_program=startup_program)

    k_steps_value = self.user_defined_strategy.localsgd_configs['k_steps']
    begin_step_value = self.user_defined_strategy.localsgd_configs[
        'begin_step']

    if startup_program is None:
        startup_program = default_startup_program()
    main_block = loss.block

    self.nrings = 2
    collective_helper = CollectiveHelper(self.role_maker, self.nrings)
    collective_helper.update_startup_program(startup_program)
    p2s = self.create_snapshot_vars(startup_program)
    self.init_snapshot_vars(startup_program, p2s)

    p2s = self.create_snapshot_vars(main_block.program)
    with program_guard(main_block.program, startup_program):
        step = layers.autoincreased_step_counter(begin=1)
        k_steps = layers.create_global_var(name="k_steps",
                                           shape=[1],
                                           value=k_steps_value,
                                           dtype='int64',
                                           persistable=True)
        begin_step = layers.create_global_var(name="begin_step",
                                              shape=[1],
                                              value=begin_step_value,
                                              dtype='int64',
                                              persistable=True)
        last_step = layers.create_global_var(name="last_step",
                                             shape=[1],
                                             value=begin_step_value,
                                             dtype='int64',
                                             persistable=True)

        def communicate():
            sub_block = default_main_program().current_block()
            ring_id = -1
            for param, snapshot in p2s:
                sub_block.append_op(type='elementwise_sub',
                                    inputs={
                                        'X': [snapshot],
                                        'Y': [param]
                                    },
                                    outputs={'Out': [param]},
                                    attrs={OP_ROLE_KEY: OpRole.Optimize})
                sub_block.append_op(type='c_sync_calc_stream',
                                    inputs={'X': param},
                                    outputs={'Out': param},
                                    attrs={OP_ROLE_KEY: OpRole.Optimize})
                ring_id = (ring_id + 1) % self.nrings
                sub_block.append_op(type='c_allreduce_sum',
                                    inputs={'X': [param]},
                                    outputs={'Out': [param]},
                                    attrs={
                                        'ring_id': ring_id,
                                        OP_ROLE_KEY: OpRole.Optimize
                                    })

            for ring_id in range(self.nrings):
                sub_block.append_op(type='c_sync_comm_stream',
                                    inputs={'X': param},
                                    outputs={'Out': param},
                                    attrs={
                                        'ring_id': ring_id,
                                        OP_ROLE_KEY: OpRole.Optimize
                                    })

            for param, snapshot in p2s:
                sub_block.append_op(type='scale',
                                    inputs={'X': [param]},
                                    outputs={'Out': [param]},
                                    attrs={
                                        'scale':
                                        1.0 / self.role_maker._worker_num(),
                                        OP_ROLE_KEY: OpRole.Optimize
                                    })
                sub_block.append_op(type='elementwise_sub',
                                    inputs={
                                        'X': [snapshot],
                                        'Y': [param]
                                    },
                                    outputs={'Out': [param]},
                                    attrs={OP_ROLE_KEY: OpRole.Optimize})
                sub_block.append_op(type='assign',
                                    inputs={'X': [param]},
                                    outputs={'Out': [snapshot]},
                                    attrs={OP_ROLE_KEY: OpRole.Optimize})
            layers.assign(step, last_step)

        def begin_localsgd():
            layers.cond(step - last_step == k_steps, communicate)

        layers.cond(step > begin_step, begin_localsgd, communicate)
    return minimized
def cond_func(i, img, label):
    return layers.cond(i < 5, lambda: branch(i, img, label, True),
                       lambda: branch(i, img, label, False))
def get_seg_single(self, cate_preds, mask_proto, kernel_preds, featmap_size,
                   resize_shape, ori_shape):
    '''
    :param cate_preds:   [num_grids_total, 80]
    :param mask_proto:   [1, 256, s4, s4]  mask prototypes
    :param kernel_preds: [num_grids_total, 256]  the 1x1 conv kernel produced
                         by each grid cell; its input channel count is 256,
                         the number of mask-prototype channels.
    :param featmap_size: (s4, s4)
    :param resize_shape: shape=[3, ]
    :param ori_shape:    shape=[3, ]
    :return:
    '''
    # overall info.
    upsampled_size_out = (featmap_size[0] * 4,
                          featmap_size[1] * 4)  # network input image size
    cfg = self.nms_cfg

    # First filter: by score.
    inds = L.where(cate_preds > cfg['score_thr'])  # [M, 2]

    # if len(inds) == 0:
    #     return None
    # Writing conditionals in static-graph mode is painful.
    def exist_objs_1(inds, cate_preds):
        inds.stop_gradient = True
        scores = L.gather_nd(cate_preds, inds)  # [M, ] scores of the M objects
        return inds, scores

    def no_objs_1(cate_preds):
        inds = L.zeros((1, 2), np.int64)
        inds.stop_gradient = True
        # [M, ] scores of the M objects; made negative so they are filtered out later
        scores = L.gather_nd(cate_preds, inds) - 99.0
        return inds, scores

    # Are there any objects?
    inds, scores = L.cond(
        L.shape(inds)[0] == 0, lambda: no_objs_1(cate_preds),
        lambda: exist_objs_1(inds, cate_preds))

    classes = inds[:, 1]  # [M, ] class ids of the M objects
    kernel_preds = L.gather(kernel_preds, inds[:, 0])  # [M, 256] conv kernels

    n_stage = len(self.seg_num_grids)  # 5 output levels
    strides = []
    for ind_ in range(n_stage):
        st = L.zeros((1, ), dtype=np.float32) + self.strides[ind_]
        st = L.expand(st, [
            self.seg_num_grids[ind_]**2,
        ])  # [40*40, ]
        strides.append(st)
    strides = L.concat(strides, axis=0)
    strides.stop_gradient = True
    strides = L.gather(strides, inds[:, 0])  # [M, ] downsampling rate per object

    # mask encoding. The original SOLO implementation convolves the mask
    # prototypes with the 1x1 kernels:
    # M, C = kernel_preds.shape
    # kernel_preds = kernel_preds.view(M, C, 1, 1)    # used as conv kernels
    # seg_preds = F.conv2d(seg_preds, kernel_preds, stride=1).squeeze(0).sigmoid()
    # A 1x1 convolution over the prototypes is equivalent to a matrix multiply
    # (note this does not hold for 3x3 kernels). No equivalent API was found,
    # so a matmul is used instead; solov2 and yolact are identical here.
    mask_proto = L.squeeze(mask_proto, axes=[0])          # [256, s4, s4]
    mask_proto = L.transpose(mask_proto, perm=[1, 2, 0])  # [s4, s4, 256]
    masks = L.matmul(mask_proto, kernel_preds, transpose_y=True)  # [s4, s4, M]
    masks = L.sigmoid(masks)                              # [s4, s4, M]
    masks = L.transpose(masks, perm=[2, 0, 1])            # [M, s4, s4]

    # mask.
    seg_masks = L.cast(masks > cfg['mask_thr'], 'float32')  # [M, s4, s4] 1 = foreground
    sum_masks = L.reduce_sum(seg_masks, dim=[1, 2])  # [M, ] mask area per object

    # Second filter: by downsampling rate. A mask is kept only if its area
    # exceeds its downsampling rate.
    keep = L.where(sum_masks > strides)

    # if keep.sum() == 0:
    #     return None
    # Writing conditionals in static-graph mode is painful.
    def exist_objs_2(keep, seg_masks, masks, sum_masks, scores, classes):
        keep = L.reshape(keep, (-1, ))  # [M2, ]
        keep.stop_gradient = True
        seg_masks = L.gather(seg_masks, keep)  # [M2, s4, s4] masks
        masks = L.gather(masks, keep)          # [M2, s4, s4] mask probabilities
        sum_masks = L.gather(sum_masks, keep)  # [M2, ] mask areas
        scores = L.gather(scores, keep)        # [M2, ] scores
        classes = L.gather(classes, keep)      # [M2, ] class ids
        return seg_masks, masks, sum_masks, scores, classes

    def no_objs_2(seg_masks, masks, sum_masks, scores, classes):
        keep = L.zeros((1, ), np.int64)
        keep.stop_gradient = True
        seg_masks = L.gather(seg_masks, keep)  # [M2, s4, s4] masks
        masks = L.gather(masks, keep)          # [M2, s4, s4] mask probabilities
        sum_masks = L.gather(sum_masks, keep)  # [M2, ] mask areas
        # [M2, ] scores; made negative so they are filtered out later
        scores = L.gather(scores, keep) - 99.0
        classes = L.gather(classes, keep)      # [M2, ] class ids
        return seg_masks, masks, sum_masks, scores, classes

    # Are there any objects?
    seg_masks, masks, sum_masks, scores, classes = L.cond(
        L.shape(keep)[0] == 0,
        lambda: no_objs_2(seg_masks, masks, sum_masks, scores, classes),
        lambda: exist_objs_2(keep, seg_masks, masks, sum_masks, scores, classes))

    # mask scoring.
    # [M2, ] sum of foreground mask probabilities divided by mask area, i.e.
    # the mean foreground mask probability of each object
    avg_prob = L.reduce_sum(masks * seg_masks, dim=[1, 2]) / sum_masks
    scores *= avg_prob  # [M2, ] final score = class probability * mean mask probability

    # Third filter: keep only the cfg['nms_pre'] highest-scoring objects.
    # Sort final scores in descending order: index of the largest value,
    # index of the second largest, ...
    _, sort_inds = L.argsort(scores, axis=-1, descending=True)
    sort_inds = sort_inds[:cfg['nms_pre']]  # at most cfg['nms_pre'] objects
    seg_masks = L.gather(seg_masks, sort_inds)  # [M3, s4, s4] masks
    masks = L.gather(masks, sort_inds)          # [M3, s4, s4] mask probabilities
    sum_masks = L.gather(sum_masks, sort_inds)  # [M3, ] mask areas
    scores = L.gather(scores, sort_inds)        # [M3, ] scores
    classes = L.gather(classes, sort_inds)      # [M3, ] class ids

    # Matrix NMS
    scores = matrix_nms(seg_masks,
                        classes,
                        scores,
                        kernel=cfg['kernel'],
                        sigma=cfg['sigma'],
                        sum_masks=sum_masks)

    # Fourth filter: by score.
    keep = L.where(scores >= cfg['update_thr'])

    # if keep.sum() == 0:
    #     return None
    def exist_objs_3(keep, masks, classes, scores, upsampled_size_out,
                     resize_shape, ori_shape):
        keep = L.reshape(keep, (-1, ))
        keep.stop_gradient = True
        masks = L.gather(masks, keep)      # [M4, s4, s4] mask probabilities
        scores = L.gather(scores, keep)    # [M4, ] scores
        classes = L.gather(classes, keep)  # [M4, ] class ids

        # Fifth filter: keep only the cfg['max_per_img'] highest-scoring objects.
        _, sort_inds = L.argsort(scores, axis=-1, descending=True)
        sort_inds = sort_inds[:cfg['max_per_img']]
        sort_inds.stop_gradient = True
        masks = L.gather(masks, sort_inds)      # [M5, s4, s4] mask probabilities
        scores = L.gather(scores, sort_inds)    # [M5, ] scores
        classes = L.gather(classes, sort_inds)  # [M5, ] class ids

        masks = L.resize_bilinear(
            L.unsqueeze(masks, axes=[0]),
            out_shape=upsampled_size_out,
            align_corners=False,
            align_mode=0)[:, :, :resize_shape[0], :resize_shape[1]]  # crop the padding
        masks = L.resize_bilinear(masks,
                                  out_shape=ori_shape[:2],
                                  align_corners=False,
                                  align_mode=0)  # interpolate to the original image size
        masks = L.cast(masks > cfg['mask_thr'], 'float32')[0]
        return masks, classes, scores

    def no_objs_3():
        masks = L.zeros([1, 1, 1], 'float32') - 1.0
        classes = L.zeros([
            1,
        ], 'int64') - 1
        scores = L.zeros([
            1,
        ], 'float32') - 2.0
        return masks, classes, scores

    # Are there any objects?
    masks, classes, scores = L.cond(
        L.shape(keep)[0] == 0, no_objs_3,
        lambda: exist_objs_3(keep, masks, classes, scores, upsampled_size_out,
                             resize_shape, ori_shape))
    return masks, classes, scores