def debug_model(model, in_stream=sys.stdin, out_stream=sys.stdout,
                exit_func=sys.exit):
    '''
    Returns a cloned model that has debug nodes inserted everywhere. When the
    graph is evaluated or trained, those nodes will allow to inspect the graph.

    Args:
     model (root node): root node until which the nodes are to be debugged
     in_stream (object behaving like sys.stdin, default stdin): `readline()`
      will be called on it to obtain user input
     out_stream (object behaving like sys.stdout, default stdout): `write()`
      and `flush()` will be called on it to output debug info to the user
     exit_func (callable, default sys.exit): callable that takes an exit code
      and is called, when the user exits the debugging process

    Returns:
      a clone of the model that has debugging enabled

    Raises:
      ValueError: if repeated cloning does not reduce the set of nodes to
      debug to a single node within the pass budget derived from the initial
      node count
    '''
    nodes = _nodes_to_debug(model)
    dbg_state = _DebugState(nodes)

    orig_node_count = len(nodes)
    mod_counter = 1

    # We cannot add the DebugNodes in one clone because the replacements will
    # hide parent nodes.
    while True:
        modifications = {
            n: user_function(
                _DebugNode(n, dbg_state, in_stream, out_stream, exit_func))
            for n in nodes
        }

        model = model.clone(CloneMethod.share, modifications)

        nodes = _nodes_to_debug(model)
        if len(nodes) == 1:
            # last node is the model node, which we want to debug as well.
            # exit_func is forwarded here too, so that exiting from the root
            # debug node uses the caller's callable like every other node.
            model = user_function(
                _DebugNode(model, dbg_state, in_stream, out_stream,
                           exit_func))
            break

        if mod_counter > orig_node_count:
            raise ValueError('cannot debug this graph')

        mod_counter += 1

    return model
def debug_model(model, in_stream=sys.stdin, out_stream=sys.stdout,
                exit_func=sys.exit):
    '''
    Returns a cloned model that has debug nodes inserted everywhere. When the
    graph is evaluated or trained, those nodes will allow to inspect the graph.

    Args:
     model (root node): root node until which the nodes are to be debugged
     in_stream (object behaving like sys.stdin, default stdin): `readline()`
      will be called on it to obtain user input
     out_stream (object behaving like sys.stdout, default stdout): `write()`
      and `flush()` will be called on it to output debug info to the user
     exit_func (callable, default sys.exit): callable that takes an exit code
      and is called, when the user exits the debugging process

    Returns:
      a clone of the model that has debugging enabled

    Raises:
      ValueError: if the graph cannot be reduced to a single remaining node
      to debug within the allowed number of clone passes
    '''
    def _wrap(node):
        # Every debug node shares the same state, streams and exit callable.
        return user_function(
            _DebugNode(node, dbg_state, in_stream, out_stream, exit_func))

    nodes = _nodes_to_debug(model)
    dbg_state = _DebugState(nodes)

    orig_node_count = len(nodes)
    mod_counter = 1

    # We cannot add the DebugNodes in one clone because the replacements will
    # hide parent nodes.
    while True:
        modifications = {n: _wrap(n) for n in nodes}
        model = model.clone(CloneMethod.share, modifications)

        nodes = _nodes_to_debug(model)
        if len(nodes) == 1:
            # last node is the model node, which we want to debug as well;
            # it now receives exit_func as well instead of silently falling
            # back to the debug node's own default.
            model = _wrap(model)
            break

        if mod_counter > orig_node_count:
            raise ValueError('cannot debug this graph')

        mod_counter += 1

    return model
def create_proposal_target_layer(rpn_rois, scaled_gt_boxes, num_classes):
    '''
    Creates a proposal target layer that is used for training an object
    detection network as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Assigns object detection proposals to ground-truth targets.
    Produces proposal classification labels and bounding-box regression targets.
    It also adds gt_boxes to candidates and samples fg and bg rois for training.

    Args:
        rpn_rois:        The proposed ROIs, e.g. from a region proposal network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label).
                         Coordinates are absolute pixels wrt. the input image.
        num_classes:     The number of classes in the data set

    Returns:
        rpn_target_rois - a set of rois containing the ground truth and a number of sampled fg and bg ROIs
        label_targets - the target labels for the rois
        bbox_targets - the regression coefficient targets for the rois
        bbox_inside_weights - the weights for the regression loss
    '''
    param_str = "'num_classes': {}".format(num_classes)
    target_layer = user_function(
        ProposalTargetLayer(rpn_rois, scaled_gt_boxes, param_str=param_str))

    # use an alias if you need to access the outputs, e.g., when cloning a
    # trained network; only the rois output is aliased here.
    return (
        alias(target_layer.outputs[0], name='rpn_target_rois'),
        target_layer.outputs[1],
        target_layer.outputs[2],
        target_layer.outputs[3],
    )
def test_ext_backpropstate(payload):
    '''
    Trains through a user function whose forward pass returns ``payload`` as
    its backprop state, and checks that backward receives that same state.
    '''
    class TestBackPropState(UserFunction):
        def __init__(self, arg, payload, name='f1'):
            self.payload = payload
            super(TestBackPropState, self).__init__([arg])

        def infer_outputs(self):
            # The single output mirrors the single input.
            source = self.inputs[0]
            return [C.output_variable(source.shape, source.dtype,
                                      source.dynamic_axes)]

        def forward(self, argument, device=None, outputs_to_retain=None):
            # The payload is handed out as the state; input passes through.
            return self.payload, argument

        def backward(self, state, root_gradients):
            assert state == self.payload
            return root_gradients

    dim = 4

    p = C.parameter(shape=(dim,), init=10)
    in1 = C.input_variable(dim, needs_gradient=True, name='i_var')
    z = C.user_function(TestBackPropState(in1, payload)) + p

    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    learner = C.sgd(z.parameters, lr_per_sample)
    trainer = C.Trainer(None, (z), [learner])

    for _ in range(100):
        sample = np.random.rand(dim)
        trainer.train_minibatch({in1: [sample]})
def create_proposal_target_layer(rpn_rois, scaled_gt_boxes, num_classes):
    '''
    Creates a proposal target layer that is used for training an object
    detection network as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Assigns object detection proposals to ground-truth targets.
    Produces proposal classification labels and bounding-box regression targets.
    It also adds gt_boxes to candidates and samples fg and bg rois for training.

    Args:
        rpn_rois:        The proposed ROIs, e.g. from a region proposal network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label).
                         Coordinates are absolute pixels wrt. the input image.
        num_classes:     The number of classes in the data set

    Returns:
        rpn_target_rois - a set of rois containing the ground truth and a number of sampled fg and bg ROIs
        label_targets - the target labels for the rois
        bbox_targets - the regression coefficient targets for the rois
        bbox_inside_weights - the weights for the regression loss
    '''
    param_str = "'num_classes': {}".format(num_classes)
    target_layer = user_function(
        ProposalTargetLayer(rpn_rois, scaled_gt_boxes, param_str=param_str))

    # Each of the four outputs is wrapped in a named alias, in output order.
    return (
        alias(target_layer.outputs[0], name='rpn_target_rois'),
        alias(target_layer.outputs[1], name='label_targets'),
        alias(target_layer.outputs[2], name='bbox_targets'),
        alias(target_layer.outputs[3], name='bbox_inside_w'),
    )
def test_proposal_layer():
    '''
    Runs the CNTK and Caffe proposal layers on the same random inputs and
    requires their proposals to match exactly.
    '''
    cls_prob_shape_cntk = (18, 61, 61)
    cls_prob_shape_caffe = (18, 61, 61)
    rpn_bbox_shape = (36, 61, 61)
    im_info = [1000, 1000, 1]

    # Create input tensors with values
    cls_prob = np.random.random_sample(cls_prob_shape_cntk).astype(np.float32)
    rpn_bbox_pred = np.random.random_sample(rpn_bbox_shape).astype(np.float32)

    # Create CNTK layer and call forward
    cls_prob_var = input_variable(cls_prob_shape_cntk)
    rpn_bbox_var = input_variable(rpn_bbox_shape)
    proposal_udf = CntkProposalLayer(cls_prob_var, rpn_bbox_var,
                                     cntk.constant(im_info, (3,)))
    cntk_layer = user_function(proposal_udf)
    feed = {cls_prob_var: [cls_prob], rpn_bbox_var: [rpn_bbox_pred]}
    state, cntk_output = cntk_layer.forward(feed)
    first_output = next(iter(cntk_output))
    cntk_proposals = cntk_output[first_output][0]

    # Create Caffe layer and call forward
    bottom = [
        np.array([cls_prob.reshape(cls_prob_shape_caffe)]),
        np.array([rpn_bbox_pred]),
        np.array([im_info]),
    ]
    top = None  # handled through return statement in caffe layer for unit testing

    caffe_layer = CaffeProposalLayer()
    caffe_layer.set_param_str("'feat_stride': 16")
    caffe_layer.setup(bottom, top)
    # The first column of the Caffe output is dropped before comparing.
    caffe_proposals = caffe_layer.forward(bottom, top)[:, 1:]

    # assert that results are exactly the same
    assert cntk_proposals.shape == caffe_proposals.shape
    assert np.allclose(cntk_proposals, caffe_proposals, rtol=0.0, atol=0.0)
    print("Verified ProposalLayer")
def create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg, use_native_proposal_layer=False):
    '''
    Creates the proposal layer and returns its output aliased as 'rpn_rois'.

    Args:
        rpn_cls_prob_reshape: first input passed to the proposal layer
        rpn_bbox_pred: second input passed to the proposal layer
        im_info: third input passed to the proposal layer
        cfg: configuration indexed by "MODEL", "DATA", "TRAIN" and "TEST",
            from which the layer settings are read
        use_native_proposal_layer (bool, default False): if True the native
            'ProposalLayerOp' user function is used, otherwise the Python
            ``ProposalLayer`` user function

    Returns:
        the proposal layer output wrapped in an alias named 'rpn_rois'
    '''
    train_cfg = cfg["TRAIN"]
    test_cfg = cfg["TEST"]

    layer_config = {
        "feat_stride": cfg["MODEL"].FEATURE_STRIDE,
        "scales": cfg["DATA"].PROPOSAL_LAYER_SCALES,
        "train_pre_nms_topN": train_cfg.RPN_PRE_NMS_TOP_N,
        "train_post_nms_topN": train_cfg.RPN_POST_NMS_TOP_N,
        "train_nms_thresh": float(train_cfg.RPN_NMS_THRESH),
        "train_min_size": float(train_cfg.RPN_MIN_SIZE),
        "test_pre_nms_topN": test_cfg.RPN_PRE_NMS_TOP_N,
        "test_post_nms_topN": test_cfg.RPN_POST_NMS_TOP_N,
        "test_nms_thresh": float(test_cfg.RPN_NMS_THRESH),
        "test_min_size": float(test_cfg.RPN_MIN_SIZE),
    }

    if use_native_proposal_layer:
        # Register the native op only on first use so that this function can
        # be called more than once per process.
        # NOTE(review): registering an already-registered op id is presumably
        # rejected by CNTK - confirm against the native UDF registry.
        if not cntk.cntk_py.is_native_user_function_registered('ProposalLayerOp'):
            cntk.ops.register_native_user_function(
                'ProposalLayerOp',
                'Cntk.ProposalLayerLib-' + cntk.__version__.rstrip('+'),
                'CreateProposalLayer')
        rpn_rois_raw = ops.native_user_function(
            'ProposalLayerOp',
            [rpn_cls_prob_reshape, rpn_bbox_pred, im_info],
            layer_config,
            'native_proposal_layer')
    else:
        rpn_rois_raw = user_function(
            ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, layer_config))

    return alias(rpn_rois_raw, name='rpn_rois')
def test_ext_backpropstate(payload):
    '''
    Verifies that the state object returned by a user function's forward
    pass (here ``payload``) is the one passed to its backward pass.
    '''
    class TestBackPropState(UserFunction):
        def __init__(self, arg, payload, name='f1'):
            self.payload = payload
            super(TestBackPropState, self).__init__([arg])

        def infer_outputs(self):
            first_input = self.inputs[0]
            output = C.output_variable(first_input.shape,
                                       first_input.dtype,
                                       first_input.dynamic_axes)
            return [output]

        def forward(self, argument, device=None, outputs_to_retain=None):
            return self.payload, argument

        def backward(self, state, root_gradients):
            # The state must be exactly what forward() returned.
            assert state == self.payload
            return root_gradients

    dim = 4
    num_minibatches = 100

    p = C.parameter(shape=(dim,), init=10)
    in1 = C.input_variable(dim, needs_gradient=True, name='i_var')
    m = C.user_function(TestBackPropState(in1, payload))
    z = m + p

    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    trainer = C.Trainer(None, (z), [C.sgd(z.parameters, lr_per_sample)])

    for _ in range(num_minibatches):
        trainer.train_minibatch({in1: [np.random.rand(dim)]})
def create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg, use_native_proposal_layer=False):
    '''
    Creates the proposal layer and returns its output aliased as 'rpn_rois'.

    Args:
        rpn_cls_prob_reshape: first input passed to the proposal layer
        rpn_bbox_pred: second input passed to the proposal layer
        im_info: third input passed to the proposal layer
        cfg: configuration indexed by "MODEL", "DATA", "TRAIN" and "TEST",
            from which the layer settings are read
        use_native_proposal_layer (bool, default False): if True the native
            'ProposalLayerOp' user function is used, otherwise the Python
            ``ProposalLayer`` user function

    Returns:
        the proposal layer output wrapped in an alias named 'rpn_rois'
    '''
    layer_config = {}
    layer_config["feat_stride"] = cfg["MODEL"].FEATURE_STRIDE
    layer_config["scales"] = cfg["DATA"].PROPOSAL_LAYER_SCALES

    # The train and test sections contribute the same four settings each.
    for prefix, section in (("train", cfg["TRAIN"]), ("test", cfg["TEST"])):
        layer_config[prefix + "_pre_nms_topN"] = section.RPN_PRE_NMS_TOP_N
        layer_config[prefix + "_post_nms_topN"] = section.RPN_POST_NMS_TOP_N
        layer_config[prefix + "_nms_thresh"] = float(section.RPN_NMS_THRESH)
        layer_config[prefix + "_min_size"] = float(section.RPN_MIN_SIZE)

    if not use_native_proposal_layer:
        python_layer = ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, layer_config)
        return alias(user_function(python_layer), name='rpn_rois')

    op_id = 'ProposalLayerOp'
    # Only register the native op if that has not happened yet, so repeated
    # calls within one process keep working.
    # NOTE(review): a second registration of the same op id is presumably an
    # error in CNTK - confirm against the native UDF registry.
    if not cntk.cntk_py.is_native_user_function_registered(op_id):
        cntk.ops.register_native_user_function(
            op_id,
            'Cntk.ProposalLayerLib-' + cntk.__version__.rstrip('+'),
            'CreateProposalLayer')

    rpn_rois_raw = ops.native_user_function(
        op_id,
        [rpn_cls_prob_reshape, rpn_bbox_pred, im_info],
        layer_config,
        'native_proposal_layer')
    return alias(rpn_rois_raw, name='rpn_rois')
def build_test_function():
    '''
    Builds a graph of the form MyPlus(native_times(w, MyPlus(x, c1)), c2).

    The native 'NativeUserTimesOp' op is registered first if it is not
    registered yet.

    Returns:
        tuple of (device, w_value, c1_value, c2_value, function)
    '''
    dev = C.cpu()
    w_value = np.asarray([[0.5, 2], [-0.5, 1.5]]).astype(np.float32)
    c1_value = 2.718
    c2_value = -3.141

    native_op_id = 'NativeUserTimesOp'
    if not C.cntk_py.is_native_user_function_registered(native_op_id):
        module_name = 'Cntk.ExtensibilityExamples-' + C.__version__.rstrip('+')
        C.ops.register_native_user_function(
            native_op_id, module_name, 'CreateUserTimesFunction')

    x = C.input_variable((2))
    w = C.parameter((2, 2), init=w_value, device=dev)

    inner_plus = C.user_function(MyPlus(x, C.constant(c1_value)))
    native_times = C.ops.native_user_function(
        native_op_id, [w, inner_plus],
        user_function_instance_name='my_times')
    outer_plus = C.user_function(MyPlus(native_times, C.constant(c2_value)))

    return dev, w_value, c1_value, c2_value, outer_plus
def test_anchor_target_layer():
    '''
    Runs the CNTK and Caffe anchor target layers in deterministic mode on the
    same random inputs and requires all three outputs to match exactly.
    '''
    from utils.rpn.anchor_target_layer import AnchorTargetLayer as CntkAnchorTargetLayer
    from utils.caffe_layers.anchor_target_layer import AnchorTargetLayer as CaffeAnchorTargetLayer

    rpn_cls_score_shape_cntk = (1, 18, 61, 61)
    num_gt_boxes = 50
    gt_boxes_shape_cntk = (num_gt_boxes, 5)
    dims_info_shape = (6,)
    im_info = [1000, 1000, 1]

    # Create input tensors with values
    rpn_cls_score_dummy = np.random.random_sample(rpn_cls_score_shape_cntk).astype(np.float32)
    dims_input = np.array([1000, 1000, 1000, 1000, 1000, 1000]).astype(np.float32)

    # Random boxes: top-left corner, then a size of at least 50 in each axis.
    x1y1 = np.random.random_sample((num_gt_boxes, 2)) * 500
    wh = np.random.random_sample((num_gt_boxes, 2)) * 400
    x2y2 = x1y1 + wh + 50
    label = np.random.random_sample((num_gt_boxes, 1)) * 17.0
    gt_boxes = np.hstack((x1y1, x2y2, label)).astype(np.float32)

    # Create CNTK layer and call forward
    rpn_cls_score_var = input_variable(rpn_cls_score_shape_cntk)
    gt_boxes_var = input_variable(gt_boxes_shape_cntk)
    dims_info_var = input_variable(dims_info_shape)

    cntk_layer = user_function(CntkAnchorTargetLayer(
        rpn_cls_score_var, gt_boxes_var, dims_info_var, deterministic=True))
    state, cntk_output = cntk_layer.forward({
        rpn_cls_score_var: [rpn_cls_score_dummy],
        gt_boxes_var: [gt_boxes],
        dims_info_var: dims_input,
    })

    def first_sample_of(name_fragment):
        # Outputs are looked up by a fragment of their string representation.
        key = [k for k in cntk_output if name_fragment in str(k)][0]
        return cntk_output[key][0]

    cntk_objectness_target = first_sample_of('objectness_target')
    cntk_bbox_targets = first_sample_of('rpn_bbox_target')
    cntk_bbox_inside_w = first_sample_of('rpn_bbox_inside_w')

    # Create Caffe layer and call forward
    bottom = [np.array(rpn_cls_score_dummy), np.array(gt_boxes), np.array(im_info)]
    top = None  # handled through return statement in caffe layer for unit testing

    caffe_layer = CaffeAnchorTargetLayer()
    caffe_layer.set_param_str("'feat_stride': 16")
    caffe_layer.setup(bottom, top)
    caffe_layer.set_deterministic_mode()

    caffe_objectness_target, caffe_bbox_targets, caffe_bbox_inside_w = \
        caffe_layer.forward(bottom, top)

    # assert that results are exactly the same
    assert cntk_objectness_target.shape == caffe_objectness_target.shape
    assert cntk_bbox_targets.shape == caffe_bbox_targets.shape
    assert cntk_bbox_inside_w.shape == caffe_bbox_inside_w.shape

    assert np.allclose(cntk_objectness_target, caffe_objectness_target, rtol=0.0, atol=0.0)
    assert np.allclose(cntk_bbox_targets, caffe_bbox_targets, rtol=0.0, atol=0.0)
    assert np.allclose(cntk_bbox_inside_w, caffe_bbox_inside_w, rtol=0.0, atol=0.0)
    print("Verified AnchorTargetLayer")
def test_ext_eval_3_no_input():
    '''
    Evaluates a MyPlus user function fed only by a parameter and a constant
    and expects 10 + 3 in every element.
    '''
    dim = 4
    p = C.parameter(shape=(dim,), init=10, name='p')
    plus_three = C.user_function(MyPlus(p, C.constant(3)))
    z = plus_three + 0

    result = z.eval()

    # No batch dimension since we have no input
    expected = np.zeros_like(p) + 10 + 3
    assert np.allclose(result, expected)
def test_ext_eval_4_b_inside_graph():
    '''
    Evaluates ``p * MyPlus(p, 3)`` wrapped in ``C.user_function`` and expects
    (p_init + 3) * p_init in every element.
    '''
    dim = 4
    p_init = 10
    p = C.parameter(shape=(dim,), init=p_init, name='p')

    product = p * MyPlus(p, C.constant(3))
    z = C.user_function(product)

    result = z.eval()

    # No batch dimension since we have no input
    expected = ((p_init * np.ones_like(result)) + 3) * p_init
    assert np.allclose(result, expected)
def test_ext_eval_4_b_inside_graph():
    '''
    Checks evaluation of a MyPlus instance that is multiplied by a parameter
    before being wrapped with ``C.user_function``.
    '''
    dim, p_init = 4, 10
    p = C.parameter(shape=(dim,), init=p_init, name='p')
    z = C.user_function(p * MyPlus(p, C.constant(3)))

    result = z.eval()

    # No batch dimension since we have no input
    filled = p_init * np.ones_like(result)
    assert np.allclose(result, (filled + 3) * p_init)
def test_ext_eval_3_no_input():
    '''
    Checks evaluation of a graph with no input variables: a MyPlus user
    function over a parameter (10) and a constant (3), plus zero.
    '''
    dim = 4
    p = C.parameter(shape=(dim,), init=10, name='p')
    z = C.user_function(MyPlus(p, C.constant(3))) + 0

    result = z.eval()

    # No batch dimension since we have no input
    baseline = np.zeros_like(p)
    assert np.allclose(result, baseline + 10 + 3)
def test_ext_eval_5_times():
    '''
    Feeds a MyPlus user function into ``C.times`` with a (2, 50) parameter
    initialised to 2 and expects (p_init + 3) * 2 * 2 in every element.
    '''
    dim = 2
    p_init = 10
    p = C.parameter(shape=(dim,), init=p_init, name='p')
    plus_three = C.user_function(MyPlus(p, C.constant(3)))
    weights = C.parameter(shape=(2, 50), init=2)
    z = C.times(plus_three, weights)

    result = z.eval()

    # No batch dimension since we have no input
    expected = ((p_init * np.ones_like(result)) + 3) * 2 * 2
    assert np.allclose(result, expected)
def test_ext_eval_1():
    '''
    Evaluates MyPlus(input, 3) + parameter(10) on one random sample and
    expects input + 3 + 10.
    '''
    dim = 4
    p = C.parameter(shape=(dim,), init=10, name='p')
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    plus_three = C.user_function(MyPlus(i, C.constant(3)))
    z = plus_three + p

    input_data = np.random.rand(dim)
    result = z.eval([input_data])

    expected = input_data + 3 + 10
    assert np.allclose(result[0][0], expected)
def test_ext_eval_1():
    '''
    Checks evaluation of a MyPlus user function applied to a sequence input,
    followed by the addition of a parameter.
    '''
    dim = 4
    p = C.parameter(shape=(dim,), init=10, name='p')
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    z = C.user_function(MyPlus(i, C.constant(3))) + p

    sample = np.random.rand(dim)
    result = z.eval([sample])

    # First sequence, first step of the result.
    first_step = result[0][0]
    assert np.allclose(first_step, sample + 3 + 10)
def test_ext_eval_5_times():
    '''
    Checks that the output of a MyPlus user function can be consumed by
    ``C.times`` and yields the expected constant result.
    '''
    dim, p_init = 2, 10
    p = C.parameter(shape=(dim,), init=p_init, name='p')
    m = C.user_function(MyPlus(p, C.constant(3)))
    z = C.times(m, C.parameter(shape=(2, 50), init=2))

    result = z.eval()

    # No batch dimension since we have no input
    filled = p_init * np.ones_like(result)
    assert np.allclose(result, (filled + 3) * 2 * 2)
def test_anchor_target_layer():
    '''
    Compares the CNTK anchor target layer with the Caffe implementation on
    identical random inputs; shapes and values must agree exactly.
    '''
    rpn_cls_score_shape_cntk = (1, 18, 61, 61)
    num_gt_boxes = 50
    gt_boxes_shape_cntk = (num_gt_boxes, 5)
    dims_info_shape = (6,)
    im_info = [1000, 1000, 1]

    # Create input tensors with values
    rpn_cls_score_dummy = np.random.random_sample(rpn_cls_score_shape_cntk).astype(np.float32)
    dims_input = np.array([1000, 1000, 1000, 1000, 1000, 1000]).astype(np.float32)

    x1y1 = np.random.random_sample((num_gt_boxes, 2)) * 500
    wh = np.random.random_sample((num_gt_boxes, 2)) * 400
    x2y2 = x1y1 + wh + 50
    label = np.random.random_sample((num_gt_boxes, 1))
    label = label * 17.0
    gt_boxes = np.hstack((x1y1, x2y2, label)).astype(np.float32)

    # Create CNTK layer and call forward
    rpn_cls_score_var = input_variable(rpn_cls_score_shape_cntk)
    gt_boxes_var = input_variable(gt_boxes_shape_cntk)
    dims_info_var = input_variable(dims_info_shape)

    anchor_udf = CntkAnchorTargetLayer(rpn_cls_score_var, gt_boxes_var,
                                       dims_info_var, deterministic=True)
    cntk_layer = user_function(anchor_udf)
    feed = {
        rpn_cls_score_var: [rpn_cls_score_dummy],
        gt_boxes_var: [gt_boxes],
        dims_info_var: dims_input,
    }
    state, cntk_output = cntk_layer.forward(feed)

    # Pick each output by a fragment of its string representation and keep
    # the first sample of it.
    cntk_results = {}
    for fragment in ('objectness_target', 'rpn_bbox_target', 'rpn_bbox_inside_w'):
        matching_key = [k for k in cntk_output if fragment in str(k)][0]
        cntk_results[fragment] = cntk_output[matching_key][0]

    cntk_objectness_target = cntk_results['objectness_target']
    cntk_bbox_targets = cntk_results['rpn_bbox_target']
    cntk_bbox_inside_w = cntk_results['rpn_bbox_inside_w']

    # Create Caffe layer and call forward
    bottom = [np.array(rpn_cls_score_dummy), np.array(gt_boxes), np.array(im_info)]
    top = None  # handled through return statement in caffe layer for unit testing

    param_str = "'feat_stride': 16"
    caffe_layer = CaffeAnchorTargetLayer()
    caffe_layer.set_param_str(param_str)
    caffe_layer.setup(bottom, top)
    caffe_layer.set_deterministic_mode()

    caffe_results = caffe_layer.forward(bottom, top)
    caffe_objectness_target, caffe_bbox_targets, caffe_bbox_inside_w = caffe_results

    # assert that results are exactly the same
    assert cntk_objectness_target.shape == caffe_objectness_target.shape
    assert cntk_bbox_targets.shape == caffe_bbox_targets.shape
    assert cntk_bbox_inside_w.shape == caffe_bbox_inside_w.shape

    assert np.allclose(cntk_objectness_target, caffe_objectness_target, rtol=0.0, atol=0.0)
    assert np.allclose(cntk_bbox_targets, caffe_bbox_targets, rtol=0.0, atol=0.0)
    assert np.allclose(cntk_bbox_inside_w, caffe_bbox_inside_w, rtol=0.0, atol=0.0)
    print("Verified AnchorTargetLayer")
def test_proposal_layer():
    '''
    Runs the configurable CNTK proposal layer and the Caffe proposal layer on
    the same random inputs and requires their proposals to match exactly.
    '''
    from utils.rpn.proposal_layer import ProposalLayer as CntkProposalLayer
    from utils.caffe_layers.proposal_layer import ProposalLayer as CaffeProposalLayer
    from FasterRCNN.FasterRCNN_config import cfg

    cls_prob_shape_cntk = (18, 61, 61)
    cls_prob_shape_caffe = (18, 61, 61)
    rpn_bbox_shape = (36, 61, 61)
    dims_info_shape = (6,)
    im_info = [1000, 1000, 1]

    # Create input tensors with values
    cls_prob = np.random.random_sample(cls_prob_shape_cntk).astype(np.float32)
    rpn_bbox_pred = np.random.random_sample(rpn_bbox_shape).astype(np.float32)
    dims_input = np.array([1000, 1000, 1000, 1000, 1000, 1000]).astype(np.float32)

    # Create CNTK layer and call forward
    cls_prob_var = input_variable(cls_prob_shape_cntk)
    rpn_bbox_var = input_variable(rpn_bbox_shape)
    dims_info_var = input_variable(dims_info_shape)

    train_cfg = cfg["TRAIN"]
    test_cfg = cfg["TEST"]
    layer_config = {
        "feat_stride": 16,
        "scales": [8, 16, 32],
        "train_pre_nms_topN": train_cfg.RPN_PRE_NMS_TOP_N,
        "train_post_nms_topN": train_cfg.RPN_POST_NMS_TOP_N,
        "train_nms_thresh": float(train_cfg.RPN_NMS_THRESH),
        "train_min_size": float(train_cfg.RPN_MIN_SIZE),
        "test_pre_nms_topN": test_cfg.RPN_PRE_NMS_TOP_N,
        "test_post_nms_topN": test_cfg.RPN_POST_NMS_TOP_N,
        "test_nms_thresh": float(test_cfg.RPN_NMS_THRESH),
        "test_min_size": float(test_cfg.RPN_MIN_SIZE),
    }

    cntk_layer = user_function(
        CntkProposalLayer(cls_prob_var, rpn_bbox_var, dims_info_var, layer_config))
    state, cntk_output = cntk_layer.forward({
        cls_prob_var: [cls_prob],
        rpn_bbox_var: [rpn_bbox_pred],
        dims_info_var: dims_input,
    })
    first_output = next(iter(cntk_output))
    cntk_proposals = cntk_output[first_output][0]

    # Create Caffe layer and call forward
    bottom = [
        np.array([cls_prob.reshape(cls_prob_shape_caffe)]),
        np.array([rpn_bbox_pred]),
        np.array([im_info]),
    ]
    top = None  # handled through return statement in caffe layer for unit testing

    caffe_layer = CaffeProposalLayer()
    caffe_layer.set_param_str("'feat_stride': 16")
    caffe_layer.setup(bottom, top)
    # The first column of the Caffe output is dropped before comparing.
    caffe_proposals = caffe_layer.forward(bottom, top)[:, 1:]

    # assert that results are exactly the same
    assert cntk_proposals.shape == caffe_proposals.shape
    assert np.allclose(cntk_proposals, caffe_proposals, rtol=0.0, atol=0.0)
    print("Verified ProposalLayer")
def test_udf_clone():
    '''
    Clones a graph containing a MyPlus user function with the 'share' method
    and checks that the clone evaluates to input + 3 + 10.
    '''
    dim = 4
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m_udf = C.user_function(MyPlus(i, C.constant(3)))
    p = C.parameter(shape=(dim,), init=10, name='p')

    original = m_udf + p
    z_clone = original.clone('share')

    input_data = np.random.rand(dim)
    result = z_clone.eval([input_data])

    expected = input_data + 3 + 10
    assert np.allclose(result[0][0], expected)
def test_udf_input_values_no_sharing():
    '''
    Builds MyArgumentPreservingPlus(i + 1, i + 2) + w, splices the result
    with itself three times and checks that a forward/backward pass with
    i = 2 yields eight copies of 8.
    '''
    i = C.input_variable(1, needs_gradient=True, name='i_var')
    m = C.user_function(MyArgumentPreservingPlus(i + 1, i + 2))

    w = C.parameter(shape=(1,), init=1)
    m = m + w

    # Each splice doubles the length along axis 0: 1 -> 2 -> 4 -> 8.
    spliced = m
    for _ in range(3):
        spliced = C.splice(spliced, spliced, axis=0)
    m4 = spliced

    feed = {i: np.asarray([2], dtype=np.float32)}
    grad_value, result = m4.grad(feed, outputs=[m4], wrt=[w, i])

    assert np.array_equal(result, [[8, 8, 8, 8, 8, 8, 8, 8]])
def test_ext_eval_2_only_param():
    '''
    Evaluates a MyPlus user function whose operands are a parameter and a
    constant, with the input variable added afterwards; expects
    input + 3 + 10.
    '''
    dim = 4
    p = C.parameter(shape=(dim,), init=10, name='p')
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    param_plus_three = C.user_function(MyPlus(p, C.constant(3)))

    # NOTE: combine([m.output]) does not work here, so the input is added to
    # the user function instead.
    z = param_plus_three + i

    input_data = np.random.rand(dim)
    result = z.eval([input_data])

    expected = input_data + 3 + 10
    assert np.allclose(result[0][0], expected)
def test_udf_clone():
    '''
    Checks that a graph with a MyPlus user function can be cloned via
    ``clone('share')`` and that the clone evaluates correctly.
    '''
    dim = 4
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m_udf = C.user_function(MyPlus(i, C.constant(3)))
    p = C.parameter(shape=(dim,), init=10, name='p')
    z = m_udf + p

    z_clone = z.clone('share')

    sample = np.random.rand(dim)
    result = z_clone.eval([sample])

    # First sequence, first step of the result.
    first_step = result[0][0]
    assert np.allclose(first_step, sample + 3 + 10)
def test_ext_eval_2_only_param():
    '''
    Checks evaluation when the MyPlus user function only sees a parameter
    and a constant and the input variable joins the graph afterwards.
    '''
    dim = 4
    p = C.parameter(shape=(dim,), init=10, name='p')
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m = C.user_function(MyPlus(p, C.constant(3)))

    # NOTE: combine([m.output]) does not work, hence the addition below.
    z = m + i

    sample = np.random.rand(dim)
    result = z.eval([sample])

    first_step = result[0][0]
    assert np.allclose(first_step, sample + 3 + 10)
def test_ext_eval_freedimension_input():
    '''
    Evaluates a MyPlus user function on a free-dimension sequence input with
    two different input lengths and checks both the forward result and the
    gradient with respect to the input.
    '''
    i = C.sequence.input_variable((C.FreeDimension), needs_gradient=True, name='i_var')
    m = C.user_function(MyPlus(i, C.constant(3)))

    # The same function is fed inputs of length 3 and then length 6.
    for length in (3, 6):
        input_data = np.random.rand(length)
        gradient_value, result = m.grad({i: input_data}, wrt=[i], outputs=[m.output])

        assert np.allclose(result[0][0], input_data + 3)
        assert np.allclose(gradient_value[0][0], np.ones_like(input_data))
def CustomMultibitKernel(input, bit_map, mean_bits=None):
    '''
    Wraps ``input`` in a MultibitKernel user function with a per-element bit map.

    Args:
        input: operand of the kernel; its ``shape`` determines the shape of
            the bit map
        bit_map: either a single int, which is broadcast to ``input.shape``,
            or an array-like that must already have that shape. Ignored when
            ``mean_bits`` is truthy.
        mean_bits (default None): if truthy, the bit map is instead sampled
            from a normal distribution with this mean and standard deviation
            1, rounded, clamped to at least 1 and stored as int32. The mean
            of the sampled map is printed.

    Returns:
        the user function created from ``MultibitKernel(input, bit_map)``

    Raises:
        AssertionError: if the resulting bit map shape differs from
            ``input.shape``
    '''
    if mean_bits:
        sampled = np.round(np.random.normal(mean_bits, 1, input.shape))
        bit_map = np.asarray(np.maximum(sampled, 1), dtype=np.int32)
        print("Mean Bits: ", np.mean(bit_map))
    elif isinstance(bit_map, int):
        # Broadcast the scalar directly with NumPy; no graph node is needed
        # just to find out how many elements the input has.
        bit_map = np.full(input.shape, bit_map)
    else:
        bit_map = np.asarray(bit_map)

    assert (bit_map.shape == input.shape)
    return C.user_function(MultibitKernel(input, bit_map))
def test_udf_no_gradient_for_some_inputs():
    '''
    Checks that MyPlusWithNoGradientToRightOperand yields a gradient of ones
    for its first argument, zeros for its second, and the sum as result.
    '''
    dim = 2
    x = C.sequence.input_variable(dim, needs_gradient=True, name='x')
    y = C.sequence.input_variable(dim, needs_gradient=True, name='y')
    op = C.user_function(MyPlusWithNoGradientToRightOperand(x, y))

    x_data = [AA([[1., 2.], [3., 4.]], dtype=np.float32)]
    y_data = [AA([[5., 6.], [7., 8.]], dtype=np.float32)]
    feed = {x: x_data, y: y_data}

    gradients, result = op.grad(feed, op.arguments, [op.output])

    left_grad = gradients[op.arguments[0]]
    right_grad = gradients[op.arguments[1]]

    assert np.allclose(left_grad, [[[1., 1.], [1., 1.]]])
    assert np.allclose(right_grad, [[[0., 0.], [0., 0.]]])
    assert np.allclose(result, [[[6., 8.], [10., 12.]]])
def test_udf_plus_and_last():
    '''
    Runs the PlusAndLast user function on a two-step sequence and a
    non-sequence operand and checks the forward result.
    '''
    x = C.sequence.input_variable(shape=(2,))
    y = C.input_variable(shape=(2,))
    func = C.user_function(PlusAndLast(x, y))

    dt_precision = np.float32
    operand1 = [AA([[1., 2.], [3., 4.]], dtype=dt_precision)]
    operand2 = [AA([2., 2.], dtype=dt_precision)]
    feed = {x: operand1, y: operand2}

    _, result = func.forward(feed, [func.output])

    # [3, 4] + [2, 2], i.e. the last sequence step plus the second operand.
    expected_forward = AA([[[5., 6.]]], dtype=dt_precision)
    assert np.allclose(result[func.output], expected_forward)
def test_udf_plus_and_last():
    '''
    Checks the forward output of the PlusAndLast user function for a
    sequence input combined with a plain input.
    '''
    dt_precision = np.float32

    x = C.sequence.input_variable(shape=(2,))
    y = C.input_variable(shape=(2,))
    func = C.user_function(PlusAndLast(x, y))

    sequence_operand = [AA([[1., 2.], [3., 4.]], dtype=dt_precision)]
    plain_operand = [AA([2., 2.], dtype=dt_precision)]

    _, result = func.forward({x: sequence_operand, y: plain_operand},
                             [func.output])

    expected_forward = AA([[[5., 6.]]], dtype=dt_precision)
    actual_forward = result[func.output]
    assert np.allclose(actual_forward, expected_forward)
def test_override_serialize(tmpdir):
    '''
    Saves a nested MyPlusPlus user function to disk, reloads it and checks
    that the reloaded function evaluates to the same result.
    '''
    dev = C.cpu()
    a, b = 1.2322341, -0.29084

    # Three nested levels, each taking the previous level twice as operands.
    nested = MyPlusPlus([C.constant(a), C.constant(b)], '++')
    nested = MyPlusPlus([nested, nested], '+++')
    nested = MyPlusPlus([nested, nested], '++++')
    op = C.user_function(nested)

    result1 = op.eval({}, device=dev)

    filepath = str(tmpdir / 'test_udf_with_renamed_deserialize.dat')
    op.save(filepath)
    op_reloaded = Function.load(filepath, device=dev)

    result2 = op_reloaded.eval({}, device=dev)
    assert result1 == result2
def test_multi_freedim_output_udf():
    '''
    Checks both outputs of MultiFreeDimensionOutputUserFunction
    (x + 2y and 2x + y) and the gradients of their sum.
    '''
    dim = 2
    x = C.sequence.input_variable(dim, needs_gradient=True, name='x')
    y = C.sequence.input_variable(dim, needs_gradient=True, name='y')
    op = C.user_function(MultiFreeDimensionOutputUserFunction(x, y))

    x_data = [AA([[1., 2.], [3., 4.]], dtype=np.float32)]
    y_data = [AA([[5., 6.], [7., 8.]], dtype=np.float32)]
    feed = {x: x_data, y: y_data}

    result = op.eval(feed)
    first_out, second_out = op.outputs[0], op.outputs[1]
    assert np.allclose(result[first_out], x_data[0] + 2 * y_data[0])
    assert np.allclose(result[second_out], 2 * x_data[0] + y_data[0])

    # Summing both outputs gives 3x + 3y, hence a gradient of 3 everywhere.
    op = op.outputs[0] + op.outputs[1]
    gradients = op.grad(feed, op.arguments)
    expected_grad = [[[3., 3.], [3., 3.]]]
    assert np.allclose(gradients[op.arguments[0]], expected_grad)
    assert np.allclose(gradients[op.arguments[1]], expected_grad)
def CustomMultibitKernel(input, bit_map, mean_bits=None):
    '''
    Creates a MultibitKernel user function over ``input`` with a bit map of
    the same shape.

    Args:
        input: operand of the kernel; ``input.shape`` is the required shape
            of the bit map
        bit_map: a single int (broadcast to ``input.shape``) or an array-like
            of that shape. Not used when ``mean_bits`` is truthy.
        mean_bits (default None): if truthy, the bit map is drawn from a
            normal distribution with this mean and standard deviation 1,
            rounded, clamped to a minimum of 1 and converted to int32; its
            mean is printed.

    Returns:
        the result of ``C.user_function(MultibitKernel(input, bit_map))``

    Raises:
        AssertionError: if the bit map does not have the shape of ``input``
    '''
    if mean_bits:
        random_bits = np.random.normal(mean_bits, 1, input.shape)
        bit_map = np.asarray(np.maximum(np.round(random_bits), 1),
                             dtype=np.int32)
        print("Mean Bits: ", np.mean(bit_map))
    else:
        if isinstance(bit_map, int):
            # A scalar is expanded with NumPy alone, instead of building a
            # reshape node in the graph only to count the input's elements.
            bit_map = np.full(input.shape, bit_map)
        else:
            bit_map = np.asarray(bit_map)

    assert (bit_map.shape == input.shape)
    return C.user_function(MultibitKernel(input, bit_map))
def test_multioutput_udf():
    '''
    Checks both outputs of MultiOutputUserFunction (x + 2y and 2x + y) and
    the gradients of their sum with respect to both arguments.
    '''
    dim = 2
    x = C.sequence.input_variable(dim, needs_gradient=True, name='x')
    y = C.sequence.input_variable(dim, needs_gradient=True, name='y')
    op = C.user_function(MultiOutputUserFunction(x, y))

    x_data = [AA([[1., 2.], [3., 4.]], dtype=np.float32)]
    y_data = [AA([[5., 6.], [7., 8.]], dtype=np.float32)]
    feed = {x: x_data, y: y_data}

    result = op.eval(feed)
    expected_outputs = (x_data[0] + 2 * y_data[0], 2 * x_data[0] + y_data[0])
    assert np.allclose(result[op.outputs[0]], expected_outputs[0])
    assert np.allclose(result[op.outputs[1]], expected_outputs[1])

    # The sum of both outputs is 3x + 3y, so every gradient entry is 3.
    op = op.outputs[0] + op.outputs[1]
    gradients = op.grad(feed, op.arguments)
    assert np.allclose(gradients[op.arguments[0]], [[[3., 3.], [3., 3.]]])
    assert np.allclose(gradients[op.arguments[1]], [[[3., 3.], [3., 3.]]])
def test_proposal_layer():
    '''
    Runs the CNTK proposal layer (fed with a dims-info input) and the Caffe
    proposal layer on the same random inputs and requires their proposals to
    match exactly.
    '''
    cls_prob_shape_cntk = (18, 61, 61)
    cls_prob_shape_caffe = (18, 61, 61)
    rpn_bbox_shape = (36, 61, 61)
    dims_info_shape = (6,)
    im_info = [1000, 1000, 1]

    # Create input tensors with values
    cls_prob = np.random.random_sample(cls_prob_shape_cntk).astype(np.float32)
    rpn_bbox_pred = np.random.random_sample(rpn_bbox_shape).astype(np.float32)
    dims_input = np.array([1000, 1000, 1000, 1000, 1000, 1000]).astype(np.float32)

    # Create CNTK layer and call forward
    cls_prob_var = input_variable(cls_prob_shape_cntk)
    rpn_bbox_var = input_variable(rpn_bbox_shape)
    dims_info_var = input_variable(dims_info_shape)

    proposal_udf = CntkProposalLayer(cls_prob_var, rpn_bbox_var, dims_info_var)
    cntk_layer = user_function(proposal_udf)
    feed = {
        cls_prob_var: [cls_prob],
        rpn_bbox_var: [rpn_bbox_pred],
        dims_info_var: dims_input,
    }
    state, cntk_output = cntk_layer.forward(feed)
    first_output = next(iter(cntk_output))
    cntk_proposals = cntk_output[first_output][0]

    # Create Caffe layer and call forward
    cls_prob_caffe = cls_prob.reshape(cls_prob_shape_caffe)
    bottom = [np.array([arr]) for arr in (cls_prob_caffe, rpn_bbox_pred, im_info)]
    top = None  # handled through return statement in caffe layer for unit testing

    caffe_layer = CaffeProposalLayer()
    caffe_layer.set_param_str("'feat_stride': 16")
    caffe_layer.setup(bottom, top)
    caffe_output = caffe_layer.forward(bottom, top)
    # The first column of the Caffe output is dropped before comparing.
    caffe_proposals = caffe_output[:, 1:]

    # assert that results are exactly the same
    assert cntk_proposals.shape == caffe_proposals.shape
    assert np.allclose(cntk_proposals, caffe_proposals, rtol=0.0, atol=0.0)
    print("Verified ProposalLayer")
def test_ext_train(tmpdir):
    '''
    Trains a graph containing a MyPlus user function for 100 minibatches,
    checks its forward/backward call counters, then saves the model, reloads
    it from a byte buffer and checks that the counters survived
    serialization.

    Args:
        tmpdir: temporary directory object supporting the ``/`` operator,
            used for the saved model file
    '''
    dim = 4

    p = C.parameter(shape=(dim,), init=10)
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m = MyPlus(i, C.constant(3), 'my_plus')
    # keeping m unwrapped since we need to access its member variables
    z = C.user_function(m) + p

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(z, (z + 0, z + 0),
                        [C.momentum_sgd(z.parameters, lr_per_sample,
                                        momentum_time_constant, True,
                                        minibatch_size=0)])

    # A plain for loop; the input variable `i` is no longer overwritten by a
    # manual counter.
    for _ in range(100):
        input_data = np.random.rand(dim)
        trainer.train_minibatch([input_data])

    assert m.forward_calls == m.backward_calls == 100

    filepath = str(tmpdir / 'test_ext_train.dat')

    z.save(filepath)

    # Read the file through a context manager so the handle is closed
    # deterministically.
    with open(filepath, 'rb') as model_file:
        buf = model_file.read()

    # this is only need for Python 2.7
    # (which does not distinguish between bytes and strings)
    if isinstance(buf, str):
        buf = bytearray(buf)

    z1 = Function.load(buf)

    m1 = z1.find_by_name('my_plus')
    # m1 is an instance of UserFunction, cannot directly downcast it to MyPlus,
    # using serialize as workaround:
    state = m1.serialize()['state']

    assert state['forward_calls'] == state['backward_calls'] == 100
def test_lstm_over_lstm_thought_vectors_2(device_id):
    """Check the loss of an LSTM-over-LSTM model that uses a batch-reshaping user function.

    An utterance-level LSTM output is passed through ``UtteranceBatchReshape``
    and converted to a sequence laid out like the labels, which feeds a second
    LSTM. The per-sequence cross-entropy values are compared with reference
    numbers within an absolute tolerance.
    """
    dev = cntk_device(device_id)
    input_vocab_size = 3
    emb_dim = 2
    hidden_dim = 2
    num_labels = 2

    utterances_input = C.sequence.input_variable(
        (input_vocab_size), is_sparse=True, name='utterances')
    conversation_lengths_input = C.input_variable(
        (), name='conversation_sequence_lengths')
    label_input = C.sequence.input_variable(
        num_labels,
        is_sparse=True,
        sequence_axis=C.Axis('label_sequence'),
        name='labels')

    with C.default_options(initial_state=0.1):
        model = C.layers.Embedding(emb_dim, name='embed')(utterances_input)
        model = C.layers.Recurrence(
            C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.sequence.last(model)
        reshape_udf = UtteranceBatchReshape(model, conversation_lengths_input)
        model = C.user_function(reshape_udf)
        model = C.to_sequence_like(model, label_input)
        model = C.layers.Recurrence(
            C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.layers.Dense(num_labels, name='classify')(model)

    z = model
    ce = C.cross_entropy_with_softmax(z, label_input)

    def sparse_view(rows):
        # Build a CPU-resident sparse view from dense row data.
        return C.NDArrayView.from_csr(_to_csr(rows), device=C.cpu())

    sentinel_utt_data = sparse_view([[0, 0, 1]])
    c1_utt1_data = sparse_view([[0, 1, 1], [0, 1, 0], [1, 0, 0]])
    c1_utt2_data = sparse_view([[0, 1, 0], [0, 1, 1]])
    c1_utt3_data = sparse_view([[0, 1, 1], [0, 1, 0]])
    c2_utt1_data = sparse_view([[0, 1, 1]])
    c3_utt1_data = sparse_view([[0, 1, 0], [0, 1, 1], [1, 0, 0]])
    c3_utt2_data = sparse_view([[0, 1, 0]])

    utterance_batch = [
        c1_utt1_data, c1_utt2_data, c1_utt3_data,
        c2_utt1_data, sentinel_utt_data, sentinel_utt_data,
        c3_utt1_data, c3_utt2_data, sentinel_utt_data,
    ]
    all_utt_data = C.Value.create(
        C.sequence.input_variable((input_vocab_size), is_sparse=True),
        utterance_batch,
        device=C.cpu()).data
    conversation_lengths_data = np.asarray([3, 1, 2], dtype=np.float32)

    seq1_label_data = [[0, 1], [0, 1], [1, 0]]
    seq2_label_data = [[1, 0]]
    seq3_label_data = [[1, 0], [0, 1]]
    label_data = [
        _to_csr(seq1_label_data),
        _to_csr(seq2_label_data),
        _to_csr(seq3_label_data),
    ]

    feed = {
        utterances_input: all_utt_data,
        label_input: label_data,
        conversation_lengths_input: conversation_lengths_data,
    }
    param_grads, loss_result = ce.grad(
        feed, wrt=ce.parameters, outputs=[ce], as_numpy=False)

    loss_result = loss_result.as_sequences()

    absolute_tolerance = 0.01
    assert np.allclose(loss_result[0], [[0.678914], [0.668076], [0.728129]],
                       atol=absolute_tolerance)
    assert np.allclose(loss_result[1], [[0.679029]], atol=absolute_tolerance)
    assert np.allclose(loss_result[2], [[0.705393], [0.674243]],
                       atol=absolute_tolerance)
def test_ext_lambdafunc(tmpdir):
    """Verify a ``LambdaFunc`` user function still fires its callback after save/load.

    The callback is expected to run only for minibatches for which the
    ``when`` predicate (``np.sum(arg) > 1``) holds.
    """
    dim = 4

    class CallbackCounter(object):
        # Counts how many times ``inc`` has been invoked.
        def __init__(self):
            self.count = 0

        def inc(self, arg):
            self.count += 1

    cb = CallbackCounter()

    def make_lambda_func(operand):
        # Same predicate/callback pair for the original and the restored node.
        return LambdaFunc(operand,
                          when=lambda arg: np.sum(arg) > 1,
                          execute=cb.inc)

    p = C.parameter(shape=(dim,), init=1)
    x_in = C.input_variable(dim, needs_gradient=True, name='i_var')
    k = x_in * p
    m = C.user_function(make_lambda_func(k))
    z0 = m + 0

    filepath = str(tmpdir / 'test_ext_lambdafunc.dat')
    z0.save(filepath)

    Function.register_udf_deserialize_callback(
        'conditional_exec_lambda',
        lambda x, *unused: make_lambda_func(x))

    z = Function.load(filepath)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    learner = C.momentum_sgd(z.parameters, lr_per_sample,
                             momentum_time_constant, True)
    trainer = C.Trainer(z, (z + 0, z + 0), [learner])

    # Small inputs: the callback count must stay at zero.
    input_data = 0.1 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 0

    # Larger inputs: the callback count must become one.
    input_data = 0.3 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 1
def test_ext_lambdafunc(tmpdir):
    """Check that a saved and re-loaded ``LambdaFunc`` node keeps invoking its callback.

    The callback is expected to run only for minibatches for which the
    ``when`` predicate (``np.sum(arg) > 1``) holds.
    """
    dim = 4

    class CallbackCounter(object):
        # Tracks the number of ``inc`` invocations.
        def __init__(self):
            self.count = 0

        def inc(self, arg):
            self.count += 1

    cb = CallbackCounter()

    def build_conditional(operand):
        # Predicate and callback shared by the initial and deserialized nodes.
        return LambdaFunc(operand,
                          when=lambda arg: np.sum(arg) > 1,
                          execute=cb.inc)

    p = C.parameter(shape=(dim,), init=1)
    features = C.input_variable(dim, needs_gradient=True, name='i_var')
    k = features * p
    m = C.user_function(build_conditional(k))
    z0 = m + 0

    filepath = str(tmpdir / 'test_ext_lambdafunc.dat')
    z0.save(filepath)

    Function.register_udf_deserialize_callback(
        'conditional_exec_lambda',
        lambda x, *unused: build_conditional(x))

    z = Function.load(filepath)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    learner = C.momentum_sgd(z.parameters, lr_per_sample,
                             momentum_time_constant, True)
    trainer = C.Trainer(z, (z + 0, z + 0), [learner])

    # First minibatch: callback must not have been counted.
    input_data = 0.1 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 0

    # Second minibatch: callback must have been counted once.
    input_data = 0.3 * np.ones(dim)
    trainer.train_minibatch([input_data])
    assert cb.count == 1
def create_proposal_target_layer(rpn_rois, scaled_gt_boxes, cfg):
    '''
    Creates a proposal target layer that is used for training an object detection network as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Assigns object detection proposals to ground-truth targets.
    Produces proposal classification labels and bounding-box regression targets.
    It also adds gt_boxes to candidates and samples fg and bg rois for training.

    Args:
        rpn_rois:        The proposed ROIs, e.g. from a region proposal network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        cfg:             The configuration object. The number of classes is read from cfg["DATA"].NUM_CLASSES,
                         the sampling thresholds and fg fraction from cfg["TRAIN"], and the batch size and
                         bbox normalization settings from attributes of cfg itself.

    Returns:
        rpn_target_rois - a set of rois containing the ground truth and a number of sampled fg and bg ROIs
        label_targets - the target labels for the rois
        bbox_targets - the regression coefficient targets for the rois
        bbox_inside_weights - the weights for the regression loss
    '''
    ptl_param_string = "'num_classes': {}".format(cfg["DATA"].NUM_CLASSES)
    target_layer = ProposalTargetLayer(
        rpn_rois,
        scaled_gt_boxes,
        batch_size=cfg.NUM_ROI_PROPOSALS,
        fg_fraction=cfg["TRAIN"].FG_FRACTION,
        normalize_targets=cfg.BBOX_NORMALIZE_TARGETS,
        normalize_means=cfg.BBOX_NORMALIZE_MEANS,
        normalize_stds=cfg.BBOX_NORMALIZE_STDS,
        fg_thresh=cfg["TRAIN"].FG_THRESH,
        bg_thresh_hi=cfg["TRAIN"].BG_THRESH_HI,
        bg_thresh_lo=cfg["TRAIN"].BG_THRESH_LO,
        param_str=ptl_param_string)
    ptl = user_function(target_layer)

    # use an alias if you need to access the outputs, e.g., when cloning a trained network
    rois = alias(ptl.outputs[0], name='rpn_target_rois')
    label_targets = ptl.outputs[1]
    bbox_targets = ptl.outputs[2]
    bbox_inside_weights = ptl.outputs[3]

    return rois, label_targets, bbox_targets, bbox_inside_weights
def test_ext_train(tmpdir):
    """Train through a ``MyPlus`` user function and verify its state survives save/load.

    The model is trained for 100 minibatches, saved, re-loaded from an
    in-memory buffer, and the serialized state of the re-loaded user function
    is checked for the same forward/backward call counts.

    Args:
        tmpdir: directory object supporting the ``/`` operator; the model
            file is written beneath it.
    """
    dim = 4

    p = C.parameter(shape=(dim,), init=10)
    i = C.sequence.input_variable(dim, needs_gradient=True, name='i_var')
    m = MyPlus(i, C.constant(3), 'my_plus')
    # keeping m unwrapped since we need to access its member variables
    z = C.user_function(m) + p

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    trainer = C.Trainer(z, (z + 0, z + 0),
                        [C.momentum_sgd(z.parameters, lr_per_sample,
                                        momentum_time_constant, True)])

    # A plain counted loop; avoids rebinding the input-variable name ``i``.
    for _ in range(100):
        input_data = np.random.rand(dim)
        trainer.train_minibatch([input_data])

    assert m.forward_calls == m.backward_calls == 100

    filepath = str(tmpdir / 'test_ext_train.dat')
    z.save(filepath)

    # Use a context manager so the file handle is closed deterministically.
    with open(filepath, 'rb') as model_file:
        buf = model_file.read()

    # this is only needed for Python 2.7
    # (which does not distinguish between bytes and strings)
    if isinstance(buf, str):
        buf = bytearray(buf)

    z1 = Function.load(buf)

    m1 = z1.find_by_name('my_plus')
    # m1 is an instance of UserFunction, cannot directly downcast it to MyPlus,
    # using serialize as workaround:
    state = m1.serialize()['state']

    assert state['forward_calls'] == state['backward_calls'] == 100
def create_proposal_target_layer(rpn_rois, scaled_gt_boxes, cfg):
    '''
    Creates a proposal target layer that is used for training an object detection network as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Assigns object detection proposals to ground-truth targets.
    Produces proposal classification labels and bounding-box regression targets.
    It also adds gt_boxes to candidates and samples fg and bg rois for training.

    Args:
        rpn_rois:        The proposed ROIs, e.g. from a region proposal network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        cfg:             The configuration object; supplies the number of classes (cfg["DATA"].NUM_CLASSES),
                         the fg/bg sampling settings (cfg["TRAIN"]) and the ROI batch size and bbox
                         normalization settings (attributes of cfg).

    Returns:
        rpn_target_rois - a set of rois containing the ground truth and a number of sampled fg and bg ROIs
        label_targets - the target labels for the rois
        bbox_targets - the regression coefficient targets for the rois
        bbox_inside_weights - the weights for the regression loss
    '''
    ptl_param_string = "'num_classes': {}".format(cfg["DATA"].NUM_CLASSES)
    layer_spec = ProposalTargetLayer(
        rpn_rois, scaled_gt_boxes,
        batch_size=cfg.NUM_ROI_PROPOSALS,
        fg_fraction=cfg["TRAIN"].FG_FRACTION,
        normalize_targets=cfg.BBOX_NORMALIZE_TARGETS,
        normalize_means=cfg.BBOX_NORMALIZE_MEANS,
        normalize_stds=cfg.BBOX_NORMALIZE_STDS,
        fg_thresh=cfg["TRAIN"].FG_THRESH,
        bg_thresh_hi=cfg["TRAIN"].BG_THRESH_HI,
        bg_thresh_lo=cfg["TRAIN"].BG_THRESH_LO,
        param_str=ptl_param_string,
    )
    ptl = user_function(layer_spec)

    # use an alias if you need to access the outputs, e.g., when cloning a trained network
    rois = alias(ptl.outputs[0], name='rpn_target_rois')
    label_targets = ptl.outputs[1]
    bbox_targets = ptl.outputs[2]
    bbox_inside_weights = ptl.outputs[3]

    return rois, label_targets, bbox_targets, bbox_inside_weights
def test_proposal_target_layer():
    """Check that the CNTK proposal target layer matches the Caffe one.

    Random ROIs and ground-truth boxes are fed to both layers in deterministic
    mode; rois, labels, bbox targets and bbox inside weights must be identical
    after the CNTK outputs are truncated to the number of Caffe rois.
    """
    num_rois = 400
    all_rois_shape_cntk = (num_rois, 4)
    num_gt_boxes = 50
    gt_boxes_shape_cntk = (num_gt_boxes, 5)

    # Create input tensors with values
    x1y1 = np.random.random_sample((num_rois, 2)) * 500
    wh = np.random.random_sample((num_rois, 2)) * 400
    x2y2 = x1y1 + wh + 50
    all_rois = np.hstack((x1y1, x2y2)).astype(np.float32)

    x1y1 = np.random.random_sample((num_gt_boxes, 2)) * 500
    wh = np.random.random_sample((num_gt_boxes, 2)) * 400
    x2y2 = x1y1 + wh + 50
    label = np.random.random_sample((num_gt_boxes, 1))
    label = (label * 17.0)
    gt_boxes = np.hstack((x1y1, x2y2, label)).astype(np.float32)

    # Create CNTK layer and call forward
    all_rois_var = input_variable(all_rois_shape_cntk)
    gt_boxes_var = input_variable(gt_boxes_shape_cntk)

    target_udf = CntkProposalTargetLayer(all_rois_var, gt_boxes_var,
                                         param_str="'num_classes': 17",
                                         deterministic=True)
    cntk_layer = user_function(target_udf)
    state, cntk_output = cntk_layer.forward({all_rois_var: [all_rois],
                                             gt_boxes_var: [gt_boxes]})

    def find_key(tag):
        # First output whose string form contains ``tag``.
        return [k for k in cntk_output if tag in str(k)][0]

    roi_key = find_key('rpn_target_rois_raw')
    labels_key = find_key('label_targets_raw')
    bbox_key = find_key('bbox_targets_raw')
    bbox_w_key = find_key('bbox_inside_w_raw')

    cntk_rois = cntk_output[roi_key][0]
    cntk_labels_one_hot = cntk_output[labels_key][0]
    cntk_bbox_targets = cntk_output[bbox_key][0]
    cntk_bbox_inside_weights = cntk_output[bbox_w_key][0]

    cntk_labels = np.argmax(cntk_labels_one_hot, axis=1)

    # Create Caffe layer and call forward
    # A zero column is prepended to the ROIs for the Caffe layer.
    zeros = np.zeros((all_rois.shape[0], 1), dtype=gt_boxes.dtype)
    all_rois_caffe = np.hstack((zeros, all_rois))

    bottom = [np.array(all_rois_caffe), np.array(gt_boxes)]
    top = None  # handled through return statement in caffe layer for unit testing

    param_str = "'num_classes': 17"
    caffe_layer = CaffeProposalTargetLayer()
    caffe_layer.set_param_str(param_str)
    caffe_layer.setup(bottom, top)
    caffe_layer.set_deterministic_mode()

    caffe_result = caffe_layer.forward(bottom, top)
    caffe_rois, caffe_labels, caffe_bbox_targets, caffe_bbox_inside_weights = caffe_result
    caffe_rois = caffe_rois[:, 1:]

    # Compare only the leading rows that Caffe produced.
    num_caffe_rois = caffe_rois.shape[0]
    cntk_rois = cntk_rois[:num_caffe_rois, :]
    cntk_labels = cntk_labels[:num_caffe_rois]
    cntk_bbox_targets = cntk_bbox_targets[:num_caffe_rois, :]
    cntk_bbox_inside_weights = cntk_bbox_inside_weights[:num_caffe_rois, :]

    # assert that results are exactly the same
    assert cntk_rois.shape == caffe_rois.shape
    assert cntk_labels.shape == caffe_labels.shape
    assert cntk_bbox_targets.shape == caffe_bbox_targets.shape
    assert cntk_bbox_inside_weights.shape == caffe_bbox_inside_weights.shape

    caffe_labels = [int(x) for x in caffe_labels]

    exact = dict(rtol=0.0, atol=0.0)
    assert np.allclose(cntk_rois, caffe_rois, **exact)
    assert np.allclose(cntk_labels, caffe_labels, **exact)
    assert np.allclose(cntk_bbox_targets, caffe_bbox_targets, **exact)
    assert np.allclose(cntk_bbox_inside_weights, caffe_bbox_inside_weights, **exact)
    print("Verified ProposalTargetLayer")
def print_node(v):
    """Wrap ``v`` in a ``LambdaFunc`` and return it as a CNTK user function."""
    wrapped = LambdaFunc(v)
    return C.user_function(wrapped)
def __cntk_det__(m):
    """Return a CNTK user function built from ``__cntk_class_det__`` applied to ``m``."""
    det_udf = __cntk_class_det__(m)
    return C.user_function(det_udf)
def create_rpn(conv_out, scaled_gt_boxes, im_info, add_loss_functions=True, proposal_layer_param_string=None):
    '''
    Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    Args:
        conv_out:        The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        im_info:         (image_width, image_height, image_scale) as CNTK variable or constant
        add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses
        proposal_layer_param_string: A yaml parameter string that is passed to the proposal layer
                         (and to the anchor target layer when losses are built).

    Returns:
        rpn_rois - the proposed ROIs
        rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness)
    '''
    # RPN network
    # init = 'normal', initValueScale = 0.01, initBias = 0.1
    rpn_conv_3x3 = Convolution(
        (3, 3), 256, activation=relu, pad=True, strides=1,
        init=normal(scale=0.01), init_bias=0.1)(conv_out)
    # 2(bg/fg) * 9(anchors)
    rpn_cls_score = Convolution(
        (1, 1), 18, activation=None, name="rpn_cls_score",
        init=normal(scale=0.01), init_bias=0.1)(rpn_conv_3x3)
    # 4(coords) * 9(anchors)
    rpn_bbox_pred = Convolution(
        (1, 1), 36, activation=None, name="rpn_bbox_pred",
        init=normal(scale=0.01), init_bias=0.1)(rpn_conv_3x3)

    # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W)
    num_predictions = int(np.prod(rpn_cls_score.shape) / 2)
    rpn_cls_score_rshp = reshape(rpn_cls_score, (2, num_predictions))
    rpn_cls_prob = softmax(rpn_cls_score_rshp, axis=0, name="objness_softmax")
    rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape)

    # proposal layer
    proposal_udf = ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info,
                                 param_str=proposal_layer_param_string)
    rpn_rois_raw = user_function(proposal_udf)
    rpn_rois = alias(rpn_rois_raw, name='rpn_rois')

    if not add_loss_functions:
        return rpn_rois, None

    # RPN targets
    # Comment: rpn_cls_score is only passed to get width and height of the conv feature map ...
    anchor_udf = AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info,
                                   param_str=proposal_layer_param_string)
    atl = user_function(anchor_udf)
    rpn_labels = atl.outputs[0]
    rpn_bbox_targets = atl.outputs[1]
    rpn_bbox_inside_weights = atl.outputs[2]

    # For loss functions: ignore label predictions for the 'ignore label',
    # i.e. set target and prediction to 0 --> needs to be softmaxed before
    rpn_labels_rshp = reshape(rpn_labels, (1, num_predictions))
    ignore = user_function(IgnoreLabel(rpn_cls_prob, rpn_labels_rshp, ignore_label=-1))
    rpn_cls_prob_ignore = ignore.outputs[0]
    fg_targets = ignore.outputs[1]
    bg_targets = 1 - fg_targets
    rpn_labels_ignore = splice(bg_targets, fg_targets, axis=0)

    # RPN losses
    rpn_loss_cls = cross_entropy_with_softmax(rpn_cls_prob_ignore, rpn_labels_ignore, axis=0)
    rpn_loss_bbox = user_function(
        SmoothL1Loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights))
    rpn_losses = plus(reduce_sum(rpn_loss_cls), reduce_sum(rpn_loss_bbox), name="rpn_losses")

    return rpn_rois, rpn_losses
def test_udf_op_name():
    """A user function's root function must have a non-empty string representation."""
    dim = 4
    x_in = C.input_variable(dim, needs_gradient=True, name='i_var')
    plus_udf = MyPlus(x_in, C.constant(3))
    m = C.user_function(plus_udf)
    assert str(m.root_function) != ''
def dense_layer(inp, output_dim, nonlinearity):
    """Apply ``linear_layer`` followed by ``nonlinearity``.

    If the nonlinearity returns a ``UserFunction`` instance, it is wrapped
    with ``C.user_function`` before being returned.
    """
    activated = nonlinearity(linear_layer(inp, output_dim))
    if not isinstance(activated, UserFunction):
        return activated
    return C.user_function(activated)
def _(x):
    """Return a user function from ``__cntk_class_mvn_log_prob__`` for ``x``.

    ``mu`` and ``sig`` are not parameters; they are resolved from the
    surrounding scope.
    """
    log_prob_udf = __cntk_class_mvn_log_prob__(x, mu, sig)
    return C.user_function(log_prob_udf)
def create_rpn(conv_out, scaled_gt_boxes, im_info, add_loss_functions=True, proposal_layer_param_string=None, conv_bias_init=0.0):
    '''
    Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    Args:
        conv_out:        The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        im_info:         A CNTK variable or constant containing
                         (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
                         e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000
        add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses
        proposal_layer_param_string: A yaml parameter string that is passed to the proposal layer
                         (and to the anchor target layer when losses are built).
        conv_bias_init:  Value passed as init_bias to the three RPN convolutions.

    Returns:
        rpn_rois - the proposed ROIs
        rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness)
    '''
    # RPN network
    # init = 'normal', initValueScale = 0.01, initBias = 0.1
    # NOTE: cfg is not a parameter here; it is resolved from the enclosing scope.
    num_channels = cfg["CNTK"].RPN_NUM_CHANNELS
    rpn_conv_3x3 = Convolution(
        (3, 3), num_channels, activation=relu, pad=True, strides=1,
        init=normal(scale=0.01), init_bias=conv_bias_init)(conv_out)
    # 2(bg/fg) * 9(anchors)
    rpn_cls_score = Convolution(
        (1, 1), 18, activation=None, name="rpn_cls_score",
        init=normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3)
    # 4(coords) * 9(anchors)
    rpn_bbox_pred = Convolution(
        (1, 1), 36, activation=None, name="rpn_bbox_pred",
        init=normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3)

    # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W)
    num_predictions = int(rpn_cls_score.shape[0] / 2)
    grouped_shape = (2, num_predictions, rpn_cls_score.shape[1], rpn_cls_score.shape[2])
    rpn_cls_score_rshp = reshape(rpn_cls_score, grouped_shape, name="rpn_cls_score_rshp")
    softmax_input = cntk.placeholder()
    rpn_cls_sm = softmax(softmax_input, axis=0)
    rpn_cls_prob = cntk.as_block(rpn_cls_sm, [(softmax_input, rpn_cls_score_rshp)],
                                 'Softmax', 'rpn_cls_prob')
    rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape, name="rpn_cls_prob_reshape")

    # proposal layer
    proposal_udf = ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info,
                                 param_str=proposal_layer_param_string)
    rpn_rois_raw = user_function(proposal_udf)
    rpn_rois = alias(rpn_rois_raw, name='rpn_rois')

    if not add_loss_functions:
        return rpn_rois, None

    # RPN targets
    # Comment: rpn_cls_score is only passed to get width and height of the conv feature map ...
    anchor_udf = AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info,
                                   param_str=proposal_layer_param_string)
    atl = user_function(anchor_udf)
    rpn_labels = atl.outputs[0]
    rpn_bbox_targets = atl.outputs[1]
    rpn_bbox_inside_weights = atl.outputs[2]

    # classification loss
    labels_ph = cntk.placeholder()
    scores_ph = cntk.placeholder()

    keeps = cntk.greater_equal(labels_ph, 0.0)
    fg_labels = element_times(labels_ph, keeps, name="fg_targets")
    bg_labels = minus(1, fg_labels, name="bg_targets")
    rpn_labels_ignore = splice(bg_labels, fg_labels, axis=0)
    rpn_ce = cross_entropy_with_softmax(scores_ph, rpn_labels_ignore, axis=0)
    rpn_loss_cls = element_times(rpn_ce, keeps)

    # The terms that are accounted for in the cls loss are those that have a label >= 0
    cls_num_terms = reduce_sum(keeps)
    cls_normalization_factor = 1.0 / cls_num_terms
    normalized_rpn_cls_loss = reduce_sum(rpn_loss_cls) * cls_normalization_factor

    cls_bindings = [(labels_ph, rpn_labels), (scores_ph, rpn_cls_score_rshp)]
    reduced_rpn_loss_cls = cntk.as_block(normalized_rpn_cls_loss, cls_bindings,
                                         'CE_with_ignore', 'norm_rpn_cls_loss')

    # regression loss
    bbox_pred_ph = cntk.placeholder()
    bbox_targets_ph = cntk.placeholder()
    bbox_weights_ph = cntk.placeholder()
    rpn_loss_bbox = SmoothL1Loss(cfg["CNTK"].SIGMA_RPN_L1, bbox_pred_ph,
                                 bbox_targets_ph, bbox_weights_ph, 1.0)
    # The bbox loss is normalized by the rpn batch size
    bbox_normalization_factor = 1.0 / cfg["TRAIN"].RPN_BATCHSIZE
    normalized_rpn_bbox_loss = reduce_sum(rpn_loss_bbox) * bbox_normalization_factor

    bbox_bindings = [(bbox_pred_ph, rpn_bbox_pred),
                     (bbox_targets_ph, rpn_bbox_targets),
                     (bbox_weights_ph, rpn_bbox_inside_weights)]
    reduced_rpn_loss_bbox = cntk.as_block(normalized_rpn_bbox_loss, bbox_bindings,
                                          'SmoothL1Loss', 'norm_rpn_bbox_loss')

    rpn_losses = plus(reduced_rpn_loss_cls, reduced_rpn_loss_bbox, name="rpn_losses")

    return rpn_rois, rpn_losses
def create_rpn(conv_out, scaled_gt_boxes, im_info, cfg, add_loss_functions=True):
    '''
    Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    Args:
        conv_out:        The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        im_info:         A CNTK variable or constant containing
                         (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
                         e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000
        cfg:             The configuration dictionary
        add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses

    Returns:
        rpn_rois - the proposed ROIs
        rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness)
    '''
    # RPN network
    # init = 'normal', initValueScale = 0.01, initBias = 0.1
    num_channels = cfg["MODEL"].RPN_NUM_CHANNELS
    rpn_conv_3x3 = Convolution(
        (3, 3), num_channels, activation=relu, pad=True, strides=1,
        init=normal(scale=0.01), init_bias=0.0)(conv_out)
    # 2(bg/fg) * 9(anchors)
    rpn_cls_score = Convolution(
        (1, 1), 18, activation=None, name="rpn_cls_score",
        init=normal(scale=0.01), init_bias=0.0)(rpn_conv_3x3)
    # 4(coords) * 9(anchors)
    rpn_bbox_pred = Convolution(
        (1, 1), 36, activation=None, name="rpn_bbox_pred",
        init=normal(scale=0.01), init_bias=0.0)(rpn_conv_3x3)

    # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W)
    num_predictions = int(rpn_cls_score.shape[0] / 2)
    grouped_shape = (2, num_predictions, rpn_cls_score.shape[1], rpn_cls_score.shape[2])
    rpn_cls_score_rshp = reshape(rpn_cls_score, grouped_shape, name="rpn_cls_score_rshp")
    softmax_input = cntk.placeholder()
    rpn_cls_sm = softmax(softmax_input, axis=0)
    rpn_cls_prob = cntk.as_block(rpn_cls_sm, [(softmax_input, rpn_cls_score_rshp)],
                                 'Softmax', 'rpn_cls_prob')
    rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape, name="rpn_cls_prob_reshape")

    # proposal layer
    rpn_rois = create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg)

    if not add_loss_functions:
        return rpn_rois, None

    # RPN targets
    # Comment: rpn_cls_score is only passed to get width and height of the conv feature map ...
    scales_yaml = "\n - ".join(str(v) for v in cfg["DATA"].PROPOSAL_LAYER_SCALES)
    proposal_layer_params = "'feat_stride': {}\n'scales':\n - {}".format(
        cfg["MODEL"].FEATURE_STRIDE, scales_yaml)
    anchor_udf = AnchorTargetLayer(
        rpn_cls_score, scaled_gt_boxes, im_info,
        rpn_batch_size=cfg["TRAIN"].RPN_BATCHSIZE,
        rpn_fg_fraction=cfg["TRAIN"].RPN_FG_FRACTION,
        clobber_positives=cfg["TRAIN"].RPN_CLOBBER_POSITIVES,
        positive_overlap=cfg["TRAIN"].RPN_POSITIVE_OVERLAP,
        negative_overlap=cfg["TRAIN"].RPN_NEGATIVE_OVERLAP,
        param_str=proposal_layer_params)
    atl = user_function(anchor_udf)
    rpn_labels = atl.outputs[0]
    rpn_bbox_targets = atl.outputs[1]
    rpn_bbox_inside_weights = atl.outputs[2]

    # classification loss
    labels_ph = cntk.placeholder()
    scores_ph = cntk.placeholder()

    keeps = cntk.greater_equal(labels_ph, 0.0)
    fg_labels = element_times(labels_ph, keeps, name="fg_targets")
    bg_labels = minus(1, fg_labels, name="bg_targets")
    rpn_labels_ignore = splice(bg_labels, fg_labels, axis=0)
    rpn_ce = cross_entropy_with_softmax(scores_ph, rpn_labels_ignore, axis=0)
    rpn_loss_cls = element_times(rpn_ce, keeps)

    # The terms that are accounted for in the cls loss are those that have a label >= 0
    cls_num_terms = reduce_sum(keeps)
    cls_normalization_factor = 1.0 / cls_num_terms
    normalized_rpn_cls_loss = reduce_sum(rpn_loss_cls) * cls_normalization_factor

    cls_bindings = [(labels_ph, rpn_labels), (scores_ph, rpn_cls_score_rshp)]
    reduced_rpn_loss_cls = cntk.as_block(normalized_rpn_cls_loss, cls_bindings,
                                         'CE_with_ignore', 'norm_rpn_cls_loss')

    # regression loss
    bbox_pred_ph = cntk.placeholder()
    bbox_targets_ph = cntk.placeholder()
    bbox_weights_ph = cntk.placeholder()
    rpn_loss_bbox = SmoothL1Loss(cfg.SIGMA_RPN_L1, bbox_pred_ph,
                                 bbox_targets_ph, bbox_weights_ph, 1.0)
    # The bbox loss is normalized by the rpn batch size
    bbox_normalization_factor = 1.0 / cfg["TRAIN"].RPN_BATCHSIZE
    normalized_rpn_bbox_loss = reduce_sum(rpn_loss_bbox) * bbox_normalization_factor

    bbox_bindings = [(bbox_pred_ph, rpn_bbox_pred),
                     (bbox_targets_ph, rpn_bbox_targets),
                     (bbox_weights_ph, rpn_bbox_inside_weights)]
    reduced_rpn_loss_bbox = cntk.as_block(normalized_rpn_bbox_loss, bbox_bindings,
                                          'SmoothL1Loss', 'norm_rpn_bbox_loss')

    rpn_losses = plus(reduced_rpn_loss_cls, reduced_rpn_loss_bbox, name="rpn_losses")

    return rpn_rois, rpn_losses
def test_no_deadlock_init_outputs():
    """Wrapping ``FaultyUserFunc`` with ``user_function`` must raise ``RuntimeError``."""
    x = C.input_variable((3, C.FreeDimension, 2), name='x')

    from cntk import user_function

    expect_failure = pytest.raises(RuntimeError)
    with expect_failure:
        # Construction and wrapping both stay inside the guarded region.
        s = user_function(FaultyUserFunc(x))