def minus(left, right, name=''):
    '''
    The output of this operation is left minus right tensor. It supports broadcasting.
    In case of scalars its backward pass propagates the received gradient.
    The operator (-) has been overloaded and can equally be used instead of minus().

    Example:
        >>> C.eval(C.minus([1, 2, 3], [4, 5, 6]))
        [array([[-3., -3., -3.]])]

        >>> C.eval(C.minus([[1,2],[3,4]], 1))
        [array([[[ 0.,  1.],
                 [ 2.,  3.]]])]

    Args:
        left: left side tensor
        right: right side tensor
        name (str): the name of the node in the network

    Returns:
        :class:`cntk.Function`
    '''
    from cntk import minus
    left = sanitize_input(left, get_data_type(right))
    right = sanitize_input(right, get_data_type(left))
    return minus(left, right, name).output()
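# A minimal, hedged usage sketch of what the docstring above describes (not part of the
# original snippet): C.minus broadcasts a scalar over a matrix, and the overloaded '-'
# operator builds the same subtraction node.
import numpy as np
import cntk as C

print(C.minus([[1, 2], [3, 4]], 2).eval())         # broadcasting: [[-1. 0.] [1. 2.]]

a = np.array([1., 2., 3.], dtype=np.float32)
b = np.array([4., 5., 6.], dtype=np.float32)
print((C.constant(a) - C.constant(b)).eval())      # overloaded operator: [-3. -3. -3.]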
def test_minus_3():
    cntk_op = C.minus([1, 2, 3], [[4, 5, 6], [7, 8, 9]])
    cntk_ret = cntk_op.eval()

    ng_op, _ = CNTKImporter().import_model(cntk_op)
    ng_ret = ng.transformers.make_transformer().computation(ng_op)()

    assert np.array_equal(cntk_ret, ng_ret)
def create_network(feature_dim=40, num_classes=256, feature_mean_file=None, feature_inv_stddev_file=None,
                   feature_norm_files=None, label_prior_file=None, context=(0, 0), model_type=None):

    def MyMeanVarNorm(feature_mean_file, feature_inv_stddev_file):
        m = C.reshape(load_ascii_vector(feature_mean_file, 'feature_mean'), shape=(1, feature_dim))
        s = C.reshape(load_ascii_vector(feature_inv_stddev_file, 'feature_invstddev'), shape=(1, feature_dim))
        def _func(operand):
            return C.reshape(C.element_times(C.reshape(operand, shape=(1 + context[0] + context[1], feature_dim)) - m, s),
                             shape=operand.shape)
        return _func

    def MyDNNLayer(hidden_size=128, num_layers=2):
        return C.layers.Sequential([
            C.layers.For(range(num_layers), lambda: C.layers.Dense(hidden_size, activation=C.sigmoid))
        ])

    def MyBLSTMLayer(hidden_size=128, num_layers=2):
        W = C.Parameter((C.InferredDimension, hidden_size), init=C.he_normal(1.0), name='rnn_parameters')
        def _func(operand):
            return C.optimized_rnnstack(operand, weights=W, hidden_size=hidden_size,
                                        num_layers=num_layers, bidirectional=True, recurrent_op='lstm')
        return _func

    # Input variables denoting the features and label data
    feature_var = C.sequence.input_variable(feature_dim * (1 + context[0] + context[1]))
    label_var = C.sequence.input_variable(num_classes)

    feature_norm = MyMeanVarNorm(feature_mean_file, feature_inv_stddev_file)(feature_var)
    label_prior = load_ascii_vector(label_prior_file, 'label_prior')
    log_prior = C.log(label_prior)

    if model_type == "DNN":
        net = MyDNNLayer(512, 4)(feature_norm)
    elif model_type == "BLSTM":
        net = MyBLSTMLayer(512, 2)(feature_norm)
    else:
        raise RuntimeError("model_type must be DNN or BLSTM")

    out = C.layers.Dense(num_classes, init=C.he_normal(scale=1/3))(net)

    # loss and metric
    ce = C.cross_entropy_with_softmax(out, label_var)
    pe = C.classification_error(out, label_var)
    ScaledLogLikelihood = C.minus(out, log_prior, name='ScaledLogLikelihood')

    # talk to the user
    C.logging.log_number_of_parameters(out)
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'output': out,
        'ScaledLogLikelihood': ScaledLogLikelihood,
        'ce': ce,
        'pe': pe,
        'final_hidden': net  # adding last hidden layer output for future use in CTC tutorial
    }
def SmoothL1Loss(sigma, bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights):
    """
    From https://github.com/smallcorgi/Faster-RCNN_TF/blob/master/lib/fast_rcnn/train.py

    ResultLoss = outside_weights * SmoothL1(inside_weights * (bbox_pred - bbox_targets))

    SmoothL1(x) = 0.5 * (sigma * x)^2,    if |x| < 1 / sigma^2
                  |x| - 0.5 / sigma^2,    otherwise
    """
    sigma2 = sigma * sigma

    inside_mul_abs = C.abs(C.element_times(bbox_inside_weights, C.minus(bbox_pred, bbox_targets)))

    smooth_l1_sign = C.less(inside_mul_abs, 1.0 / sigma2)
    smooth_l1_option1 = C.element_times(C.element_times(inside_mul_abs, inside_mul_abs), 0.5 * sigma2)
    smooth_l1_option2 = C.minus(inside_mul_abs, 0.5 / sigma2)
    smooth_l1_result = C.plus(C.element_times(smooth_l1_option1, smooth_l1_sign),
                              C.element_times(smooth_l1_option2, C.minus(1.0, smooth_l1_sign)))

    return C.element_times(bbox_outside_weights, smooth_l1_result)
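# Hedged usage sketch (values are made up, not from the original source): evaluating
# SmoothL1Loss on small constant tensors to check both branches of the piecewise formula
# in the docstring. With sigma = 1: 0.5 * x^2 for |x| < 1, and |x| - 0.5 otherwise.
import numpy as np
import cntk as C

pred = np.array([0.5, 3.0], dtype=np.float32)
target = np.array([0.0, 0.0], dtype=np.float32)
weights = np.ones(2, dtype=np.float32)

loss = SmoothL1Loss(1.0, pred, target, weights, weights)
print(loss.eval())  # expected roughly [0.125, 2.5]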
def _simple_dict():
    d = {}

    d['i1'] = C.input_variable(shape=(2, 3), name='i1')
    d['c1'] = C.constant(shape=(2, 3), value=6, name='c1')
    d['p1'] = C.parameter(shape=(3, 2), init=7, name='p1')
    d['op1'] = C.plus(d['i1'], d['c1'], name='op1')
    d['op2'] = C.times(d['op1'], d['p1'], name='op2')
    d['root'] = d['op2']
    d['target'] = C.input_variable((), name='label')
    d['all'] = C.combine([d['root'], C.minus(
        d['target'], C.constant(1, name='c2'), name='minus')], name='all')

    return d
def create_network(feature_dim=40, num_classes=256, feature_mean_file=None, feature_inv_stddev_file=None,
                   feature_norm_files=None, label_prior_file=None, context=(0, 0), model_type=None):
    # This variant assumes MyMeanVarNorm and load_ascii_vector are defined as in the
    # create_network snippet above.

    def MyDNNLayer(hidden_size=128, num_layers=2):
        return C.layers.Sequential([
            C.layers.For(range(num_layers),
                         lambda: C.layers.Dense(hidden_size) >> C.layers.BatchNormalization() >> C.sigmoid >> C.layers.Dropout(.3))
        ])

    def MyBLSTMLayer(hidden_size=128, num_layers=2):
        # Initialize the RNN weights: C.Parameter creates a parameter tensor.
        W = C.Parameter((C.InferredDimension, hidden_size), init=C.he_normal(1.0), name='rnn_parameters')
        def _func(operand):  # operand represents the input data
            return C.optimized_rnnstack(operand, weights=W, hidden_size=hidden_size,
                                        num_layers=num_layers, bidirectional=True, recurrent_op='lstm')
        return _func

    # Input variables denoting the features and label data. sequence.input_variable creates
    # an input in the network: a place where data, such as features and labels, is provided.
    feature_var = C.sequence.input_variable(feature_dim * (1 + context[0] + context[1]))
    label_var = C.sequence.input_variable(num_classes)

    # First layer: feature_var is the operand passed to _func in MyMeanVarNorm.
    feature_norm = MyMeanVarNorm(feature_mean_file, feature_inv_stddev_file)(feature_var)
    label_prior = load_ascii_vector(label_prior_file, 'label_prior')
    log_prior = C.log(label_prior)  # element-wise natural logarithm of label_prior

    if model_type == "DNN":
        net = MyDNNLayer(512, 4)(feature_norm)
    elif model_type == "BLSTM":
        net = MyBLSTMLayer(512, 3)(feature_norm)
    else:
        raise RuntimeError("model_type must be DNN or BLSTM")

    # Last layer, shared by both network types. C.he_normal initializes the weights from a
    # Gaussian distribution with mean 0 and standard deviation proportional to scale.
    out = C.layers.Dense(num_classes, init=C.he_normal(scale=1/3))(net)

    # loss and metric
    ce = C.cross_entropy_with_softmax(out, label_var)  # loss (objective) function
    pe = C.classification_error(out, label_var)        # for evaluation
    ScaledLogLikelihood = C.minus(out, log_prior, name='ScaledLogLikelihood')

    # talk to the user
    C.logging.log_number_of_parameters(out)  # print the number of parameters in the whole model
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'output': out,
        'ScaledLogLikelihood': ScaledLogLikelihood,
        'ce': ce,
        'pe': pe,
        'final_hidden': net  # adding last hidden layer output for future use in CTC tutorial
    }
def create_binary_convolution_model():
    # Input variables denoting the features and label data
    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    # apply model to input
    scaled_input = C.element_times(C.constant(0.00390625), feature_var)

    # first layer is ok to be full precision
    z = C.layers.Convolution((3, 3), 64, pad=True, activation=C.relu)(scaled_input)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)
    z = C.layers.BatchNormalization(map_rank=1)(z)

    z = BinaryConvolution(z, (3, 3), 128, channels=64, pad=True)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)
    z = C.layers.BatchNormalization(map_rank=1)(z)

    z = BinaryConvolution(z, (3, 3), 128, channels=128, pad=True)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)
    z = C.layers.BatchNormalization(map_rank=1)(z)

    z = BinaryConvolution(z, (1, 1), num_classes, channels=128, pad=True)
    z = C.layers.AveragePooling((z.shape[1], z.shape[2]))(z)
    z = C.reshape(z, (num_classes,))

    # Add binary regularization (ala Gang Hua)
    weight_sum = C.constant(0)
    for p in z.parameters:
        if p.name == "filter":
            weight_sum = C.plus(weight_sum, C.reduce_sum(C.minus(1, C.square(p))))
    bin_reg = C.element_times(.000005, weight_sum)

    # After the last layer, we need to apply a learnable scale
    SP = C.parameter(shape=z.shape, init=0.001)
    z = C.element_times(z, SP)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    ce = C.plus(ce, bin_reg)
    pe = C.classification_error(z, label_var)

    return C.combine([z, ce, pe])
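# Hedged illustration with made-up weights (not part of the model above): the binary
# regularizer is sum(1 - w^2) over filter weights, which is smallest when the weights are
# close to +/-1, nudging the binary-convolution filters toward binary values.
import numpy as np
import cntk as C

w = C.constant(np.array([0.9, -1.0, 0.1], dtype=np.float32))
print(C.reduce_sum(C.minus(1, C.square(w))).eval())  # 0.19 + 0.0 + 0.99 = 1.18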
def create_binary_convolution_model():
    # Input variables denoting the features and label data
    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    # apply model to input
    scaled_input = C.element_times(C.constant(0.00390625), feature_var)

    # first layer is ok to be full precision
    z = C.layers.Convolution((3, 3), 32, pad=True, activation=C.relu)(scaled_input)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)
    z = C.layers.BatchNormalization(map_rank=1)(z)

    z = BinaryConvolution(z, (3, 3), 128, channels=32, pad=True)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)
    z = C.layers.BatchNormalization(map_rank=1)(z)

    z = BinaryConvolution(z, (3, 3), 128, channels=128, pad=True)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)
    z = C.layers.BatchNormalization(map_rank=1)(z)

    z = BinaryConvolution(z, (1, 1), num_classes, channels=128, pad=True)
    z = C.layers.AveragePooling((z.shape[1], z.shape[2]))(z)
    z = C.reshape(z, (num_classes,))

    # Add binary regularization (ala Gang Hua)
    weight_sum = C.constant(0)
    for p in z.parameters:
        if p.name == "filter":
            weight_sum = C.plus(weight_sum, C.reduce_sum(C.minus(1, C.square(p))))
    bin_reg = C.element_times(.000005, weight_sum)

    # After the last layer, we need to apply a learnable scale
    SP = C.parameter(shape=z.shape, init=0.001)
    z = C.element_times(z, SP)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    ce = C.plus(ce, bin_reg)
    pe = C.classification_error(z, label_var)

    return C.combine([z, ce, pe])
def build_trainer(self):
    # Set the learning rate and the momentum parameters for the Adam optimizer.
    lr = learning_rate_schedule(self.lr, UnitType.minibatch)
    beta1 = momentum_schedule(0.9)
    beta2 = momentum_schedule(0.99)

    # Calculate the losses.
    loss_on_v = cntk.squared_error(self.R, self.v)

    pi_a_s = cntk.log(cntk.times_transpose(self.pi, self.action))
    loss_on_pi = cntk.variables.Constant(-1) * (cntk.plus(
        cntk.times(pi_a_s, cntk.minus(self.R, self.v_calc)),
        0.01 * cntk.times_transpose(self.pi, cntk.log(self.pi))))
    # loss_on_pi = cntk.times(pi_a_s, cntk.minus(self.R, self.v_calc))

    self.tensorboard_v_writer = TensorBoardProgressWriter(
        freq=10, log_dir="tensorboard_v_logs", model=self.v)
    self.tensorboard_pi_writer = TensorBoardProgressWriter(
        freq=10, log_dir="tensorboard_pi_logs", model=self.pi)
    # tensorboard --logdir=tensorboard_pi_logs  http://localhost:6006/
    # tensorboard --logdir=tensorboard_v_logs   http://localhost:6006/

    # Create the trainers.
    self.trainer_v = cntk.Trainer(self.v, (loss_on_v), [
        adam(self.pms_v, lr, beta1, variance_momentum=beta2,
             gradient_clipping_threshold_per_sample=2, l2_regularization_weight=0.01)
    ], self.tensorboard_v_writer)
    self.trainer_pi = cntk.Trainer(self.pi, (loss_on_pi), [
        adam(self.pms_pi, lr, beta1, variance_momentum=beta2,
             gradient_clipping_threshold_per_sample=2, l2_regularization_weight=0.01)
    ], self.tensorboard_pi_writer)
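# Hedged reading of loss_on_pi above, written out as an equation (A3C-style policy loss):
#   loss_pi = -( log pi(a|s) * (R - V(s)) + 0.01 * sum_a pi(a|s) * log pi(a|s) )
# where R - V(s) is the advantage estimate and, as written in the code, the second term is
# 0.01 times the negative entropy of the policy.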
def create_binary_convolution_model():
    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    scaled_input = C.element_times(C.constant(0.00390625), feature_var)

    z = C.layers.Convolution((3, 3), 32, pad=True, activation=C.relu)(scaled_input)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)
    z = C.layers.BatchNormalization(map_rank=1)(z)

    z = BinaryConvolution((3, 3), 128, channels=32, pad=True)(z)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)
    z = C.layers.BatchNormalization(map_rank=1)(z)

    z = BinaryConvolution((3, 3), 128, channels=128, pad=True)(z)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)
    z = C.layers.BatchNormalization(map_rank=1)(z)

    z = BinaryConvolution((1, 1), num_classes, channels=128, pad=True)(z)
    z = C.layers.AveragePooling((z.shape[1], z.shape[2]))(z)
    z = C.reshape(z, (num_classes,))

    weight_sum = C.constant(0)
    for p in z.parameters:
        if p.name == "filter":
            weight_sum = C.plus(weight_sum, C.reduce_sum(C.minus(1, C.square(p))))
    bin_reg = C.element_times(.000005, weight_sum)

    SP = C.parameter(shape=z.shape, init=0.001)
    z = C.element_times(z, SP)

    ce = C.cross_entropy_with_softmax(z, label_var)
    ce = C.plus(ce, bin_reg)
    pe = C.classification_error(z, label_var)

    return C.combine([z, ce, pe])
def test_Sub(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        model = C.minus(
            np.array([1, 2, 3]).astype(dtype),
            np.array([4, 5, 6]).astype(dtype))
        verify_no_input(model, tmpdir, 'Sub_0')
def test_Sub(tmpdir):
    pytest.skip('Need to support new ONNX spec.')
    model = C.minus([1, 2, 3], [4, 5, 6])
    verify_no_input(model, tmpdir, 'Sub_0')
def gram(x):
    features = C.minus(flatten(x), C.reduce_mean(x))
    return C.times_transpose(features, features)
def test_Sub(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        model = C.minus(np.array([1, 2, 3]).astype(dtype),
                        np.array([4, 5, 6]).astype(dtype))
        verify_no_input(model, tmpdir, 'Sub_0')
def create_rpn(conv_out, scaled_gt_boxes, im_info, cfg, add_loss_functions=True):
    '''
    Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    Args:
        conv_out:        The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        im_info:         A CNTK variable or constant containing
                         (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
                         e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000
        cfg:             The configuration dictionary
        add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses

    Returns:
        rpn_rois - the proposed ROIs
        rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness)
    '''

    # RPN network
    # init = 'normal', initValueScale = 0.01, initBias = 0.1
    num_channels = cfg["MODEL"].RPN_NUM_CHANNELS
    rpn_conv_3x3 = Convolution((3, 3), num_channels, activation=relu, pad=True, strides=1,
                               init=normal(scale=0.01), init_bias=0.0)(conv_out)
    rpn_cls_score = Convolution((1, 1), 18, activation=None, name="rpn_cls_score",
                                init=normal(scale=0.01), init_bias=0.0)(rpn_conv_3x3)  # 2(bg/fg) * 9(anchors)
    rpn_bbox_pred = Convolution((1, 1), 36, activation=None, name="rpn_bbox_pred",
                                init=normal(scale=0.01), init_bias=0.0)(rpn_conv_3x3)  # 4(coords) * 9(anchors)

    # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W)
    num_predictions = int(rpn_cls_score.shape[0] / 2)
    rpn_cls_score_rshp = reshape(rpn_cls_score,
                                 (2, num_predictions, rpn_cls_score.shape[1], rpn_cls_score.shape[2]),
                                 name="rpn_cls_score_rshp")
    p_rpn_cls_score_rshp = cntk.placeholder()
    rpn_cls_sm = softmax(p_rpn_cls_score_rshp, axis=0)
    rpn_cls_prob = cntk.as_block(rpn_cls_sm, [(p_rpn_cls_score_rshp, rpn_cls_score_rshp)],
                                 'Softmax', 'rpn_cls_prob')
    rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape, name="rpn_cls_prob_reshape")

    # proposal layer
    rpn_rois = create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg)

    rpn_losses = None
    if add_loss_functions:
        # RPN targets
        # Comment: rpn_cls_score is only passed   vvv   to get width and height of the conv feature map ...
        proposal_layer_params = "'feat_stride': {}\n'scales':\n - {}".format(
            cfg["MODEL"].FEATURE_STRIDE,
            "\n - ".join([str(v) for v in cfg["DATA"].PROPOSAL_LAYER_SCALES]))
        atl = user_function(AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info,
                                              rpn_batch_size=cfg["TRAIN"].RPN_BATCHSIZE,
                                              rpn_fg_fraction=cfg["TRAIN"].RPN_FG_FRACTION,
                                              clobber_positives=cfg["TRAIN"].RPN_CLOBBER_POSITIVES,
                                              positive_overlap=cfg["TRAIN"].RPN_POSITIVE_OVERLAP,
                                              negative_overlap=cfg["TRAIN"].RPN_NEGATIVE_OVERLAP,
                                              param_str=proposal_layer_params))
        rpn_labels = atl.outputs[0]
        rpn_bbox_targets = atl.outputs[1]
        rpn_bbox_inside_weights = atl.outputs[2]

        # classification loss
        p_rpn_labels = cntk.placeholder()
        p_rpn_cls_score_rshp = cntk.placeholder()

        keeps = cntk.greater_equal(p_rpn_labels, 0.0)
        fg_labels = element_times(p_rpn_labels, keeps, name="fg_targets")
        bg_labels = minus(1, fg_labels, name="bg_targets")
        rpn_labels_ignore = splice(bg_labels, fg_labels, axis=0)
        rpn_ce = cross_entropy_with_softmax(p_rpn_cls_score_rshp, rpn_labels_ignore, axis=0)
        rpn_loss_cls = element_times(rpn_ce, keeps)

        # The terms that are accounted for in the cls loss are those that have a label >= 0
        cls_num_terms = reduce_sum(keeps)
        cls_normalization_factor = 1.0 / cls_num_terms
        normalized_rpn_cls_loss = reduce_sum(rpn_loss_cls) * cls_normalization_factor

        reduced_rpn_loss_cls = cntk.as_block(normalized_rpn_cls_loss,
                                             [(p_rpn_labels, rpn_labels), (p_rpn_cls_score_rshp, rpn_cls_score_rshp)],
                                             'CE_with_ignore', 'norm_rpn_cls_loss')

        # regression loss
        p_rpn_bbox_pred = cntk.placeholder()
        p_rpn_bbox_targets = cntk.placeholder()
        p_rpn_bbox_inside_weights = cntk.placeholder()
        rpn_loss_bbox = SmoothL1Loss(cfg.SIGMA_RPN_L1, p_rpn_bbox_pred, p_rpn_bbox_targets,
                                     p_rpn_bbox_inside_weights, 1.0)
        # The bbox loss is normalized by the rpn batch size
        bbox_normalization_factor = 1.0 / cfg["TRAIN"].RPN_BATCHSIZE
        normalized_rpn_bbox_loss = reduce_sum(rpn_loss_bbox) * bbox_normalization_factor

        reduced_rpn_loss_bbox = cntk.as_block(normalized_rpn_bbox_loss,
                                              [(p_rpn_bbox_pred, rpn_bbox_pred), (p_rpn_bbox_targets, rpn_bbox_targets),
                                               (p_rpn_bbox_inside_weights, rpn_bbox_inside_weights)],
                                              'SmoothL1Loss', 'norm_rpn_bbox_loss')

        rpn_losses = plus(reduced_rpn_loss_cls, reduced_rpn_loss_bbox, name="rpn_losses")

    return rpn_rois, rpn_losses
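# Hedged, standalone illustration (label values are made up): how the classification-target
# construction above turns anchor labels {1 = fg, 0 = bg, -1 = ignore} into stacked (bg, fg)
# targets. An ignored anchor gets a dummy (bg=1, fg=0) pair that `keeps` later masks out of
# the cross-entropy loss.
import numpy as np
import cntk as C

labels = C.constant(np.array([[1., 0., -1.]], dtype=np.float32))  # fg, bg, ignore
keeps = C.greater_equal(labels, 0.0)
fg = C.element_times(labels, keeps)
bg = C.minus(1, fg)
print(C.splice(bg, fg, axis=0).eval())  # [[0. 1. 1.] [1. 0. 0.]]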
def get_model(f_dim, c_dim, l_dim, m_dim, num_stack_layers, super_res_class_weight,
              super_res_loss_weight, high_res_loss_weight):
    # Define the variables into which the minibatch data will be loaded.
    num_nlcd_classes, num_landcover_classes = c_dim
    _, block_size, _ = f_dim
    input_im = cntk.input_variable(f_dim, np.float32)
    lc = cntk.input_variable(l_dim, np.float32)
    lc_weight_map = cntk.input_variable((1, l_dim[1], l_dim[2]), np.float32)
    interval_center = cntk.input_variable(c_dim, np.float32)
    interval_radius = cntk.input_variable(c_dim, np.float32)
    mask = cntk.input_variable(m_dim, np.float32)

    # Create the model definition. c_map defines the number of filters trained
    # at layers of different depths in the model. num_stack_layers defines the
    # number of (modified) residual units per layer.
    # model = dense_fc_model(
    #     input_tensor=input_im,
    #     num_stack_layers=num_stack_layers,
    #     c_map=[32, 32, 16, 16, 16],
    #     num_classes=num_landcover_classes,
    #     bs=block_size
    # )
    model = cnn_model(input_tensor=input_im,
                      num_stack_layers=num_stack_layers,
                      c_map=[64, 32, 32, 32, 32],
                      num_classes=num_landcover_classes,
                      bs=block_size)

    # At this stage the model produces output for the whole region in the input
    # image, but we will focus only on the center of that region during
    # training. Here we drop the predictions at the edges.
    output = cntk.reshape(model, (num_landcover_classes, block_size, block_size))
    probs = cntk.reshape(cntk.softmax(output, axis=0),
                         (num_landcover_classes, block_size, block_size))

    # Now we calculate the super-res loss. Note that this loss function has the
    # potential to become negative since the variance is fractional.
    # Additionally, we need to make sure that when the nlcd mask[0, ...] is always 1,
    # which means that there is no nlcd label anywhere, the super_res_loss comes out
    # as a constant.
    super_res_crit = 0
    mask_size = cntk.reshape(cntk.reduce_sum(cntk.slice(mask, 0, 1, num_nlcd_classes)), (1,)) + 10.0

    # Not considering nlcd class 0
    for nlcd_id in range(1, num_nlcd_classes):
        c_mask = cntk.reshape(cntk.slice(mask, 0, nlcd_id, nlcd_id + 1), (1, block_size, block_size))
        c_mask_size = cntk.reshape(cntk.reduce_sum(c_mask), (1,)) + 0.000001
        c_interval_center = cntk.reshape(cntk.slice(interval_center, 0, nlcd_id, nlcd_id + 1),
                                         (num_landcover_classes,))
        c_interval_radius = cntk.reshape(cntk.slice(interval_radius, 0, nlcd_id, nlcd_id + 1),
                                         (num_landcover_classes,))

        # For each nlcd class, we have a landcover distribution:
        masked_probs = probs * c_mask

        # Mean of the predicted distribution
        mean = cntk.reshape(cntk.reduce_sum(masked_probs, axis=(1, 2)),
                            (num_landcover_classes,)) / c_mask_size
        # Mean variance of the predicted distribution
        var = cntk.reshape(cntk.reduce_sum(masked_probs * (1. - masked_probs), axis=(1, 2)),
                           (num_landcover_classes,)) / c_mask_size

        c_super_res_crit = cntk.square(ddist(mean, c_interval_center, c_interval_radius)) / (
            var / c_mask_size + c_interval_radius * c_interval_radius + 0.000001) \
            + cntk.log(var + 0.03)

        super_res_crit += c_super_res_crit * c_mask_size / mask_size * super_res_class_weight[nlcd_id]

    # Weight super_res loss according to the ratio of unlabeled LC pixels
    super_res_loss = cntk.reduce_sum(super_res_crit) * cntk.reduce_mean(cntk.slice(lc, 0, 0, 1))

    log_probs = cntk.log(probs)
    high_res_crit = cntk.times([0.0, 1.0, 1.0, 1.0, 1.0],
                               cntk.element_times(-cntk.element_times(log_probs, lc), lc_weight_map),
                               output_rank=2)
    # Average across spatial dimensions
    # Sum over all landcover classes; only one of the landcover classes is non-zero
    # high_res_loss = cntk.reduce_mean(high_res_crit)

    print("probs", probs)
    print("lc", lc)
    print("lc_weight_map", lc_weight_map)
    print("cntk.element_times(probs, lc)", cntk.element_times(probs, lc))

    iou_loss_i = cntk.element_times(
        [0.0, 1.0, 1.0, 1.0, 1.0],
        cntk.reduce_sum(cntk.element_times(cntk.element_times(probs, lc), lc_weight_map), axis=(1, 2)))
    print("iou_loss_i", iou_loss_i)

    iou_loss_u = cntk.element_times(
        [0.0, 1.0, 1.0, 1.0, 1.0],
        cntk.reduce_sum(cntk.minus(cntk.plus(probs, lc), cntk.element_times(probs, lc)), axis=(1, 2)))
    print("iou_loss_u", iou_loss_u)

    high_res_loss = 1.0 - ((1 / 4.0) * cntk.reduce_mean(cntk.element_divide(iou_loss_i, iou_loss_u)))
    print("high_res_loss", high_res_loss)

    loss = super_res_loss_weight * super_res_loss + high_res_loss_weight * high_res_loss

    return input_im, lc, lc_weight_map, mask, interval_center, interval_radius, \
        output, high_res_loss_weight * high_res_loss, loss
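# Hedged, self-contained illustration of the soft-IoU term used above (toy numbers):
# for per-pixel class probabilities p and one-hot labels y, intersection = sum(p * y) and
# union = sum(p + y - p * y), giving a differentiable per-class IoU.
import numpy as np
import cntk as C

p = C.constant(np.array([[0.8, 0.3], [0.2, 0.7]], dtype=np.float32))  # probs: 2 classes x 2 pixels
y = C.constant(np.array([[1.0, 0.0], [0.0, 1.0]], dtype=np.float32))  # one-hot labels

inter = C.reduce_sum(C.element_times(p, y), axis=1)
union = C.reduce_sum(C.minus(C.plus(p, y), C.element_times(p, y)), axis=1)
print(C.element_divide(inter, union).eval())  # per-class soft IoU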
# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
from __future__ import print_function

import cntk as C
import ngraph as ng
from ngraph.frontends.cntk.cntk_importer.importer import CNTKImporter

cntk_op = C.minus([1, 2, 3], [4, 5, 6])

ng_op, ng_placeholders = CNTKImporter().import_model(cntk_op)
results = ng.transformers.make_transformer().computation(ng_op)

print(results())
def test_Sub(tmpdir):
    model = C.minus([1, 2, 3], [4, 5, 6])
    verify_no_input(model, tmpdir, 'Sub_0')
def create_rpn(conv_out, scaled_gt_boxes, im_info, add_loss_functions=True,
               proposal_layer_param_string=None, conv_bias_init=0.0):
    '''
    Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    Args:
        conv_out:        The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        im_info:         A CNTK variable or constant containing
                         (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
                         e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000
        add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses
        proposal_layer_param_string: A yaml parameter string that is passed to the proposal layer.

    Returns:
        rpn_rois - the proposed ROIs
        rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness)
    '''

    # RPN network
    # init = 'normal', initValueScale = 0.01, initBias = 0.1
    num_channels = cfg["CNTK"].RPN_NUM_CHANNELS
    rpn_conv_3x3 = Convolution((3, 3), num_channels, activation=relu, pad=True, strides=1,
                               init=normal(scale=0.01), init_bias=conv_bias_init)(conv_out)
    rpn_cls_score = Convolution((1, 1), 18, activation=None, name="rpn_cls_score",
                                init=normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3)  # 2(bg/fg) * 9(anchors)
    rpn_bbox_pred = Convolution((1, 1), 36, activation=None, name="rpn_bbox_pred",
                                init=normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3)  # 4(coords) * 9(anchors)

    # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W)
    num_predictions = int(rpn_cls_score.shape[0] / 2)
    rpn_cls_score_rshp = reshape(rpn_cls_score,
                                 (2, num_predictions, rpn_cls_score.shape[1], rpn_cls_score.shape[2]),
                                 name="rpn_cls_score_rshp")
    p_rpn_cls_score_rshp = cntk.placeholder()
    rpn_cls_sm = softmax(p_rpn_cls_score_rshp, axis=0)
    rpn_cls_prob = cntk.as_block(rpn_cls_sm, [(p_rpn_cls_score_rshp, rpn_cls_score_rshp)],
                                 'Softmax', 'rpn_cls_prob')
    rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape, name="rpn_cls_prob_reshape")

    # proposal layer
    rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info,
                                               param_str=proposal_layer_param_string))
    rpn_rois = alias(rpn_rois_raw, name='rpn_rois')

    rpn_losses = None
    if add_loss_functions:
        # RPN targets
        # Comment: rpn_cls_score is only passed   vvv   to get width and height of the conv feature map ...
        atl = user_function(AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info,
                                              param_str=proposal_layer_param_string))
        rpn_labels = atl.outputs[0]
        rpn_bbox_targets = atl.outputs[1]
        rpn_bbox_inside_weights = atl.outputs[2]

        # classification loss
        p_rpn_labels = cntk.placeholder()
        p_rpn_cls_score_rshp = cntk.placeholder()

        keeps = cntk.greater_equal(p_rpn_labels, 0.0)
        fg_labels = element_times(p_rpn_labels, keeps, name="fg_targets")
        bg_labels = minus(1, fg_labels, name="bg_targets")
        rpn_labels_ignore = splice(bg_labels, fg_labels, axis=0)
        rpn_ce = cross_entropy_with_softmax(p_rpn_cls_score_rshp, rpn_labels_ignore, axis=0)
        rpn_loss_cls = element_times(rpn_ce, keeps)

        # The terms that are accounted for in the cls loss are those that have a label >= 0
        cls_num_terms = reduce_sum(keeps)
        cls_normalization_factor = 1.0 / cls_num_terms
        normalized_rpn_cls_loss = reduce_sum(rpn_loss_cls) * cls_normalization_factor

        reduced_rpn_loss_cls = cntk.as_block(normalized_rpn_cls_loss,
                                             [(p_rpn_labels, rpn_labels), (p_rpn_cls_score_rshp, rpn_cls_score_rshp)],
                                             'CE_with_ignore', 'norm_rpn_cls_loss')

        # regression loss
        p_rpn_bbox_pred = cntk.placeholder()
        p_rpn_bbox_targets = cntk.placeholder()
        p_rpn_bbox_inside_weights = cntk.placeholder()
        rpn_loss_bbox = SmoothL1Loss(cfg["CNTK"].SIGMA_RPN_L1, p_rpn_bbox_pred, p_rpn_bbox_targets,
                                     p_rpn_bbox_inside_weights, 1.0)
        # The bbox loss is normalized by the rpn batch size
        bbox_normalization_factor = 1.0 / cfg["TRAIN"].RPN_BATCHSIZE
        normalized_rpn_bbox_loss = reduce_sum(rpn_loss_bbox) * bbox_normalization_factor

        reduced_rpn_loss_bbox = cntk.as_block(normalized_rpn_bbox_loss,
                                              [(p_rpn_bbox_pred, rpn_bbox_pred), (p_rpn_bbox_targets, rpn_bbox_targets),
                                               (p_rpn_bbox_inside_weights, rpn_bbox_inside_weights)],
                                              'SmoothL1Loss', 'norm_rpn_bbox_loss')

        rpn_losses = plus(reduced_rpn_loss_cls, reduced_rpn_loss_bbox, name="rpn_losses")

    return rpn_rois, rpn_losses
import cntk

a = [1, 2, 3]
b = [4, 5, 6]

c = cntk.minus(a, b).eval()
print(c)
def train_model(reader_train, reader_test, model_func, max_epochs):
    # similar to a placeholder in tensorflow
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # preprocess
    mean_removed_features = C.minus(input_var, C.constant(114), name='mean_removed_input')

    # output of network, loss and metrics
    z = model_func(mean_removed_features, out_dims=num_classes)
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    epoch_size = 10400
    minibatch_size = 256
    target_loss = 1.50

    # learning rate, momentum coefficient and weight decay (regularization coefficient)
    lr_per_mb = [0.01] * 25 + [0.001] * 25 + [0.0001] * 25 + [0.00001] * 25 + [0.000001]
    lr_schedule = C.learning_parameter_schedule(lr_per_mb, minibatch_size=minibatch_size, epoch_size=epoch_size)
    mm_schedule = C.learners.momentum_schedule(0.9, minibatch_size=minibatch_size)
    l2_reg_weight = 0.0005  # CNTK L2 regularization is per sample, thus same as Caffe

    # optimizer
    learner = C.learners.momentum_sgd(z.parameters, lr=lr_schedule, momentum=mm_schedule,
                                      minibatch_size=minibatch_size, unit_gain=False,
                                      l2_regularization_weight=l2_reg_weight)
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), [learner], [progress_printer])

    # similar to feed_dict in tensorflow
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    C.logging.log_number_of_parameters(z)
    print()

    # train loop
    finished = False
    for epoch in range(1, max_epochs + 1):
        if finished:
            logging.info("Training finished!")
            break
        sample_count = 0
        batch_cnt = 1
        while sample_count < epoch_size:
            batch_begin_time = time.time()
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count),
                                               input_map=input_map)
            _, dict_out = trainer.train_minibatch(data, outputs=[ce, pe])
            curr_loss = np.asscalar(np.mean(dict_out[ce]))
            curr_err_rate = np.asscalar(np.mean(dict_out[pe]))
            # curr_loss = np.mean(ce.eval(data), axis=0)
            logging.info(
                "epoch[%d of %d] - batch[%d] - training loss=%f - training err_rate = %f %% - %f examples/s"
                % (epoch, max_epochs, batch_cnt, curr_loss, curr_err_rate * 100,
                   data[label_var].num_samples / (time.time() - batch_begin_time)))
            sample_count += data[label_var].num_samples
            batch_cnt += 1
            if curr_loss < target_loss:
                finished = True
                break
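# Hedged mini-example (toy shapes, not from the original source): the same mean-removal
# pattern on a tiny fake image input, showing that C.minus broadcasts the scalar constant
# over the whole tensor.
import numpy as np
import cntk as C

x = C.input_variable((1, 2, 2))
mean_removed = C.minus(x, C.constant(114), name='mean_removed_input')
print(mean_removed.eval({x: np.full((1, 1, 2, 2), 120, dtype=np.float32)}))  # all 6.0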
def main(_):
    print("CNTK: " + cntk.__version__)
    print(cntk.minus([1, 2, 3], [4, 5, 6]).eval())
# Import the CNTK library
import cntk
import numpy as np

#################################
#### Mathematical operations ####
#################################

# Initial definitions
a = [1, 2, 3]
b = [3, 2, 1]

# Get the type of the variable
print(type(a))

# Subtraction
print(cntk.minus(a, b).eval())

# Addition
print(cntk.plus(a, b).eval())

# Element-wise division
print(cntk.element_divide(a, b).eval())

# Defining a variable
variable = cntk.input_variable((2), np.float32)
print(variable)
import cntk

print("Tensor A = [1,2,3]")
print("Tensor B = [4,5,6]\n")

print("A+B:")
sum = cntk.plus([1, 2, 3], [4, 5, 6]).eval()
print("{}\n".format(sum))

print("A-B:")
minus = cntk.minus([1, 2, 3], [4, 5, 6]).eval()
print("{}\n".format(minus))

print("A*B:")
times = cntk.times([1, 3, 4], [4, 5, 6]).eval()
print("{}\n".format(times))

print("A/B:")
divide = cntk.element_divide([4, 32, 15], [2, 4, 5]).eval()
print("{}\n".format(divide))

print("A^B:")
pow = cntk.pow([1, 3, 4], [4, 2, 3]).eval()
print("{}\n".format(pow))

print("Min(A,B):")
min = cntk.element_min([1, 2, 3], [4, 5, 6], [2, 1, 0]).eval()
print("{}\n".format(min))

print("Max(A,B):")
max = cntk.element_max([1, 2, 3], [4, 5, 6], [2, 9, 0]).eval()
print("{}\n".format(max))