def calculate_loss_vector(network, path, location_path, communicator): source = DataSource(path, opt.vocab_file, location_path, opt.seqlength, opt.batchsize) # the curr row -> the curr col # the curr col -> the next row row_loss = C.log(C.softmax(network['model'].outputs[0])) col_loss = C.log(C.softmax(network['model'].outputs[1])) loss = C.combine([row_loss, col_loss]) row_loss_vector = np.zeros((opt.vocabsize, vocab_sqrt)) col_loss_vector = np.zeros((opt.vocabsize, vocab_sqrt)) flag = True while flag: mb = source.next_minibatch(opt.seqlength * opt.batchsize * Communicator.num_workers(), Communicator.num_workers(), communicator.rank()) result = loss.eval({ network['row']: mb[source.input1], network['col']: mb[source.input2], }) row_prob = result[loss.outputs[0]] col_prob = result[loss.outputs[1]] label1 = mb[source.word1].asarray() label2 = mb[source.word2].asarray() sequences = len(label1) for i in range(sequences): seqlength = len(row_prob[i]) for j in range(seqlength): row_word = int(label1[i][j][0]) col_word = int(label2[i][j][0]) row_loss_vector[row_word] -= row_prob[i][j] col_loss_vector[col_word] -= col_prob[i][j] flag = not mb[source.input1].sweep_end return col_loss_vector, row_loss_vector
def calculate_loss_vector(network, path, location_path, communicator):
    """Sum negative log-probabilities per vocabulary word over one full sweep.

    Evaluates the row/col model on this worker's shard and accumulates
    ``-log(softmax(...))`` rows into two (vocabsize, vocab_sqrt) arrays.
    Returns the column vector first, then the row vector.
    """
    reader = DataSource(path, opt.vocab_file, location_path, opt.seqlength, opt.batchsize)
    # the curr row -> the curr col; the curr col -> the next row
    log_row = C.log(C.softmax(network['model'].outputs[0]))
    log_col = C.log(C.softmax(network['model'].outputs[1]))
    joint = C.combine([log_row, log_col])
    row_vector = np.zeros((opt.vocabsize, vocab_sqrt))
    col_vector = np.zeros((opt.vocabsize, vocab_sqrt))
    while True:
        # fetch this rank's slice of the global minibatch
        mb = reader.next_minibatch(
            opt.seqlength * opt.batchsize * Communicator.num_workers(),
            Communicator.num_workers(), communicator.rank())
        outputs = joint.eval({
            network['row']: mb[reader.input1],
            network['col']: mb[reader.input2],
        })
        row_prob = outputs[joint.outputs[0]]
        col_prob = outputs[joint.outputs[1]]
        label1 = mb[reader.word1].asarray()
        label2 = mb[reader.word2].asarray()
        for seq_idx in range(len(label1)):
            for step in range(len(row_prob[seq_idx])):
                # labels come as [sequence][step][0] indices
                row_word = int(label1[seq_idx][step][0])
                col_word = int(label2[seq_idx][step][0])
                row_vector[row_word] -= row_prob[seq_idx][step]
                col_vector[col_word] -= col_prob[seq_idx][step]
        if mb[reader.input1].sweep_end:
            break
    return col_vector, row_vector
def test_factor_dense_for_prediction():
    """Train a small dense net, factor it with ``nc.factor_dense``, and check
    the factored model keeps at least 50% of the original's prediction match.
    """
    input_dim = 2
    num_output_classes = 2
    hidden_layer_dim = 50
    num_minibatches_to_train = 2000
    minibatch_size = 25
    learning_rate = 0.5

    # renamed from `input` to avoid shadowing the builtin
    input_var = C.input_variable(input_dim)
    label = C.input_variable(num_output_classes)
    z = _create_model_dense(input_var, input_dim, hidden_layer_dim,
                            num_output_classes)
    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])

    # Run the trainer and perform model training
    # (removed unused `training_progress_output_freq` and `plotdata` locals)
    for _ in range(num_minibatches_to_train):
        features, labels = _generate_random_data_sample(
            minibatch_size, input_dim, num_output_classes)
        # Specify the input variables mapping in the model to actual minibatch data
        trainer.train_minibatch({input_var: features, label: labels})

    # generate some data to predict
    features, labels = _generate_random_data_sample(10, 2, 2)

    # factor the model.
    newz = nc.factor_dense(z, projection_function=_get_rank_reduced_size,
                           filter_function=_filter)
    original_out = C.softmax(z)
    factored_out = C.softmax(newz)
    original_labels_probs = original_out.eval({input_var: features})
    predicted_label_probs = factored_out.eval({input_var: features})
    original_prediction_percentage = _percentage_match(labels, original_labels_probs)

    # reduced model should have at least 50% match compared to the original.
    # For the test, we reduced the training minibatches, thus the match is lower.
    assert (original_prediction_percentage * 0.5 <= _percentage_match(
        labels, predicted_label_probs))
def hierarchical_softmax_layer(input_var, label_index, label_dim, label_classes=None):
    '''
    A two layers hierarchical softmax function:

    Args:
        input_var: Variable with shape: [#,*](dim_x)
        label_index: index of label's category: [#,*](1)
        label_dim: number of the label categories
        label_classes: number of classes of the label categories
    Returns:
        output_prob: the probability of the given label [#,*](1)
        class_probs: the probability of all the label classes [#,*](label_classes)
        all_probs: the probability of all label classes
    '''
    input_dim = input_var.shape[0]
    if not label_classes:
        # default: roughly sqrt(label_dim) classes, sqrt(label_dim) words each
        label_classes = int(np.ceil(np.sqrt(float(label_dim))))
    n_outputs_per_class = int(np.ceil(label_dim / label_classes))

    # map the flat label index to (class, within-class) coordinates
    target_class = C.floor((label_index + 0.5) / n_outputs_per_class)
    target_output_in_class = C.round(label_index - target_class * n_outputs_per_class)

    # first layer predicts the class; second layer (w2s/b2s, one slice per class)
    # predicts the word within the chosen class
    w1 = parameter(shape=(input_dim, label_classes), init=C.glorot_normal(), name='hsoftmax_w1')
    b1 = parameter(shape=(label_classes), init=C.glorot_normal(), name='hsoftmax_b1')
    w2s = parameter(shape=(label_classes, input_dim, n_outputs_per_class,), init=C.glorot_normal(), name='hsoftmax_w2s')
    b2s = parameter(shape=(label_classes, n_outputs_per_class,), init=C.glorot_normal(), name='hsoftmax_b2s')

    class_probs = softmax(b1 + times(input_var, w1))

    # TODO: fix the bug in backprop for sparse, and use sparse embedding to accelerate
    target_class_one_hot = C.one_hot(target_class, num_classes=label_classes, sparse_output=False)
    # select the second-layer weights/bias slice for the target class
    w2 = C.reshape(C.times(target_class_one_hot, w2s, output_rank=2), [input_dim, -1])
    b2 = C.reshape(times(target_class_one_hot, b2s, output_rank=1), [-1])
    probs_in_class = softmax(b2 + times(input_var, w2))
    prob_in_class = C.times_transpose(
        C.one_hot(target_output_in_class, num_classes=n_outputs_per_class, sparse_output=False),
        probs_in_class)
    class_prob = C.times_transpose(
        C.one_hot(target_class, num_classes=label_classes, sparse_output=False),
        class_probs)
    # P(label) = P(class) * P(word | class)
    output_prob = prob_in_class * class_prob

    # this is for calculating all the outputs' probabilities
    all_probs = []
    for i in range(label_classes):
        ci = C.constant(i)
        ci_one_hot = C.one_hot(ci, num_classes=label_classes, sparse_output=False)
        w2a = C.times(ci_one_hot, w2s, output_rank=2)
        b2a = C.times(ci_one_hot, b2s, output_rank=1)
        probs_in_classa = C.softmax(b2a + times(input_var, w2a))
        class_proba = C.times_transpose(ci_one_hot, class_probs)
        output_proba = probs_in_classa * class_proba
        all_probs.append(output_proba)
    return output_prob, class_probs, all_probs
def test_factor_dense_for_prediction():
    """Train a small dense net, factor it with ``nc.factor_dense``, and check
    the factored model keeps at least 50% of the original's prediction match.
    """
    input_dim = 2
    num_output_classes = 2
    hidden_layer_dim = 50
    num_minibatches_to_train = 2000
    minibatch_size = 25
    learning_rate = 0.5

    # renamed from `input` to avoid shadowing the builtin
    input_var = C.input_variable(input_dim)
    label = C.input_variable(num_output_classes)
    z = _create_model_dense(input_var, input_dim, hidden_layer_dim,
                            num_output_classes)
    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])

    # Run the trainer and perform model training
    # (removed unused `training_progress_output_freq` and `plotdata` locals)
    for _ in range(num_minibatches_to_train):
        features, labels = _generate_random_data_sample(
            minibatch_size, input_dim, num_output_classes)
        # Specify the input variables mapping in the model to actual minibatch data
        trainer.train_minibatch({input_var: features, label: labels})

    # generate some data to predict
    features, labels = _generate_random_data_sample(10, 2, 2)

    # factor the model.
    newz = nc.factor_dense(z, projection_function=_get_rank_reduced_size,
                           filter_function=_filter)
    original_out = C.softmax(z)
    factored_out = C.softmax(newz)
    original_labels_probs = original_out.eval({input_var: features})
    predicted_label_probs = factored_out.eval({input_var: features})
    original_prediction_percentage = _percentage_match(labels, original_labels_probs)

    # reduced model should have at least 50% match compared to the original.
    # For the test, we reduced the training minibatches, thus the match is lower.
    assert (original_prediction_percentage * 0.5 <= _percentage_match(
        labels, predicted_label_probs))
def output_layer(self, attention_context, modeling_context):
    """Predict answer-span start/end logits (BiDAF-style output layer).

    Args:
        attention_context: sequence of 8*hidden_dim attention vectors.
        modeling_context: sequence of 2*hidden_dim modeling vectors.

    Returns:
        A CNTK block combining [start_logits, end_logits].
    """
    att_context = C.placeholder(shape=(8 * self.hidden_dim, ))
    mod_context = C.placeholder(shape=(2 * self.hidden_dim, ))
    # output layer
    start_logits = C.layers.Dense(1, name='out_start')(C.dropout(
        C.splice(mod_context, att_context), self.dropout))
    if self.two_step:
        # hard selection: condition end prediction on the argmax start position
        start_hardmax = seq_hardmax(start_logits)
        att_mod_ctx = C.sequence.last(
            C.sequence.gather(mod_context, start_hardmax))
    else:
        # soft selection: expected modeling vector under the start distribution
        start_prob = C.softmax(start_logits)
        att_mod_ctx = C.sequence.reduce_sum(mod_context * start_prob)
    att_mod_ctx_expanded = C.sequence.broadcast_as(att_mod_ctx, att_context)
    end_input = C.splice(att_context, mod_context, att_mod_ctx_expanded,
                         mod_context * att_mod_ctx_expanded)
    m2 = OptimizedRnnStack(self.hidden_dim,
                           bidirectional=True,
                           use_cudnn=self.use_cudnn,
                           name='output_rnn')(end_input)
    end_logits = C.layers.Dense(1, name='out_end')(C.dropout(
        C.splice(m2, att_context), self.dropout))
    return C.as_block(C.combine([start_logits, end_logits]),
                      [(att_context, attention_context),
                       (mod_context, modeling_context)], 'output_layer',
                      'output_layer')
def criterion(self):
    """Build the CapsNet training criterion.

    Returns:
        (total_loss, error): margin loss (plus scaled reconstruction loss when
        ``self.use_reconstruction``) and the classification error.
    """
    # hyperparameters
    lambda_val = 0.5

    # Margin loss:
    #   L_k = T_k * max(0, 0.9 - ||v_k||)^2 + lambda * (1 - T_k) * max(0, ||v_k|| - 0.1)^2
    left = ct.square(ct.relu(0.9 - self.length))
    right = ct.square(ct.relu(self.length - 0.1))
    left = ct.reshape(left, (-1))
    right = ct.reshape(right, (-1))
    lc = self.labels * left + lambda_val * (1 - self.labels) * right
    margin_loss = ct.reduce_sum(lc, axis=0)
    # average over the batch axis
    margin_loss = ct.reduce_mean(margin_loss, axis=ct.axis.Axis.default_batch_axis())

    # classification_error computed from capsule lengths (10 output classes)
    predict = ct.softmax(self.length, axis=0)
    error = ct.classification_error(ct.reshape(predict, (10)), self.labels)

    total_loss = margin_loss
    reconstruction_err = 0
    if self.use_reconstruction:
        # reconstruction loss: mean squared error between decoder output and input
        features = ct.reshape(self.features, shape=(-1,))
        encoder = ct.reshape(self.training_model, shape=(-1,))
        squared = ct.square(encoder - features)
        reconstruction_err = ct.reduce_mean(squared, axis=0)
        reconstruction_err = ct.reduce_mean(reconstruction_err, axis=ct.axis.Axis.default_batch_axis())
        # 0.0005 * 784 down-weights reconstruction as in the CapsNet paper
        total_loss = margin_loss + (0.0005*784) * reconstruction_err
    return total_loss, error
def simi_attention(self, input, memory):
    '''
    Additive (tanh) attention of `memory` over `input`.

    return:
    memory weighted vectors over input [#,c][d]
    weight
    '''
    input_ph = C.placeholder()  # [#,c][d]
    mem_ph = C.placeholder()  # [#,q][d]

    input_dense = Dense(2 * self.hidden_dim, bias=False, input_rank=1)
    mem_dense = Dense(2 * self.hidden_dim, bias=False, input_rank=1)
    bias = C.Parameter(shape=(2 * self.hidden_dim, ), init=0.0)
    weight_dense = Dense(1, bias=False, input_rank=1)

    proj_inp = input_dense(input_ph)  # [#,c][d]
    proj_mem = mem_dense(mem_ph)  # [#,q][d]
    # unpack the memory sequence so each input step can attend over all of it
    unpack_memory, mem_mask = C.sequence.unpack(
        proj_mem, 0).outputs  # [#][*=q, d] [#][*=q]
    expand_mem = C.sequence.broadcast_as(unpack_memory,
                                         proj_inp)  # [#,c][*=q,d]
    expand_mask = C.sequence.broadcast_as(mem_mask, proj_inp)  # [#,c][*=q]
    matrix = C.reshape(weight_dense(C.tanh(proj_inp + expand_mem + bias)),
                       (-1, ))  # [#,c][*=q]
    # mask out padding positions with a large negative before softmax
    matrix = C.element_select(expand_mask, matrix, -1e30)
    logits = C.softmax(matrix, axis=0)  # [#,c][*=q]
    # weighted sum of memory vectors per input step
    weight_mem = C.reduce_sum(C.reshape(logits, (-1, 1)) * expand_mem,
                              axis=0)  # [#,c][d]
    weight_mem = C.reshape(weight_mem, (-1, ))

    return C.as_block(C.combine(weight_mem, logits), [(input_ph, input),
                                                      (mem_ph, memory)],
                      'simi_attention', 'simi_attention')
def build_graph(self_attention, self_penalty, embeded_dim=60, h_dim=150, d_a=350, r=30):
    """Build a (optionally self-attentive) sentence classifier.

    Implements the structured self-attention of Lin et al. when
    ``self_attention`` is set, with an optional Frobenius-norm penalty
    (``self_penalty``) attached as ``selfAtt.p``.

    NOTE(review): relies on module-level names `x`, `num_labels` and
    `create_birnn` — confirm they are defined where this is called.
    """
    with C.layers.default_options(init=C.xavier()):
        embeded = C.layers.Embedding(embeded_dim)(x)
        embeded = C.layers.Stabilizer()(embeded)

        H = create_birnn(C.layers.GRU(h_dim), C.layers.GRU(h_dim))(embeded)
        if self_attention:
            Ws1 = C.parameter(shape=(d_a, 2 * h_dim), name="Ws1")
            Ws2 = C.parameter(shape=(r, d_a), name="Ws2")
            # A: (r x seq) attention matrix; H becomes the M matrix of the paper
            A = C.softmax(C.times(Ws2, C.tanh(C.times_transpose(Ws1, H))))
            H = C.times(A, H)  # the M in the paper
            if self_penalty:
                I = C.constant(np.eye(r), dtype=np.float32)
                P = C.times_transpose(A, A) - I  # r*r
                p = C.reduce_sum(C.abs(C.element_times(
                    P, P)))  # frobenius norm **2
        y_ = C.layers.Dense(200, activation=C.ops.relu)(H)
        # y_pre = C.layers.Dense(num_labels, activation = None)(y_)

        def selfAtt(x):
            # NOTE(review): the argument `x` is ignored — the closure variable
            # `y_` is used instead, and a fresh Dense layer (new parameters) is
            # created on every call. Confirm this is intentional.
            y_pre = C.layers.Dense(num_labels, activation=None)(y_)
            return y_pre

        if self_penalty:
            selfAtt.p = p
        return selfAtt
def predict_image(model, image_path):
    """Run `model` on the image at `image_path` and return its softmax output.

    The image is resized to (image_width, image_height), converted RGB -> BGR,
    and reordered HWC -> CHW before evaluation. Returns None (after logging)
    when the image cannot be processed.
    """
    try:
        raw = Image.open(image_path)
        # resize to the spatial dims the network expects
        scaled = raw.resize((image_width, image_height), Image.ANTIALIAS)
        # RGB -> BGR channel order
        bgr = np.asarray(scaled, dtype=np.float32)[..., [2, 1, 0]]
        # HWC -> CHW layout
        chw = np.ascontiguousarray(np.rollaxis(bgr, 2))

        # evaluate the model and squash the raw output into probabilities
        feed = {model.arguments[0]: [chw]}
        raw_output = model.eval(feed)
        return C.softmax(raw_output[0]).eval()
    except Exception as e:
        print(e)
        print("Could not open (skipping file): {}".format(image_path))
        return None
def create_faster_rcnn_eval_model(model, image_input, dims_input, cfg, rpn_model=None):
    """Assemble a frozen Faster R-CNN evaluation graph from a trained model.

    Clones the conv stack, the RPN head (optionally from a separate
    `rpn_model`), and the ROI classifier/regressor, rebuilding the proposal
    layer so config changes take effect. Returns a combined function with
    outputs [cls_pred, rpn_rois, bbox_regr].
    """
    print("creating eval model")
    last_conv_node_name = cfg["MODEL"].LAST_CONV_NODE_NAME
    conv_layers = clone_model(model, [cfg["MODEL"].FEATURE_NODE_NAME],
                              [last_conv_node_name], CloneMethod.freeze)
    conv_out = conv_layers(image_input)

    model_with_rpn = model if rpn_model is None else rpn_model
    rpn = clone_model(model_with_rpn, [last_conv_node_name],
                      ["rpn_cls_prob_reshape", "rpn_bbox_pred"],
                      CloneMethod.freeze)
    rpn_out = rpn(conv_out)
    # we need to add the proposal layer anew to account for changing configs when buffering proposals in 4-stage training
    rpn_rois = create_proposal_layer(rpn_out.outputs[0], rpn_out.outputs[1],
                                     dims_input, cfg)

    roi_fc_layers = clone_model(model,
                                [last_conv_node_name, "rpn_target_rois"],
                                ["cls_score", "bbox_regr"], CloneMethod.freeze)
    pred_net = roi_fc_layers(conv_out, rpn_rois)
    cls_score = pred_net.outputs[0]
    bbox_regr = pred_net.outputs[1]

    if cfg.BBOX_NORMALIZE_TARGETS:
        # undo the target normalization applied during training
        num_boxes = int(bbox_regr.shape[1] / 4)
        bbox_normalize_means = np.array(cfg.BBOX_NORMALIZE_MEANS * num_boxes)
        bbox_normalize_stds = np.array(cfg.BBOX_NORMALIZE_STDS * num_boxes)
        bbox_regr = plus(element_times(bbox_regr, bbox_normalize_stds),
                         bbox_normalize_means,
                         name='bbox_regr')

    cls_pred = softmax(cls_score, axis=1, name='cls_pred')
    eval_model = combine([cls_pred, rpn_rois, bbox_regr])
    return eval_model
def softmax(x, name=''):
    '''
    Squashes the input values `x` such that they add up to 1:

    :math:`softmax(x) = {\exp(x_i) - \max_{x_i \in x}(\exp(x_i)) \over {\sum_{x_i \in x} \exp(x_i)- \max_{x_i \in x}(\exp(x_i)) }}`

    The term :math:`\max_{x_i \in x}(\exp(x_i))` is subtracted for numerical
    stability.

    Example:
        >>> C.eval(C.softmax([[1, 1, 2, 3]]))
        [array([[[ 0.082595,  0.082595,  0.224515,  0.610296]]])]

        >>> C.eval(C.softmax([1, 1]))
        [array([[ 0.5,  0.5]])]

    Args:
        x: numpy array or any :class:`cntk.Function` that outputs a tensor
        name (str): the name of the node in the network

    Returns:
        :class:`cntk.Function`
    '''
    # NOTE(review): the `name` parameter is accepted but never forwarded to the
    # underlying op — confirm whether it should be passed through.
    # The local import deliberately resolves the C++-backed softmax op; it
    # shadows this wrapper's own name inside the function body.
    from cntk import softmax
    x = sanitize_input(x)
    return softmax(x).output()
def create_fast_rcnn_eval_model(model, image_input, roi_proposals, cfg):
    """Assemble a frozen Fast R-CNN evaluation graph from a trained model.

    Clones the predictor up to cls_score/bbox_regr, un-normalizes the bbox
    regression targets when configured, and returns a combined function with
    outputs [cls_pred, bbox_regr].
    """
    print("creating eval model")
    predictor = clone_model(model,
                            [cfg["MODEL"].FEATURE_NODE_NAME, "roi_proposals"],
                            ["cls_score", "bbox_regr"], CloneMethod.freeze)
    pred_net = predictor(image_input, roi_proposals)
    cls_score = pred_net.outputs[0]
    bbox_regr = pred_net.outputs[1]

    if cfg.BBOX_NORMALIZE_TARGETS:
        # undo the target normalization applied during training
        num_boxes = int(bbox_regr.shape[1] / 4)
        bbox_normalize_means = np.array(cfg.BBOX_NORMALIZE_MEANS * num_boxes)
        bbox_normalize_stds = np.array(cfg.BBOX_NORMALIZE_STDS * num_boxes)
        bbox_regr = plus(element_times(bbox_regr, bbox_normalize_stds),
                         bbox_normalize_means,
                         name='bbox_regr')

    cls_pred = softmax(cls_score, axis=1, name='cls_pred')
    eval_model = combine([cls_pred, bbox_regr])

    if cfg["CNTK"].DEBUG_OUTPUT:
        plot(
            eval_model,
            os.path.join(cfg.OUTPUT_PATH,
                         "graph_frcn_eval." + cfg["CNTK"].GRAPH_TYPE))

    return eval_model
def softmax(x, name=''):
    '''
    Squashes the input values `x` such that they add up to 1:

    :math:`softmax(x) = {\exp(x_i) - \max_{x_i \in x}(\exp(x_i)) \over {\sum_{x_i \in x} \exp(x_i)- \max_{x_i \in x}(\exp(x_i)) }}`

    The term :math:`\max_{x_i \in x}(\exp(x_i))` is subtracted for numerical
    stability.

    Example:
        >>> C.eval(C.softmax([[1, 1, 2, 3]]))
        [array([[[ 0.082595,  0.082595,  0.224515,  0.610296]]])]

        >>> C.eval(C.softmax([1, 1]))
        [array([[ 0.5,  0.5]])]

    Args:
        x: numpy array or any :class:`cntk.Function` that outputs a tensor
        name (str): the name of the node in the network

    Returns:
        :class:`cntk.Function`
    '''
    # NOTE(review): the `name` parameter is accepted but never forwarded to the
    # underlying op — confirm whether it should be passed through.
    # The local import deliberately resolves the C++-backed softmax op; it
    # shadows this wrapper's own name inside the function body.
    from cntk import softmax
    x = sanitize_input(x)
    return softmax(x).output()
def eval_single_image_imagenet(opt_model, loaded_model, image_path, image_dims):
    """Evaluate a single image with an ImageNet model.

    Args:
        opt_model: model name string; branches on "VGG" / "InceptionV3".
        loaded_model: the loaded CNTK model (or output list for non-VGG).
        image_path: path to the image file.
        image_dims: assumed (channels, height, width) — resize uses
            image_dims[2] x image_dims[1]. TODO confirm with callers.

    Returns:
        Softmax probabilities for VGG models; raw squeezed network output for
        the other branches. NOTE(review): this inconsistency (probs vs logits)
        is preserved from the original — confirm callers handle both.
    """
    img = Image.open(image_path)
    if image_path.endswith("png"):
        # flatten transparency onto white: PIL treats an Image passed as the
        # second paste() argument as the mask
        temp = Image.new("RGB", img.size, (255, 255, 255))
        temp.paste(img, img)
        img = temp
    resized = img.resize((image_dims[2], image_dims[1]), Image.ANTIALIAS)
    # RGB -> BGR, then HWC -> CHW
    bgr_image = np.asarray(resized, dtype=np.float32)[..., [2, 1, 0]]
    hwc_format = np.ascontiguousarray(np.rollaxis(bgr_image, 2))

    if "VGG" in opt_model:
        arguments = {loaded_model.arguments[0]: [hwc_format]}
        output = loaded_model.eval(arguments)
        sm = cntk.softmax(output[0])
        return sm.eval()
    elif "InceptionV3" in opt_model:
        # evaluate from the first output's owner node
        z = cntk.as_composite(loaded_model[0].owner)
        output = z.eval({z.arguments[0]: [hwc_format]})
    else:
        # other models: evaluate from the fourth output's owner node
        z = cntk.as_composite(loaded_model[3].owner)
        output = z.eval({z.arguments[0]: [hwc_format]})
    predictions = np.squeeze(output)
    return predictions
def gaussian_mdn_coeff(x, nmix: int, ndim: int):
    """
    Extracts the coefficients for gaussian mixture density network.
    Assumes independence between gaussian dimensions.

    The input tensor is laid out as
    ``[alpha logits (nmix) | log-sigma (nmix) | mu (nmix * ndim)]``.

    Example:
        ndim, nmix = 1, 3
        a = C.input_variable(ndim)
        prediction = Dense((ndim + 2) * nmix)(a)
        coeffs = C.combine(gaussian_mdn_coeff(prediction_tensor, nmix=nmix, ndim=ndim)).eval({a: x})
        alpha, mu, sigma = coeffs.values()

    Arguments:
        x: input tensor
        nmix (int): number of mixture
        ndim (int): number of dimension of gaussian

    Returns:
        tuple
    """
    if len(x.shape) != 1:
        raise ValueError("Must be a 1d tensor, but input has shape {0}".format(
            x.shape))

    # first nmix entries: mixture weights via softmax
    alpha = C.softmax(C.slice(x, 0, 0, nmix), name='alpha')
    # next nmix entries: exp ensures positive std dev
    sigma = C.exp(
        C.slice(x, 0, nmix, 2 * nmix), name='sigma'
    )  # common variance for all components in single gaussian kernel
    # remaining nmix*ndim entries: component means, reshaped to (nmix, ndim)
    mu = C.reshape(C.slice(x, 0, 2 * nmix, (ndim + 2) * nmix),
                   shape=(nmix, ndim),
                   name='mu')

    return alpha, mu, sigma
def score_models(distance_measure, unk_ivecs, spk_ivecs, calc_softmax=False):
    """Score every unknown i-vector against every speaker i-vector.

    Args:
        distance_measure: CNTK function taking (unknown, speaker) i-vectors.
        unk_ivecs: (n_inputs, dim) array of unknown-utterance i-vectors.
        spk_ivecs: (n_spks, dim) array of speaker i-vectors.
        calc_softmax: if True, pass the raw score through C.softmax first.

    Returns:
        (n_spks, n_inputs) float32 score matrix.

    NOTE(review): the softmax branch builds a new constant graph per pair
    (C.softmax over an already-evaluated numpy result), which is slow; also,
    if the measure's output is a single scalar, softmax of it is always 1.0 —
    confirm the output shape before relying on this branch.
    """
    print('Score models')
    n_inputs = unk_ivecs.shape[0]
    n_spks = spk_ivecs.shape[0]
    scores = np.zeros(shape=(n_spks, n_inputs), dtype=np.float32)
    if calc_softmax:
        for j in range(n_spks):
            spk = spk_ivecs[j, :]
            print("speaker {}\n".format(j))
            for i in range(n_inputs):
                scores[j, i] = C.softmax(
                    distance_measure.eval({
                        distance_measure.arguments[0]: unk_ivecs[i, :],
                        distance_measure.arguments[1]: spk
                    })).eval()[0, 0]
    else:
        for j in range(n_spks):
            spk = spk_ivecs[j, :]
            print("speaker {}\n".format(j))
            for i in range(n_inputs):
                scores[j, i] = distance_measure.eval({
                    distance_measure.arguments[0]: unk_ivecs[i, :],
                    distance_measure.arguments[1]: spk
                })[0, 0]
    return scores
def create_eval_model(model, image_input, dims_input, rpn_model=None):
    """Assemble a frozen Faster R-CNN evaluation graph (global-cfg variant).

    Clones conv stack, RPN (optionally from `rpn_model`), and ROI head, and
    returns a combined function with outputs [cls_pred, rpn_rois, bbox_regr].
    Relies on module-level `feature_node_name`, `last_conv_node_name`, `cfg`.
    """
    print("creating eval model")
    conv_layers = clone_model(model, [feature_node_name],
                              [last_conv_node_name], CloneMethod.freeze)
    conv_out = conv_layers(image_input)

    model_with_rpn = model if rpn_model is None else rpn_model
    rpn = clone_model(model_with_rpn, [last_conv_node_name, "dims_input"],
                      ["rpn_rois"], CloneMethod.freeze)
    rpn_rois = rpn(conv_out, dims_input)

    roi_fc_layers = clone_model(model,
                                [last_conv_node_name, "rpn_target_rois"],
                                ["cls_score", "bbox_regr"], CloneMethod.freeze)
    pred_net = roi_fc_layers(conv_out, rpn_rois)
    cls_score = pred_net.outputs[0]
    bbox_regr = pred_net.outputs[1]

    if cfg["TRAIN"].BBOX_NORMALIZE_TARGETS and cfg[
            "TRAIN"].BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # undo the precomputed target normalization applied during training
        num_boxes = int(bbox_regr.shape[1] / 4)
        bbox_normalize_means = np.array(cfg["TRAIN"].BBOX_NORMALIZE_MEANS *
                                        num_boxes)
        bbox_normalize_stds = np.array(cfg["TRAIN"].BBOX_NORMALIZE_STDS *
                                       num_boxes)
        bbox_regr = plus(element_times(bbox_regr, bbox_normalize_stds),
                         bbox_normalize_means,
                         name='bbox_regr')

    cls_pred = softmax(cls_score, axis=1, name='cls_pred')
    eval_model = combine([cls_pred, rpn_rois, bbox_regr])
    return eval_model
def new_attention(encoder_hidden_state, decoder_hidden_state):
    """Additive (Bahdanau-style) attention over the encoder sequence for each
    decoder step. Relies on closure vars `attn_proj_enc`, `attn_proj_dec`,
    `attn_proj_tanh`, `attn_final_stab`, and `Label`.

    Returns the attended encoder vector, one per decoder step: [#, d] [h].
    """
    # encode_hidden_state: [#, e] [h]
    # decoder_hidden_state: [#, d] [H]
    unpacked_encoder_hidden_state, valid_mask = C.sequence.unpack(encoder_hidden_state, padding_value=0).outputs
    # unpacked_encoder_hidden_state: [#] [*=e, h]
    # valid_mask: [#] [*=e]
    projected_encoder_hidden_state = C.sequence.broadcast_as(attn_proj_enc(unpacked_encoder_hidden_state), decoder_hidden_state)
    # projected_encoder_hidden_state: [#, d] [*=e, attention_dim]
    broadcast_valid_mask = C.sequence.broadcast_as(C.reshape(valid_mask, (1,), 1), decoder_hidden_state)
    # broadcast_valid_mask: [#, d] [*=e]
    projected_decoder_hidden_state = attn_proj_dec(decoder_hidden_state)
    # projected_decoder_hidden_state: [#, d] [attention_dim]
    tanh_output = C.tanh(projected_decoder_hidden_state + projected_encoder_hidden_state)
    # tanh_output: [#, d] [*=e, attention_dim]
    attention_logits = attn_proj_tanh(tanh_output)
    # attention_logits = [#, d] [*=e, 1]
    minus_inf = C.constant(-1e+30)
    # mask out padded encoder positions before the softmax
    masked_attention_logits = C.element_select(broadcast_valid_mask, attention_logits, minus_inf)
    # masked_attention_logits = [#, d] [*=e]
    attention_weights = C.softmax(masked_attention_logits, axis=0)
    attention_weights = Label('attention_weights')(attention_weights)
    # attention_weights = [#, d] [*=e]
    # weighted sum of encoder states under the attention distribution
    attended_encoder_hidden_state = C.reduce_sum(attention_weights * C.sequence.broadcast_as(unpacked_encoder_hidden_state, attention_weights), axis=0)
    # attended_encoder_hidden_state = [#, d] [1, h]
    output = attn_final_stab(C.reshape(attended_encoder_hidden_state, (), 0, 1))
    # output = [#, d], [h]
    return output
def predict(X, model, inputs, classification=True):
    """Evaluate `model` on `X`.

    For classification, returns a column vector of argmax class indices
    (float32, shape (n, 1)) computed from the softmaxed outputs; otherwise
    returns the raw model outputs.
    """
    if not classification:
        # regression: raw network output
        return model.eval({inputs: X})
    # classification: softmax then per-row argmax
    probs = C.softmax(model).eval({inputs: X})
    label_columns = [[np.argmax(row)] for row in probs]
    return np.array(label_columns, dtype=np.float32)
def attention(query, key, value): dk = C.reduce_sum(C.ones_like(query)) # cannot use sequence.last, will conflict with recurrence # dk: [#, *] [1, ] and value = int(dim_of_query) unpacked_key = C.sequence.unpack(key, padding_value=0, no_mask_output=True) # [#] [-3, key_dim] unpacked_value = C.sequence.unpack(value, padding_value=0, no_mask_output=True) # [#] [-3, value_dim] broadcasted_key = C.sequence.broadcast_as(unpacked_key, query) # [#, *] [-3, key_dim] scaled = C.times_transpose(query, broadcasted_key) / dk # [#, *] [q_dim] @ [#, *] [key_dim, -3], assert q_dim == key_dim # scaled: [#, *] [-3, ] => for every key seq element, there is a corresponding score # masked out invalid temporal connections to obey_sequence_order if obey_sequence_order and max_seq_len: unpacked_scaled, scaled_mask = C.sequence.unpack(scaled, padding_value=0).outputs # unpacked_scaled: [#] [-3, -3] <== matrix will be top right diagonally zero-ed # scaled_mask: [#] [-3,] minus_inf = C.constant(-1e+30) valid_connections = C.Constant(np.tril(np.ones((max_seq_len, max_seq_len)), k=0)) # [] [max_seq, max_seq] valid_connections = C.reconcile_dynamic_axes(valid_connections, unpacked_scaled) # [#] [max_seq, max_seq] valid_connections = C.crop_manual(valid_connections, unpacked_scaled, 0, 0) # [#] [-3, -3] unpacked_scaled = C.element_select(valid_connections, unpacked_scaled, minus_inf) # [#] [-3, -3] scaled = C.to_sequence_like(unpacked_scaled, query) # [#, *] [-3] elif obey_sequence_order and not max_seq_len: raise ValueError("max_seq_len must be defined when obey_sequence_order is True") attended = C.times(C.softmax(scaled, axis=-1), C.sequence.broadcast_as(unpacked_value, query)) # [#, *] [value_dim,] return attended
def getPredictions(model, testX, testY, modelName):
    """Return class predictions for `testX`.

    For ``modelName == 'Deep Neural Net'``, `model` is the
    [classifier, trainer, input_var, label_var] list produced at training
    time; labels are expanded to two one-hot-style columns before evaluation
    and a binary 1/0 prediction DataFrame is returned. Any other model is
    assumed to expose a scikit-learn style ``.predict``.
    """
    if modelName == 'Deep Neural Net':
        # model = [classifier, trainer, input, label]
        classifier = model[0]
        trainer = model[1]
        input_var = model[2]   # renamed: avoid shadowing the `input` builtin
        label_var = model[3]
        # second column is the complement indicator of the first
        newCol = np.where(testY == 0, 1, 0)
        newCol = pd.DataFrame(newCol)
        testY = testY.reset_index(drop=True)
        testY = pd.concat([testY, newCol], axis=1, ignore_index=True)
        # .as_matrix() was removed in pandas 1.0; .to_numpy() is the replacement
        testY = np.ascontiguousarray(testY.to_numpy().astype(np.float32))
        testX = np.ascontiguousarray(testX.to_numpy().astype(np.float32))
        trainer.test_minibatch({input_var: testX, label_var: testY})
        out = C.softmax(classifier)
        predictedLabelProbs = out.eval({input_var: testX})
        predictedLabelProbs = pd.DataFrame(predictedLabelProbs)
        # predict 1 when the first column's probability dominates
        predictions = pd.DataFrame(
            np.where(
                predictedLabelProbs[predictedLabelProbs.columns[0]] >
                predictedLabelProbs[predictedLabelProbs.columns[1]], 1, 0))
    else:
        predictions = model.predict(testX)
    return predictions
def create_faster_rcnn_eval_model(model, image_input, dims_input, cfg, rpn_model=None):
    """Assemble a frozen Faster R-CNN evaluation graph from a trained model.

    Clones the conv stack, the RPN head (optionally from a separate
    `rpn_model`), and the ROI classifier/regressor, rebuilding the proposal
    layer so config changes take effect. Returns a combined function with
    outputs [cls_pred, rpn_rois, bbox_regr].
    """
    print("creating eval model")
    last_conv_node_name = cfg["MODEL"].LAST_CONV_NODE_NAME
    conv_layers = clone_model(model, [cfg["MODEL"].FEATURE_NODE_NAME],
                              [last_conv_node_name], CloneMethod.freeze)
    conv_out = conv_layers(image_input)

    model_with_rpn = model if rpn_model is None else rpn_model
    rpn = clone_model(model_with_rpn, [last_conv_node_name],
                      ["rpn_cls_prob_reshape", "rpn_bbox_pred"],
                      CloneMethod.freeze)
    rpn_out = rpn(conv_out)
    # we need to add the proposal layer anew to account for changing configs when buffering proposals in 4-stage training
    rpn_rois = create_proposal_layer(rpn_out.outputs[0], rpn_out.outputs[1],
                                     dims_input, cfg)

    roi_fc_layers = clone_model(model,
                                [last_conv_node_name, "rpn_target_rois"],
                                ["cls_score", "bbox_regr"], CloneMethod.freeze)
    pred_net = roi_fc_layers(conv_out, rpn_rois)
    cls_score = pred_net.outputs[0]
    bbox_regr = pred_net.outputs[1]

    if cfg.BBOX_NORMALIZE_TARGETS:
        # undo the target normalization applied during training
        num_boxes = int(bbox_regr.shape[1] / 4)
        bbox_normalize_means = np.array(cfg.BBOX_NORMALIZE_MEANS * num_boxes)
        bbox_normalize_stds = np.array(cfg.BBOX_NORMALIZE_STDS * num_boxes)
        bbox_regr = plus(element_times(bbox_regr, bbox_normalize_stds),
                         bbox_normalize_means,
                         name='bbox_regr')

    cls_pred = softmax(cls_score, axis=1, name='cls_pred')
    eval_model = combine([cls_pred, rpn_rois, bbox_regr])
    return eval_model
def attention(h_enc, h_dec): history_axis = h_dec # we use history_axis wherever we pass this only for the sake of passing its axis # TODO: pull this apart so that we can compute the encoder window only once and apply it to multiple decoders # --- encoder state window (h_enc, h_enc_valid) = PastValueWindow( attention_span, axis=attention_axis, go_backwards=go_backwards)(h_enc).outputs h_enc_proj = attn_proj_enc(h_enc) # window must be broadcast to every decoder time step h_enc_proj = C.sequence.broadcast_as(h_enc_proj, history_axis) h_enc_valid = C.sequence.broadcast_as(h_enc_valid, history_axis) # --- decoder state # project decoder hidden state h_dec_proj = attn_proj_dec(h_dec) tanh_out = C.tanh(h_dec_proj + h_enc_proj) # (attention_span, attention_dim) u = attn_proj_tanh(tanh_out) # (attention_span, 1) u_masked = u + ( h_enc_valid - 1 ) * 50 # logzero-out the unused elements for the softmax denominator TODO: use a less arbitrary number than 50 attention_weights = C.softmax( u_masked, axis=attention_axis) #, name='attention_weights') attention_weights = Label('attention_weights')(attention_weights) # now take weighted sum over the encoder state vectors h_att = C.reduce_sum(C.element_times(h_enc_proj, attention_weights), axis=attention_axis) h_att = attn_final_stab(h_att) return h_att
def scale_dot_product_attention_block(self, contextQ, contextV, contextK, name):
    """Scaled dot-product attention block over Q/V/K sequences.

    Each input is projected to 100 dims; scores are Q·K^T / sqrt(100), masked
    at padding positions with -1e30 before softmax, then applied to V.
    Returns the attended values wrapped in a named CNTK block.
    """
    Q = C.placeholder(shape=(2 * self.hidden_dim, ),
                      dynamic_axes=[self.b_axis, self.q_axis])
    V = C.placeholder(shape=(2 * self.hidden_dim, ),
                      dynamic_axes=[self.b_axis, self.q_axis])
    K = C.placeholder(shape=(2 * self.hidden_dim, ),
                      dynamic_axes=[self.b_axis, self.q_axis])

    Ql = C.layers.Dense(100)(Q)
    Vl = C.layers.Dense(100)(V)
    Kl = C.layers.Dense(100)(K)

    # unpack keys/values so every query step can attend over all of them
    kvw, kvw_mask = C.sequence.unpack(Kl, padding_value=0).outputs
    vvw, _ = C.sequence.unpack(Vl, padding_value=0).outputs
    KT = C.swapaxes(kvw)

    # scaled dot-product scores
    S = C.reshape(C.times(Ql, KT) / math.sqrt(100), -1)
    kvw_mask_expanded = C.sequence.broadcast_as(kvw_mask, Ql)
    # mask padding with a large negative before softmax
    S = C.softmax(
        C.element_select(kvw_mask_expanded, S, C.constant(-1e+30)))
    att = C.times(S, vvw)

    return C.as_block(att, [(Q, contextQ), (V, contextV), (K, contextK)],
                      'sdp_attention_block' + name,
                      'sdp_attention_block' + name)
def eval_single_image(loaded_model, image_path, image_dims):
    """Evaluate one image and return its softmax class probabilities.

    The image is resized to (image_dims[2], image_dims[1]), converted
    RGB -> BGR, reordered HWC -> CHW, then fed to the model. Returns
    ["None"] after logging when the file cannot be found.
    """
    try:
        picture = Image.open(image_path)
        if image_path.endswith("png"):
            # flatten transparency onto a white canvas; the image itself
            # serves as the paste mask
            canvas = Image.new("RGB", picture.size, (255, 255, 255))
            canvas.paste(picture, picture)
            picture = canvas
        shrunk = picture.resize((image_dims[2], image_dims[1]), Image.ANTIALIAS)
        # RGB -> BGR channel order, then HWC -> CHW layout
        bgr = np.asarray(shrunk, dtype=np.float32)[..., [2, 1, 0]]
        chw = np.ascontiguousarray(np.rollaxis(bgr, 2))

        # compute model output and squash into probabilities
        model_out = loaded_model.eval({loaded_model.arguments[0]: [chw]})
        return cntk.softmax(model_out[0]).eval()
    except FileNotFoundError:
        print("Could not open (skipping file): ", image_path)
        return ["None"]
def processInput(self, count):
    """Yield visualization tuples for the next `count` examples.

    Pulls one batch from `self.gen`, runs the network, and for each example
    yields (imgIn, imgOut, toMove, win, predWin) where imgIn/imgOut come from
    `self.buildImages`.
    """
    X, Y, W = next(self.gen)
    outs = self.net(X)
    for i in range(count):
        # move probabilities as percentages over the flat board vector
        outVec = cntk.softmax(outs[0][i]).eval() * 100.0
        winVec = cntk.softmax(outs[1][i]).eval()
        # The flat vector is indexed y * BoardLength + x, so reshape row-major
        # and transpose to obtain exOut[x, y]. Replaces the original O(B^2)
        # Python fill loop; the float64 buffer matches the old np.zeros output.
        exOut = np.empty((BoardLength, BoardLength))
        exOut[:] = np.asarray(outVec).reshape(BoardLength, BoardLength).T
        win = W[i, 1]
        predWin = winVec[1]
        toMove = X[i, 0, 0, 0]
        imgIn, imgOut = self.buildImages(X[i], np.argmax(Y[i]), exOut, toMove)
        yield imgIn, imgOut, toMove, win, predWin
def sclstm(input, inputH, inputC, svpair): emb = emb_in(input) #initial state of lstm, h0, c0, sv0 initial_state = (inputH, inputC, svpair) latent_vector = RecurrenceFrom(lstm_in, go_backwards=False, return_full_state=True)(*(initial_state + (emb,))) #output vector with vocab dimension output_vector = proj_in(latent_vector.outputs[0]) output_vector = C.softmax(output_vector) return output_vector, latent_vector.outputs[0], latent_vector.outputs[1], latent_vector.outputs[2], svpair
def _func(x):
    """Compute the ELMo embedding of `x`: a learned softmax-weighted mix of
    the token layer (duplicated to match width) and the two biLM layers,
    scaled by `gamma`. Relies on closure vars `encoder`, `bilm`, `scales`,
    and `gamma`.
    """
    ph = C.placeholder()
    first_out = encoder(ph)
    second_out, third_out = bilm(first_out).outputs  # [#,*][1024]
    # duplicate the token embedding so it matches the biLM layer width
    dup_first_out = C.splice(first_out, first_out)  #[#,*][1024]
    # learned per-layer mixing weights
    s = C.softmax(scales)
    out = gamma*(s[0]*dup_first_out+s[1]*second_out+s[2]*third_out)
    return C.as_block(
        out,
        [(ph, x)], 'Elmo', 'Elmo'
    )
def finalize_network(reader, model_details, max_amount_of_epochs,
                     samples_per_epoch, samples_per_minibatch,
                     pixel_dimensions, classes, learning_rate):
    """Train a transfer-learning CNN and return its inference network dict.

    Builds input variables, trains with momentum SGD + checkpointing, saves
    the (pre-softmax) model to disk, and returns a dict with the input/label
    variables and the softmax-wrapped model.

    NOTE(review): the file is saved from `model` (raw logits), while the
    returned network['model'] is softmax-wrapped — confirm consumers expect
    that difference.
    """
    features = input_variable(shape=(pixel_dimensions['depth'],
                                     pixel_dimensions['height'],
                                     pixel_dimensions['width']))
    label = input_variable(shape=len(classes))
    # speeds up training
    normalized_features = element_times(1.0 / 256.0, features)
    model = create_tf_model(model_details,
                            num_classes=len(classes),
                            input_features=normalized_features,
                            freeze=True)
    loss = cross_entropy_with_softmax(model, label)
    metric = classification_error(model, label)
    learner = momentum_sgd(parameters=model.parameters,
                           lr=learning_rate_schedule(learning_rate,
                                                     UnitType.minibatch),
                           momentum=0.9,
                           l2_regularization_weight=0.0005)
    reporter = ProgressPrinter(tag='training', num_epochs=max_amount_of_epochs)
    trainer = Trainer(model=model,
                      criterion=(loss, metric),
                      parameter_learners=[learner],
                      progress_writers=[reporter])
    log_number_of_parameters(model)
    map_input_to_streams_train = {
        features: reader.streams.features,
        label: reader.streams.labels
    }
    training_session(trainer=trainer,
                     mb_source=reader,
                     model_inputs_to_streams=map_input_to_streams_train,
                     mb_size=samples_per_minibatch,
                     progress_frequency=samples_per_epoch,
                     checkpoint_config=CheckpointConfig(
                         frequency=samples_per_epoch,
                         filename=os.path.join("./checkpoints",
                                               "ConvNet_Lego_VisiOn"),
                         restore=True)).train()
    network = {'features': features, 'label': label, 'model': softmax(model)}
    # f-string kept for consistency although it has no placeholders
    model_name = f"CNN-3200-224-resnet-18.model"
    export_path = os.path.abspath(
        os.path.join("..", "..", "Final models", "CNN", model_name))
    model.save(export_path)
    return network
def attention_layer(self, context, query, layer):
    """Attend `query` over `context` (R-Net-style gated attention).

    Builds an additive-attention score between each context position and all
    (unpacked) query positions, gates the concatenated features, and runs a
    bidirectional RNN over the result. Wrapped in a CNTK block named
    'attention_layer'.

    Args:
        context: sequence of 2*hidden_dim vectors (the passage).
        query:   sequence of 2*hidden_dim vectors (the question).
        layer:   name prefix for the attention RNN.
    """
    q_processed = C.placeholder(shape=(2*self.hidden_dim,))
    p_processed = C.placeholder(shape=(2*self.hidden_dim,))

    # unpack query into a fixed tensor plus validity mask
    qvw, qvw_mask = C.sequence.unpack(q_processed, padding_value=0).outputs

    wq = C.parameter(shape=(2*self.hidden_dim, 2*self.hidden_dim), init=C.glorot_uniform())
    wp = C.parameter(shape=(2*self.hidden_dim, 2*self.hidden_dim), init=C.glorot_uniform())
    wg = C.parameter(shape=(8*self.hidden_dim, 8*self.hidden_dim), init=C.glorot_uniform())
    v = C.parameter(shape=(2*self.hidden_dim, 1), init=C.glorot_uniform())

    # seq[tensor[2d]]  p_len x 2d
    wpt = C.reshape(C.times(p_processed, wp), (-1, 2*self.hidden_dim))
    # q_len x 2d
    wqt = C.reshape(C.times(qvw, wq), (-1, 2*self.hidden_dim))
    # additive attention scores: seq[tensor[q_len]]
    S = C.reshape(C.times(C.tanh(C.sequence.broadcast_as(wqt, p_processed) + wpt), v), (-1))
    qvw_mask_expanded = C.sequence.broadcast_as(qvw_mask, p_processed)
    # mask out padded query positions with a large negative value: seq[tensor[q_len]]
    S = C.element_select(qvw_mask_expanded, S, C.constant(-1e+30))
    # attention weights: seq[tensor[q_len]]
    A = C.softmax(S, axis=0)
    # seq[tensor[2d]]
    swap_qvw = C.swapaxes(qvw)
    # attended query vector per context position
    cq = C.reshape(C.reduce_sum(A * C.sequence.broadcast_as(swap_qvw, A), axis=1), (-1))
    # concat + elementwise interaction features: seq[tensor[8d]]
    uc_concat = C.splice(p_processed, cq, p_processed * cq, cq * cq)
    # gate: seq[tensor[8d]]
    gt = C.tanh(C.times(uc_concat, wg))
    uc_concat_star = gt * uc_concat
    # bidirectional RNN over the gated features
    vp = C.layers.Sequential([
        C.layers.Dropout(self.dropout),
        OptimizedRnnStack(self.hidden_dim, bidirectional=True,
                          use_cudnn=self.use_cudnn,
                          name=layer+'_attention_rnn')])(uc_concat_star)

    return C.as_block(
        vp,
        [(p_processed, context), (q_processed, query)],
        'attention_layer',
        'attention_layer')
def criteria(label, output, block_size, c_classes, weights):
    '''
    Define the loss function and metric.

    Args:
        label:   one-hot ground-truth map (class axis 0).
        output:  raw network scores (class axis 0).
        block_size: unused here; kept for interface compatibility.
        c_classes:  unused here; kept for interface compatibility.
        weights: per-class weights applied to the cross entropy.

    Returns:
        (mean_ce, pe): mean weighted cross entropy and a pixel-error metric
        that discounts class-0 (background) pixels.
    '''
    probs = cntk.softmax(output, axis=0)
    log_probs = cntk.log(probs)
    # weighted cross entropy over the class axis
    ce = cntk.times(weights, -cntk.element_times(log_probs, label),
                    output_rank=2)
    mean_ce = cntk.reduce_mean(ce)
    # classification error minus the fraction of class-0 (background) labels
    # (removed dead `_, w, h = label.shape` unpack — w and h were never used)
    pe = cntk.classification_error(probs, label, axis=0) - \
        cntk.reduce_sum(cntk.slice(label, 0, 0, 1)) / cntk.reduce_sum(label)
    return (mean_ce, pe)
def computeOutput(self, inferenceResults):
    """Turn raw inference scores into a list of {label, probability} dicts.

    Applies softmax to the scores, then pairs each probability with the
    label of the same index loaded from model/labels.json.
    """
    scores = np.squeeze(np.asarray(C.softmax(inferenceResults).eval()))
    with open("model/labels.json") as jsonFile:
        labels = json.load(jsonFile)
    # one entry per class, in index order
    return [
        {'label': str(labels[str(idx)]), 'probability': float(score)}
        for idx, score in enumerate(scores)
    ]
def __init__(self, num_bandits: int, num_arms: int, hp: Hyperparameters):
    """Set up the bandit gang, CNTK input variables, model and bookkeeping.

    Args:
        num_bandits: number of bandits in the gang.
        num_arms:    arms per bandit; also sizes the action index array.
        hp:          hyperparameters, stored and passed to create_model.
    """
    self.gang = BanditGang(num_bandits, num_arms)
    self.input_var = C.input(2, dtype=np.float32, name="input_var")  # state and proposed action
    self.output_var = C.input(1, name="output_var")
    self.label_var = C.input(1, name="label_var")
    self.create_model(hp)
    self.actions = np.arange(num_arms, dtype=np.int32)
    self.softmax = C.softmax(self.output_var)
    # NOTE(review): np.array((2,)) creates the 1-element array [2], not an
    # empty 2-vector — if a zeroed dummy input was intended, np.zeros(2)
    # would be it; confirm before changing.
    self.in_data = np.array((2,), dtype=np.float32)  # dummy input for network, for now.
    #self.truth = self.softmax.eval(np.array(self.bandit.arms, dtype=np.float32))
    self.hp = hp
    # self.error = self.get_squared_error()
    self.plotdata = {"loss":[]}  # loss history for plotting
def test_op_softmax_axis(sample, device_id, precision):
    """Check cntk softmax(axis=0) against a NumPy reference on a 2-D sample."""
    data = AA(sample, dtype=PRECISION_TO_TYPE[precision])
    assert len(data.shape) == 2
    # numerically-stable reference: shift by the max before exponentiating
    shifted = data - data.max()
    exps = np.exp(shifted)
    expected_forward = AA([exps / np.sum(exps)])

    from cntk import softmax
    actual = softmax(sample, axis=0).eval()
    assert np.array_equal(actual, expected_forward[0])
def test_op_softmax_axis(sample, device_id, precision):
    """Verify that cntk.softmax along axis 0 matches a NumPy-computed reference."""
    arr = AA(sample, dtype=PRECISION_TO_TYPE[precision])
    assert len(arr.shape) == 2
    # reference softmax, shifted by the max for numerical stability
    e = np.exp(arr - arr.max())
    expected_forward = AA([e / np.sum(e)])

    from cntk import softmax
    result = softmax(sample, axis=0).eval()
    assert np.array_equal(result, expected_forward[0])
def test_op_softmax_with_freedimension(sample, device_id, precision):
    """Softmax over an input whose first axis is a FreeDimension must match NumPy."""
    arr = AA(sample, dtype=PRECISION_TO_TYPE[precision])
    assert len(arr.shape) == 2
    # stable reference softmax
    e = np.exp(arr - arr.max())
    expected_forward = AA([e / np.sum(e)])

    from cntk import softmax, input_variable
    # first axis left free; bound at eval time by the fed sample
    x = input_variable((C.FreeDimension, arr.shape[1]))
    result = softmax(x, axis=0).eval({x: [sample]})[0]
    assert np.array_equal(result, expected_forward[0])
def test_data_resize():
    """Train one minibatch through a graph that unpacks and reshapes the batch axis."""
    mb_rows = 8
    weights = C.parameter(shape=(3, 2), name='w1')
    inp = C.input_variable(shape=[3], name='x')
    # softmax -> unpack batch axis -> flatten into a static vector
    flat = C.reshape(C.unpack_batch(C.softmax(C.times(inp, weights))),
                     [mb_rows * 2])
    loss = C.reduce_mean(-C.log(flat))

    schedule = C.learning_rate_schedule(0.01, C.UnitType.minibatch)
    optimizer = C.sgd(flat.parameters, schedule,
                      gradient_clipping_threshold_per_sample=1.0)
    trainer = C.Trainer(flat, (loss), [optimizer])

    trainer.train_minibatch({inp: np.random.randn(mb_rows, 3)})
def create_fast_rcnn_eval_model(model, image_input, roi_proposals, cfg):
    """Clone the trained Fast R-CNN into a frozen eval model.

    Clones from the feature node and 'roi_proposals' down to the class scores
    and bbox regression outputs, optionally un-normalizes the regression
    targets, and combines softmaxed class predictions with the regressors.

    Returns:
        combined CNTK function with outputs (cls_pred, bbox_regr).
    """
    print("creating eval model")
    predictor = clone_model(model, [cfg["MODEL"].FEATURE_NODE_NAME, "roi_proposals"],
                            ["cls_score", "bbox_regr"], CloneMethod.freeze)
    pred_net = predictor(image_input, roi_proposals)
    cls_score = pred_net.outputs[0]
    bbox_regr = pred_net.outputs[1]

    if cfg.BBOX_NORMALIZE_TARGETS:
        # undo the target normalization applied during training (per box: 4 coords)
        num_boxes = int(bbox_regr.shape[1] / 4)
        bbox_normalize_means = np.array(cfg.BBOX_NORMALIZE_MEANS * num_boxes)
        bbox_normalize_stds = np.array(cfg.BBOX_NORMALIZE_STDS * num_boxes)
        bbox_regr = plus(element_times(bbox_regr, bbox_normalize_stds),
                         bbox_normalize_means, name='bbox_regr')

    cls_pred = softmax(cls_score, axis=1, name='cls_pred')
    eval_model = combine([cls_pred, bbox_regr])

    if cfg["CNTK"].DEBUG_OUTPUT:
        plot(eval_model, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_eval." + cfg["CNTK"].GRAPH_TYPE))
    return eval_model
def test_load_save_unique_input(tmpdir):
    """A model with one unique input can be evaluated without naming that input,
    both after a native save/load round trip and after a legacy-format save."""
    i1 = C.input_variable((1,2), name='i1')
    root_node = C.softmax(i1)
    input1 = [[[1,2]]]
    expected = [[[[ 0.268941, 0.731059]]]]

    # sanity check before any serialization
    assert np.allclose(root_node.eval(input1), expected)

    filename = str(tmpdir / 'i_plus_0.mod')
    root_node.save(filename)
    restored = C.Function.load(filename)

    # Test specifying the only value for a unique input
    assert np.allclose(restored.eval(input1), expected)

    filename = filename + '.legacy'
    save_as_legacy_model(root_node, filename)
    restored = C.Function.load(filename)
    assert np.allclose(restored.eval(input1), expected)
def create_eval_model(model, image_input, dims_input, rpn_model=None):
    """Assemble a frozen Faster R-CNN eval model from a trained model.

    Clones three frozen stages — conv feature extractor, RPN, and the ROI
    fc head — rewires them (the eval ROI head consumes the RPN's proposals
    instead of the training-time target ROIs), optionally un-normalizes the
    bbox regression outputs, and combines (cls_pred, rpn_rois, bbox_regr).

    Args:
        model:       trained Faster R-CNN model to clone from.
        image_input: input image variable.
        dims_input:  image-dimension input fed to the RPN.
        rpn_model:   optional separate model to clone the RPN from;
                     defaults to `model`.
    """
    print("creating eval model")
    # stage 1: frozen conv feature extractor
    conv_layers = clone_model(model, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
    conv_out = conv_layers(image_input)

    # stage 2: frozen RPN (from rpn_model if given)
    model_with_rpn = model if rpn_model is None else rpn_model
    rpn = clone_model(model_with_rpn, [last_conv_node_name, "dims_input"], ["rpn_rois"], CloneMethod.freeze)
    rpn_rois = rpn(conv_out, dims_input)

    # stage 3: frozen ROI head — cloned from the training input "rpn_target_rois"
    # but fed the RPN's own proposals at eval time
    roi_fc_layers = clone_model(model, [last_conv_node_name, "rpn_target_rois"], ["cls_score", "bbox_regr"], CloneMethod.freeze)
    pred_net = roi_fc_layers(conv_out, rpn_rois)
    cls_score = pred_net.outputs[0]
    bbox_regr = pred_net.outputs[1]

    if cfg["TRAIN"].BBOX_NORMALIZE_TARGETS and cfg["TRAIN"].BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # undo training-time target normalization (4 coords per box)
        num_boxes = int(bbox_regr.shape[1] / 4)
        bbox_normalize_means = np.array(cfg["TRAIN"].BBOX_NORMALIZE_MEANS * num_boxes)
        bbox_normalize_stds = np.array(cfg["TRAIN"].BBOX_NORMALIZE_STDS * num_boxes)
        bbox_regr = plus(element_times(bbox_regr, bbox_normalize_stds),
                         bbox_normalize_means, name='bbox_regr')

    cls_pred = softmax(cls_score, axis=1, name='cls_pred')
    eval_model = combine([cls_pred, rpn_rois, bbox_regr])
    return eval_model
# Trainer. minibatch_size = 32 progress_writer = cntk.logging.ProgressPrinter(50) # helper for logging progress; log every 50 minibatches trainer = cntk.Trainer(None, criterion, [learner], [progress_writer]) # Train! for i in range(0, len(X_train), minibatch_size): # loop over minibatches x = X_train[i:i+minibatch_size] # get one minibatch worth of data y = Y_train[i:i+minibatch_size] trainer.train_minibatch({data: x, label_one_hot: y}) # update model from one minibatch trainer.summarize_training_progress() # Test error rate on the test set. evaluator = cntk.Evaluator(metric, [progress_writer]) for i in range(0, len(X_test), minibatch_size): # loop over minibatches x = X_test[i:i+minibatch_size] # get one minibatch worth of data y = Y_test[i:i+minibatch_size] evaluator.test_minibatch({data: x, label_one_hot: y}) # test one minibatch evaluator.summarize_test_progress() # Inspect predictions on one minibatch, for illustration. # For evaluation, we map the output of the network between 0-1 and convert them into probabilities # for the two classes. We use a softmax function to get the probabilities of each of the class. get_probability = cntk.softmax(model) X_check, Y_check = generate_synthetic_data(25) # a small batch of 25 examples result = get_probability.eval(X_check) print("Label :", [label.todense().argmax() for label in Y_check]) print("Predicted:", [result[i,:].argmax() for i in range(len(result))])
def hierarchical_softmax_layer_for_sequence(input_var, num_output_classes, target_class, target_output_in_class, batch_size, w1, b1, w2s, b2s):
    '''
    A two layers hierarchical softmax function with sequence axis input:

    Example:
        >>> input_dim = 2
        >>> num_output_classes = 4
        >>> minibatch_size = 3
        >>> seq_size = 5
        >>> n_classes = int(math.ceil(math.sqrt(num_output_classes)))
        >>> n_outputs_per_class = n_classes

        >>> w1 = C.parameter(shape=(input_dim, n_classes), init=C.glorot_normal(seed=2), name='w1')
        >>> b1 = C.parameter(shape=(n_classes), init=C.glorot_normal(seed=3), name='b1')
        >>> w2s = C.parameter(shape=(n_classes, input_dim, n_outputs_per_class), init=C.glorot_normal(seed=4), name='w2s')
        >>> b2s = C.parameter(shape=(n_classes, n_outputs_per_class), init=C.glorot_normal(seed=5), name='b2s')

        # neural network structure for hierarchical softmax
        >>> h_input = C.sequence.input_variable(input_dim)
        >>> h_target_class = C.sequence.input_variable([1])
        >>> h_target_output_in_class = C.sequence.input_variable([1])
        >>> h_z, class_probs, all_probs = hierarchical_softmax_layer_for_sequence(h_input, num_output_classes, h_target_class, h_target_output_in_class, minibatch_size, w1, b1, w2s, b2s)

        >>> a = np.reshape(np.arange(seq_size * minibatch_size * input_dim, dtype = np.float32), (seq_size, minibatch_size, input_dim))
        >>> labels = np.reshape(np.arange(seq_size * minibatch_size, dtype = np.float32), (seq_size, minibatch_size, 1)) % num_output_classes
        >>> target_labels = labels // n_outputs_per_class
        >>> target_output_in_labels = labels % n_outputs_per_class
        >>> h_z.eval({h_input: a, h_target_class: target_labels, h_target_output_in_class: target_output_in_labels})[1]
        array([[ 0.000859],
               [ 0.      ],
               [ 0.      ]], dtype=float32)

    Args:
        input_var: class:`~cntk.ops.functions.Function` that outputs a tensor with sequence axis and batch axis
        num_output_classes: int
        target_class: class:`~cntk.ops.functions.Function` that outputs a tensor with sequence axis and batch axis
        target_output_in_class: class:`~cntk.ops.functions.Function` that outputs a tensor with sequence axis and batch axis
        batch_size: int
        w1: C.parameter
        b1: C.parameter
        w2s: C.parameter
        b2s: C.parameter
    Returns:
        output_prob: class:`~cntk.ops.functions.Function`
        class_probs: class:`~cntk.ops.functions.Function`
        all_probs: a list of class:`~cntk.ops.functions.Function`
    '''
    input_dim = input_var.shape[0]

    # two-level tree: ~sqrt(V) classes, each with ~sqrt(V) outputs
    n_classes = int(math.ceil(math.sqrt(num_output_classes)))
    n_outputs_per_class = n_classes

    # level 1: P(class | input)
    class_probs = C.softmax(b1 + C.times(input_var, w1))

    # level 2: select the target class' weight slice, then P(output | class)
    w2_temp = C.gather(w2s, target_class)
    w2 = reshape(w2_temp, (input_dim, n_outputs_per_class))
    w2 = C.sequence.broadcast_as(w2, input_var)
    b2 = reshape(C.gather(b2s, target_class), (n_outputs_per_class))
    b2 = C.sequence.broadcast_as(b2, input_var)

    times_result = times(input_var, w2)
    probs_in_class = softmax(b2 + times_result)
    probs_in_class = C.sequence.broadcast_as(probs_in_class, target_output_in_class)
    # pick out the probability of the target output within the class
    target_output_in_class = C.one_hot(target_output_in_class, n_outputs_per_class, False)
    probs_in_class = C.sequence.broadcast_as(probs_in_class, target_output_in_class)
    prob_in_class = C.times_transpose(probs_in_class, target_output_in_class)

    # pick out the probability of the target class
    target_class = C.one_hot(target_class, n_classes, False)
    class_probs = C.sequence.broadcast_as(class_probs, target_class)
    class_prob = C.times_transpose(class_probs, target_class)

    # P(output) = P(class) * P(output | class)
    output_prob = C.element_times(class_prob, prob_in_class)

    # this is for calculating all the outputs' probabilities
    all_probs = []
    for i in range(n_classes):
        ci = C.constant(i)
        w2a = C.reshape(C.gather(w2s, ci), (input_dim, n_outputs_per_class))
        w2a = C.sequence.broadcast_as(w2a, input_var)
        b2a = C.reshape(C.gather(b2s, ci), (n_outputs_per_class))
        b2a = C.sequence.broadcast_as(b2a, input_var)
        probs_in_classa = C.softmax(b2a + times(input_var, w2a))
        cia = C.constant(i, shape=[1])
        cia = C.reconcile_dynamic_axes(cia, class_probs)
        cia = C.one_hot(cia, n_outputs_per_class, False)
        class_proba = C.times_transpose(class_probs, cia)
        class_proba = C.sequence.broadcast_as(class_proba, probs_in_classa)
        output_proba = C.element_times(class_proba, probs_in_classa)
        all_probs.append(output_proba)

    return output_prob, class_probs, all_probs
def test_Softmax(tmpdir, dtype):
    """ONNX round-trip check for softmax over a constant input at the given dtype."""
    with C.default_options(dtype = dtype):
        logits = np.array([[1, 1, 2, 3]]).astype(dtype)
        model = C.softmax(logits)
        verify_no_input(model, tmpdir, 'Softmax_0')
def test_Softmax(tmpdir):
    """ONNX round-trip check for softmax over a constant input."""
    logits = [[1, 1, 2, 3]]
    verify_no_input(C.softmax(logits), tmpdir, 'Softmax_0')
def create_rpn(conv_out, scaled_gt_boxes, im_info, add_loss_functions=True, proposal_layer_param_string=None):
    '''
    Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    Args:
        conv_out:        The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        im_info:         (image_widht, image_height, image_scale) as CNTK variable or constant
        add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses
        proposal_layer_param_string: A yaml parameter string that is passed to the proposal layer.

    Returns:
        rpn_rois - the proposed ROIs
        rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness)
    '''
    # RPN network
    # init = 'normal', initValueScale = 0.01, initBias = 0.1
    rpn_conv_3x3 = Convolution((3, 3), 256, activation=relu, pad=True, strides=1,
                               init = normal(scale=0.01), init_bias=0.1)(conv_out)
    rpn_cls_score = Convolution((1, 1), 18, activation=None, name="rpn_cls_score",
                                init = normal(scale=0.01), init_bias=0.1)(rpn_conv_3x3)  # 2(bg/fg) * 9(anchors)
    rpn_bbox_pred = Convolution((1, 1), 36, activation=None, name="rpn_bbox_pred",
                                init = normal(scale=0.01), init_bias=0.1)(rpn_conv_3x3)  # 4(coords) * 9(anchors)

    # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W)
    num_predictions = int(np.prod(rpn_cls_score.shape) / 2)
    rpn_cls_score_rshp = reshape(rpn_cls_score, (2, num_predictions))
    rpn_cls_prob = softmax(rpn_cls_score_rshp, axis=0, name="objness_softmax")
    rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape)

    # proposal layer (user-defined function wrapping the anchor/NMS logic)
    rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info,
                                               param_str=proposal_layer_param_string))
    rpn_rois = alias(rpn_rois_raw, name='rpn_rois')

    rpn_losses = None
    if(add_loss_functions):
        # RPN targets
        # Comment: rpn_cls_score is only passed   vvv   to get width and height of the conv feature map ...
        atl = user_function(AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info,
                                              param_str=proposal_layer_param_string))
        rpn_labels = atl.outputs[0]
        rpn_bbox_targets = atl.outputs[1]
        rpn_bbox_inside_weights = atl.outputs[2]

        # For loss functions: ignore label predictions for the 'ignore label',
        # i.e. set target and prediction to 0 --> needs to be softmaxed before
        rpn_labels_rshp = reshape(rpn_labels, (1, num_predictions))
        ignore = user_function(IgnoreLabel(rpn_cls_prob, rpn_labels_rshp, ignore_label=-1))
        rpn_cls_prob_ignore = ignore.outputs[0]
        fg_targets = ignore.outputs[1]
        bg_targets = 1 - fg_targets
        rpn_labels_ignore = splice(bg_targets, fg_targets, axis=0)

        # RPN losses: cross entropy for objectness + SmoothL1 for box regression
        rpn_loss_cls = cross_entropy_with_softmax(rpn_cls_prob_ignore, rpn_labels_ignore, axis=0)
        rpn_loss_bbox = user_function(SmoothL1Loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights))
        rpn_losses = plus(reduce_sum(rpn_loss_cls), reduce_sum(rpn_loss_bbox), name="rpn_losses")

    return rpn_rois, rpn_losses
def get_probability(data):
    """Return the model's class probabilities for `data` via softmax."""
    scores = model(data)
    return C.softmax(scores)
# Plot the running prediction error collected during training.
plt.plot(plotdata["batchsize"], plotdata["avgerror"], 'r--')
plt.xlabel('Minibatch number')
plt.ylabel('Label Prediction Error')
plt.title('Minibatch run vs. Label Prediction Error')
plt.show()

### Evaluation/Testing
# Run the trained model on newly generated dataset
test_minibatch_size = 25
features, labels = generate_random_data_sample(test_minibatch_size, input_dim, num_output_classes)
trainer.test_minibatch({feature: features, label: labels})

# Checking prediction/evaluation: softmax the raw scores into probabilities
out = C.softmax(z)
result = out.eval({feature: features})
print("Label :", [np.argmax(label) for label in labels])
print("Predicted:", [np.argmax(result[i, :]) for i in range(len(result))])

# Visualization
# Model parameters
print(param_dict['b'].value)
bias_vector = param_dict['b'].value
weight_matrix = param_dict['w'].value

# Plot the data
import matplotlib.pyplot as plt
def create_rpn(conv_out, scaled_gt_boxes, im_info, add_loss_functions=True,
               proposal_layer_param_string=None, conv_bias_init=0.0):
    '''
    Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    Args:
        conv_out:        The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        im_info:         A CNTK variable or constant containing
                         (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
                         e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000
        add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses
        proposal_layer_param_string: A yaml parameter string that is passed to the proposal layer.
        conv_bias_init:  initial bias value for the RPN convolutions.

    Returns:
        rpn_rois - the proposed ROIs
        rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness)
    '''
    # RPN network
    # init = 'normal', initValueScale = 0.01, initBias = 0.1
    num_channels = cfg["CNTK"].RPN_NUM_CHANNELS
    rpn_conv_3x3 = Convolution((3, 3), num_channels, activation=relu, pad=True, strides=1,
                               init = normal(scale=0.01), init_bias=conv_bias_init)(conv_out)
    rpn_cls_score = Convolution((1, 1), 18, activation=None, name="rpn_cls_score",
                                init = normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3)  # 2(bg/fg) * 9(anchors)
    rpn_bbox_pred = Convolution((1, 1), 36, activation=None, name="rpn_bbox_pred",
                                init = normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3)  # 4(coords) * 9(anchors)

    # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W)
    num_predictions = int(rpn_cls_score.shape[0] / 2)
    rpn_cls_score_rshp = reshape(rpn_cls_score,
                                 (2, num_predictions, rpn_cls_score.shape[1], rpn_cls_score.shape[2]),
                                 name="rpn_cls_score_rshp")

    # wrap the softmax in a block so the exported graph shows a single 'Softmax' node
    p_rpn_cls_score_rshp = cntk.placeholder()
    rpn_cls_sm = softmax(p_rpn_cls_score_rshp, axis=0)
    rpn_cls_prob = cntk.as_block(rpn_cls_sm, [(p_rpn_cls_score_rshp, rpn_cls_score_rshp)],
                                 'Softmax', 'rpn_cls_prob')
    rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape, name="rpn_cls_prob_reshape")

    # proposal layer (user-defined function wrapping the anchor/NMS logic)
    rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info,
                                               param_str=proposal_layer_param_string))
    rpn_rois = alias(rpn_rois_raw, name='rpn_rois')

    rpn_losses = None
    if(add_loss_functions):
        # RPN targets
        # Comment: rpn_cls_score is only passed   vvv   to get width and height of the conv feature map ...
        atl = user_function(AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info,
                                              param_str=proposal_layer_param_string))
        rpn_labels = atl.outputs[0]
        rpn_bbox_targets = atl.outputs[1]
        rpn_bbox_inside_weights = atl.outputs[2]

        # classification loss (as a block; labels < 0 mark 'ignore' anchors)
        p_rpn_labels = cntk.placeholder()
        p_rpn_cls_score_rshp = cntk.placeholder()

        # keeps == 1 for anchors with a valid (>= 0) label, 0 for ignored ones
        keeps = cntk.greater_equal(p_rpn_labels, 0.0)
        fg_labels = element_times(p_rpn_labels, keeps, name="fg_targets")
        bg_labels = minus(1, fg_labels, name="bg_targets")
        rpn_labels_ignore = splice(bg_labels, fg_labels, axis=0)
        rpn_ce = cross_entropy_with_softmax(p_rpn_cls_score_rshp, rpn_labels_ignore, axis=0)
        # zero out the loss contribution of ignored anchors
        rpn_loss_cls = element_times(rpn_ce, keeps)

        # The terms that are accounted for in the cls loss are those that have a label >= 0
        cls_num_terms = reduce_sum(keeps)
        cls_normalization_factor = 1.0 / cls_num_terms
        normalized_rpn_cls_loss = reduce_sum(rpn_loss_cls) * cls_normalization_factor

        reduced_rpn_loss_cls = cntk.as_block(normalized_rpn_cls_loss,
                                             [(p_rpn_labels, rpn_labels),
                                              (p_rpn_cls_score_rshp, rpn_cls_score_rshp)],
                                             'CE_with_ignore', 'norm_rpn_cls_loss')

        # regression loss (as a block)
        p_rpn_bbox_pred = cntk.placeholder()
        p_rpn_bbox_targets = cntk.placeholder()
        p_rpn_bbox_inside_weights = cntk.placeholder()
        rpn_loss_bbox = SmoothL1Loss(cfg["CNTK"].SIGMA_RPN_L1, p_rpn_bbox_pred,
                                     p_rpn_bbox_targets, p_rpn_bbox_inside_weights, 1.0)
        # The bbox loss is normalized by the rpn batch size
        bbox_normalization_factor = 1.0 / cfg["TRAIN"].RPN_BATCHSIZE
        normalized_rpn_bbox_loss = reduce_sum(rpn_loss_bbox) * bbox_normalization_factor

        reduced_rpn_loss_bbox = cntk.as_block(normalized_rpn_bbox_loss,
                                              [(p_rpn_bbox_pred, rpn_bbox_pred),
                                               (p_rpn_bbox_targets, rpn_bbox_targets),
                                               (p_rpn_bbox_inside_weights, rpn_bbox_inside_weights)],
                                              'SmoothL1Loss', 'norm_rpn_bbox_loss')

        rpn_losses = plus(reduced_rpn_loss_cls, reduced_rpn_loss_bbox, name="rpn_losses")

    return rpn_rois, rpn_losses