def testShapeInferenceTranspose(self):
    """Verify shape inference for Transpose with default and explicit axes."""
    test_model = model_helper.ModelHelper(name="test_model")
    # Rank-5 input so a nontrivial axis permutation is exercised.
    input_data = np.random.rand(4, 2, 3, 3, 5).astype(np.float32)
    workspace.FeedBlob("tensor", input_data)

    # Case 1: axes omitted.
    brew.transpose(
        test_model,
        ["tensor"],
        "transpose",
    )
    self.InferTensorRunAndCompare(test_model)

    # Case 2: an explicit random permutation of all five axes.
    brew.transpose(
        test_model,
        ["tensor"],
        "transpose",
        axes=np.random.permutation(5),
    )
    return self.InferTensorRunAndCompare(test_model)
def testShapeInferenceTranspose(self):
    """Shape inference must agree with execution for Transpose, both when the
    axes argument is left out and when a full permutation is supplied."""
    model = model_helper.ModelHelper(name="test_model")
    workspace.FeedBlob(
        "tensor",
        np.random.rand(4, 2, 3, 3, 5).astype(np.float32),
    )

    # First without axes, then with a random permutation of the 5 dims.
    brew.transpose(model, ["tensor"], "transpose")
    self.InferTensorRunAndCompare(model)

    brew.transpose(model, ["tensor"], "transpose", axes=np.random.permutation(5))
    return self.InferTensorRunAndCompare(model)
def _calc_attention_logits_from_sum_match(
    model,
    decoder_hidden_encoder_outputs_sum,
    encoder_output_dim,
    scope,
):
    """Squash the summed match vectors and project them to one logit per
    encoder position.

    Returns a blob of shape [batch_size, encoder_length, 1].
    """
    # In-place Tanh; shape stays [encoder_length, batch_size, encoder_output_dim].
    squashed = model.net.Tanh(
        decoder_hidden_encoder_outputs_sum,
        decoder_hidden_encoder_outputs_sum,
    )
    # FC over the last axis -> [encoder_length, batch_size, 1]; bias frozen.
    logits = brew.fc(
        model,
        squashed,
        s(scope, 'attention_logits'),
        dim_in=encoder_output_dim,
        dim_out=1,
        axis=2,
        freeze_bias=True,
    )
    # Swap the leading two axes -> [batch_size, encoder_length, 1].
    return brew.transpose(
        model,
        logits,
        s(scope, 'attention_logits_transposed'),
        axes=[1, 0, 2],
    )
def _calc_attention_logits_from_sum_match(
    model,
    decoder_hidden_encoder_outputs_sum,
    encoder_output_dim,
    scope,
):
    """Turn summed decoder/encoder match vectors into per-position attention
    logits of shape [batch_size, encoder_length, 1]."""
    # Tanh applied in place on the sum blob:
    # [encoder_length, batch_size, encoder_output_dim]
    decoder_hidden_encoder_outputs_sum = model.net.Tanh(
        decoder_hidden_encoder_outputs_sum,
        decoder_hidden_encoder_outputs_sum,
    )
    # Single-output FC along axis 2 with a frozen bias:
    # [encoder_length, batch_size, 1]
    attention_logits = brew.fc(
        model,
        decoder_hidden_encoder_outputs_sum,
        s(scope, 'attention_logits'),
        dim_in=encoder_output_dim,
        dim_out=1,
        axis=2,
        freeze_bias=True,
    )
    # [batch_size, encoder_length, 1]
    transposed = brew.transpose(
        model,
        attention_logits,
        s(scope, 'attention_logits_transposed'),
        axes=[1, 0, 2],
    )
    return transposed
def prepare_input(self, model, input_blob):
    """Lazily build the attention-side encoder blobs, then delegate input
    preparation to the wrapped decoder cell."""
    if self.encoder_outputs_transposed is None:
        # Permute encoder outputs with axes [1, 2, 0]; cached so it is only
        # added to the net once.
        transposed = brew.transpose(
            model,
            self.encoder_outputs,
            self.scope('encoder_outputs_transposed'),
            axes=[1, 2, 0],
        )
        self.encoder_outputs_transposed = transposed
    if self.weighted_encoder_outputs is None:
        # Project encoder outputs once up front so every decoder step can
        # reuse the result.
        projected = brew.fc(
            model,
            self.encoder_outputs,
            self.scope('weighted_encoder_outputs'),
            dim_in=self.encoder_output_dim,
            dim_out=self.encoder_output_dim,
            axis=2,
        )
        self.weighted_encoder_outputs = projected
    return self.decoder_cell.prepare_input(model, input_blob)
def prepare_input(self, model, input_blob):
    """Ensure the transposed and FC-weighted encoder outputs exist, then hand
    off to the inner decoder cell's own prepare_input."""
    # Both blobs are created at most once per instance (None acts as the
    # "not yet built" marker).
    if self.encoder_outputs_transposed is None:
        self.encoder_outputs_transposed = brew.transpose(
            model,
            self.encoder_outputs,
            self.scope('encoder_outputs_transposed'),
            axes=[1, 2, 0],
        )

    if self.weighted_encoder_outputs is None:
        self.weighted_encoder_outputs = brew.fc(
            model,
            self.encoder_outputs,
            self.scope('weighted_encoder_outputs'),
            dim_in=self.encoder_output_dim,
            dim_out=self.encoder_output_dim,
            axis=2,
        )

    return self.decoder_cell.prepare_input(model, input_blob)
def _calc_attention_logits_from_sum_match(
    model,
    decoder_hidden_encoder_outputs_sum,
    encoder_output_dim,
    scope,
):
    """Compute attention logits from the summed match vectors using an
    explicit learned vector `attention_v` (raw FC op rather than brew.fc).

    Returns a blob of shape [batch_size, encoder_length, 1].
    """
    # In-place Tanh: [encoder_length, batch_size, encoder_output_dim]
    decoder_hidden_encoder_outputs_sum = model.net.Tanh(
        decoder_hidden_encoder_outputs_sum,
        decoder_hidden_encoder_outputs_sum,
    )
    # Learned projection vector, registered as a model parameter.
    v = model.param_init_net.XavierFill(
        [],
        s(scope, 'attention_v'),
        shape=[1, encoder_output_dim],
    )
    model.params.append(v)
    # Constant zero bias for the FC op below (not a trainable param).
    zero_bias = model.param_init_net.ConstantFill(
        [],
        s(scope, 'attention_zeros'),
        value=0.0,
        shape=[1],
    )
    # [encoder_length, batch_size, 1]
    logits = model.net.FC(
        [decoder_hidden_encoder_outputs_sum, v, zero_bias],
        [s(scope, 'attention_logits')],
        axis=2,
    )
    # [batch_size, encoder_length, 1]
    return brew.transpose(
        model,
        logits,
        s(scope, 'attention_logits_transposed'),
        axes=[1, 0, 2],
    )
def Transpose(self, *args, **kwargs):
    """Add a Transpose op via brew, forwarding this helper's cuDNN setting.

    Thin pass-through: positional args and kwargs go straight to
    brew.transpose with `use_cudnn` taken from the model helper.
    """
    return brew.transpose(self, *args, use_cudnn=self.use_cudnn, **kwargs)
def build_crf_net(self, input_blob, initial_state, transitions):
    """
    Adds the crf_net recurrent operator to the model.

    model: model_helper.ModelHelper object new operators would be added to
    input_blob: the input sequence in a format T x N x D
        where T is sequence size, N - batch size and D - input dimension
        ##Only supports batch-size 1##
    seq_lengths: blob containing sequence lengths (unused)
    """
    scope = "crf_net"

    def s(name):
        ""
        # We have to manually scope due to our internal/external blob
        # relationships.
        return "{}/{}".format(str(scope), str(name))

    # The step net shares parameters with self.model via param_model.
    step_model = model_helper.ModelHelper(name="crf_step", param_model=self.model)
    input_t, cell_t_prev, _ = step_model.net.AddExternalInputs(
        core.ScopedBlobReference("input_t"),
        core.ScopedBlobReference("cell_t_prev"),
        transitions,
    )
    # All-zero segment ids: SortedSegmentRangeLogSumExp below reduces every
    # row into a single segment (a log-sum-exp over each row).
    zero_segment_id = step_model.param_init_net.ConstantFill(
        [],
        [s("zero_segment_id")],
        value=0,
        shape=[self.num_classes_padded],
        dtype=core.DataType.INT32,
    )
    # A hack to bypass model cloning for test
    step_model.param_init_net.AddExternalOutput(zero_segment_id)
    """ the CRF step """
    # Do tile
    prev_transpose = brew.transpose(
        step_model, cell_t_prev, [s("prev_transpose")], axes=(0, 2, 1)
    )
    prev_tiled = step_model.net.Tile(
        prev_transpose, [s("prev_tiled")], tiles=self.num_classes_padded, axis=2
    )
    input_t_tiled = step_model.net.Tile(
        input_t, [s("input_t_tiled")], tiles=self.num_classes_padded, axis=1
    )
    input_with_prev = step_model.net.Add(
        [prev_tiled, input_t_tiled], [s("input_with_prev")]
    )
    # Broadcast-add the transition scores; use_grad_hack marks this Add for
    # special gradient treatment in the recurrent net.
    all_with_transitions = step_model.net.Add(
        [input_with_prev, transitions],
        [s("prev_with_transitions")],
        broadcast=1,
        use_grad_hack=1,
    )
    all_with_transitions_reshaped, _ = step_model.net.Reshape(
        all_with_transitions,
        [s("all_with_transitions_reshaped"), s("all_with_transitions_orig")],
        shape=(self.num_classes_padded, self.num_classes_padded),
    )
    # log-sum-exp over each row -> next cell state [num_classes_padded].
    cell_t = step_model.net.SortedSegmentRangeLogSumExp(
        [all_with_transitions_reshaped, zero_segment_id], [s("cell_t")]
    )
    step_model.net.AddExternalOutputs(cell_t)
    """ recurrent network """
    cell_input_blob = initial_state
    # Unroll the step net over the sequence; only the last output is used.
    out_all, out_last = recurrent.recurrent_net(
        net=self.model.net,
        cell_net=step_model.net,
        inputs=[(input_t, input_blob)],
        initial_cell_inputs=[(cell_t_prev, cell_input_blob)],
        links={cell_t_prev: cell_t},
        scope=scope,
        outputs_with_grads=(1,),
    )
    return out_last
def apply_soft_coverage_attention(
    model,
    encoder_output_dim,
    encoder_outputs_transposed,
    weighted_encoder_outputs,
    decoder_hidden_state_t,
    decoder_hidden_state_dim,
    scope,
    encoder_lengths,
    coverage_t_prev,
    coverage_weights,
):
    """Soft attention with a coverage term.

    Adds coverage-scaled weights into the usual additive-attention sum before
    computing logits, and accumulates the resulting attention weights into
    the coverage vector for the next step.

    Returns a 4-tuple:
        attention_weighted_encoder_context  [batch_size, encoder_output_dim, 1]
        attention_weights_3d                [batch_size, encoder_length, 1]
        [decoder_hidden_encoder_outputs_sum]  (list, as the blobs-to-link)
        coverage_t                          updated coverage accumulator
    """
    # Project the current decoder hidden state to encoder_output_dim.
    weighted_decoder_hidden_state = _apply_fc_weight_for_sum_match(
        model=model,
        input=decoder_hidden_state_t,
        dim_in=decoder_hidden_state_dim,
        dim_out=encoder_output_dim,
        scope=scope,
        name='weighted_decoder_hidden_state',
    )
    # [encoder_length, batch_size, encoder_output_dim]
    decoder_hidden_encoder_outputs_sum_tmp = model.net.Add(
        [weighted_encoder_outputs, weighted_decoder_hidden_state],
        s(scope, 'decoder_hidden_encoder_outputs_sum_tmp'),
        broadcast=1,
    )
    # [batch_size, encoder_length]
    coverage_t_prev_2d = model.net.Squeeze(
        coverage_t_prev,
        s(scope, 'coverage_t_prev_2d'),
        dims=[0],
    )
    # [encoder_length, batch_size]
    coverage_t_prev_transposed = brew.transpose(
        model,
        coverage_t_prev_2d,
        s(scope, 'coverage_t_prev_transposed'),
    )
    # Scale the coverage weights by the accumulated coverage so far.
    # [encoder_length, batch_size, encoder_output_dim]
    scaled_coverage_weights = model.net.Mul(
        [coverage_weights, coverage_t_prev_transposed],
        s(scope, 'scaled_coverage_weights'),
        broadcast=1,
        axis=0,
    )
    # [encoder_length, batch_size, encoder_output_dim]
    decoder_hidden_encoder_outputs_sum = model.net.Add(
        [decoder_hidden_encoder_outputs_sum_tmp, scaled_coverage_weights],
        s(scope, 'decoder_hidden_encoder_outputs_sum'),
    )
    # [batch_size, encoder_length, 1]
    attention_logits_transposed = _calc_attention_logits_from_sum_match(
        model=model,
        decoder_hidden_encoder_outputs_sum=decoder_hidden_encoder_outputs_sum,
        encoder_output_dim=encoder_output_dim,
        scope=scope,
    )
    # [batch_size, encoder_length, 1]
    attention_weights_3d = _calc_attention_weights(
        model=model,
        attention_logits_transposed=attention_logits_transposed,
        scope=scope,
        encoder_lengths=encoder_lengths,
    )
    # [batch_size, encoder_output_dim, 1]
    attention_weighted_encoder_context = _calc_weighted_context(
        model=model,
        encoder_outputs_transposed=encoder_outputs_transposed,
        encoder_output_dim=encoder_output_dim,
        attention_weights_3d=attention_weights_3d,
        scope=scope,
    )
    # [batch_size, encoder_length]
    attention_weights_2d = model.net.Squeeze(
        attention_weights_3d,
        s(scope, 'attention_weights_2d'),
        dims=[2],
    )
    # Accumulate coverage: coverage_t = coverage_t_prev + attention weights.
    coverage_t = model.net.Add(
        [coverage_t_prev, attention_weights_2d],
        s(scope, 'coverage_t'),
        broadcast=1,
    )
    return (
        attention_weighted_encoder_context,
        attention_weights_3d,
        [decoder_hidden_encoder_outputs_sum],
        coverage_t,
    )
def Transpose(self, *args, **kwargs):
    """Convenience wrapper: delegate to brew.transpose, passing along this
    model helper's `use_cudnn` flag unchanged."""
    return brew.transpose(self, *args, use_cudnn=self.use_cudnn, **kwargs)
def create_caffe2_model(model, input_shape, use_cudnn=True, init_params=False,
                        keras_channel_last=True):
    """Translate a Keras model, layer by layer, into a Caffe2 ModelHelper.

    model: a Keras model whose layers are walked in order.
    input_shape: NCHW-style shape; only input_shape[2] and input_shape[3]
        (spatial sizes) are read here.
    use_cudnn: forwarded into the Caffe2 arg_scope.
    init_params: whether the ModelHelper initializes parameters itself.
    keras_channel_last: True when the Keras model is NHWC; affects how
        output sizes are read and whether Flatten inserts a transpose.

    Supported layer types: InputLayer, Flatten, Dropout, Conv2D,
    MaxPooling2D, AveragePooling2D, BatchNormalization, Dense, LeakyReLU,
    merge.Add. NOTE(review): a layer of any other type falls through all
    branches — `c2_layer` then keeps its previous value (or is undefined on
    the first layer); confirm all converted models stick to this set.
    """
    arg_scope = {'order': 'NCHW', 'use_cudnn': use_cudnn}
    caffe2_model = model_helper.ModelHelper(name='model',
                                            init_params=init_params,
                                            arg_scope=arg_scope)
    num_conv_layers = 0  # NOTE(review): never updated or read below.
    layer_num = 0
    # layer name -> (height, width) of its output; (0, 0) means flattened.
    layer_sizes = {}
    prev_layer_name = ''
    for layer in model.layers:
        inb_node = layer._inbound_nodes[0]
        num_input_layers = len(inb_node.inbound_layers)
        input_name_list = []
        for ii in range(0, num_input_layers):
            inp_layer = inb_node.inbound_layers[ii]
            input_name_list.append(inp_layer.name)
            # The last inbound layer wins as "previous" for single-input ops.
            prev_layer_name = inp_layer.name
            if isinstance(inp_layer, keras.layers.Flatten):
                pass
                #pinb_node = inp_layer._inbound_nodes[0]
                #prev_layer_name = pinb_node.inbound_layers[0].name
        name = layer.name
        config = layer.get_config()
        inputShape = layer.input_shape
        outputShape = layer.output_shape
        if isinstance(layer, keras.engine.input_layer.InputLayer):
            input_sizes = (input_shape[2], input_shape[3])
            layer_sizes[name] = input_sizes
        else:
            # Every non-input layer needs its producer's size recorded.
            if (input_name_list[0] not in layer_sizes):
                raise ValueError("Can't find layer size for ",
                                 input_name_list[0])
            else:
                input_sizes = layer_sizes[input_name_list[0]]
            layer_dim = len(outputShape)
            if (layer_dim == 4):
                if (keras_channel_last):
                    out_sizes = (outputShape[1], outputShape[2])
                else:
                    out_sizes = (outputShape[2], outputShape[3])
            elif (layer_dim == 2):
                out_sizes = (0, 0)  #flattened
            else:
                raise ValueError(
                    'Unsupported layer dimension : {0}'.format(layer_dim))
        if isinstance(layer, keras.layers.Flatten):
            tmp_prev = prev_layer_name
            if (keras_channel_last):
                # Match Keras' NHWC flatten ordering by transposing first.
                tmp_prev = prev_layer_name + '_transpose'
                #nb, img_h, img_w, chan <-- nb, chan, img_h, img_w
                c2_layer = brew.transpose(caffe2_model, prev_layer_name,
                                          tmp_prev, axes=(0, 2, 3, 1))
            c2_layer = caffe2_model.net.Flatten(tmp_prev, name)
            #print('FLatten previous layer ', prev_layer_name, ' current layer ', name , 'inputshape ', inputShape)
            layer_sizes[name] = out_sizes
        elif isinstance(layer, keras.layers.Dropout):
            #print('name is ', name, ' prev_layer_name ', prev_layer_name)
            # Inference-only graph: dropout is a pass-through (is_test=True).
            c2_layer = caffe2_model.net.Dropout(prev_layer_name, name,
                                                is_test=True
                                                #ratio=config['rate']
                                                )
            #same size
            layer_sizes[name] = input_sizes
        elif (isinstance(layer, keras.layers.convolutional.Conv2D)):
            dim_in = inputShape[-1]
            dim_out = outputShape[-1]
            # NOTE(review): only square kernels/strides are handled (index 0).
            kernel = config['kernel_size'][0]
            stride = config['strides'][0]
            if (config['padding'] == 'same'):
                pad_sizes = get_padding_sizes(input_sizes,
                                              config['kernel_size'],
                                              config['strides'])
            elif (config['padding'] == 'valid'):
                pad_sizes = ((0, 0), (0, 0))
            else:
                raise ValueError('unsupported padding')
            #print('pad sizes ', pad_sizes)
            layer_sizes[name] = out_sizes
            c2_layer = brew.conv(caffe2_model, prev_layer_name, name,
                                 dim_in=dim_in, dim_out=dim_out,
                                 kernel=kernel, stride=stride,
                                 pad_l=pad_sizes[0][0], pad_r=pad_sizes[0][1],
                                 pad_t=pad_sizes[1][0], pad_b=pad_sizes[1][1])
            if config['activation'] == 'linear':
                pass
            elif config['activation'] == 'relu':
                c2_layer = brew.relu(caffe2_model, name, name)
            elif config['activation'] == 'softmax':
                #c2_layer = brew.softmax(caffe2_model, name, name)
                c2_layer = brew.softmax(caffe2_model, name, 'softmax')
            else:
                raise ValueError(
                    'The only supported activation for conv layer is relu')
        elif isinstance(layer, keras.layers.MaxPooling2D):
            kernel = config['pool_size'][0]
            stride = config['strides'][0]
            pad_size = ((0, 0), (0, 0))  # NOTE(review): computed but unused.
            layer_sizes[name] = out_sizes
            c2_layer = brew.max_pool(caffe2_model, prev_layer_name, name,
                                     kernel=kernel, stride=stride)
        elif isinstance(layer, keras.layers.AveragePooling2D):
            kernel = config['pool_size'][0]
            stride = config['strides'][0]
            pad_size = ((0, 0), (0, 0))  # NOTE(review): computed but unused.
            layer_sizes[name] = out_sizes
            c2_layer = brew.average_pool(caffe2_model, prev_layer_name, name,
                                         kernel=kernel, stride=stride)
        elif isinstance(layer, keras.layers.BatchNormalization):
            dim_in = inputShape[-1]
            epsilon = config['epsilon']
            momentum = config['momentum']
            c2_layer = brew.spatial_bn(caffe2_model, prev_layer_name, name,
                                       dim_in=dim_in, epsilon=epsilon,
                                       momentum=momentum, is_test=True)
            #same size
            layer_sizes[name] = input_sizes
        elif (isinstance(layer, keras.layers.core.Dense)):
            dim_in = inputShape[-1]
            dim_out = outputShape[-1]
            #print('input shape for dense is ', inputShape)
            if (len(inputShape) == 2):
                #flattened input
                c2_layer = brew.fc(caffe2_model, prev_layer_name, name,
                                   dim_in=dim_in, dim_out=dim_out)
            else:
                #fully convolutional input: emulate Dense with a 1x1 conv.
                c2_layer = brew.conv(caffe2_model, prev_layer_name, name,
                                     dim_in=dim_in, dim_out=dim_out,
                                     kernel=1, stride=1)
            activation = config['activation']
            if activation == 'relu':
                c2_layer = brew.relu(caffe2_model, name, name)
            elif activation == 'softmax':
                c2_layer = brew.softmax(caffe2_model, name, 'softmax')
            elif activation == 'linear':
                pass
            #
            else:
                raise ValueError(
                    'The only supported activations for fc layer are relu and softmax'
                )
            #same size
            layer_sizes[name] = input_sizes
        elif (isinstance(layer, keras.layers.advanced_activations.LeakyReLU)):
            dim_in = inputShape[-1]
            c2_layer = caffe2_model.net.LeakyRelu(prev_layer_name, name,
                                                  alpha=config['alpha'])
            #same size
            layer_sizes[name] = input_sizes
        elif (isinstance(layer, keras.layers.merge.Add)):
            # NOTE(review): only the first two inbound layers are summed.
            c2_layer = brew.sum(caffe2_model,
                                [input_name_list[0], input_name_list[1]],
                                name)
            #same size
            layer_sizes[name] = input_sizes
        layer_num = layer_num + 1
        # Mark the final layer's blob as the network output.
        if (layer_num == len(model.layers)):
            caffe2_model.net.AddExternalOutput(c2_layer)
    return caffe2_model
def build_crf_net(self, input_blob, initial_state, transitions):
    '''
    Adds the crf_net recurrent operator to the model.

    model: model_helper.ModelHelper object new operators would be added to
    input_blob: the input sequence in a format T x N x D
        where T is sequence size, N - batch size and D - input dimention
        ##Only supports batch-size 1##
    seq_lengths: blob containing sequence lengths (unused)
    '''
    scope = 'crf_net'

    def s(name):
        ''
        # We have to manually scope due to our internal/external blob
        # relationships.
        return "{}/{}".format(str(scope), str(name))

    # Step net shares parameters with the outer model via param_model.
    step_model = model_helper.ModelHelper(name='crf_step',
                                          param_model=self.model)
    input_t, cell_t_prev, _ = (
        step_model.net.AddExternalInputs(
            core.ScopedBlobReference('input_t'),
            core.ScopedBlobReference('cell_t_prev'),
            transitions
        )
    )
    # All-zero segment ids so SortedSegmentRangeLogSumExp reduces each row
    # into one segment (row-wise log-sum-exp).
    zero_segment_id = step_model.param_init_net.ConstantFill(
        [],
        [s('zero_segment_id')],
        value=0,
        shape=[self.num_classes_padded],
        dtype=core.DataType.INT32,
    )
    # A hack to bypass model cloning for test
    step_model.param_init_net.AddExternalOutput(zero_segment_id)
    """ the CRF step """
    # Do tile
    prev_transpose = brew.transpose(
        step_model,
        cell_t_prev,
        [s('prev_transpose')],
        axes=(0, 2, 1),
    )
    prev_tiled = step_model.net.Tile(
        prev_transpose,
        [s('prev_tiled')],
        tiles=self.num_classes_padded,
        axis=2,
    )
    input_t_tiled = step_model.net.Tile(
        input_t,
        [s('input_t_tiled')],
        tiles=self.num_classes_padded,
        axis=1,
    )
    input_with_prev = step_model.net.Add(
        [prev_tiled, input_t_tiled],
        [s('input_with_prev')]
    )
    # Broadcast-add transition scores; use_grad_hack flags this Add for
    # special gradient handling inside the recurrent net.
    all_with_transitions = step_model.net.Add(
        [input_with_prev, transitions],
        [s('prev_with_transitions')],
        broadcast=1,
        use_grad_hack=1,
    )
    all_with_transitions_reshaped, _ = step_model.net.Reshape(
        all_with_transitions,
        [s('all_with_transitions_reshaped'), s('all_with_transitions_orig')],
        shape=(self.num_classes_padded, self.num_classes_padded)
    )
    # Row-wise log-sum-exp -> next cell state.
    cell_t = step_model.net.SortedSegmentRangeLogSumExp(
        [all_with_transitions_reshaped, zero_segment_id],
        [s('cell_t')],
    )
    step_model.net.AddExternalOutputs(cell_t)
    """ recurrent network """
    cell_input_blob = initial_state
    # Unroll over the sequence; only the final state is returned.
    out_all, out_last = recurrent.recurrent_net(
        net=self.model.net,
        cell_net=step_model.net,
        inputs=[(input_t, input_blob)],
        initial_cell_inputs=[
            (cell_t_prev, cell_input_blob),
        ],
        links={
            cell_t_prev: cell_t,
        },
        scope=scope,
        outputs_with_grads=(1,)
    )
    return out_last