Exemplo n.º 1
0
    def extract_lstm(self,
                     input_name,
                     state_name,
                     output_name,
                     scope_id,
                     steps=-1,
                     scope_name="basic_lstm_cell"):
        """Convert one LSTM cell into a Caffe 'LSTM' layer and register it.

        The cell's "kernel" and "bias" tensors are looked up under
        ``scope_name``, an 'LSTM' layer carrying them is appended to
        ``self.caffe_model``, and the result of ``Operators.lstm`` is stored
        in ``self.data_dict`` under ``output_name`` and ``state_name``.

        Args:
            input_name: Key of the input in ``self.data_dict``; also the
                layer's first bottom.
            state_name: Key of the state in ``self.data_dict``; also the
                layer's second bottom. Its entry is overwritten with the
                second value returned by ``Operators.lstm``.
            output_name: Name of the new layer and of its single top.
            scope_id: Index in ``self.scopes`` where ``scope_name`` is
                written; the following slot receives "kernel" / "bias".
            steps: Forwarded unchanged to ``layer.lstm_param``.
            scope_name: Scope component under which the weights live.

        Returns:
            ``output_name``.

        Raises:
            SystemExit: With status 1 when a bias exists but its length
                differs from the number of kernel columns.
        """
        self.scopes[scope_id] = scope_name
        # Kernel and bias share the same scope path; only the last component
        # differs, so the slot after ``scope_id`` is rewritten between lookups.
        self.scopes[scope_id + 1] = "kernel"
        kernel = self.get_tensor(self.generate_name(self.scopes, scope_id + 2))
        self.scopes[scope_id + 1] = "bias"
        bias = self.get_tensor(self.generate_name(self.scopes, scope_id + 2))

        # Validate before building anything so a bad checkpoint fails early.
        num_output_4 = len(kernel[0])
        if bias is not None and len(bias) != num_output_4:
            print("[ERROR] extract_lstm failed")
            # A non-zero status lets calling scripts detect the failure;
            # the previous exit(0) reported success.
            raise SystemExit(1)
        # The kernel's column count is four times the layer's output size
        # (presumably one slice per LSTM gate).
        num_output = num_output_4 // 4

        layer = caffe_net.LayerParameter(name=output_name,
                                         type='LSTM',
                                         bottom=[input_name, state_name],
                                         top=[output_name])
        layer.lstm_param(num_output, steps)
        layer.add_data(kernel.transpose([1, 0]), bias)
        self.caffe_model.add_layer(layer)

        # Operators.lstm receives the untransposed kernel here.
        self.data_dict[output_name], self.data_dict[
            state_name] = Operators.lstm(self.data_dict[input_name],
                                         self.data_dict[state_name], kernel,
                                         bias, output_name, state_name)
        return output_name
Exemplo n.º 2
0
 def extract_lstm(self,
                  input_name,
                  state_name,
                  output_name,
                  scope_id,
                  steps=-1,
                  scope_name="basic_lstm_cell",
                  use_proj=False,
                  zoneout_cell=0,
                  zoneout_output=0):
     """Convert one or two LSTM cells into a single Caffe 'LSTM' layer.

     For every scope in ``scope_name`` the "kernel"/"bias" weights (and,
     with ``use_proj``, the "projection" weights) are fetched through
     ``self.get_weights``. The per-scope arrays are concatenated along
     axis 0 and attached to one 'LSTM' layer that is appended to
     ``self.caffe_model``. A reference result is stored in
     ``self.data_dict``: ``Operators.lstm`` for one scope,
     ``Operators.bi_lstm`` for two.

     Args:
         input_name: Key of the input in ``self.data_dict``; first bottom.
         state_name: Key of the state in ``self.data_dict``; added as a
             second bottom unless it is ``None``.
         output_name: Name of the new layer and of its single top.
         scope_id: Scope index forwarded to ``self.get_weights``.
         steps: Forwarded unchanged to ``layer.lstm_param``.
         scope_name: One scope name or a sequence of scope names.
         use_proj: Whether the cells carry a projection kernel/bias.
         zoneout_cell: Forwarded to ``layer.lstm_param`` and the operator.
         zoneout_output: Forwarded to ``layer.lstm_param`` and the operator.

     Returns:
         ``output_name``.

     Raises:
         ValueError: If ``scope_name`` is an empty sequence.
         SystemExit: With status 1 when a bias exists but its length
             differs from the number of kernel columns.
     """
     if isinstance(scope_name, str):
         scope_name = [scope_name]
     if not scope_name:
         # Without a scope there are no weights to size the layer from.
         raise ValueError("extract_lstm requires at least one scope name")

     kernels = []
     biases = []
     projections = []
     projection_biases = []
     for name in scope_name:
         kernel, bias = self.get_weights(scope_id, [name, "kernel", "bias"])
         projection_size = 0
         projection = None
         projection_bias = None
         if use_proj:
             # The projection weights live one level below the cell scope.
             self.scopes[scope_id] = name
             projection, projection_bias = self.get_weights(
                 scope_id + 1, ["projection", "kernel", "bias"])
             projection_size = projection.shape[0]

         num_output_4 = len(kernel[0])
         if bias is not None and len(bias) != num_output_4:
             print("[ERROR] extract_lstm failed")
             # A non-zero status lets calling scripts detect the failure;
             # the previous exit(0) reported success.
             raise SystemExit(1)
         if use_proj:
             num_output = projection.shape[1]
         else:
             num_output = num_output_4 // 4

         kernels.append(kernel.transpose([1, 0]))
         if bias is None:
             # A placeholder bias must have the same length a real bias is
             # required to have above (one entry per kernel column).
             bias = np.zeros([num_output_4])
         biases.append(bias)

         if use_proj:
             projections.append(projection.transpose([1, 0]))
             # Only substitute zeros when the checkpoint has no projection
             # bias; an existing bias is kept as-is.
             if projection_bias is None:
                 projection_bias = np.zeros(num_output)
             projection_biases.append(projection_bias)
         else:
             projections.append(None)
             projection_biases.append(None)

     bottom = [input_name]
     if state_name is not None:
         bottom.append(state_name)
     layer = caffe_net.LayerParameter(name=output_name,
                                      type='LSTM',
                                      bottom=bottom,
                                      top=[output_name])
     # num_output / projection_size come from the last scope processed.
     layer.lstm_param(num_output, steps, projection_size, zoneout_cell,
                      zoneout_output)
     if use_proj:
         layer.add_data(np.concatenate(kernels, axis=0),
                        np.concatenate(biases, axis=0),
                        np.concatenate(projections, axis=0),
                        np.concatenate(projection_biases, axis=0))
     else:
         layer.add_data(np.concatenate(kernels, axis=0),
                        np.concatenate(biases, axis=0))
     self.caffe_model.add_layer(layer)

     # With any other number of scopes no reference output is stored.
     if len(scope_name) == 1:
         # NOTE(review): this path reads self.data_dict[state_name] even
         # though state_name=None is accepted above - confirm callers
         # always pass a state for the single-scope case.
         self.data_dict[
             output_name], self.data_dict[state_name] = Operators.lstm(
                 self.data_dict[input_name], self.data_dict[state_name],
                 kernels[0], biases[0], projections[0],
                 projection_biases[0], zoneout_cell, zoneout_output,
                 output_name, state_name)
     elif len(scope_name) == 2:
         self.data_dict[output_name] = Operators.bi_lstm(
             self.data_dict[input_name], kernels, biases, projections,
             projection_biases, zoneout_cell, zoneout_output, output_name)
     return output_name