Exemplo n.º 1
0
 def extract_lstm(self, input_name, state_name, output_name, scope_id,
         steps=-1, scope_name="basic_lstm_cell",
         use_proj=False, zoneoutCell=0, zoneoutOutput=0):
     """Emit a Caffe 'LSTM' layer for one or more LSTM cell scopes.

     For every scope in ``scope_name`` the kernel/bias (and, when
     ``use_proj`` is set, the projection kernel/bias) are fetched through
     ``self.get_weights``, the 2-D kernels are transposed, and the
     per-scope arrays are concatenated along axis 0 and attached to a new
     layer that is appended to ``self.caffe_model``.  Depending on
     ``steps`` a reference result is also computed with ``Operators`` and
     stored in ``self.data_dict``.

     Args:
         input_name: Name of the input tensor; used as the first layer
             bottom, as a key into ``self.data_dict`` and for
             ``self.get_tensor_shape``.
         state_name: Name of the state tensor, or ``None``.  When not
             ``None`` it is added as a second bottom.  With
             ``steps == -1`` it is also read from and written to
             ``self.data_dict``.
         output_name: Name of the created layer and of its single top.
         scope_id: Scope index forwarded to ``self.get_weights``.  When
             ``use_proj`` is set, ``self.scopes[scope_id]`` is overwritten
             with each cell scope name and the projection is looked up at
             ``scope_id + 1``.
         steps: Forwarded to ``layer.lstm_param``.  Also selects the
             reference computation: ``>= 0`` -> ``Operators.fw_lstm``,
             ``-1`` -> ``Operators.lstm``, ``-2`` -> ``Operators.bi_lstm``;
             any other value stores nothing in ``self.data_dict``.
         scope_name: A scope name or a list of scope names.
         use_proj: Whether a "projection" kernel/bias is loaded too.
         zoneoutCell: Forwarded to ``lstm_param`` and ``Operators``.
         zoneoutOutput: Forwarded to ``lstm_param`` and ``Operators``.

     Returns:
         ``output_name``.
     """
     if isinstance(scope_name, str):
         scope_name = [scope_name]
     kernels = []
     biases = []
     projections = []
     projection_biases = []
     for cell_scope in scope_name:
         kernel, bias = self.get_weights(scope_id, [cell_scope, "kernel", "bias"])
         projection_size = 0
         projection = None
         projection_bias = None
         if use_proj:
             # The projection weights are resolved one scope level deeper,
             # so the current cell scope is installed at scope_id first.
             self.scopes[scope_id] = cell_scope
             projection, projection_bias = self.get_weights(
                 scope_id + 1, ["projection", "kernel", "bias"])
             projection_size = projection.shape[0]
         # Column count of the kernel; presumably the four gates side by
         # side, hence the "// 4" below -- TODO confirm against the runtime.
         num_output_4 = len(kernel[0])
         if bias is not None and len(bias) != num_output_4:
             print("[ERROR] extract_lstm failed")
             # NOTE(review): exits with status 0 although this is an error
             # path; kept as-is because callers may rely on it.
             exit(0)
         if use_proj:
             num_output = projection.shape[1]
         else:
             num_output = num_output_4 // 4
         if len(kernel) != self.get_tensor_shape(input_name)[-1] + num_output:
             # The kernel rows do not cover input + recurrent parts, so the
             # recurrent weights are fetched separately and stacked below.
             recurrent_kernel, _ = self.get_weights(
                 scope_id, [cell_scope, "recurrent_kernel", "bias"])
             kernel = np.concatenate([kernel, recurrent_kernel], axis=0)
         kernels.append(kernel.transpose([1, 0]))
         if bias is None:
             # A missing bias must have the same length a real one is
             # required to have (see the length check above).
             bias = np.zeros([num_output_4])
         biases.append(bias)
         if use_proj:
             projections.append(projection.transpose([1, 0]))
             if projection_bias is None:
                 # Only a *missing* projection bias defaults to zeros; a
                 # loaded one must be kept.
                 projection_bias = np.zeros(num_output)
             projection_biases.append(projection_bias)
         else:
             projections.append(None)
             projection_biases.append(None)
     bottom = [input_name]
     if state_name is not None:
         bottom.append(state_name)
     layer = caffe_net.LayerParameter(name=output_name, type='LSTM',
                 bottom=bottom, top=[output_name])
     # num_output / projection_size are the values from the last scope.
     layer.lstm_param(num_output, steps, projection_size, zoneoutCell, zoneoutOutput)
     if use_proj:
         # Projection biases are always arrays here (zeros when absent).
         layer.add_data(np.concatenate(kernels, axis=0), np.concatenate(biases, axis=0),
             np.concatenate(projections, axis=0), np.concatenate(projection_biases, axis=0))
     else:
         layer.add_data(np.concatenate(kernels, axis=0),
             np.concatenate(biases, axis=0))
     self.caffe_model.add_layer(layer)
     if steps >= 0:
         # Only the first scope's weights are used for this variant.
         self.data_dict[output_name] = Operators.fw_lstm(self.data_dict[input_name],
             kernels[0],
             biases[0],
             projections[0],
             projection_biases[0],
             zoneoutCell, zoneoutOutput,
             output_name)
     elif steps == -1:
         # Single-step variant: also updates the stored state tensor.
         self.data_dict[output_name], self.data_dict[state_name] = Operators.lstm(self.data_dict[input_name],
             self.data_dict[state_name],
             kernels[0],
             biases[0],
             projections[0],
             projection_biases[0],
             zoneoutCell, zoneoutOutput,
             output_name,
             state_name)
     elif steps == -2:
         # Bidirectional variant: receives the per-scope lists as a whole.
         self.data_dict[output_name] = Operators.bi_lstm(self.data_dict[input_name],
             kernels,
             biases,
             projections,
             projection_biases,
             zoneoutCell, zoneoutOutput,
             output_name)
     return output_name