def extract_dense(self, input_name, dense_name, scope_id, scope_name="dense"):
    """Convert a dense (fully-connected) layer into a caffe InnerProduct layer.

    Kernel and bias tensors are resolved through the scope-name stack
    (``self.scopes``). The layer is registered with the caffe model and the
    reference computation is recorded in ``self.data_dict``.

    Returns the output tensor name (identical to *dense_name*).
    """
    # Point the scope stack at this dense layer and resolve both weights.
    self.scopes[scope_id] = scope_name
    self.scopes[scope_id + 1] = "kernel"
    kernel = self.get_tensor(self.generate_name(self.scopes, scope_id + 2))
    self.scopes[scope_id + 1] = "bias"
    bias = self.get_tensor(self.generate_name(self.scopes, scope_id + 2))

    layer = caffe_net.LayerParameter(name=dense_name, type='InnerProduct',
                bottom=[input_name], top=[dense_name])
    num_output = len(kernel[0])
    # caffe stores InnerProduct weights transposed relative to the source
    # kernel (presumably TF (in, out) -> caffe (out, in)).
    kernel = kernel.transpose((1, 0))
    layer.inner_product_param(num_output, bias_term=bias is not None)
    if bias is None:
        layer.add_data(kernel)
    else:
        if len(bias) != num_output:
            print("[ERROR] extract_dense failed")
            exit(0)
        layer.add_data(kernel, bias)
    self.caffe_model.add_layer(layer)
    # Reference path wants the original (in, out) orientation back.
    self.data_dict[dense_name] = Operators.fully_connect(
        self.data_dict[input_name],
        kernel.transpose((1, 0)),
        bias, dense_name)
    return dense_name
def extract_dense(self, input_name, output_name, scope_id, scope_name="dense"):
    """Convert a dense layer into a caffe InnerProduct layer.

    *scope_name* may be a single scope string (kernel/bias sub-names are
    implied) or an explicit ``[scope, kernel, bias]`` name list.

    Returns the output tensor name (identical to *output_name*).
    """
    if isinstance(scope_name, list):
        layer_names = scope_name
    elif isinstance(scope_name, str):
        layer_names = [scope_name, "kernel", "bias"]
    else:
        print("[ERROR] unsupported dense scope_name")
        exit(1)
    kernel, bias = self.get_weights(scope_id, layer_names)

    layer = caffe_net.LayerParameter(name=output_name, type='InnerProduct',
                bottom=[input_name], top=[output_name])
    num_output = len(kernel[0])
    # caffe keeps InnerProduct weights as (out, in); flip the source kernel.
    kernel = kernel.transpose((1, 0))
    layer.inner_product_param(num_output, bias_term=bias is not None)
    if bias is None:
        layer.add_data(kernel)
    else:
        if len(bias) != num_output:
            print("[ERROR] extract_dense failed")
            exit(0)
        layer.add_data(kernel, bias)
    self.caffe_model.add_layer(layer)
    # The reference computation expects (in, out), hence the transpose back.
    self.data_dict[output_name] = Operators.fully_connect(
        self.data_dict[input_name],
        kernel.transpose((1, 0)),
        bias, output_name)
    return output_name
def generate(self, input=None):
    """Build the full network: embedding -> BiLSTM -> FC -> softmax -> argmax.

    :param input: optional concrete input fed to ``set_input`` for the
        reference computation (semantics depend on the base converter —
        TODO confirm).
    """
    # batch_seq_length = 16
    input_text_name = "input_text"
    input_text_shape = [self.batch, 16]
    self.add_input(input_text_name, input_text_shape)
    self.set_input(input)

    # embedding
    x = self.extract_embedding(input_text_name, 0, "emb_table", "embedding_lookup")

    # bilstm
    x = self.extract_lstm(x, None, "BiLSTM", 0, steps=-2,
            scope_name=["BiLSTM/fw/lstm_cell", "BiLSTM/bw/lstm_cell"])

    # FC (inline dense on the W/b weights)
    weight = self.get_weight("W")
    bias = self.get_weight("b")
    layer = caffe_net.LayerParameter("wb_fc_output", type='InnerProduct',
                bottom=[x], top=["wb_fc_output"])
    num_output = len(weight[0])
    weight = weight.transpose((1, 0))
    layer.inner_product_param(num_output, bias_term=bias is not None)
    # BUGFIX: the original dereferenced len(bias) and passed bias to
    # add_data unconditionally, crashing when get_weight("b") returns None
    # even though bias_term already allowed for that case. Guard it like
    # the extract_dense helpers do.
    if bias is not None:
        if len(bias) != num_output:
            print("[ERROR] extract_dense failed")
            exit(0)
        layer.add_data(weight, bias)
    else:
        layer.add_data(weight)
    self.caffe_model.add_layer(layer)
    self.data_dict["wb_fc_output"] = Operators.fully_connect(
        self.data_dict[x], weight.transpose((1, 0)), bias, "wb_fc_output")
    x = "wb_fc_output"

    # softmax
    x = self.add_softmax(x, "softmax_output", -1)
    # argmax
    x = self.add_argmax(x, -1, "output")
    self.save_caffe_model()
def extract_denses(self, input_name, output_names, output_nums, scope_id, scope_name="dense", share_index=0, share_num=1):
    """Convert one fused dense weight into several InnerProduct outputs.

    The single kernel/bias pair found under the scope is split column-wise
    into ``len(output_nums)`` chunks (chunk i has width output_nums[i]).

    When share_num == 1 each chunk becomes its own InnerProduct layer.
    Otherwise the whole weight is added once (by the share_index == 0
    caller), applied via matmul (+ bias sum), and the result is sliced
    into the requested outputs.

    Returns *output_names*.
    """
    # Accept either a plain scope string or an explicit name triple.
    if (isinstance(scope_name, str)):
        layer_names = [scope_name, "kernel", "bias"]
    elif (isinstance(scope_name, list)):
        layer_names = scope_name
    else:
        print("[ERROR] unsupported dense scope_name")
        exit(1)
    kernels, biases = self.get_weights(scope_id, layer_names)
    if (share_num == 1):
        # Unshared path: one InnerProduct layer per output chunk.
        last_sum = 0
        for index in range(len(output_nums)):
            # Column slice of the fused kernel for this output.
            kernel = kernels[:, last_sum:last_sum+output_nums[index]]
            bias = None
            if biases is not None:
                bias = biases[last_sum:last_sum+output_nums[index]]
            layer = caffe_net.LayerParameter(name=output_names[index], type='InnerProduct',
                        bottom=[input_name], top=[output_names[index]])
            num_output = len(kernel[0])
            # caffe InnerProduct stores weights transposed vs. the source kernel.
            kernel = kernel.transpose((1, 0))
            layer.inner_product_param(num_output, bias_term=bias is not None)
            if bias is not None:
                if len(bias) != num_output:
                    print("[ERROR] extract_denses failed")
                    exit(0)
                layer.add_data(kernel, bias)
            else:
                layer.add_data(kernel)
            self.caffe_model.add_layer(layer)
            # Record the reference computation for this output.
            self.data_dict[output_names[index]] = Operators.fully_connect(self.data_dict[input_name],
                kernel.transpose((1, 0)), bias, output_names[index])
            last_sum = last_sum + output_nums[index]
        # All chunks together must consume the whole fused kernel width.
        if (last_sum != len(kernels[0])):
            print("[ERROR] extract_denses failed")
            exit(0)
    else:
        # Shared path: weight added once, then matmul (+ bias) and slice.
        self.scopes[scope_id] = layer_names[0]
        kernel_name = self.generate_name(self.scopes, scope_id+1) + "/kernel"
        bias_name = self.generate_name(self.scopes, scope_id+1) + "/bias"
        # Only the first sharer registers the weights.
        if (share_index == 0):
            self.add_weight(kernel_name, weight=kernels)
            if (biases is not None):
                self.add_weight(bias_name, weight=biases)
        tmp_name = self.add_matmul(input_name, kernel_name,
            self.generate_name(self.scopes, scope_id+1)+"/matmul"+str(share_index))
        if (biases is not None):
            tmp_name = self.add_sum([tmp_name, bias_name],
                self.generate_name(self.scopes, scope_id+1)+"/sum"+str(share_index))
        # Slice points are the cumulative widths of all but the last chunk.
        slice_point = []
        last_sum = 0
        for i in range(len(output_nums)-1):
            last_sum = last_sum + output_nums[i]
            slice_point.append(last_sum)
        # Slice along the innermost (feature) axis of the matmul result.
        shape_len = len(self.get_tensor_shape(
            self.generate_name(self.scopes, scope_id+1)+"/matmul"+str(share_index)))
        self.add_slice(tmp_name, output_names, shape_len-1, slice_point)
    return output_names
def extract_embeddings(self, word_input_name, position_input_name, token_input_name):
    """Build the embedding block: word (two-stage), position and token-type
    embeddings, summed and layer-normalized.

    Returns the name of the layer-norm output tensor.
    """
    # embedding block
    self.scopes[1] = "embeddings"

    # word embedding, stage 1: table lookup
    we_lookup = "we_1"
    self.extract_embedding(word_input_name, 2, "word_embeddings", we_lookup)

    # word embedding, stage 2: linear projection (no bias)
    self.scopes[2] = "word_embeddings_2"
    proj_weight = self.get_tensor(self.generate_name(self.scopes, 3))
    we_proj = "we_2"
    layer = caffe_net.LayerParameter(name=we_proj, type='InnerProduct',
                bottom=[we_lookup], top=[we_proj])
    layer.inner_product_param(len(proj_weight[0]), bias_term=False)
    # NOTE(review): unlike the extract_dense helpers, the weight is handed
    # to caffe untransposed here — confirm the stored orientation of
    # word_embeddings_2 matches what InnerProduct expects.
    layer.add_data(proj_weight)
    self.caffe_model.add_layer(layer)
    self.data_dict[we_proj] = Operators.fully_connect(
        self.data_dict[we_lookup], proj_weight, None, we_proj)

    # position embedding
    pos_emb = "pe"
    self.extract_embedding(position_input_name, 2, "position_embeddings", pos_emb)

    # token type embedding
    tok_emb = "tte"
    self.extract_embedding(token_input_name, 2, "token_type_embeddings", tok_emb)

    # eltwise sum of the three embeddings
    sum_name = "embedding_sum"
    self.add_sum([we_proj, pos_emb, tok_emb], sum_name)

    # layer norm
    layer_norm_name = "embedding_ln"
    self.extract_layer_norm(sum_name, layer_norm_name, 2)
    return layer_norm_name
def extract_dense_prefix(self, input_name, dense_name, weight_name_prefix):
    """Convert a dense layer whose weights are stored under an explicit
    name prefix (``<prefix>weight`` / ``<prefix>bias``) into a caffe
    InnerProduct layer.

    The stored kernel is assumed to already be (out, in) — note num_output
    is taken from the leading dimension and it is passed to caffe
    untransposed, unlike the scope-based extract_dense — TODO confirm
    against the weight source.

    Returns the output tensor name (identical to *dense_name*).
    """
    kernel = self.get_weight(weight_name_prefix + "weight")
    bias = self.get_weight(weight_name_prefix + "bias")
    layer = caffe_net.LayerParameter(name=dense_name, type='InnerProduct',
                bottom=[input_name], top=[dense_name])
    num_output = len(kernel)
    layer.inner_product_param(num_output, bias_term=bias is not None)
    if bias is not None:
        if len(bias) != num_output:
            # BUGFIX: the original printed a garbled message
            # ("[ERROR] extract extract_dense_prefix") and then fell
            # through, adding the mismatched data anyway. Fail hard like
            # every sibling extract_* helper does.
            print("[ERROR] extract_dense_prefix failed")
            exit(0)
        layer.add_data(kernel, bias)
    else:
        layer.add_data(kernel)
    self.caffe_model.add_layer(layer)
    self.data_dict[dense_name] = Operators.fully_connect(
        self.data_dict[input_name], kernel.transpose((1, 0)), bias, dense_name)
    # BUGFIX: return the output name for chaining, consistent with the
    # other extract_* helpers (previously returned None).
    return dense_name
def extract_dense(self, input_name, output_name, scope_id, scope_name="dense", share_index=0, share_num=1):
    """Convert a dense layer into caffe ops, optionally sharing its weights.

    With share_num == 1 this emits a normal InnerProduct layer. Otherwise
    the kernel/bias are registered once (by the share_index == 0 caller)
    and each sharer applies them via matmul (+ bias sum).

    Returns the output tensor name (identical to *output_name*).
    """
    if (isinstance(scope_name, str)):
        layer_names = [scope_name, "kernel", "bias"]
    elif (isinstance(scope_name, list)):
        layer_names = scope_name
    else:
        print("[ERROR] unsupported dense scope_name")
        exit(1)
    kernel, bias = self.get_weights(scope_id, layer_names)
    if (share_num == 1):
        layer = caffe_net.LayerParameter(name=output_name, type='InnerProduct',
                    bottom=[input_name], top=[output_name])
        num_output = len(kernel[0])
        # caffe InnerProduct stores weights transposed vs. the source kernel.
        kernel = kernel.transpose((1, 0))
        layer.inner_product_param(num_output, bias_term=bias is not None)
        if bias is not None:
            if len(bias) != num_output:
                print("[ERROR] extract_dense failed")
                exit(0)
            layer.add_data(kernel, bias)
        else:
            layer.add_data(kernel)
        self.caffe_model.add_layer(layer)
        self.data_dict[output_name] = Operators.fully_connect(self.data_dict[input_name],
            kernel.transpose((1, 0)), bias, output_name)
    else:
        self.scopes[scope_id] = layer_names[0]
        kernel_name = self.generate_name(self.scopes, scope_id+1) + "/kernel"
        bias_name = self.generate_name(self.scopes, scope_id+1) + "/bias"
        # Only the first sharer registers the weights.
        if (share_index == 0):
            self.add_weight(kernel_name, weight=kernel)
            if (bias is not None):
                self.add_weight(bias_name, weight=bias)
        if (bias is not None):
            tmp_name = self.add_matmul(input_name, kernel_name,
                output_name+"/matmul"+str(share_index))
            self.add_sum([tmp_name, bias_name], output_name)
        else:
            # BUGFIX: without a bias the original named the matmul result
            # "<output_name>/matmul<i>" and nothing ever produced a tensor
            # called output_name, yet output_name was returned — any
            # downstream lookup of it would fail. Name the matmul output
            # directly after output_name in this path.
            self.add_matmul(input_name, kernel_name, output_name)
    return output_name