class SRLNetwork(GradientOptimizable):
    """Semantic-role-labeling network.

    Pipeline: lookup-table embeddings (word / POS / word<->verb distances)
    -> feature concatenation -> 1-D convolution with max pooling ->
    sigmoid hidden (perception) layers -> softmax output, trained with
    cross-entropy.

    Each row of the input matrix ``X`` encodes one <word, verb> pair of a
    single sentence; the sentence-level features are packed into the row
    prefix (see ``__output`` for the exact layout).
    """

    def __init__(self, problem_character=None, nn_architecture=None,
                 trans_mat_prior=None):
        """Build all layers and compile the Theano prediction functions.

        :param problem_character: dict with vocabulary sizes
            (``word_num``, ``POS_type_num``, ``dist_to_verb_num``,
            ``dist_to_word_num``, ``SRL_type_num``).
        :param nn_architecture: object exposing ``word_feature_dim``,
            ``pos_feature_dim``, ``dist_feature_dim``, ``conv_output_dim``,
            ``conv_window_height`` and ``hidden_layer_output_dims``.
        :param trans_mat_prior: currently unused; kept for interface
            compatibility with a (disabled) PathTransitionLayer output.
        :raises ValueError: if either required argument is missing.
        """
        if problem_character is None or nn_architecture is None:
            raise ValueError("both problem and architecture must be provided")

        word_num = problem_character['word_num']
        POS_type_num = problem_character['POS_type_num']
        dist_to_verb_num = problem_character['dist_to_verb_num']
        dist_to_word_num = problem_character['dist_to_word_num']

        # Word embedding; output shape: (batch, sentence_len, word_feature_dim)
        self.word_embedding_layer = LookupTableLayer(
            table_size=word_num,
            feature_num=nn_architecture.word_feature_dim)

        # POS-tag embedding; output shape: (batch, sentence_len, pos_feature_dim)
        self.pos_embedding_layer = LookupTableLayer(
            table_size=POS_type_num,
            feature_num=nn_architecture.pos_feature_dim)

        # Distance embeddings: word->X distance and verb->X distance each get
        # their own lookup table.
        self.locdiff_word_embedding_layer = LookupTableLayer(
            table_size=dist_to_word_num,
            feature_num=nn_architecture.dist_feature_dim)
        self.locdiff_verb_embedding_layer = LookupTableLayer(
            table_size=dist_to_verb_num,
            feature_num=nn_architecture.dist_feature_dim)

        # Per-position conv input: 3 word vectors (cur word, cur verb,
        # sentence word), 3 POS vectors, 4 distance vectors — matches the
        # concatenation order built in __output.
        conv_input_dim = (nn_architecture.word_feature_dim * 3
                          + nn_architecture.pos_feature_dim * 3
                          + nn_architecture.dist_feature_dim * 4)
        conv_shape = (nn_architecture.conv_output_dim,
                      1,
                      nn_architecture.conv_window_height,
                      conv_input_dim)
        self.conv_layer = Conv1DMaxPoolLayer(activator_type="sigmoid",
                                             tensor_shape=conv_shape)

        self.embedding_conv_layers = [self.word_embedding_layer,
                                      self.pos_embedding_layer,
                                      self.locdiff_word_embedding_layer,
                                      self.locdiff_verb_embedding_layer,
                                      self.conv_layer]

        # Sigmoid hidden layers, then a softmax layer over SRL label types.
        input_dim = nn_architecture.conv_output_dim
        self.perception_layers = []
        for output_dim in nn_architecture.hidden_layer_output_dims:
            self.perception_layers.append(
                PerceptionLayer(input_dim=input_dim,
                                output_dim=output_dim,
                                activator_type="sigmoid"))
            input_dim = output_dim
        self.perception_layers.append(
            PerceptionLayer(input_dim=input_dim,
                            output_dim=problem_character["SRL_type_num"],
                            activator_type="softmax"))

        self.cost = create_cost({"type": "cross_entropy"})
        self.__compile_functions()

    def __compile_functions(self):
        """Compile the Theano probability and argmax-prediction functions."""
        X = theano.tensor.matrix("X")
        self.__output_func = theano.function([X], outputs=self.__output(X))
        self.__predict_expr = theano.tensor.argmax(self.__output(X), axis=1)
        self.__predict_func = theano.function([X],
                                              outputs=self.__predict_expr)

    def __output(self, X):
        """Build the symbolic network output (label probabilities) for X.

        Packed row layout (one row per <word, verb> pair of ONE sentence;
        sentence-level fields are read from row 0 only):

          [0]                       sentence_len
          [1 : 1+len]               sentence word ids
          [1+len : 1+2*len]         sentence POS ids
          then, per row: cur word id, cur verb id, cur word POS id,
          cur verb POS id, cur word loc id (unused), cur verb loc id
          (unused), word->verb distance id, verb->word distance id,
          then ``len`` ids of other-word->cur-verb distances and ``len``
          ids of other-word->cur-word distances.
        """
        start_idx = 0
        sentence_len = X[0, start_idx].astype('int32')
        start_idx += 1
        sentence_word_id = X[0, start_idx:start_idx + sentence_len].astype('int32')
        start_idx += sentence_len
        sentence_pos_id = X[0, start_idx:start_idx + sentence_len].astype('int32')
        start_idx += sentence_len
        cur_word_id = X[:, start_idx].astype('int32')
        start_idx += 1
        cur_verb_id = X[:, start_idx].astype('int32')
        start_idx += 1
        cur_word_pos_id = X[:, start_idx].astype('int32')
        start_idx += 1
        cur_verb_pos_id = X[:, start_idx].astype('int32')
        start_idx += 1
        cur_word_loc_id = X[:, start_idx].astype('int32')  # parsed but unused
        start_idx += 1
        cur_verb_loc_id = X[:, start_idx].astype('int32')  # parsed but unused
        start_idx += 1
        cur_word2verb_dist_id = X[:, start_idx].astype('int32')
        start_idx += 1
        cur_verb2word_dist_id = X[:, start_idx].astype('int32')
        start_idx += 1
        other_word2verb_dist_id = X[:, start_idx:start_idx + sentence_len].astype('int32')
        start_idx += sentence_len
        other_word2word_dist_id = X[:, start_idx:start_idx + sentence_len].astype('int32')
        start_idx += sentence_len

        wordvec = self.word_embedding_layer.output(inputs=cur_word_id)
        verbvec = self.word_embedding_layer.output(inputs=cur_verb_id)
        wordPOSvec = self.pos_embedding_layer.output(inputs=cur_word_pos_id)
        verbPOSvec = self.pos_embedding_layer.output(inputs=cur_verb_pos_id)

        # NOTE(review): word2verb distances go through the *verb* distance
        # table and verb2word through the *word* table — looks intentional
        # (table keyed by distance target) but worth confirming.
        locdiff_word2verb_vec = self.locdiff_verb_embedding_layer.output(
            inputs=cur_word2verb_dist_id)
        locdiff_verb2word_vec = self.locdiff_word_embedding_layer.output(
            inputs=cur_verb2word_dist_id)

        sentence_word_vec = self.word_embedding_layer.output(
            inputs=sentence_word_id)
        sentence_pos_vec = self.pos_embedding_layer.output(
            inputs=sentence_pos_id)
        other_loc2word_vec = self.locdiff_word_embedding_layer.output(
            inputs=other_word2word_dist_id)
        other_loc2verb_vec = self.locdiff_verb_embedding_layer.output(
            inputs=other_word2verb_dist_id)

        # One row per <word, verb> pair of one sentence, hence
        # batch_size == sentence_len for a single-sentence mini-batch.
        batch_size = sentence_len

        # Assemble the 4-D conv input (batch, 1, sentence_len, features):
        # per-pair vectors are broadcast over sentence positions, the
        # sentence-level vectors over the batch.
        conv_input_feature = T.concatenate(
            (
                wordvec.dimshuffle(0, "x", "x", 1).repeat(sentence_len, axis=2),
                verbvec.dimshuffle(0, "x", "x", 1).repeat(sentence_len, axis=2),
                wordPOSvec.dimshuffle(0, "x", "x", 1).repeat(sentence_len, axis=2),
                verbPOSvec.dimshuffle(0, "x", "x", 1).repeat(sentence_len, axis=2),
                locdiff_word2verb_vec.dimshuffle(0, "x", "x", 1).repeat(sentence_len, axis=2),
                locdiff_verb2word_vec.dimshuffle(0, "x", "x", 1).repeat(sentence_len, axis=2),
                sentence_word_vec.dimshuffle("x", "x", 0, 1).repeat(batch_size, axis=0),
                sentence_pos_vec.dimshuffle("x", "x", 0, 1).repeat(batch_size, axis=0),
                other_loc2word_vec.dimshuffle(0, "x", 1, 2),
                other_loc2verb_vec.dimshuffle(0, "x", 1, 2),
            ),
            axis=3)

        conv_out = self.conv_layer.output(conv_input_feature).reshape(
            (batch_size, -1))

        layer_input = conv_out
        for layer in self.perception_layers:
            layer_input = layer.output(layer_input)
        return layer_input

    def predict(self, X):
        """Return the argmax SRL label index for each row of X."""
        return self.__predict_func(X)

    def predict_prob(self, X):
        """Return the per-label probability distribution for each row of X."""
        return self.__output_func(X)

    def object_gradient(self, X, y):
        """Return [cost expression, flattened-gradient expression] wrt params."""
        object_expr = self.cost.cost(self.__output(X), y)
        params = self.params()
        grad = T.grad(object_expr, params)
        # Flatten each parameter gradient and join into one vector.
        gradient_vec = [param.flatten() for param in grad]
        gradient_expr = theano.tensor.concatenate(gradient_vec)
        return [object_expr, gradient_expr]

    def get_parameter(self):
        """Return all layer parameters concatenated into one flat vector."""
        all_layers = self.embedding_conv_layers + self.perception_layers
        param_vec = [layer.get_parameter() for layer in all_layers]
        return numpy.concatenate(param_vec)

    def set_parameter(self, param_vec):
        """Distribute a flat parameter vector back into every layer."""
        all_layers = self.embedding_conv_layers + self.perception_layers
        parameter_size_vec = [layer.get_parameter_size() for layer in all_layers]
        start_idx = [0] + list(numpy.cumsum(parameter_size_vec))
        for idx, layer in enumerate(all_layers):
            layer.set_parameter(
                param_vec[start_idx[idx]:start_idx[idx] + parameter_size_vec[idx]])

    def params(self):
        """Return the flat list of Theano shared parameters of every layer."""
        all_layers = self.embedding_conv_layers + self.perception_layers
        return list(itertools.chain.from_iterable(
            layer.params() for layer in all_layers))

    def __getstate__(self):
        """Serialize exactly the layers created by __init__.

        BUGFIX: the previous version read attributes that were never
        created (word_conv_layer, loc_embedding_layer, output_layer, ...)
        and raised AttributeError on every pickle attempt.
        """
        state = dict()
        state['name'] = "srl-machine"
        state['word_embedding_layer'] = self.word_embedding_layer.__getstate__()
        state['pos_embedding_layer'] = self.pos_embedding_layer.__getstate__()
        state['locdiff_word_embedding_layer'] = \
            self.locdiff_word_embedding_layer.__getstate__()
        state['locdiff_verb_embedding_layer'] = \
            self.locdiff_verb_embedding_layer.__getstate__()
        state['conv_layer'] = self.conv_layer.__getstate__()
        state['perception_layer_num'] = len(self.perception_layers)
        for idx, hidden_layer in enumerate(self.perception_layers):
            state['perception_layer_' + str(idx)] = hidden_layer.__getstate__()
        return state

    def __setstate__(self, state):
        """Restore every layer and recompile the prediction functions.

        BUGFIX: the previous version restored layers that __init__ never
        used and left conv_layer / perception_layers / cost / compiled
        Theano functions unset, so a restored object was unusable.
        """
        assert state['name'] == "srl-machine"
        self.word_embedding_layer = LookupTableLayer()
        self.word_embedding_layer.__setstate__(state["word_embedding_layer"])
        self.pos_embedding_layer = LookupTableLayer()
        self.pos_embedding_layer.__setstate__(state["pos_embedding_layer"])
        self.locdiff_word_embedding_layer = LookupTableLayer()
        self.locdiff_word_embedding_layer.__setstate__(
            state["locdiff_word_embedding_layer"])
        self.locdiff_verb_embedding_layer = LookupTableLayer()
        self.locdiff_verb_embedding_layer.__setstate__(
            state["locdiff_verb_embedding_layer"])
        self.conv_layer = Conv1DMaxPoolLayer()
        self.conv_layer.__setstate__(state["conv_layer"])
        # NOTE(review): assumes PerceptionLayer() is default-constructible,
        # matching how LookupTableLayer / Conv1DMaxPoolLayer are restored
        # here — confirm against the layer implementations.
        self.perception_layers = []
        for idx in range(state['perception_layer_num']):
            layer = PerceptionLayer()
            layer.__setstate__(state['perception_layer_' + str(idx)])
            self.perception_layers.append(layer)
        self.embedding_conv_layers = [self.word_embedding_layer,
                                      self.pos_embedding_layer,
                                      self.locdiff_word_embedding_layer,
                                      self.locdiff_verb_embedding_layer,
                                      self.conv_layer]
        self.cost = create_cost({"type": "cross_entropy"})
        self.__compile_functions()