def build_computation_graph(self):
    """Builds the computation graph."""
    # initialize the word embeddings using the pre-trained embedding file
    embeddings, emb_dim = load_embeddings_file(self.embeds, self.languages,
                                               lower=self.lower)
    self.w_in_dim = emb_dim
    num_words = len(set(embeddings.keys()).union(set(self.word2id.keys())))
    self.wembeds = self.model.add_lookup_parameters((num_words, emb_dim))
    # find the words that do not appear in the pre-trained embeddings:
    # start with all word ids and remove those that receive a pre-trained vector
    self.oov_id = set(range(num_words))
    for i, word in enumerate(embeddings.keys()):
        if word not in self.word2id:
            self.word2id[word] = len(self.word2id.keys())
        self.wembeds.init_row(self.word2id[word], embeddings[word])
        self.oov_id.remove(self.word2id[word])

    layers = []  # inner layers
    output_layers_dict = {}  # from task_name to actual predictor

    # we have a separate layer for each task for cross-stitching;
    # otherwise just 1 layer for all tasks with hard parameter sharing
    num_task_layers = len(self.task_names) if self.cross_stitch else 1
    cross_stitch_layers = []
    for layer_num in range(self.h_layers):
        print(">>> %d layer_num" % layer_num, flush=True)
        input_dim = self.w_in_dim if layer_num == 0 else self.h_dim
        task_layers = []
        # get one layer per task for cross-stitching or just one layer
        for task_id in range(num_task_layers):
            builder = dynet.LSTMBuilder(1, input_dim, self.h_dim, self.model)
            task_layers.append(BiRNNSequencePredictor(builder))
        layers.append(task_layers)
        if self.cross_stitch:
            print('Using cross-stitch units after layer %d...' % layer_num,
                  flush=True)
            cross_stitch_layers.append(
                CrossStitchLayer(self.model, len(self.task_names), self.h_dim,
                                 self.num_subspaces,
                                 self.cross_stitch_init_scheme))

    layer_stitch_layers = []
    # store at which layer to predict task
    for task_name in self.task_names:
        task_num_labels = len(self.task2label2id[task_name])
        print('Using an MLP for task losses.', flush=True)
        input_dim = self.h_dim * 2
        activation = dynet.softmax
        if 'sentiment' in task_name:
            # multi-label classification:
            # use one binary classification layer for each label
            layer_output = []
            for _ in range(task_num_labels):
                layer_output.append(
                    Layer(self.model, input_dim, 2, activation, mlp=True))
        else:
            layer_output = Layer(self.model, input_dim, task_num_labels,
                                 activation, mlp=True)
        output_layers_dict[task_name] = layer_output  # sequence predictor

    if self.h_layers > 1:
        # w/o cross-stitching, we only use one LayerStitchLayer
        layer_stitch_layers.append(
            LayerStitchLayer(self.model, self.h_layers, self.h_dim,
                             self.layer_stitch_init_scheme))

    print('#\nOutput layers: %d\n' % len(output_layers_dict), flush=True)
    predictors = dict()
    predictors["inner"] = layers
    predictors['cross_stitch'] = cross_stitch_layers
    predictors['layer_stitch'] = layer_stitch_layers
    predictors["output_layers_dict"] = output_layers_dict
    return predictors
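
# CrossStitchLayer and LayerStitchLayer are defined elsewhere in the project.
# The class below is only a minimal, hypothetical sketch of the cross-stitch
# idea, showing how the per-task BiRNN states built above could be mixed;
# the name, initialization and lack of subspace handling are assumptions for
# illustration, not the project's implementation.
import numpy as np
import dynet


class TinyCrossStitch(object):
    def __init__(self, model, num_tasks, hidden_dim):
        # start close to the identity so each task initially keeps its own state
        init = np.full((num_tasks, num_tasks), 0.1)
        np.fill_diagonal(init, 0.9)
        self.alphas = model.parameters_from_numpy(init)
        self.num_tasks = num_tasks

    def stitch(self, task_states):
        """task_states: one expression of size (hidden_dim,) per task."""
        alphas = self.alphas.expr()
        stacked = dynet.concatenate_cols(task_states)   # (hidden_dim, num_tasks)
        mixed = stacked * dynet.transpose(alphas)        # column t = sum_s alpha[t, s] * state_s
        # return one (hidden_dim, 1) column per task
        return [dynet.select_cols(mixed, [t]) for t in range(self.num_tasks)]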
def __init__(self, vocab, pos, rels, w2i, c2i, options):
    self.model = ParameterCollection()
    random.seed(1)
    self.trainer = AdamTrainer(self.model)
    # if options.learning_rate is not None:
    #     self.trainer = AdamTrainer(self.model, alpha=options.learning_rate)
    #     print("Adam initial learning rate:", options.learning_rate)
    self.activations = {'tanh': tanh, 'sigmoid': logistic, 'relu': rectify,
                        'tanh3': (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x)))}
    self.activation = self.activations[options.activation]

    self.blstmFlag = options.blstmFlag
    self.labelsFlag = options.labelsFlag
    self.costaugFlag = options.costaugFlag
    self.bibiFlag = options.bibiFlag

    self.ldims = options.lstm_dims
    self.wdims = options.wembedding_dims
    self.cdims = options.cembedding_dims
    self.layers = options.lstm_layers
    self.wordsCount = vocab
    self.vocab = {word: ind + 3 for word, ind in w2i.items()}
    self.pos = {word: ind for ind, word in enumerate(pos)}
    self.id2pos = {ind: word for ind, word in enumerate(pos)}
    self.c2i = c2i
    self.rels = {word: ind for ind, word in enumerate(rels)}
    self.irels = rels
    self.pdims = options.pembedding_dims

    self.vocab['*PAD*'] = 1
    self.vocab['*INITIAL*'] = 2

    self.wlookup = self.model.add_lookup_parameters((len(vocab) + 3, self.wdims))
    self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims))
    self.plookup = self.model.add_lookup_parameters((len(pos), self.pdims))

    if options.external_embedding is not None:
        ext_embeddings, ext_emb_dim = load_embeddings_file(options.external_embedding, lower=True)
        assert ext_emb_dim == self.wdims
        print("Initializing word embeddings by pre-trained vectors")
        count = 0
        for word in self.vocab:
            if word in ext_embeddings:
                count += 1
                self.wlookup.init_row(self.vocab[word], ext_embeddings[word])
        print("Vocab size: %d; #words having pretrained vectors: %d" %
              (len(self.vocab), count))

    self.pos_builders = [VanillaLSTMBuilder(1, self.wdims + self.cdims * 2, self.ldims, self.model),
                         VanillaLSTMBuilder(1, self.wdims + self.cdims * 2, self.ldims, self.model)]
    self.pos_bbuilders = [VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                          VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)]

    if self.bibiFlag:
        self.builders = [VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.pdims, self.ldims, self.model),
                         VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.pdims, self.ldims, self.model)]
        self.bbuilders = [VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                          VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)]
    elif self.layers > 0:
        self.builders = [VanillaLSTMBuilder(self.layers, self.wdims + self.cdims * 2 + self.pdims, self.ldims, self.model),
                         VanillaLSTMBuilder(self.layers, self.wdims + self.cdims * 2 + self.pdims, self.ldims, self.model)]
    else:
        self.builders = [SimpleRNNBuilder(1, self.wdims + self.cdims * 2, self.ldims, self.model),
                         SimpleRNNBuilder(1, self.wdims + self.cdims * 2, self.ldims, self.model)]

    self.ffSeqPredictor = FFSequencePredictor(Layer(self.model, self.ldims * 2, len(self.pos), softmax))

    self.hidden_units = options.hidden_units

    self.hidBias = self.model.add_parameters((self.ldims * 8))
    self.hidLayer = self.model.add_parameters((self.hidden_units, self.ldims * 8))
    self.hid2Bias = self.model.add_parameters((self.hidden_units))

    self.outLayer = self.model.add_parameters((1, self.hidden_units if self.hidden_units > 0 else self.ldims * 8))

    if self.labelsFlag:
        self.rhidBias = self.model.add_parameters((self.ldims * 8))
        self.rhidLayer = self.model.add_parameters((self.hidden_units, self.ldims * 8))
        self.rhid2Bias = self.model.add_parameters((self.hidden_units))
        self.routLayer = self.model.add_parameters(
            (len(self.irels), self.hidden_units if self.hidden_units > 0 else self.ldims * 8))
        self.routBias = self.model.add_parameters((len(self.irels)))
        self.ffRelPredictor = FFSequencePredictor(
            Layer(self.model, self.hidden_units if self.hidden_units > 0 else self.ldims * 8,
                  len(self.irels), softmax))

    self.char_rnn = RNNSequencePredictor(LSTMBuilder(1, self.cdims, self.cdims, self.model))
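
# The parameters above (hidBias, hidLayer, hid2Bias, outLayer) are consumed by
# scoring code defined elsewhere. The function below is only a hypothetical
# sketch of an arc-scoring MLP whose shapes are consistent with those
# parameters (assuming hidden_units > 0); `pair_vec` stands for whatever
# (ldims * 8)-dimensional feature vector the parser builds for a candidate
# (head, dependent) pair. It is not this repository's actual scoring routine.
def score_arc_sketch(self, pair_vec):
    hidden = self.activation(self.hid2Bias.expr() +
                             self.hidLayer.expr() * self.activation(pair_vec + self.hidBias.expr()))
    return self.outLayer.expr() * hidden  # expression holding a single arc score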
def __init__(self, vocab, pos, rels, w2i, c2i, m2i, t2i, morph_dict, options):
    self.model = ParameterCollection()
    random.seed(1)
    self.trainer = AdamTrainer(self.model)
    # if options.learning_rate is not None:
    #     self.trainer = AdamTrainer(self.model, alpha=options.learning_rate)
    #     print("Adam initial learning rate:", options.learning_rate)
    self.activations = {'tanh': tanh, 'sigmoid': logistic, 'relu': rectify,
                        'tanh3': (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x)))}
    self.activation = self.activations[options.activation]

    self.blstmFlag = options.blstmFlag
    self.labelsFlag = options.labelsFlag
    self.costaugFlag = options.costaugFlag
    self.bibiFlag = options.bibiFlag
    self.morphFlag = options.morphFlag
    self.goldMorphFlag = options.goldMorphFlag
    self.morphTagFlag = options.morphTagFlag
    self.goldMorphTagFlag = options.goldMorphTagFlag
    self.lowerCase = options.lowerCase
    self.mtag_encoding_composition_type = options.mtag_encoding_composition_type
    self.mtag_encoding_composition_alpha = options.mtag_encoding_composition_alpha

    self.ldims = options.lstm_dims
    self.wdims = options.wembedding_dims
    self.mdims = options.membedding_dims
    self.tdims = options.tembedding_dims
    self.cdims = options.cembedding_dims
    self.layers = options.lstm_layers
    self.wordsCount = vocab
    self.vocab = {word: ind + 3 for word, ind in w2i.items()}
    self.pos = {word: ind for ind, word in enumerate(pos)}
    self.id2pos = {ind: word for ind, word in enumerate(pos)}
    self.c2i = c2i
    self.m2i = m2i
    self.t2i = t2i
    self.i2t = {t2i[i]: i for i in self.t2i}
    self.morph_dict = morph_dict
    self.rels = {word: ind for ind, word in enumerate(rels)}
    self.irels = rels
    self.pdims = options.pembedding_dims
    self.tagging_attention_size = options.tagging_att_size

    self.vocab['*PAD*'] = 1
    self.vocab['*INITIAL*'] = 2

    self.wlookup = self.model.add_lookup_parameters((len(vocab) + 3, self.wdims))
    self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims))
    self.plookup = self.model.add_lookup_parameters((len(pos), self.pdims))

    self.ext_embeddings = None
    if options.external_embedding is not None:
        ext_embeddings, ext_emb_dim = load_embeddings_file(
            options.external_embedding, lower=self.lowerCase,
            type=options.external_embedding_type)
        assert ext_emb_dim == self.wdims
        print("Initializing word embeddings by pre-trained vectors")
        count = 0
        for word in self.vocab:
            if word in ext_embeddings:
                count += 1
                self.wlookup.init_row(self.vocab[word], ext_embeddings[word])
        self.ext_embeddings = ext_embeddings
        print("Vocab size: %d; #words having pretrained vectors: %d" %
              (len(self.vocab), count))

    self.morph_dims = 2 * 2 * self.mdims if self.morphFlag else 0
    self.mtag_dims = 2 * self.tdims if self.morphTagFlag else 0

    self.pos_builders = [
        VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims,
                           self.ldims, self.model),
        VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims,
                           self.ldims, self.model)]
    self.pos_bbuilders = [VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                          VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)]

    if self.bibiFlag:
        self.builders = [
            VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims + self.pdims,
                               self.ldims, self.model),
            VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims + self.pdims,
                               self.ldims, self.model)]
        self.bbuilders = [VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
                          VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)]
    elif self.layers > 0:
        self.builders = [
            VanillaLSTMBuilder(self.layers, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims + self.pdims,
                               self.ldims, self.model),
            VanillaLSTMBuilder(self.layers, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims + self.pdims,
                               self.ldims, self.model)]
    else:
        self.builders = [
            SimpleRNNBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims,
                             self.ldims, self.model),
            SimpleRNNBuilder(1, self.wdims + self.cdims * 2 + self.morph_dims + self.mtag_dims,
                             self.ldims, self.model)]

    self.ffSeqPredictor = FFSequencePredictor(Layer(self.model, self.ldims * 2, len(self.pos), softmax))

    self.hidden_units = options.hidden_units

    self.hidBias = self.model.add_parameters((self.ldims * 8))
    self.hidLayer = self.model.add_parameters((self.hidden_units, self.ldims * 8))
    self.hid2Bias = self.model.add_parameters((self.hidden_units))

    self.outLayer = self.model.add_parameters((1, self.hidden_units if self.hidden_units > 0 else self.ldims * 8))

    if self.labelsFlag:
        self.rhidBias = self.model.add_parameters((self.ldims * 8))
        self.rhidLayer = self.model.add_parameters((self.hidden_units, self.ldims * 8))
        self.rhid2Bias = self.model.add_parameters((self.hidden_units))
        self.routLayer = self.model.add_parameters(
            (len(self.irels), self.hidden_units if self.hidden_units > 0 else self.ldims * 8))
        self.routBias = self.model.add_parameters((len(self.irels)))
        self.ffRelPredictor = FFSequencePredictor(
            Layer(self.model, self.hidden_units if self.hidden_units > 0 else self.ldims * 8,
                  len(self.irels), softmax))

    self.char_rnn = RNNSequencePredictor(LSTMBuilder(1, self.cdims, self.cdims, self.model))

    if self.morphFlag:
        self.seg_lstm = [VanillaLSTMBuilder(1, self.cdims, self.cdims, self.model),
                         VanillaLSTMBuilder(1, self.cdims, self.cdims, self.model)]
        self.seg_hidLayer = self.model.add_parameters((1, self.cdims * 2))
        self.slookup = self.model.add_lookup_parameters((len(self.c2i), self.cdims))

        self.char_lstm = [VanillaLSTMBuilder(1, self.cdims, self.mdims, self.model),
                          VanillaLSTMBuilder(1, self.cdims, self.mdims, self.model)]
        self.char_hidLayer = self.model.add_parameters((self.mdims, self.mdims * 2))
        self.mclookup = self.model.add_lookup_parameters((len(self.c2i), self.cdims))

        self.morph_lstm = [VanillaLSTMBuilder(1, self.mdims * 2, self.wdims, self.model),
                           VanillaLSTMBuilder(1, self.mdims * 2, self.wdims, self.model)]
        self.morph_hidLayer = self.model.add_parameters((self.wdims, self.wdims * 2))
        self.mlookup = self.model.add_lookup_parameters((len(m2i), self.mdims))

        self.morph_rnn = RNNSequencePredictor(LSTMBuilder(1, self.mdims * 2, self.mdims * 2, self.model))

    if self.morphTagFlag:
        # All weights for morpheme tagging live here.
        # Decoder
        self.dec_lstm = VanillaLSTMBuilder(1, 2 * self.cdims + self.tdims + self.cdims * 2,
                                           self.cdims, self.model)
        # Attention
        self.attention_w1 = self.model.add_parameters((self.tagging_attention_size, self.cdims * 2))
        self.attention_w2 = self.model.add_parameters((self.tagging_attention_size, self.cdims * 2))
        self.attention_v = self.model.add_parameters((1, self.tagging_attention_size))
        # Attention over context
        self.attention_w1_context = self.model.add_parameters((self.tagging_attention_size, self.cdims * 2))
        self.attention_w2_context = self.model.add_parameters((self.tagging_attention_size, self.cdims * 2))
        self.attention_v_context = self.model.add_parameters((1, self.tagging_attention_size))
        # MLP - softmax output
        self.decoder_w = self.model.add_parameters((len(t2i), self.cdims))
        self.decoder_b = self.model.add_parameters((len(t2i)))

        self.mtag_rnn = RNNSequencePredictor(VanillaLSTMBuilder(1, self.tdims, self.tdims, self.model))
        self.tlookup = self.model.add_lookup_parameters((len(t2i), self.tdims))

        if self.mtag_encoding_composition_type != "None":
            self.mtag_encoding_f_w = self.model.add_parameters((2 * self.tdims, 4 * self.tdims))
            self.mtag_encoding_f_b = self.model.add_parameters((2 * self.tdims))
            self.mtag_encoding_b_w = self.model.add_parameters((2 * self.tdims, 4 * self.tdims))
            self.mtag_encoding_b_b = self.model.add_parameters((2 * self.tdims))
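
# The attention parameters above are used by decoding code defined elsewhere.
# Below is only a hypothetical sketch of the standard MLP attention that the
# shapes of attention_w1 / attention_w2 / attention_v suggest, assuming both
# the encoder states and the query vector are of size cdims * 2; it is not
# this repository's actual attention routine.
import dynet

def attend_sketch(self, encoder_states, query):
    w1 = self.attention_w1.expr()
    w2 = self.attention_w2.expr()
    v = self.attention_v.expr()
    H = dynet.concatenate_cols(encoder_states)                 # (cdims * 2, n)
    # score each encoder position against the query, then normalize
    scores = dynet.transpose(v * dynet.tanh(dynet.colwise_add(w1 * H, w2 * query)))
    weights = dynet.softmax(scores)                            # (n, 1)
    return H * weights                                         # context vector, (cdims * 2, 1)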
def build_computation_graph(self, num_words, num_chars):
    """Builds the computation graph."""
    # initialize the word embeddings
    if self.embeds_file:
        print('Loading embeddings', flush=True)
        embeddings, emb_dim = load_embeddings_file(self.embeds_file, lower=self.lower)
        assert emb_dim == self.in_dim
        # initialize all words with embeddings; for very large vocabularies,
        # we don't want to do this
        num_words = len(set(embeddings.keys()).union(set(self.word2id.keys())))
        # add the model parameters and initialize them
        wembeds = self.model.add_lookup_parameters((num_words, self.in_dim))
        cembeds = self.model.add_lookup_parameters((num_chars, self.c_in_dim))
        for i, word in enumerate(embeddings.keys()):
            if word not in self.word2id:
                self.word2id[word] = len(self.word2id.keys())
            wembeds.init_row(self.word2id[word], embeddings[word])
        print('Initialized %d word embeddings...' % i, flush=True)
    else:
        wembeds = self.model.add_lookup_parameters((num_words, self.in_dim))
        cembeds = self.model.add_lookup_parameters((num_chars, self.c_in_dim))

    layers = []  # inner layers
    output_layers_dict = {}  # from task_name to actual softmax predictor
    task_expected_at = {}  # maps task_name => output_layer id

    # connect output layers to tasks
    for output_layer_id, task_name in zip(self.pred_layer, self.task_names):
        assert output_layer_id <= self.h_layers, \
            ('Error: Task cannot be predicted at layer beyond model. '
             'Increase h_layers.')
        task_expected_at[task_name] = output_layer_id
    print('Task expected at', task_expected_at, flush=True)
    print('h_layers:', self.h_layers, flush=True)

    # we have a separate layer for each task for cross-stitching;
    # otherwise just 1 layer for all tasks with hard parameter sharing
    num_task_layers = len(self.task_names) if self.cross_stitch else 1
    cross_stitch_layers = []
    for layer_num in range(self.h_layers):
        print(">>> %d layer_num" % layer_num, flush=True)
        input_dim = self.in_dim + self.c_in_dim * 2 if layer_num == 0 \
            else self.h_dim
        task_layers = []
        # get one layer per task for cross-stitching or just one layer
        for task_id in range(num_task_layers):
            builder = dynet.LSTMBuilder(1, input_dim, self.h_dim, self.model)
            task_layers.append(BiRNNSequencePredictor(builder))
        layers.append(task_layers)
        if self.cross_stitch:
            print('Using cross-stitch units after layer %d...' % layer_num,
                  flush=True)
            cross_stitch_layers.append(
                CrossStitchLayer(self.model, len(self.task_names), self.h_dim,
                                 self.num_subspaces,
                                 self.cross_stitch_init_scheme))

    layer_stitch_layers = []
    # store at which layer to predict task
    for task_name in self.task_names:
        task_num_labels = len(self.task2tag2idx[task_name])
        # use a small MLP for the task losses
        print('Using an MLP for task losses.', flush=True)
        # if we concatenate, the FC layer has to have a larger input_dim
        input_dim = self.h_dim * 2 * self.h_layers \
            if self.layer_connect == CONCAT else self.h_dim * 2
        layer_output = Layer(self.model, input_dim, task_num_labels,
                             dynet.softmax, mlp=True)
        sequence_predictor = SequencePredictor(layer_output)
        output_layers_dict[task_name] = sequence_predictor
        if self.layer_connect == STITCH:
            print('Using layer-stitch units for task %s...' % task_name,
                  flush=True)
            # w/o cross-stitching, we only use one LayerStitchLayer
            layer_stitch_layers.append(
                LayerStitchLayer(self.model, self.h_layers, self.h_dim,
                                 self.layer_stitch_init_scheme))

    print('#\nOutput layers: %d\n' % len(output_layers_dict), flush=True)

    # initialize the char RNN
    char_rnn = RNNSequencePredictor(
        dynet.LSTMBuilder(1, self.c_in_dim, self.c_in_dim, self.model))

    predictors = dict()
    predictors["inner"] = layers
    predictors['cross_stitch'] = cross_stitch_layers
    predictors['layer_stitch'] = layer_stitch_layers
    predictors["output_layers_dict"] = output_layers_dict
    predictors["task_expected_at"] = task_expected_at
    return predictors, char_rnn, wembeds, cembeds
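
# LayerStitchLayer is defined elsewhere in the project. The class below is
# only a hypothetical sketch of the idea behind it: learn one mixing weight
# per hidden layer and return a weighted combination of a token's per-layer
# BiLSTM states. The name and the uniform initialization are assumptions for
# illustration, not the project's implementation.
import numpy as np
import dynet


class TinyLayerStitch(object):
    def __init__(self, model, num_layers, hidden_dim):
        self.betas = model.parameters_from_numpy(np.full(num_layers, 1.0 / num_layers))

    def stitch(self, layer_states):
        """layer_states: one expression of size (hidden_dim,) per hidden layer."""
        stacked = dynet.concatenate_cols(layer_states)   # (hidden_dim, num_layers)
        return stacked * self.betas.expr()               # weighted sum over layers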
def __init__(self, vocab, ner, rels, w2i, c2i, options):
    self.model = ParameterCollection()
    random.seed(1)
    self.trainer = AdamTrainer(self.model)
    if options.learning_rate is not None:
        self.trainer = AdamTrainer(self.model, alpha=options.learning_rate)
        print("Adam initial learning rate:", options.learning_rate)
    self.activations = {
        'tanh': tanh,
        'sigmoid': logistic,
        'relu': rectify,
        'tanh3': (lambda x: tanh(cwise_multiply(cwise_multiply(x, x), x)))
    }
    self.activation = self.activations[options.activation]

    self.blstmFlag = options.blstmFlag
    self.labelsFlag = options.labelsFlag
    self.costaugFlag = options.costaugFlag
    self.bibiFlag = options.bibiFlag

    self.ldims = options.lstm_dims
    self.wdims = options.wembedding_dims
    self.cdims = options.cembedding_dims
    self.layers = options.lstm_layers
    self.wordsCount = vocab
    self.vocab = {word: ind + 3 for word, ind in w2i.items()}
    self.ner = {word: ind for ind, word in enumerate(ner)}
    self.id2ner = {ind: word for ind, word in enumerate(ner)}
    self.c2i = c2i
    self.rels = {word: ind for ind, word in enumerate(rels)}
    self.id2rels = rels
    # print(self.rels)
    # print(self.id2rels)
    self.nerdims = options.nembedding_dims
    self.mixture_weight = options.mixture_weight
    # self.posCount = postagCount
    # self.pos2id = {word: ind + 1 for ind, word in enumerate(postagCount.keys())}
    # self.pdims = options.pembedding_dims

    self.vocab['*PAD*'] = 1
    self.vocab['*INITIAL*'] = 2

    self.wlookup = self.model.add_lookup_parameters((len(vocab) + 3, self.wdims))
    self.clookup = self.model.add_lookup_parameters((len(c2i), self.cdims))
    self.nerlookup = self.model.add_lookup_parameters((len(ner), self.nerdims))
    # self.plookup = self.model.add_lookup_parameters((len(postagCount.keys()) + 1, self.pdims))

    if options.external_embedding is not None:
        ext_embeddings, ext_emb_dim = load_embeddings_file(
            options.external_embedding, lower=True)
        assert ext_emb_dim == self.wdims
        print("Initializing word embeddings by pre-trained vectors")
        count = 0
        for word in self.vocab:
            if word in ext_embeddings:
                count += 1
                self.wlookup.init_row(self.vocab[word], ext_embeddings[word])
        print("Vocab size: %d; #words having pretrained vectors: %d" %
              (len(self.vocab), count))

    self.ner_builders = [
        VanillaLSTMBuilder(1, self.wdims + self.cdims * 2, self.ldims, self.model),
        VanillaLSTMBuilder(1, self.wdims + self.cdims * 2, self.ldims, self.model)
    ]
    self.ner_bbuilders = [
        VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
        VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)
    ]

    if self.bibiFlag:
        self.builders = [
            VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.nerdims,
                               self.ldims, self.model),
            VanillaLSTMBuilder(1, self.wdims + self.cdims * 2 + self.nerdims,
                               self.ldims, self.model)
        ]
        self.bbuilders = [
            VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model),
            VanillaLSTMBuilder(1, self.ldims * 2, self.ldims, self.model)
        ]
    elif self.layers > 0:
        self.builders = [
            VanillaLSTMBuilder(self.layers, self.wdims + self.cdims * 2 + self.nerdims,
                               self.ldims, self.model),
            VanillaLSTMBuilder(self.layers, self.wdims + self.cdims * 2 + self.nerdims,
                               self.ldims, self.model)
        ]
    else:
        self.builders = [
            SimpleRNNBuilder(1, self.wdims + self.cdims * 2 + self.nerdims,
                             self.ldims, self.model),
            SimpleRNNBuilder(1, self.wdims + self.cdims * 2 + self.nerdims,
                             self.ldims, self.model)
        ]

    # self.ffSeqPredictor = FFSequencePredictor(Layer(self.model, self.ldims * 2, len(self.ner), softmax))

    self.hidden_units = options.hidden_units

    self.char_rnn = RNNSequencePredictor(
        LSTMBuilder(1, self.cdims, self.cdims, self.model))

    self.crf_module = CRF(self.model, self.id2ner)

    self.tanh_layer_W = self.model.add_parameters((self.hidden_units, 2 * self.ldims))
    self.tanh_layer_b = self.model.add_parameters((self.hidden_units))

    self.last_layer_W = self.model.add_parameters((len(self.ner), self.hidden_units))
    self.last_layer_b = self.model.add_parameters((len(self.ner)))

    W = orthonormal_initializer(self.hidden_units, 2 * self.ldims)

    self.head_layer_W = self.model.parameters_from_numpy(W)
    self.head_layer_b = self.model.add_parameters(
        (self.hidden_units,), init=dynet.ConstInitializer(0.))

    self.dep_layer_W = self.model.parameters_from_numpy(W)
    self.dep_layer_b = self.model.add_parameters(
        (self.hidden_units,), init=dynet.ConstInitializer(0.))

    self.rel_U = self.model.add_parameters(
        (len(self.rels) * self.hidden_units, self.hidden_units),
        init=dynet.ConstInitializer(0.))

    self.rel_W = self.model.parameters_from_numpy(
        orthonormal_initializer(len(self.rels), 2 * self.hidden_units))
    # self.rel_W = self.model.add_parameters((len(self.rels), self.hidden_units * 2))
    self.rel_b = self.model.add_parameters((len(self.rels),),
                                           init=dynet.ConstInitializer(0.))
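
# The head/dep projections and rel_U / rel_W / rel_b above feed a relation
# classifier defined elsewhere. The function below is only a hypothetical
# sketch of a biaffine scorer consistent with those parameter shapes;
# head_vec and dep_vec stand for the (2 * ldims)-dimensional BiLSTM outputs
# of the two candidate entity positions. It is not this repository's code.
import dynet

def score_relations_sketch(self, head_vec, dep_vec):
    head = self.activation(self.head_layer_W.expr() * head_vec + self.head_layer_b.expr())
    dep = self.activation(self.dep_layer_W.expr() * dep_vec + self.dep_layer_b.expr())
    num_rels = len(self.rels)
    # bilinear term: (num_rels * hidden_units, hidden_units) times dep, reshaped to (num_rels, hidden_units)
    bilinear = dynet.reshape(self.rel_U.expr() * dep, (num_rels, self.hidden_units)) * head
    linear = self.rel_W.expr() * dynet.concatenate([head, dep])
    return bilinear + linear + self.rel_b.expr()  # one score per relation label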