def __init__(self, tagset_size, num_lstm_layers, hidden_dim, word_embeddings,
             train_vocab_ctr, use_char_rnn, charset_size,
             vocab_size=None, word_embedding_dim=None):
    self.model = dy.Model()
    self.tagset_size = tagset_size
    self.train_vocab_ctr = train_vocab_ctr

    if word_embeddings is not None:  # Use pretrained embeddings
        vocab_size = word_embeddings.shape[0]
        word_embedding_dim = word_embeddings.shape[1]
        self.words_lookup = self.model.add_lookup_parameters((vocab_size, word_embedding_dim))
        self.words_lookup.init_from_array(word_embeddings)
    else:
        self.words_lookup = self.model.add_lookup_parameters((vocab_size, word_embedding_dim))

    # Char LSTM Parameters
    self.use_char_rnn = use_char_rnn
    if use_char_rnn:
        self.char_lookup = self.model.add_lookup_parameters((charset_size, 20))
        self.char_bi_lstm = dy.BiRNNBuilder(1, 20, 128, self.model, dy.LSTMBuilder)

    # Word LSTM parameters
    if use_char_rnn:
        input_dim = word_embedding_dim + 128
    else:
        input_dim = word_embedding_dim
    self.word_bi_lstm = dy.BiRNNBuilder(num_lstm_layers, input_dim, hidden_dim, self.model, dy.LSTMBuilder)

    # Matrix that maps from Bi-LSTM output to num tags
    self.lstm_to_tags_params = self.model.add_parameters((tagset_size, hidden_dim))
    self.lstm_to_tags_bias = self.model.add_parameters(tagset_size)
    self.mlp_out = self.model.add_parameters((tagset_size, tagset_size))
    self.mlp_out_bias = self.model.add_parameters(tagset_size)
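# A minimal forward-pass sketch for the parameters defined above -- an
# assumption-laden companion, not code from the original model. It presumes
# the surrounding class exposes the attributes created in __init__, and that
# `word_ids` and `char_ids_per_word` (hypothetical inputs) are lists of ints.
def build_tagging_graph(self, word_ids, char_ids_per_word):
    dy.renew_cg()
    inputs = []
    for w, chars in zip(word_ids, char_ids_per_word):
        word_vec = self.words_lookup[w]
        if self.use_char_rnn:
            char_vecs = [self.char_lookup[c] for c in chars]
            # transduce() yields one output per character; the last output
            # summarizes the whole spelling in a fixed-size vector.
            word_vec = dy.concatenate([word_vec, self.char_bi_lstm.transduce(char_vecs)[-1]])
        inputs.append(word_vec)
    lstm_outs = self.word_bi_lstm.transduce(inputs)
    W = dy.parameter(self.lstm_to_tags_params)
    b = dy.parameter(self.lstm_to_tags_bias)
    return [dy.affine_transform([b, W, h]) for h in lstm_outs]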
def __init__(self, vocab_size, char_size, char_embedding_dim, char_hidden_size,
             word_embedding_dim, hidden_dim, label_size, lstm_num_layers,
             pattern_hidden_dim, pattern_embeddings_dim, rule_size, max_rule_length):
    self.vocab_size = vocab_size
    self.char_size = char_size
    self.word_embedding_dim = word_embedding_dim
    self.char_embedding_dim = char_embedding_dim
    self.hidden_dim = hidden_dim
    self.model = dy.Model()
    self.trainer = dy.SimpleSGDTrainer(self.model)
    self.label_size = label_size
    self.lstm_num_layers = lstm_num_layers
    self.char_hidden_size = char_hidden_size
    self.rule_size = rule_size
    self.max_rule_length = max_rule_length
    self.pattern_hidden_dim = pattern_hidden_dim
    self.pattern_embeddings_dim = pattern_embeddings_dim

    self.word_embeddings = self.model.add_lookup_parameters(
        (self.vocab_size, self.word_embedding_dim))
    self.char_embeddings = self.model.add_lookup_parameters(
        (self.char_size, self.char_embedding_dim))
    self.character_lstm = dy.BiRNNBuilder(
        self.lstm_num_layers,
        self.char_embedding_dim,
        self.char_hidden_size,
        self.model,
        dy.VanillaLSTMBuilder,
    )
    self.encoder_lstm = dy.BiRNNBuilder(
        self.lstm_num_layers,
        self.word_embedding_dim,  # + char_hidden_size,
        self.hidden_dim,
        self.model,
        dy.VanillaLSTMBuilder,
    )
    self.attention_weight = self.model.add_parameters((1, self.hidden_dim))
    self.lb = self.model.add_parameters(
        (self.hidden_dim, 2 * self.hidden_dim))
    self.lb_bias = self.model.add_parameters((self.hidden_dim))
    self.lb2 = self.model.add_parameters((1, self.hidden_dim))
    self.lb2_bias = self.model.add_parameters((1))
    self.pattern_embeddings = self.model.add_lookup_parameters(
        (self.rule_size, self.pattern_embeddings_dim))
    self.decoder_lstm = dy.LSTMBuilder(
        self.lstm_num_layers,
        self.hidden_dim + self.pattern_embeddings_dim,
        self.pattern_hidden_dim,
        self.model,
    )
    self.pt = self.model.add_parameters(
        (self.rule_size, self.pattern_hidden_dim + self.hidden_dim))
    self.pt_bias = self.model.add_parameters((self.rule_size))
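# A hedged sketch of how the (1, hidden_dim) attention_weight above could
# score encoder states; `attend` and `encoder_states` are hypothetical names,
# not taken from the original model, which may combine attention with its
# lb/lb2 layers differently.
def attend(self, encoder_states):
    H = dy.concatenate_cols(encoder_states)   # (hidden_dim, seq_len)
    W = dy.parameter(self.attention_weight)   # (1, hidden_dim)
    alphas = dy.softmax(dy.transpose(W * H))  # one weight per position
    return H * alphas                         # (hidden_dim, 1) context vector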
def __init__(self, tagset_sizes, num_lstm_layers, hidden_dim, word_embeddings,
             no_we_update, use_char_rnn, charset_size, char_embedding_dim,
             att_props=None, vocab_size=None, word_embedding_dim=None):
    '''
    :param tagset_sizes: dictionary of attribute_name:number_of_possible_tags
    :param num_lstm_layers: number of desired LSTM layers
    :param hidden_dim: size of hidden dimension (same for all LSTM layers, including character-level)
    :param word_embeddings: pre-trained list of embeddings, assumes order by word ID (optional)
    :param no_we_update: if toggled, don't update embeddings
    :param use_char_rnn: use "char->tag" option, i.e. concatenate character-level LSTM outputs to word representations (and train underlying LSTM). Only 1-layer is supported.
    :param charset_size: number of characters expected in dataset (needed for character embedding initialization)
    :param char_embedding_dim: desired character embedding dimension
    :param att_props: proportion of loss to assign each attribute for back-propagation weighting (optional)
    :param vocab_size: number of words in model (ignored if pre-trained embeddings are given)
    :param word_embedding_dim: desired word embedding dimension (ignored if pre-trained embeddings are given)
    '''
    self.model = dy.Model()
    self.tagset_sizes = tagset_sizes
    self.attributes = tagset_sizes.keys()
    self.we_update = not no_we_update
    if att_props is not None:
        # .items() (not the Python 2 .iteritems()) keeps this consistent
        # with the .items() call at the bottom of this method.
        self.att_props = defaultdict(float, {att: (1.0 - p) for att, p in att_props.items()})
    else:
        self.att_props = None

    if word_embeddings is not None:  # Use pretrained embeddings
        vocab_size = word_embeddings.shape[0]
        word_embedding_dim = word_embeddings.shape[1]
    self.words_lookup = self.model.add_lookup_parameters((vocab_size, word_embedding_dim))
    if word_embeddings is not None:
        self.words_lookup.init_from_array(word_embeddings)

    # Char LSTM Parameters
    self.use_char_rnn = use_char_rnn
    if use_char_rnn:
        self.char_lookup = self.model.add_lookup_parameters((charset_size, char_embedding_dim))
        self.char_bi_lstm = dy.BiRNNBuilder(1, char_embedding_dim, hidden_dim, self.model, dy.LSTMBuilder)

    # Word LSTM parameters
    if use_char_rnn:
        input_dim = word_embedding_dim + hidden_dim
    else:
        input_dim = word_embedding_dim
    self.word_bi_lstm = dy.BiRNNBuilder(num_lstm_layers, input_dim, hidden_dim, self.model, dy.LSTMBuilder)

    # Matrix that maps from Bi-LSTM output to num tags
    self.lstm_to_tags_params = {}
    self.lstm_to_tags_bias = {}
    self.mlp_out = {}
    self.mlp_out_bias = {}
    for att, set_size in tagset_sizes.items():
        self.lstm_to_tags_params[att] = self.model.add_parameters((set_size, hidden_dim))
        self.lstm_to_tags_bias[att] = self.model.add_parameters(set_size)
        self.mlp_out[att] = self.model.add_parameters((set_size, set_size))
        self.mlp_out_bias[att] = self.model.add_parameters(set_size)
def __init__(
        self,
        model,
        tag_vocab,
        word_vocab,
        label_vocab,
        tag_embedding_dim,
        word_embedding_dim,
        label_embedding_dim,
        lstm_layers,
        lstm_dim,
        label_hidden_dim,
        split_hidden_dim,
        dropout,
):
    self.spec = locals()
    self.spec.pop("self")
    self.spec.pop("model")

    self.model = model.add_subcollection("Parser")
    self.tag_vocab = tag_vocab
    self.word_vocab = word_vocab
    self.label_vocab = label_vocab
    self.lstm_dim = lstm_dim

    self.tag_embeddings = self.model.add_lookup_parameters(
        (tag_vocab.size, tag_embedding_dim))
    self.word_embeddings = self.model.add_lookup_parameters(
        (word_vocab.size, word_embedding_dim))
    self.label_embeddings = self.model.add_lookup_parameters(
        (label_vocab.size, label_embedding_dim))

    self.lstm = dy.BiRNNBuilder(lstm_layers,
                                tag_embedding_dim + word_embedding_dim,
                                2 * lstm_dim, self.model,
                                dy.VanillaLSTMBuilder)
    self.lstm2 = dy.BiRNNBuilder(lstm_layers, label_embedding_dim,
                                 2 * lstm_dim, self.model,
                                 dy.VanillaLSTMBuilder)

    self.f_label = Feedforward(self.model, 2 * lstm_dim, [label_hidden_dim],
                               label_vocab.size)
    self.f_connect = Feedforward(self.model, 2 * lstm_dim,
                                 [split_hidden_dim], 2)
    self.crf = crf(self.model, label_vocab.size)
    self.crf_connect = crf(self.model, 2)

    self.dropout = dropout
def __init__(self, config, model):
    self.num_layers = 1
    self.input_dim = config.embedding_dim
    self.model = model
    self.use_char_rnn = config.use_char_rnn
    self.char_rnn = CharRNN(config, model) if self.use_char_rnn else None
    input_size = self.input_dim if not self.char_rnn else self.input_dim + config.charlstm_hidden_dim
    self.bilstm = dy.BiRNNBuilder(1, input_size, config.hidden_dim, self.model, dy.LSTMBuilder)
    print("Input to word-level BiLSTM size: %d" % (input_size))
    print("BiLSTM hidden size: %d" % (config.hidden_dim))
    # self.bilstm.set_dropout(config.dropout_bilstm)
    self.num_labels = len(config.label2idx)
    self.label2idx = config.label2idx
    self.labels = config.idx2labels
    # print(config.hidden_dim)
    # self.tanh_w = self.model.add_parameters((config.tanh_hidden_dim, config.hidden_dim))
    # self.tanh_bias = self.model.add_parameters((config.tanh_hidden_dim,))
    self.linear_w = self.model.add_parameters(
        (self.num_labels, config.hidden_dim))
    self.linear_bias = self.model.add_parameters((self.num_labels, ))
    self.transition = self.model.add_lookup_parameters(
        (self.num_labels, self.num_labels))
    vocab_size = len(config.word2idx)
    self.word2idx = config.word2idx
    print("Word Embedding size: %d x %d" % (vocab_size, self.input_dim))
    self.word_embedding = self.model.add_lookup_parameters(
        (vocab_size, self.input_dim), init=config.word_embedding)
    self.dropout = config.dropout
def setUp(self):
    self.file = "bilstm.model"
    # create models
    self.m = dy.ParameterCollection()
    self.m2 = dy.ParameterCollection()
    # Create birnn
    self.b = dy.BiRNNBuilder(2, 10, 10, self.m, dy.LSTMBuilder)
def __init__(self, words, embeds, embeds_dim, hidden_size, projection_size, target_size):
    self.words = words
    self.vocab_size = len(words)
    self.embeds_dim = embeds_dim
    self.hidden_size = hidden_size
    self.embeds = embeds
    self.projection_size = projection_size
    self.target_size = target_size
    self.model = dy.Model()
    self.trainer = dy.AdamTrainer(self.model)
    # print(self.trainer.learning_rate)
    if np.any(embeds):
        self.word_embeddings = self.model.lookup_parameters_from_numpy(self.embeds)
    else:
        self.word_embeddings = self.model.add_lookup_parameters((self.vocab_size, self.embeds_dim))
    self.encoder_lstm = dy.BiRNNBuilder(
        2,
        self.embeds_dim,
        self.hidden_size,
        self.model,
        dy.VanillaLSTMBuilder,
    )
    self.Phis = list()
    for i in range(self.projection_size):
        self.Phis.append(self.model.add_parameters((self.hidden_size, self.hidden_size)))
    self.W = self.model.add_parameters((target_size, projection_size))
    self.b = self.model.add_parameters((target_size))
def __init__(self, model, byte_embed_dim, hash_dim):
    model = self.model = model.add_subcollection(self.__class__.__name__)
    self.spec = byte_embed_dim, hash_dim
    self.byte_embeds = Embedder(model, range(256), byte_embed_dim)
    self.hashLSTM = dy.BiRNNBuilder(1, byte_embed_dim, hash_dim, model, dy.LSTMBuilder)
def __init__(self, tagset_size, num_lstm_layers, hidden_dim, input_dim, model=None):
    if model is None:
        self.model = dy.Model()
    else:
        self.model = model
    self.tagset_size = tagset_size + 2
    self.START = tagset_size
    self.STOP = tagset_size + 1

    # LSTM parameters
    self.bi_lstm = dy.BiRNNBuilder(num_lstm_layers, input_dim, hidden_dim, self.model, dy.LSTMBuilder)

    # Matrix that maps from Bi-LSTM output to num tags
    self.lstm_to_tags_params = self.model.add_parameters(
        (self.tagset_size, hidden_dim))
    self.lstm_to_tags_bias = self.model.add_parameters(self.tagset_size)
    self.mlp_out = self.model.add_parameters(
        (self.tagset_size, self.tagset_size))
    self.mlp_out_bias = self.model.add_parameters(self.tagset_size)

    # Transition matrix for tagging layer, [i,j] is score of transitioning to i from j
    self.transitions = self.model.add_lookup_parameters(
        (self.tagset_size, self.tagset_size))
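# A hedged CRF scoring sketch built only on the attributes above; the method
# name `score_sentence` and its inputs (`emissions`: one score vector per
# token, `tags`: gold tag ids) are hypothetical. It follows the [i, j] =
# "score of transitioning to i from j" convention noted in the comment.
def score_sentence(self, emissions, tags):
    padded = [self.START] + tags
    score = dy.scalarInput(0.0)
    for i, emit in enumerate(emissions):
        score = score \
            + dy.pick(self.transitions[padded[i + 1]], padded[i]) \
            + dy.pick(emit, padded[i + 1])
    # close the sequence with a transition into the STOP tag
    return score + dy.pick(self.transitions[self.STOP], padded[-1])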
def __init__(self, vocab_size, output_size, embed_size=86, hidden_size=8, embeddings=None):
    self.name = self.__class__.__name__
    self.model = dy.ParameterCollection()
    self.trainer = dy.SimpleSGDTrainer(self.model)

    # Embedding
    if embeddings is None:
        self.lookup = self.model.add_lookup_parameters(
            (vocab_size, embed_size))
    else:
        self.lookup = self.model.lookup_parameters_from_numpy(embeddings)
        (embed_size, vocab_size), _ = self.lookup.dim()

    # Bi-LSTM
    self.bilstm = dy.BiRNNBuilder(num_layers=1,
                                  input_dim=embed_size,
                                  hidden_dim=hidden_size * 2,
                                  model=self.model,
                                  rnn_builder_factory=dy.LSTMBuilder)
    self.num_tags = output_size

    # Dense layer
    self.w = self.model.add_parameters((self.num_tags, hidden_size * 2))
    self.b = self.model.add_parameters(self.num_tags)

    # For CRF
    self.trans_mat = self.model.add_parameters(
        (self.num_tags, self.num_tags))
def __init__(self, tagset_size, num_lstm_layers, hidden_dim, word_embeddings,
             morpheme_embeddings, morpheme_projection, morpheme_decomps, train_vocab_ctr):
    self.model = dy.Model()
    self.tagset_size = tagset_size
    self.train_vocab_ctr = train_vocab_ctr

    # Word embedding parameters
    vocab_size = word_embeddings.shape[0]
    word_embedding_dim = word_embeddings.shape[1]
    self.words_lookup = self.model.add_lookup_parameters((vocab_size, word_embedding_dim))
    self.words_lookup.init_from_array(word_embeddings)

    # Morpheme embedding parameters
    # morpheme_vocab_size = morpheme_embeddings.shape[0]
    # morpheme_embedding_dim = morpheme_embeddings.shape[1]
    # self.morpheme_lookup = self.model.add_lookup_parameters((morpheme_vocab_size, morpheme_embedding_dim))
    # self.morpheme_lookup.init_from_array(morpheme_embeddings)
    # self.morpheme_decomps = morpheme_decomps
    # if morpheme_projection is not None:
    #     self.morpheme_projection = self.model.add_parameters((word_embedding_dim, morpheme_embedding_dim))
    #     self.morpheme_projection.init_from_array(morpheme_projection)
    # else:
    #     self.morpheme_projection = None

    # LSTM parameters
    self.bi_lstm = dy.BiRNNBuilder(num_lstm_layers, word_embedding_dim, hidden_dim, self.model, dy.LSTMBuilder)

    # Matrix that maps from Bi-LSTM output to num tags
    self.lstm_to_tags_params = self.model.add_parameters((tagset_size, hidden_dim))
    self.lstm_to_tags_bias = self.model.add_parameters(tagset_size)
    self.mlp_out = self.model.add_parameters((tagset_size, tagset_size))
    self.mlp_out_bias = self.model.add_parameters(tagset_size)

    # Transition matrix for tagging layer, [i,j] is score of transitioning to i from j
    self.transitions = self.model.add_lookup_parameters((tagset_size, tagset_size))
def setUp(self):
    self.file = "bilstm.model"
    # create models
    self.m = dy.Model()
    self.m2 = dy.Model()
    # Create birnn
    self.b = dy.BiRNNBuilder(2, 10, 10, self.m, dy.LSTMBuilder)
def init_builder(self, encoder_spec, model):
    params = self.use_params(encoder_spec, [
        "layers", "input_dim", "hidden_dim", model, dy.VanillaLSTMBuilder,
        "dropout"
    ], map_to_default_layer_dim=["hidden_dim"])
    self.dropout = params.pop()
    self.builder = dy.BiRNNBuilder(*params)
def setUp(self):
    self.file = "bilstm.model"
    self.m = dy.ParameterCollection()
    self.m2 = dy.ParameterCollection()
    self.b = dy.BiRNNBuilder(2, 10, 10, self.m, dy.LSTMBuilder)
    # Custom parameters
    self.W1 = self.m.add_parameters(10)
    self.W2 = self.m.add_parameters(12)
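# A plausible companion test for the fixture above -- hedged, not from the
# original suite. It assumes the dy.save/dy.load object API, which
# round-trips builders and parameters into a fresh ParameterCollection.
def test_save_load(self):
    dy.save(self.file, [self.b, self.W1, self.W2])
    b2, W1_2, W2_2 = dy.load(self.file, self.m2)
    self.assertEqual(self.W1.dim(), W1_2.dim())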
def __init__(
        self,
        model,
        tag_vocab,
        char_vocab,
        word_vocab,
        char_embedding_dim,
        char_lstm_layers,
        char_lstm_dim,
        word_embedding_dim,
        lstm_layers,
        lstm_dim,
        label_hidden_dim,
        dropout,
):
    self.spec = locals()
    self.spec.pop("self")
    self.spec.pop("model")

    self.model = model.add_subcollection("BiLSTMTagger")
    self.tag_vocab = tag_vocab
    self.char_vocab = char_vocab
    self.word_vocab = word_vocab
    self.char_lstm_dim = char_lstm_dim
    self.lstm_dim = lstm_dim

    self.char_embeddings = self.model.add_lookup_parameters(
        (char_vocab.size, char_embedding_dim))
    self.word_embeddings = self.model.add_lookup_parameters(
        (word_vocab.size, word_embedding_dim))
    self.trans_embeddings = self.model.add_lookup_parameters(
        (tag_vocab.size, tag_vocab.size))

    self.char_lstm = dy.BiRNNBuilder(char_lstm_layers, char_embedding_dim,
                                     2 * char_lstm_dim, self.model,
                                     dy.VanillaLSTMBuilder)
    self.lstm = dy.BiRNNBuilder(lstm_layers,
                                2 * char_lstm_dim + word_embedding_dim,
                                2 * lstm_dim, self.model,
                                dy.VanillaLSTMBuilder)

    self.f_label = utils.OutputLayer(self.model, 2 * lstm_dim,
                                     label_hidden_dim, self.tag_vocab.size)
    self.dropout = dropout
def init_rnn_params(self, indexed_dim):
    rnn = dy.BiRNNBuilder(
        self.lstm_layers,
        self.lstm_layer_dim if self.embedding_layers else indexed_dim,
        self.lstm_layer_dim,
        self.model,
        self.rnn_builder())
    self.params["birnn"] = rnn
    return [p for f, b in rnn.builder_layers
            for r in (f, b)
            for l in r.get_parameters()
            for p in l]
def __init__(self, args, model, c2i, emb, inf_rules, dev_sents):
    super().__init__(args, model)
    self.pred_key = 'token-oword'  # identify the prediction
    self.train_input_key = 'gold_linearized_tokens'  # or 'linearized_tokens'
    self.train_output_key = 'gold_linearized_tokens'  # or 'linearized_tokens'
    if 'swap' in self.args.tasks:
        self.pred_input_key = 'sorted_tokens'
    elif 'gen' in self.args.tasks:
        self.pred_input_key = 'generated_tokens'
    elif any(task in ['tsp', 'tsp-full', 'lin'] for task in self.args.tasks):
        self.pred_input_key = 'linearized_tokens'
    else:
        self.pred_input_key = 'input_tokens'
    self.pred_output_key = 'inflected_tokens'
    self.vec_key = 'inf_vec'

    if 'seq' in self.args.tree_vecs:
        self.seq_encoder = SeqEncoder(self.args, self.model, 'inf_seq')
    if 'bag' in self.args.tree_vecs:
        self.bag_encoder = BagEncoder(self.args, self.model, 'inf_bag')
    if 'tree' in self.args.tree_vecs:
        self.tree_encoder = TreeEncoder(self.args, self.model, 'inf_tree')

    self.c2i = c2i
    self.emb = emb
    self.i2e = ['<=>', '</$>', '✓', '✗'] + [c for c in c2i.keys()]
    # assert len(self.i2e) == len(set(self.i2e))
    self.e2i = {e: i for i, e in enumerate(self.i2e)}
    self.max_len = 100

    self.lstm_encoder = dy.BiRNNBuilder(1, self.args.char_dim, self.args.char_dim,
                                        self.model, dy.VanillaLSTMBuilder)
    self.lstm_decoder = dy.VanillaLSTMBuilder(
        1, 2 * self.args.char_dim + self.args.token_dim, self.args.char_dim,
        self.model)
    self.attention = dm.Attention(self.model, self.args.char_dim + 1,
                                  self.args.char_dim, self.args.char_dim)
    self.mlp = dm.MLP(self.model, self.args.char_dim, len(self.i2e),
                      self.args.hid_dim)
    self.init_c = self.model.add_parameters(2 * self.args.char_dim)
    self.empty = self.model.add_parameters(self.args.char_dim)
    self.log(
        f'Initialized <{self.__class__.__name__}>, params = {self.model.parameter_count()}'
    )

    self.inf_rules = {}
    if not self.args.no_inf_rules:
        self.inf_rules = inf_rules
        if dev_sents:
            self.eval_rules(dev_sents)
def _init_pool(self, dsz, layers=1, **kwargs):
    hsz = kwargs.get('rnnsz', kwargs.get('hsz', 100))
    if type(hsz) is list:
        hsz = hsz[0]
    self.rnn = dy.BiRNNBuilder(layers, dsz, hsz, self.pc, dy.VanillaLSTMBuilder)

    def pool(input_, lengths):
        return rnn_encode(self.rnn, input_, lengths)

    return hsz, pool
def __init__(self, pc, V, embed_dim, hidden_dim, num_layers=1, embeddings=None):
    self.embeddings = embeddings or pc.add_lookup_parameters(
        (V, embed_dim), _initializer)
    self.encoder = dy.BiRNNBuilder(num_layers, embed_dim, hidden_dim, pc,
                                   dy.VanillaLSTMBuilder)
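# Hypothetical usage sketch for the encoder above: look up each token id and
# transduce the sequence. `encode` and `token_ids` are illustrative names,
# not part of the original class.
def encode(self, token_ids):
    inputs = [self.embeddings[t] for t in token_ids]
    # one output per input position, forward/backward states concatenated
    return self.encoder.transduce(inputs)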
def __init__(self, model, tag_vocab, word_vocab, label_vocab, _,
             word_embedding_dim, lstm_layers, lstm_dim, label_hidden_dim, __,
             dropout, use_elmo=True, predict_pos=True):
    self.spec = locals()
    self.spec.pop("self")
    self.spec.pop("model")
    self.use_elmo = use_elmo

    self.model = model.add_subcollection("Parser")
    self.mlp = self.model.add_subcollection("mlp")
    self.tag_vocab = tag_vocab
    print('tag vocab', tag_vocab.size)
    self.word_vocab = word_vocab
    self.label_vocab = label_vocab
    self.lstm_dim = lstm_dim
    self.hidden_dim = label_hidden_dim

    lstm_input_dim = word_embedding_dim
    if self.use_elmo:
        self.elmo_weights = self.model.parameters_from_numpy(
            np.array([0.19608361, 0.53294581, -0.00724584]),
            name='elmo-averaging-weights')
        lstm_input_dim += 1024

    self.lstm = dy.BiRNNBuilder(lstm_layers, lstm_input_dim, 2 * lstm_dim,
                                self.model, dy.VanillaLSTMBuilder)
    self.f_encoding = Feedforward(self.mlp, 2 * lstm_dim, [], label_hidden_dim)
    self.f_label = Feedforward(self.mlp, label_hidden_dim, [], label_vocab.size)
    self.f_tag = Feedforward(self.mlp, label_hidden_dim, [], tag_vocab.size)

    self.word_embeddings = self.model.add_lookup_parameters(
        (word_vocab.size, word_embedding_dim))
    self.dropout = dropout
    self.empty_label = ()
    self.empty_label_index = self.label_vocab.index(self.empty_label)
def __init__(self, args, model, name='seq'):
    super().__init__(args, model)
    self.name = name
    self.token_seq_encoder = dy.BiRNNBuilder(1, self.args.token_dim,
                                             self.args.token_dim, self.model,
                                             dy.VanillaLSTMBuilder)
    self.special = self.model.add_lookup_parameters(
        (1, self.args.token_dim))
    self.log(
        f'Initialized <{self.__class__.__name__}>, params = {self.model.parameter_count()}'
    )
def __init__(self, model, enc_vocab_size, enc_emb_dim, enc_hidden_dim,
             enc_num_layers, enc_lstm_dropout, enc_dropout):
    self.model = model
    self.enc_dropout = enc_dropout
    self.enc_lstm_dropout = enc_lstm_dropout
    self.enc_num_layers = enc_num_layers

    # layers
    self.embedding = self.model.add_lookup_parameters(
        (enc_vocab_size, enc_emb_dim))
    self.rnn = dy.BiRNNBuilder(enc_num_layers, enc_emb_dim, enc_hidden_dim,
                               self.model, dy.LSTMBuilder)

    # other initializations
    self._train()  # default: train mode
def __init__(self, embedding, output_size, hidden_size, seed=1, crf=False,
             dropout_rate=0.5, optimizer="sgd", learning_rate=0.1):
    self.tmp_patience_filename = str(uuid4()) + ".model"
    self.set_seed(seed)
    self.model = dy.ParameterCollection()
    if optimizer == "sgd":
        self.trainer = dy.SimpleSGDTrainer(self.model, learning_rate=learning_rate)
    elif optimizer == "adam":
        self.trainer = dy.AdamTrainer(self.model, alpha=learning_rate)
    else:
        raise ValueError("Unknown optimizer")

    # CRF
    if crf:
        self.num_tags = output_size + 2  # Add 2 to account for start and end tags in CRF
        self.trans_mat = self.model.add_parameters(
            (self.num_tags, self.num_tags))
        self._loss = self._calculate_crf_loss
        self._predict = self._crf_predict_sentence
    else:
        self.num_tags = output_size
        self._loss = self._calculate_loss
        self._predict = self._predict_sentence

    # Embedding
    self.lookup = self.model.lookup_parameters_from_numpy(
        embedding.vectors)
    (embed_size, _), _ = self.lookup.dim()

    # Bi-LSTM
    self.bilstm = dy.BiRNNBuilder(num_layers=2,
                                  input_dim=embed_size,
                                  hidden_dim=hidden_size * 2,
                                  model=self.model,
                                  rnn_builder_factory=dy.LSTMBuilder)

    # Dense layer
    self.w = self.model.add_parameters((self.num_tags, hidden_size * 2))
    self.b = self.model.add_parameters(self.num_tags)
    self.dropout_rate = dropout_rate
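# A hedged single-update sketch for the model above; it assumes only what
# __init__ establishes (self.trainer, and that self._loss returns a scalar
# dynet expression). `train_step`, `sentence`, and `gold_tags` are
# hypothetical names.
def train_step(self, sentence, gold_tags):
    dy.renew_cg()
    loss = self._loss(sentence, gold_tags)
    loss_value = loss.value()  # forward pass
    loss.backward()            # backward pass
    self.trainer.update()      # SGD/Adam step, as selected in __init__
    return loss_value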
def __init__(self, args, model, c2i, emb):
    super().__init__(args, model)
    self.train_input_key = 'gold_linearized_tokens'
    self.train_output_key = 'gold_contracted_tokens'
    self.pred_input_key = 'inflected_tokens'
    self.pred_output_key = 'contracted_tokens'
    self.vec_key = 'inf_vec'

    if 'seq' in self.args.tree_vecs:
        self.seq_encoder = SeqEncoder(self.args, self.model, 'con_seq')
    if 'bag' in self.args.tree_vecs:
        self.bag_encoder = BagEncoder(self.args, self.model, 'con_bag')
    if 'tree' in self.args.tree_vecs:
        self.tree_encoder = TreeEncoder(self.args, self.model, 'con_tree')

    self.c2i = c2i
    self.i2c = list(self.c2i.keys())
    self.emb = emb
    self.dummies = model.add_lookup_parameters((4, self.args.token_dim))
    self.group_mlp = dm.MLP(self.model, 5 * self.args.token_dim, 3,
                            self.args.hid_dim)
    self.tok_lstm = dy.BiRNNBuilder(1, self.args.token_dim,
                                    self.args.token_dim, self.model,
                                    dy.VanillaLSTMBuilder)
    self.char_lstm = dy.BiRNNBuilder(1, self.args.char_dim,
                                     self.args.char_dim, self.model,
                                     dy.VanillaLSTMBuilder)
    self.init_c = self.model.add_parameters(2 * self.args.char_dim)
    self.lstm_decoder = dy.VanillaLSTMBuilder(
        1, 2 * self.args.char_dim + self.args.token_dim, self.args.char_dim,
        self.model)
    self.attention = dm.Attention(self.model, self.args.char_dim,
                                  self.args.char_dim, self.args.char_dim)
    self.contract_mlp = dm.MLP(self.model, self.args.char_dim, len(self.i2c),
                               self.args.hid_dim)
    self.empty = self.model.add_parameters(self.args.char_dim)
def __init__(self, param_collection, nlayers, input_dim, hidden_dim, name='BiLSTMEncoder'):
    super().__init__(param_collection, name)
    self.nlayers = nlayers
    self.input_dim = input_dim
    assert hidden_dim % 2 == 0  # for bidirectionality
    self.hidden_dim = hidden_dim
    self.bilstm = dy.BiRNNBuilder(self.nlayers, self.input_dim,
                                  self.hidden_dim, self.pc, dy.LSTMBuilder)
    self.encodings_matrix = None
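# A hedged sketch of how encodings_matrix above might get populated:
# transduce the input vectors and cache the outputs as a single
# (hidden_dim x seq_len) matrix for downstream use. `encode` and
# `input_vecs` are illustrative names, not from the original class.
def encode(self, input_vecs):
    outputs = self.bilstm.transduce(input_vecs)
    self.encodings_matrix = dy.concatenate_cols(outputs)
    return self.encodings_matrix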
def init_indexed_input_params(self):
    """
    Initialize BiLSTM builder
    :return: total output dimension of BiLSTM
    """
    for i in range(1, self._embedding_layers + 1):
        in_dim = self._indexed_dim if i == 1 else self._embedding_layer_dim
        out_dim = self._embedding_layer_dim if i < self._embedding_layers else self._lstm_layer_dim
        self._params["W%de" % i] = self.model.add_parameters((out_dim, in_dim), init=self._init)
        self._params["b%de" % i] = self.model.add_parameters(out_dim, init=self._init)
    self._params["bilstm"] = dy.BiRNNBuilder(
        self._lstm_layers,
        self._lstm_layer_dim if self._embedding_layers else self._indexed_dim,
        self._lstm_layer_dim,
        self.model,
        dy.LSTMBuilder)
    return self._indexed_num * self._lstm_layer_dim
def __init__(self, model, char_embedding_dim, char_lstm_dim,
             char_lstm_layers, word_embedding_dim):
    self.model = model.add_subcollection('CharEmbedding')
    self.embedding_dim = word_embedding_dim
    self.char_vocab = Vocabulary.fromlist(
        [self.UNK, self.START, self.STOP] + self.VOCAB, unk_value=self.UNK)
    # use self.char_vocab here; the bare name char_vocab is undefined
    self.char_embeddings = self.model.add_lookup_parameters(
        (self.char_vocab.size, char_embedding_dim))
    self.char_lstm = dy.BiRNNBuilder(char_lstm_layers, char_embedding_dim,
                                     2 * char_lstm_dim, self.model,
                                     dy.VanillaLSTMBuilder)
    self.output = Affine(model, 2 * char_lstm_dim, word_embedding_dim)
def __init__(self, model, train_trees, trees_indices, dev_trees, word_vocab,
             label_vocab, nuclearity_vocab, embedding_dim, extra_dim,
             lstm_layers, lstm_dim, hidden_dim, dropout):
    self.spec = locals()
    self.spec.pop("self")
    self.spec.pop("model")

    if not model:
        self.model = dy.ParameterCollection()
    else:
        self.model = model
    self.parser_model = self.model.add_subcollection("Parser")

    self.embedding_dim = embedding_dim
    self.extra_dim = extra_dim
    self.lstm_layers = lstm_layers
    self.lstm_dim = lstm_dim
    self.hidden_dim = hidden_dim
    self.dropout = dropout

    self.train_trees = train_trees
    self.trees_indices = trees_indices
    self.dev_trees = dev_trees
    self.word_vocab = word_vocab
    self.label_vocab = label_vocab
    self.nuclearity_vocab = nuclearity_vocab
    logger.info("Word: {}, Label: {}".format(self.word_vocab.size,
                                             self.label_vocab.size))

    self.word_embeddings = self.parser_model.add_lookup_parameters(
        (self.word_vocab.size, self.embedding_dim))
    self.lstm = dy.BiRNNBuilder(self.lstm_layers,
                                self.embedding_dim + self.extra_dim,
                                2 * self.lstm_dim, self.parser_model,
                                dy.VanillaLSTMBuilder)
    self.labels_ff = Feedforward(
        self.parser_model, 2 * self.lstm_dim, [self.hidden_dim],
        self.label_vocab.size - 1)
    self.nuclearity_ff = Feedforward(
        self.parser_model, 2 * self.lstm_dim, [15],
        self.nuclearity_vocab.size - 2)
def __init__(
        self,
        model,
        tag_vocab,
        char_vocab,
        word_vocab,
        char_embedding_dim,
        char_lstm_layers,
        char_lstm_dim,
        word_embedding_dim,
        pos_embedding_dim,
        max_sent_len,
        label_hidden_dim,
        dropout,
):
    self.spec = locals()
    self.spec.pop("self")
    self.spec.pop("model")

    self.model = model.add_subcollection("AttentionTagger")
    self.tag_vocab = tag_vocab
    self.char_vocab = char_vocab
    self.word_vocab = word_vocab
    self.char_lstm_dim = char_lstm_dim

    self.char_embeddings = self.model.add_lookup_parameters(
        (char_vocab.size, char_embedding_dim))
    self.word_embeddings = self.model.add_lookup_parameters(
        (word_vocab.size, word_embedding_dim))
    self.pos_embeddings = self.model.add_lookup_parameters(
        (max_sent_len, pos_embedding_dim))

    self.char_lstm = dy.BiRNNBuilder(char_lstm_layers, char_embedding_dim,
                                     2 * char_lstm_dim, self.model,
                                     dy.VanillaLSTMBuilder)
    self.attention = utils.SingleHeadSelfAttentive(
        self.model,
        2 * char_lstm_dim + word_embedding_dim + pos_embedding_dim,
        2 * char_lstm_dim + word_embedding_dim + pos_embedding_dim,
        2 * char_lstm_dim + word_embedding_dim + pos_embedding_dim)
    self.f_label = utils.OutputLayer(
        self.model,
        2 * char_lstm_dim + word_embedding_dim + pos_embedding_dim,
        label_hidden_dim, self.tag_vocab.size)
    self.dropout = dropout
def __init__(self, model, input_size, parser_dim, num_layers, dropout):
    assert input_size % 2 == 0, 'hidden size must be even'
    self.model = model.add_subcollection('AttentionComposition')
    self.input_size = input_size
    self.parser_dim = parser_dim
    self.num_layers = num_layers
    self.dropout = dropout

    self.rnn = dy.BiRNNBuilder(num_layers, input_size, input_size,
                               self.model, dy.VanillaLSTMBuilder)
    self.V = self.model.add_parameters(
        (input_size, input_size + parser_dim), init='glorot')
    self.gating = Affine(self.model, 2 * input_size, input_size)
    self.head = Affine(self.model, input_size, input_size)
    self.training = True