def __init__(self, model, training_src, training_tgt):
    self.model = model
    self.training_src, self.src_vocab, self.rsrc_vocab = \
        self.change_word2id_genevoc(training_src)
    self.training_tgt, self.tgt_vocab, self.rtgt_vocab = \
        self.change_word2id_genevoc_output(training_tgt)
    self.src_vocab_size = len(self.src_vocab)
    self.tgt_vocab_size = len(self.tgt_vocab)
    self.embed_size = 128
    self.src_lookup = model.add_lookup_parameters(
        (self.src_vocab_size, self.embed_size))
    self.tgt_lookup = model.add_lookup_parameters(
        (self.tgt_vocab_size, self.embed_size))
    self.hidden_size = 128
    self.layers = 1
    self.contextsize = self.hidden_size * 2
    self.l2r_builder = dy.GRUBuilder(self.layers, self.embed_size,
                                     self.hidden_size, model)
    self.r2l_builder = dy.GRUBuilder(self.layers, self.embed_size,
                                     self.hidden_size, model)
    self.dec_builder = dy.GRUBuilder(self.layers,
                                     self.embed_size + self.contextsize,
                                     self.hidden_size * 2, model)
    self.W_y = model.add_parameters(
        (self.tgt_vocab_size, self.hidden_size * 2 + self.contextsize))
    self.b_y = model.add_parameters(self.tgt_vocab_size)
    self.max_len = 50
def add_parameters(self, dropout, lstm_size, optimizer, model_type, gru=True):
    if model_type == "gru":
        self.encoder_rnn = dy.GRUBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                         lstm_size, self.model)
        self.encoder_rnn.set_dropout(dropout)
        self.encoder_rnn2 = dy.GRUBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                          lstm_size, self.model)
        self.encoder_rnn2.set_dropout(dropout)
        self.decoder_rnn = dy.GRUBuilder(NUM_LAYERS,
                                         EMBEDDING_SIZE + lstm_size,
                                         lstm_size, self.model)
        self.decoder_rnn.set_dropout(dropout)
    else:
        self.encoder_rnn = dy.LSTMBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                          lstm_size, self.model)
        self.encoder_rnn.set_dropout(dropout)
        self.encoder_rnn2 = dy.LSTMBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                           lstm_size, self.model)
        self.encoder_rnn2.set_dropout(dropout)
        self.decoder_rnn = dy.LSTMBuilder(NUM_LAYERS,
                                          EMBEDDING_SIZE + lstm_size,
                                          lstm_size, self.model)
        self.decoder_rnn.set_dropout(dropout)
    global DROPOUT
    DROPOUT = dropout
    self.W1 = self.model.add_parameters((200, lstm_size))
    self.b1 = self.model.add_parameters((200, 1))
    self.W2 = self.model.add_parameters((100, 200))
    self.b2 = self.model.add_parameters((100, 1))
    self.W3 = self.model.add_parameters((len(self.C2I), 100))
    self.b3 = self.model.add_parameters((len(self.C2I), 1))
    self.W_query = self.model.add_parameters((lstm_size, lstm_size))
    self.W_key = self.model.add_parameters((lstm_size, lstm_size))
    self.W_val = self.model.add_parameters((lstm_size, lstm_size))
    self.W_att = self.model.add_parameters((1, EMBEDDING_SIZE))
    self.W_c_s = self.model.add_parameters((lstm_size, EMBEDDING_SIZE))
    self.W_direct = self.model.add_parameters((len(self.C2I), lstm_size))
    self.b_att = self.model.add_parameters((lstm_size, 1))
    self.b_direct = self.model.add_parameters((len(self.C2I), 1))
    self.E_lang = self.model.add_lookup_parameters((7, EMBEDDING_SIZE))
    if optimizer == "sgd":
        self.trainer = dy.SimpleSGDTrainer(self.model)
    elif optimizer == "rms":
        self.trainer = dy.RMSPropTrainer(self.model)
    elif optimizer == "cyclic":
        # was a bare `if`, which restarted the chain and let the final
        # `else` silently overwrite the sgd/rms trainers chosen above
        self.trainer = dy.CyclicalSGDTrainer(self.model)
    elif optimizer == "adam":
        self.trainer = dy.AdamTrainer(self.model)
    else:
        self.trainer = dy.AdagradTrainer(self.model)
def __init__(self, Config):
    self.Config = Config
    self.model = dy.Model()
    VOCAB_SIZE = Config.data.vocab_size
    EMBEDDINGS_SIZE = Config.model.embed_dim
    LSTM_NUM_OF_LAYERS = Config.model.num_layers
    STATE_SIZE = Config.model.num_units
    with open(os.path.join(Config.data.base_path,
                           Config.data.processed_path, 'embed.pkl'),
              'rb') as f:
        embed = np.asarray(cPickle.load(f))
    oov = np.random.random((4 + Config.data.oov_size, EMBEDDINGS_SIZE))
    # self.embed = self.model.lookup_parameters_from_numpy(np.transpose(np.asarray(embed)))
    # self.oov = self.model.add_lookup_parameters((4 + Config.data.oov_size, EMBEDDINGS_SIZE), init='uniform')
    self.input_lookup = self.model.lookup_parameters_from_numpy(
        np.concatenate((oov, embed)))
    self.enc_fwd_lstm = dy.GRUBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE,
                                      STATE_SIZE, self.model)
    self.enc_bwd_lstm = dy.GRUBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE,
                                      STATE_SIZE, self.model)
    self.sess_lstm = dy.GRUBuilder(LSTM_NUM_OF_LAYERS, STATE_SIZE,
                                   STATE_SIZE, self.model)
    self.decoder_w = self.model.add_parameters((VOCAB_SIZE, STATE_SIZE),
                                               init='uniform')
    self.decoder_b = self.model.add_parameters((VOCAB_SIZE), init='uniform')
def __init__(self, model, training_src, training_tgt, lang_li):
    self.model = model
    self.lang_li = lang_li
    self.src_vocab, self.rsrc_vocab = self.change_word2id_genevoc(training_src)
    self.src_vocab_size = len(self.src_vocab)
    print('src_vocab_size', self.src_vocab_size)
    self.embed_size = 128
    self.src_lookup = self.model.add_lookup_parameters(
        (self.src_vocab_size, self.embed_size))
    self.tgt_vocab = {}
    self.rtgt_vocab = {}
    self.tgt_vocab_size = {}
    self.tgt_lookup = {}
    for ele in lang_li:
        self.tgt_vocab[ele], self.rtgt_vocab[ele] = \
            self.change_word2id_genevoc_output(training_tgt[ele])
        self.tgt_vocab_size[ele] = len(self.tgt_vocab[ele])
        self.tgt_lookup[ele] = self.model.add_lookup_parameters(
            (self.tgt_vocab_size[ele], self.embed_size))
    print('tgt_vocab_size', self.tgt_vocab_size)
    self.hidden_size = 128
    self.layers = 1
    self.contextsize = self.hidden_size * 2
    self.l2r_builder = dy.GRUBuilder(self.layers, self.embed_size,
                                     self.hidden_size, self.model)
    self.r2l_builder = dy.GRUBuilder(self.layers, self.embed_size,
                                     self.hidden_size, self.model)
    self.dec_builder = {}
    self.W_y = {}
    self.b_y = {}
    for ele in lang_li:
        self.dec_builder[ele] = dy.GRUBuilder(
            self.layers, self.embed_size + self.contextsize,
            self.hidden_size * 2, self.model)
        self.W_y[ele] = self.model.add_parameters(
            (self.tgt_vocab_size[ele],
             self.hidden_size * 2 + self.contextsize))
        self.b_y[ele] = self.model.add_parameters(self.tgt_vocab_size[ele])
    self.attention_size = 128
    self.lang_eb_size = 10
    self.langeb_lookup = self.model.add_lookup_parameters(
        (len(lang_li), self.lang_eb_size))
    self.W1_att_e = self.model.add_parameters(
        (self.attention_size, 2 * self.hidden_size))
    self.W1_att_f = self.model.add_parameters(
        (self.attention_size, 2 * self.hidden_size))
    self.W1_att_lang = self.model.add_parameters(
        (self.attention_size, self.lang_eb_size))
    self.w2_att = self.model.add_parameters((1, self.attention_size))
    self.max_len = 50
def __init__(self, data, opt):
    self.opt = opt
    self.model = dy.ParameterCollection()
    self.trainer = dy.MomentumSGDTrainer(self.model)
    self.w2i = data.w2i
    self.wdims = opt.embedding_size
    self.ldims = opt.hidden_size
    self.attsize = opt.attention_size
    self.ext_embeddings = data.ext_embeddings

    # Model Parameters
    self.wlookup = self.model.add_lookup_parameters(
        (len(self.w2i), self.wdims))
    self.__load_external_embeddings()

    if self.opt.encoder_dir == "single":
        if self.opt.encoder_type == "lstm":
            self.sentence_rnn = [
                dy.VanillaLSTMBuilder(1, self.wdims, self.ldims, self.model)
            ]
        elif self.opt.encoder_type == "gru":
            self.sentence_rnn = [
                dy.GRUBuilder(1, self.wdims, self.ldims, self.model)
            ]
        self.attention_w = self.model.add_parameters(
            (self.attsize, self.ldims))
        self.attention_b = self.model.add_parameters(self.attsize)
        self.att_context = self.model.add_parameters(self.attsize)
        self.mlp_w = self.model.add_parameters(
            (1, self.ldims + 2 * self.ldims))
        self.mlp_b = self.model.add_parameters(1)
    elif self.opt.encoder_dir == "bidirectional":
        if self.opt.encoder_type == "lstm":
            self.sentence_rnn = [
                dy.VanillaLSTMBuilder(1, self.wdims, self.ldims, self.model),
                dy.VanillaLSTMBuilder(1, self.wdims, self.ldims, self.model),
            ]
        elif self.opt.encoder_type == "gru":
            self.sentence_rnn = [
                dy.GRUBuilder(1, self.wdims, self.ldims, self.model),
                dy.GRUBuilder(1, self.wdims, self.ldims, self.model),
            ]
        self.attention_w = self.model.add_parameters(
            (self.attsize, 2 * self.ldims))
        self.attention_b = self.model.add_parameters(self.attsize)
        self.att_context = self.model.add_parameters(self.attsize)
        self.mlp_w = self.model.add_parameters(
            (1, 2 * self.ldims + 4 * self.ldims))
        self.mlp_b = self.model.add_parameters(1)
def __init__(self, pc, n_in, n_out, dropout_rate, reuse=True):
    self.pc = pc
    self.n_in = n_in
    self.n_out = n_out
    self.dropout_rate = dropout_rate
    self.reuse = reuse
    if self.reuse:
        self.gru = dy.GRUBuilder(layers=1, input_dim=self.n_in,
                                 hidden_dim=self.n_out, model=self.pc)
    else:
        self.gru_f = dy.GRUBuilder(layers=1, input_dim=self.n_in,
                                   hidden_dim=self.n_out, model=self.pc)
        self.gru_b = dy.GRUBuilder(layers=1, input_dim=self.n_in,
                                   hidden_dim=self.n_out, model=self.pc)
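A minimal usage sketch for this wrapper, assuming DyNet's standard RNN-builder API (initial_state().transduce); the class name GRU, the dimensions, and the toy sequence below are illustrative, not from the original code:

import dynet as dy

pc = dy.ParameterCollection()
rnn = GRU(pc, n_in=10, n_out=20, dropout_rate=0.0, reuse=True)  # hypothetical class name

dy.renew_cg()
xs = [dy.inputVector([0.0] * 10) for _ in range(5)]  # toy 5-step input sequence
hs = rnn.gru.initial_state().transduce(xs)           # one n_out-dim state per step
assert len(hs) == 5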
def __init__(self, config):
    self.config = config
    self.model = dy.Model()
    self.trainer = dy.AdamTrainer(self.model)
    self.rnn = []
    self.rnn_upsample_w = []
    self.rnn_linear_w = [None]
    rnn_input_size = config.FS[0]
    first = True
    for ls, fs, ups in zip(config.rnn_layers, config.FS, config.upsample):
        self.rnn.append(dy.GRUBuilder(1, rnn_input_size, ls, self.model))
        self.rnn_upsample_w.append([self.model.add_parameters((ls, ls))] * ups)
        if first:
            first = False
        else:
            self.rnn_linear_w.append(self.model.add_parameters((ls, fs)))
        rnn_input_size = ls
    layer_is = rnn_input_size
    self.mlp_w = []
    self.mlp_b = []
    for layer_os in config.mlp:
        self.mlp_w.append(self.model.add_parameters((layer_os, layer_is)))
        self.mlp_b.append(self.model.add_parameters((layer_os)))
        layer_is = layer_os
    self.mlp_w.append(self.model.add_parameters((256, layer_is)))
    self.mlp_b.append(self.model.add_parameters((256)))
def __init__(self, in_vocab, hidden_dim, model):
    self.in_vocab = in_vocab
    self.hidden_dim = hidden_dim
    self.embedding_enc = model.add_lookup_parameters(
        (self.in_vocab, self.hidden_dim))
    self.rnn_enc = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim, model)
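For context, a hedged sketch of how an encoder with these two members is typically driven; only embedding_enc and rnn_enc come from the snippet above, and the method itself is an assumption:

def encode(self, token_ids):
    # one embedding per input token id, then transduce the GRU over them
    embs = [self.embedding_enc[t] for t in token_ids]
    return self.rnn_enc.initial_state().transduce(embs)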
def __init__(self, args, src_vocab, tgt_vocab, src_vocab_id2word,
             tgt_vocab_id2word):
    model = self.model = dy.Model()
    self.args = args
    self.src_vocab = src_vocab
    self.tgt_vocab = tgt_vocab
    self.src_vocab_id2word = src_vocab_id2word
    self.tgt_vocab_id2word = tgt_vocab_id2word
    self.src_lookup = self.model.add_lookup_parameters(
        (args.src_vocab_size, args.embed_size))
    self.tgt_lookup = self.model.add_lookup_parameters(
        (args.tgt_vocab_size, args.embed_size))
    self.enc_forward_builder = dy.GRUBuilder(1, args.embed_size,
                                             args.hidden_size, model)
    self.enc_backward_builder = dy.GRUBuilder(1, args.embed_size,
                                              args.hidden_size, model)
    self.dec_builder = dy.GRUBuilder(
        1, args.embed_size + args.hidden_size * 2, args.hidden_size, model)
    # target word embedding
    self.W_y = model.add_parameters((args.tgt_vocab_size, args.embed_size))
    self.b_y = model.add_parameters((args.tgt_vocab_size))
    self.b_y.zero()
    # transformation of decoder hidden states and context vectors before
    # reading out target words
    self.W_h = model.add_parameters(
        (args.embed_size, args.hidden_size + args.hidden_size * 2))
    self.b_h = model.add_parameters((args.embed_size))
    self.b_h.zero()
    # transformation of context vectors at t_0 in decoding
    self.W_s = model.add_parameters((args.hidden_size, args.hidden_size * 2))
    self.b_s = model.add_parameters((args.hidden_size))
    self.b_s.zero()
    self.W1_att_f = model.add_parameters(
        (args.attention_size, args.hidden_size * 2))
    self.W1_att_e = model.add_parameters(
        (args.attention_size, args.hidden_size))
    self.W2_att = model.add_parameters((1, args.attention_size))
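The comments above describe a Luong-style readout: decoder state and attention context are projected through W_h/b_h before the W_y/b_y output layer. A hedged sketch of that readout, with hypothetical method name and argument layout:

def readout(self, h_t, ctx):
    # h_t: decoder state (hidden_size); ctx: attention context (2 * hidden_size)
    att_h = dy.tanh(self.W_h * dy.concatenate([h_t, ctx]) + self.b_h)
    return self.W_y * att_h + self.b_y  # unnormalized target-vocabulary scores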
def _create_model(self):
    self.logger.info('Creating the model...')
    model = dy.ParameterCollection()
    # context gru encoders
    c_fwdRnn = dy.GRUBuilder(self.model_args["gru_layers"],
                             self.model_args["gru_input_dim"],
                             self.model_args["gru_hidden_dim"], model)
    c_bwdRnn = dy.GRUBuilder(self.model_args["gru_layers"],
                             self.model_args["gru_input_dim"],
                             self.model_args["gru_hidden_dim"], model)
    # question gru encoders
    q_fwdRnn = dy.GRUBuilder(self.model_args["gru_layers"],
                             self.model_args["gru_input_dim"],
                             self.model_args["gru_hidden_dim"], model)
    q_bwdRnn = dy.GRUBuilder(self.model_args["gru_layers"],
                             self.model_args["gru_input_dim"],
                             self.model_args["gru_hidden_dim"], model)
    # embedding parameter
    lookup_params = model.add_lookup_parameters(
        (self.model_args["vocab_size"], self.model_args["gru_input_dim"]),
        dy.UniformInitializer(self.model_args["lookup_init_scale"]))
    unk_lookup_params = model.add_lookup_parameters(
        (self.model_args["number_of_unks"],
         self.model_args["gru_input_dim"]),
        dy.UniformInitializer(self.model_args["lookup_init_scale"]))
    self.logger.info('Done creating the model')
    model_parameters = {"c_fwdRnn": c_fwdRnn,
                        "c_bwdRnn": c_bwdRnn,
                        "q_fwdRnn": q_fwdRnn,
                        "q_bwdRnn": q_bwdRnn,
                        "lookup_params": lookup_params,
                        "unk_lookup_params": unk_lookup_params}
    return model, model_parameters
def __init__(self, hidden_dim, out_vocab, model, max_length=MAX_LENGTH):
    self.hidden_dim = hidden_dim
    self.out_vocab = out_vocab
    self.max_length = max_length
    self.embedding_dec = model.add_lookup_parameters(
        (self.out_vocab, self.hidden_dim))
    self.w_attn = model.add_parameters(
        (self.max_length, self.hidden_dim * 2))
    self.b_attn = model.add_parameters((self.max_length,))
    self.w_attn_combine = model.add_parameters(
        (self.hidden_dim, self.hidden_dim * 2))
    self.b_attn_combine = model.add_parameters((self.hidden_dim,))
    self.rnn_dec = dy.GRUBuilder(1, self.hidden_dim, self.hidden_dim, model)
    self.w_dec = model.add_parameters((self.out_vocab, self.hidden_dim))
    self.b_dec = model.add_parameters((self.out_vocab,))
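These shapes match the fixed-window attention decoder popularized by the PyTorch seq2seq tutorial: attention weights over max_length source slots are computed from the current embedding and the previous hidden state. A hedged single-step sketch using the parameters above; the method name, prev_h, and the enc_outputs layout (a hidden_dim x max_length matrix) are assumptions:

def decode_step(self, state, tok, prev_h, enc_outputs):
    emb = self.embedding_dec[tok]
    scores = self.w_attn * dy.concatenate([emb, prev_h]) + self.b_attn
    weights = dy.softmax(scores)          # one weight per source slot
    context = enc_outputs * weights       # weighted sum of encoder states
    combined = dy.rectify(
        self.w_attn_combine * dy.concatenate([emb, context])
        + self.b_attn_combine)
    state = state.add_input(combined)
    return state, self.w_dec * state.output() + self.b_dec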
def main():
    import sys
    action = sys.argv[1]
    emb_name = sys.argv[2]
    filen = sys.argv[3]
    testname = sys.argv[4]
    print('setting up')
    train_words, train_tags, tag_idx, idx_tag, test_words, test_tags = \
        setup(filen, testname)
    num_tags = len(tag_idx.keys())
    gru_model = dy.ParameterCollection()
    word_index, embeddings_mat = import_embeddings(emb_name, 300)
    hidden_layer_len = 200
    layers = 1
    eparams = gru_model.lookup_parameters_from_numpy(
        embeddings_mat.A)  # flatten matrix
    gru_unit = dy.GRUBuilder(layers, 300, hidden_layer_len, gru_model)
    param_mat = gru_model.add_parameters((hidden_layer_len, num_tags))
    param_bias = gru_model.add_parameters((num_tags))
    # gmodel.save("grumodel.model")
    # mdl2 = dy.ParameterCollection()
    # ep = mdl2.lookup_parameters_from_numpy(embeddings_mat.A)
    # parmat = mdl2.add_parameters((200, num_tags))
    # parbias = mdl2.add_parameters((num_tags))
    # gmodel.populate("grumodel.model")
    if action == 'train':
        print('training')
        bsize, gmodel = training(3, eparams, gru_unit, idx_tag, gru_model,
                                 train_words, train_tags, word_index,
                                 param_mat, param_bias)
    if action == 'tune':
        print('tuning')
        for r in range(3, 6):
            print('training')
            print('epochs: ', r)
            bsize, gmodel = training(r, eparams, gru_unit, idx_tag, gru_model,
                                     train_words, train_tags, word_index,
                                     param_mat, param_bias)
            print('testing')
            testing(idx_tag, gru_unit, gmodel, bsize, test_words, test_tags,
                    word_index, param_mat, param_bias, eparams)
    if action == 'test':
        print('training')
        # use 5 epochs
        bsize, gmodel = training(5, eparams, gru_unit, idx_tag, gru_model,
                                 train_words, train_tags, word_index,
                                 param_mat, param_bias)
        print('testing')
        testing(idx_tag, gru_unit, gmodel, bsize, test_words, test_tags,
                word_index, param_mat, param_bias, eparams)
    return
def _initialize_model(self):
    self.model = dy.Model()
    self.input_lookup = self.model.add_lookup_parameters(
        (self.vocab_size, self.embedding_size))
    # Attention params
    self.attention_w1 = self.model.add_parameters(
        (self.state_size, self.state_size))
    self.attention_w2 = self.model.add_parameters(
        (self.state_size, self.state_size))
    self.attention_v = self.model.add_parameters((1, self.state_size))
    # Predictive alignment params
    self.w_p = self.model.add_parameters(
        (self.state_size, self.state_size))
    self.v_p = self.model.add_parameters((1, self.state_size))
    # LSTM/GRU and last-layer projection matrix
    self.lstm = dy.GRUBuilder(self.lstm_num_of_layers, self.embedding_size,
                              self.state_size, self.model)
    self.output_w = self.model.add_parameters(
        (self.num_of_classes, self.state_size))
    self.output_b = self.model.add_parameters((self.num_of_classes))
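The w_p/v_p pair corresponds to the predictive alignment of Luong et al. (2015)'s local attention, which the comment names: p_t = S * sigmoid(v_p . tanh(W_p h_t)). A hedged sketch of that formula, with hypothetical method name and src_len argument:

def predict_position(self, h_t, src_len):
    # predicted center of the attention window, a scalar in [0, src_len]
    return src_len * dy.logistic(self.v_p * dy.tanh(self.w_p * h_t))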
def __init__(self, dataset, config):
    self.clip = 5 * dataset.sample_rate  # set to zero for full training
    self.sample_rate = dataset.sample_rate
    self.config = config
    self.dataset = dataset
    self.model = dy.Model()
    self.trainer = dy.AdamTrainer(self.model)
    # lookups
    self.phoneme_lookup = self.model.add_lookup_parameters(
        (len(dataset.phoneme2int), config.phone_embeddings_size))
    self.context_lookup = [
        self.model.add_lookup_parameters(
            (len(ctx), config.context_embeddings_size))
        for ctx in dataset.context2int
    ]
    # encoder
    inp_sz = config.phone_embeddings_size * 5
    # + len(dataset.context2int) * config.context_embeddings_size
    self.encoder_fw = [
        dy.LSTMBuilder(1, inp_sz, config.encoder_size, self.model)
    ]
    self.encoder_bw = [
        dy.LSTMBuilder(1, inp_sz, config.encoder_size, self.model)
    ]
    # deeper layers take the concatenated fw/bw states as input
    # (was a pair of list comprehensions used for side effects)
    for _ in range(config.encoder_layers - 1):
        self.encoder_fw.append(
            dy.LSTMBuilder(1, config.encoder_size * 2, config.encoder_size,
                           self.model))
        self.encoder_bw.append(
            dy.LSTMBuilder(1, config.encoder_size * 2, config.encoder_size,
                           self.model))
    # receptive network
    receptive_input = config.receptive_input
    self.receptive_w = []
    self.receptive_b = []
    for size in config.receptive_layers:
        receptive_output = size
        self.receptive_w.append(
            self.model.add_parameters((receptive_output, receptive_input)))
        self.receptive_b.append(
            self.model.add_parameters((receptive_output)))
        receptive_input = receptive_output
    # attention network
    attention_input = config.receptive_input
    self.attention_w = []
    self.attention_b = []
    for size in config.attention_layers:
        attention_output = size
        self.attention_w.append(
            self.model.add_parameters((attention_output, attention_input)))
        self.attention_b.append(
            self.model.add_parameters((attention_output)))
        attention_input = attention_output
    # decoder
    self.decoder = dy.GRUBuilder(
        config.decoder_layers,
        config.encoder_size * 2 + config.receptive_layers[-1],
        config.decoder_size, self.model)
    # attention
    self.att_w1 = self.model.add_parameters(
        (config.att_proj_size, config.encoder_size * 2))
    self.att_w2 = self.model.add_parameters(
        (config.att_proj_size, config.decoder_size))
    self.att_w3 = self.model.add_parameters(
        (config.att_proj_size, config.att_lsa_filters))
    self.att_w4 = self.model.add_parameters(
        (config.att_proj_size, config.attention_layers[-1]))
    self.att_v = self.model.add_parameters((1, config.att_proj_size))
    self.cnn_attention = CNN(self.model)
    self.cnn_attention.add_layer_conv(config.att_lsa_input_size, 1, 1, 1,
                                      config.att_lsa_filters, same=True)
    # output
    presoftmax_input = (config.decoder_size + config.receptive_layers[-1]
                        + config.sample_trail_size)
    self.presoftmax_w = []
    self.presoftmax_b = []
    for size in config.presoftmax_layers:
        presoftmax_output = size
        self.presoftmax_w.append(
            self.model.add_parameters(
                (presoftmax_output, presoftmax_input)))
        self.presoftmax_b.append(
            self.model.add_parameters((presoftmax_output)))
        presoftmax_input = presoftmax_output
    self.softmax_w = self.model.add_parameters((257, presoftmax_input))
    self.softmax_b = self.model.add_parameters((257))
def main(datapath, train=None, test=None, num_epochs=2, batch_size=256,
         embedding_approach='random', embedding_size=300):
    if train is None and test is None:
        print("Either train or test!")
        sys.exit()
    ################################################################
    RNN_model = dy.ParameterCollection()
    ################################################################
    # HYPERPARAMETERS
    ################################################################
    # size of word embedding (if using "random"; otherwise dependent on
    # the loaded embeddings)
    # embedding_size = 300
    # size of hidden layer of `RNN`
    hidden_size = 200
    # number of layers in `RNN`
    num_layers = 1
    # type of trainer
    trainer = dy.SimpleSGDTrainer(m=RNN_model, learning_rate=0.01)
    ################################################################
    # Load the training and test data
    all_tokens, all_labels = import_emails(
        datapath)  # add test_tokens, test_labels
    ################################################################
    # print("train_tokens = ", train_tokens[:10], "train_labels = ", train_labels[:10])
    if embedding_approach == "pretrained":
        emb_matrix_pretrained, w2i_pretrained = load_pretrained_embeddings(
            path.join(datapath + "../../hw/", "pretrained_embeddings.txt"),
            take=10000)
        embedding_parameters = RNN_model.lookup_parameters_from_numpy(
            emb_matrix_pretrained)
        embedding_size = emb_matrix_pretrained.shape[1]  # rewriting `embedding_size`
        w2i = w2i_pretrained  # ensure we use the correct lookup table
        print("embedding matrix shape: {}".format(
            emb_matrix_pretrained.shape))
    elif embedding_approach == "random":
        # randomly initialized embeddings
        w2i_random = build_w2i_lookup(train_tokens)
        embedding_parameters = RNN_model.add_lookup_parameters(
            (len(w2i_random) + 1, embedding_size))
        w2i = w2i_random  # ensure we use the correct lookup table
    else:
        raise Exception("Choose a proper embedding approach")
    ###### CHOOSE HERE which approach you want to use. ######
    # RNN_unit = dy.LSTMBuilder(num_layers, embedding_size, hidden_size, RNN_model)
    RNN_unit = dy.GRUBuilder(num_layers, embedding_size, hidden_size,
                             RNN_model)
    ################################################################
    # 10-fold cross-validation
    fold_size = int(np.ceil(len(all_labels) / 10))
    print(fold_size, " fold size of a 10-Fold Cross validation")
    i = 0
    test_tokens = []
    test_labels = []
    train_labels = []
    train_tokens = []
    email_accuracy_folds = []
    while i < len(all_labels):
        if i > 0:
            # just doing one fold, to save time; run predict on that
            break
        test_tokens = all_tokens[i:i + fold_size]
        test_labels = all_labels[i:i + fold_size]
        train_tokens = all_tokens[0:i]
        train_tokens.extend(all_tokens[i + fold_size:])
        train_labels = all_labels[0:i]
        train_labels.extend(all_labels[i + fold_size:])
        print("len(test_l)= ", len(test_labels), " len(train_labels)= ",
              len(train_labels), " fold_sz= ", fold_size)
        i += fold_size
        # convert the labels to ids
        l2i = labels_to_index_map(train_labels)
        # print("len(l2i) = ", l2i)
        train_labels = [l2i[l] for l in train_labels]
        # print("After mapping train_labels = ", train_labels[:10])
        test_labels = [l2i[l] for l in test_labels]
        # training hyperparams
        # batch_size = 256
        num_batches_training = int(np.ceil(len(train_tokens) / batch_size))
        num_batches_testing = int(np.ceil(len(test_tokens) / batch_size))
        # num_epochs = 1
        # Projection layer
        # W (hidden x num_labels)
        pW = RNN_model.add_parameters((hidden_size, len(list(l2i.keys()))))
        # b (1 x num_labels)
        pb = RNN_model.add_parameters((len(list(l2i.keys()))))
        print("train_tokens len = ", len(train_tokens), "train_labels len =",
              len(train_labels))
        if train is not None:
            modelPath = train
            print("in train", train)
            trainAlgo(train_tokens, train_labels, num_epochs,
                      num_batches_training, batch_size, w2i,
                      embedding_parameters, pW, pb, modelPath, RNN_unit,
                      trainer, RNN_model)
        if test is not None:
            modelPath = test
            print("in test", test)
            final_predictions = testAlgo(test_tokens, test_labels,
                                         num_batches_testing, batch_size,
                                         w2i, embedding_parameters, pW, pb,
                                         modelPath, RNN_unit, RNN_model)
            email_accuracy = evaluate(final_predictions, test_labels)
            print("Email overall accuracy : {}".format(email_accuracy))
            email_accuracy_folds.append(email_accuracy)
    print("Average over 10 folds = ",
          float(sum(email_accuracy_folds) / len(email_accuracy_folds)))
def setUp(self):
    # create model
    self.m = dy.Model()
    self.rnn = dy.GRUBuilder(2, 10, 10, self.m)
def __init__(self, params, vocab, label2tag, pretrained_embeddings=None):
    """
    :param params:
    :param vocab:
    :param label2tag:
    :param pretrained_embeddings:
    """
    self.dim_w = params.dim_w
    self.win = params.win
    self.vocab = vocab
    self.n_words = len(self.vocab)
    self.dim_asp = params.dim_asp
    self.dim_opi = params.dim_opi
    self.dim_y_asp = params.n_asp_tags
    self.dim_y_opi = params.n_opi_tags
    self.n_steps = params.n_steps
    self.asp_label2tag = label2tag
    self.opi_label2tag = {0: 'O', 1: 'T'}
    self.dropout_asp = params.dropout_asp
    self.dropout_opi = params.dropout_opi
    self.dropout = params.dropout
    self.rnn_type = params.rnn_type
    self.ds_name = params.ds_name
    self.model_name = params.model_name
    self.attention_type = params.attention_type
    self.pc = dy.ParameterCollection()
    self.Emb = WDEmb(pc=self.pc, n_words=self.n_words, dim_w=self.dim_w,
                     pretrained_embeddings=pretrained_embeddings)
    # self.ASP_RNN = LSTM(pc=self.pc, n_in=self.win*self.dim_w, n_out=self.dim_asp, dropout_rate=self.dropout_asp)
    # self.OPI_RNN = LSTM(pc=self.pc, n_in=self.win*self.dim_w, n_out=self.dim_opi, dropout_rate=self.dropout_opi)
    # use dynet RNNBuilder rather than the self-defined RNN classes
    if self.rnn_type == 'LSTM':
        self.ASP_RNN = dy.LSTMBuilder(1, self.win * self.dim_w,
                                      self.dim_asp, self.pc)
        self.OPI_RNN = dy.LSTMBuilder(1, self.win * self.dim_w,
                                      self.dim_opi, self.pc)
    elif self.rnn_type == 'GRU':
        # NOT TRIED!
        self.ASP_RNN = dy.GRUBuilder(1, self.win * self.dim_w,
                                     self.dim_asp, self.pc)
        self.OPI_RNN = dy.GRUBuilder(1, self.win * self.dim_w,
                                     self.dim_opi, self.pc)
    else:
        raise Exception("Invalid RNN type!!!")
    self.THA = THA(pc=self.pc, n_steps=self.n_steps, n_in=2 * self.dim_asp)
    if self.attention_type == 'bilinear':
        self.STN = ST_bilinear(pc=self.pc, dim_asp=self.dim_asp,
                               dim_opi=self.dim_opi)
    # here dot attention is not applicable since the aspect representation
    # and opinion representation have different dimensions
    # elif self.attention_type == 'dot':
    #     self.STN = ST_dot(pc=self.pc, dim_asp=self.dim_asp, dim_opi=self.dim_opi)
    elif self.attention_type == 'concat':
        self.STN = ST_concat(pc=self.pc, dim_asp=self.dim_asp,
                             dim_opi=self.dim_opi)
    else:
        raise Exception("Invalid attention type!!!")
    self.ASP_FC = Linear(pc=self.pc,
                         n_in=2 * self.dim_asp + 2 * self.dim_opi,
                         n_out=self.dim_y_asp)
    self.OPI_FC = Linear(pc=self.pc, n_in=2 * self.dim_opi,
                         n_out=self.dim_y_opi)
    self.layers = [self.ASP_FC, self.OPI_FC, self.THA, self.STN]
    if params.optimizer == 'sgd':
        self.optimizer = dy.SimpleSGDTrainer(self.pc, params.sgd_lr)
    elif params.optimizer == 'momentum':
        self.optimizer = dy.MomentumSGDTrainer(self.pc, 0.01, 0.9)
    elif params.optimizer == 'adam':
        self.optimizer = dy.AdamTrainer(self.pc, 0.001, 0.9, 0.9)
    elif params.optimizer == 'adagrad':
        self.optimizer = dy.AdagradTrainer(self.pc)
    elif params.optimizer == 'adadelta':
        # use default values of adadelta
        self.optimizer = dy.AdadeltaTrainer(self.pc)
    else:
        raise Exception("Invalid optimizer!!")
def main(datapath, train=None, test=None, num_epochs=2, batch_size=256,
         embedding_approach='random', embedding_size=300):
    if train is None and test is None:
        print("Either train or test!")
        sys.exit()
    # initialize empty model
    ################################################################
    RNN_model = dy.ParameterCollection()
    ################################################################
    # HYPERPARAMETERS
    ################################################################
    # size of word embedding (if using "random"; otherwise dependent on
    # the loaded embeddings)
    # embedding_size = 300
    # size of hidden layer of `RNN`
    hidden_size = 200
    # number of layers in `RNN`
    num_layers = 1
    # type of trainer
    trainer = dy.SimpleSGDTrainer(m=RNN_model, learning_rate=0.01)
    ################################################################
    # Load the training and test data
    train_tokens, train_labels, _, _, test_tokens, test_labels = \
        u.import_ptb(datapath)
    ################################################################
    if embedding_approach == "pretrained":
        emb_matrix_pretrained, w2i_pretrained = u.load_pretrained_embeddings(
            path.join(datapath, "pretrained_embeddings.txt"), take=10000)
        embedding_parameters = RNN_model.lookup_parameters_from_numpy(
            emb_matrix_pretrained)
        embedding_size = emb_matrix_pretrained.shape[1]  # rewriting `embedding_size`
        w2i = w2i_pretrained  # ensure we use the correct lookup table
        print("embedding matrix shape: {}".format(
            emb_matrix_pretrained.shape))
    elif embedding_approach == "random":
        # randomly initialized embeddings
        w2i_random = u.build_w2i_lookup(train_tokens)
        embedding_parameters = RNN_model.add_lookup_parameters(
            (len(w2i_random) + 1, embedding_size))
        w2i = w2i_random  # ensure we use the correct lookup table
    else:
        raise Exception("Choose a proper embedding approach")
    ###### CHOOSE HERE which approach you want to use. ######
    # RNN_unit = dy.LSTMBuilder(num_layers, embedding_size, hidden_size, RNN_model)
    RNN_unit = dy.GRUBuilder(num_layers, embedding_size, hidden_size,
                             RNN_model)
    ################################################################
    # convert the labels to ids
    l2i = u.labels_to_index_map(train_labels)
    train_labels = [[l2i[l] for l in sent] for sent in train_labels]
    test_labels = [[l2i[l] for l in sent] for sent in test_labels]
    # training hyperparams
    # batch_size = 256
    num_batches_training = int(np.ceil(len(train_tokens) / batch_size))
    num_batches_testing = int(np.ceil(len(test_tokens) / batch_size))
    # num_epochs = 1
    # Projection layer
    # W (hidden x num_labels)
    pW = RNN_model.add_parameters((hidden_size, len(list(l2i.keys()))))
    # b (1 x num_labels)
    pb = RNN_model.add_parameters((len(list(l2i.keys()))))
    if train is not None:
        modelPath = train
        trainAlgo(train_tokens, train_labels, num_epochs,
                  num_batches_training, batch_size, w2i,
                  embedding_parameters, pW, pb, modelPath, RNN_unit, trainer,
                  RNN_model)
    if test is not None:
        modelPath = test
        final_predictions = testAlgo(test_tokens, test_labels,
                                     num_batches_testing, batch_size, w2i,
                                     embedding_parameters, pW, pb, modelPath,
                                     RNN_unit, RNN_model)
        overall_accuracy, sentence_accuracy = evaluate(final_predictions,
                                                       test_labels)
        print("overall accuracy: {}".format(overall_accuracy))
def test_wsj():
    print()
    print('# test on wsj subset')
    data, n_types, n_labels = pickle.load(open('wsj.pkl', 'rb'))

    d_emb = 50
    d_rnn = 51
    d_hid = 52
    d_actemb = 5
    minibatch_size = 5
    n_epochs = 10
    preprocess_minibatch = True

    model = dy.ParameterCollection()
    embed_word = model.add_lookup_parameters((n_types, d_emb))
    f_gru = dy.GRUBuilder(1, d_emb, d_rnn, model)
    b_gru = dy.GRUBuilder(1, d_emb, d_rnn, model)
    embed_action = model.add_lookup_parameters((n_labels, d_actemb))
    combine_arh_W = model.add_parameters(
        (d_hid, d_actemb + d_rnn * 2 + d_hid))
    combine_arh_b = model.add_parameters(d_hid)
    initial_h = model.add_parameters(d_hid, dy.ConstInitializer(0))
    initial_actemb = model.add_parameters(d_actemb, dy.ConstInitializer(0))
    policy_W = model.add_parameters((n_labels, d_hid))
    policy_b = model.add_parameters(n_labels)
    optimizer = dy.AdamTrainer(model, alpha=0.01)

    for _ in range(n_epochs):
        total_loss = 0
        for batch in minibatch(data, minibatch_size, True):
            dy.renew_cg()
            combine_arh_We = dy.parameter(combine_arh_W)
            combine_arh_be = dy.parameter(combine_arh_b)
            policy_We = dy.parameter(policy_W)
            policy_be = dy.parameter(policy_b)
            loss = 0
            if preprocess_minibatch:
                # for efficiency, combine RNN outputs on entire minibatch
                # in one go (requires padding with zeros, should be masked
                # but isn't right now)
                all_tokens = [ex.tokens for ex in batch]
                max_length = max(map(len, all_tokens))
                all_tokens = [[x[i] if len(x) > i else 0
                               for x in all_tokens]
                              for i in range(max_length)]
                all_e = [dy.lookup_batch(embed_word, x) for x in all_tokens]
                all_rnn_out = bi_gru(f_gru, b_gru, all_e)
            losses = []
            for batch_id, ex in enumerate(batch):
                N = len(ex.tokens)
                if preprocess_minibatch:
                    rnn_out = [dy.pick_batch_elem(x, batch_id)
                               for x in all_rnn_out[:N]]
                else:
                    e = [embed_word[x] for x in ex.tokens]
                    rnn_out = bi_gru(f_gru, b_gru, e)
                prev_h = dy.parameter(initial_h)  # previous hidden state
                actemb = dy.parameter(
                    initial_actemb)  # embedding of previous action
                output = []
                for t in range(N):
                    # update hidden state based on most recent *predicted*
                    # action (not ground truth)
                    inputs = [actemb, prev_h, rnn_out[t]]
                    h = dy.rectify(dy.affine_transform(
                        [combine_arh_be, combine_arh_We,
                         dy.concatenate(inputs)]))
                    # make prediction
                    pred_vec = dy.affine_transform(
                        [policy_be, policy_We, h])
                    pred = pred_vec.npvalue().argmin()
                    output.append(pred)
                    # accumulate loss (squared error against costs)
                    truth = np.ones(n_labels)
                    truth[ex.labels[t]] = 0
                    losses.append(dy.squared_distance(
                        pred_vec, dy.inputTensor(truth)))
                    # cache hidden state, previous action embedding
                    prev_h = h
                    actemb = embed_action[pred]
                # print('output=%s, truth=%s' % (output, ex.labels))
            loss = dy.esum(losses)
            loss.backward()
            total_loss += loss.value()
            optimizer.update()
        print(total_loss)
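bi_gru is called above but not defined in the snippet. A plausible sketch consistent with the d_rnn * 2 sizing of combine_arh_W; the implementation is an assumption, not the original helper:

def bi_gru(f_gru, b_gru, embeddings):
    # forward GRU left-to-right, backward GRU right-to-left,
    # concatenated per position into 2 * d_rnn states
    fwd = f_gru.initial_state().transduce(embeddings)
    bwd = b_gru.initial_state().transduce(embeddings[::-1])
    return [dy.concatenate([f, b]) for f, b in zip(fwd, bwd[::-1])]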
def __init__(self, task_in_size, task_hid_size, task_out_size, adv_in_size,
             adv_hid_size, adv_out_size, adv_count, vocab_size, dropout,
             lstm_size, adv_depth=1, rnn_dropout=0.0, rnn_type='lstm'):
    model = dy.Model()
    if rnn_type == 'lstm':
        self._rnn = dy.LSTMBuilder(lstm_size, 300, task_in_size, model)
    elif rnn_type == 'gru':
        self._rnn = dy.GRUBuilder(lstm_size, 300, task_in_size, model)
    else:
        self._rnn = dy.SimpleRNNBuilder(lstm_size, 300, task_in_size, model)
    params = {}
    params['w_lookup'] = model.add_lookup_parameters((vocab_size, 300))
    in_task = task_in_size
    params["task_w1"] = model.add_parameters((task_hid_size, in_task))
    params["task_b1"] = model.add_parameters((task_hid_size))
    params["task_w2"] = model.add_parameters((task_out_size, task_hid_size))
    params["task_b2"] = model.add_parameters((task_out_size))
    for i in range(adv_count):
        for j in range(adv_depth):
            params["adv_" + str(i) + "_w" + str(j + 1)] = \
                model.add_parameters((adv_hid_size, adv_in_size))
            params["adv_" + str(i) + "_b" + str(j + 1)] = \
                model.add_parameters((adv_hid_size))
        params["adv_" + str(i) + "_w" + str(adv_depth + 1)] = \
            model.add_parameters((adv_out_size, adv_hid_size))
        params["adv_" + str(i) + "_b" + str(adv_depth + 1)] = \
            model.add_parameters((adv_out_size))
    params["contra_adv_w1"] = model.add_parameters(
        (adv_hid_size, adv_in_size))
    params["contra_adv_b1"] = model.add_parameters((adv_hid_size))
    params["contra_adv_w2"] = model.add_parameters(
        (adv_out_size, adv_hid_size))
    params["contra_adv_b2"] = model.add_parameters((adv_out_size))
    self._model = model
    self._hid_dim = task_hid_size
    self._in_dim = task_in_size
    self._adv_count = adv_count
    self._adv_depth = adv_depth
    self._params = params
    self._dropout = dropout
    self._rnn_dropout = rnn_dropout
print(len(train_sentences_en))
print(len(valid_sentences_de))
print(len(valid_sentences_en))

VOCAB_SIZE_DE = len(wids)
VOCAB_SIZE_EN = VOCAB_SIZE_DE
# materialized so the pairs survive repeated iteration under Python 3
train_sentences = list(zip(train_sentences_de, train_sentences_en))
valid_sentences = list(zip(valid_sentences_de, valid_sentences_en))

# Specify model
model = dy.Model()
if config.GRU:
    encoder = dy.GRUBuilder(LAYER_DEPTH, EMB_SIZE, HIDDEN_SIZE, model)
    revcoder = dy.GRUBuilder(LAYER_DEPTH, EMB_SIZE, HIDDEN_SIZE, model)
    decoder = dy.GRUBuilder(LAYER_DEPTH, EMB_SIZE + HIDDEN_SIZE, HIDDEN_SIZE,
                            model)
else:
    encoder = dy.LSTMBuilder(LAYER_DEPTH, EMB_SIZE, HIDDEN_SIZE, model)
    revcoder = dy.LSTMBuilder(LAYER_DEPTH, EMB_SIZE, HIDDEN_SIZE, model)
    decoder = dy.LSTMBuilder(LAYER_DEPTH, EMB_SIZE + HIDDEN_SIZE,
                             HIDDEN_SIZE, model)
encoder_params = {}
encoder_params["lookup"] = model.add_lookup_parameters(
    (VOCAB_SIZE_DE, EMB_SIZE))
decoder_params = {}
if config.sharing:
def setUp(self):
    # create model
    self.m = dy.ParameterCollection()
    self.rnn = dy.GRUBuilder(2, 10, 10, self.m)
if len(f) <= input_size:
    training_pairs += [f]
else:
    training_pairs += [f[i:i + input_size]
                       for i in range(0, len(f) - input_size)]

rng.shuffle(training_pairs)
training_pairs = training_pairs[:500]
TRAINING_PAIRS = len(training_pairs)
print(TRAINING_PAIRS, " training pairs")
print(len(typed_output_vocab) - 1, " possible types")

# set up the neural net
pc = dy.ParameterCollection()
srnn = dy.GRUBuilder(LAYERS, INPUT_DIM, HIDDEN_DIM, pc)
params: Dict[str, dy.Expression] = {}
params["lookup"] = pc.add_lookup_parameters((VOCAB_SIZE, INPUT_DIM))
params["R"] = pc.add_parameters((OUTPUT_VOCAB_SIZE, HIDDEN_DIM))
params["bias"] = pc.add_parameters((OUTPUT_VOCAB_SIZE))

# Load training data from disk if it exists
model = Path(MODEL_FILE)
if model.is_file():
    print("Model file found, loading... ", end="", flush=True)
    try:
        pc.populate(MODEL_FILE)
        print("OK")
    except Exception as e:
        print("Failed")
        print("Message was:\n\t%s" % (str(e),))