def __init__(self, criterion, num_symbols, use_cuda):  # , seq_len):
    super(CharLevel_autoencoder, self).__init__()
    self.char_embedding_dim = 128
    self.pooling_stride = 5
    self.seq_len = 300
    self.num_symbols = num_symbols
    self.use_cuda = use_cuda

    self.filter_widths = list(range(1, 9))
    self.num_filters_per_width = [150, 150, 200, 200, 250, 250, 250, 250]

    self.encoder_embedding = nn.Embedding(num_symbols, self.char_embedding_dim)
    self.cnn_encoder = cnn_encoder(
        filter_widths=self.filter_widths,
        num_filters_per_width=self.num_filters_per_width,
        char_embedding_dim=self.char_embedding_dim,
        use_cuda=use_cuda)

    self.decoder_hidden_size = int(np.sum(np.array(self.num_filters_per_width)))
    self.rnn_encoder = rnn_encoder(hidden_size=self.decoder_hidden_size)

    # decoder embedding dim dictated by output dim of encoder
    self.decoder_embedding = nn.Embedding(num_symbols, self.decoder_hidden_size)
    self.attention_decoder = AttnDecoderRNN(
        num_symbols=num_symbols,
        hidden_size=self.decoder_hidden_size,
        output_size=self.seq_len // self.pooling_stride)

    self.criterion = criterion
def __init__(self, criterion, num_symbols, use_cuda):
    '''
    Overview of the autoencoder forward pass:
    1. Input batch is embedded
    2. CNN + pooling encoder is called on the embedded input
    3. BiGRU encoder is called on the activations of the previous encoder
    4. Attention GRU decoder takes an embedded symbol at the current t
       (the decoder embedding embeds the symbol at the current t)
    5. Batch cross entropy is calculated and returned
    '''
    super(CharLevel_autoencoder, self).__init__()
    self.char_embedding_dim = 128
    self.pooling_stride = 5
    self.seq_len = 300
    self.num_symbols = num_symbols
    self.use_cuda = use_cuda

    self.filter_widths = list(range(1, 9))
    # due to cuda limitations, every filter width has 50 fewer filters
    self.num_filters_per_width = [150, 150, 200, 200, 250, 250, 250, 250]

    self.encoder_embedding = nn.Embedding(num_symbols, self.char_embedding_dim)
    self.cnn_encoder = cnn_encoder(
        filter_widths=self.filter_widths,
        num_filters_per_width=self.num_filters_per_width,
        char_embedding_dim=self.char_embedding_dim,
        use_cuda=use_cuda)

    self.decoder_hidden_size = int(np.sum(np.array(self.num_filters_per_width)))
    self.rnn_encoder = rnn_encoder(hidden_size=self.decoder_hidden_size)

    # decoder embedding dim dictated by output dim of encoder
    self.decoder_embedding = nn.Embedding(num_symbols, self.decoder_hidden_size)
    self.attention_decoder = AttnDecoderRNN(
        num_symbols=num_symbols,
        hidden_size=self.decoder_hidden_size,
        output_size=self.seq_len // self.pooling_stride)

    self.criterion = criterion
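# The docstring above describes the forward pass, but the method itself is not
# shown in this excerpt. The sketch below is a minimal, illustrative
# reconstruction of how these modules could be composed for one batch; the
# exact interfaces of cnn_encoder, rnn_encoder and AttnDecoderRNN, the tensor
# shapes, and the teacher-forcing loop are assumptions, not the author's
# implementation.
def forward_sketch(self, data, target):
    embedded = self.encoder_embedding(data)        # (batch, seq_len, char_embedding_dim)
    pooled = self.cnn_encoder(embedded)            # (batch, seq_len // pooling_stride, sum(filters))
    encoder_outputs, encoder_hidden = self.rnn_encoder(pooled)

    decoder_hidden = encoder_hidden
    loss = 0
    for t in range(self.seq_len):                  # teacher forcing on the target symbols
        decoder_input = self.decoder_embedding(target[:, t])
        decoder_output, decoder_hidden, _ = self.attention_decoder(
            decoder_input, decoder_hidden, encoder_outputs)
        loss += self.criterion(decoder_output, target[:, t])
    return loss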
def __init__(self, criterion, num_symbols, use_cuda):  # , seq_len):
    super(CharLevel_autoencoder, self).__init__()
    self.char_embedding_dim = 64
    self.pooling_stride = 5
    self.seq_len = 200
    self.num_symbols = num_symbols
    self.use_cuda = use_cuda

    self.filter_widths = list(range(1, 8))
    self.num_filters_per_width = 125  # [100, 100, 125, 125, 150, 150, 150, 150]

    self.encoder_embedding = nn.Embedding(num_symbols, self.char_embedding_dim)
    self.cnn_encoder = cnn_encoder(
        filter_widths=self.filter_widths,
        num_filters_per_width=self.num_filters_per_width,
        char_embedding_dim=self.char_embedding_dim)  # seq_len=self.seq_len)

    self.decoder_hidden_size = len(self.filter_widths) * self.num_filters_per_width
    self.rnn_encoder = rnn_encoder(hidden_size=self.decoder_hidden_size)

    # decoder embedding dim dictated by output dim of encoder
    self.decoder_embedding = nn.Embedding(num_symbols, self.decoder_hidden_size)
    self.attention_decoder = AttnDecoderRNN(
        num_symbols=num_symbols,
        hidden_size=self.decoder_hidden_size,
        output_size=self.seq_len // self.pooling_stride)

    # if use_cuda:
    #     self.cnn_encoder = self.cnn_encoder.cuda()
    #     self.rnn_encoder = self.rnn_encoder.cuda()
    #     self.attention_decoder = self.attention_decoder.cuda()

    self.criterion = criterion
def __init__(self, criterion, seq_len, layers):
    super(CharLevel_autoencoder, self).__init__()
    self.batch_size = 64
    self.layers = layers
    self.seq_len = seq_len
    self.emit_len = seq_len  # //3   CR|R  C|C : conv encoder -> pooled activations
    if layers == 'R_R':
        self.emit_len = seq_len
    self.layers = layers

    self.char_embedding_dim = 20
    self.encoder_embedding = nn.Embedding(23, self.char_embedding_dim)

    self.filter_widths = list(range(1, 6))
    self.filter_config = [20, 20, 20, 20, 10]  # for CR_R and C_C
    # self.filter_config = [10, 10, 20, 20]
    self.cnn_encoder = cnn_encoder(
        filter_widths=self.filter_widths,
        filter_config=self.filter_config,
        char_embedding_dim=self.char_embedding_dim)

    self.decoder_hidden_size = int(np.sum(np.array(self.filter_config)))
    if layers == 'R_R':
        self.decoder_hidden_size = self.char_embedding_dim
    self.rnn_encoder = rnn_encoder(hidden_size=self.decoder_hidden_size)

    # _____________________________ decoder ________________________________
    self.decoder_embedding = nn.Embedding(23, self.decoder_hidden_size)
    self.attention_decoder = rnn_decoder(
        hidden_size=self.decoder_hidden_size,
        emit_len=self.emit_len)
    self.criterion = criterion

    self.deconv_decoder = cnn_decoder(
        filter_widths=self.filter_widths,
        filter_config=self.filter_config,
        emit_len=self.emit_len)
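# Usage sketch (illustrative only): the criterion, seq_len and layer-mode values
# below are placeholder assumptions, not taken from the original training
# script. It shows how decoder_hidden_size follows the chosen configuration.
if __name__ == '__main__':
    conv_model = CharLevel_autoencoder(criterion=nn.NLLLoss(), seq_len=100, layers='CR_R')
    rnn_only = CharLevel_autoencoder(criterion=nn.NLLLoss(), seq_len=100, layers='R_R')
    print(conv_model.decoder_hidden_size)  # sum(filter_config) = 90 for the conv path
    print(rnn_only.decoder_hidden_size)    # char_embedding_dim = 20 when layers == 'R_R'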
tf_sum_mask_2dims = tf.placeholder(
    tf.float32, [conf.batch_size, conf.sum_len], name="sum_mask_2dims")

# data load
dir_data = "~/train_30000_params_tuning/"
train_doc2id = np.loadtxt(dir_data + "train_doc2id.txt").astype('int32')
train_deco_inputs2id = np.loadtxt(dir_data + "train_deco_inputs2id.txt").astype('int32')
train_y_true2id = np.loadtxt(dir_data + "train_y_true2id.txt").astype('int32')
doc_mask_2dims = list(np.loadtxt(dir_data + "doc_mask_2dims.txt").astype('int32'))
dims_expanded = np.array([np.zeros((conf.emb_dim, 1)), np.ones((conf.emb_dim, 1))])
doc_mask_4dims = dims_expanded[doc_mask_2dims]
sum_mask_2dims = list(np.loadtxt(dir_data + "sum_mask_2dims.txt").astype('int32'))
vocab = open(dir_data + 'vocab.txt', 'r').read().split()

# encoder part
print("encoder start...")
enco_h = encoder.cnn_encoder(tf_input_x, tf_doc_mask_4dims)
enco_h = enco_h * tf_doc_mask_4dims

# decoder part
print("decoder start...")
loss = decoder.gru_decoder(enco_h[:, :, :, 0], tf_doc_mask_4dims[:, :, 0, 0],
                           tf_deco_input, tf_sum_mask_2dims, tf_y_true)

# gradient descent with global-norm gradient clipping
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), conf.max_grad_norm)
optimizer = tf.train.AdamOptimizer(conf.lr)
# optimizer = tf.train.GradientDescentOptimizer(conf.lr)
# optimizer = tf.train.MomentumOptimizer(conf.lr, conf.momentum)
optimizer_tf = optimizer.apply_gradients(zip(grads, tvars))
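# Training-loop sketch (illustrative assumptions): conf.num_epochs, the batch
# slicing, and the remaining placeholders referenced above (tf_input_x,
# tf_deco_input, tf_y_true, tf_doc_mask_4dims) are assumed to be defined
# earlier in this script; only full batches are fed because the placeholders
# have a fixed batch dimension.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(conf.num_epochs):
        for start in range(0, len(train_doc2id) - conf.batch_size + 1, conf.batch_size):
            end = start + conf.batch_size
            feed = {tf_input_x: train_doc2id[start:end],
                    tf_deco_input: train_deco_inputs2id[start:end],
                    tf_y_true: train_y_true2id[start:end],
                    tf_doc_mask_4dims: doc_mask_4dims[start:end],
                    tf_sum_mask_2dims: sum_mask_2dims[start:end]}
            _, batch_loss = sess.run([optimizer_tf, loss], feed_dict=feed)
        print("epoch %d, last batch loss %.4f" % (epoch, batch_loss))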