示例#1
0
    def __init__(self):
        """Build the graph: per-frame features, a bidirectional LSTM, a classifier.

        Creates the placeholders (``training``, ``inputs``,
        ``sequence_length``, ``targets``), runs ``self.get_features`` on each
        of the 5 frames, feeds the resulting list to a static bidirectional
        LSTM, and attaches a dense classification layer, the cross-entropy
        loss and an Adam training op.

        Relies on ``hidden_size``, ``batch_size`` and ``classes`` being
        defined outside this method.
        """
        self.training = tf.placeholder(tf.bool, name='training')
        # A batch of clips, each made of 5 frames of 224x224x3.
        self.inputs = tf.placeholder(dtype=tf.float32,
                                     shape=[None, 5, 224, 224, 3])
        # Split along the frame axis into a list of 5 per-frame tensors.
        # NOTE(review): this rebinds ``self.inputs`` from the placeholder to
        # the unstacked list, so the placeholder itself is no longer
        # reachable through the attribute - confirm how callers feed data.
        self.inputs = tf.unstack(self.inputs, axis=1)
        self.sequence_length = tf.placeholder(dtype=tf.int32, shape=[None])

        # One feature tensor per frame, in frame order.
        self.LSTM_inputs = [
            self.get_features(frame) for frame in self.inputs
        ]
        print('Image feature extraction is successful')

        lstm_f_cell = BasicLSTMCell(num_units=hidden_size)
        lstm_b_cell = BasicLSTMCell(num_units=hidden_size)
        init_fw = lstm_f_cell.zero_state(batch_size, dtype=tf.float32)
        init_bw = lstm_b_cell.zero_state(batch_size, dtype=tf.float32)
        outputs, output_state_fw, output_state_bw = static_bidirectional_rnn(
            lstm_f_cell,
            lstm_b_cell,
            self.LSTM_inputs,
            initial_state_fw=init_fw,
            initial_state_bw=init_bw,
            sequence_length=self.sequence_length)
        # Classify from the output of the last time step.
        # NOTE(review): with ``sequence_length`` set, the forward output at
        # the last step is presumably zeroed for shorter sequences - confirm
        # that ``outputs[-1]`` is the intended feature.
        self.predict = tf.layers.dense(outputs[-1], classes)
        # Predicted class per example. The arg-max must run over the class
        # axis (axis=1); without an explicit axis it reduces over axis 0,
        # i.e. across the batch. Softmax is monotonic, so taking the arg-max
        # of the logits gives the same class as the arg-max of probabilities.
        self.finally_pre = tf.argmax(self.predict, axis=1)
        self.targets = tf.placeholder(dtype=tf.int32, shape=[None])

        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.targets, logits=self.predict))
        # Run the ops registered in UPDATE_OPS before every training step.
        with tf.control_dependencies(tf.get_collection(
                tf.GraphKeys.UPDATE_OPS)):
            self.train_op = tf.train.AdamOptimizer().minimize(self.loss)
示例#2
0
def BasicLSTM_init_state(batch_size, output_size):
    """Return the all-zero initial state of a ``BasicLSTMCell``.

    A throw-away cell with ``output_size`` units is created inside the
    ``LSTM_init_state`` name scope and asked for its float32 zero state for
    ``batch_size`` examples.
    """
    with tf.name_scope('LSTM_init_state'):
        template_cell = BasicLSTMCell(output_size)
        return template_cell.zero_state(batch_size=batch_size,
                                        dtype='float32')
示例#3
0
 def add_cell(self):
     """Create the LSTM layer and unroll it over ``self.l_in_y``.

     Stores the zero initial state in ``self.cell_init_state`` and the
     results of ``tf.nn.dynamic_rnn`` in ``self.cell_outputs`` and
     ``self.cell_final_state``. Inputs are batch-major (``time_major=False``).
     """
     cell = BasicLSTMCell(self.cell_size)
     zero_state = cell.zero_state(self.batch_size, dtype=tf.float32)
     self.cell_init_state = zero_state
     rnn_outputs, rnn_final_state = tf.nn.dynamic_rnn(
         cell,
         self.l_in_y,
         initial_state=zero_state,
         time_major=False)
     self.cell_outputs = rnn_outputs
     self.cell_final_state = rnn_final_state
def rnn(features, mode, params):
    """Build a single-layer recurrent classifier and return its logits.

    Args:
        features: Mapping with a ``'feature'`` tensor and, when
            ``params.per_frame`` is set, a ``'sequence_length'`` tensor.
        mode: Estimator mode; dropout is only active for
            ``tf.estimator.ModeKeys.TRAIN``.
        params: Hyper-parameter object. Reads ``model``, ``hidden_size``,
            ``batch_size``, ``per_frame``, ``max_length``, ``feature_length``,
            ``dropout`` and ``num_classes``.

    Returns:
        The un-normalised class scores produced by the final dense layer.
    """
    # "LSTM" and "GRU" select their cell; any other value uses a basic RNN.
    if params.model == "LSTM":
        cell = BasicLSTMCell(params.hidden_size)
    elif params.model == "GRU":
        cell = GRUCell(params.hidden_size)
    else:
        cell = BasicRNNCell(params.hidden_size)

    zero_state = cell.zero_state(params.batch_size, dtype=tf.float64)

    if params.per_frame:
        # dynamic_rnn is run time-major, so swap the first two axes:
        # (batch_size, max_time, ...) -> (max_time, batch_size, ...)
        time_major_inputs = tf.transpose(features['feature'], [1, 0, 2])

        lengths = tf.reshape(features['sequence_length'],
                             shape=(params.batch_size, ))

        outputs, state = tf.nn.dynamic_rnn(cell,
                                           inputs=time_major_inputs,
                                           initial_state=zero_state,
                                           sequence_length=lengths,
                                           time_major=True)

        # Pick the output at the last valid time step.
        # NOTE(review): the step index comes from the FIRST sequence length
        # only and is applied to the whole batch - confirm that all
        # sequences in a batch share the same length.
        last_step = features['sequence_length'][0] - 1
        outputs = outputs[last_step]
    else:
        # Flatten the whole feature matrix into a single time step.
        flat_width = params.max_length * params.feature_length
        single_step_inputs = tf.reshape(
            features['feature'],
            shape=(1, params.batch_size, flat_width))

        outputs, state = tf.nn.dynamic_rnn(cell,
                                           inputs=single_step_inputs,
                                           initial_state=zero_state,
                                           time_major=True)

        # Drop the length-1 time axis.
        outputs = tf.reshape(outputs,
                             shape=(params.batch_size, params.hidden_size))

    # Dropout is a no-op outside of training.
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    dropped = tf.layers.dropout(outputs,
                                rate=params.dropout,
                                training=is_training)

    return tf.layers.dense(dropped,
                           units=params.num_classes,
                           activation=None)
示例#5
0
 def _add_bilstm_cell(self):
     """Create the forward/backward LSTM cells and unroll them.

     Stores the zero initial states in ``self.fw_init_state`` /
     ``self.bw_init_state``, the concatenated (along axis 2) forward and
     backward outputs in ``self.bilstm_output``, and the two final states
     in ``self.fw_final_state`` / ``self.bw_final_state``.
     """
     # Two identically configured cells: forward first, then backward.
     fw_lstm, bw_lstm = [
         BasicLSTMCell(num_units=self.hidden_layer,
                       forget_bias=1.0,
                       state_is_tuple=True) for _ in range(2)
     ]
     # Zero initial state for each direction.
     self.fw_init_state = fw_lstm.zero_state(self.batch_size,
                                             dtype=tf.float32)
     self.bw_init_state = bw_lstm.zero_state(self.batch_size,
                                             dtype=tf.float32)
     # NOTE(review): ``sequence_length`` receives ``self.batch_size``; the
     # API expects the per-example sequence lengths - confirm this is
     # intended.
     directional_outputs, directional_states = (
         tf.nn.bidirectional_dynamic_rnn(
             cell_fw=fw_lstm,
             cell_bw=bw_lstm,
             sequence_length=self.batch_size,
             inputs=self.input_layer_data,
             initial_state_bw=self.bw_init_state,
             initial_state_fw=self.fw_init_state))
     self.bilstm_output = tf.concat(directional_outputs, 2)
     self.fw_final_state, self.bw_final_state = directional_states
class RecurrentController(BaseController):
    """Controller backed by a single 256-unit ``BasicLSTMCell``."""

    def network_vars(self):
        """Create the LSTM cell and its zero state for ``self.batch_size``."""
        cell = BasicLSTMCell(256)
        self.lstm_cell = cell
        self.state = cell.zero_state(self.batch_size, tf.float32)

    def network_op(self, X, state):
        """Apply the LSTM cell to ``X`` (converted to a tensor) and ``state``."""
        return self.lstm_cell(tf.convert_to_tensor(X), state)

    def get_state(self):
        """Return the state created by ``network_vars``."""
        return self.state

    def update_state(self, new_state):
        """Return a no-op; ``new_state`` is ignored."""
        return tf.no_op()
示例#7
0
class DilatedLSTM(object):
    """LSTM that cycles through ``num_cores`` separate states with one cell.

    At step ``i`` the shared cell is applied to the state at index
    ``i % num_cores``; the cell output becomes the input of the next step.

    Args:
        inputs: Input tensor fed to the first step.
        initial_state: Per-core states, indexed by ``i % num_cores``.
        hidden_state_size: Number of units of the shared ``BasicLSTMCell``.
        max_steps: Number of loop iterations to run.
        num_cores: Number of states that are cycled through.
        pool_size: Window size of the average pooling over the result.
    """

    def __init__(self,
                 inputs,
                 initial_state,
                 hidden_state_size,
                 max_steps,
                 num_cores=10,
                 pool_size=10):

        self.shared_cell = BasicLSTMCell(hidden_state_size)
        self.initial_state = initial_state
        self.max_steps = max_steps
        self.num_cores = num_cores
        self.pool_size = pool_size
        self.inputs = inputs
        self._build_ops()

    def _build_ops(self):
        """Build the while-loop over steps and the pooled output tensor."""
        i0 = tf.constant(0, dtype=tf.int32)

        def loop_condition(i, inputs, state):
            return tf.less(i, self.max_steps)

        def body(i, inputs, full_state):
            # Only the state that belongs to the current step is updated.
            # NOTE(review): ``idx`` is a tensor here; confirm that
            # ``full_state`` supports tensor indexing and item assignment.
            idx = i % self.num_cores
            prev_state = full_state[idx]
            inputs, full_state[idx] = self.shared_cell(inputs, prev_state)

            return i + 1, inputs, full_state

        _, inputs, full_state = tf.while_loop(
            loop_condition,
            body,
            loop_vars=[i0, self.inputs, self.initial_state])

        # NOTE(review): 256 is hard-coded; presumably it should follow
        # ``hidden_state_size`` - confirm.
        lstm_outputs = tf.reshape(tf.concat(full_state, 1), [-1, 256])
        # ``tf.expand`` / ``tf.avg_pool`` do not exist; the ops are
        # ``tf.expand_dims`` and ``tf.nn.avg_pool``.
        # NOTE(review): ``tf.nn.avg_pool`` expects a 4-D input while this
        # tensor has 3 dimensions after ``expand_dims`` - confirm the
        # intended layout. The attribute keeps its original spelling
        # (``outpus``) so existing readers keep working.
        self.outpus = tf.nn.avg_pool(tf.expand_dims(lstm_outputs, -1),
                                     [1, self.pool_size, 1, 1],
                                     strides=[1, 1, 1, 1],
                                     padding='SAME')

    def zero_state(self):
        """Return one zero state of the shared cell per core."""
        # ``self.stride`` was never assigned; the state list is indexed with
        # ``i % self.num_cores``, so it needs ``num_cores`` entries.
        # NOTE(review): the batch size is taken from
        # ``tf.shape(self.max_steps)[0]`` - confirm ``max_steps`` is the
        # right tensor to read it from.
        return [
            self.shared_cell.zero_state(
                tf.shape(self.max_steps)[0], tf.float32)
            for _ in range(self.num_cores)
        ]
示例#8
0
    def _outputs(self):
        """Run ``self.Input_data`` through an LSTM and a per-step linear layer.

        Returns the linear layer's output reshaped to
        ``[batch_size, sequence_length, num_classes]``. ``hidden_size``,
        ``batch_size``, ``sequence_length`` and ``num_classes`` are read from
        the enclosing scope.
        """
        lstm_cell = BasicLSTMCell(num_units=hidden_size)
        zero_state = lstm_cell.zero_state(batch_size, tf.float32)
        rnn_outputs, _ = tf.nn.dynamic_rnn(lstm_cell,
                                           self.Input_data,
                                           initial_state=zero_state,
                                           dtype=tf.float32)

        # Merge the leading axes so one dense layer is shared by all rows.
        flat_rnn_outputs = tf.reshape(rnn_outputs, [-1, hidden_size])
        flat_scores = fully_connected(inputs=flat_rnn_outputs,
                                      num_outputs=num_classes,
                                      activation_fn=None)
        # Restore the [batch, step, class] layout.
        return tf.reshape(flat_scores,
                          [batch_size, sequence_length, num_classes])
示例#9
0
# Build the vocabulary from the training text. Both the LSTM width and the
# number of output classes are set to the vocabulary size.
X_train_vocab, X_train_vocab_rev = create_vocabulary(X_train)
hidden_size = len(X_train_vocab)
num_classes = len(X_train_vocab)

# Next-token setup: the target sequence is the input shifted by one position.
X_train_ids = sentence_to_token_ids(X_train, X_train_vocab)
X_data = X_train_ids[:-1]
Y_data = X_train_ids[1:]
# Wrap each sequence in a list (presumably a batch of one - confirm against
# `batch_size`, which is defined outside this fragment).
X_data_one_hot = [token_ids_to_one_hot(X_data, num_classes)]
Y_data = [Y_data]

# ==============================================================================
# Graph inputs: float features per step, and int32 target ids per step.
X = tf.placeholder(tf.float32, [None, sequence_length, hidden_size])
Y = tf.placeholder(tf.int32, [None, sequence_length])

# Single LSTM layer unrolled with dynamic_rnn from a zero initial state.
cell = BasicLSTMCell(num_units=hidden_size)
initial_state = cell.zero_state(batch_size, tf.float32)
outputs, _states = tf.nn.dynamic_rnn(cell,
                                     X,
                                     initial_state=initial_state,
                                     dtype=tf.float32)

# Flatten to rows of `hidden_size` so one linear layer is applied to every row.
X_for_fc = tf.reshape(outputs, [-1, hidden_size])
outputs = fully_connected(inputs=X_for_fc,
                          num_outputs=num_classes,
                          activation_fn=None)

# Back to [batch_size, sequence_length, num_classes] for the sequence loss.
outputs = tf.reshape(outputs, [batch_size, sequence_length, num_classes])

# All-ones weights: every position has the same weight in the loss.
weights = tf.ones([batch_size, sequence_length])
# NOTE: this rebinds the name `sequence_loss` from the loss function to its
# result, so the function is no longer reachable under this name afterwards.
sequence_loss = sequence_loss(logits=outputs, targets=Y, weights=weights)
loss = tf.reduce_mean(sequence_loss)
    def build_model(self):
        """Assemble encoder, Kalman filter, decoder and generation outputs.

        Populates ``self.kf``, ``self.n_steps_gen``, the ``self.out_gen*``
        tensors, ``self.out_alpha`` and the ``self.model_vars`` dictionary.

        Returns:
            ``self``, so that calls can be chained.
        """
        # Encoder q(a|x)
        a_seq, a_mu, a_var = self.encoder(self.x)
        a_vae = a_seq

        # A throw-away LSTM cell is used only to obtain a zero state of the
        # right size for the alpha RNN; the size doubles when learn_u is set.
        if self.config.learn_u:
            alpha_state_units = self.config.alpha_units * 2
        else:
            alpha_state_units = self.config.alpha_units
        state_template_cell = BasicLSTMCell(alpha_state_units)
        state_init_rnn = state_template_cell.zero_state(
            self.config.batch_size, tf.float32)

        # Kalman filter (LGSSM) over the encoded sequence
        init_vars = self.init_vars
        self.kf = KalmanFilter(
            dim_z=self.config.dim_z,
            dim_y=self.config.dim_a,
            dim_u=self.config.dim_u,
            dim_k=self.config.K,
            A=init_vars['A'],  # state transition function
            B=init_vars['B'],  # control matrix
            C=init_vars['C'],  # measurement function
            R=init_vars['R'],  # measurement noise
            Q=init_vars['Q'],  # process noise
            y=a_seq,  # output
            u=None,
            mask=self.mask,
            mu=init_vars['mu'],
            Sigma=init_vars['Sigma'],
            y_0=init_vars['a_0'],
            alpha=self.alpha,
            state=state_init_rnn)

        # Smoothed posterior over z
        smooth, A, B, C, alpha_plot = self.kf.smooth()

        # Filtered posterior, used only for imputation plots
        filtered, _, _, C_filter, _ = self.kf.filter()

        # Map the smoothed z back to a (for plotting)
        smooth_mean_col = tf.expand_dims(smooth[0], 2)
        a_mu_pred = tf.matmul(C, smooth_mean_col, transpose_b=True)
        pred_seq_shape = tf.stack((-1, self.ph_steps, self.config.dim_a))
        a_mu_pred_seq = tf.reshape(a_mu_pred, pred_seq_shape)
        if self.config.sample_z:
            a_seq = a_mu_pred_seq

        # Decoder p(x|a)
        x_hat, x_mu, x_var = self.decoder(a_seq)

        # Generation from the model (for plotting)
        def generate(n_steps, deterministic):
            return self.kf.sample_generative_tf(
                smooth,
                n_steps,
                deterministic=deterministic,
                init_fixed_steps=self.config.t_init_mask)

        # Number of iterations to sample for
        self.n_steps_gen = self.config.n_steps_gen
        self.out_gen_det = generate(self.n_steps_gen, True)
        self.out_gen = generate(self.n_steps_gen, False)
        self.out_gen_det_impute = generate(self.test_data.timesteps, True)
        self.out_alpha, _, _, _ = self.alpha(self.a_prev,
                                             state=state_init_rnn,
                                             u=None,
                                             init_buffer=True,
                                             reuse=True)

        # Everything the caller may want to fetch, by name
        self.model_vars = {
            'x_hat': x_hat,
            'x_mu': x_mu,
            'x_var': x_var,
            'a_seq': a_seq,
            'a_mu': a_mu,
            'a_var': a_var,
            'a_vae': a_vae,
            'smooth': smooth,
            'A': A,
            'B': B,
            'C': C,
            'alpha_plot': alpha_plot,
            'a_mu_pred_seq': a_mu_pred_seq,
            'filter': filtered,
            'C_filter': C_filter,
        }

        return self
示例#11
0
 def _add_cell(self):
     """Create the LSTM cell and unroll it over ``self.input_layer_data``.

     Stores the zero initial state in ``self.init_state`` and the results
     of ``tf.nn.dynamic_rnn`` in ``self.cell_outputs`` and
     ``self.cell_final_state``. Inputs are batch-major (``time_major=False``).
     """
     cell = BasicLSTMCell(num_units=self.cell_size,
                          forget_bias=1.0,
                          state_is_tuple=True)
     self.init_state = cell.zero_state(self.batch_size, dtype=tf.float32)
     rnn_outputs, rnn_final_state = tf.nn.dynamic_rnn(
         cell=cell,
         inputs=self.input_layer_data,
         initial_state=self.init_state,
         time_major=False)
     self.cell_outputs = rnn_outputs
     self.cell_final_state = rnn_final_state
示例#12
0
def main(model, T, n_epochs, n_batch, n_hidden, capacity, comp, FFT,
         learning_rate, decay):
    """Train a character-level recurrent model and log validation/test loss.

    Builds a one-hot -> recurrent cell -> linear softmax graph, trains it
    with RMSProp on the batches yielded by ``file_data('train', ...)``, runs
    a validation pass every 5000 steps of an epoch and finally evaluates on
    the test split. Losses are written to
    ``./output/character/text8/T=<T>/<model>_N=<n_hidden>[...].txt``.

    Args:
        model: One of "LSTM", "GRU", "RNN", "EURNN" or "GORU".
        T: Number of time steps per sample.
        n_epochs: Passed to ``file_data`` for the training split.
        n_batch: Batch size.
        n_hidden: Number of hidden units of the recurrent cell.
        capacity: Forwarded to ``EURNNCell`` / ``GORUCell``; also part of
            the log file name when ``FFT`` is false.
        comp: If truthy, EURNN/GORU are unrolled with ``tf.complex64`` and
            only the real part of their output is used.
        FFT: Forwarded to ``EURNNCell`` / ``GORUCell``; adds ``_FFT`` to the
            log file name when true.
        learning_rate: RMSProp learning rate.
        decay: RMSProp decay.

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """
    # Fail early with a clear message instead of an unbound-local error
    # after the data has already been loaded.
    if model not in ("LSTM", "GRU", "RNN", "EURNN", "GORU"):
        raise ValueError("Unsupported model: %r" % (model, ))

    # --- Set data params ----------------
    max_len_data = 100000000
    epoch_train, vocab_to_idx = file_data('train', n_batch, max_len_data, T,
                                          n_epochs, None)
    n_input = len(vocab_to_idx)
    epoch_val, _ = file_data('valid', n_batch, max_len_data, T, 10000,
                             vocab_to_idx)
    epoch_test, _ = file_data('test', n_batch, max_len_data, T, 1,
                              vocab_to_idx)
    n_output = n_input

    # --- Create graph and compute gradients ----------------------
    x = tf.placeholder("int32", [None, T])
    y = tf.placeholder("int64", [None, T])

    input_data = tf.one_hot(x, n_input, dtype=tf.float32)

    # Input to hidden layer
    if model == "LSTM":
        cell = BasicLSTMCell(n_hidden, state_is_tuple=True, forget_bias=1)
    elif model == "GRU":
        cell = GRUCell(n_hidden)
    elif model == "RNN":
        cell = BasicRNNCell(n_hidden)
    elif model == "EURNN":
        cell = EURNNCell(n_hidden, capacity, FFT, comp)
    else:  # "GORU"
        cell = GORUCell(n_hidden, capacity, FFT, comp)

    # Zero state; it is not passed to dynamic_rnn and is only used as a
    # feed_dict key for the state fetched on the previous step.
    # NOTE(review): because it is not wired in as ``initial_state``, feeding
    # it presumably does not carry state across batches - confirm intent.
    h = cell.zero_state(n_batch, tf.float32)

    if comp and model in ("EURNN", "GORU"):
        # Complex-valued unroll; only the real part goes to the output layer.
        hidden_out_comp, states = tf.nn.dynamic_rnn(cell,
                                                    input_data,
                                                    dtype=tf.complex64)
        hidden_out = tf.real(hidden_out_comp)
    else:
        hidden_out, states = tf.nn.dynamic_rnn(cell,
                                               input_data,
                                               dtype=tf.float32)

    # Hidden Layer to Output
    V_init_val = np.sqrt(6.) / np.sqrt(n_output + n_input)

    V_weights = tf.get_variable(
        "V_weights",
        shape=[n_hidden, n_output],
        dtype=tf.float32,
        initializer=tf.random_uniform_initializer(-V_init_val, V_init_val))
    V_bias = tf.get_variable("V_bias",
                             shape=[n_output],
                             dtype=tf.float32,
                             initializer=tf.constant_initializer(0.01))

    # Apply the output projection step by step, then restore batch-major
    # order before adding the bias.
    hidden_out_list = tf.unstack(hidden_out, axis=1)
    temp_out = tf.stack(
        [tf.matmul(step_out, V_weights) for step_out in hidden_out_list])
    output_data = tf.nn.bias_add(tf.transpose(temp_out, [1, 0, 2]), V_bias)

    # define evaluate process
    cost = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output_data,
                                                       labels=y))
    correct_pred = tf.equal(tf.argmax(output_data, 2), y)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # --- Initialization ----------------------
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate,
                                          decay=decay).minimize(cost)
    init = tf.global_variables_initializer()

    for variable in tf.global_variables():
        print(variable.name)

    # --- save result ----------------------
    filename = ("./output/character/text8/T=" + str(T) + "/" + model +
                "_N=" + str(n_hidden))

    if model == "EURNN" or model == "GORU":
        print(model)
        if FFT:
            filename += "_FFT"
        else:
            filename = filename + "_L=" + str(capacity)

    filename = filename + ".txt"
    if not os.path.exists(os.path.dirname(filename)):
        try:
            os.makedirs(os.path.dirname(filename))
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

    def do_validation():
        """Run one validation pass and log the mean loss.

        Reads ``sess``, ``t``, ``validation_losses`` and ``f`` from the
        enclosing scope, so it must only be called inside the session below.
        """
        val_losses = []
        # Only the first item of ``epoch_val`` is evaluated per call.
        for j, val in enumerate(epoch_val, start=1):
            if j >= 2:
                break
            print("Running validation...")
            val_state = None
            for stepb, (X_val, Y_val) in enumerate(val):
                val_dict = {x: X_val, y: Y_val}
                if val_state is not None:
                    val_dict[h] = val_state
                # NOTE(review): ``notstates`` is not defined in this
                # function; presumably a module-level flag - confirm.
                if notstates:
                    val_acc, val_loss = sess.run([accuracy, cost],
                                                 feed_dict=val_dict)
                else:
                    val_acc, val_loss, val_state = sess.run(
                        [accuracy, cost, states], feed_dict=val_dict)
                val_losses.append(val_loss)
        print("Validations:", )
        validation_losses.append(sum(val_losses) / len(val_losses))
        print("Validation Loss= " + \
            "{:.6f}".format(validation_losses[-1]))

        f.write("%d\t%f\n" % (t, validation_losses[-1]))
        f.flush()

    # The log file is closed when the block exits, even if training raises.
    with open(filename, 'w') as f:
        f.write("########\n\n")
        f.write("## \tModel: %s with N=%d" % (model, n_hidden))
        if model == "EURNN" or model == "GORU":
            if FFT:
                f.write(" FFT")
            else:
                f.write(" L=%d" % (capacity))
        f.write("\n\n")
        f.write("########\n\n")

        # --- Training Loop -------------------------------------------------
        with tf.Session(
                config=tf.ConfigProto(log_device_placement=False,
                                      allow_soft_placement=False)) as sess:
            print("Session Created")

            validation_losses = []

            sess.run(init)
            training_state = None
            i = 0  # epoch counter
            t = 0  # global step counter, written to the log file
            for epoch in epoch_train:
                print("Epoch: ", i)

                for step, (X, Y) in enumerate(epoch):
                    myfeed_dict = {x: X, y: Y}
                    if training_state is not None:
                        myfeed_dict[h] = training_state

                    # NOTE(review): ``notstates`` is not defined in this
                    # function; presumably a module-level flag - confirm.
                    if notstates:
                        _, acc, loss = sess.run([optimizer, accuracy, cost],
                                                feed_dict=myfeed_dict)
                    else:
                        _, acc, loss, training_state = sess.run(
                            [optimizer, accuracy, cost, states],
                            feed_dict=myfeed_dict)

                    print("Iter " + str(step) + ", Minibatch Loss= " + \
                       "{:.6f}".format(loss) + ", Training Accuracy= " + \
                         "{:.5f}".format(acc))

                    t += 1

                    # Validate on every 5000th step of the epoch.
                    if step % 5000 == 4999:
                        do_validation()

                i += 1

            print("Optimization Finished!")

            # --- Test ------------------------------------------------------
            test_losses = []
            # Only the first item of ``epoch_test`` is evaluated.
            for j, test in enumerate(epoch_test, start=1):
                if j >= 2:
                    break
                print("Running validation...")
                for stepb, (X_test, Y_test) in enumerate(test):
                    test_dict = {x: X_test, y: Y_test}
                    test_acc, test_loss = sess.run([accuracy, cost],
                                                   feed_dict=test_dict)
                    test_losses.append(test_loss)
            print("test:", )
            # The mean is appended to the list and read back as [-1].
            test_losses.append(sum(test_losses) / len(test_losses))
            print("test Loss= " + \
                "{:.6f}".format(test_losses[-1]))
            f.write("Test result: %d\t%f\n" % (t, test_losses[-1]))
示例#13
0
# Shape the input patterns as (n_patterns, seq_length, 1).
# NOTE: `seq_length` is used here before the assignment further down, so it
# must already be defined earlier in the file.
X = np.reshape(dataX, (n_patterns, seq_length, 1))
# Normalize by the vocabulary size.
X = X / float(n_vocab)
# One-hot encode the output variable.
Y = to_categorical(dataY)
'''
Create TF model
'''
units = 256
seq_length = 100
data_x = tf.placeholder(tf.float32, shape=(None, seq_length, 1))
data_y = tf.placeholder(tf.float32, shape=(None, n_vocab))
# The batch size is read from the fed tensor at run time.
batch_size = tf.shape(data_x)[0]
# Create the TF cell; API reference: https://www.tensorflow.org/api_docs/python/tf/contrib/rnn
rnn_cell = BasicLSTMCell(num_units=units, forget_bias=1)
initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32)

# Compute the RNN outputs.
outputs, state = tf.nn.dynamic_rnn(cell=rnn_cell,
                                   inputs=data_x,
                                   initial_state=initial_state,
                                   dtype=tf.float32)

# Taken from: https://danijar.com/introduction-to-recurrent-networks-in-tensorflow/
# Move the time axis first, then take the output of the last time step.
outputs = tf.transpose(outputs, [1, 0, 2])
last = tf.gather(outputs, int(outputs.get_shape()[0]) - 1)

# NOTE(review): `keep_prob=0.2` keeps only 20% of the activations, and the
# dropout is applied unconditionally - confirm a drop rate of 0.2
# (keep_prob=0.8) during training only was not what was intended.
to_forward = tf.nn.dropout(x=last, keep_prob=0.2)

# Dense layer: activation(dot(input, kernel) + bias)
w_kernel = tf.Variable(tf.random_uniform(shape=(units, n_vocab)))
示例#14
0
        n_hidden_units,
    ])),
    'out': tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[
        n_classes,
    ]))
}
# `x`, `y`, `weights`, `biases`, `n_input`, `n_step`, `n_hidden_units` and
# `batch_size` are defined outside this fragment. The shape remarks below are
# the original author's and presumably assume batch 128 with 28x28 inputs.
# x ==> [128, 28, 28]
X = tf.reshape(x, [-1, n_input])
# X ==> [128*28, 28]
# Input projection applied to every row.
X_in = tf.matmul(X, weights['in']) + biases['in']
# X_in ==> [128*28, 128]
X_in = tf.reshape(X_in, [-1, n_step, n_hidden_units])
# X_in ==> [128, 28, 128]

# Single LSTM layer unrolled from a zero initial state.
cell = BasicLSTMCell(n_hidden_units)
init_state = cell.zero_state(batch_size, dtype=tf.float32)
outputs, final_state = tf.nn.dynamic_rnn(cell, X_in, initial_state=init_state)
# Move the time axis first and split it, so outputs[-1] is the last time step.
outputs = tf.unstack(tf.transpose(outputs, [1, 0, 2]))
pred = tf.matmul(outputs[-1], weights['out']) + biases['out']

# `y` is compared through argmax below, so it is presumably one-hot encoded.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
train_op = tf.train.AdamOptimizer(0.001).minimize(loss)

# Fraction of examples whose predicted class matches the labelled class.
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, dtype=tf.float32))

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    step = 0
示例#15
0
    def __init__(self, **kwargs):
        '''Build the graph: embedding lookup, LSTM, classifier, loss and summaries.

        The following keyword arguments are required:

        Parameters
        ----------
        vocab_size  :   int
                        Size of the vocabulary for creating embeddings
        embedding_size  :   int
                            Dimensionality of the embedding space
        memory_size :   int
                        LSTM memory size
        keep_prob   :   float
                        Probability of keeping a unit (used for both the
                        embedding dropout and the LSTM output dropout)
        subsequence_length  :   int
                                Length of the subsequences (all embeddings are padded to this
                                length)
        optimizer   :   OptimizerSpec
                        Supplies the optimizer via ``create()`` together with
                        its ``learning_rate`` and ``step_counter``

        Raises
        ------
        KeyError
            If one of the arguments above is not supplied.
        '''
        ############################################################################################
        #                                 Get all hyperparameters                                  #
        ############################################################################################
        vocab_size = kwargs['vocab_size']
        embedding_size = kwargs['embedding_size']
        memory_size = kwargs['memory_size']
        keep_prob = kwargs['keep_prob']
        subsequence_length = kwargs['subsequence_length']
        optimizer_spec = kwargs['optimizer']
        optimizer = optimizer_spec.create()
        self.learning_rate = optimizer_spec.learning_rate
        self.step_counter = optimizer_spec.step_counter

        ############################################################################################
        #                                        Net inputs                                        #
        ############################################################################################
        self.batch_size = placeholder(tf.int32, shape=[], name='batch_size')
        self.is_training = placeholder(tf.bool, shape=[], name='is_training')
        self.word_ids = placeholder(tf.int32,
                                    shape=(None, subsequence_length),
                                    name='word_ids')
        self.labels = placeholder(tf.int32, shape=(None, ), name='labels')
        self.hidden_state = placeholder(tf.float32,
                                        shape=(None, memory_size),
                                        name='hidden_state')
        self.cell_state = placeholder(tf.float32,
                                      shape=(None, memory_size),
                                      name='cell_state')

        lengths = sequence_lengths(self.word_ids)

        ############################################################################################
        #                                        Embedding                                         #
        ############################################################################################
        self.embedding_matrix, _bias = get_weights_and_bias(
            (vocab_size, embedding_size))
        # Dropout is applied to the looked-up embeddings only while training.
        embeddings = cond(
            self.is_training, lambda: nn.dropout(nn.embedding_lookup(
                self.embedding_matrix, self.word_ids),
                                                 keep_prob=keep_prob),
            lambda: nn.embedding_lookup(self.embedding_matrix, self.word_ids))

        ############################################################################################
        #                                        LSTM layer                                        #
        ############################################################################################
        cell = BasicLSTMCell(memory_size, activation=tf.nn.tanh)

        # During inference, use the entire ensemble (keep probability 1.0).
        # A separate name is used so the Python float `keep_prob` captured by
        # the lambdas above and below is never re-bound to a tensor.
        output_keep_prob = cond(self.is_training, lambda: constant(keep_prob),
                                lambda: constant(1.0))
        cell = DropoutWrapper(cell, output_keep_prob=output_keep_prob)

        # what's the difference to just creating a zero-filled tensor tuple?
        self.zero_state = cell.zero_state(self.batch_size, tf.float32)
        # The cell state placeholder feeds `c` and the hidden state placeholder
        # feeds `h`; they were previously wired the other way round.
        # NOTE(review): callers that fed the placeholders by position to
        # compensate for the old wiring must be updated.
        state = LSTMStateTuple(c=self.cell_state, h=self.hidden_state)

        # A dynamic rnn creates the graph on the fly, so it can deal with embeddings of different
        # lengths. We do not need to unstack the embedding tensor to get rows, instead we compute
        # the actual sequence lengths and pass that
        # We are not sure how any of this works. Do we need to mask the cost function so the cell
        # outputs for _NOT_A_WORD_ inputs are ignored? Is the final cell state really relevant if it
        # was last updated with _NOT_A_WORD_ input? Does static_rnn absolve us of any of those
        # issues?
        outputs, self.state = nn.dynamic_rnn(cell,
                                             embeddings,
                                             sequence_length=lengths,
                                             initial_state=state)
        # Flatten the per-step outputs of every example into a single row.
        outputs = reshape(concat(outputs, 1),
                          [-1, subsequence_length * memory_size])
        self.outputs = reduce_mean(outputs)

        ############################################################################################
        #                        Fully connected layer, loss, and training                         #
        ############################################################################################
        # Two output units, no activation: these are the logits for the loss.
        ff1 = fully_connected(outputs, 2, with_activation=False, use_bias=True)
        loss = reduce_mean(
            nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels,
                                                        logits=ff1))
        self.train_step = optimizer.minimize(loss,
                                             global_step=self.step_counter)
        self.predictions = nn.softmax(ff1)
        correct_prediction = equal(cast(argmax(self.predictions, 1), tf.int32),
                                   self.labels)
        self.accuracy = reduce_mean(cast(correct_prediction, tf.float32))

        ############################################################################################
        #                                     Create summaries                                     #
        ############################################################################################
        with tf.variable_scope('summary'):
            self.summary_loss = tf.summary.scalar('loss', loss)
            self.summary_accuracy = tf.summary.scalar('accuracy',
                                                      self.accuracy)
示例#16
0
    def __init__(self, args, batch_size, mode='train'):
        """Build the multi-layer LSTM language-model graph.

        Parameters
        ----------
        args:
            Must provide ``config`` (hyperparameters) and ``vocab_size``.
        batch_size: int
            Fixed batch size baked into the input/target placeholders and the
            initial LSTM states.
        mode: str, default 'train'
            ``'train'`` enables dropout on the inputs and on the cells and
            builds the optimizer and savers. ``'eval'`` returns right after
            the cost is defined, so ``lr``, ``train_op``, ``saver`` and
            ``best_saver`` are not created in that mode.
        """
        logger = logging.getLogger(__name__)

        self.args = args
        self.config = config = self.args.config

        # Defining the epoch variables
        self.epoch = tf.Variable(0, trainable=False)
        self.epoch_incr = self.epoch.assign(self.epoch + 1)
        self.global_step = tf.Variable(0, trainable=False)

        # Used to update training schedule: each value is a non-trainable
        # variable plus a placeholder/assign pair to overwrite it from Python.
        self.best_ppl = tf.Variable(10000.0, trainable=False, dtype=tf.float32)
        self.best_ppl_new = tf.placeholder(tf.float32, shape=())
        self.best_ppl_assign = self.best_ppl.assign(self.best_ppl_new)

        self.margin_ppl = tf.Variable(10000.0, trainable=False, dtype=tf.float32)
        self.margin_ppl_new = tf.placeholder(tf.float32, shape=())
        self.margin_ppl_assign = self.margin_ppl.assign(self.margin_ppl_new)

        self.last_ppl_update = tf.Variable(0, trainable=False)
        self.last_ppl_update_new = tf.placeholder(tf.int32, shape=())
        self.last_ppl_update_assign = self.last_ppl_update.assign(self.last_ppl_update_new)

        # Defining the loss interpolation constant
        # NOTE(review): l1 / l2 are defined here but not used in the cost below.
        self.l1 = tf.Variable(1.0, trainable=False, dtype=tf.float32)
        self.l1_new = tf.placeholder(tf.float32, shape=())
        self.l1_assign = self.l1.assign(self.l1_new)
        self.l2 = 1.0 - self.l1

        self.input_data = tf.placeholder(tf.int32, [batch_size, config.timesteps])
        self.targets = tf.placeholder(tf.int32, [batch_size, config.timesteps])

        # Looking up the input embeddings; dropout is applied in train mode only
        self.embedding = embedding = tf.get_variable("embedding", [args.vocab_size, config.rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        if mode == 'train':
            inputs = tf.nn.dropout(inputs, keep_prob=config.input_keep_prob)

        # The whole BasicLSTMCell network: one cell and one zero state per layer
        cells = []
        initial_states = []
        for i in range(config.num_layers):
            cell = BasicLSTMCell(
                config.rnn_size, forget_bias=0.0, state_is_tuple=True, reuse=tf.get_variable_scope().reuse
            )
            if mode == 'train':
                # Variational dropout on both the cell outputs and the state
                cell = DropoutWrapper(
                    cell=cell,
                    output_keep_prob=config.intra_keep_prob,
                    state_keep_prob=config.state_keep_prob,
                    variational_recurrent=True,
                    dtype=tf.float32
                )
            cells.append(cell)
            initial_states.append(cell.zero_state(batch_size, tf.float32))
        self.cells = tuple(cells)
        self.initial_states = tuple(initial_states)

        # The actual LSTM computation: each layer consumes the previous
        # layer's outputs and starts from the matching `self.initial_states`
        # entry (presumably overridden through feed_dict by the caller).
        final_states = []
        outputs = []
        for i in range(config.num_layers):
            with tf.variable_scope("layer%d" % i):
                inputs, final_state = tf.nn.dynamic_rnn(
                    self.cells[i], inputs, initial_state=self.initial_states[i]
                )
            outputs.append(inputs)
            final_states.append(final_state)
        self.final_states = tuple(final_states)
        # Skip connections to make training easier: sum the outputs of all layers
        self.outputs = tf.add_n(outputs)

        with tf.variable_scope('logits'):
            # Layer of logits before softmax after RNN; with shared embeddings
            # the transposed embedding matrix doubles as the softmax weights
            if config.shared_embeddings is True:
                self.softmax_w = softmax_w = tf.transpose(embedding, [1, 0])
            else:
                self.softmax_w = softmax_w = tf.get_variable("softmax_w", [config.rnn_size, args.vocab_size])
            self.softmax_b = softmax_b = tf.get_variable("softmax_b", [args.vocab_size])

        # The output dropout has been applied in the DropoutWrapper
        output = tf.reshape(self.outputs, [-1, config.rnn_size])
        self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)

        # Store the actual probability values.
        # Used by evaluation function in some cases
        self.probs = tf.nn.softmax(self.logits)

        # Converting the targets to one-hot rows, flattened to match self.logits
        self.distro1 = tf.reshape(tf.one_hot(self.targets, args.vocab_size), [-1, args.vocab_size])
        # Finding 1-D cross entropy loss tensor
        self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=tf.stop_gradient(self.distro1), logits=self.logits)
        # Summing the loss over all positions and dividing by the batch size
        # (no l1/l2 interpolation is applied here)
        self.cost = tf.reduce_sum(self.loss) / batch_size

        self.final_cost = self.cost

        # Evaluation graphs stop here: no optimizer or savers are built
        if mode == 'eval':
            return

        # Defining the learning rate variables
        self.lr = tf.Variable(config.lr, trainable=False)
        self.lr_decay = self.lr.assign(self.lr * config.lr_decay)

        # Standard tricks to train LSTMs: clip gradients by their global norm
        tvars = tf.trainable_variables()
        for variable in tvars:
            logger.info("%s - %s", variable.name, str(variable.get_shape()))
        self.grads, _ = tf.clip_by_global_norm(
            tf.gradients(self.final_cost, tvars),
            config.grad_clip
        )

        # Any optimizer name other than 'adam' falls back to plain SGD
        if config.optimizer == 'adam':
            optimizer = tf.train.AdamOptimizer(self.lr)
        else:
            optimizer = tf.train.GradientDescentOptimizer(self.lr)

        self.train_op = optimizer.apply_gradients(
            zip(self.grads, tvars),
            global_step=self.global_step
        )

        # Model savers; each keeps only its most recent checkpoint
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
        self.best_saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
class LanguageModel(object):
    """LSTM language model that owns its TensorFlow graph and session.

    Constructing an instance builds the whole graph (embeddings, unrolled
    LSTM, masked cross-entropy loss, optimizer, single-step ops for sentence
    completion, saver and summaries) and then either restores a checkpoint
    or initializes all variables.
    """
    # Becomes True once _build_rnn has created the LSTM part of the graph.
    built = False
    # Becomes True once the single-step sentence-completion ops exist.
    sen_comp_setup = False

    def __init__(self,
                 dataset,
                 lstm_hidden_size,
                 pretrained=False,
                 embedding_size=100,
                 project_size=512,
                 project=False,
                 restore_from=None,
                 model_dir=None,
                 log_dir=None):
        """
        Parameters
        ----------
        dataset: Dataset,
            Dataset instance holding train, test, eval datasets
        lstm_hidden_size: int,
            Number of hidden units in the LSTM
        pretrained: bool, default False
            Whether to use pretrained embeddings
        embedding_size: int, default 100
            Dimensionality of the word embeddings
        project: bool, False
            Whether to project after using larger LSTM
        project_size: int, default 512
            Final size to project to
        restore_from: str, default None
            Path to restore model from
        model_dir: str, default None
            Directory to save model to
        log_dir: str, default None
            Directory to write summaries to
        """
        graph = tf.Graph()
        graph.seed = SEED
        self.dataset = dataset
        self.lstm_hidden_size = lstm_hidden_size
        self.embedding_size = embedding_size
        # Honour the caller's choice; this flag used to be hard-coded to
        # False, which silently disabled the projection layer.
        self.project = project
        self.project_size = project_size
        self.session = tf.Session(graph=graph)
        self.len_corpus = len(dataset.vocab)
        # The last token of every sentence is only ever a target, never an
        # input, hence one step fewer than the sentence width.
        self.time_steps = dataset.train.shape[1] - 1
        self.model_dir = model_dir

        with self.session.graph.as_default():
            self._embeddings(pretrained=pretrained)
            self._compute_cross_entropy_loss()
            self._optimizer()
            self._sentence_completion_setup()
            self._savers(log_dir=log_dir)
            self._summaries()

            if restore_from is not None:
                self.saver.restore(self.session, restore_from)
            else:
                # NOTE(review): if load_embedding assigns values eagerly in
                # _embeddings, this initializer may overwrite them -- confirm.
                self.session.run(tf.global_variables_initializer())

    def _savers(self, log_dir=None):
        """Creates saver and summary writer.

        Parameters
        ----------
        log_dir: str, default None
            Directory to log results to
        """
        self.summary_writer = tf.summary.FileWriter(log_dir)
        self.summary_writer.add_graph(self.session.graph)
        self.saver = tf.train.Saver(max_to_keep=1000)

    def _embeddings(self, pretrained=False, scope_name=None):
        """Compute word embeddings for sentence.

        Parameters
        ----------
        pretrained: bool, default False
            Whether to use pretrained embeddings
        scope_name: str, default None
            Variable scope
        """
        if not scope_name:
            scope_name = "Embedding"

        self.sentence_ph = tf.placeholder(dtype=tf.int32, shape=[None, self.time_steps + 1],
                                        name="Sentence_placeholder")

        with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
            self.embedding_matrix = tf.get_variable(
                name="embedding_matrix",
                shape=[self.len_corpus, self.embedding_size],
                initializer=xav_init()
            )

            if pretrained:
                print("Loading pretrained embeddings...")
                load_embedding(session=self.session,
                               vocab=self.dataset.word_to_idx,
                               emb=self.embedding_matrix,
                               path=self.dataset.embedding_file,
                               vocab_size=self.len_corpus,
                               dim_embedding=self.embedding_size)

            self.word_embeddings = tf.nn.embedding_lookup(self.embedding_matrix,
                                                          self.sentence_ph)

    def _build_rnn(self, trainable_zero_state=False, scope_name=None):
        """Sets up the LSTM and its unrolling.

        Parameters
        ----------
        trainable_zero_state: bool, default False
            Whether to start from a learned initial state instead of zeros
        scope_name: str, default None
            Variable scope
        """
        if not scope_name:
            scope_name = "LSTM"

        with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
            self.lstm = BasicLSTMCell(num_units=self.lstm_hidden_size)
            batch_size = tf.shape(self.sentence_ph)[0]
            if not trainable_zero_state:
                state = self.lstm.zero_state(batch_size=batch_size, dtype=tf.float32)
            else:
                # NOTE(review): _trainable_zero_state is not defined in this
                # class; this branch fails unless a subclass provides it.
                state = self._trainable_zero_state()
            if self.project:
                self._projection_layer()
            self._unroll_lstm(state=state)
            self._output_layer()
        self.built = True

    def _projection_layer(self, scope_name=None):
        """Creates the weight matrix for projection, when a larger LSTM is used."""
        if scope_name is None:
            scope_name = "Projection"

        with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
            self.project_W = tf.get_variable(name="proj_weights",
                shape=[self.lstm_hidden_size, self.project_size],
                dtype=tf.float32,
                initializer=xav_init()
            )

    def _unroll_lstm(self, state):
        """Unrolls the LSTM over `self.time_steps` steps.

        Parameters
        ----------
        state: LSTM state,
            Initial state fed to the first time step
        """
        outputs = list()
        for time_step in range(self.time_steps):
            out, state = self.lstm(self.word_embeddings[:, time_step, :], state)
            # Add a time axis so the per-step outputs can be concatenated.
            out = tf.reshape(out, [-1, 1, self.lstm_hidden_size])
            outputs.append(out)
        self.output = tf.concat(outputs, axis=1)

        if self.project:
            self.output = tf.tensordot(self.output, self.project_W, axes=1)

    def _output_layer(self, scope_name=None):
        """Creates the weights and bias mapping LSTM outputs to vocabulary logits."""
        if scope_name is None:
            scope_name = "Output_layer"
        if self.project:
            shape = [self.project_size, self.len_corpus]
        else:
            shape = [self.lstm_hidden_size, self.len_corpus]

        with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
            self.output_layer = dict()
            self.output_layer['weights'] = tf.get_variable(name="weights",
                shape=shape, dtype=tf.float32, initializer=xav_init())

            self.output_layer['bias'] = tf.get_variable(name='bias',
                shape=[self.len_corpus], dtype=tf.float32, initializer=xav_init()
            )

    def _compute_cross_entropy_loss(self):
        """Computes the loss for the LSTM. Masks out <pad> tokens from final loss."""
        if not self.built:
            print("Building the RNN Graph...")
            self._build_rnn()
        # Shape: batch x time_steps x vocabulary size
        logits = tf.tensordot(self.output, self.output_layer["weights"], axes=1)
        logits = tf.add(logits, self.output_layer["bias"])

        # Targets are the input sentence shifted left by one token.
        targets = self.sentence_ph[:, 1:]

        # Shape: batch x time_steps
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits,
            labels=targets
        )

        # Include a mask that filters out the pad tokens from the loss computation.
        # The index is looked up in the vocabulary instead of being hard-coded.
        pad_index = self.dataset.word_to_idx["<pad>"]
        # Mask Tensor, with 0s wherever <pad> token is present
        self.not_pads = tf.not_equal(targets, pad_index)
        self.not_pads = tf.cast(self.not_pads, cross_entropy.dtype)

        self.cross_entropy_masked = tf.multiply(cross_entropy, self.not_pads)
        self.sentence_lengths = tf.reduce_sum(self.not_pads, axis=1)
        # Shape: (batch, ) -- per-sentence loss averaged over non-pad tokens
        cross_entropy_batch = tf.reduce_sum(self.cross_entropy_masked, axis=1)
        self.batch_loss = cross_entropy_batch / self.sentence_lengths
        self.batch_perplexity = tf.exp(self.batch_loss)
        self.loss_avg = tf.reduce_mean(self.batch_loss)
        self.perplexity_avg = tf.reduce_mean(self.batch_perplexity) # Batch averaged perplexity

    def _summaries(self):
        """Creates summaries to log."""
        # Train summaries
        self.train_loss_summary = tf.summary.scalar('train/batch_averaged_loss', self.loss_avg)
        self.train_perplexity_summary = tf.summary.scalar('train/batch_averaged_perplexity', self.perplexity_avg)
        train_summaries = [self.train_loss_summary, self.train_perplexity_summary]
        self.train_summaries = tf.summary.merge(train_summaries, name="train_summaries")

        # Eval summaries: the values are computed in Python and fed through
        # placeholders, because they are averaged over the whole eval set.
        self.eval_loss_ph = tf.placeholder(tf.float32)
        self.eval_perplexity_ph = tf.placeholder(tf.float32)
        self.eval_loss_summary = tf.summary.scalar('eval/averaged_loss', self.eval_loss_ph)
        self.eval_perplexity_summary = tf.summary.scalar('eval/averaged_perplexity', self.eval_perplexity_ph)
        eval_summaries = [self.eval_loss_summary, self.eval_perplexity_summary]
        self.eval_summaries = tf.summary.merge(eval_summaries, name="eval_summaries")

    def _optimizer(self):
        """Defines the optimizer (Adam with gradients clipped to global norm 5)."""
        with tf.variable_scope("Optimizer", reuse=tf.AUTO_REUSE):
            self.optimizer = tf.train.AdamOptimizer()
            gradients, variables = zip(*self.optimizer.compute_gradients(self.loss_avg))
            gradients, _ = tf.clip_by_global_norm(gradients, clip_norm=5.0)
            self.optimize_op = self.optimizer.apply_gradients(zip(gradients, variables))

    def evaluate(self, batch_size=64, timestep=None, verbose=False):
        """Computes loss and perplexity on the eval dataset.

        Parameters
        ----------
        batch_size: int, default 64
            Batch size used to iterate over the eval data
        timestep: number, default None
            Global step under which the eval summaries are logged
        verbose: bool, default False
            Whether to print the averaged loss and perplexity
        """
        losses, perplexities = [], []
        fetches = [self.batch_loss, self.batch_perplexity]

        for batch in self.dataset.batch_generator(mode="eval", batch_size=batch_size):
            feed_dict = {self.sentence_ph: batch}
            batch_loss, batch_perplexity = self.session.run(fetches=fetches, feed_dict=feed_dict)
            # Per-sentence values are collected so the mean is taken over
            # sentences rather than over batches.
            losses.extend(batch_loss)
            perplexities.extend(batch_perplexity)

        mean_eval_loss = np.mean(losses)
        mean_eval_perplexity = np.mean(perplexities)

        fetches = self.eval_summaries
        feed_dict = {self.eval_loss_ph: mean_eval_loss,
                     self.eval_perplexity_ph: mean_eval_perplexity}
        eval_summaries = self.session.run(fetches=fetches, feed_dict=feed_dict)
        self.summary_writer.add_summary(eval_summaries, timestep)

        if verbose:
            print("Evaluation Loss: {0:.3f}".format(mean_eval_loss))
            print("Evaluation Perplexity: {0:.3f}".format(mean_eval_perplexity))

    def fit(self, num_epochs=10, batch_size=64, eval_every=10, verbose=False):
        """Trains the LSTM, evaluating and checkpointing after every epoch.

        Parameters
        ----------
        num_epochs: int, default 10
            Number of passes over the training data
        batch_size: int, default 64
            Training batch size
        eval_every: int, default 10
            Train summaries are written (and printed if verbose) every this
            many batches
        verbose: bool, default False
            Whether to print progress
        """
        batches_per_epoch = self.dataset.train.shape[0] / batch_size
        fetches = [self.loss_avg, self.perplexity_avg, self.optimize_op, self.train_summaries]

        for epoch in range(num_epochs):
            model_dir_epoch = os.path.join(self.model_dir, str(epoch + 1))
            if not os.path.exists(model_dir_epoch):
                os.makedirs(model_dir_epoch)

            # Defined up front so the evaluation below still has a step to
            # log under when the generator yields no batches.
            timestep = batches_per_epoch * epoch
            batches = self.dataset.batch_generator(mode="train", batch_size=batch_size, shuffle=True)
            for n_batch, train_batch in enumerate(batches):
                feed_dict = {self.sentence_ph: train_batch}
                timestep = batches_per_epoch * epoch + n_batch

                loss, perplexity, _, train_summaries = self.session.run(fetches=fetches, feed_dict=feed_dict)

                if (n_batch + 1) % eval_every == 0:
                    self.summary_writer.add_summary(train_summaries, timestep)
                    if verbose:
                        print("Epoch {}, Batch: {}".format(epoch+1, n_batch+1))
                        print("Training loss: {0:.3f}".format(loss))
                        print("Training perplexity: {0:.3f}".format(perplexity))

            print("Computing loss and perplexity on eval data. Epoch {}, Timestep: {}".format(epoch+1, timestep))
            self.evaluate(timestep=timestep, verbose=verbose)
            print()

            model_savepath = os.path.join(model_dir_epoch, "model.ckpt")
            self.saver.save(sess=self.session, save_path=model_savepath)

    def _sentence_completion_setup(self):
        """Builds the single-step ops used for the sentence completion task.

        One word and one LSTM state go in through placeholders; the next
        state and the vocabulary logits come out.
        """
        self.state_c = tf.placeholder(tf.float32, [1, self.lstm_hidden_size])
        self.state_h = tf.placeholder(tf.float32, [1, self.lstm_hidden_size])
        self.word_ph = tf.placeholder(dtype=tf.int32, shape=[1], name="Word_placeholder")
        self.word_embedding = tf.nn.embedding_lookup(self.embedding_matrix, self.word_ph)

        state = tf.contrib.rnn.LSTMStateTuple(self.state_c, self.state_h)
        out, self.next_state = self.lstm(self.word_embedding, state)

        if self.project:
            # Same projection matrix as in the unrolled training graph.
            out = tf.matmul(out, self.project_W)
        logits_word = tf.matmul(out, self.output_layer["weights"])
        logits_word = tf.add(logits_word, self.output_layer["bias"])
        # Flatten to one logit per vocabulary entry (no hard-coded size).
        self.logits = tf.reshape(logits_word, [self.len_corpus])
        self.sen_comp_setup = True

    def _predict_next_word(self, word, state_c, state_h):
        """Feeds one word to the LSTM; returns (predicted word, new c, new h)."""
        unk_idx = self.dataset.word_to_idx["<unk>"]
        word_idx = self.dataset.word_to_idx.get(word, unk_idx)

        fetches = [self.next_state, self.logits]
        feed_dict = {self.word_ph: np.array([word_idx]),
                     self.state_c: state_c,
                     self.state_h: state_h}
        state, logits = self.session.run(fetches, feed_dict)

        # Indices 0, 2 and 3 must never be generated.
        # NOTE(review): presumably <bos>, <pad> and <unk> -- confirm against
        # the vocabulary ordering.
        logits[0] = -np.inf
        logits[2:4] = -np.inf
        predicted = self.dataset.idx_to_word[int(np.argmax(logits))]
        return predicted, state.c, state.h

    def complete_sentence(self, words, max_len=20):
        """Completes a sentence, given the initial words.

        Parameters
        ----------
        words: list,
            List of starting words (left unmodified)
        max_len: int, default 20
            Maximum length of sentence if <eos> is not generated.

        Returns
        -------
        str
            The given words followed by the generated ones, space separated.
            A generated <eos> is included as the last word.
        """
        prompt = ["<bos>"] + list(words)

        sentence = list()
        state_c = np.zeros((1, self.lstm_hidden_size))
        state_h = np.zeros((1, self.lstm_hidden_size))
        word_predicted = None

        # Warm up the state on the given words; only the prediction made
        # after the last given word is used.
        for word in prompt:
            sentence.append(word)
            word_predicted, state_c, state_h = self._predict_next_word(
                word, state_c, state_h)

        # <bos> does not count towards the sentence length.
        sentence_length = len(sentence) - 1

        while sentence_length < max_len:
            # Emit the pending prediction BEFORE feeding it back; previously
            # the first generated word was fed to the LSTM but never added
            # to the sentence.
            sentence.append(word_predicted)
            sentence_length += 1
            if word_predicted == "<eos>" or sentence_length >= max_len:
                break
            word_predicted, state_c, state_h = self._predict_next_word(
                word_predicted, state_c, state_h)

        return " ".join(sentence[1:])

    def complete_sentences(self, data_filename, sol_filename, max_len=20, log_every=100):
        """Completes the sentences in given file.

        Parameters
        ----------
        data_filename: str,
            Filename containing the sentences to complete
        sol_filename: str,
            Filename to write the completed sentence to
        max_len: int, default 20
            Maximum allowed length of sentence.
        log_every: int, default 100
            Progress is printed every this many sentences.
        """
        if not self.sen_comp_setup:
            self._sentence_completion_setup()

        print("Starting to write sentences...")
        # Context managers close both files even if completion fails midway.
        with open(sol_filename, "w") as solutions, open(data_filename, "r") as data:
            for num_lines, sentence in enumerate(data, start=1):
                words = sentence.strip().split(" ")
                completed_sentence = self.complete_sentence(words, max_len=max_len)
                solutions.write(completed_sentence + "\n")
                if num_lines % log_every == 0:
                    print("Finished writing {} sentences.".format(num_lines))
        print("Finished writing sentences.")

    def compute_perplexity(self, batch):
        """Returns the perplexity averaged over the sentences in `batch`.

        For a batch holding a single sentence this is that sentence's
        perplexity.
        """
        fetches = self.perplexity_avg
        feed_dict = {self.sentence_ph: batch}
        return self.session.run(fetches, feed_dict)

    def save_perplexity_to_file(self, filename, log_every=100):
        """Saves perplexity computations to file, one test sentence per line.

        Parameters
        ----------
        filename: str,
            File to write perplexity values to
        log_every: int, default 100
            Progress is printed every this many sentences.
        """
        print("Starting to save perplexity values...")
        test_sentences = self.dataset.batch_generator(mode="test", batch_size=1, shuffle=False)
        with open(filename, "w") as f:
            for num_lines, test_sentence in enumerate(test_sentences, start=1):
                perplexity = self.compute_perplexity(test_sentence)
                f.write(str(perplexity) + "\n")
                if num_lines % log_every == 0:
                    print("Finished calculating perplexity for {} sentences.".format(num_lines))
        print("Finished writing perplexity values.")
示例#18
0
    # Graph fragment (the enclosing graph context, input_X and batch_size are
    # defined before this snippet): a small conv stack followed by a 2-step
    # LSTM, built only to inspect the intermediate tensor shapes.
    input_Y = tf.placeholder(tf.float32, (None, 2, 1), 'input_Y')

    # Two conv layers with 2 filters of width 13.
    conv1_out = tf.layers.conv1d(input_X, 2, 13, activation=tf.nn.relu, name='conv1')
    conv2_out = tf.layers.conv1d(conv1_out, 2, 13, activation=tf.nn.relu, name='conv2')

    # Average pooling with pool size 2 and stride 2.
    pooling_out = tf.layers.average_pooling1d(conv2_out, 2, 2, name='pooling')

    # Two conv layers with 4 filters of width 5.
    conv3_out = tf.layers.conv1d(pooling_out, 4, 5, activation=tf.nn.relu, name='conv3')
    conv4_out = tf.layers.conv1d(conv3_out, 4, 5, activation=tf.nn.relu, name='conv4')

    pooling1_out = tf.layers.average_pooling1d(conv4_out, 2, 2, name='pooling1')

    # Swap the first two axes so the tensor can be indexed step by step below.
    resort_out = tf.transpose(pooling1_out, [1, 0, 2], name='resort')

    # Single-unit LSTM, unrolled by hand for two steps.
    lstm_layer = BasicLSTMCell(1)
    state = lstm_layer.zero_state(batch_size, tf.float32)

    out = []
    for i in range(2):
        output, state = lstm_layer(resort_out[i], state)
        out.append(output)

    # Tensors whose run-time shapes are printed below.
    out_gather = [conv4_out, pooling1_out, resort_out, out]
    init_op = tf.global_variables_initializer()

sess = tf.Session(graph=df_graph)
sess.run(init_op)
# Writes the graph definition for TensorBoard.
train_writer = tf.summary.FileWriter('./cnn_lstm', sess.graph, flush_secs=5)
# batch_size is fed explicitly with the number of rows in X.
out_run = sess.run(out_gather, feed_dict={input_X: X, input_Y: Y, batch_size: X.shape[0]})
# Print the shape of every fetched value (list comprehension used for its side effect).
[print(np.array(x).shape) for x in out_run]
示例#19
0
class LstmAgent(AbstractAgent):
    """Agent whose network is a single ``BasicLSTMCell`` unrolled over a sequence.

    The recurrent state is supplied through the placeholders held in
    ``self.S`` (an ``LSTMStateTuple`` with fields ``c`` and ``h``), each of
    static shape ``[batch_size, layer_size]``.

    Attributes such as ``self.sess``, ``self.O1``, ``self.A``, ``self.R``,
    ``self.O2``, ``self.T``, ``self.reward_scale``, ``self.A_sampled1``,
    ``self.A_max_likelihood`` and ``self.S_new`` are not defined in this class;
    presumably they are provided by ``AbstractAgent`` -- verify there.
    """

    @property
    def seq_len(self):
        """Number of time steps to unroll: ``self._seq_len`` if truthy, else 8.

        ``self._seq_len`` is not assigned in this class; it is expected to be
        set elsewhere (presumably by the parent class -- TODO confirm).
        """
        if self._seq_len:
            return self._seq_len
        return 8

    def __init__(self, batch_size: int, layer_size: int, device_num: int,
                 **kwargs):
        """Create the state placeholders and the LSTM cell, then build the agent.

        Parameters
        ----------
        batch_size: int
            Static batch dimension of the state placeholders.
        layer_size: int
            Number of LSTM units (second dimension of each state placeholder).
        device_num: int
            Index of the GPU (``/gpu:<device_num>``) the placeholders and the
            cell are created on.
        **kwargs
            Forwarded unchanged to ``AbstractAgent.__init__``.
        """
        self.batch_size = batch_size
        self.layer_size = layer_size
        # The placeholders and the cell are created before the parent
        # constructor runs (presumably because the parent builds the network
        # through ``self.network`` -- TODO confirm).
        with tf.device('/gpu:' + str(device_num)):
            state_args = tf.float32, [batch_size, layer_size]
            self.S = LSTMStateTuple(c=tf.placeholder(*state_args, name='C'),
                                    h=tf.placeholder(*state_args, name='H'))
            self.lstm = BasicLSTMCell(layer_size)
        super().__init__(batch_size=batch_size,
                         layer_size=layer_size,
                         device_num=device_num,
                         **kwargs)
        # Concrete zero state, evaluated once; used as the reference shape
        # for states passed to train_step / get_actions.
        self.initial_state = self.sess.run(
            self.lstm.zero_state(batch_size, tf.float32))
        assert np.shape(self.initial_state) == (2, batch_size, layer_size)
        assert self.S.c.shape == self.S.h.shape == (batch_size, layer_size)

    def network(self, inputs: tf.Tensor, reuse=False) -> tf.Tensor:
        """Unroll the LSTM cell over ``inputs`` and return the last step.

        ``inputs`` is split into ``self.seq_len`` pieces along axis 1; each
        piece has that axis squeezed away and is fed to the cell. The state
        starts at the ``self.S`` placeholders and is carried from each step
        to the next.

        Parameters
        ----------
        inputs: tf.Tensor
            Tensor whose axis 1 has length ``self.seq_len``.
        reuse:
            Accepted for interface compatibility; not used in this method.

        Returns
        -------
        A ``NetworkOutput`` built from the cell's ``(output, state)`` pair at
        the final step.
        """
        split_inputs = tf.split(inputs, self.seq_len, axis=1)
        s = self.S
        for x in split_inputs:
            x = tf.squeeze(x, axis=1)
            # Carry the new state into the next step. Previously every step
            # restarted from self.S, so only the last slice had any effect.
            output, s = self.lstm(x, s)
        return NetworkOutput(output, s)

    def state_feed(self, states):
        """Return a feed dict pairing the ``self.S`` placeholders with ``states``.

        ``states`` is zipped against ``self.S`` in field order (``c`` then ``h``).
        """
        return dict(zip(self.S, states))

    def train_step(self, step: Step, feed_dict: dict = None) -> dict:
        """Run one training step, feeding the recurrent state along with the data.

        Parameters
        ----------
        step: Step
            Transition batch; ``step.s`` must have the same shape as
            ``self.initial_state``.
        feed_dict: dict, optional
            Feed dict to use as-is. When omitted, one is built from ``step``
            (state placeholders plus ``O1``, ``A``, scaled ``R``, ``O2``, ``T``).

        Returns
        -------
        Whatever the parent class's ``train_step`` returns.
        """
        assert np.shape(step.s) == np.shape(self.initial_state)
        if feed_dict is None:
            feed_dict = {
                **self.state_feed(step.s),
                self.O1: step.o1,
                self.A: step.a,
                self.R: np.array(step.r) * self.reward_scale,
                self.O2: step.o2,
                self.T: step.t,
            }
        # Previously `feed_dict` was read before assignment (UnboundLocalError
        # on every call) and the dict was never handed to the parent.
        # NOTE(review): assumes AbstractAgent.train_step accepts the feed dict
        # as its second positional argument -- confirm against the parent.
        return super().train_step(step, feed_dict)

    def q_network(self, o: tf.Tensor, a: tf.Tensor, name: str, reuse: bool = None) \
            -> tf.Tensor:
        """Build the Q-value head on top of the LSTM output.

        The LSTM output for ``o`` is concatenated with the action ``a`` along
        axis 1 and passed through a single-unit dense layer named ``'q'``;
        the result is flattened to one dimension. Everything is created inside
        ``tf.variable_scope(name, reuse=reuse)``.
        """
        with tf.variable_scope(name, reuse=reuse):
            o = self.network(o).output
            oa = tf.concat([o, a], axis=1)
            return tf.reshape(tf.layers.dense(oa, 1, name='q'), [-1])

    def get_actions(self, o: ArrayLike, sample: bool = True, state=None) \
            -> Tuple[np.ndarray, LSTMStateTuple]:
        """Evaluate an action and the next recurrent state for one observation.

        Parameters
        ----------
        o: ArrayLike
            A single, one-dimensional observation.
        sample: bool
            If true fetch ``self.A_sampled1``, otherwise
            ``self.A_max_likelihood``.
        state:
            Recurrent state with the same shape as ``self.initial_state``.
            ``None`` (the default) means "start from ``self.initial_state``".

        Returns
        -------
        The result of running ``[A[0], self.S_new]`` in the session.
        """
        assert len(np.shape(o)) == 1
        if state is None:
            # The default used to fail the shape assertion below outright.
            state = self.initial_state
        assert np.shape(state) == np.shape(self.initial_state)
        # The observation is wrapped twice before being fed to O1 (presumably
        # adding batch and sequence axes -- TODO confirm against O1's shape).
        feed_dict = {self.O1: [[o]], **self.state_feed(state)}
        A = self.A_sampled1 if sample else self.A_max_likelihood
        return self.sess.run([A[0], self.S_new], feed_dict)
示例#20
0
    def build_graph(self):
        """Build the two-tower LSTM similarity graph.

        Creates int32 placeholders ``english_input`` and ``chinese_input`` of
        shape ``[batch_size, topic_num]`` and a float32 placeholder ``scores``
        of shape ``[batch_size]``. Each topic position gets its own embedding
        table per language. The embedded sequences go through two separate
        LSTMs (scope ``lstm-a`` for English, ``lstm-b`` for Chinese), and the
        prediction is ``1 - acos(cosine) / pi`` of the two last-step outputs.
        The loss is the mean squared error against ``scores``.

        Sets ``self.global_step``, ``self.learning_rate``, ``self.loss_op``,
        ``self.train_op``, ``self.prediction`` and ``self.init``, and registers
        the ``loss``, ``prediction`` and ``labels`` summaries.
        """
        self.logger.info("start building graph")
        english_input = tf.placeholder(tf.int32,
                                       [self.batch_size, self.topic_num],
                                       name="english_input")
        chinese_input = tf.placeholder(tf.int32,
                                       [self.batch_size, self.topic_num],
                                       name="chinese_input")

        Y = tf.placeholder(tf.float32, [self.batch_size], name="scores")

        # Embedding layer. The scope name is kept verbatim (including its
        # spelling) because it is part of every embedding variable's name.
        with tf.variable_scope("embdding"):
            en_embeddings = []
            zh_embeddings = []
            for i in range(self.topic_num):
                # Column i of each input, kept as a [batch_size, 1] slice.
                english_ids = tf.slice(english_input, [0, i],
                                       [self.batch_size, 1])
                chinese_ids = tf.slice(chinese_input, [0, i],
                                       [self.batch_size, 1])
                # One [fea_dim, hidden_size] table per topic and language.
                embedding_en = tf.Variable(
                    tf.random_normal([self.fea_dim, self.hidden_size]),
                    name="en_topic_%d_embedding" % (i + 1),
                    dtype=tf.float32)
                embedding_zh = tf.Variable(
                    tf.random_normal([self.fea_dim, self.hidden_size]),
                    name="zh_topic_%d_embedding" % (i + 1),
                    dtype=tf.float32)
                en_embeddings.append(
                    tf.nn.embedding_lookup(embedding_en, english_ids))
                zh_embeddings.append(
                    tf.nn.embedding_lookup(embedding_zh, chinese_ids))

            # Join the per-topic lookups along axis 1 and pin the static shape
            # to [batch_size, topic_num, hidden_size].
            sequence_shape = [self.batch_size, self.topic_num, self.hidden_size]
            english_embedding = tf.reshape(tf.concat(en_embeddings, 1),
                                           sequence_shape)
            chinese_embedding = tf.reshape(tf.concat(zh_embeddings, 1),
                                           sequence_shape)

        # LSTM layer: one independent LSTM per language.
        two_lstm_outputs = []
        towers = (("a", english_embedding), ("b", chinese_embedding))
        for suffix, X in towers:
            with tf.variable_scope("lstm-%s" % suffix):
                cell = BasicLSTMCell(num_units=self.hidden_size)
                initial_state = cell.zero_state(self.batch_size, tf.float32)
                outputs, _states = tf.nn.dynamic_rnn(
                    cell, X, initial_state=initial_state, dtype=tf.float32)

                # Keep only the output of the last step (index topic_num - 1)
                # and flatten it to [batch_size, hidden_size].
                outputs = tf.slice(outputs, [0, self.topic_num - 1, 0],
                                   [self.batch_size, 1, self.hidden_size])
                two_lstm_outputs.append(
                    tf.reshape(outputs, [-1, self.hidden_size]))

        lstm_a_output, lstm_b_output = two_lstm_outputs

        # Cosine similarity of the two towers, mapped to a score through the
        # angle between them: 1 - acos(cos) / pi.
        eps = 1e-6
        numerator = tf.reduce_sum(lstm_a_output * lstm_b_output, 1)
        denominator = tf.sqrt(tf.reduce_sum(
            tf.square(lstm_a_output), 1)) * tf.sqrt(
                tf.reduce_sum(tf.square(lstm_b_output), 1))
        # Guard the forward value against NaN: a zero-norm output would give
        # 0/0, and rounding can push the cosine just outside [-1, 1], where
        # acos is undefined. Both operations leave ordinary values untouched.
        cosine = numerator / tf.maximum(denominator, eps)
        cosine = tf.clip_by_value(cosine, -1.0 + eps, 1.0 - eps)
        y = 1 - (tf.acos(cosine) / tf.constant(3.141592653))

        # Pin the static shape of the prediction to [batch_size].
        y = tf.reshape(y, [self.batch_size])

        self.global_step = tf.Variable(0, trainable=False)
        # NOTE(review): this schedule is exposed as an attribute but is not
        # what the optimizer below uses (it is given the constant 0.1). With a
        # decay rate of 2 the value doubles every 10 steps rather than
        # decaying -- confirm the intended rate before wiring it in.
        self.learning_rate = tf.train.exponential_decay(0.1,
                                                        self.global_step,
                                                        10,
                                                        2,
                                                        staircase=False)
        self.loss_op = tf.reduce_mean(tf.square(y - Y))
        self.train_op = tf.train.AdamOptimizer(learning_rate=0.1).minimize(
            self.loss_op, global_step=self.global_step)

        tf.summary.scalar("loss", self.loss_op)
        tf.summary.histogram("prediction", y)
        tf.summary.histogram("labels", Y)

        self.prediction = y

        self.init = tf.global_variables_initializer()

        self.logger.info("Done! building graph")