def discriminator_stego_nn(self, img, reuse=False):
        """Build the 'S_network' discriminator on top of `img`.

        The input goes through `self.image_processing_layer`, then four
        batch-norm + strided 5x5 convolution stages whose widths are
        `self.df_dim` times 1, 2, 4 and 8, one more batch norm, and finally a
        single sigmoid-activated linear unit.

        Args:
            img: input handed to `self.image_processing_layer`.
            reuse: when true, the variables of the enclosing scope are reused.

        Returns:
            The output of the final sigmoid linear layer.
        """
        with tf.variable_scope('S_network'):
            if reuse:
                tf.get_variable_scope().reuse_variables()

            net = self.image_processing_layer(img)

            # Stage i is batch norm 'd_s_bn<i>' followed by conv 'd_s_h<i>_conv'.
            for stage, width_factor in enumerate((1, 2, 4, 8)):
                net = self.batch_norm(net, scope='d_s_bn%d' % stage)
                net = conv2d(net, self.df_dim * width_factor,
                             kernel_size=[5, 5], stride=[2, 2],
                             activation_fn=self.leaky_relu,
                             scope='d_s_h%d_conv' % stage)

            net = self.batch_norm(net, scope='d_s_bn4')

            # Flatten each example; the batch size is taken from the config.
            flat = tf.reshape(net, [self.conf.batch_size, -1])
            return linear(flat, 1, activation_fn=tf.nn.sigmoid, scope='d_s_h4_lin',
                          weights_initializer=tf.random_normal_initializer(stddev=0.02))
    def generator_nn(self, noise, train=True):
        """Build the 'G_network' generator that maps `noise` to an image tensor.

        A linear projection is reshaped to [-1, 4, 4, gf_dim * 8] and passed
        through three transposed-convolution stages (requested output sizes
        8, 16 and 32), each followed by batch norm and ReLU. A last transposed
        convolution requests a [batch, 64, 64, c_dim] output, squashed by tanh.

        Args:
            noise: input to the first linear layer.
            train: forwarded to the batch-norm callables; when false the
                variables of the enclosing scope are reused.

        Returns:
            The tanh-activated output of the final transposed convolution.
        """
        with tf.variable_scope('G_network'):
            if not train:
                tf.get_variable_scope().reuse_variables()

            projected = linear(noise, self.gf_dim * 8 * 4 * 4, scope='g_h0_lin',
                               activation_fn=None,
                               weights_initializer=tf.random_normal_initializer(stddev=0.02))

            gen = tf.reshape(projected, [-1, 4, 4, self.gf_dim * 8])
            gen = tf.nn.relu(self.g_bn0(gen, train=train))

            # (spatial size, channels, layer name, batch-norm callable) per stage.
            upsample_stages = (
                (8, self.gf_dim * 4, 'g_h1', self.g_bn1),
                (16, self.gf_dim * 2, 'g_h2', self.g_bn2),
                (32, self.gf_dim * 1, 'g_h3', self.g_bn3),
            )
            for side, channels, layer_name, bn in upsample_stages:
                gen = self.conv2d_transpose(
                    gen, [self.conf.batch_size, side, side, channels], name=layer_name)
                gen = tf.nn.relu(bn(gen, train=train))

            out = self.conv2d_transpose(
                gen, [self.conf.batch_size, 64, 64, self.c_dim], name='g_out')

            return tf.nn.tanh(out)
Exemplo n.º 3
0
def softmax_model(X, Y_, mode):
    """Single linear layer with 10 outputs followed by softmax.

    Args:
        X: input features fed to the linear layer.
        Y_: integer class labels; one-hot encoded to depth 10 for the loss.
        mode: not used by this function.

    Returns:
        A tuple ``(predictions, loss, train_op)``. ``predictions`` is a dict
        holding the softmax output under "predictions" and the arg-max class
        (cast to uint8) under "classes".
    """
    Ylogits = layers.linear(X, 10)
    predict = tf.nn.softmax(Ylogits)
    classes = tf.cast(tf.argmax(predict, 1), tf.uint8)
    # Keyword arguments are required by TF 1.x for this op and rule out any
    # confusion between the logits and labels positions.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=Ylogits, labels=tf.one_hot(Y_, 10))
    loss = tf.reduce_mean(cross_entropy) * 100
    train_op = layers.optimize_loss(loss, framework.get_global_step(), 0.003, "Adam")
    return {"predictions": predict, "classes": classes}, loss, train_op
Exemplo n.º 4
0
Arquivo: task.py Projeto: spwcd/QTML
def conv_model(X, Y_, mode):
    """Convolutional classifier returning a `learn.ModelFnOps`.

    The input is reshaped to [-1, 28, 28, 1], passed through three
    convolutions (6, 12 and 24 outputs), flattened, fed to a 200-unit ReLU
    layer with dropout, and projected to 10 logits. Loss, train op and eval
    metrics are produced by the `conv_model_*` helpers.

    Args:
        X: input tensor, reshaped to [-1, 28, 28, 1].
        Y_: labels, forwarded to the loss and metric helpers.
        mode: a `learn.ModeKeys` value; dropout is active only for TRAIN.

    Returns:
        A `learn.ModelFnOps` with predictions, loss, train op and eval metrics.
    """
    images = tf.reshape(X, [-1, 28, 28, 1])
    bias_init = tf.constant_initializer(0.1, dtype=tf.float32)

    conv1 = layers.conv2d(images, num_outputs=6, kernel_size=[6, 6],
                          biases_initializer=bias_init)
    conv2 = layers.conv2d(conv1, num_outputs=12, kernel_size=[5, 5], stride=2,
                          biases_initializer=bias_init)
    conv3 = layers.conv2d(conv2, num_outputs=24, kernel_size=[4, 4], stride=2,
                          biases_initializer=bias_init)

    flat = layers.flatten(conv3)
    dense = layers.relu(flat, 200, biases_initializer=bias_init)

    # Set keep_prob=1 to switch dropout off on the dense layer.
    is_training = mode == learn.ModeKeys.TRAIN
    dense_dropped = layers.dropout(dense, keep_prob=0.75, noise_shape=None,
                                   is_training=is_training)

    Ylogits = layers.linear(dense_dropped, 10)
    probabilities = tf.nn.softmax(Ylogits)
    predicted_classes = tf.cast(tf.argmax(probabilities, 1), tf.uint8)

    loss = conv_model_loss(Ylogits, Y_, mode)
    train_op = conv_model_train_op(loss, mode)
    eval_metrics = conv_model_eval_metrics(predicted_classes, Y_, mode)

    # The keys of the predictions dictionary are free-form.
    predictions = {"predictions": probabilities, "classes": predicted_classes}
    return learn.ModelFnOps(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metrics
    )
Exemplo n.º 5
0
def discriminator_2layer(H, opt, dropout, prefix='', num_outputs=1, is_reuse=None):
    """Two-layer discriminator head: a ReLU hidden layer, then linear logits.

    Dropout with keep probability `dropout` is applied to the input of each
    of the two layers.

    Args:
        H: input features.
        opt: options object; `opt.H_dis` is the hidden layer width.
        dropout: keep probability passed to `tf.nn.dropout`.
        prefix: string prepended to the layer scopes 'dis_1' and 'dis_2'.
        num_outputs: number of logits produced.
        is_reuse: forwarded as `reuse` to both layers.

    Returns:
        The logits of the final linear layer (no activation applied).
    """
    bias_init = tf.constant_initializer(0.001, dtype=tf.float32)

    hidden_input = tf.nn.dropout(H, keep_prob=dropout)
    hidden = layers.fully_connected(hidden_input,
                                    num_outputs=opt.H_dis,
                                    biases_initializer=bias_init,
                                    activation_fn=tf.nn.relu,
                                    scope=prefix + 'dis_1',
                                    reuse=is_reuse)

    # The last layer must stay linear.
    logits_input = tf.nn.dropout(hidden, keep_prob=dropout)
    return layers.linear(logits_input,
                         num_outputs=num_outputs,
                         biases_initializer=bias_init,
                         scope=prefix + 'dis_2',
                         reuse=is_reuse)
    def network(self):
        """Build the classifier on `self.images` and return its softmax output.

        Layout: image processing layer, two ReLU convolutions (10 and 20
        filters), a 4x4 max pool, two more ReLU convolutions (30 and 40
        filters), a 2x2 max pool, a 100-unit tanh layer and a 2-unit softmax
        layer. Both pools use stride 1 with 'SAME' padding.

        Returns:
            The output of the final 2-unit softmax layer.
        """

        def conv_relu(inputs, filters, kernel, layer_name):
            # ReLU convolution with a freshly created truncated-normal initializer.
            return conv2d(inputs, filters, kernel,
                          activation_fn=tf.nn.relu,
                          name=layer_name,
                          weights_initializer=tf.truncated_normal_initializer(stddev=0.02))

        net = self.image_processing_layer(self.images)

        net = conv_relu(net, 10, [7, 7], 'conv1')
        net = conv_relu(net, 20, [5, 5], 'conv2')
        net = tf.nn.max_pool(net, [1, 4, 4, 1], [1, 1, 1, 1], padding='SAME')

        net = conv_relu(net, 30, [3, 3], 'conv3')
        net = conv_relu(net, 40, [3, 3], 'conv4')
        net = tf.nn.max_pool(net, [1, 2, 2, 1], [1, 1, 1, 1], padding='SAME')

        # Flatten each example; the batch size is taken from the config.
        flat = tf.reshape(net, [self.conf.batch_size, -1])

        hidden = linear(flat, 100, activation_fn=tf.nn.tanh, name='FC1')
        return linear(hidden, 2, activation_fn=tf.nn.softmax, name='out')
def _logistic_regression_model_fn(features, labels, mode):
  """Model function for a one-output logistic regression.

  Args:
    features: input passed to the linear layer.
    labels: targets for the sigmoid cross-entropy loss.
    mode: ignored.

  Returns:
    A tuple `(predictions, loss, train_op)`; predictions are the sigmoid of
    the logits and the train op uses Adagrad with learning rate 0.1.
  """
  del mode  # Unused.

  # Deliberately poor starting point (zero weights, bias of -10) so that
  # AUC/precision/recall/etc. change meaningfully even on a toy dataset.
  zero_weights = init_ops.zeros_initializer()
  very_negative_bias = init_ops.constant_initializer(-10.0)
  logits = layers.linear(
      features,
      1,
      weights_initializer=zero_weights,
      biases_initializer=very_negative_bias)

  predictions = math_ops.sigmoid(logits)
  loss = loss_ops.sigmoid_cross_entropy(logits, labels)
  train_op = optimizers.optimize_loss(
      loss,
      variables.get_global_step(),
      optimizer='Adagrad',
      learning_rate=0.1)
  return predictions, loss, train_op
Exemplo n.º 8
0
def conv_model(X, Y_):
    """Convolutional classifier: three convolutions, a dense ReLU layer, 10 logits.

    Args:
        X: input tensor, reshaped to [-1, 28, 28, 1].
        Y_: integer class labels; one-hot encoded to depth 10 for the loss.

    Returns:
        A tuple ``(predictions, loss, train_op)``. ``predictions`` is a dict
        holding the softmax output under "predictions" and the arg-max class
        (cast to uint8) under "classes"; ``loss`` is the mean cross-entropy.
    """
    XX = tf.reshape(X, [-1, 28, 28, 1])
    Y1 = layers.conv2d(XX, num_outputs=6, kernel_size=[6, 6])
    Y2 = layers.conv2d(Y1, num_outputs=12, kernel_size=[5, 5], stride=2)
    Y3 = layers.conv2d(Y2, num_outputs=24, kernel_size=[4, 4], stride=2)
    Y4 = layers.flatten(Y3)
    Y5 = layers.relu(Y4, 200)
    Ylogits = layers.linear(Y5, 10)
    predict = tf.nn.softmax(Ylogits)

    classes = tf.cast(tf.argmax(predict, 1), tf.uint8)
    # Keyword arguments are required by TF 1.x for this op and rule out any
    # confusion between the logits and labels positions.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=Ylogits, labels=tf.one_hot(Y_, 10))
    # optimize_loss expects a scalar, so average the per-example values.
    loss = tf.reduce_mean(cross_entropy)
    train_op = layers.optimize_loss(loss, framework.get_global_step(), 0.003, "Adam")
    return {"predictions": predict, "classes": classes}, loss, train_op
Exemplo n.º 9
0
def conv_model(X, Y_, mode):
    """Convolutional classifier with constant-initialized biases.

    Args:
        X: input tensor, reshaped to [-1, 28, 28, 1].
        Y_: integer class labels; one-hot encoded to depth 10 for the loss.
        mode: not used by this function.

    Returns:
        A tuple ``(predictions, loss, train_op)``. ``predictions`` is a dict
        holding the softmax output under "predictions" and the arg-max class
        (cast to uint8) under "classes"; ``loss`` is the mean cross-entropy
        multiplied by 100.
    """
    XX = tf.reshape(X, [-1, 28, 28, 1])
    biasInit = tf.constant_initializer(0.1, dtype=tf.float32)
    Y1 = layers.conv2d(XX, num_outputs=6, kernel_size=[6, 6], biases_initializer=biasInit)
    Y2 = layers.conv2d(Y1, num_outputs=12, kernel_size=[5, 5], stride=2, biases_initializer=biasInit)
    Y3 = layers.conv2d(Y2, num_outputs=24, kernel_size=[4, 4], stride=2, biases_initializer=biasInit)
    Y4 = layers.flatten(Y3)
    Y5 = layers.relu(Y4, 200, biases_initializer=biasInit)
    Ylogits = layers.linear(Y5, 10)
    predict = tf.nn.softmax(Ylogits)
    classes = tf.cast(tf.argmax(predict, 1), tf.uint8)
    # Keyword arguments are required by TF 1.x for this op and rule out any
    # confusion between the logits and labels positions.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=Ylogits, labels=tf.one_hot(Y_, 10))
    loss = tf.reduce_mean(cross_entropy) * 100
    train_op = layers.optimize_loss(loss, framework.get_global_step(), 0.001, "Adam")
    return {"predictions": predict, "classes": classes}, loss, train_op
# Graph construction for the RNN: run the cell over x, keep a window of
# eval_time_steps outputs, project them with a sigmoid linear layer, and
# define the training loss and a reporting metric.

# Define the zero state of the cell
initial_state = cell.zero_state(mini_batch_size, tf.float32)

# Launch dynamic RNN Network with specified cell and initial state
# We use time_major=False because we would need to transpose the input on our own otherwise
rnn_outputs, _rnn_states = tf.nn.dynamic_rnn(cell, x,
                            initial_state=initial_state, time_major=False)

# Get the last $eval_time_steps timestep(s) for training
# NOTE(review): the window begins at n_in_time_steps - (1 + eval_time_steps)
# and spans eval_time_steps steps, so it ends one step before index
# n_in_time_steps - 1. If rnn_outputs has exactly n_in_time_steps steps the
# final step is excluded -- confirm this is intended.
rnn_outputs_on_last_t_step =  tf.slice(
                            rnn_outputs,
                            [0, n_in_time_steps - (1+eval_time_steps), 0],
                            [mini_batch_size, eval_time_steps, n_units])

# Project output from rnn output size to n_output
final_projection = lambda z: layers.linear(z, num_outputs=n_output,
                            activation_fn=tf.nn.sigmoid)

# Apply projection to every time step
# NOTE(review): map_fn iterates over the leading axis of its input; with
# time_major=False above that is presumably the batch axis rather than the
# time axis -- TODO confirm.
predicted = tf.map_fn(final_projection, rnn_outputs_on_last_t_step)

# Error and backprop
# The loss is the L2 loss of |y| - |predicted| (absolute values are taken
# before subtracting).
error = tf.nn.l2_loss(tf.subtract(tf.abs(y),tf.abs(predicted)))
train_step = tf.train.AdamOptimizer(learning_rate).minimize(error)

# Prediction error and accuracy
# Despite its name, this is the mean of |y| - |predicted|, i.e. a signed
# average difference rather than a fraction of correct predictions.
accuracy = tf.reduce_mean(tf.subtract(tf.abs(y),tf.abs(predicted)))


#-------------------------------------------------------------------------------
# RUN THE NETWORK
#-------------------------------------------------------------------------------
Exemplo n.º 11
0
def generator(x, hidden_size):
  """Two-layer generator built under the 'Generator' variable scope.

  Args:
    x: input to the first linear layer.
    hidden_size: number of outputs of the first linear layer.

  Returns:
    The single-output linear projection of the softplus-activated hidden layer.
  """
  with tf.variable_scope('Generator'):
    hidden = layers.linear(x, hidden_size)
    activated = tf.nn.softplus(hidden)
    return layers.linear(activated, 1)
def inference_net(x, latent_size):
    """Project `x` to `latent_size` outputs with a single linear layer."""
    latent = layers.linear(x, latent_size)
    return latent
Exemplo n.º 13
0
# Graph construction for the recurrent model: dropout-wrapped multi-layer
# cell, dynamic unrolling, a shared linear readout, the loss/train op, and
# the summaries used for display.

# naive dropout
# Each cell gets dropout on its input with keep probability pkeep.
dropcells = [rnn.DropoutWrapper(each,input_keep_prob=pkeep) for each in cells]
multicell = rnn.MultiRNNCell(dropcells, state_is_tuple=False)
multicell = rnn.DropoutWrapper(multicell, output_keep_prob=pkeep)  # dropout for the softmax layer

# Yr holds the per-step outputs, H the final state; Hin is the fed initial state.
Yr, H = tf.nn.dynamic_rnn(multicell, Xo, dtype=tf.float32, initial_state=Hin)

# Re-export the final state under the explicit tensor name 'H'.
H = tf.identity(H, name='H')

# Softmax layer implementation:
# Flatten the first two dimension of the output [ BATCHSIZE, SEQLEN, ALPHASIZE ] => [ BATCHSIZE x SEQLEN, ALPHASIZE ]
# then apply softmax readout layer. This way, the weights and biases are shared across unrolled time steps.
# From the readout point of view, a value coming from a sequence time step or a minibatch item is the same thing.

Y_flat = tf.reshape(Yr, [-1, INTERNALSIZE])    # [ BATCHSIZE x SEQLEN, INTERNALSIZE ]
Ylogits = layers.linear(Y_flat, ALPHASIZE)     # [ BATCHSIZE x SEQLEN, ALPHASIZE ]
Y_flat_ = tf.reshape(Yo_, [-1, ALPHASIZE])     # [ BATCHSIZE x SEQLEN, ALPHASIZE ]
loss = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_flat_)  # [ BATCHSIZE x SEQLEN ]
loss = tf.reshape(loss, [batchsize, -1])      # [ BATCHSIZE, SEQLEN ]
Yo = tf.nn.softmax(Ylogits, name='Yo')        # [ BATCHSIZE x SEQLEN, ALPHASIZE ]
Y = tf.argmax(Yo, 1)                          # [ BATCHSIZE x SEQLEN ]
Y = tf.reshape(Y, [batchsize, -1], name="Y")  # [ BATCHSIZE, SEQLEN ]
# The optimizer is given the unreduced [ BATCHSIZE, SEQLEN ] loss tensor, not
# the averaged batchloss computed below.
train_step = tf.train.AdamOptimizer(lr).minimize(loss)

# stats for display
seqloss = tf.reduce_mean(loss, 1)
batchloss = tf.reduce_mean(seqloss)
# Accuracy compares Y_ with the predictions cast to uint8.
accuracy = tf.reduce_mean(tf.cast(tf.equal(Y_, tf.cast(Y, tf.uint8)), tf.float32))
loss_summary = tf.summary.scalar("batch_loss", batchloss)
accuracy_summary = tf.summary.scalar("batch_accuracy", accuracy)
summaries = tf.summary.merge([loss_summary, accuracy_summary])
Exemplo n.º 14
0
    def _build_network(self, input_width):
        """Build the full graph inside `self.session.graph`.

        Creates the placeholders, a character-level bidirectional RNN token
        encoder, an input transform, `self.h_depth` hidden layers of type
        `self.layer_type`, the output layer, the loss, the Adam training op,
        the accuracy metric and the summaries, then calls
        `self._initialize_variables()`.

        Args:
            input_width: width the concatenated encoded tokens and features
                are reshaped to before the input transform.

        Raises:
            ValueError: if `self.rnn_cell_type` is neither "LSTM" nor "GRU",
                or if `self.layer_type` is neither "FeedForward" nor
                "Highway" (the latter only when `self.h_depth` > 0).
        """
        with self.session.graph.as_default():
            if self.rnn_cell_type == "LSTM":
                self.rnn_cell = tf.contrib.rnn.LSTMCell(self.rnn_cell_dim)
            elif self.rnn_cell_type == "GRU":
                self.rnn_cell = tf.contrib.rnn.GRUCell(self.rnn_cell_dim)
            else:
                # Report the configured type; the bare name `rnn_cell` does not
                # exist in this scope and would raise NameError instead.
                raise ValueError(
                    "Unknown rnn_cell {}".format(self.rnn_cell_type))

            self.global_step = tf.Variable(0,
                                           dtype=tf.int64,
                                           trainable=False,
                                           name='global_step')
            self.tokens = tf.placeholder(tf.int32, [None, None, None],
                                         name="tokens")
            self.token_lens = tf.placeholder(tf.int32, [None, None],
                                             name="token_lens")
            self.features = tf.placeholder(tf.float32, [None, None],
                                           name="features")
            self.labels = tf.placeholder(tf.int64, [None], name="labels")
            self.alphabet_size = len(self.char_vocabulary.classes_)

            self.dropout_keep = tf.placeholder(tf.float32)
            self.input_width = input_width

            char_embedding_matrix = tf.get_variable(
                "char_embeddings", [self.alphabet_size, self.EMBEDDING_SIZE],
                initializer=tf.random_normal_initializer(stddev=0.01),
                dtype=tf.float32)

            with tf.variable_scope("token_encoder"):
                # Collapse the leading axes so every token is one sequence of
                # character ids for the RNN.
                tokens_flat = tf.reshape(self.tokens,
                                         [-1, tf.shape(self.tokens)[-1]])
                token_lens_flat = tf.reshape(self.token_lens, [-1])
                char_embeddings = tf.nn.embedding_lookup(
                    char_embedding_matrix, tokens_flat)

                # NOTE(review): the same cell object is passed as both the
                # forward and the backward cell -- confirm this is intended
                # and supported by the TensorFlow version in use.
                hidden_states, final_states = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw=self.rnn_cell,
                    cell_bw=self.rnn_cell,
                    inputs=char_embeddings,
                    sequence_length=token_lens_flat,
                    dtype=tf.float32,
                    scope="char_BiRNN")

            # Project the concatenated final states to one embedding per token.
            tokens_encoded = tf_layers.linear(tf.concat(final_states, 1),
                                              self.EMBEDDING_SIZE,
                                              scope="tokens_encoded")
            # Regroup the token embeddings so each row matches a row of features.
            tokens_encoded = tf.reshape(tokens_encoded,
                                        [tf.shape(self.features)[0], -1])

            self.input_layer = tf.concat((tokens_encoded, self.features), 1)
            self.input_layer = tf.reshape(self.input_layer,
                                          [-1, self.input_width])

            # input transform
            self.hidden_layer = tf.nn.dropout(
                tf_layers.fully_connected(self.input_layer,
                                          num_outputs=self.h_width,
                                          activation_fn=None,
                                          scope="input_layer"),
                self.dropout_keep)

            # hidden layers
            for i in range(self.h_depth):
                if self.layer_type == "FeedForward":
                    self.hidden_layer = tf.nn.dropout(
                        tf_layers.fully_connected(
                            self.hidden_layer,
                            num_outputs=self.h_width,
                            activation_fn=tf.nn.relu,
                            scope="ff_layer_{}".format(i)), self.dropout_keep)
                elif self.layer_type == "Highway":
                    self.hidden_layer = tf.nn.dropout(
                        highway_layer(self.hidden_layer,
                                      num_outputs=self.h_width,
                                      activation_fn=tf.nn.relu,
                                      scope="highway_layer_{}".format(i)),
                        self.dropout_keep)
                else:
                    raise ValueError("Unknown hidden layer type.")

            # One logit per target class.
            self.output_layer = tf_layers.fully_connected(
                self.hidden_layer,
                num_outputs=len(self.target_encoder.classes_),
                activation_fn=None,
                scope="output_layer")

            self.predictions = tf.argmax(self.output_layer, 1)
            self.loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.output_layer, labels=self.labels),
                name="loss")

            self.training = tf.train.AdamOptimizer().minimize(
                self.loss, global_step=self.global_step)
            self.accuracy = tf_metrics.accuracy(self.predictions, self.labels)

            self.summary = tf.summary.merge([
                tf.summary.scalar("train/loss", self.loss),
                tf.summary.scalar("train/accuracy", self.accuracy)
            ])

            self._initialize_variables()
Exemplo n.º 15
0
    def __init_decoder(self):
        '''Builds the training and inference decoders under the 'decoder' scope.

        Depending on the 'use_attention' config entry, either attention or
        simple decoder functions are created. The training decoder is run
        first; the scope is then switched to reuse its variables and the
        inference decoder is run. Results are stored on self as the
        decoder_*_train and decoder_*_inference attributes.
        '''
        with tf.variable_scope('decoder') as scope:

            def output_fn(outs):
                # Projects decoder outputs to vocabulary-sized logits.
                return layers.linear(outs,
                                     self.__get_vocab_size(),
                                     scope=scope)

            def inference_kwargs():
                # Keyword arguments common to both inference decoder functions.
                return dict(
                    output_fn=output_fn,
                    encoder_state=self.encoder_state,
                    embeddings=self.embeddings,
                    start_of_sequence_id=Config.EOS_WORD_IDX,
                    end_of_sequence_id=Config.EOS_WORD_IDX,
                    maximum_length=(
                        tf.reduce_max(self.encoder_inputs_length) + 3),
                    num_decoder_symbols=self.__get_vocab_size())

            if self.cfg.get('use_attention'):
                attention_states = tf.transpose(self.encoder_outputs,
                                                [1, 0, 2])

                prepared = seq2seq.prepare_attention(
                    attention_states=attention_states,
                    attention_option='bahdanau',
                    num_units=self.decoder_cell.output_size)
                (attention_keys, attention_values, attention_score_fn,
                 attention_construct_fn) = prepared

                # Attention arguments shared by the train and inference fns.
                attention_kwargs = dict(
                    attention_keys=attention_keys,
                    attention_values=attention_values,
                    attention_score_fn=attention_score_fn,
                    attention_construct_fn=attention_construct_fn)

                decoder_fn_train = seq2seq.attention_decoder_fn_train(
                    encoder_state=self.encoder_state,
                    name='attention_decoder',
                    **attention_kwargs)

                inference_args = inference_kwargs()
                inference_args.update(attention_kwargs)
                decoder_fn_inference = seq2seq.attention_decoder_fn_inference(
                    **inference_args)
            else:
                decoder_fn_train = seq2seq.simple_decoder_fn_train(
                    encoder_state=self.encoder_state)
                decoder_fn_inference = seq2seq.simple_decoder_fn_inference(
                    **inference_kwargs())

            train_result = seq2seq.dynamic_rnn_decoder(
                cell=self.decoder_cell,
                decoder_fn=decoder_fn_train,
                inputs=self.decoder_train_inputs_embedded,
                sequence_length=self.decoder_train_length,
                time_major=True,
                scope=scope)
            (self.decoder_outputs_train, self.decoder_state_train,
             self.decoder_context_state_train) = train_result

            self.decoder_logits_train = output_fn(self.decoder_outputs_train)
            self.decoder_prediction_train = tf.argmax(
                self.decoder_logits_train,
                axis=-1,
                name='decoder_prediction_traion')

            # From here on the inference decoder reuses the variables above.
            scope.reuse_variables()

            inference_result = seq2seq.dynamic_rnn_decoder(
                cell=self.decoder_cell,
                decoder_fn=decoder_fn_inference,
                time_major=True,
                scope=scope)
            (self.decoder_logits_inference, decoder_state_inference,
             self.decoder_context_state_inference) = inference_result

            self.decoder_prediction_inference = tf.argmax(
                self.decoder_logits_inference,
                axis=-1,
                name='decoder_prediction_inference')
Exemplo n.º 16
0
# to do for LSTM's tuple state, but can be achieved by creating two vector
# Variables, which are then tiled along batch dimension and grouped into tuple.
# The batch size is read from axis 1 of inputs; the RNN below is run with
# time_major=True.
batch_size = tf.shape(inputs)[1]
initial_state = cell.zero_state(batch_size, tf.float32)

# Given inputs (time, batch, input_size) outputs a tuple
#  - outputs: (time, batch, output_size)  [do not mistake with OUTPUT_SIZE]
#  - states:  (time, batch, hidden_size)
rnn_outputs, rnn_states = tf.nn.dynamic_rnn(cell,
                                            inputs,
                                            initial_state=initial_state,
                                            time_major=True)

# project output from rnn output size to OUTPUT_SIZE. Sometimes it is worth adding
# an extra layer here.
final_projection = lambda x: layers.linear(
    x, num_outputs=OUTPUT_SIZE, activation_fn=tf.nn.sigmoid)

# apply projection to every timestep.
predicted_outputs = map_fn(final_projection, rnn_outputs)

# compute elementwise cross entropy.
# TINY is added inside both logarithms, presumably to avoid log(0) -- its
# value is defined outside this fragment.
error = -(outputs * tf.log(predicted_outputs + TINY) +
          (1.0 - outputs) * tf.log(1.0 - predicted_outputs + TINY))
# Reduce the elementwise values to a single scalar loss.
error = tf.reduce_mean(error)

# optimize
train_fn = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(error)

# assuming that absolute difference between output and correct answer is 0.5
# or less we can round it to the correct output.
accuracy = tf.reduce_mean(
Exemplo n.º 17
0
    def build(self):
        """Build the GAN graph: RNN generator, discriminator, losses, train ops.

        All configuration is read from ``self.hparam``. The generator stacks
        the recurrent layers described by ``hparam.cells`` / ``hparam.repeats``
        / ``hparam.bidirection`` and ends in a "decision" head. The
        discriminator is either recurrent (``hparam.rnn_dis``) or
        convolutional. The losses are the critic difference plus a gradient
        penalty on interpolated samples. Placeholders, summaries, debug
        tensors and train ops are stored on ``self`` and ``self.built`` is set
        to True at the end.

        Raises:
            ValueError: if ``hparam.code_ndim`` is neither 2 nor 3.
            AssertionError: if ``hparam.conditional`` is set together with
                ``hparam.onehot``.
            Exception: if ``hparam.conditional`` is set with a rank-3 condition.
        """
        hparam = self.hparam

        if hparam.code_ndim == 3:
            code_shape = [None, hparam.timesteps, hparam.code_dim]
            cond_shape = [None, hparam.timesteps, hparam.cond_dim]
        elif hparam.code_ndim == 2:
            code_shape = [None, hparam.code_dim]
            cond_shape = [None, hparam.cond_dim]
        else:
            # Without this branch the shapes stay unbound and the placeholder
            # creation below fails with an unrelated NameError.
            raise ValueError(
                "Unsupported code_ndim {}; expected 2 or 3".format(
                    hparam.code_ndim))

        code = ori_code = tf.placeholder("float32", code_shape, name='code')
        if hparam.conditional:
            assert hparam.onehot is False, "NotImplemented: cond with onehot"
            cond = tf.placeholder("float32", cond_shape, name='condition')
            code = tf.concat([code, cond], -1)

        real_seq = tf.placeholder("int32", [None, hparam.timesteps],
                                  name='real_seq')
        real_seq_img = tf.one_hot(real_seq, hparam.vocab_size)
        dis_train = tf.placeholder('bool', name='is_train')
        bs = tf.shape(ori_code)[0]

        # generator
        final_states = []
        init_states = []
        with tf.variable_scope('generator'):
            # play with code

            # Number of time steps fed to the first layer; later layers
            # multiply it by their repeat factor.
            step = int(hparam.timesteps / np.prod(hparam.repeats))
            first_input = code if hparam.code_ndim == 3 else \
                tf.tile(tf.expand_dims(code, 1), (1, step, 1))
            if hparam.timestep_pad:
                # Append a channel that runs linearly from 0 to 1 over the steps.
                first_input = tf.concat([
                    first_input,
                    tf.tile(
                        tf.expand_dims(
                            tf.expand_dims(tf.lin_space(0., 1., step), 0), -1),
                        (bs, 1, 1)),
                ], -1)
            outputs = [first_input]
            for ind in range(len(hparam.cells)):
                repeat = hparam.repeats[ind]
                cell_size = hparam.cells[ind]
                bi = hparam.bidirection[ind]
                with tf.variable_scope('layer{}'.format(ind)):
                    # if ind == len(hparam.repeats) and \
                    #    hparam.last_bidirectional:
                    if bi:
                        # assert(repeat == 1)
                        fw_cell = hparam.basic_cell(cell_size)
                        bw_cell = hparam.basic_cell(cell_size)
                        fw_init = fw_cell.zero_state(bs, tf.float32)
                        # Each direction's zero state comes from its own cell.
                        bw_init = bw_cell.zero_state(bs, tf.float32)
                        output, state = tf.nn.bidirectional_dynamic_rnn(
                            fw_cell,
                            bw_cell,
                            outputs[-1],
                            initial_state_fw=fw_init,
                            initial_state_bw=bw_init,
                            dtype=tf.float32,
                        )
                        # Forward and backward outputs are concatenated on the
                        # feature axis, doubling the width.
                        output = tf.concat(output, 2)
                        cell_size *= 2

                        init_states.extend([fw_init, bw_init])
                        final_states.append(state)
                    else:
                        cell = hparam.basic_cell(cell_size)
                        init = cell.zero_state(bs, tf.float32)
                        # NOTE(review): `init` is recorded in init_states but
                        # is not passed to dynamic_rnn as initial_state, unlike
                        # the bidirectional branch -- confirm this is intended.
                        output, state = tf.nn.dynamic_rnn(
                            cell,
                            outputs[-1],
                            dtype=tf.float32,
                        )
                        init_states.append(init)
                        final_states.append(state)
                    # output = output * 2
                    if repeat != 1:
                        # Tile on the feature axis, then reshape so the time
                        # axis grows by the repeat factor.
                        step *= repeat
                        output = tf.reshape(tf.tile(output, (1, 1, repeat)),
                                            [bs, step, cell_size])
                    outputs.append(output)
            # Trim the last layer's output to the configured number of steps.
            outputs[-1] = outputs[-1][:, :hparam.timesteps, :]
            for o in outputs:
                print(o)

            with tf.variable_scope('decision'):
                if hparam.deconv_decision:
                    fake_seq_img = outputs[-1]

                    right = slim.fully_connected(fake_seq_img,
                                                 32 * 10,
                                                 activation_fn=None)
                    right = tf.reshape(right, [bs, hparam.timesteps, 10, 32])
                    # right = tf.nn.softmax(right, -1)
                    right = slim.conv2d_transpose(
                        right,
                        1, [1, 12],
                        stride=(1, 12),
                        activation_fn=tf.tanh)[:, :, :, 0]

                    fake_seq_img = right
                    # Pad four columns of -1 on each side of the last axis.
                    fake_seq_img = tf.concat(
                        [
                            tf.ones([bs, hparam.timesteps, 4]) * -1,
                            fake_seq_img,
                            tf.ones([bs, hparam.timesteps, 4]) * -1,
                        ],
                        axis=2)
                    print(fake_seq_img)
                else:
                    fake_seq_img = outputs[-1]
                    fake_seq_img = layers.linear(fake_seq_img,
                                                 hparam.vocab_size)
                    outputs.append(fake_seq_img)
                    fake_seq_img = tf.tanh(fake_seq_img)
                    # fake_seq_img = tf.nn.softmax(fake_seq_img, -1)
                    outputs.append(fake_seq_img)
                fake_seq = tf.argmax(fake_seq_img, -1)

            if hparam.plus_code:
                fake_seq_img = tf.clip_by_value(fake_seq_img + code, -1., +1.)

        # discriminator
        if hparam.rnn_dis:

            def dis(seq_img, bn_scope, reuse=False, cond_vec=None):
                # Recurrent critic. `cond_vec` is accepted so that both
                # variants share the call signature used below (every call
                # passes cond_vec=...); this variant does not use it.
                with tf.variable_scope('discriminator', reuse=reuse):
                    print('dis')
                    slices = tf.unstack(seq_img, axis=1)
                    fw_cell = hparam.basic_cell(32)
                    # bw_cell = hparam.basic_cell(64)
                    x, state = tf.nn.static_rnn(
                        fw_cell,  # bw_cell,
                        slices,
                        dtype=tf.float32,
                    )
                    x = tf.stack(x, axis=1)
                    print(x)
                    # x = tf.concat(x, 2)
                    x = slim.linear(x, 1)
                    print(x)
                    x = slim.flatten(x)
                    print(x)
                    x = slim.linear(x, 1)
                    print(x)
                    # x = tf.nn.sigmoid(x)
                return x
        else:

            def dis(seq_img, bn_scope, reuse=False, cond_vec=None):
                # Convolutional critic over two views of the input: the
                # product seq_img * seq_img^T and seq_img itself.
                with tf.variable_scope('discriminator', reuse=reuse):
                    fs = 32
                    covariance = tf.matmul(seq_img, seq_img, transpose_b=True)
                    x = tf.expand_dims(covariance, -1)
                    x = lrelu(slim.conv2d(x, fs * 1, [5, 5]))
                    x = slim.max_pool2d(x, (2, 2))
                    x = lrelu(slim.conv2d(x, fs * 2, [5, 5]))
                    x = slim.max_pool2d(x, (2, 2))
                    x = lrelu(slim.conv2d(x, fs * 4, [5, 5]))
                    x = slim.max_pool2d(x, (2, 2))
                    x = lrelu(slim.conv2d(x, fs * 4, [5, 5]))
                    x = slim.max_pool2d(x, (2, 2))
                    covariance_feat = slim.flatten(x)

                    # x = tf.nn.embedding_lookup(embeddings, seq)
                    # x = ResNetBuilder(dis_train,
                    #                   bn_scopes=['fake', 'real'],
                    #                   bn_scope=bn_scope).\
                    #     resnet(x, structure=[2, 2, 2, 2], filters=8, nb_class=1)

                    #  note axis
                    fs = 32
                    x = tf.expand_dims(seq_img, -1)
                    x = lrelu(slim.conv2d(x, fs * 1, [5, 5]))
                    x = slim.max_pool2d(x, (2, 2))
                    x = lrelu(slim.conv2d(x, fs * 2, [5, 5]))
                    x = slim.max_pool2d(x, (2, 2))
                    x = lrelu(slim.conv2d(x, fs * 4, [5, 5]))
                    x = slim.max_pool2d(x, (2, 2))
                    x = lrelu(slim.conv2d(x, fs * 4, [5, 5]))
                    x = slim.max_pool2d(x, (2, 2))
                    seq_feat = slim.flatten(x)

                    feat = tf.concat([covariance_feat, seq_feat], axis=1)
                    if cond_vec is not None:
                        feat = tf.concat([feat, cond_vec], axis=-1)
                    feat = lrelu(slim.linear(feat, 200))

                    x = slim.linear(feat, 1)
                    # x = tf.nn.sigmoid(x)
                return x

        # opt
        # problematic with the reuse bn
        # fake_seq_img = tf.where(
        #     tf.greater(fake_seq_img, 0.5),
        #     fake_seq_img,
        #     tf.zeros_like(fake_seq_img))
        if hparam.conditional:
            if len(cond_shape) == 3:
                raise Exception("NotImplemented: cond with ndim3 (DisNet)")
            cond_real = tf.placeholder("float32", [None, hparam.cond_dim],
                                       name='cond_real')

        # The first call creates the discriminator variables; later calls
        # reuse them.
        fake_dis_pred = dis(fake_seq_img,
                            cond_vec=cond if hparam.conditional else None,
                            bn_scope='fake')
        real_dis_pred = dis(real_seq_img,
                            cond_vec=cond_real if hparam.conditional else None,
                            bn_scope='real',
                            reuse=True)

        # traditional GAN loss
        # G_loss = tf.reduce_mean(-safe_log(fake_dis_pred))
        # D_loss = tf.reduce_mean(-safe_log(real_dis_pred)) +\
        #     tf.reduce_mean(-safe_log(1-fake_dis_pred))
        # IWGAN
        # One scalar mixing coefficient is drawn (shape=()), so every sample
        # in the batch shares it.
        epsilon = tf.random_uniform(minval=0, maxval=1.0, shape=())

        print('grad')
        intepolation = fake_seq_img * epsilon + real_seq_img * (1.0 - epsilon)
        # NOTE(review): the interpolated condition is additionally divided by
        # 2, unlike the interpolated sample above -- confirm this is intended.
        inte_dis_pred = dis(intepolation,
                            cond_vec=(cond * epsilon + cond_real *
                                      (1.0 - epsilon)) /
                            2. if hparam.conditional else None,
                            bn_scope='intepolation',
                            reuse=True)
        grad = tf.gradients(inte_dis_pred, intepolation)[0]
        print(grad)
        grad = tf.reshape(grad, (-1, hparam.timesteps * hparam.vocab_size))
        print(grad)
        # Critic loss plus a penalty (weight 10) on the deviation of the
        # per-sample gradient norm from 1.
        D_loss = tf.reduce_mean(fake_dis_pred) - \
            tf.reduce_mean(real_dis_pred) + \
            10*tf.reduce_mean(tf.square(tf.norm(grad, ord=2, axis=1)-1))
        G_loss = -tf.reduce_mean(fake_dis_pred)
        print(D_loss)
        print(G_loss)

        fake_seq_img_grad = tf.gradients(G_loss, fake_seq_img)[0]

        G_opt = tf.train.AdamOptimizer(learning_rate=hparam.G_lr,
                                       beta1=0.5,
                                       beta2=0.9)
        # D_opt = tf.train.GradientDescentOptimizer(learning_rate=hparam.D_lr)
        D_opt = tf.train.AdamOptimizer(learning_rate=hparam.D_lr,
                                       beta1=0.5,
                                       beta2=0.9)
        D_iter = tf.Variable(0, name='D_iter')
        G_iter = tf.Variable(0, name='G_iter')
        # Concatenate the trainable variables of every configured generator
        # scope, in the order the scopes are listed.
        trainable_gen_var = [
            variable
            for scope_name in hparam.trainable_gen
            for variable in tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope_name)
        ]
        G_train_op = slim.learning.create_train_op(
            G_loss,
            G_opt,
            variables_to_train=trainable_gen_var,
            global_step=G_iter,
            clip_gradient_norm=hparam.G_clipnorm)
        D_train_op = slim.learning.create_train_op(
            D_loss,
            D_opt,
            variables_to_train=tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, "discriminator"),
            global_step=D_iter,
        )
        iter_step = tf.Variable(0, name='iter_step')
        iter_step_op = iter_step.assign_add(1)

        # input
        self.ori_code = ori_code
        self.code = code
        self.real_seq = real_seq
        self.real_seq_img = real_seq_img
        if hparam.conditional:
            self.cond = cond
            self.cond_real = cond_real
        # summary
        self.summary_fake_img = tf.summary.image(
            'fake_img', tf.expand_dims(fake_seq_img, -1))
        self.summary_real_img = tf.summary.image(
            'real_img', tf.expand_dims(real_seq_img, -1))
        self.summary_G_loss = tf.summary.scalar('G_loss', G_loss)
        self.summary_D_loss = tf.summary.scalar('D_loss', D_loss)
        self.summary_fake_dis_pred = tf.summary.scalar(
            'fake_dis_pred', tf.reduce_mean(fake_dis_pred))
        self.summary_real_dis_pred = tf.summary.scalar(
            'real_dis_pred', tf.reduce_mean(real_dis_pred))
        self.summary_fake_img_grad = tf.summary.image(
            'gradient_map', tf.expand_dims(fake_seq_img_grad, -1))
        self.summary_first_input = tf.summary.image(
            'noise', tf.expand_dims(first_input, -1))
        self.gen_outputs = outputs

        # debug
        self.fake_seq_img = fake_seq_img
        self.first_input = first_input
        self.init_states = tuple(init_states)
        self.final_states = tuple(final_states)
        self.bs_tensor = bs
        # train
        self.dis_train = dis_train
        self.G_train_op = G_train_op
        self.D_train_op = D_train_op
        self.iter_step = iter_step
        self.iter_step_op = iter_step_op
        # output
        self.fake_seq = fake_seq
        self.built = True
Exemplo n.º 18
0
def train_rnn(args):
    """Train a character-level stacked-GRU language model and checkpoint it.

    The graph one-hot encodes the input characters, runs them through
    ``NLAYERS`` GRU cells (dropout on every cell input and on the stack
    output) and applies one linear readout shared across time steps. While
    training, the function periodically prints training statistics, evaluates
    on the validation text and samples 1000 characters from the model; the
    samples are echoed to stdout and written to ``generated.txt`` inside
    ``args.save_dir``. One checkpoint is saved after the last minibatch.

    Args:
        args: Namespace-like object providing ``batch_size``, ``data_path``,
            ``save_dir`` and ``epochs``.
    """
    SEQLEN = 50
    BATCHSIZE = args.batch_size
    ALPHASIZE = txt.ALPHASIZE
    INTERNALSIZE = 512
    NLAYERS = 5
    learning_rate = 0.0002  # small learning rate
    dropout_keep = .9  # keep probability fed to the dropout wrappers in training

    # NOTE(review): plain concatenation, so data_path must already end with a
    # path separator (or be a deliberate filename prefix) -- confirm callers.
    text_files = args.data_path + '*.txt'
    codetext, valitext, bookranges = txt.read_data_files(text_files,
                                                         validation=True)
    # set epoch size based on batchsize and sequence len
    epoch_size = len(codetext) // (BATCHSIZE * SEQLEN)
    # model placeholders
    lr = tf.placeholder(tf.float32, name='learning_rate')
    p_keep = tf.placeholder(tf.float32, name='p_keep')
    batch_size = tf.placeholder(tf.int32, name='batch_size')
    # input placeholders
    X = tf.placeholder(tf.uint8, [None, None], name='X')
    Xo = tf.one_hot(X, ALPHASIZE, 1.0, 0.0)
    # expected outputs = same sequence shifted by 1
    Y_ = tf.placeholder(tf.uint8, [None, None], name='Y_')
    Yo_ = tf.one_hot(Y_, ALPHASIZE, 1.0, 0.0)
    # input state
    Hin = tf.placeholder(tf.float32, [None, INTERNALSIZE * NLAYERS],
                         name='Hin')
    # A stack of NLAYERS GRU cells; dynamic_rnn infers the number of unrolled
    # steps from the size of the inputs Xo.
    cells = [rnn.GRUCell(INTERNALSIZE) for _ in range(NLAYERS)]
    # dropout on the input of every cell
    drop_cells = [
        rnn.DropoutWrapper(cell, input_keep_prob=p_keep) for cell in cells
    ]
    multi_cell = rnn.MultiRNNCell(drop_cells, state_is_tuple=False)
    # dropout on the output of the whole stack, i.e. before the readout layer
    multi_cell = rnn.DropoutWrapper(multi_cell, output_keep_prob=p_keep)
    Yr, H = tf.nn.dynamic_rnn(multi_cell,
                              Xo,
                              dtype=tf.float32,
                              initial_state=Hin)
    # H is the last state
    H = tf.identity(H, name='H')  # give it a tf name
    # Softmax layer implementation:
    # Flatten the first two dimensions of the RNN output
    # [ BATCHSIZE, SEQLEN, INTERNALSIZE ] => [ BATCHSIZE x SEQLEN, INTERNALSIZE ]
    # then apply the readout layer. This way, the weights and biases are shared
    # across unrolled time steps. From the readout point of view, a value coming
    # from a sequence time step or a minibatch item is the same thing.
    Yflat = tf.reshape(Yr, [-1, INTERNALSIZE])
    Ylogits = layers.linear(Yflat, ALPHASIZE)
    Yflat_ = tf.reshape(Yo_, [-1, ALPHASIZE])
    loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=Ylogits,
                                                      labels=Yflat_)
    loss = tf.reshape(loss, [batch_size, -1])
    Yo = tf.nn.softmax(Ylogits, name='Yo')
    Y = tf.argmax(Yo, 1)
    Y = tf.reshape(Y, [batch_size, -1], name="Y")
    train_step = tf.train.AdamOptimizer(lr).minimize(loss)
    # stats for display
    seqloss = tf.reduce_mean(loss, 1)
    batchloss = tf.reduce_mean(seqloss)
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(Y_, tf.cast(Y, tf.uint8)), tf.float32))
    loss_summary = tf.summary.scalar("batch_loss", batchloss)
    acc_summary = tf.summary.scalar("batch_accuracy", accuracy)
    summaries = tf.summary.merge([loss_summary, acc_summary])
    # Tensorboard logging is currently disabled; the timestamp is only needed
    # by the commented-out writers below.
    timestamp = str(math.trunc(time.time()))
    #summary_writer = tf.summary.FileWriter("log/" + timestamp + "-training")
    #validation_writer = tf.summary.FileWriter("log/" + timestamp + "-validation")
    # For saving models
    os.makedirs(args.save_dir, exist_ok=True)
    # Keep at most the 10 most recent checkpoints
    saver = tf.train.Saver(max_to_keep=10)
    # For displaying progress: init the progress bar
    # TODO: Change this guy eventually
    DISPLAY_FREQ = 50
    _50_BATCHES = DISPLAY_FREQ * BATCHSIZE * SEQLEN
    progress = txt.Progress(DISPLAY_FREQ,
                            size=111 + 2,
                            msg="Training on next " + str(DISPLAY_FREQ) +
                            " batches")
    istate = np.zeros([BATCHSIZE, INTERNALSIZE * NLAYERS])
    init = tf.global_variables_initializer()
    step = 0
    # os.path.join keeps the outputs inside save_dir whether or not the
    # argument was given with a trailing separator.
    gen_path = os.path.join(args.save_dir, 'generated.txt')
    # Context managers release the text file and the session even when
    # training is interrupted by an exception.
    with open(gen_path, 'w') as gen_file, tf.Session() as sess:
        sess.run(init)
        # Training loop
        for x, y_, epoch in txt.rnn_minibatch_sequencer(codetext,
                                                        BATCHSIZE,
                                                        SEQLEN,
                                                        nb_epochs=args.epochs):
            # train on one minibatch
            feed_dict = {
                X: x,
                Y_: y_,
                Hin: istate,
                lr: learning_rate,
                p_keep: dropout_keep,
                batch_size: BATCHSIZE
            }
            _, y, ostate = sess.run([train_step, Y, H], feed_dict=feed_dict)
            # report statistics on the current training batch every 50 batches
            if step % _50_BATCHES == 0:
                feed_dict = {
                    X: x,
                    Y_: y_,
                    Hin: istate,
                    p_keep: 1.0,
                    batch_size: BATCHSIZE
                }  # no dropout for evaluation
                y, l, bl, acc, smm = sess.run(
                    [Y, seqloss, batchloss, accuracy, summaries],
                    feed_dict=feed_dict)
                txt.print_learning_learned_comparison(x, y, l, bookranges, bl,
                                                      acc, epoch_size, step,
                                                      epoch)
                #summary_writer.add_summary(smm, step)

            # run a validation step every 50 batches
            # The validation text should be a single sequence but that's too slow (1s per 1024 chars!),
            # so we cut it up and batch the pieces (slightly inaccurate)
            # tested: validating with 5K sequences instead of 1K is only slightly more accurate, but a lot slower.
            if step % _50_BATCHES == 0 and len(valitext) > 0:
                VALI_SEQLEN = 1 * 1024  # Sequence length for validation. State will be wrong at the start of each sequence.
                bsize = len(valitext) // VALI_SEQLEN
                txt.print_validation_header(len(codetext), bookranges)
                vali_x, vali_y, _ = next(
                    txt.rnn_minibatch_sequencer(valitext, bsize, VALI_SEQLEN,
                                                1))  # all data in 1 batch
                vali_nullstate = np.zeros([bsize, INTERNALSIZE * NLAYERS])
                feed_dict = {
                    X: vali_x,
                    Y_: vali_y,
                    Hin: vali_nullstate,
                    p_keep: 1.0,  # no dropout for validation
                    batch_size: bsize
                }
                ls, acc, smm = sess.run([batchloss, accuracy, summaries],
                                        feed_dict=feed_dict)
                txt.print_validation_stats(ls, acc)
                # save validation data for Tensorboard
                #validation_writer.add_summary(smm, step)
                #saver.save(sess, '{}/rnn_val_save'.format(args.save_dir+'val/'), global_step=step)
            # display a short text generated with the current weights and biases (every 150 batches)
            if step // 3 % _50_BATCHES == 0:
                txt.print_text_generation_header()
                ry = np.array([[txt.convert_from_alphabet(ord("K"))]])
                rh = np.zeros([1, INTERNALSIZE * NLAYERS])
                gen_file.write(
                    '----------------- STEP {} -----------------\n'.format(
                        step))
                for k in range(1000):
                    # feed the previous sample and state back in, one
                    # character at a time
                    ryo, rh = sess.run([Yo, H],
                                       feed_dict={
                                           X: ry,
                                           p_keep: 1.0,
                                           Hin: rh,
                                           batch_size: 1
                                       })
                    rc = txt.sample_from_probabilities(
                        ryo, topn=10 if epoch <= 1 else 2)
                    letter = (chr(txt.convert_to_alphabet(rc)))
                    gen_file.write(letter)
                    print(letter, end='')
                    ry = np.array([[rc]])
                txt.print_text_generation_footer()
                gen_file.write('\n')
            # display progress bar
            progress.step(reset=step % _50_BATCHES == 0)
            # loop state around
            istate = ostate
            step += BATCHSIZE * SEQLEN

        saved_file = saver.save(sess,
                                os.path.join(args.save_dir,
                                             'rnn_{}'.format(args.epochs)),
                                global_step=step)
    print("Saved file: " + saved_file)
Exemplo n.º 19
0
    def __init__(self, inputs_tf, dimo, dimz, dimg, dimu, max_u, o_stats,
                 g_stats, hidden, layers, env_name, **kwargs):
        """Build the discriminator graph: an MI estimator and a skill classifier.

        Two sub-graphs are created. Under the ``state_mi`` scope a small
        network scores matched and shuffled (x, y) pairs and the negated
        MINE-style bound is stored in ``self.mi_tf``. Under the ``skill_ds``
        scope ``nn`` produces ``self.logits_tf``; the softmax cross-entropy
        against ``self.z_tf`` is stored in ``self.sk_tf`` and its negation in
        ``self.sk_r_tf``.

        Args:
            inputs_tf (dict of tensors): all necessary inputs for the network: the
                observation (o), the goal (g), and the action (u)
            dimo (int): the dimension of the observations
            dimz (int): width of the ``z`` placeholder and of the skill logits
            dimg (int): the dimension of the goals
            dimu (int): the dimension of the actions
            max_u (float): the maximum magnitude of actions; action outputs will be scaled
                accordingly
            o_stats (baselines.her.Normalizer): normalizer for observations
            g_stats (baselines.her.Normalizer): normalizer for goals
            hidden (int): number of hidden units that should be used in hidden layers
            layers (int): number of hidden layers
            env_name: forwarded to ``split_observation_tf``
        """
        # NOTE(review): this body reads self.dimo, self.dimz, self.dimg,
        # self.env_name, self.hidden, self.layers and self.o_tau_tf, none of
        # which is assigned here -- presumably they are set before the body
        # runs (e.g. by a decorator on this method); confirm.
        self.o_tf = tf.placeholder(tf.float32, shape=(None, self.dimo))
        self.z_tf = tf.placeholder(tf.float32, shape=(None, self.dimz))
        self.g_tf = tf.placeholder(tf.float32, shape=(None, self.dimg))

        tau_features, tau_achieved = split_observation_tf(
            self.env_name, self.o_tau_tf)

        obs_excludes_goal, obs_achieved_goal = split_observation_tf(
            self.env_name, self.o_tf)

        # Mutual Information Neural Estimation over the trajectory tensors.
        with tf.variable_scope('state_mi'):
            # random_shuffle permutes the leading axis, so swap axes 0 and 1,
            # shuffle, and swap back: y ends up shuffled along its axis 1.
            swapped = tf.transpose(tau_achieved, perm=[1, 0, 2])
            swapped_shuffled = tf.random_shuffle(swapped)
            shuffled_achieved = tf.transpose(swapped_shuffled, perm=[1, 0, 2])

            # First half pairs x with its own y, second half with a shuffled y.
            x_pairs = tf.concat([tau_features, tau_features], axis=-2)
            y_pairs = tf.concat([tau_achieved, shuffled_achieved], axis=-2)

            # Forward pass: separate linear maps for x and y, summed, then
            # relu, a scalar linear head and tanh.
            half_width = int(self.hidden / 2)
            x_branch = tf_layers.linear(x_pairs, half_width)
            y_branch = tf_layers.linear(y_pairs, half_width)
            joint_hidden = tf.nn.relu(x_branch + y_branch)
            scores = tf_layers.linear(joint_hidden, 1)
            scores = tf.nn.tanh(scores)

            # Split the scores back into matched and shuffled halves.
            n_matched = tf.shape(tau_features)[-2]
            matched_scores = scores[:, :n_matched, :]
            shuffled_scores = scores[:, n_matched:, :]

            # Negative of the bound (maximise loss == minimise -loss).
            mean_exp_shuffled = tf.reduce_mean(tf.math.exp(shuffled_scores),
                                               axis=-2)
            mean_matched = tf.reduce_mean(matched_scores, axis=-2)
            log_mean_exp = tf.math.log(mean_exp_shuffled)
            negated_bound = -(mean_matched - log_mean_exp)
            self.mi_tf = tf.check_numerics(
                negated_bound, 'check_numerics caught bad neg_loss')

        # Skill discriminator on the achieved-goal part of the observation.
        with tf.variable_scope('skill_ds'):
            layer_sizes = [int(self.hidden / 2)] * self.layers + [self.dimz]
            self.logits_tf = nn(obs_achieved_goal, layer_sizes)
            self.sk_tf = tf.nn.softmax_cross_entropy_with_logits(
                labels=self.z_tf, logits=self.logits_tf)
            self.sk_r_tf = -1 * self.sk_tf
Exemplo n.º 20
0
    ]
# Wrap every cell with dropout on its input, stack the cells into one
# multi-layer cell, then add dropout on the output of the whole stack.
dropcells = [rnn.DropoutWrapper(cell, input_keep_prob=pkeep) for cell in cells]
stacked_cells = rnn.MultiRNNCell(dropcells, state_is_tuple=True)
stacked_cells = rnn.DropoutWrapper(stacked_cells, output_keep_prob=pkeep)

# Let dynamic_rnn do all the work, starting from an all-zero state
init_state = stacked_cells.zero_state(batchsize, tf.float32)
Y_out, last_state = tf.nn.dynamic_rnn(stacked_cells,
                                      Xo,
                                      dtype=tf.float32,
                                      initial_state=init_state)

# Flatten and set up softmax layer
# (the trailing comments give the resulting shape of each tensor)
Y_flat = tf.reshape(Y_out,
                    [-1, state_size])  # batch_size*time_steps,state_size
Ylogits = layers.linear(Y_flat, NUM_CHARS)  # batch_size*time_steps,NUM_CHARS
Yflat_ = tf.reshape(Yo_, [-1, NUM_CHARS])  # batch_size*time_steps,NUM_CHARS
Yo = tf.nn.softmax(Ylogits)  # batch_size*time_steps,NUM_CHARS
Y = tf.argmax(Yo, 1)  # batch_size*time_steps
Y = tf.reshape(Y, [batchsize, -1])  # batch_size,time_steps

# Define our loss function: cross-entropy per character, reshaped back so
# that each row holds the losses of one sequence
loss = tf.nn.softmax_cross_entropy_with_logits(
    logits=Ylogits, labels=Yflat_)  # batch_size*time_steps
loss = tf.reshape(loss, [batchsize, -1])  # batch_size,time_steps

# Define the training step using AdamOptimizer
train_step = tf.train.AdamOptimizer(learn_rate).minimize(loss)

# Define saver to create checkpoints during training
if not os.path.exists("checkpoints"):
Exemplo n.º 21
0
def discriminator_0layer(H, opt, dropout, prefix='', num_outputs=1, is_reuse=None):
	"""Return logits from a single linear layer applied to the squeezed input.

	``H`` is squeezed (all size-1 dimensions removed), passed through
	``tf.nn.dropout`` with ``dropout`` used as the keep probability, and fed
	to one ``layers.linear`` call under the scope ``prefix + 'dis'``.
	``opt`` is accepted but not used by this function.
	"""
	squeezed = tf.squeeze(H)
	bias_init = tf.constant_initializer(0.001, dtype=tf.float32)
	kept = tf.nn.dropout(squeezed, keep_prob=dropout)
	return layers.linear(
		kept,
		num_outputs=num_outputs,
		biases_initializer=bias_init,
		scope=prefix + 'dis',
		reuse=is_reuse,
	)
Exemplo n.º 22
0
    # Build one recurrent cell per layer: LSTM when cell_type == 1, GRU
    # otherwise. Each cell gets dropout on its input.
    if args.cell_type == 1:
        net = [rnn.BasicLSTMCell(args.internal_size, state_is_tuple=False) for _ in range(args.layers)]
    else:
        net = [rnn.GRUCell(args.internal_size) for _ in range(args.layers)]
    net = [rnn.DropoutWrapper(cell, input_keep_prob=dropout_prob) for cell in net]

    # Stack the layers and add dropout on the output of the whole stack.
    multi_rnn = rnn.MultiRNNCell(net, state_is_tuple=False)
    drop_multi_rnn = rnn.DropoutWrapper(multi_rnn, output_keep_prob=dropout_prob)

    Yr, H = tf.nn.dynamic_rnn(drop_multi_rnn, Xo, initial_state=initial_state, dtype=tf.float32)

    # Give the final state a stable graph name.
    H = tf.identity(H, name="H")

    # Collapse the leading dimensions so one linear readout is shared by
    # every position.
    Yflat = tf.reshape(Yr, [-1, args.internal_size])
    Ylogits = layers.linear(Yflat, VOCAB_SIZE)

    Yflat_ = tf.reshape(Yo_, [-1, VOCAB_SIZE])


    # Cross-entropy per position, reshaped back to one row per batch item.
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits,
                                                   labels=Yflat_)  
    loss = tf.reshape(loss, [batchsize, -1])

    # Probabilities and the arg-max prediction, named for later lookup.
    Yo = tf.nn.softmax(Ylogits, name="Yo")
    Y = tf.argmax(Yo, 1)
    Y = tf.reshape(Y, [batchsize, -1], name="Y")

    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    # stats for display
Exemplo n.º 23
0
  def test_dynamic_rnn_decoder_time_major(self):
    """Checks output shapes of the time-major dynamic RNN decoder.

    A GRU encoder feeds its final state to a GRU decoder that is run twice
    over shared variables: once with the training decoder function and once
    with the inference decoder function. The test asserts the shapes of the
    outputs and states and the value of the step-counting context state.
    """
    with self.test_session() as sess:
      with variable_scope.variable_scope(
          "root", initializer=init_ops.constant_initializer(0.5)):
        # Define inputs/outputs to model
        batch_size = 2
        encoder_embedding_size = 3
        decoder_embedding_size = 4
        encoder_hidden_size = 5
        decoder_hidden_size = encoder_hidden_size
        input_sequence_length = 6
        decoder_sequence_length = 7
        num_decoder_symbols = 20
        start_of_sequence_id = end_of_sequence_id = 1
        decoder_embeddings = variable_scope.get_variable(
            "decoder_embeddings", [num_decoder_symbols, decoder_embedding_size],
            initializer=init_ops.random_normal_initializer(stddev=0.1))
        # Time-major layout: [time, batch, depth]
        inputs = constant_op.constant(
            0.5,
            shape=[input_sequence_length, batch_size, encoder_embedding_size])
        decoder_inputs = constant_op.constant(
            0.4,
            shape=[decoder_sequence_length, batch_size, decoder_embedding_size])
        decoder_length = constant_op.constant(
            decoder_sequence_length, dtype=dtypes.int32, shape=[batch_size,])
        with variable_scope.variable_scope("rnn") as scope:
          # setting up weights for computing the final output
          output_fn = lambda x: layers.linear(x, num_decoder_symbols,
                                              scope=scope)

          # Define model
          encoder_outputs, encoder_state = rnn.dynamic_rnn(
              cell=core_rnn_cell_impl.GRUCell(encoder_hidden_size),
              inputs=inputs,
              dtype=dtypes.float32,
              time_major=True,
              scope=scope)

        with variable_scope.variable_scope("decoder") as scope:
          # Train decoder
          decoder_cell = core_rnn_cell_impl.GRUCell(decoder_hidden_size)
          decoder_fn_train = Seq2SeqTest._decoder_fn_with_context_state(
              decoder_fn_lib.simple_decoder_fn_train(
                  encoder_state=encoder_state))
          (decoder_outputs_train, decoder_state_train,
           decoder_context_state_train) = (seq2seq.dynamic_rnn_decoder(
               cell=decoder_cell,
               decoder_fn=decoder_fn_train,
               inputs=decoder_inputs,
               sequence_length=decoder_length,
               time_major=True,
               scope=scope))
          decoder_outputs_train = output_fn(decoder_outputs_train)

          # Setup variable reuse
          scope.reuse_variables()

          # Inference decoder
          decoder_fn_inference = Seq2SeqTest._decoder_fn_with_context_state(
              decoder_fn_lib.simple_decoder_fn_inference(
                  output_fn=output_fn,
                  encoder_state=encoder_state,
                  embeddings=decoder_embeddings,
                  start_of_sequence_id=start_of_sequence_id,
                  end_of_sequence_id=end_of_sequence_id,
                  #TODO: find out why it goes to +1
                  maximum_length=decoder_sequence_length - 1,
                  num_decoder_symbols=num_decoder_symbols,
                  dtype=dtypes.int32))
          (decoder_outputs_inference, decoder_state_inference,
           decoder_context_state_inference) = (seq2seq.dynamic_rnn_decoder(
               cell=decoder_cell,
               decoder_fn=decoder_fn_inference,
               time_major=True,
               scope=scope))

        # Run model
        variables.global_variables_initializer().run()
        (decoder_outputs_train_res, decoder_state_train_res,
         decoder_context_state_train_res) = sess.run([
             decoder_outputs_train, decoder_state_train,
             decoder_context_state_train
         ])
        (decoder_outputs_inference_res, decoder_state_inference_res,
         decoder_context_state_inference_res) = sess.run([
             decoder_outputs_inference, decoder_state_inference,
             decoder_context_state_inference
         ])

        # Assert outputs
        self.assertEqual((decoder_sequence_length, batch_size,
                          num_decoder_symbols), decoder_outputs_train_res.shape)
        self.assertEqual((batch_size, num_decoder_symbols),
                         decoder_outputs_inference_res.shape[1:3])
        self.assertEqual(decoder_sequence_length,
                         decoder_context_state_inference_res)
        self.assertEqual((batch_size, decoder_hidden_size),
                         decoder_state_train_res.shape)
        self.assertEqual((batch_size, decoder_hidden_size),
                         decoder_state_inference_res.shape)
        self.assertEqual(decoder_sequence_length,
                         decoder_context_state_train_res)
        # The dynamic decoder might end earlier than `maximal_length`
        # under inference. The number of decoded steps is the leading axis of
        # the time-major *outputs*; the state's leading axis is the batch
        # size, which would make this check vacuous.
        self.assertGreaterEqual(decoder_sequence_length,
                                decoder_outputs_inference_res.shape[0])
def generative_net(z, data_size):
    """Return ``layers.linear`` applied to ``z`` with ``data_size`` outputs."""
    generated = layers.linear(z, data_size)
    return generated
def generative_net(z, data_size):
  """Project ``z`` through ``layers.linear`` to ``data_size`` outputs."""
  projection = layers.linear(z, data_size)
  return projection
Exemplo n.º 26
0
    def build():
        """Builds the Tensorflow graph.

        Uses the enclosing scope's ``mode``, ``hparams``, ``encoder_decoder``,
        ``input_size``, ``num_classes``, ``no_event_label`` and
        ``sequence_example_file_paths``.

        For 'train' and 'eval' the inputs come from padded batches of
        sequence examples and loss/metric tensors are registered as summaries
        and collections. For 'generate' the inputs are a placeholder and the
        softmax, temperature and RNN state tensors are added to collections.
        """
        inputs, labels, lengths = None, None, None

        if mode in ('train', 'eval'):
            # A scalar label has an empty shape; otherwise one entry per
            # element of no_event_label.
            if isinstance(no_event_label, numbers.Number):
                label_shape = []
            else:
                label_shape = [len(no_event_label)]
            inputs, labels, lengths = magenta.common.get_padded_batch(
                sequence_example_file_paths,
                hparams.batch_size,
                input_size,
                label_shape=label_shape,
                shuffle=mode == 'train')

        elif mode == 'generate':
            inputs = tf.placeholder(tf.float32,
                                    [hparams.batch_size, None, input_size])

        # Index-encoded inputs are expanded to one-hot vectors before they
        # reach the RNN; other encodings are passed through unchanged.
        if isinstance(encoder_decoder,
                      magenta.music.OneHotIndexEventSequenceEncoderDecoder):
            expanded_inputs = tf.one_hot(
                tf.cast(tf.squeeze(inputs, axis=-1), tf.int64),
                encoder_decoder.input_depth)
        else:
            expanded_inputs = inputs

        dropout_keep_prob = 1.0 if mode == 'generate' else hparams.dropout_keep_prob

        if hparams.use_cudnn:
            outputs, initial_state, final_state = make_cudnn(
                expanded_inputs,
                hparams.rnn_layer_sizes,
                hparams.batch_size,
                mode,
                dropout_keep_prob=dropout_keep_prob,
                residual_connections=hparams.residual_connections)

        else:
            cell = make_rnn_cell(
                hparams.rnn_layer_sizes,
                dropout_keep_prob=dropout_keep_prob,
                attn_length=hparams.attn_length,
                residual_connections=hparams.residual_connections)

            initial_state = cell.zero_state(hparams.batch_size, tf.float32)

            # Feed the expanded inputs, exactly as the cuDNN branch does;
            # passing the raw `inputs` would bypass the one-hot expansion.
            outputs, final_state = tf.nn.dynamic_rnn(
                cell,
                expanded_inputs,
                sequence_length=lengths,
                initial_state=initial_state,
                swap_memory=True)

        outputs_flat = magenta.common.flatten_maybe_padded_sequences(
            outputs, lengths)
        # With several label classes, one logit vector holds all of them
        # side by side.
        if isinstance(num_classes, numbers.Number):
            num_logits = num_classes
        else:
            num_logits = sum(num_classes)
        logits_flat = contrib_layers.linear(outputs_flat, num_logits)

        if mode in ('train', 'eval'):
            labels_flat = magenta.common.flatten_maybe_padded_sequences(
                labels, lengths)

            if isinstance(num_classes, numbers.Number):
                softmax_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=labels_flat, logits=logits_flat)
                predictions_flat = tf.argmax(logits_flat, axis=1)
            else:
                # Slice the shared logit vector into one segment per class
                # group; logits_offsets holds the segment boundaries.
                logits_offsets = np.cumsum([0] + num_classes)
                softmax_cross_entropy = []
                predictions = []
                for i in range(len(num_classes)):
                    softmax_cross_entropy.append(
                        tf.nn.sparse_softmax_cross_entropy_with_logits(
                            labels=labels_flat[:, i],
                            logits=logits_flat[:, logits_offsets[i]:
                                               logits_offsets[i + 1]]))
                    predictions.append(
                        tf.argmax(
                            logits_flat[:,
                                        logits_offsets[i]:logits_offsets[i +
                                                                         1]],
                            axis=1))
                predictions_flat = tf.stack(predictions, 1)

            correct_predictions = tf.to_float(
                tf.equal(labels_flat, predictions_flat))
            event_positions = tf.to_float(
                tf.not_equal(labels_flat, no_event_label))
            no_event_positions = tf.to_float(
                tf.equal(labels_flat, no_event_label))

            # Compute the total number of time steps across all sequences in the
            # batch. For some models this will be different from the number of RNN
            # steps.
            def batch_labels_to_num_steps(batch_labels, lengths):
                num_steps = 0
                for labels, length in zip(batch_labels, lengths):
                    num_steps += encoder_decoder.labels_to_num_steps(
                        labels[:length])
                return np.float32(num_steps)

            num_steps = tf.py_func(batch_labels_to_num_steps,
                                   [labels, lengths], tf.float32)

            if mode == 'train':
                loss = tf.reduce_mean(softmax_cross_entropy)
                perplexity = tf.exp(loss)
                accuracy = tf.reduce_mean(correct_predictions)
                event_accuracy = (
                    tf.reduce_sum(correct_predictions * event_positions) /
                    tf.reduce_sum(event_positions))
                no_event_accuracy = (
                    tf.reduce_sum(correct_predictions * no_event_positions) /
                    tf.reduce_sum(no_event_positions))

                loss_per_step = tf.reduce_sum(
                    softmax_cross_entropy) / num_steps
                perplexity_per_step = tf.exp(loss_per_step)

                optimizer = tf.train.AdamOptimizer(
                    learning_rate=hparams.learning_rate)

                train_op = contrib_slim.learning.create_train_op(
                    loss, optimizer, clip_gradient_norm=hparams.clip_norm)
                tf.add_to_collection('train_op', train_op)

                vars_to_summarize = {
                    'loss': loss,
                    'metrics/perplexity': perplexity,
                    'metrics/accuracy': accuracy,
                    'metrics/event_accuracy': event_accuracy,
                    'metrics/no_event_accuracy': no_event_accuracy,
                    'metrics/loss_per_step': loss_per_step,
                    'metrics/perplexity_per_step': perplexity_per_step,
                }
            elif mode == 'eval':
                vars_to_summarize, update_ops = contrib_metrics.aggregate_metric_map(
                    {
                        'loss':
                        tf.metrics.mean(softmax_cross_entropy),
                        'metrics/accuracy':
                        tf.metrics.accuracy(labels_flat, predictions_flat),
                        'metrics/per_class_accuracy':
                        tf.metrics.mean_per_class_accuracy(
                            labels_flat, predictions_flat, num_classes),
                        'metrics/event_accuracy':
                        tf.metrics.recall(event_positions,
                                          correct_predictions),
                        'metrics/no_event_accuracy':
                        tf.metrics.recall(no_event_positions,
                                          correct_predictions),
                        'metrics/loss_per_step':
                        tf.metrics.mean(tf.reduce_sum(softmax_cross_entropy) /
                                        num_steps,
                                        weights=num_steps),
                    })
                for updates_op in update_ops.values():
                    tf.add_to_collection('eval_ops', updates_op)

                # Perplexity is just exp(loss) and doesn't need its own update op.
                vars_to_summarize['metrics/perplexity'] = tf.exp(
                    vars_to_summarize['loss'])
                vars_to_summarize['metrics/perplexity_per_step'] = tf.exp(
                    vars_to_summarize['metrics/loss_per_step'])

            for var_name, var_value in six.iteritems(vars_to_summarize):
                tf.summary.scalar(var_name, var_value)
                tf.add_to_collection(var_name, var_value)

        elif mode == 'generate':
            # Logits are divided by a scalar temperature fed at run time
            # before the softmax.
            temperature = tf.placeholder(tf.float32, [])
            if isinstance(num_classes, numbers.Number):
                softmax_flat = tf.nn.softmax(
                    tf.div(logits_flat, tf.fill([num_classes], temperature)))
                softmax = tf.reshape(softmax_flat,
                                     [hparams.batch_size, -1, num_classes])
            else:
                logits_offsets = np.cumsum([0] + num_classes)
                softmax = []
                for i in range(len(num_classes)):
                    sm = tf.nn.softmax(
                        tf.div(
                            logits_flat[:,
                                        logits_offsets[i]:logits_offsets[i +
                                                                         1]],
                            tf.fill([num_classes[i]], temperature)))
                    sm = tf.reshape(sm,
                                    [hparams.batch_size, -1, num_classes[i]])
                    softmax.append(sm)

            tf.add_to_collection('inputs', inputs)
            tf.add_to_collection('temperature', temperature)
            tf.add_to_collection('softmax', softmax)
            # Flatten state tuples for metagraph compatibility.
            for state in tf_nest.flatten(initial_state):
                tf.add_to_collection('initial_state', state)
            for state in tf_nest.flatten(final_state):
                tf.add_to_collection('final_state', state)
Exemplo n.º 27
0
def vgg_16(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           dataset='cifar',
           scope='vgg_16'):
    """VGG-16-style network with batch norm after every convolution.

    Thirteen 3x3 conv + batch-norm + ReLU units are interleaved with 2x2 max
    pooling. The result is flattened and passed through dropout, one
    fully connected ReLU layer, dropout again, and a final linear layer that
    produces ``num_classes`` outputs.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the dropout
        layers during training.
      spatial_squeeze: accepted for signature compatibility; not used, because
        the features are flattened before the classifier layers.
      dataset: 'cifar' uses the full layer widths and applies the fifth
        pooling layer; 'f_mnist' uses a quarter of every width and skips the
        fifth pooling layer.
      scope: Optional scope for the variables.

    Returns:
      A ``(net, end_points)`` tuple: the output of the final linear layer and
      the end_points dict.

    Raises:
      NotImplementedError: if ``dataset`` is neither 'cifar' nor 'f_mnist'.
    """
    with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with arg_scope(
            [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
                outputs_collections=end_points_collection):

            def ConvBatchRelu(layer_input, n_output_plane, name):
                """3x3 convolution followed by batch norm with ReLU."""
                with variable_scope.variable_scope(name):
                    output = layers.conv2d(layer_input,
                                           n_output_plane, [3, 3],
                                           scope='conv')
                    output = layers.batch_norm(output,
                                               center=True,
                                               scale=True,
                                               activation_fn=tf.nn.relu,
                                               is_training=is_training)
                return output

            # Widths of the 13 conv units followed by the hidden dense layer.
            filters = [
                64, 64, 128, 128, 256, 256, 256, 512, 512, 512, 512, 512, 512,
                512
            ]
            if dataset == 'f_mnist':
                filters = [width // 4 for width in filters]
            elif dataset != 'cifar':
                raise NotImplementedError(
                    "Dataset {} is not supported!".format(dataset))

            net = ConvBatchRelu(inputs, filters[0], 'conv1_1')
            net = ConvBatchRelu(net, filters[1], 'conv1_2')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
            net = ConvBatchRelu(net, filters[2], 'conv2_1')
            net = ConvBatchRelu(net, filters[3], 'conv2_2')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
            net = ConvBatchRelu(net, filters[4], 'conv3_1')
            net = ConvBatchRelu(net, filters[5], 'conv3_2')
            net = ConvBatchRelu(net, filters[6], 'conv3_3')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
            net = ConvBatchRelu(net, filters[7], 'conv4_1')
            net = ConvBatchRelu(net, filters[8], 'conv4_2')
            net = ConvBatchRelu(net, filters[9], 'conv4_3')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
            net = ConvBatchRelu(net, filters[10], 'conv5_1')
            net = ConvBatchRelu(net, filters[11], 'conv5_2')
            net = ConvBatchRelu(net, filters[12], 'conv5_3')
            if dataset == 'cifar':
                net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
            # Classifier head: flatten, then dense layers with dropout.
            net = layers.flatten(net, scope='flatten6')
            # Honour the dropout_keep_prob argument (default 0.5) instead of
            # a hard-coded constant.
            net = layers_lib.dropout(net,
                                     dropout_keep_prob,
                                     is_training=is_training,
                                     scope='dropout6')
            net = layers.relu(net, filters[13])
            # Distinct scope so the two dropout layers do not share a name.
            net = layers_lib.dropout(net,
                                     dropout_keep_prob,
                                     is_training=is_training,
                                     scope='dropout7')
            net = layers.linear(net, num_classes)
            # Convert end_points_collection into a end_point dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            end_points[sc.name + '/fc8'] = net
            return net, end_points
Exemplo n.º 28
0
def make_cudnn(inputs,
               rnn_layer_sizes,
               batch_size,
               mode,
               dropout_keep_prob=1.0,
               residual_connections=False):
    """Builds a sequence of cuDNN LSTM layers from the given hyperparameters.

    Args:
      inputs: A tensor of RNN inputs.
      rnn_layer_sizes: A non-empty list of integer sizes (in units) for each
          layer of the RNN.
      batch_size: The number of examples per batch.
      mode: 'train', 'eval', or 'generate'. For 'generate',
          CudnnCompatibleLSTMCell will be used.
      dropout_keep_prob: The float probability to keep the output of any given
          sub-cell.
      residual_connections: Whether or not to use residual connections.

    Returns:
      outputs: A tensor of RNN outputs, with shape
          `[batch_size, inputs.shape[1], rnn_layer_sizes[-1]]`.
      initial_state: The initial RNN states, a tuple with length
          `len(rnn_layer_sizes)` of LSTMStateTuples.
      final_state: The final RNN states, a tuple with length
          `len(rnn_layer_sizes)` of LSTMStateTuples.

    Raises:
      ValueError: If `rnn_layer_sizes` is empty.
    """
    # Without at least one layer no output tensor is ever produced; fail with
    # a clear message before any graph ops are created.
    if len(rnn_layer_sizes) == 0:
        raise ValueError(
            'rnn_layer_sizes must contain at least one layer size.')

    # The layers below run time-major (dynamic_rnn is called with
    # time_major=True); the result is transposed back before returning.
    cudnn_inputs = tf.transpose(inputs, [1, 0, 2])

    if len(set(rnn_layer_sizes)) == 1 and not residual_connections:
        initial_state = tuple(
            contrib_rnn.LSTMStateTuple(
                h=tf.zeros([batch_size, num_units], dtype=tf.float32),
                c=tf.zeros([batch_size, num_units], dtype=tf.float32))
            for num_units in rnn_layer_sizes)

        if mode != 'generate':
            # We can make a single call to CudnnLSTM since all layers are the same
            # size and we aren't using residual connections.
            cudnn_initial_state = state_tuples_to_cudnn_lstm_state(
                initial_state)
            cell = contrib_cudnn_rnn.CudnnLSTM(num_layers=len(rnn_layer_sizes),
                                               num_units=rnn_layer_sizes[0],
                                               direction='unidirectional',
                                               dropout=1.0 - dropout_keep_prob)
            cudnn_outputs, cudnn_final_state = cell(
                cudnn_inputs,
                initial_state=cudnn_initial_state,
                training=mode == 'train')
            final_state = cudnn_lstm_state_to_state_tuples(cudnn_final_state)

        else:
            # At generation time we use CudnnCompatibleLSTMCell.
            cell = contrib_rnn.MultiRNNCell([
                contrib_cudnn_rnn.CudnnCompatibleLSTMCell(num_units)
                for num_units in rnn_layer_sizes
            ])
            cudnn_outputs, final_state = tf.nn.dynamic_rnn(
                cell,
                cudnn_inputs,
                initial_state=initial_state,
                time_major=True,
                scope='cudnn_lstm/rnn')

    else:
        # We need to make multiple calls to CudnnLSTM, keeping the initial and final
        # states at each layer.
        initial_state = []
        final_state = []

        for i, num_units in enumerate(rnn_layer_sizes):
            # If we're using residual connections and this layer is not the same size
            # as the previous layer, we need to project into the new size so the
            # (projected) input can be added to the output. The first layer is
            # always projected.
            if residual_connections and (
                    i == 0 or num_units != rnn_layer_sizes[i - 1]):
                cudnn_inputs = contrib_layers.linear(cudnn_inputs, num_units)

            # One-element tuple so it can be fed to the state converters and
            # appended to the running state lists below.
            layer_initial_state = (contrib_rnn.LSTMStateTuple(
                h=tf.zeros([batch_size, num_units], dtype=tf.float32),
                c=tf.zeros([batch_size, num_units], dtype=tf.float32)), )

            if mode != 'generate':
                cudnn_initial_state = state_tuples_to_cudnn_lstm_state(
                    layer_initial_state)
                cell = contrib_cudnn_rnn.CudnnLSTM(
                    num_layers=1,
                    num_units=num_units,
                    direction='unidirectional',
                    dropout=1.0 - dropout_keep_prob)
                cudnn_outputs, cudnn_final_state = cell(
                    cudnn_inputs,
                    initial_state=cudnn_initial_state,
                    training=mode == 'train')
                layer_final_state = cudnn_lstm_state_to_state_tuples(
                    cudnn_final_state)

            else:
                # At generation time we use CudnnCompatibleLSTMCell.
                cell = contrib_rnn.MultiRNNCell(
                    [contrib_cudnn_rnn.CudnnCompatibleLSTMCell(num_units)])
                cudnn_outputs, layer_final_state = tf.nn.dynamic_rnn(
                    cell,
                    cudnn_inputs,
                    initial_state=layer_initial_state,
                    time_major=True,
                    scope='cudnn_lstm/rnn' if i == 0 else 'cudnn_lstm_%d/rnn' %
                    i)

            if residual_connections:
                cudnn_outputs += cudnn_inputs

            # This layer's output feeds the next layer.
            cudnn_inputs = cudnn_outputs

            initial_state += layer_initial_state
            final_state += layer_final_state

    outputs = tf.transpose(cudnn_outputs, [1, 0, 2])

    return outputs, tuple(initial_state), tuple(final_state)
def train():
    """Builds the model graph and trains it until interrupted.

    The graph feeds `samples` through the RNN returned by `create_model`,
    applies a linear read-out to the last time step and minimizes the L2 error
    plus the budget loss from `compute_budget_loss`. Training runs in an
    endless loop on freshly generated batches; every `FLAGS.evaluate_every`
    iterations the error and the number of used samples are averaged over
    `FLAGS.validation_batches` generated batches, written as summaries and
    printed. A KeyboardInterrupt ends training quietly. The session and the
    summary writer are always closed on exit.
    """
    samples = tf.placeholder(tf.float32,
                             [None, None, INPUT_SIZE])  # (batch, time, in)
    ground_truth = tf.placeholder(tf.float32,
                                  [None, OUTPUT_SIZE])  # (batch, out)

    cell, initial_state = create_model(model=FLAGS.model,
                                       num_cells=[FLAGS.rnn_cells] *
                                       FLAGS.rnn_layers,
                                       batch_size=FLAGS.batch_size)

    rnn_outputs, rnn_states = tf.nn.dynamic_rnn(cell,
                                                samples,
                                                dtype=tf.float32,
                                                initial_state=initial_state)

    # Split the outputs of the RNN into the actual outputs and the state update gate
    rnn_outputs, updated_states = split_rnn_outputs(FLAGS.model, rnn_outputs)

    # Only the last time step is used for the prediction.
    out = layers.linear(inputs=rnn_outputs[:, -1, :], num_outputs=OUTPUT_SIZE)

    # Compute L2 loss
    mse = tf.nn.l2_loss(ground_truth - out) / FLAGS.batch_size

    # Compute loss for each updated state
    budget_loss = compute_budget_loss(FLAGS.model, mse, updated_states,
                                      FLAGS.cost_per_sample)

    # Combine all losses
    loss = mse + budget_loss

    # Optimizer
    opt, grads_and_vars = compute_gradients(loss, FLAGS.learning_rate,
                                            FLAGS.grad_clip)
    train_fn = opt.apply_gradients(grads_and_vars)

    sess = tf.Session()

    log_dir = os.path.join(FLAGS.logdir,
                           datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
    valid_writer = tf.summary.FileWriter(log_dir + '/val')

    try:
        sess.run(tf.global_variables_initializer())

        try:
            num_iters = 0
            while True:
                # Generate new batch and perform SGD update
                x, y = generate_batch(min_val=MIN_VAL,
                                      max_val=MAX_VAL,
                                      seq_length=FLAGS.sequence_length,
                                      batch_size=FLAGS.batch_size)
                sess.run([train_fn], feed_dict={samples: x, ground_truth: y})
                num_iters += 1

                # Evaluate on validation data generated on the fly
                if num_iters % FLAGS.evaluate_every == 0:
                    valid_error, valid_steps = 0., 0.
                    for _ in range(FLAGS.validation_batches):
                        valid_x, valid_y = generate_batch(
                            min_val=MIN_VAL,
                            max_val=MAX_VAL,
                            seq_length=FLAGS.sequence_length,
                            batch_size=FLAGS.batch_size)
                        valid_iter_error, valid_used_inputs = sess.run(
                            [mse, updated_states],
                            feed_dict={
                                samples: valid_x,
                                ground_truth: valid_y
                            })
                        valid_error += valid_iter_error
                        if valid_used_inputs is not None:
                            valid_steps += compute_used_samples(
                                valid_used_inputs)
                        else:
                            # No update-gate output: count every time step
                            # of the sequence as used.
                            valid_steps += FLAGS.sequence_length
                    valid_error /= FLAGS.validation_batches
                    valid_steps /= FLAGS.validation_batches

                    valid_writer.add_summary(
                        scalar_summary('error', valid_error), num_iters)
                    valid_writer.add_summary(
                        scalar_summary('used_samples',
                                       valid_steps / FLAGS.sequence_length),
                        num_iters)
                    valid_writer.flush()

                    print("Iteration %d, "
                          "validation error: %.7f, "
                          "validation samples: %.2f%%" %
                          (num_iters, valid_error,
                           100. * valid_steps / FLAGS.sequence_length))
        except KeyboardInterrupt:
            # Ctrl-C is the intended way to stop the endless training loop.
            pass
    finally:
        # Release the event file and the session's resources on every exit
        # path, including errors raised while training.
        valid_writer.close()
        sess.close()
Exemplo n.º 30
0
def main(args):
    """Main function to train the model.

    Builds a multi-layer GRU byte-level language model, optionally restores an
    existing checkpoint, then trains on the corpus found in `args.input_dir`
    while periodically logging, validating, sampling text (debug mode) and
    saving checkpoints. The session and the summary writers are closed on
    every exit path.

    Args:
      args: Parsed arguments. The fields read here are `input_dir`,
          `model_dir`, `log_dir`, `existing_model`, `batch_size`,
          `dropout_pkeep`, `hidden_state_size`, `hidden_layer_size`,
          `learning_rate`, `debug` and `validation`.

    Returns:
      Execution status defined by `constants.ExitCode`.
    """
    # Validate paths.
    if not validate_paths(args):
        return constants.ExitCode.INVALID_PATH

    # Extract paths.
    input_dir = args.input_dir
    model_dir = args.model_dir
    log_dir = args.log_dir
    existing_model = args.existing_model

    # Extract model parameters.
    batch_size = args.batch_size
    dropout_pkeep = args.dropout_pkeep
    hidden_state_size = args.hidden_state_size
    hidden_layer_size = args.hidden_layer_size
    learning_rate = args.learning_rate

    # Extract additional flags.
    debug = args.debug
    validation = args.validation

    # Split corpus for training and validation.
    # validation_text will be empty if validation is False.
    code_text, validation_text, input_ranges = utils.read_data_files(
        input_dir, validation=validation)

    # Bail out if we don't have enough corpus for training.
    if len(code_text) < batch_size * constants.TRAINING_SEQLEN + 1:
        return constants.ExitCode.CORPUS_TOO_SMALL

    # Get corpus files info. Will be used in debug mode to generate sample text.
    files_info_list = []
    if debug:
        files_info_list = utils.get_files_info(input_dir)
        assert files_info_list

    # Calculate validation batch size. It will be 0 if we choose not to validate.
    validation_batch_size = len(validation_text) // constants.VALIDATION_SEQLEN

    # Display some stats on the data.
    epoch_size = len(code_text) // (batch_size * constants.TRAINING_SEQLEN)
    utils.print_data_stats(len(code_text), len(validation_text), epoch_size)

    # Set graph-level random seed, so any random sequence generated in this
    # graph is repeatable. It could also be removed.
    tf.set_random_seed(0)

    # Define placeholder for learning rate, dropout and batch size.
    lr = tf.placeholder(tf.float32, name='lr')
    pkeep = tf.placeholder(tf.float32, name='pkeep')
    batchsize = tf.placeholder(tf.int32, name='batchsize')

    # Input data.
    input_bytes = tf.placeholder(tf.uint8, [None, None], name='input_bytes')
    input_onehot = tf.one_hot(input_bytes, constants.ALPHA_SIZE, 1.0, 0.0)

    # Expected outputs = same sequence shifted by 1, since we are trying to
    # predict the next character.
    expected_bytes = tf.placeholder(tf.uint8, [None, None],
                                    name='expected_bytes')
    expected_onehot = tf.one_hot(expected_bytes, constants.ALPHA_SIZE, 1.0,
                                 0.0)

    # Input state. All layers' states are concatenated into one tensor
    # because the MultiRNNCell below uses state_is_tuple=False.
    hidden_state = tf.placeholder(
        tf.float32, [None, hidden_state_size * hidden_layer_size],
        name='hidden_state')

    # "naive dropout" implementation.
    cells = [rnn.GRUCell(hidden_state_size) for _ in range(hidden_layer_size)]
    dropcells = [
        rnn.DropoutWrapper(cell, input_keep_prob=pkeep) for cell in cells
    ]
    multicell = rnn.MultiRNNCell(dropcells, state_is_tuple=False)
    multicell = rnn.DropoutWrapper(multicell, output_keep_prob=pkeep)

    output_raw, next_state = tf.nn.dynamic_rnn(multicell,
                                               input_onehot,
                                               dtype=tf.float32,
                                               initial_state=hidden_state)
    next_state = tf.identity(next_state, name='next_state')

    # Reshape training outputs.
    output_flat = tf.reshape(output_raw, [-1, hidden_state_size])
    output_logits = layers.linear(output_flat, constants.ALPHA_SIZE)

    # Reshape expected outputs.
    expected_flat = tf.reshape(expected_onehot, [-1, constants.ALPHA_SIZE])

    # Compute training loss.
    loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=output_logits,
                                                      labels=expected_flat)
    loss = tf.reshape(loss, [batchsize, -1])

    # Use softmax to normalize training outputs.
    output_onehot = tf.nn.softmax(output_logits, name='output_onehot')

    # Use argmax to get the max value, which is the predicted bytes.
    output_bytes = tf.argmax(output_onehot, 1)
    output_bytes = tf.reshape(output_bytes, [batchsize, -1],
                              name='output_bytes')

    # Choose Adam optimizer to compute gradients.
    optimizer = tf.train.AdamOptimizer(lr).minimize(loss)

    # Stats for display.
    seqloss = tf.reduce_mean(loss, 1)
    batchloss = tf.reduce_mean(seqloss)
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(expected_bytes, tf.cast(output_bytes, tf.uint8)),
                tf.float32))
    loss_summary = tf.summary.scalar('batch_loss', batchloss)
    acc_summary = tf.summary.scalar('batch_accuracy', accuracy)
    summaries = tf.summary.merge([loss_summary, acc_summary])

    # Init Tensorboard stuff.
    # This will save Tensorboard information in folder specified in command line.
    # Two sets of data are saved so that you can compare training and
    # validation curves visually in Tensorboard.
    timestamp = str(math.trunc(time.time()))
    summary_writer = tf.summary.FileWriter(
        os.path.join(log_dir, timestamp + '-training'))
    validation_writer = tf.summary.FileWriter(
        os.path.join(log_dir, timestamp + '-validation'))

    # Init for saving models.
    # They will be saved into a directory specified in command line.
    saver = tf.train.Saver(max_to_keep=constants.MAX_TO_KEEP)

    # For display: init the progress bar.
    step_size = batch_size * constants.TRAINING_SEQLEN
    frequency = constants.DISPLAY_FREQ * step_size
    progress = utils.Progress(constants.DISPLAY_FREQ,
                              size=constants.DISPLAY_LEN,
                              msg='Training on next {} batches'.format(
                                  constants.DISPLAY_FREQ))

    # Set initial state.
    state = np.zeros([batch_size, hidden_state_size * hidden_layer_size])
    session = tf.Session()

    try:
        # We continue training on existing model, or start with a new model.
        if existing_model:
            print('Continue training on existing model: {}'.format(
                existing_model))
            try:
                saver.restore(session, existing_model)
            except Exception:
                # Deliberately broad for restore failures, but unlike a bare
                # `except:` it lets KeyboardInterrupt/SystemExit propagate.
                print(('Failed to restore existing model since model '
                       'parameters do not match.'),
                      file=sys.stderr)
                return constants.ExitCode.TENSORFLOW_ERROR
        else:
            print('No existing model provided. '
                  'Start training with a new model.')
            session.run(tf.global_variables_initializer())

        # Num of bytes we have trained so far.
        steps = 0

        # Training loop.
        for input_batch, expected_batch, epoch in utils.rnn_minibatch_sequencer(
                code_text,
                batch_size,
                constants.TRAINING_SEQLEN,
                nb_epochs=constants.EPOCHS):

            # Train on one mini-batch.
            feed_dict = {
                input_bytes: input_batch,
                expected_bytes: expected_batch,
                hidden_state: state,
                lr: learning_rate,
                pkeep: dropout_pkeep,
                batchsize: batch_size
            }

            _, predicted, new_state = session.run(
                [optimizer, output_bytes, next_state], feed_dict=feed_dict)

            # Log training data for Tensorboard display a mini-batch of
            # sequences every `frequency` batches.
            if debug and steps % frequency == 0:
                # Dropout is disabled (pkeep = 1.0) for reporting.
                feed_dict = {
                    input_bytes: input_batch,
                    expected_bytes: expected_batch,
                    hidden_state: state,
                    pkeep: 1.0,
                    batchsize: batch_size
                }
                (predicted, seq_loss, batch_loss, acc_value,
                 summaries_value) = session.run(
                     [output_bytes, seqloss, batchloss, accuracy, summaries],
                     feed_dict=feed_dict)
                utils.print_learning_learned_comparison(
                    input_batch, predicted, seq_loss, input_ranges,
                    batch_loss, acc_value, epoch_size, steps, epoch)
                summary_writer.add_summary(summaries_value, steps)

            # Run a validation step every `frequency` batches.
            # The validation text should be a single sequence but that's too
            # slow. We cut it up and batch the pieces (slightly inaccurate).
            if validation and steps % frequency == 0 and validation_batch_size:
                utils.print_validation_header(len(code_text), input_ranges)
                validation_x, validation_y, _ = next(
                    utils.rnn_minibatch_sequencer(
                        validation_text, validation_batch_size,
                        constants.VALIDATION_SEQLEN, 1))
                null_state = np.zeros([
                    validation_batch_size,
                    hidden_state_size * hidden_layer_size
                ])
                feed_dict = {
                    input_bytes: validation_x,
                    expected_bytes: validation_y,
                    hidden_state: null_state,
                    pkeep: 1.0,
                    batchsize: validation_batch_size
                }
                batch_loss, acc_value, summaries_value = session.run(
                    [batchloss, accuracy, summaries], feed_dict=feed_dict)
                utils.print_validation_stats(batch_loss, acc_value)

                # Save validation data for Tensorboard.
                validation_writer.add_summary(summaries_value, steps)

            # Display a short text generated with the current weights and
            # biases. If enabled, there will be a large output.
            if debug and steps // 4 % frequency == 0:
                utils.print_text_generation_header()
                file_info = utils.random_element_from_list(files_info_list)
                first_byte = file_info['first_byte']
                file_size = file_info['file_size']
                ry = np.array([[first_byte]])
                rh = np.zeros([1, hidden_state_size * hidden_layer_size])
                sample = [first_byte]
                # Generate one byte at a time, feeding each sampled byte and
                # the returned state back into the network.
                for _ in range(file_size - 1):
                    feed_dict = {
                        input_bytes: ry,
                        pkeep: 1.0,
                        hidden_state: rh,
                        batchsize: 1
                    }
                    ryo, rh = session.run([output_onehot, next_state],
                                          feed_dict=feed_dict)
                    rc = utils.sample_from_probabilities(
                        ryo, topn=10 if epoch <= 1 else 2)
                    sample.append(rc)
                    ry = np.array([[rc]])
                print(repr(utils.decode_to_text(sample)))
                utils.print_text_generation_footer()

            # Save a checkpoint every `10 * frequency` batches. Each
            # checkpoint is a version of model.
            if steps // 10 % frequency == 0:
                saved_model_name = constants.RNN_MODEL_NAME + '_' + timestamp
                saved_model_path = os.path.join(model_dir, saved_model_name)
                saved_model = saver.save(session,
                                         saved_model_path,
                                         global_step=steps)
                print('Saved model: {}'.format(saved_model))

            # Display progress bar.
            if debug:
                progress.step(reset=steps % frequency == 0)

            # Update state.
            state = new_state
            steps += step_size

        # Save the model after training is done.
        saved_model_name = constants.RNN_MODEL_NAME + '_' + timestamp
        saved_model_path = os.path.join(model_dir, saved_model_name)
        saved_model = saver.save(session, saved_model_path, global_step=steps)
        print('Saved model: {}'.format(saved_model))

        return constants.ExitCode.SUCCESS
    finally:
        # Flush and release the event files and the session on every exit
        # path (success, failed restore, or an exception during training).
        summary_writer.close()
        validation_writer.close()
        session.close()
Exemplo n.º 31
0
def discriminator(x, hidden_size, scope='Discriminator', reuse=False):
  """Maps `x` to a sigmoid output through three tanh layers and a linear unit.

  Args:
    x: Input tensor passed to the first linear layer.
    hidden_size: Each hidden layer has `hidden_size * 2` outputs.
    scope: Name of the variable scope that holds the layers.
    reuse: Forwarded to `tf.variable_scope` as its `reuse` argument.

  Returns:
    The sigmoid of a single-output linear layer applied to the last hidden
    layer.
  """
  layer_width = hidden_size * 2
  with tf.variable_scope(scope, reuse=reuse):
    hidden = x
    # Three identical tanh-activated layers, created in order.
    for _ in range(3):
      hidden = tf.tanh(layers.linear(hidden, layer_width))
    logit = layers.linear(hidden, 1)
    return tf.sigmoid(logit)
Exemplo n.º 32
0
def create_model(input_tensor, mode, hyper_params):
    """
    Builds a GRU-based function classifier and returns its output tensors.

    :param input_tensor: A dictionary containing all input tensors; only the "feature" entry is read here.
    :param mode: If the network is training or evaluating (tf.estimator.ModeKeys)
    :param hyper_params: The hyper parameters object containing {"arch": {"pkeep": Float, "sequence_length": Int,
        "hidden_layer_depth": Int, "hidden_layer_size": Int, "output_dimension": Int}}; the batch sizes are read
        from hyper_params.train.batch_size and hyper_params.train.validation_batch_size.
    :return: The model as a dictionary of output tensors with keys "logits" and "probs".
    """
    outputs = {}
    mode_keys = tf.estimator.ModeKeys
    with tf.variable_scope('GruFunctionClassifier'):
        # The batch size depends on the mode: training size by default,
        # validation size when evaluating, a single example when predicting.
        batch_size = hyper_params.train.batch_size
        if mode == mode_keys.EVAL:
            batch_size = hyper_params.train.validation_batch_size
        if mode == mode_keys.PREDICT:
            batch_size = 1

        # Define inputs: one scalar feature per time step.
        features = tf.reshape(
            input_tensor["feature"],
            (batch_size, hyper_params.arch.sequence_length, 1))
        # Zero initial state, with all layers' states side by side
        # (the MultiRNNCell below uses state_is_tuple=False).
        state_width = (hyper_params.arch.hidden_layer_size *
                       hyper_params.arch.hidden_layer_depth)
        Hin = tf.zeros([batch_size, state_width], tf.float32, name="Hin")

        # Define the actual cells
        cells = [
            rnn.GRUCell(hyper_params.arch.hidden_layer_size)
            for _ in range(hyper_params.arch.hidden_layer_depth)
        ]

        # "naive dropout" implementation: input dropout on every cell,
        # applied only while training.
        if mode == mode_keys.TRAIN:
            cells = [
                rnn.DropoutWrapper(
                    cell, input_keep_prob=hyper_params.arch.pkeep)
                for cell in cells
            ]

        multicell = rnn.MultiRNNCell(cells, state_is_tuple=False)
        if mode == mode_keys.TRAIN:
            # Output dropout in front of the read-out layer.
            multicell = rnn.DropoutWrapper(
                multicell, output_keep_prob=hyper_params.arch.pkeep)

        Yr, H = tf.nn.dynamic_rnn(multicell,
                                  features,
                                  dtype=tf.float32,
                                  initial_state=Hin)
        H = tf.identity(H, name='H')  # just to give it a name

        # Read-out: move time to the leading axis, take the output of the
        # last time step, and feed it to a linear layer followed by softmax.
        time_major = tf.transpose(Yr, [1, 0, 2])
        last_index = int(time_major.get_shape()[0]) - 1
        last = tf.gather(time_major, last_index)
        logits = layers.linear(last, hyper_params.arch.output_dimension)
        outputs["logits"] = logits
        outputs["probs"] = tf.nn.softmax(outputs["logits"], name="probs")
    return outputs
Exemplo n.º 33
0
    def __init__(self, params):
        """
        Build the LSTM graph, its output projections and the training op.

        :param params: dictionary with fields:
            "N_HIDDEN": number of hidden states
            "N_BINS": number of bins on input/ output
            "LEARNING_RATE": learning rate in optimizer
        """
        self.params = params

        # Start from a fresh default graph and open the session that goes
        # with it.
        tf.reset_default_graph()
        self.session = tf.Session()

        # inputs, outputs and mask placeholders all share this shape.
        bins_shape = (None, None, params['N_BINS'])
        self.inputs = tf.placeholder(tf.float32, bins_shape)

        self.cell = tf.contrib.rnn.LSTMCell(params['N_HIDDEN'],
                                            state_is_tuple=True)
        # dynamic_rnn below runs time-major, so axis 1 is the batch axis.
        self.batch_size = tf.shape(self.inputs)[1]

        def trainable_start_state():
            """Return a trainable [1, N_HIDDEN] variable and its batch tiling."""
            seed = tf.Variable(tf.zeros([1, params['N_HIDDEN']]),
                               trainable=True)
            return seed, tf.tile(seed, [self.batch_size, 1])

        # Created in the same order as before: hidden state first, then cell.
        self.h_init, self.h_init_til = trainable_start_state()
        self.c_init, self.c_init_til = trainable_start_state()

        self.initial_state = LSTMStateTuple(self.c_init_til, self.h_init_til)

        rnn_result = tf.nn.dynamic_rnn(self.cell,
                                       self.inputs,
                                       initial_state=self.initial_state,
                                       time_major=True)
        self.rnn_outputs, self.rnn_states = rnn_result

        with tf.variable_scope("output"):

            def project_to_hidden(x):
                return layers.fully_connected(x,
                                              num_outputs=params['N_HIDDEN'])

            def project_to_bins(x):
                return layers.linear(x, num_outputs=params['N_BINS'])

            self.intermediate_projection = project_to_hidden
            self.final_projection = project_to_bins

            # Each projection is mapped over the leading axis of its input.
            self.intermediate_features = tf.map_fn(
                self.intermediate_projection, self.rnn_outputs)
            self.final_features = tf.map_fn(self.final_projection,
                                            self.intermediate_features)
            self.predicted_outputs = layers.softmax(self.final_features)

        with tf.variable_scope("train"):
            self.outputs = tf.placeholder(tf.float32, bins_shape)
            self.mask = tf.placeholder(tf.float32, bins_shape)

            # The mask multiplies the targets before the cross-entropy.
            masked_targets = self.outputs * self.mask
            self.all_errors = losses.categorical_crossentropy(
                masked_targets, self.predicted_outputs)

            self.error = tf.reduce_mean(self.all_errors)

            adam = tf.train.AdamOptimizer(
                learning_rate=params['LEARNING_RATE'])
            self.train_fn = adam.minimize(self.error)
Exemplo n.º 34
0
# RNN
# Builds a single-layer GRU language-model graph: one-hot inputs -> GRU ->
# linear read-out -> softmax cross-entropy, trained with Adagrad.

# [ BATCHSIZE, SEQLEN, ALPHASIZE ]
# (assumes `inputs` / `targets` are [BATCHSIZE, SEQLEN] integer ids, so that
# one_hot appends a trailing axis of size VOCAB_SIZE — TODO confirm)
input_x = tf.one_hot(inputs, VOCAB_SIZE)
input_y = tf.one_hot(targets, VOCAB_SIZE)

# creating RNN cell
rnn_cell = tf.contrib.rnn.GRUCell(HIDDEN_SIZE)

# run RNN
# `init_state` is defined outside this fragment and seeds the recurrence.
rnn_outputs, final_state = tf.nn.dynamic_rnn(rnn_cell, input_x,
					     initial_state=init_state, dtype=tf.float32)

# add dense layer on top of the RNN outputs
# Flatten to rows of HIDDEN_SIZE so the same linear layer is applied to
# every row; the labels are flattened the same way so the rows line up.
rnn_outputs_flat = tf.reshape(rnn_outputs, [-1, HIDDEN_SIZE])
dense_layer = layers.linear(rnn_outputs_flat, VOCAB_SIZE) 
labels = tf.reshape(input_y, [-1, VOCAB_SIZE])
output_softmax = tf.nn.softmax(dense_layer)

# Loss
# Cross-entropy is summed (not averaged) over all flattened rows.
loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=dense_layer, labels=labels))

# Minimizer
# NOTE: `minimizer` holds the value returned by `.minimize(loss)`, not the
# AdagradOptimizer object itself.
minimizer = tf.train.AdagradOptimizer(learning_rate=LEARNING_RATE).minimize(loss)

# Gradient clipping
'''
# Here's where the magic happens!
grads_and_vars = minimizer.compute_gradients(loss)

grad_clipping = tf.constant(5.0, name="grad_clipping")
Exemplo n.º 35
0
def discriminator_0layer(H, opt, dropout, prefix='', num_outputs=1, is_reuse=None):
    """Linear discriminator head: squeeze, dropout, then one linear layer.

    Args:
        H: input tensor; every size-1 dimension is removed with `tf.squeeze`.
        opt: accepted for interface compatibility; not used in this function.
        dropout: forwarded to `tf.nn.dropout` as `keep_prob`.
        prefix: string prepended to `'dis'` to form the layer's scope name.
        num_outputs: number of units produced by the linear layer.
        is_reuse: forwarded to `layers.linear` as `reuse`.

    Returns:
        The output tensor of the linear layer.
    """
    squeezed = tf.squeeze(H)
    dropped = tf.nn.dropout(squeezed, keep_prob=dropout)
    return layers.linear(
        dropped,
        num_outputs=num_outputs,
        # Biases start at a small positive constant.
        biases_initializer=tf.constant_initializer(0.001, dtype=tf.float32),
        scope=prefix + 'dis',
        reuse=is_reuse)
Exemplo n.º 36
0
    def train(self):
        """Build the skip-LSTM classifier graph, train it and dump results.

        The method
          1. creates placeholders for samples, labels, per-token
             probabilities and the padding mask,
          2. builds a 'skip_lstm' model through `create_model`, classifies
             from the RNN output at the last time step and combines the
             cross-entropy with the budget and surprisal losses,
          3. runs `NUM_EPOCHS` epochs of training / validation / test
             (optionally with early stopping), logging through
             `self.logger`,
          4. writes per-epoch metrics to `../csvs/<FILE_NAME>.csv` and the
             read / skipped word statistics to `../analysis/`.

        A `KeyboardInterrupt` during the epoch loop stops training but still
        writes the results collected so far. Any other exception raised in
        the loop closes the session, resets the default graph and is
        re-raised.

        Returns:
            None. Results are reported through log messages and files.
        """
        samples = tf.placeholder(tf.float32,
                                 shape=[
                                     self.BATCH_SIZE, self.SEQUENCE_LENGTH,
                                     self.EMBEDDING_LENGTH
                                 ],
                                 name='Samples')  # (batch, time, in)
        ground_truth = tf.placeholder(tf.int64,
                                      shape=[self.BATCH_SIZE],
                                      name='GroundTruth')
        probs = tf.placeholder(
            tf.float32,
            shape=[self.BATCH_SIZE, self.SEQUENCE_LENGTH, 1],
            name='Probs')
        mask = tf.placeholder(tf.float32,
                              shape=[self.BATCH_SIZE, self.SEQUENCE_LENGTH, 1],
                              name='padding_mask')

        cell, initial_state = create_model(model='skip_lstm',
                                           num_cells=[self.HIDDEN_UNITS],
                                           batch_size=self.BATCH_SIZE)

        rnn_outputs, rnn_states = tf.nn.dynamic_rnn(
            cell, samples, dtype=tf.float32, initial_state=initial_state)

        # Split the outputs of the RNN into the actual outputs and the state
        # update gate
        rnn_outputs, updated_states = split_rnn_outputs(
            'skip_lstm', rnn_outputs)

        # Classify from the output of the last time step only.
        logits = layers.linear(inputs=rnn_outputs[:, -1, :],
                               num_outputs=self.OUTPUT_SIZE)
        predictions = tf.argmax(logits, 1)

        # Debug guard on the labels.
        # NOTE(review): because of reduce_any this only prints when *no*
        # label in the batch equals 0 or 1 -- confirm that is the intent.
        printer_lab = tf.cond(
            tf.math.reduce_any(
                tf.logical_or(
                    tf.equal(tf.zeros_like(ground_truth), ground_truth),
                    tf.equal(tf.ones_like(ground_truth),
                             ground_truth))), lambda: tf.no_op(),
            lambda: tf.print("Found a label out of range: ", [ground_truth]))
        # Compute cross-entropy loss
        with tf.control_dependencies([printer_lab]):
            cross_entropy_per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=ground_truth)

        printer_Nan = tf.cond(
            tf.math.reduce_any(tf.math.is_nan(cross_entropy_per_sample)),
            lambda: tf.print("Found NaN in entropy loss",
                             output_stream=sys.stderr), lambda: tf.no_op())
        with tf.control_dependencies([printer_Nan]):
            # Average over finite entries only, so NaN/inf samples do not
            # poison the batch loss.
            cross_entropy = tf.reduce_mean(
                tf.boolean_mask(cross_entropy_per_sample,
                                tf.is_finite(cross_entropy_per_sample)))

        # Compute accuracy
        accuracy = tf.reduce_mean(
            tf.cast(tf.equal(predictions, ground_truth), tf.float32))

        # Compute loss for each updated state
        budget_loss = compute_budget_loss('skip_lstm', cross_entropy,
                                          updated_states, self.COST_PER_SAMPLE,
                                          mask)

        # Compute loss for the amount of surprisal
        surprisal_loss = compute_surprisal_loss('skip_lstm', cross_entropy,
                                                updated_states, probs,
                                                self.SURPRISAL_COST, mask)

        loss = cross_entropy + budget_loss + surprisal_loss
        loss = tf.reshape(loss, [])

        # A NaN total loss is replaced by 1 before gradients are computed.
        loss = tf.where(tf.is_nan(loss), tf.ones_like(loss), loss)

        # Optimizer (third argument: 1 is for gradient clipping)
        opt, grads_and_vars = compute_gradients(loss, self.LEARNING_RATE, 1)
        train_fn = opt.apply_gradients(grads_and_vars)

        sess = tf.Session()

        # Initialize weights
        sess.run(tf.global_variables_initializer())

        # Results
        train_loss_plt = np.zeros((self.NUM_EPOCHS))
        loss_plt = np.zeros((self.NUM_EPOCHS, self.ITERATIONS_PER_EPOCH, 3))
        val_acc_df = np.zeros((self.NUM_EPOCHS))
        train_acc_df = np.zeros((self.NUM_EPOCHS))
        test_acc_df = np.zeros((self.NUM_EPOCHS))
        train_update_df = np.zeros((self.NUM_EPOCHS))
        val_update_df = np.zeros((self.NUM_EPOCHS))
        test_update_df = np.zeros((self.NUM_EPOCHS))
        test_time_df = np.zeros((self.NUM_EPOCHS))

        # One row per (test iteration, batch element, time step).
        analysis_rows = (self.TEST_ITERS * self.BATCH_SIZE *
                         self.SEQUENCE_LENGTH)
        read_embs = np.zeros((analysis_rows, self.EMBEDDING_LENGTH))
        non_read_embs = np.zeros((analysis_rows, self.EMBEDDING_LENGTH))
        read_surps = np.ones(analysis_rows)
        non_read_surps = np.ones(analysis_rows)

        try:
            train_matrix, train_labels, train_probs, train_mask = self.input_fn(
                split='train')
            val_matrix, val_labels, val_probs, val_mask = self.input_fn(
                split='val')
            test_matrix, test_labels, test_probs, test_mask = self.input_fn(
                split='test')

            for epoch in range(self.NUM_EPOCHS):
                # ------------------------------------------------ training
                start_time = time.time()
                train_accuracy, train_steps, train_loss = 0, 0, 0
                for iteration in range(self.ITERATIONS_PER_EPOCH):
                    # Perform SGD update
                    out = sess.run(
                        [
                            train_fn, loss, accuracy, updated_states,
                            cross_entropy, budget_loss, surprisal_loss
                        ],
                        feed_dict={
                            samples: train_matrix[iteration],
                            ground_truth: train_labels[iteration],
                            probs: train_probs[iteration],
                            mask: train_mask[iteration]
                        })
                    train_accuracy += out[2]
                    train_loss += out[1]
                    loss_plt[epoch][iteration] = out[
                        4:]  # entropy, budget, surprisal
                    if out[3] is not None:
                        train_steps += compute_used_samples(
                            out[3] * train_mask[iteration])
                    else:
                        train_steps += np.count_nonzero(train_mask[iteration])

                duration = time.time() - start_time

                train_accuracy /= self.ITERATIONS_PER_EPOCH
                train_loss /= self.ITERATIONS_PER_EPOCH
                train_steps /= (np.count_nonzero(train_mask) / self.BATCH_SIZE)
                train_loss_plt[epoch] = train_loss
                train_acc_df[epoch] = train_accuracy
                train_update_df[epoch] = train_steps

                # ---------------------------------------------- validation
                val_accuracy, val_loss, val_steps = 0, 0, 0
                for iteration in range(self.VAL_ITERS):
                    val_iter_accuracy, val_iter_loss, val_used_inputs = sess.run(
                        [accuracy, loss, updated_states],
                        feed_dict={
                            samples: val_matrix[iteration],
                            ground_truth: val_labels[iteration],
                            probs: val_probs[iteration],
                            mask: val_mask[iteration]
                        })
                    val_accuracy += val_iter_accuracy
                    val_loss += val_iter_loss
                    if val_used_inputs is not None:
                        val_steps += compute_used_samples(val_used_inputs *
                                                          val_mask[iteration])
                    else:
                        val_steps += np.count_nonzero(val_mask[iteration])
                val_accuracy /= self.VAL_ITERS
                val_loss /= self.VAL_ITERS
                val_steps /= (np.count_nonzero(val_mask) / self.BATCH_SIZE)
                val_acc_df[epoch] = val_accuracy
                val_update_df[epoch] = val_steps

                loss_abs = loss_plt[epoch].mean(axis=0)
                loss_perc = np.divide(loss_abs, (loss_abs.sum())) * 100

                self.logger.info("\nEpoch %d/%d, "
                                 "duration: %.2f seconds, "
                                 "train accuracy: %.2f%%, "
                                 "train samples: %.2f%%, "
                                 "val accuracy: %.2f%%, "
                                 "val samples: %.2f%%" %
                                 (epoch + 1, self.NUM_EPOCHS, duration,
                                  100. * train_accuracy, 100. * train_steps,
                                  100. * val_accuracy, 100. * val_steps))
                self.logger.info(
                    "Absolute losses: entropy: %.3f, budget: %.3f, surprisal: %.3f."
                    % (loss_abs[0], loss_abs[1], loss_abs[2]))
                self.logger.info(
                    "Percentage losses: entropy: %.2f%%, budget: %.2f%%, surprisal: %.2f%%."
                    % (loss_perc[0], loss_perc[1], loss_perc[2]))
                print(
                    f"entropy: {loss_plt[epoch, :, 0].mean()}, budget: {loss_plt[epoch, :, 1].mean()}, surprisal: {loss_plt[epoch, :, 2].mean()}."
                )

                # Refresh the analysis buffers whenever this epoch's
                # validation accuracy is (within 1e-4 of) the best so far.
                analysis_update = val_accuracy + 1e-4 > val_acc_df.max()
                if analysis_update:
                    self.logger.info("Updating Analysis")
                    read_embs = np.zeros(
                        (analysis_rows, self.EMBEDDING_LENGTH))
                    non_read_embs = np.zeros(
                        (analysis_rows, self.EMBEDDING_LENGTH))
                    # Float sentinel (-1.0): an integer fill value would
                    # produce an integer array and silently truncate the
                    # surprisal values written into it below. Entries left
                    # at -1 are filtered out when the arrays are saved.
                    read_surps = np.full(analysis_rows, -1.0)
                    non_read_surps = np.full(analysis_rows, -1.0)

                # ---------------------------------------------------- test
                test_accuracy, test_loss, test_steps, t = 0, 0, 0, 0
                for iteration in range(self.TEST_ITERS):
                    t0 = time.time()
                    test_iter_accuracy, test_iter_loss, test_used_inputs = sess.run(
                        [accuracy, loss, updated_states],
                        feed_dict={
                            samples: test_matrix[iteration],
                            ground_truth: test_labels[iteration],
                            probs: test_probs[iteration],
                            mask: test_mask[iteration]
                        })
                    t += time.time() - t0
                    test_accuracy += test_iter_accuracy
                    test_loss += test_iter_loss
                    if test_used_inputs is not None:
                        test_steps += compute_used_samples(
                            test_used_inputs * test_mask[iteration])
                        if analysis_update:
                            # First buffer row belonging to this iteration.
                            offset = (self.BATCH_SIZE * iteration *
                                      self.SEQUENCE_LENGTH)
                            # Best effort: a failure here must not abort
                            # training, so it is only logged.
                            try:
                                read_e, non_read_e, read_s, non_read_s = \
                                    stats_used_samples(
                                        test_used_inputs,
                                        test_matrix[iteration],
                                        test_probs[iteration],
                                        test_mask[iteration])
                                if len(read_e) > 0:
                                    read_embs[offset:offset +
                                              len(read_e)] = read_e
                                if len(non_read_e) > 0:
                                    non_read_embs[offset:offset +
                                                  len(non_read_e)] = non_read_e
                                if len(read_s) > 0:
                                    read_flat = read_s.flatten()
                                    read_surps[offset:offset +
                                               len(read_flat)] = read_flat
                                if len(non_read_s) > 0:
                                    non_read_flat = non_read_s.flatten()
                                    non_read_surps[
                                        offset:offset +
                                        len(non_read_flat)] = non_read_flat
                            except Exception as e:
                                self.logger.info("Could not update analysis")
                                self.logger.error(e)
                    else:
                        test_steps += np.count_nonzero(test_mask[iteration])

                test_accuracy /= self.TEST_ITERS
                test_loss /= self.TEST_ITERS
                test_steps /= (np.count_nonzero(test_mask) / self.BATCH_SIZE)
                test_time_df[epoch] = t
                test_acc_df[epoch] = test_accuracy
                test_update_df[epoch] = test_steps

                self.logger.info("Test time: %.2f seconds, "
                                 "test accuracy: %.2f%%, "
                                 "test samples: %.2f%%.\n" %
                                 (test_time_df[epoch], 100. * test_accuracy,
                                  100. * test_steps))

                # Early stopping: after a 16-epoch warm-up, stop once the
                # validation accuracy has not improved for more than 15
                # epochs, and trim the result arrays accordingly.
                if self.EARLY_STOPPING and epoch > 15:
                    if epoch == 16:
                        best_accuracy = val_acc_df.max()
                        best_idx = val_acc_df.argmax()
                    if best_accuracy < val_acc_df[epoch] + 1e-4:
                        best_accuracy = val_acc_df[epoch]
                        best_idx = epoch
                    elif best_idx + 15 < epoch:
                        val_update_df = val_update_df[:epoch]
                        val_acc_df = val_acc_df[:epoch]
                        train_acc_df = train_acc_df[:epoch]
                        train_update_df = train_update_df[:epoch]
                        loss_plt = loss_plt[:epoch]
                        test_acc_df = test_acc_df[:epoch]
                        test_update_df = test_update_df[:epoch]
                        test_time_df = test_time_df[:epoch]
                        self.logger.info(
                            "Training was interrupted with early stopping")
                        break

        except KeyboardInterrupt:
            self.logger.info("Training was interrupted manually")
        except Exception:
            # Do not leak the session or leave a half-built default graph
            # behind when training fails unexpectedly.
            sess.close()
            tf.reset_default_graph()
            raise

        ## Saving per-epoch metrics
        try:
            # NOTE: this aliases self.CONFIG_DICT, so the result columns are
            # written into that shared dict as well.
            df_dict = self.CONFIG_DICT
            df_dict['val_acc'] = val_acc_df
            df_dict['val_updates'] = val_update_df
            df_dict['train_acc'] = train_acc_df
            df_dict['train_updates'] = train_update_df
            df_dict['test_acc'] = test_acc_df
            df_dict['test_updates'] = test_update_df
            df_dict['test_time'] = test_time_df
            loss_plt_mean = loss_plt.mean(axis=1).transpose()
            df_dict['entropy_loss'] = loss_plt_mean[0]
            df_dict['budget_loss'] = loss_plt_mean[1]
            df_dict['surprisal_loss'] = loss_plt_mean[2]
            df = pd.DataFrame(df_dict)
            df.drop(columns=['epochs', 'file_name'], inplace=True)
            csv_loc = '../csvs'
            os.makedirs(csv_loc, exist_ok=True)
            df.to_csv(f"{csv_loc}/{self.FILE_NAME}.csv")
        except Exception as e:
            print(e)
            self.logger.info("Could not create csvs")

        ## Saving analysis statistics
        try:
            analysis_loc = '../analysis'
            os.makedirs(analysis_loc, exist_ok=True)
            print("Read words")
            read_words = get_words_from_embedding(self.EMBEDDING_DICT,
                                                  read_embs)
            print("Skipped words")
            non_read_words = get_words_from_embedding(self.EMBEDDING_DICT,
                                                      non_read_embs)
            # Context managers make sure every output file is flushed and
            # closed even if a later dump fails.
            with open(f"{analysis_loc}/{self.FILE_NAME}_read_vocab.pkl",
                      'wb') as handle:
                pickle.dump(read_words, handle, protocol=0)
            with open(f"{analysis_loc}/{self.FILE_NAME}_non_read_vocab.pkl",
                      'wb') as handle:
                pickle.dump(non_read_words, handle, protocol=0)
            read_surps = np.vstack(read_surps).flatten()
            non_read_surps = np.vstack(non_read_surps).flatten()
            # Only non-negative entries are real measurements; -1 marks
            # unused buffer slots.
            with open(f"{analysis_loc}/{self.FILE_NAME}_read_surprisals.npy",
                      'wb') as handle:
                np.save(handle, read_surps[read_surps >= 0])
            with open(
                    f"{analysis_loc}/{self.FILE_NAME}_non_read_surprisals.npy",
                    'wb') as handle:
                np.save(handle, non_read_surps[non_read_surps >= 0])

        except Exception as e:
            print(e)
            self.logger.info(
                "Something went wrong when reporting analysis results")

        sess.close()
        tf.reset_default_graph()
Exemplo n.º 37
0
    def __init__(self, sequence_length: int, batch_size: int,
                 gru_internal_size: int, num_hidden_layers: int,
                 stats_log_dir: str):
        """Build the TensorFlow graph of a stacked-GRU character model.

        The network is a deep recurrent model made of `num_hidden_layers`
        GRU cell layers, each with `gru_internal_size` units, followed by a
        linear readout over `ALPHABET_SIZE` classes.

        `sequence_length` is the length of each input sequence. Longer
        sequences give the model longer-term memory, but more time steps
        also make the network harder and slower to train.

        `batch_size` is the number of sequences in each mini-batch; weights
        are only adjusted after each mini-batch.

        `gru_internal_size` is the number of nodes inside each hidden GRU
        cell layer.

        `num_hidden_layers` is the number of GRU cell layers in the deep
        RNN.

        `stats_log_dir` is handed to `_build_statistics` once the graph has
        been assembled.
        """
        # Creation time of this model, in whole seconds, kept as a string.
        self._timestamp = str(math.trunc(time.time()))

        # Hyperparameters.
        self._sequence_length = sequence_length
        self._batch_size = batch_size
        self._gru_internal_size = gru_internal_size
        self._num_hidden_layers = num_hidden_layers

        # ---------------------------------------------------------------------
        # Graph inputs
        # ---------------------------------------------------------------------
        # Character ids. Dimensions: [ batch_size, sequence_length ]
        char_ids = tf.placeholder(tf.uint8, [None, None], name='X')
        learning_rate_ph = tf.placeholder(tf.float32, name='learning_rate')
        batch_size_ph = tf.placeholder(tf.int32, name='batch_size')
        # Dimensions: [ batch_size, sequence_length, ALPHABET_SIZE ]
        char_one_hot = tf.one_hot(char_ids, ALPHABET_SIZE, 1.0, 0.0)
        # Input cell state: the states of all layers concatenated.
        # Dimensions: [ batch_size, gru_internal_size * num_hidden_layers ]
        state_in = tf.placeholder(
            tf.float32,
            [None, self._gru_internal_size * self._num_hidden_layers],
            name='Hin')
        self._inputs = {
            'learning_rate': learning_rate_ph,
            'batch_size': batch_size_ph,
            'X': char_ids,
            'Xo': char_one_hot,
            'H_in': state_in,
        }

        # Expected outputs, used for training: the input sequence shifted by
        # one position, since the task is to predict the next character.
        expected_ids = tf.placeholder(tf.uint8, [None, None], name='Y_exp')
        expected_one_hot = tf.one_hot(expected_ids, ALPHABET_SIZE, 1.0, 0.0)
        self._expected_outputs = {
            # Dimensions: [ batch_size, sequence_length ]
            'Y': expected_ids,
            # Dimensions: [ batch_size, sequence_length, ALPHABET_SIZE ]
            'Yo': expected_one_hot,
        }

        # ---------------------------------------------------------------------
        # Hidden layers
        # ---------------------------------------------------------------------
        # One GRU cell per hidden layer; a larger `gru_internal_size` lets a
        # single cell hold more complex state. `state_is_tuple=False` keeps
        # the combined state as one concatenated tensor, matching `H_in`.
        self._cells = []
        for _ in range(self._num_hidden_layers):
            self._cells.append(rnn.GRUCell(self._gru_internal_size))
        self._multicell = rnn.MultiRNNCell(self._cells, state_is_tuple=False)

        # `dynamic_rnn` unrolls the network dynamically at run time instead
        # of building one copy of the cell per time step into the graph.
        #
        # rnn_out:   [ batch_size, sequence_length, gru_internal_size ]
        # state_out: [ batch_size, gru_internal_size * num_hidden_layers ],
        #            the state after the last step of the sequence.
        rnn_out, state_out = tf.nn.dynamic_rnn(self._multicell,
                                               char_one_hot,
                                               dtype=tf.float32,
                                               initial_state=state_in)

        # ---------------------------------------------------------------------
        # Outputs
        # ---------------------------------------------------------------------
        # Merge the batch and time axes so that one readout layer (shared
        # weights and biases) is applied to every unrolled time step:
        #
        # [ batch_size, sequence_length, gru_internal_size ]
        #     => [ batch_size x sequence_length, gru_internal_size ]
        flat_rnn_out = tf.reshape(rnn_out, [-1, self._gru_internal_size])

        # [ batch_size x sequence_length, ALPHABET_SIZE ]
        logits = layers.linear(flat_rnn_out, ALPHABET_SIZE)
        # [ batch_size x sequence_length, ALPHABET_SIZE ]
        flat_expected = tf.reshape(expected_one_hot, [-1, ALPHABET_SIZE])
        # [ batch_size x sequence_length, ALPHABET_SIZE ]
        probabilities = tf.nn.softmax(logits, name='Yo')
        # [ batch_size x sequence_length ]
        predicted_flat = tf.argmax(probabilities, 1)
        # [ batch_size, sequence_length ]
        predicted = tf.reshape(predicted_flat, [batch_size_ph, -1], name='Y')

        # Output nodes, kept in a dictionary for easy access later.
        self._outputs = {
            'Y': predicted,
            # Final cell state; the identity op only gives it a
            # recognisable name in the graph.
            'H_out': tf.identity(state_out, name='H_out'),
        }

        # Per-character loss, first flat and then restored to
        # [ batch_size, sequence_length ].
        flat_loss = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=flat_expected)
        self._loss = tf.reshape(flat_loss, [batch_size_ph, -1])

        # Training op that adjusts the weights to minimise the loss.
        # NOTE(review): the optimizer is created with its default arguments;
        # the 'learning_rate' placeholder is not wired into it here.
        self._train_step = tf.train.AdamOptimizer().minimize(self._loss)

        # Statistics that are only reported to the user, never trained on.
        self._sequence_loss = tf.reduce_mean(self._loss, 1)
        self._batch_loss = tf.reduce_mean(self._sequence_loss)
        is_correct = tf.equal(expected_ids, tf.cast(predicted, tf.uint8))
        self._batch_accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

        self._initialise_tf_session()
        self._build_statistics(stats_log_dir)
    def __call__(self, content_words, content_len, date, target, target_len):
        """Encode the content, predict vocabulary logits and optionally a loss.

        The encoder output goes through `self.num_blocks` rounds of
        self-attention plus feed-forward, is max-pooled over axis 1, passed
        through a tanh hidden layer and projected to `self.num_vocabulary`
        logits.

        Args:
            content_words, content_len, date: forwarded unchanged to
                `self._encoder`.
            target: word ids per example, or None to skip the loss. Entries
                equal to 0 are ignored when the target distribution is built.
            target_len: per-example divisor used to normalise the multi-hot
                target into a distribution.

        Returns:
            Namespace with `logit`, `feature` (the pooled representation
            before dropout), `loss` (None when `target` is None) and
            `training_list` (trainable variables whose op name does not start
            with "bert"; empty when `target` is None).
        """
        encoded, reg_loss = self._encoder(content_words, content_len, date)

        for block_id in range(self.num_blocks):
            block_scope = "num_blocks_{}".format(block_id)
            with tf.variable_scope(block_scope, reuse=tf.AUTO_REUSE):
                # Non-causal self-attention: queries, keys and values are
                # all the current representation.
                attended = multihead_attention(queries=encoded,
                                               keys=encoded,
                                               values=encoded,
                                               num_heads=self.num_heads,
                                               dropout_rate=self.dropout_rate,
                                               training=self.training,
                                               causality=False)
                # feed forward
                encoded = ff(attended,
                             num_units=[self.feed_forward_in_dim,
                                        self.model_dim])

        # Max-pool over axis 1.
        # (original author's note: shape(?,300,512) -- not verifiable here)
        pooled = tf.reduce_max(encoded, axis=1)
        hidden_input = pooled
        if self.training and self.dropout_rate > 0:
            print("In training mode, use dropout")
            hidden_input = tf.nn.dropout(pooled,
                                         keep_prob=1 - self.dropout_rate)

        with tf.variable_scope("MlpLayer") as hidden_layer_scope:
            hidden = layers.fully_connected(
                hidden_input,
                num_outputs=self.model_dim,
                activation_fn=tf.nn.tanh,
                scope=hidden_layer_scope,
                reuse=tf.AUTO_REUSE)
        logits = layers.linear(hidden,
                               self.num_vocabulary,
                               scope="Logit_layer",
                               reuse=tf.AUTO_REUSE)

        # Without targets there is nothing to train on.
        if target is None:
            return Namespace(logit=logits,
                             feature=pooled,
                             loss=None,
                             training_list=[])

        # Turn the id lists into a dense [batch, vocabulary] indicator
        # matrix: one entry per non-zero target id, placed at
        # (row of the example, column of the id).
        nonzero_positions = tf.where(tf.not_equal(target, 0))
        vocab_columns = tf.cast(tf.gather_nd(target, nonzero_positions),
                                tf.int64)
        row_index = tf.reshape(nonzero_positions[:, 0], [-1, 1])
        col_index = tf.reshape(vocab_columns, [-1, 1])
        sparse_target = SparseTensor(
            indices=tf.concat([row_index, col_index], axis=1),
            values=tf.ones([tf.shape(nonzero_positions)[0]],
                           dtype=tf.float32),
            dense_shape=[self._batch_size, self.num_vocabulary])
        expanded_target = to_dense(sparse_target)
        target_dist = expanded_target / tf.cast(
            tf.reshape(target_len, [-1, 1]), tf.float32)

        # The labels are treated as constants: no gradient flows into them.
        per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=tf.stop_gradient(target_dist))
        loss = tf.reduce_mean(per_example_loss + reg_loss)

        # Collect the variables to train: everything outside "bert".
        print("**** learning_rate ****")
        training_list = [
            var for var in tf.trainable_variables()
            if not var.op.name.startswith("bert")
        ]

        return Namespace(logit=logits,
                         feature=pooled,
                         loss=loss,
                         training_list=training_list)
Exemplo n.º 39
0
 def output_fn(x):
   """Apply a linear layer mapping `x` to `num_decoder_symbols` outputs.

   `num_decoder_symbols` and `scope` come from the enclosing scope.
   """
   projected = layers.linear(x, num_decoder_symbols, scope=scope)
   return projected
def train():
    """Build the RNN classifier graph and train it until interrupted.

    The graph feeds ``samples`` through the cell returned by ``create_model``,
    classifies from the last time step with a linear layer, and minimizes the
    cross-entropy plus the budget loss returned by ``compute_budget_loss``.
    Every ``FLAGS.evaluate_every`` iterations, accuracy and the number of used
    samples are averaged over ``FLAGS.validation_batches`` freshly generated
    batches, written as summaries under ``<FLAGS.logdir>/<timestamp>/val`` and
    printed.

    Training runs in an endless loop; a ``KeyboardInterrupt`` stops it
    quietly. The session and the summary writer are always closed on exit.
    """
    samples = tf.placeholder(tf.float32, [None, None, INPUT_SIZE])  # (batch, time, in)
    ground_truth = tf.placeholder(tf.int64, [None])  # (batch,)

    cell, initial_state = create_model(model=FLAGS.model,
                                       num_cells=[FLAGS.rnn_cells] * FLAGS.rnn_layers,
                                       batch_size=FLAGS.batch_size)

    rnn_outputs, rnn_states = tf.nn.dynamic_rnn(cell, samples, dtype=tf.float32, initial_state=initial_state)

    # Split the outputs of the RNN into the actual outputs and the state update gate
    rnn_outputs, updated_states = split_rnn_outputs(FLAGS.model, rnn_outputs)

    # Classify from the output at the last time step only
    out = layers.linear(inputs=rnn_outputs[:, -1, :], num_outputs=OUTPUT_SIZE)

    # Compute cross-entropy loss
    cross_entropy_per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=out, labels=ground_truth)
    cross_entropy = tf.reduce_mean(cross_entropy_per_sample)

    # Compute accuracy
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(out, 1), ground_truth), tf.float32))

    # Compute loss for each updated state
    budget_loss = compute_budget_loss(FLAGS.model, cross_entropy, updated_states, FLAGS.cost_per_sample)

    # Combine all losses
    loss = cross_entropy + budget_loss

    # Optimizer
    opt, grads_and_vars = compute_gradients(loss, FLAGS.learning_rate, FLAGS.grad_clip)
    train_fn = opt.apply_gradients(grads_and_vars)

    sess = tf.Session()

    log_dir = os.path.join(FLAGS.logdir, datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
    valid_writer = tf.summary.FileWriter(log_dir + '/val')

    sess.run(tf.global_variables_initializer())

    try:
        num_iters = 0
        while True:
            # Generate new batch and perform SGD update
            x, y = generate_batch(FLAGS.batch_size,
                                  FLAGS.sampling_period,
                                  FLAGS.signal_duration,
                                  START_PERIOD, END_PERIOD,
                                  START_TARGET_PERIOD, END_TARGET_PERIOD)
            sess.run([train_fn], feed_dict={samples: x, ground_truth: y})
            num_iters += 1

            # Evaluate on validation data generated on the fly
            if num_iters % FLAGS.evaluate_every == 0:
                valid_accuracy, valid_steps = 0., 0.
                for _ in range(FLAGS.validation_batches):
                    valid_x, valid_y = generate_batch(FLAGS.batch_size,
                                                      FLAGS.sampling_period,
                                                      FLAGS.signal_duration,
                                                      START_PERIOD, END_PERIOD,
                                                      START_TARGET_PERIOD, END_TARGET_PERIOD)
                    valid_iter_accuracy, valid_used_inputs = sess.run(
                        [accuracy, updated_states],
                        feed_dict={
                            samples: valid_x,
                            ground_truth: valid_y})
                    valid_accuracy += valid_iter_accuracy
                    # When no state-update information is fetched, count the
                    # whole sequence as used.
                    if valid_used_inputs is not None:
                        valid_steps += compute_used_samples(valid_used_inputs)
                    else:
                        valid_steps += SEQUENCE_LENGTH
                valid_accuracy /= FLAGS.validation_batches
                valid_steps /= FLAGS.validation_batches

                valid_writer.add_summary(scalar_summary('accuracy', valid_accuracy), num_iters)
                valid_writer.add_summary(scalar_summary('used_samples', valid_steps / SEQUENCE_LENGTH), num_iters)
                valid_writer.flush()

                print("Iteration %d, "
                      "validation accuracy: %.2f%%, "
                      "validation samples: %.2f (%.2f%%)" % (num_iters,
                                                             100. * valid_accuracy,
                                                             valid_steps,
                                                             100. * valid_steps / SEQUENCE_LENGTH))
    except KeyboardInterrupt:
        # Ctrl-C is the intended way to stop the endless training loop.
        pass
    finally:
        # Release the event file and the session resources on every exit path.
        valid_writer.close()
        sess.close()
Exemplo n.º 41
0
def build_rnn(movies_cnt,
              cell_type='gru',
              user_aware=True,
              user_cnt=None,
              rating_aware=True,
              rnn_unit=300,
              user_embedding=300,
              movie_emb_dim=300,
              feed_previous=True,
              loss_weights=None,
              rating_with_user=False,
              batch_size=32):
    """Build a next-movie prediction RNN graph with an optional rating head.

    The recurrence is unrolled with ``tf.while_loop`` so that, at every step,
    the next input is either the argmax of the model's own prediction or the
    next index from the input placeholder, selected per step by the
    ``feed_prev`` boolean placeholder.

    Args:
        movies_cnt: Number of movies; used for the movie embedding table and
            as the output width of the "chooser" projection.
        cell_type: Key into the external ``cells`` mapping. 'lstm' and 'gru'
            also select how the per-user initial state is built.
        user_aware: If true, the initial state is looked up from user
            embedding(s) indexed by a user-id placeholder; otherwise
            ``cell.zero_state`` is used.
        user_cnt: Number of users; required when ``user_aware`` is true.
        rating_aware: If true, add a masked squared-error rating loss.
        rnn_unit: ``num_units`` of the RNN cell.
        user_embedding: Size passed to ``build_embedding`` for user tables.
        movie_emb_dim: Size passed to ``build_embedding`` for the movie table.
        feed_previous: Not referenced in the function body.
        loss_weights: Pair ``[classification, rating]`` of loss weights;
            defaults to ``[10, 2]``.
        rating_with_user: Not referenced in the function body.
        batch_size: Static batch size used for the zero state, the initial
            (empty) outputs tensor and the final logits reshape.

    Returns:
        A dict with keys 'base' (``[movie_idx_ph, total_loss, clf_loss]``),
        'feed_prev', 'if_training', 'movie_embeddings', plus 'user' when
        ``user_aware`` and 'ratings' (``[true_ratings, rat_loss]``) when
        ``rating_aware``.

    Raises:
        ValueError: If ``user_aware`` is true and ``user_cnt`` is None.
    """

    loss_weights = loss_weights or [10, 2]
    movie_idx_ph = tf.placeholder(tf.int32, [None, None])
    # maxlen is the dynamic size of the second dimension of movie_idx_ph.
    _, maxlen = tf.unstack(tf.shape(movie_idx_ph))

    # Defaults to True; feed False to select the validation mask below.
    if_training = tf.placeholder_with_default(True, [])
    cell = cells[cell_type](num_units=rnn_unit)

    movie_embeddings = build_embedding(movies_cnt, movie_emb_dim,
                                       'movie_embedding')

    if user_aware and user_cnt is None:
        raise ValueError

    if user_aware:

        user_idx_ph = tf.placeholder(tf.int32, [None])
        if cell_type == 'lstm':

            # Separate embedding tables provide the c and h parts of the
            # LSTM initial state.
            c_user_embedding = build_embedding(user_cnt,
                                               user_embedding,
                                               name='user_c_embedding')
            h_user_embedding = build_embedding(user_cnt,
                                               user_embedding,
                                               name='user_h_embedding')
            state = LSTMStateTuple(
                c=tf.nn.embedding_lookup(c_user_embedding, user_idx_ph),
                h=tf.nn.embedding_lookup(h_user_embedding, user_idx_ph))

        elif cell_type == 'gru':

            # Note: this rebinds the `user_embedding` parameter (a size) to
            # the embedding table itself.
            user_embedding = build_embedding(user_cnt,
                                             user_embedding,
                                             name='user_embedding')
            state = tf.nn.embedding_lookup(user_embedding, user_idx_ph)

        # NOTE(review): for any other cell_type with user_aware=True, `state`
        # is never assigned here and its later use would fail.

    else:

        state = cell.zero_state(batch_size=batch_size, dtype=tf.float32)

    def _choose_best(vec, reuse=False):
        # Affine projection of `vec` to one score per movie, using variables
        # 'weights' and 'bias' in the 'chooser' scope.
        # NOTE(review): the weight shape uses movie_emb_dim, but this is
        # applied to cell outputs (reshaped to rnn_unit below); this appears
        # to require rnn_unit == movie_emb_dim -- confirm.
        with tf.variable_scope(name_or_scope='chooser', reuse=reuse) as scope:
            w = tf.get_variable(name='weights',
                                shape=[movie_emb_dim, movies_cnt])
            b = tf.get_variable(name='bias', shape=[movies_cnt])
            return tf.matmul(vec, w) + b

    # not using dynamic_rnn since I want to feed previous output

    def walker(idx, input, outputs, state, fprev):
        # Loop body: run one cell step, pick the next input index, and append
        # this step's output to `outputs` along axis 1.

        output, state = cell(input, state)

        # fprev[idx] True  -> feed back the argmax of the model's prediction;
        # fprev[idx] False -> use the next index from movie_idx_ph.
        new_idx = tf.cond(
            fprev[idx],
            lambda: tf.cast(tf.argmax(_choose_best(output), 1), tf.int32),
            lambda: movie_idx_ph[:, idx + 1])

        input = tf.nn.embedding_lookup(movie_embeddings, new_idx)

        return idx + 1, input, tf.concat(
            (outputs, tf.expand_dims(output, axis=1)), axis=1), state, fprev

    def cond(idx, input, outputs, state, fprev):
        # Continue while idx < maxlen - 1, i.e. maxlen - 1 steps in total.
        return idx < maxlen - 1

    # NOTE(review): the loop counter is a tf.Variable rather than a constant;
    # presumably it has to be initialized before the loop is run -- confirm.
    idx = tf.Variable(0)
    input = tf.nn.embedding_lookup(movie_embeddings, movie_idx_ph[:, 0])

    feed_prev = tf.placeholder(tf.bool, [None], name='feed_prev_ph')

    # Initial outputs tensor is empty along the time axis and grows by one
    # entry per loop iteration.
    loop_vars = [
        idx, input,
        tf.zeros((batch_size, 0, movie_emb_dim), dtype=tf.float32), state,
        feed_prev
    ]

    # The outputs tensor changes size along axis 1, hence the None there.
    # NOTE(review): `state.get_shape()` is called on whatever `state` is; in
    # the 'lstm' branch that is an LSTMStateTuple -- confirm this works.
    shape_invs = [
        idx.get_shape(),
        input.get_shape(),
        tf.TensorShape((batch_size, None, movie_emb_dim)),
        state.get_shape(),
        feed_prev.get_shape()
    ]

    print(len(loop_vars), len(shape_invs))
    # `last_output` receives the loop's `input` variable (the embedding chosen
    # for the step after the last one), not a cell output.
    idx, last_output, outputs, state, fp = tf.while_loop(
        cond, walker, loop_vars=loop_vars, shape_invariants=shape_invs)

    # Flatten the collected outputs, project them with the shared chooser
    # variables (reuse=True), then restore the (batch, time, movies) layout.
    logits = tf.reshape(outputs, (-1, rnn_unit))
    logits = _choose_best(logits, reuse=True)
    logits = tf.reshape(logits, (batch_size, -1, movies_cnt))

    # Labels are the input indices shifted by one step.
    clf_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=movie_idx_ph[:, 1:])

    def training_mask():
        # 1.0 where the label index is > 0 (presumably 0 is padding -- confirm).
        clf_mask = tf.greater(movie_idx_ph[:, 1:], tf.cast(0, dtype=tf.int32))
        clf_mask = tf.cast(clf_mask, tf.float32)
        return clf_mask

    def val_mask():
        # Same as training_mask, additionally multiplied by feed_prev[1:] so
        # only positions with feed_prev set contribute.
        clf_mask = tf.greater(movie_idx_ph[:, 1:], tf.cast(0, dtype=tf.int32))
        clf_mask = tf.cast(clf_mask, tf.float32)
        clf_mask = tf.multiply(clf_mask, tf.cast(feed_prev[1:], tf.float32))
        return clf_mask

    clf_mask = tf.cond(if_training, training_mask, val_mask)
    clf_loss *= clf_mask
    # Mean loss over the unmasked positions.
    clf_loss = tf.reduce_sum(clf_loss) / tf.reduce_sum(clf_mask)
    total_loss = loss_weights[0] * clf_loss

    if rating_aware:
        true_ratings = tf.placeholder('float', [None, None])

        # One rating prediction per time step from the RNN outputs.
        ratings = linear(outputs, 1)
        ratings = tf.squeeze(ratings, axis=2)

        # Squared error, counted only where true_ratings > 0.
        rat_loss = tf.square(ratings - true_ratings)
        mask = tf.greater(true_ratings, tf.cast(0, dtype=tf.float32))
        mask = tf.cast(mask, tf.float32)
        rat_loss *= mask
        rat_loss = tf.reduce_sum(rat_loss) / tf.reduce_sum(mask)
        total_loss += loss_weights[1] * rat_loss

    # Collect the placeholders and tensors callers need to drive the graph.
    bag = {
        'base': [movie_idx_ph, total_loss, clf_loss],
        'feed_prev': feed_prev,
        'if_training': if_training,
        'movie_embeddings': movie_embeddings
    }

    if user_aware:
        bag['user'] = user_idx_ph

    if rating_aware:
        bag['ratings'] = [true_ratings, rat_loss]

    return bag
Exemplo n.º 42
0
# Character-level GRU language-model graph (TensorFlow 1.x, graph mode).
# NOTE(review): `pkeep` is declared here but the cell below is built with the
# DROPOUT_KEEP_RATE constant -- confirm which one is intended.
pkeep = tf.placeholder(tf.float32, name='pkeep')
batchsize = tf.placeholder(tf.int32, name='batchsize')

# Inputs
# int32 rather than uint32: tf.one_hot in TF 1.x only accepts uint8, int32 or
# int64 indices, so a uint32 placeholder cannot be one-hot encoded.
X = tf.placeholder(tf.int32, [None, None], name='X') # [ BATCHSIZE, SEQUENCE_LENGTH ]
Xo = tf.one_hot(X, ALPHABET_LENGTH, 1.0, 0.0) # [ BATCHSIZE, SEQUENCE_LENGTH , ALPHABET_LENGTH ]

# Expected outputs
Y_ = tf.placeholder(tf.uint8, [None, None], name='Y_') # [ BATCHSIZE, SEQUENCE_LENGTH ]
Yo_ = tf.one_hot(Y_, ALPHABET_LENGTH, 1.0, 0.0) # [ BATCHSIZE, SEQUENCE_LENGTH , ALPHABET_LENGTH ]

# Initial internal cell state
Hin = tf.placeholder(tf.float32, [None, INTERNAL_SIZE * LAYERS], name='Hin') # [ BATCHSIZE , INTERNAL_SIZE * LAYERS ]

# Deep stacked GRU cell
deep_drop_cell = tfu.rnn.MultiDropoutGRUCell(size=INTERNAL_SIZE, pkeep=DROPOUT_KEEP_RATE, layers=LAYERS)

# Output predictions and output state
Yr, H = tf.nn.dynamic_rnn(deep_drop_cell, Xo, dtype=tf.float32, initial_state=Hin)

# Give the final state a stable name so it can be fetched by name.
H = tf.identity(H, name='H')

# Flatten batch and time so one linear readout is shared across all steps.
Yflat = tf.reshape(Yr, [-1, INTERNAL_SIZE])
Ylogits = layers.linear(Yflat, ALPHABET_LENGTH)
Yflat_ = tf.reshape(Yo_, [-1, ALPHABET_LENGTH])
loss = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Yflat_)
loss = tf.reshape(loss, [batchsize, -1])
Yo = tf.nn.softmax(Ylogits, name='Yo')
Y = tf.argmax(Yo, 1)
Y = tf.reshape(Y, [batchsize, -1], name='Y')
# NOTE(review): `lr` is not defined in this span; presumably a learning-rate
# placeholder or constant defined elsewhere -- confirm.
train_step = tf.train.AdamOptimizer(lr).minimize(loss)
def inference_net(x, latent_size):
  """Pass ``x`` through a single linear layer with ``latent_size`` outputs."""
  latent = layers.linear(x, latent_size)
  return latent
Exemplo n.º 44
0
# Scalars fed at run time: batch size, learning rate, dropout keep probability.
batchsize = tf.placeholder(tf.int32, name='batchsize')
lr = tf.placeholder(tf.float32, name='lr')
pkeep = tf.placeholder(tf.float32, name='pkeep')

# Input character codes, one-hot encoded to depth ALPHA_SIZE
# (all zeros except a 1.0 at the character's index).
X = tf.placeholder(tf.uint8, shape=[None, None], name='X')
Xo = tf.one_hot(X, depth=ALPHA_SIZE, on_value=1.0, off_value=0.0)

# Target character codes, one-hot encoded the same way.
Y_ = tf.placeholder(tf.uint8, shape=[None, None], name='Y_')
Yo_ = tf.one_hot(Y_, depth=ALPHA_SIZE, on_value=1.0, off_value=0.0)

# Recurrent input state: the states of all layers concatenated.
Hin = tf.placeholder(
    tf.float32, shape=[None, NUM_OF_GRUS*NUM_LAYERS], name='Hin')

# One GRU cell per layer, each with dropout on its inputs; the stack then
# gets dropout on its output, ahead of the softmax readout.
cells = [rnn.GRUCell(NUM_OF_GRUS) for _ in range(NUM_LAYERS)]
dropcells = [
    rnn.DropoutWrapper(cell, input_keep_prob=pkeep)
    for cell in cells
]
multicell = rnn.MultiRNNCell(dropcells, state_is_tuple=False)
multicell = rnn.DropoutWrapper(multicell, output_keep_prob=pkeep)

# Unroll through time; H is the last state of the sequence.
Yr, H = tf.nn.dynamic_rnn(
    multicell, Xo, dtype=tf.float32, initial_state=Hin)
H = tf.identity(H, name='H')

# Flatten batch and time, then apply one shared linear readout.
Yflat = tf.reshape(Yr, [-1, NUM_OF_GRUS])
Ylogits = layers.linear(Yflat, ALPHA_SIZE)
Yflat_ = tf.reshape(Yo_, [-1, ALPHA_SIZE])
loss = tf.nn.softmax_cross_entropy_with_logits(
    labels=Yflat_, logits=Ylogits)
loss = tf.reshape(loss, [batchsize, -1])

# Predicted distribution and most likely character per position.
Yo = tf.nn.softmax(Ylogits, name='Yo')
Y = tf.argmax(Yo, axis=1)
Y = tf.reshape(Y, [batchsize, -1], name="Y")
train_step = tf.train.AdamOptimizer(lr).minimize(loss)

# Statistics for analysis: per-sequence loss, batch loss and accuracy.
seqloss = tf.reduce_mean(loss, axis=1)
batchloss = tf.reduce_mean(seqloss)
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(Y_, tf.cast(Y, tf.uint8)), tf.float32)
)
loss_summary = tf.summary.scalar("batch_loss", batchloss)
acc_summary = tf.summary.scalar("batch_accuracy", accuracy)
summaries = tf.summary.merge([loss_summary, acc_summary])
def main(_):
    """Train a character-level multi-layer GRU language model.

    Reads the training and validation text from ``FLAGS.text_dir``, builds the
    graph (one-hot inputs, ``NLAYERS`` stacked GRU cells with dropout, a
    linear softmax readout shared across time steps), then trains with Adam.
    While training it periodically prints a progress comparison, evaluates on
    the validation text, prints generated sample text, and saves a checkpoint
    to ``FLAGS.checkpoint_dir``. Summaries go to ``FLAGS.summaries_dir``.

    The session and both summary writers are closed when training ends or
    fails.

    Args:
        _: Unused positional argument.
    """

    # load data, either shakespeare, or the Python source of Tensorflow itself
    shakedir = FLAGS.text_dir
    # shakedir = "../tensorflow/**/*.py"
    codetext, valitext, bookranges = txt.read_data_files(shakedir,
                                                         validation=True)

    # display some stats on the data
    epoch_size = len(codetext) // (FLAGS.train_batch_size * FLAGS.seqlen)
    txt.print_data_stats(len(codetext), len(valitext), epoch_size)

    #
    # the model (see FAQ in README.md)
    #
    lr = tf.placeholder(tf.float32, name='lr')  # learning rate
    pkeep = tf.placeholder(tf.float32, name='pkeep')  # dropout parameter
    batchsize = tf.placeholder(tf.int32, name='batchsize')

    # inputs
    X = tf.placeholder(tf.uint8, [None, None],
                       name='X')  # [ BATCHSIZE, FLAGS.seqlen ]
    Xo = tf.one_hot(X, ALPHASIZE, 1.0,
                    0.0)  # [ BATCHSIZE, FLAGS.seqlen, ALPHASIZE ]
    # expected outputs = same sequence shifted by 1 since we are trying to predict the next character
    Y_ = tf.placeholder(tf.uint8, [None, None],
                        name='Y_')  # [ BATCHSIZE, FLAGS.seqlen ]
    Yo_ = tf.one_hot(Y_, ALPHASIZE, 1.0,
                     0.0)  # [ BATCHSIZE, FLAGS.seqlen, ALPHASIZE ]
    # input state
    Hin = tf.placeholder(tf.float32, [None, INTERNALSIZE * NLAYERS],
                         name='Hin')  # [ BATCHSIZE, INTERNALSIZE * NLAYERS]

    # using a NLAYERS=3 layers of GRU cells, unrolled FLAGS.seqlen=30 times
    # dynamic_rnn infers FLAGS.seqlen from the size of the inputs Xo

    # Each layer needs its own cell object: repeating a single instance
    # ([cell] * NLAYERS) makes every layer reuse the same cell, which newer
    # TensorFlow 1.x releases either reject or treat as shared weights.
    cells = [rnn.GRUCell(INTERNALSIZE) for _ in range(NLAYERS)]
    dropcells = [
        rnn.DropoutWrapper(cell, input_keep_prob=pkeep) for cell in cells
    ]
    multicell = rnn.MultiRNNCell(dropcells, state_is_tuple=False)
    multicell = rnn.DropoutWrapper(multicell, output_keep_prob=pkeep)
    Yr, H = tf.nn.dynamic_rnn(multicell,
                              Xo,
                              dtype=tf.float32,
                              initial_state=Hin)
    # Yr: [ BATCHSIZE, FLAGS.seqlen, INTERNALSIZE ]
    # H:  [ BATCHSIZE, INTERNALSIZE*NLAYERS ] # this is the last state in the sequence

    H = tf.identity(H, name='H')  # just to give it a name

    # Softmax layer implementation:
    # Flatten the first two dimension of the output [ BATCHSIZE, FLAGS.seqlen, ALPHASIZE ] => [ BATCHSIZE x FLAGS.seqlen, ALPHASIZE ]
    # then apply softmax readout layer. This way, the weights and biases are shared across unrolled time steps.
    # From the readout point of view, a value coming from a cell or a minibatch is the same thing

    Yflat = tf.reshape(
        Yr, [-1, INTERNALSIZE])  # [ BATCHSIZE x FLAGS.seqlen, INTERNALSIZE ]
    Ylogits = layers.linear(
        Yflat, ALPHASIZE)  # [ BATCHSIZE x FLAGS.seqlen, ALPHASIZE ]
    Yflat_ = tf.reshape(
        Yo_, [-1, ALPHASIZE])  # [ BATCHSIZE x FLAGS.seqlen, ALPHASIZE ]
    loss = tf.nn.softmax_cross_entropy_with_logits(
        logits=Ylogits, labels=Yflat_)  # [ BATCHSIZE x FLAGS.seqlen ]
    loss = tf.reshape(loss, [batchsize, -1])  # [ BATCHSIZE, FLAGS.seqlen ]
    Yo = tf.nn.softmax(Ylogits,
                       name='Yo')  # [ BATCHSIZE x FLAGS.seqlen, ALPHASIZE ]
    Y = tf.argmax(Yo, 1)  # [ BATCHSIZE x FLAGS.seqlen ]
    Y = tf.reshape(Y, [batchsize, -1], name="Y")  # [ BATCHSIZE, FLAGS.seqlen ]
    train_step = tf.train.AdamOptimizer(lr).minimize(loss)

    # stats for display
    seqloss = tf.reduce_mean(loss, 1)
    batchloss = tf.reduce_mean(seqloss)
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(Y_, tf.cast(Y, tf.uint8)), tf.float32))
    loss_summary = tf.summary.scalar("batch_loss", batchloss)
    acc_summary = tf.summary.scalar("batch_accuracy", accuracy)
    summaries = tf.summary.merge([loss_summary, acc_summary])

    # Init Tensorboard stuff. This will save Tensorboard information into a different
    # folder at each run named 'log/<timestamp>/'. Two sets of data are saved so that
    # you can compare training and validation curves visually in Tensorboard.
    timestamp = str(math.trunc(time.time()))
    summary_writer = tf.summary.FileWriter(
        os.path.join(FLAGS.summaries_dir, timestamp + "-training"))
    validation_writer = tf.summary.FileWriter(
        os.path.join(FLAGS.summaries_dir, timestamp + "-validation"))

    # Init for saving models. They will be saved into a directory named 'checkpoints'.
    # Only the last checkpoint is kept.
    if not os.path.exists(FLAGS.checkpoint_dir):
        os.mkdir(FLAGS.checkpoint_dir)
    saver = tf.train.Saver(max_to_keep=1)

    # for display: init the progress bar
    DISPLAY_FREQ = 50
    _50_BATCHES = DISPLAY_FREQ * FLAGS.train_batch_size * FLAGS.seqlen
    progress = txt.Progress(DISPLAY_FREQ,
                            size=111 + 2,
                            msg="Training on next " + str(DISPLAY_FREQ) +
                            " batches")

    # init
    istate = np.zeros([FLAGS.train_batch_size,
                       INTERNALSIZE * NLAYERS])  # initial zero input state
    init = tf.global_variables_initializer()
    sess = tf.Session()
    try:
        sess.run(init)
        step = 0

        # training loop
        for x, y_, epoch in txt.rnn_minibatch_sequencer(
                codetext,
                FLAGS.train_batch_size,
                FLAGS.seqlen,
                nb_epochs=1000):

            # train on one minibatch
            feed_dict = {
                X: x,
                Y_: y_,
                Hin: istate,
                lr: FLAGS.learning_rate,
                pkeep: FLAGS.dropout_pkeep,
                batchsize: FLAGS.train_batch_size
            }
            _, y, ostate, smm = sess.run([train_step, Y, H, summaries],
                                         feed_dict=feed_dict)

            # save training data for Tensorboard
            summary_writer.add_summary(smm, step)

            # display a visual validation of progress (every 50 batches)
            if step % _50_BATCHES == 0:
                feed_dict = {
                    X: x,
                    Y_: y_,
                    Hin: istate,
                    pkeep: 1.0,
                    batchsize: FLAGS.train_batch_size
                }  # no dropout for validation
                y, l, bl, acc = sess.run([Y, seqloss, batchloss, accuracy],
                                         feed_dict=feed_dict)
                txt.print_learning_learned_comparison(x, y, l, bookranges, bl,
                                                      acc, epoch_size, step,
                                                      epoch)

            # run a validation step every 50 batches
            # The validation text should be a single sequence but that's too slow (1s per 1024 chars!),
            # so we cut it up and batch the pieces (slightly inaccurate)
            # tested: validating with 5K sequences instead of 1K is only slightly more accurate, but a lot slower.
            if step % _50_BATCHES == 0 and len(valitext) > 0:
                VALI_SEQLEN = 1 * 1024  # Sequence length for validation. State will be wrong at the start of each sequence.
                bsize = len(valitext) // VALI_SEQLEN
                txt.print_validation_header(len(codetext), bookranges)
                vali_x, vali_y, _ = next(
                    txt.rnn_minibatch_sequencer(valitext, bsize, VALI_SEQLEN,
                                                1))  # all data in 1 batch
                vali_nullstate = np.zeros([bsize, INTERNALSIZE * NLAYERS])
                feed_dict = {
                    X: vali_x,
                    Y_: vali_y,
                    Hin: vali_nullstate,
                    pkeep: 1.0,  # no dropout for validation
                    batchsize: bsize
                }
                ls, acc, smm = sess.run([batchloss, accuracy, summaries],
                                        feed_dict=feed_dict)
                txt.print_validation_stats(ls, acc)
                # save validation data for Tensorboard
                validation_writer.add_summary(smm, step)

            # display a short text generated with the current weights and biases (every 150 batches)
            if step // 3 % _50_BATCHES == 0:
                txt.print_text_generation_header()
                ry = np.array([[txt.convert_from_alphabet(ord("K"))]])
                rh = np.zeros([1, INTERNALSIZE * NLAYERS])
                for k in range(1000):
                    ryo, rh = sess.run([Yo, H],
                                       feed_dict={
                                           X: ry,
                                           pkeep: 1.0,
                                           Hin: rh,
                                           batchsize: 1
                                       })
                    rc = txt.sample_from_probabilities(
                        ryo, topn=10 if epoch <= 1 else 2)
                    print(chr(txt.convert_to_alphabet(rc)), end="")
                    ry = np.array([[rc]])
                txt.print_text_generation_footer()

            # save a checkpoint (every 500 batches)
            if step // 10 % _50_BATCHES == 0:
                saver.save(sess,
                           FLAGS.checkpoint_dir + '/rnn_train_' + timestamp,
                           global_step=step)

            # display progress bar
            progress.step(reset=step % _50_BATCHES == 0)

            # loop state around
            istate = ostate
            step += FLAGS.train_batch_size * FLAGS.seqlen
    finally:
        # Flush pending summaries and release the session on every exit path.
        summary_writer.close()
        validation_writer.close()
        sess.close()
Exemplo n.º 46
0
    def _inference(self):
        logging.info('...create inference')

        #fw_state_list = tf.unstack(self.fw_state, axis=0)
        #fw_state_tuple = tuple(
        #    [tf.contrib.rnn.LSTMStateTuple(fw_state_list[idx][0], fw_state_list[idx][1])
        #     for idx in range(self.num_layers)])

        #bw_state_list = tf.unstack(self.bw_state, axis=0)
        #bw_state_tuple = tuple(
        #    [tf.contrib.rnn.LSTMStateTuple(bw_state_list[idx][0], bw_state_list[idx][1])
        #     for idx in range(self.num_layers)])

        fw_cells = list()
        for i in range(0, self.num_layers):
            if (self.cell_type == 'lstm'):
                cell = rnn.LSTMCell(num_units=self.cell_sizes[i],
                                    state_is_tuple=True)
            elif (self.cell_type == 'gru'):
                # change to GRU
                cell = rnn.LSTMCell(num_units=self.cell_sizes[i],
                                    state_is_tuple=True)
            else:
                cell = rnn.BasicRNNCell(num_units=self.cell_sizes[i])
            cell = rnn.DropoutWrapper(cell, output_keep_prob=self.keep_prob)
            fw_cells.append(cell)
        self.fw_cells = rnn.MultiRNNCell(fw_cells, state_is_tuple=True)

        if (self.direction == 2):  # bidirectional
            print('bidirectional')
            bw_cells = list()
            for i in range(0, self.num_layers):
                if (self.cell_type == 'lstm'):
                    cell = rnn.LSTMCell(num_units=self.cell_sizes[i],
                                        state_is_tuple=True)
                elif (self.cell_type == 'gru'):
                    # change to GRU
                    cell = rnn.LSTMCell(num_units=self.cell_sizes[i],
                                        state_is_tuple=True)
                else:
                    cell = rnn.BasicRNNCell(num_units=self.cell_sizes[i])
                cell = rnn.DropoutWrapper(cell,
                                          output_keep_prob=self.keep_prob)
                bw_cells.append(cell)
            self.bw_cells = rnn.MultiRNNCell(bw_cells, state_is_tuple=True)

        if (self.direction == 1):
            rnn_outputs, states = tf.nn.dynamic_rnn(
                self.fw_cells,
                self.inputs,
                #initial_state=fw_state_tuple,
                sequence_length=self.seq_lengths,
                dtype=tf.float32,
                time_major=True)
        else:  # self.direction = 2
            # bidirectional rnn
            outputs, states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=self.fw_cells,
                cell_bw=self.bw_cells,
                #initial_state_fw=fw_state_tuple,
                #initial_state_bw=bw_state_tuple,
                dtype=tf.float32,
                sequence_length=self.seq_lengths,
                inputs=self.inputs,
                time_major=True)
            rnn_outputs = tf.concat(outputs, axis=2)

        # project output from rnn output size to OUTPUT_SIZE. Sometimes it is worth adding
        # an extra layer here.
        self.projection = lambda x: layers.linear(
            x, num_outputs=self.label_classes, activation_fn=tf.nn.sigmoid)

        self.logits = tf.map_fn(self.projection, rnn_outputs, name="logits")
        self.probs = tf.nn.softmax(self.logits, name="probs")
        self.states = states

        tf.add_to_collection('probs', self.probs)