def get_last_features(self, x, reuse):
        x_has_timesteps = (x.get_shape().ndims == 5)
        if x_has_timesteps:
            sh = tf.shape(x)
            x = flatten_two_dims(x)

        with tf.variable_scope(self.scope+"_features", reuse=reuse):
            x = (tf.to_float(x) - self.ob_mean) / self.ob_std
            x = small_convnet(x, nl=self.nl, feat_dim=self.feat_dim, last_nl=None, layernormalize=self.layernormalize)

            if x_has_timesteps:
                x = unflatten_first_dim(x, sh)
                x = tf.reshape(x, [-1, sh[1], self.feat_dim])
            else:
                x = tf.expand_dims(x, 1)  # single step -> [batch, 1, feat_dim] for the LSTMs below
        with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
            init_1 = tf.contrib.rnn.LSTMStateTuple(self.last_c_in_1, self.last_h_in_1)
            if self.lstm2_size:
                init_2 = tf.contrib.rnn.LSTMStateTuple(self.last_c_in_2, self.last_h_in_2)
            if self.aux_input:
                prev_rews = tf.expand_dims(self.ph_last_rew, -1)
                x = tf.concat([x, prev_rews], -1)
            x, c_out_1, h_out_1 = lstm(self.lstm1_size)(x, initial_state=init_1)
            if self.lstm2_size:
                if self.aux_input:
                    prev_acs = tf.one_hot(self.ph_last_ac, depth=self.num_actions)
                    x = tf.concat([x, tf.cast(prev_acs, tf.float32)], -1)
                    x = tf.concat([x, self.ph_last_vel], -1)

                x, c_out_2, h_out_2 = lstm(self.lstm2_size)(x, initial_state=init_2)
        return x
Example #2
    def __init__(self, sess, state_dim, n_actions, n_steps, n_lstm=256, reuse=False):
        self.obs_in = tf.placeholder(dtype=tf.float32, shape=[None, state_dim], name='obs_in') # observations
        self.D = tf.placeholder(dtype=tf.float32, shape=[None], name='dones')  # dones
        self.LS = tf.placeholder(dtype=tf.float32, shape=[None, n_lstm*2], name='lstm_s')  # cell and hidden states

        with tf.variable_scope("model", reuse=reuse):
            h1 = tf.layers.dense(self.obs_in, units=20, activation=tf.nn.relu)
            h2 = tf.layers.dense(h1, units=20, activation=tf.nn.relu)

            # LSTM cell
            h3, s_new = lstm(h2, self.D, self.LS, scope='lstm', n_lstm=n_lstm)

            self.ap_out = tf.layers.dense(h3, units=n_actions, activation=tf.nn.softmax)
            self.vf_out = tf.layers.dense(h3, units=1, activation=None)

        # The network outputs softmax-normalized action probabilities; actions are
        # sampled from them outside the graph (see the usage sketch after this snippet).
        # self.aps = tf.squeeze(tf.nn.softmax(self.ap_out))
        # a0 = np.random.choice(np.arange(n_actions), p=self.ap_out)
        v0 = self.vf_out[:, 0]
        # picked_action_prob = tf.gather(self.ap_out, a0)  # a0 are the labels for the cross entropy computation

        self.initial_states = [np.zeros(shape=n_lstm*2, dtype=np.float32)]

        def step(obs, dones, lstm_states):
            return sess.run([self.ap_out, v0, s_new], {self.obs_in: obs, self.D: dones, self.LS: lstm_states})
            # return sess.run([a0, self.ap_out, v0, s_new, picked_action_prob], {self.obs_in: obs, self.D: dones, self.LS: lstm_states})

        def value(obs, dones, lstm_states):
            return sess.run(v0, {self.obs_in: obs, self.D: dones, self.LS: lstm_states})
            # return sess.run([self.vf_out], {self.obs_in: obs, self.D: dones, self.LS: lstm_states})

        self.step = step
        self.value = value
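
A minimal usage sketch (not from the original source; `policy`, `obs`, `dones`, and `lstm_states` are assumed names): since ap_out is already softmax-normalized, each row of the returned probabilities sums to 1 and actions can be sampled outside the graph, which is what the commented-out np.random.choice line hints at.

import numpy as np

probs, values, lstm_states = policy.step(obs, dones, lstm_states)
actions = [np.random.choice(len(p), p=p) for p in probs]  # one sampled action per environment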
Example #3
    def __init__(self, sess, state_dim, n_actions, n_steps, n_lstm=256, reuse=False):
        self.obs_in = tf.placeholder(dtype=tf.float32, shape=[None, state_dim], name='obs_in') # observations
        self.D = tf.placeholder(dtype=tf.float32, shape=[None], name='dones')  # dones
        self.LS = tf.placeholder(dtype=tf.float32, shape=[None, n_lstm*2], name='lstm_s')  # cell and hidden states

        with tf.variable_scope("model", reuse=reuse):
            h1 = tf.layers.dense(self.obs_in, units=20, activation=tf.nn.relu)
            h2 = tf.layers.dense(h1, units=20, activation=tf.nn.relu)

            # LSTM cell
            h3, s_new = lstm(h2, self.D, self.LS, scope='lstm', n_lstm=n_lstm)

            self.ap_out = tf.layers.dense(h3, units=n_actions, activation=None)
            self.vf_out = tf.layers.dense(h3, units=1, activation=None)

        # The network outputs non-normalized action scores (logits). They are converted to a
        # probability distribution from which actions can be sampled.
        self.pd = CategoricalPd(self.ap_out)  # init the distribution with the NN's output logits
        a0 = self.pd.sample()  # sample one action per batch entry; the distribution adds small uniform-derived noise to the logits before taking the argmax (a0 has shape [batch])
        v0 = self.vf_out[:, 0]

        neglogprob0 = self.pd.neglogprob(a0)  # a0 are the labels for the cross entropy computation
        self.initial_states = [np.zeros(shape=n_lstm*2, dtype=np.float32)]

        def step(obs, dones, lstm_states):
            return sess.run([a0, self.ap_out, v0, s_new, neglogprob0], {self.obs_in: obs, self.D: dones, self.LS: lstm_states})

        def value(obs, dones, lstm_states):
            return sess.run(v0, {self.obs_in: obs, self.D: dones, self.LS: lstm_states})
            # return sess.run([self.vf_out], {self.obs_in: obs, self.D: dones, self.LS: lstm_states})

        self.step = step
        self.value = value
        self.a0 = a0
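
The sampling comment above refers to the usual trick of perturbing the logits with noise and taking the argmax. A minimal sketch of that trick (Gumbel-max sampling, as used by baselines-style categorical distributions; illustrative only, not necessarily the exact CategoricalPd used here):

import tensorflow as tf

def sample_from_logits(logits):
    # add Gumbel noise (derived from uniform noise) to the unnormalized logits and
    # take the argmax; this is equivalent to sampling from softmax(logits)
    u = tf.random_uniform(tf.shape(logits))
    return tf.argmax(logits - tf.log(-tf.log(u)), axis=-1)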
Example #4
    def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, nlstm=256, reuse=False):
        nenv = nbatch // nsteps
        self.pdtype = make_pdtype(ac_space)
        X, processed_x = observation_input(ob_space, nbatch)

        M = tf.placeholder(tf.float32, [nbatch]) #mask (done t-1)
        S = tf.placeholder(tf.float32, [nenv, nlstm*2]) #states
        with tf.variable_scope("model", reuse=reuse):
            h = nature_cnn(X)
            xs = batch_to_seq(h, nenv, nsteps)
            ms = batch_to_seq(M, nenv, nsteps)
            h5, snew = lstm(xs, ms, S, 'lstm1', nh=nlstm)
            h5 = seq_to_batch(h5)
            vf = fc(h5, 'v', 1)
            self.pd, self.pi = self.pdtype.pdfromlatent(h5)

        v0 = vf[:, 0]
        a0 = self.pd.sample()
        neglogp0 = self.pd.neglogp(a0)
        self.initial_state = np.zeros((nenv, nlstm*2), dtype=np.float32)

        def step(ob, state, mask):
            return sess.run([a0, v0, snew, neglogp0], {X:ob, S:state, M:mask})

        def value(ob, state, mask):
            return sess.run(v0, {X:ob, S:state, M:mask})

        self.X = X
        self.M = M
        self.S = S
        self.vf = vf
        self.step = step
        self.value = value
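
The step/value closures thread the recurrent state explicitly: the caller keeps the (nenv, nlstm*2) state returned by step and feeds it back on the next call, together with the done flags from the previous timestep. A hedged usage sketch (`policy`, `env`, and `obs` are assumed names, not part of the original):

import numpy as np

state = policy.initial_state                  # zeros of shape (nenv, nlstm*2)
dones = np.zeros(nenv, dtype=np.float32)      # 1.0 where the episode ended at t-1
for t in range(nsteps):
    actions, values, state, neglogps = policy.step(obs, state, dones)
    obs, rewards, done_flags, _ = env.step(actions)
    dones = done_flags.astype(np.float32)     # resets the LSTM state at episode boundaries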
Example #5
        def body(i, sent, prev_c, prev_h, hidden_states):
            # only 1 time step
            # get the embedding representation and run it through the LSTM function
            next_c, next_h = lstm(...)

            # trick for attention
            score = None
            if is_attention:
                # make use of enc_output here (placeholder; see the attention sketch below)
                next_h = next_h

            hidden_states = hidden_states.write(i, next_h)
            # NOTE: tf.while_loop requires the body to return the same structure it receives,
            # so `score` must also be carried as a loop variable if it is returned here.
            return i + 1, sent, next_c, next_h, hidden_states, score
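
The attention placeholder above only hints at the intended computation. A minimal sketch of one common choice (Luong-style dot-product attention over the encoder outputs; `enc_output` is assumed to be [batch, src_len, hidden], `next_h` [batch, hidden], and `hidden_size` an assumed integer):

import tensorflow as tf

scores = tf.matmul(enc_output, tf.expand_dims(next_h, -1))    # [batch, src_len, 1]
weights = tf.nn.softmax(scores, axis=1)                       # attention weights over source positions
context = tf.reduce_sum(enc_output * weights, axis=1)         # [batch, hidden] context vector
next_h = tf.layers.dense(tf.concat([context, next_h], axis=-1),
                         hidden_size, activation=tf.tanh)     # combine context with the decoder state
score = tf.squeeze(weights, -1)                               # optionally expose the weights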
Example #6
    def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, nlstm=256, reuse=False):
        nenv = nbatch // nsteps

        nh, nw, nc = ob_space.shape
        ob_shape = (nbatch, nh, nw, nc)
        nact = ac_space.n
        X = tf.placeholder(tf.uint8, ob_shape)  # obs
        M = tf.placeholder(tf.float32, [nbatch])  # mask (done t-1)
        S = tf.placeholder(tf.float32, [nenv, nlstm * 2])  # states
        with tf.variable_scope("model", reuse=reuse):
            h = conv(tf.cast(X, tf.float32) / 255., 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2))
            h2 = conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2))
            h3 = conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2))
            h3 = conv_to_fc(h3)
            h4 = fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2))
            xs = batch_to_seq(h4, nenv, nsteps)
            ms = batch_to_seq(M, nenv, nsteps)
            h5, snew = lstm(xs, ms, S, 'lstm1', nh=nlstm)
            h5 = seq_to_batch(h5)
            pi = fc(h5, 'pi', nact, act=lambda x: x)
            vf = fc(h5, 'v', 1, act=lambda x: x)

        self.pdtype = make_pdtype(ac_space)
        self.pd = self.pdtype.pdfromflat(pi)

        v0 = vf[:, 0]
        a0 = self.pd.sample()
        neglogp0 = self.pd.neglogp(a0)
        self.initial_state = np.zeros((nenv, nlstm * 2), dtype=np.float32)

        def step(ob, state, mask):
            return sess.run([a0, v0, snew, neglogp0], {X: ob, S: state, M: mask})

        def value(ob, state, mask):
            return sess.run(v0, {X: ob, S: state, M: mask})

        self.X = X
        self.M = M
        self.S = S
        self.pi = pi
        self.vf = vf
        self.step = step
        self.value = value
Example #7
    'input_dim': 26,
    'hidden_size': 100,
    'learning_rate': 0.05,
    'scale': 1
}

print "Begin to load data"
reader = reader(conf)
reader.get_data()
# reader.padding()
features = reader.features
targets = reader.targets
print "Load data complete"

print "Begin to build Networks"
network = lstm(conf)
network.build_net()
print "Build Networks complete"
# print network.fit(features, targets)
print network.test(features, targets, reader.testset, reader.testtar)
# network.draw("model.png")



# #####TEST#####

# a = np.ones((20,27))
# b = np.arange(0,20).reshape(20,)
# c = []
# for i in xrange(20):c.append(a[i]*b[i])
# c = np.array(c)
Example #8
log("The index of 'the' is:",
    word_to_index["the"],
    logfile=logpath,
    is_verbose=is_verbose)
log("The word of index 20 is:",
    index_to_word[20],
    logfile=logpath,
    is_verbose=is_verbose)

# lstm = LSTM(batch_size, embedding_size, vocab_size, hidden_size, max_size)

x = tf.placeholder(tf.int32, (batch_size, max_size - 1), name="x")
label = tf.placeholder(tf.int32, (batch_size, max_size - 1), name="label")
teacher_forcing = tf.placeholder(tf.bool, (), name="teacher_forcing")

output, softmax_output = lstm(x, label, vocab_size, hidden_size, max_size,
                              batch_size, embedding_size, teacher_forcing)

# tf.argmax is a graph op, so no separate session is needed here; `onehot` holds the
# predicted word indices (argmax over the softmax output), not a one-hot encoding.
onehot = tf.argmax(softmax_output, 1)

with tf.variable_scope("optimizer", reuse=tf.AUTO_REUSE):
    optimizer, loss = optimize(output, label, learning_rate)
    # perplexity = tf.pow(2, loss)
    tf.summary.scalar('loss', loss)
"""Now let's execute the graph in the session.

We ge a data batch with `dataloader.get_batch(batch_size)`. This fetches a batch of word sequences.

We then need to transform that into a batch of word index. We can achieve this with the helper function
`word_to_index_transform(word_to_index, word_batch)` defined before.
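
A minimal sketch of those steps inside a training loop (assumptions: `dataloader`, `word_to_index_transform`, and `num_steps` are the helpers/constants referenced in the text, `index_batch` is a numpy array, and inputs/labels follow the usual next-word shift; only the placeholders and the optimizer/loss ops come from the code above):

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(num_steps):
        word_batch = dataloader.get_batch(batch_size)                      # batch of word sequences
        index_batch = word_to_index_transform(word_to_index, word_batch)   # words -> indices
        _, batch_loss = sess.run(
            [optimizer, loss],
            {x: index_batch[:, :-1],        # inputs: all but the last token
             label: index_batch[:, 1:],     # labels: inputs shifted by one
             teacher_forcing: True})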
Example #9

        # load data
        filename = 'pkl_data/' + str(sample_length) + '.pkl'
        x_train, y_train, x_val, y_val, x_test, y_test, val_SNRs, test_SNRs = utils.radioml_IQ_data(
            filename, mod_name, swap_dim=swap_dim)

        # callbacks
        early_stopping = EarlyStopping(monitor='val_loss', patience=patience)
        best_model_path = 'result/models/LSTM/' + str(
            sample_length) + '/' + str(mod_name) + 'best.h5'
        checkpointer = ModelCheckpoint(best_model_path,
                                       verbose=1,
                                       save_best_only=True)
        TB_dir = 'result/TB/' + str(mod_name) + '_' + str(sample_length)
        tensorboard = TensorBoard(TB_dir)

        model = utils.lstm(lr, input_dim)

        history = model.fit(
            x_train,
            y_train,
            epochs=max_epoch,
            batch_size=batch_size,
            verbose=1,
            shuffle=True,
            validation_data=(x_val, y_val),
            callbacks=[early_stopping, checkpointer, tensorboard])
        print('First stage finished, loss is stable')

        pf_min = 6.0
        pf_max = 7.9
        pf_test = LambdaCallback(on_epoch_end=lambda epoch, logs: utils.get_pf(
Example #10
    def _init_actor_net(self, scope, trainable=True, is_inference=False):
        my_initializer = tf.contrib.layers.xavier_initializer()
        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):

            last_output_dims = 0
            last_output = None
            last_hidden_state = None
            if USE_CNN:
                cnn_w_1 = tf.get_variable("cnn_w_1", [8, 8, C, 32],
                                          initializer=my_initializer)
                cnn_b_1 = tf.get_variable(
                    "cnn_b_1", [32], initializer=tf.constant_initializer(0.0))

                output1 = tf.nn.relu(
                    tf.nn.bias_add(
                        tf.nn.conv2d(self.s,
                                     cnn_w_1,
                                     strides=[1, 4, 4, 1],
                                     padding='SAME'), cnn_b_1))
                cnn_w_2 = tf.get_variable("cnn_w_2", [4, 4, 32, 64],
                                          initializer=my_initializer)
                cnn_b_2 = tf.get_variable(
                    "cnn_b_2", [64], initializer=tf.constant_initializer(0.0))

                output2 = tf.nn.relu(
                    tf.nn.bias_add(
                        tf.nn.conv2d(output1,
                                     cnn_w_2,
                                     strides=[1, 2, 2, 1],
                                     padding='SAME'), cnn_b_2))
                cnn_w_3 = tf.get_variable("cnn_w_3", [3, 3, 64, 64],
                                          initializer=my_initializer)
                cnn_b_3 = tf.get_variable(
                    "cnn_b_3", [64], initializer=tf.constant_initializer(0.0))

                output3 = tf.nn.relu(
                    tf.nn.bias_add(
                        tf.nn.conv2d(output2,
                                     cnn_w_3,
                                     strides=[1, 1, 1, 1],
                                     padding='SAME'), cnn_b_3))
                last_output_dims = np.prod(
                    [v.value for v in output3.get_shape()[1:]])
                last_output = tf.reshape(output3, [-1, last_output_dims])
            else:
                flat_output_size = INPUT_DIMENS_FLAT
                flat_output = tf.reshape(self.s, [-1, flat_output_size],
                                         name='flat_output')

                fc_W_1 = tf.get_variable(
                    shape=[flat_output_size, self.layer_size],
                    name='fc_W_1',
                    trainable=trainable,
                    initializer=my_initializer)
                fc_b_1 = tf.get_variable(shape=[1],
                                         name='fc_b_1',
                                         trainable=trainable)

                output1 = tf.nn.relu(tf.matmul(flat_output, fc_W_1) + fc_b_1)

                fc_W_2 = tf.get_variable(
                    shape=[self.layer_size, self.layer_size],
                    name='fc_W_2',
                    trainable=trainable,
                    initializer=my_initializer)
                fc_b_2 = tf.get_variable(shape=[1],
                                         name='fc_b_2',
                                         trainable=trainable)

                output2 = tf.nn.relu(tf.matmul(output1, fc_W_2) + fc_b_2)

                fc_W_3 = tf.get_variable(
                    shape=[self.layer_size, self.layer_size],
                    name='fc_W_3',
                    trainable=trainable,
                    initializer=my_initializer)
                fc_b_3 = tf.get_variable(shape=[1],
                                         name='fc_b_3',
                                         trainable=trainable)

                output3 = tf.nn.relu(tf.matmul(output2, fc_W_3) + fc_b_3)

                last_output_dims = self.layer_size
                last_output = output3

            # Add lstm here, to convert last_output to lstm_output
            tf.summary.histogram("lstm_input", last_output)
            tf.summary.histogram("lstm_input_hidden_state", self.lstm_hidden)

            use_tensorflow_lstm = True
            if use_tensorflow_lstm:
                lstm_input = last_output
                use_keras = True
                if use_keras:
                    lstm_layer = tf.keras.layers.LSTM(
                        HIDDEN_STATE_LEN,
                        return_state=True,
                        return_sequences=True,
                        kernel_initializer=my_initializer,
                        recurrent_initializer=my_initializer)

                    fold_time_step = TIME_STEP
                    if is_inference:
                        fold_time_step = 1
                    reshaped_lstm_input = tf.reshape(
                        lstm_input, [-1, fold_time_step, last_output_dims])

                    reshaped_lstm_mask = tf.reshape(self.lstm_mask,
                                                    [-1, fold_time_step])
                    reshaped_lstm_mask = 1 - reshaped_lstm_mask
                    reshaped_lstm_mask = tf.cast(reshaped_lstm_mask,
                                                 dtype=tf.bool)
                    c_old, h_old = tf.split(axis=1,
                                            num_or_size_splits=2,
                                            value=self.lstm_hidden)
                    c_old = c_old[::fold_time_step, ...]
                    h_old = h_old[::fold_time_step, ...]
                    reshaped_output, last_h, last_c = lstm_layer(
                        reshaped_lstm_input,  #mask=reshaped_lstm_mask,
                        initial_state=[h_old, c_old])
                    '''                                                
                    layer_weights = lstm_layer.get_weights()
                    
                    for idx in range(len(layer_weights)):
                        new_tensor = tf.convert_to_tensor(layer_weights[idx])
                        tf.summary.histogram("lstm_layer_{}".format(idx), new_tensor)
                    '''

                    last_c = tf.reshape(last_c, [-1, HIDDEN_STATE_LEN])
                    last_h = tf.reshape(last_h, [-1, HIDDEN_STATE_LEN])
                    last_hidden_state = tf.concat(axis=1,
                                                  values=[last_c, last_h])
                    last_output = tf.reshape(reshaped_output,
                                             [-1, HIDDEN_STATE_LEN])

                else:
                    lstm_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_STATE_LEN,
                                                        name='lstm_cell',
                                                        dynamic=True)
                    c_old, h_old = tf.split(axis=1,
                                            num_or_size_splits=2,
                                            value=self.lstm_hidden)
                    # keep one (c, h) pair per sequence, mirroring the Keras branch
                    c_old = c_old[::TIME_STEP, ...]
                    h_old = h_old[::TIME_STEP, ...]
                    initial_state = tf.nn.rnn_cell.LSTMStateTuple(c_old, h_old)
                    lstm_input = tf.reshape(
                        lstm_input,
                        (-1, TIME_STEP, *(lstm_input.get_shape()[1:])))
                    lstm_input = tf.unstack(lstm_input, axis=1)
                    outputs, final_state = tf.nn.static_rnn(
                        cell=lstm_cell,
                        inputs=lstm_input,
                        initial_state=initial_state)
                    # static_rnn returns one output per time step; flatten back to
                    # [batch * time, hidden] and concat (c, h) like the Keras branch
                    last_output = tf.reshape(tf.stack(outputs, axis=1),
                                             [-1, HIDDEN_STATE_LEN])
                    last_hidden_state = tf.concat(
                        axis=1, values=[final_state.c, final_state.h])
                    # last_output, last_hidden_state = lstm_cell(inputs=lstm_input, state=(c_old, h_old))
                    # last_output, last_hidden_state = tf.nn.dynamic_rnn(cell=lstm_cell, inputs=lstm_input, initial_state=initial_state)
                    # last_output = last_output[0]

            else:
                lstm_input = last_output
                last_output, last_hidden_state = utils.lstm(
                    lstm_input, self.is_inference, self.lstm_hidden,
                    self.lstm_mask, 'lstm', HIDDEN_STATE_LEN, LSTM_CELL_COUNT,
                    my_initializer)

            tf.summary.histogram("last_output", last_output)
            tf.summary.histogram("last_hidden_state", last_hidden_state)

            last_output_dims = HIDDEN_STATE_LEN
            a_logits_arr = []
            a_prob_arr = []
            #self.a_space_keys
            for k in self.a_space_keys:
                output_num = self.a_space[k]
                # actor network
                weight_layer_name = 'fc_W_{}'.format(k)
                bias_layer_name = 'fc_b_{}'.format(k)
                logit_layer_name = '{}_logits'.format(k)
                head_layer_name = '{}_head'.format(k)

                fc_W_a = tf.get_variable(shape=[last_output_dims, output_num],
                                         name=weight_layer_name,
                                         trainable=trainable,
                                         initializer=my_initializer)
                fc_b_a = tf.get_variable(
                    shape=[1],
                    name=bias_layer_name,
                    trainable=trainable,
                    initializer=tf.constant_initializer(0.0))

                a_logits = tf.matmul(last_output, fc_W_a) + fc_b_a
                a_logits_arr.append(a_logits)

                a_prob = stable_softmax(
                    a_logits, head_layer_name)  #tf.nn.softmax(a_logits)
                a_prob_arr.append(a_prob)
                tf.summary.histogram("a_prob_{}".format(k), a_prob)

            # value network
            fc1_W_v = tf.get_variable(shape=[last_output_dims, 1],
                                      name='fc1_W_v',
                                      trainable=trainable,
                                      initializer=my_initializer)
            fc1_b_v = tf.get_variable(shape=[1],
                                      name='fc1_b_v',
                                      trainable=trainable,
                                      initializer=tf.constant_initializer(0.0))

            value = tf.matmul(last_output, fc1_W_v) + fc1_b_v
            value = tf.reshape(value, [-1], name="value_output")
            tf.summary.histogram("value", value)

            summary_merged = tf.summary.merge_all()

            return a_prob_arr, a_logits_arr, value, last_hidden_state, summary_merged