Exemplo n.º 1
0
 def __init__(self, n_actions, rnn_units=256, conv_units=32):
     """Conv-recurrent policy: four strided conv layers, a dense encoder,
     a two-cell GRU core, and separate policy/value heads."""
     super(CRPolicy, self).__init__()
     # Recurrent core: two stacked GRU cells with orthogonal kernels.
     self.gru = MultiRNNCell(
         [GRUCell(rnn_units, kernel_initializer=orthogonal()),
          GRUCell(rnn_units, kernel_initializer=orthogonal())])
     self.state = self.gru.zero_state(batch_size=1, dtype=tf.float32)

     def _conv():
         # One stride-2 ELU conv stage; a fresh layer per call.
         return Conv2D(conv_units,
                       3,
                       strides=2,
                       activation='elu',
                       kernel_initializer=orthogonal())

     self.cv1 = _conv()
     self.cv2 = _conv()
     self.cv3 = _conv()
     self.cv4 = _conv()
     self.flatten = Flatten()
     self.fc1 = Dense(rnn_units, kernel_initializer=orthogonal())
     # Low-gain (0.01) init keeps the initial policy near-uniform.
     self.pol = Dense(n_actions, kernel_initializer=orthogonal(0.01))
     self.val = Dense(1, kernel_initializer=orthogonal())
Exemplo n.º 2
0
    def __init__(self, nact, rnn_units=256):
        """Build a recurrent policy graph (TF1 graph mode).

        Args:
            nact: number of discrete actions (size of the logits layer).
            rnn_units: width of the dense encoder and of each GRU cell.
        """
        # Recurrent core: two stacked GRU cells with orthogonal kernels.
        cells = [GRUCell(rnn_units, kernel_initializer=orthogonal()) for _ in range(2)]
        self.gru = MultiRNNCell(cells)
        self.state = self.gru.zero_state(batch_size=1, dtype=tf.float32)

        # NOTE(review): placeholder is unshaped — any float32 tensor is
        # accepted; confirm the expected observation layout against callers.
        self.obs_ph = tf.placeholder(dtype=tf.float32)
        fc1 = dense(self.obs_ph, rnn_units, activation=elu, kernel_initializer=orthogonal(), name='fc1')
        # Add a leading axis so dynamic_rnn sees a [1, T, units] sequence.
        expand = tf.expand_dims(fc1, axis=0, name='expand')
        # dynamic_rnn also overwrites self.state with the final RNN state.
        rnn_out, self.state = dynamic_rnn(self.gru, expand, initial_state=self.state)
        reshape = tf.reshape(rnn_out, shape=[-1, rnn_units], name='reshape')

        # Low-gain (0.01) init keeps the initial policy near-uniform.
        self.logits = dense(reshape, nact, kernel_initializer=orthogonal(0.01), name='logits')
        self.pi = tf.nn.softmax(self.logits, name='pi')
        self.action = boltzmann(self.pi)  # stochastic action from pi (helper defined elsewhere)
        # NOTE(review): the value head reads the logits rather than the RNN
        # features; the sibling conv variant feeds `reshape` here — confirm intended.
        self.value = dense(self.logits, 1, kernel_initializer=orthogonal(), name='value')
Exemplo n.º 3
0
    def build(self, input_shape: Union[list, tuple]):
        """Create this layer's weights: a zeroed initial state plus an
        orthogonally-initialised power-type encoding table."""
        super().build(input_shape)

        # Initial recurrent state for batch size 1, all zeros.
        initial_state = tf.zeros(self.state_shape(1))
        self.__create_weights__(initial_state, 'initial_state')

        # 14-row orthogonal embedding table for power types.
        ortho = init.orthogonal()
        encoding = ortho((14, self.__power_type_encoding_size__))
        self.__create_weights__(encoding, 'power_type_encoding')
Exemplo n.º 4
0
 def __init__(self, n_actions, rnn_units=256):
     """Recurrent MLP policy: dense encoder, two-cell GRU core, and
     separate policy/value heads."""
     super(RPolicy, self).__init__()
     # Recurrent core: two stacked GRU cells with orthogonal kernels.
     gru_cells = []
     for _ in range(2):
         gru_cells.append(GRUCell(rnn_units, kernel_initializer=orthogonal()))
     self.gru = MultiRNNCell(gru_cells)
     self.state = self.gru.zero_state(batch_size=1, dtype=tf.float32)
     self.fc1 = Dense(rnn_units, activation='relu')
     self.pol = Dense(n_actions)
     self.val = Dense(1)
Exemplo n.º 5
0
 def __init__(self, n_actions):
     """Conv-recurrent policy: three conv+maxpool stages, three dense
     layers, a two-cell GRU core, and policy/value heads."""
     super(CRPolicy, self).__init__()

     def _ortho(gain=np.sqrt(2)):
         # Fresh orthogonal initializer for each layer.
         return orthogonal(gain)

     self.gru = MultiRNNCell(
         [GRUCell(128, kernel_initializer=_ortho()) for _ in range(2)])
     self.s0 = self.gru.zero_state(batch_size=1, dtype=tf.float32)

     self.cv1 = Conv2D(32, 3, activation='relu', kernel_initializer=_ortho())
     self.mp1 = MaxPool2D()
     self.cv2 = Conv2D(32, 3, activation='relu', kernel_initializer=_ortho())
     self.mp2 = MaxPool2D()
     self.cv3 = Conv2D(32, 3, activation='relu', kernel_initializer=_ortho())
     self.mp3 = MaxPool2D()
     self.flatten = Flatten()

     self.fc1 = Dense(128, activation='relu', kernel_initializer=_ortho())
     self.fc2 = Dense(100, activation='relu', kernel_initializer=_ortho())
     self.fc3 = Dense(100, activation='relu', kernel_initializer=_ortho())
     self.pol = Dense(n_actions, kernel_initializer=_ortho(0.01))  # low-gain policy head
     self.val = Dense(1, kernel_initializer=_ortho(1))
Exemplo n.º 6
0
    def __init__(self, h, w, c, nact, rnn_units=256, cnn_units=32):
        """Build a conv + recurrent policy graph (TF1 graph mode).

        Args:
            h, w, c: observation height, width and channel count.
            nact: number of discrete actions.
            rnn_units: width of the dense layer and of each GRU cell.
            cnn_units: filters per conv layer.
        """
        # Recurrent core: two stacked GRU cells with orthogonal kernels.
        cells = [GRUCell(rnn_units, kernel_initializer=orthogonal()) for _ in range(2)]
        self.gru = MultiRNNCell(cells)
        self.state = self.gru.zero_state(batch_size=1, dtype=tf.float32)

        self.obs_ph = tf.placeholder(dtype=tf.float32, shape=[None, h, w, c])
        # Four stride-2 ELU convs repeatedly shrink the spatial dims.
        cv1 = conv2d(self.obs_ph, cnn_units, 3, strides=2, activation=elu, kernel_initializer=orthogonal(), name='cv1')
        cv2 = conv2d(cv1, cnn_units, 3, strides=2, activation=elu, kernel_initializer=orthogonal(), name='cv2')
        cv3 = conv2d(cv2, cnn_units, 3, strides=2, activation=elu, kernel_initializer=orthogonal(), name='cv3')
        cv4 = conv2d(cv3, cnn_units, 3, strides=2, activation=elu, kernel_initializer=orthogonal(), name='cv4')
        flat = flatten(cv4, name='flatten')
        fc1 = dense(flat, rnn_units, activation=elu, kernel_initializer=orthogonal(), name='fc1')
        # Treat the batch as one time sequence: [T, units] -> [1, T, units].
        expand = tf.expand_dims(fc1, axis=0, name='expand')
        # dynamic_rnn also overwrites self.state with the final RNN state.
        rnn_out, self.state = dynamic_rnn(self.gru, expand, initial_state=self.state)
        reshape = tf.reshape(rnn_out, shape=[-1, rnn_units], name='reshape')

        # Low-gain (0.01) init keeps the initial policy near-uniform.
        self.logits = dense(reshape, nact, kernel_initializer=orthogonal(0.01), name='logits')
        self.pi = tf.nn.softmax(self.logits, name='pi')
        self.action = boltzmann(self.pi)  # stochastic action from pi (helper defined elsewhere)
        value = dense(reshape, 1, kernel_initializer=orthogonal(), name='value')
        self.value = tf.squeeze(value)  # drop the trailing singleton dim
Exemplo n.º 7
0
 def __init__(self,
              num_output,
              num_layers=2,
              res_block_size=2,
              activation=tf.nn.relu,
              kernel_initializer=init.orthogonal(np.sqrt(2)),
              bias_initializer=init.zeros(),
              name=None):
     """Store encoder hyper-parameters; weights are created later in build().

     NOTE(review): the initializer defaults are evaluated once at class
     definition time, so all instances share the same initializer
     objects — confirm those initializers are stateless.
     """
     super().__init__(name)
     self.__num_output__ = num_output      # final output width
     self.__num_layers__ = num_layers      # number of stacked layers
     self.__block_size__ = res_block_size  # layers per residual block
     self.__activation__ = activation
     self.__kernel_initializer__ = kernel_initializer
     self.__bias_initializer__ = bias_initializer
Exemplo n.º 8
0
 def __init__(self, n_actions):
     """Recurrent MLP policy: three dense layers, a two-cell GRU core,
     and separate policy/value heads."""
     super(RPolicy, self).__init__()

     def _ortho(gain=np.sqrt(2)):
         # Fresh orthogonal initializer for each layer.
         return orthogonal(gain)

     self.gru = MultiRNNCell(
         [GRUCell(128, kernel_initializer=_ortho()) for _ in range(2)])
     self.s0 = self.gru.zero_state(batch_size=1, dtype=tf.float32)
     self.fc1 = Dense(128, activation='relu', kernel_initializer=_ortho())
     self.fc2 = Dense(100, activation='relu', kernel_initializer=_ortho())
     self.fc3 = Dense(100, activation='relu', kernel_initializer=_ortho())
     self.pol = Dense(n_actions, kernel_initializer=_ortho(0.01))  # low-gain policy head
     self.val = Dense(1, kernel_initializer=_ortho())
Exemplo n.º 9
0
    def build(self, input_shape: Union[list, tuple]):
        """
            Input shape format in NCH
            ConvolutionEncoder kernel upgrades the shape to NCH1 to perform convolution

            Creates an initial projection kernel/bias, one conv kernel/bias
            per layer, and — at the end of every residual block — a shortcut
            projection kernel/bias plus an all-ones channel gate.
        """
        super().build(input_shape)
        l_initializer = init.orthogonal()
        k_initializer = self.__kernel_initializer__
        b_initializer = self.__bias_initializer__

        # Initial projection kernel: maps the C input channels to
        # __projection_size__[0] while shrinking H toward __projection_size__[1].
        kernel_size = (int(input_shape[-1] - self.__projection_size__[1] + 1),
                       1, input_shape[1], self.__projection_size__[0])
        self.__create_weights__(l_initializer(kernel_size),
                                'initial_projection_kernel')
        self.__create_weights__(b_initializer(self.__projection_size__[0]),
                                'initial_projection_bias')

        projection_in_c = self.__projection_size__[0]  # channels entering the current block
        projection_out_k = 1  # stride accumulated since the current block began
        for idx in range(self.__num_layers__):
            # Channel count doubles each layer, landing on __num_output__ at the end.
            out_c = int(self.__num_output__ /
                        (2**(self.__num_layers__ - idx - 1)))

            # 3x1 conv kernel; kernel_size[-1] carries the previous layer's width.
            kernel_size = (3, 1, kernel_size[-1], out_c)
            self.__create_weights__(k_initializer(kernel_size),
                                    "conv_kernel_%d" % idx)
            self.__create_weights__(b_initializer(out_c), "conv_bias_%d" % idx)

            projection_out_k = projection_out_k * self.__strides__[idx]

            # End of a residual block: shortcut projection + channel gate.
            if idx % self.__block_size__ == 1:
                residx = idx // self.__block_size__

                p_kernel_size = (projection_out_k, 1, projection_in_c, out_c)
                self.__create_weights__(l_initializer(p_kernel_size),
                                        "projection_kernel_%d" % residx)
                self.__create_weights__(b_initializer(out_c),
                                        "projection_bias_%d" % residx)

                # Gate starts fully open (all ones).
                initial_gate_value = tf.ones((1, out_c, 1, 1))
                self.__create_weights__(initial_gate_value,
                                        "cgate_kernel_%d" % residx)

                projection_in_c = out_c
                projection_out_k = 1
Exemplo n.º 10
0
 def __init__(self,
              num_output,
              initial_dim=(8, 24),
              ssizes=(2, 2, 2, 3),
              res_block_size=2,
              activation=tf.nn.relu,
              kernel_initializer=init.orthogonal(np.sqrt(2)),
              bias_initializer=init.zeros(),
              name=None):
     """Store convolution-encoder hyper-parameters; weights are created in build().

     NOTE(review): the initializer defaults are evaluated once at class
     definition time, so all instances share the same initializer
     objects — confirm those initializers are stateless.
     """
     super().__init__(name)
     self.__num_output__ = num_output        # final channel count
     self.__num_layers__ = len(ssizes)       # one conv layer per stride entry
     self.__projection_size__ = initial_dim  # presumably (channels, height) of the initial projection — verify against build()
     self.__strides__ = ssizes               # per-layer stride factors
     self.__block_size__ = res_block_size    # layers per residual block
     self.__activation__ = activation
     self.__kernel_initializer__ = kernel_initializer
     self.__bias_initializer__ = bias_initializer
Exemplo n.º 11
0
    def build(self, input_shape: Union[list, tuple]):
        """
            Input shape format in NCH
            DepthEncoder kernel upgrades the shape to NCH1 to perform convolution

            Creates one 1x1 conv kernel/bias per layer, growing the channel
            count linearly from input_shape[1] to __num_output__, plus a
            shortcut projection and an all-ones channel gate at the end of
            every residual block.
        """
        super().build(input_shape)
        l_initializer = init.orthogonal()
        k_initializer = self.__kernel_initializer__
        b_initializer = self.__bias_initializer__

        # Channels added per non-final layer (integer step).
        inc_layer_count = (self.__num_output__ -
                           input_shape[1]) // self.__num_layers__
        kernel_size = (1, 1, 1, input_shape[1])
        projection_in_c = input_shape[1]  # channels entering the current block
        for idx in range(self.__num_layers__):
            if idx < self.__num_layers__ - 1:
                out_c = kernel_size[-1] + inc_layer_count
            else:
                # Last layer lands exactly on the requested output width
                # (absorbs any integer-division remainder).
                out_c = self.__num_output__

            # 1x1 conv kernel; kernel_size[-1] carries the previous layer's width.
            kernel_size = (1, 1, kernel_size[-1], out_c)
            self.__create_weights__(k_initializer(kernel_size),
                                    "conv_kernel_%d" % idx)
            self.__create_weights__(b_initializer(out_c), "conv_bias_%d" % idx)

            # End of a residual block: shortcut projection + channel gate.
            if idx % self.__block_size__ == 1:
                residx = idx // self.__block_size__

                p_kernel_size = (1, 1, projection_in_c, out_c)
                self.__create_weights__(l_initializer(p_kernel_size),
                                        "projection_kernel_%d" % residx)
                self.__create_weights__(b_initializer(out_c),
                                        "projection_bias_%d" % residx)

                # Gate starts fully open (all ones).
                initial_gate_value = tf.ones((1, out_c, 1, 1))
                self.__create_weights__(initial_gate_value,
                                        "cgate_kernel_%d" % residx)

                projection_in_c = out_c
Exemplo n.º 12
0
    def build(self, input_shape: list):
        """Create dense-stack weights: per-layer kernels/biases that widen
        the feature count linearly toward ``__num_output__``, plus a
        shortcut projection and an all-ones gate at the end of every
        residual block."""
        super().build(input_shape)

        proj_init = init.orthogonal()
        kern_init = self.__kernel_initializer__
        bias_init = self.__bias_initializer__

        n_layers = self.__num_layers__
        in_width = input_shape[1]
        # Width added at each non-final layer (integer step).
        growth = (self.__num_output__ - in_width) // n_layers

        prev_width = in_width    # width feeding the current layer
        res_in_width = in_width  # width at the start of the current residual block
        for layer in range(n_layers):
            is_last = layer == n_layers - 1
            # Last layer lands exactly on the requested output width.
            width = self.__num_output__ if is_last else prev_width + growth

            self.__create_weights__(kern_init((prev_width, width)),
                                    "dense_kernel_%d" % layer)
            self.__create_weights__(bias_init(width),
                                    "dense_bias_%d" % layer)

            # End of a residual block: shortcut projection + gate.
            if layer % self.__block_size__ == 1:
                block = layer // self.__block_size__

                self.__create_weights__(proj_init((res_in_width, width)),
                                        "projection_kernel_%d" % block)
                self.__create_weights__(bias_init(width),
                                        "projection_bias_%d" % block)

                # Gate starts fully open (all ones).
                self.__create_weights__(tf.ones((1, width)),
                                        "cgate_kernel_%d" % block)

                res_in_width = width

            prev_width = width
Exemplo n.º 13
0
 def __init__(self, nact):
     """Minimal linear policy: logits computed directly from the observation.

     Args:
         nact: number of discrete actions (size of the logits layer).
     """
     # NOTE(review): placeholder is unshaped — any float32 tensor is accepted.
     self.obs_ph = tf.placeholder(dtype=tf.float32)
     # Low-gain (0.01) init keeps the initial policy near-uniform.
     self.logits = dense(self.obs_ph, nact, kernel_initializer=orthogonal(0.01), name='logits')
     self.pi = tf.nn.softmax(self.logits, name='pi')
     self.action = boltzmann(self.pi)  # stochastic action from pi (helper defined elsewhere)
     # NOTE(review): the value head reads the logits rather than the raw
     # features; sibling examples feed pre-logit features — confirm intended.
     self.value = dense(self.logits, 1, kernel_initializer=orthogonal(), name='value')
Exemplo n.º 14
0
# Turn 1-based class labels into a one-hot matrix.
# NOTE(review): assigns into `testY` but sizes/indexes with `test_Y` —
# confirm both names refer to the intended arrays (labels assumed 1-based).
testY[np.arange(test_Y.shape[0]), test_Y-1] = 1  # one hot matrix

# experiment with small datasets
trainX = trainX[:1000]
trainY = trainY[:1000]

n = trainX.shape[0]


# Create the model
x = tf.placeholder(tf.float32, [None, NUM_FEATURES])
y_ = tf.placeholder(tf.float32, [None, NUM_CLASSES])

# Build the graph for the deep net

# One ReLU hidden layer (10 units) feeding a 6-way linear output, both L2-regularised.
hidden_layer = layers.dense(x, 10, activation=tf.nn.relu, kernel_initializer=init.orthogonal(
    np.sqrt(2)), bias_initializer=init.zeros(), kernel_regularizer=l2_regularizer(1e-6))
output_layer = layers.dense(hidden_layer, 6, kernel_initializer=init.orthogonal(
), bias_initializer=init.zeros(),  kernel_regularizer=l2_regularizer(1e-6))

# Softmax cross-entropy on the logits plus the collected L2 penalties.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=y_, logits=output_layer)
loss = tf.reduce_mean(cross_entropy) + get_regularization_loss()


# Create the gradient descent optimizer with the given learning rate.
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_op = optimizer.minimize(loss)

# Fraction of examples whose argmax prediction matches the label.
correct_prediction = tf.cast(
    tf.equal(tf.argmax(output_layer, 1), tf.argmax(y_, 1)), tf.float32)
accuracy = tf.reduce_mean(correct_prediction)
# experiment with small datasets
# NOTE(review): re-slices the already-truncated arrays (a no-op) —
# likely a copy/paste leftover.
trainX = trainX[:1000]
trainY = trainY[:1000]

# Regression sweep: rebuild a fresh graph for each hidden-layer width.
for neuron_count in neurons:
    tf.reset_default_graph()
    with tf.Session() as sess:

        # Create the model
        lr = tf.placeholder(tf.float32, [])
        x = tf.placeholder(tf.float32, [None, NUM_FEATURES])
        y_ = tf.placeholder(tf.float32, [None, 1])

        # Build the graph for the deep net
        hidden_layer = layers.dense(x, neuron_count, activation=tf.nn.relu, kernel_initializer=init.orthogonal(np.sqrt(2)),
                                    bias_initializer=init.zeros(), kernel_regularizer=l2_regularizer(1e-3))
        output_layer = layers.dense(hidden_layer, 1, kernel_initializer=init.orthogonal(), bias_initializer=init.zeros(),
                                    kernel_regularizer=l2_regularizer(1e-3))

        # MSE loss plus L2 penalties.
        loss = tf.reduce_mean(tf.square(y_ - output_layer)
                              ) + get_regularization_loss()

        # Create the gradient descent optimizer with the given learning rate.
        optimizer = tf.train.GradientDescentOptimizer(lr)
        train_op = optimizer.minimize(loss)
        # `error` tracks the unregularised MSE for reporting.
        error = tf.reduce_mean(tf.square(y_ - output_layer))

        fold_errors = []
        for o in range(NUM_FOLDS):
            # Re-initialise weights for each cross-validation fold.
            sess.run(tf.global_variables_initializer())
Exemplo n.º 16
0

# experiment with small datasets
trainX = trainX[:1000]
# Standardise features to zero mean, unit variance.
trainX = (trainX - np.mean(trainX, axis=0)) / np.std(trainX, axis=0)
trainY = trainY[:1000]

n = trainX.shape[0]


# Create the model
x = tf.placeholder(tf.float32, [None, NUM_FEATURES])
y_ = tf.placeholder(tf.float32, [None, NUM_CLASSES])

# Build the graph for the deep net: one SELU hidden layer (10 units)
# feeding a linear NUM_CLASSES-way output, both L2-regularised.
hidden_layer = layers.dense(x, 10, activation=tf.nn.selu, kernel_initializer=init.orthogonal(),
                            bias_initializer=init.zeros(),
                            kernel_regularizer=l2_regularizer(1e-6))
# BUG FIX: the output layer previously read `x` directly, leaving the
# hidden layer disconnected from the graph; feed it the hidden layer,
# matching the sibling scripts in this file.
output_layer = layers.dense(hidden_layer, NUM_CLASSES, kernel_initializer=init.orthogonal(), bias_initializer=init.zeros(),
                            kernel_regularizer=l2_regularizer(1e-6))

# Softmax cross-entropy on the logits plus the collected L2 penalties.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=output_layer)
loss = tf.reduce_mean(cross_entropy) + get_regularization_loss()

# Create the gradient descent optimizer with the given learning rate.
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_op = optimizer.minimize(loss)

# Fraction of examples whose argmax prediction matches the label.
correct_prediction = tf.cast(tf.equal(tf.argmax(output_layer, 1), tf.argmax(y_, 1)), tf.float32)
accuracy = tf.reduce_mean(correct_prediction)
Exemplo n.º 17
0
# Standardise features to zero mean, unit variance.
trainX = (trainX - np.mean(trainX, axis=0)) / np.std(trainX, axis=0)

# experiment with small datasets
trainX = trainX[:1000]
trainY = trainY[:1000]

# Create the model
x = tf.placeholder(tf.float32, [None, NUM_FEATURES])
y_ = tf.placeholder(tf.float32, [None, 1])

# Build the graph for the deep net: one 30-unit ReLU hidden layer feeding
# a single linear output (regression), both L2-regularised.
hidden_layer = layers.dense(x,
                            30,
                            activation=tf.nn.relu,
                            kernel_initializer=init.orthogonal(np.sqrt(2)),
                            bias_initializer=init.zeros(),
                            kernel_regularizer=l2_regularizer(1e-3))
output_layer = layers.dense(hidden_layer,
                            1,
                            kernel_initializer=init.orthogonal(),
                            bias_initializer=init.zeros(),
                            kernel_regularizer=l2_regularizer(1e-3))

# MSE loss plus L2 penalties.
loss = tf.reduce_mean(tf.square(y_ - output_layer)) + get_regularization_loss()

# Create the gradient descent optimizer with the given learning rate.
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_op = optimizer.minimize(loss)
# `error` tracks the unregularised MSE for reporting.
error = tf.reduce_mean(tf.square(y_ - output_layer))
Exemplo n.º 18
0
    def __init__(self, name, session, observation_space, action_space):
        """Build an actor-critic graph under variable scope ``name``.

        The actor maps observations to tanh actions rescaled to the action
        space's bounds. The critic scores (observation, action) pairs; a
        second critic tower reuses the same weights to score the actor's
        own actions (``state_value_head``).

        Args:
            name: variable-scope prefix for all created variables.
            session: TF session, stored for later use (not run here).
            observation_space: space describing observations (dtype/shape used).
            action_space: 1-D ``spaces.Box``, symmetric around zero.
        """
        self.name = name

        # Only 1-D, zero-symmetric continuous action spaces are supported.
        assert isinstance(action_space, spaces.Box)
        assert len(action_space.shape) == 1
        assert (np.abs(action_space.low) == action_space.high
                ).all()  # we assume symmetric actions.

        self.session = session

        with tf.variable_scope(self.name):
            self.observations_input = tf.placeholder(
                observation_space.dtype,
                shape=tuple([None] + list(observation_space.shape)),
                name="observations_input")

            self.actions_input = tf.placeholder(
                action_space.dtype,
                shape=tuple([None] + list(action_space.shape)),
                name="actions_input")

            self.actions_size = action_space.shape[0]

            # Critic input for externally supplied actions.
            self.combined_input = tf.concat(
                [self.observations_input, self.actions_input],
                axis=1,
                name="combined_input")

            # Actor: two 64-unit tanh layers (orthogonal init, gain sqrt(2))
            # and a low-gain tanh head producing actions in [-1, 1].
            with tf.variable_scope("actor"):
                self.actor01 = layers.dense(
                    inputs=self.observations_input,
                    units=64,
                    activation=tf.tanh,
                    name="layer_one",
                    bias_initializer=initializers.zeros(),
                    kernel_initializer=initializers.orthogonal(
                        gain=np.sqrt(2)))

                self.actor02 = layers.dense(
                    inputs=self.actor01,
                    units=64,
                    activation=tf.tanh,
                    name="layer_two",
                    bias_initializer=initializers.zeros(),
                    kernel_initializer=initializers.orthogonal(
                        gain=np.sqrt(2)))

                self.actor_head = layers.dense(
                    inputs=self.actor02,
                    units=self.actions_input.shape[1].value,
                    activation=tf.tanh,
                    name="head",
                    bias_initializer=initializers.zeros(),
                    kernel_initializer=initializers.orthogonal(gain=0.01))

                # Scale tanh output to the environment's action bounds.
                self.actor_head_rescaled = self.actor_head * action_space.high

            # Critic input for the actor's own (rescaled) actions.
            self.model_computed_combined = tf.concat(
                [self.observations_input, self.actor_head_rescaled],
                axis=1,
                name="model_computed_combined")

            with tf.variable_scope("critic"):
                # Critic tower over externally supplied actions.
                self.critic01 = layers.dense(
                    inputs=self.combined_input,
                    units=64,
                    activation=tf.tanh,
                    name="layer_one",
                    bias_initializer=initializers.zeros(),
                    kernel_initializer=initializers.orthogonal(
                        gain=np.sqrt(2)))

                self.critic02 = layers.dense(
                    inputs=self.critic01,
                    units=64,
                    activation=tf.tanh,
                    name="layer_two",
                    bias_initializer=initializers.zeros(),
                    kernel_initializer=initializers.orthogonal(
                        gain=np.sqrt(2)))

                # Final value layer uses a small uniform init.
                self.action_value_head = layers.dense(
                    inputs=self.critic02,
                    units=1,
                    activation=None,
                    name="head",
                    bias_initializer=initializers.zeros(),
                    kernel_initializer=initializers.random_uniform(
                        -3.0e-3, 3.0e-3))

                # Second tower over the actor's actions, sharing weights
                # with the tower above via reuse=True on matching names.
                self.model_critic01 = layers.dense(
                    inputs=self.model_computed_combined,
                    units=64,
                    activation=tf.tanh,
                    name="layer_one",
                    bias_initializer=initializers.zeros(),
                    kernel_initializer=initializers.orthogonal(
                        gain=np.sqrt(2)),
                    reuse=
                    True  # Use the same weights for 'critic' and for 'model critic'
                )

                self.model_critic02 = layers.dense(
                    inputs=self.model_critic01,
                    units=64,
                    activation=tf.tanh,
                    name="layer_two",
                    bias_initializer=initializers.zeros(),
                    kernel_initializer=initializers.orthogonal(
                        gain=np.sqrt(2)),
                    reuse=
                    True  # Use the same weights for 'critic' and for 'model critic'
                )

                self.state_value_head = layers.dense(
                    inputs=self.model_critic02,
                    units=1,
                    activation=None,
                    name="head",
                    bias_initializer=initializers.zeros(),
                    kernel_initializer=initializers.random_uniform(
                        -3.0e-3, 3.0e-3),
                    reuse=
                    True  # Use the same weights for 'critic' and for 'model critic'
                )