Пример #1
0
    def build_model(self):
        l2_regularization_kernel = 1e-5

        # Input Layer
        input = layers.Input(shape=(self.state_size,), name='input_states')

        # Hidden Layers
        model = layers.Dense(units=300, kernel_regularizer=regularizers.l2(l2_regularization_kernel))(input)
        model = layers.BatchNormalization()(model)
        model = layers.LeakyReLU(1e-2)(model)

        model = layers.Dense(units=400, kernel_regularizer=regularizers.l2(l2_regularization_kernel))(model)
        model = layers.BatchNormalization()(model)
        model = layers.LeakyReLU(1e-2)(model)

        model = layers.Dense(units=200, kernel_regularizer=regularizers.l2(l2_regularization_kernel))(model)
        model = layers.BatchNormalization()(model)
        model = layers.LeakyReLU(1e-2)(model)

        # Our output layer - a fully connected layer
        output = layers.Dense(units=self.action_size, activation='tanh', kernel_regularizer=regularizers.l2(l2_regularization_kernel),
                               kernel_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3), name='output_actions')(model)

        # Keras model
        self.model = models.Model(inputs=input, outputs=output)

        # Define loss and optimizer
        action_gradients = layers.Input(shape=(self.action_size,))
        loss = K.mean(-action_gradients * output)
        optimizer = optimizers.Adam(lr=1e-4)

        update_operation = optimizer.get_updates(params=self.model.trainable_weights, loss=loss)
        self.train_fn = K.function(inputs=[self.model.input, action_gradients, K.learning_phase()],
            outputs=[], updates=update_operation)
Пример #2
0
    def build_model(self):
        l2_kernel_regularization = 1e-5

        # Define input layers
        input_states = layers.Input(shape=(self.state_size, ),
                                    name='input_states')
        input_actions = layers.Input(shape=(self.action_size, ),
                                     name='input_actions')

        # Hidden layers for states
        model_states = layers.Dense(
            units=32,
            kernel_regularizer=regularizers.l2(l2_kernel_regularization))(
                input_states)
        model_states = layers.BatchNormalization()(model_states)
        model_states = layers.LeakyReLU(1e-2)(model_states)

        model_states = layers.Dense(
            units=64,
            kernel_regularizer=regularizers.l2(l2_kernel_regularization))(
                model_states)
        model_states = layers.BatchNormalization()(model_states)
        model_states = layers.LeakyReLU(1e-2)(model_states)

        # Hidden layers for actions
        model_actions = layers.Dense(
            units=64,
            kernel_regularizer=regularizers.l2(l2_kernel_regularization))(
                input_actions)
        model_actions = layers.BatchNormalization()(model_actions)
        model_actions = layers.LeakyReLU(1e-2)(model_actions)

        # Both models merge here
        model = layers.add([model_states, model_actions])

        # Fully connected and batch normalization
        model = layers.Dense(units=32,
                             kernel_regularizer=regularizers.l2(
                                 l2_kernel_regularization))(model)
        model = layers.BatchNormalization()(model)
        model = layers.LeakyReLU(1e-2)(model)

        # Q values / output layer
        Q_values = layers.Dense(
            units=1,
            activation=None,
            kernel_regularizer=regularizers.l2(l2_kernel_regularization),
            kernel_initializer=initializers.RandomUniform(minval=-5e-3,
                                                          maxval=5e-3),
            name='output_Q_values')(model)

        # Keras wrap the model
        self.model = models.Model(inputs=[input_states, input_actions],
                                  outputs=Q_values)
        optimizer = optimizers.Adam(lr=1e-2)
        self.model.compile(optimizer=optimizer, loss='mse')
        action_gradients = K.gradients(Q_values, input_actions)
        self.get_action_gradients = K.function(
            inputs=[*self.model.input, K.learning_phase()],
            outputs=action_gradients)
Пример #3
0
    def build_model(self):
        """Build an actor (policy) network that maps states -> actions."""
        # Define input layer (states)
        states = layers.Input(shape=(self.state_size, ), name='states')
        '''# Add hidden layers
        net = layers.Dense(units=32, activation='relu')(states)
        net = layers.Dense(units=64, activation='relu')(net)
        net = layers.Dense(units=32, activation='relu')(net)
        
        # Try different layer sizes, activations, add batch normalization, regularizers, etc.

        # Add final output layer with sigmoid activation
        raw_actions = layers.Dense(units=self.action_size, activation='sigmoid',
            name='raw_actions')(net)
        '''
        ###################################
        # Add hidden layers
        net = layers.Dense(units=400,
                           kernel_regularizer=regularizers.l2(1e-6))(states)
        net = layers.BatchNormalization()(net)
        net = layers.LeakyReLU(1e-2)(net)
        net = layers.Dense(units=300,
                           kernel_regularizer=regularizers.l2(1e-6))(net)
        net = layers.BatchNormalization()(net)
        net = layers.LeakyReLU(1e-2)(net)

        # Add final output layer with sigmoid activation
        raw_actions = layers.Dense(
            units=self.action_size,
            activation='sigmoid',
            name='raw_actions',
            kernel_initializer=initializers.RandomUniform(minval=-0.003,
                                                          maxval=0.003))(net)
        #######################################

        # Scale [0, 1] output for each action dimension to proper range
        actions = layers.Lambda(lambda x:
                                (x * self.action_range) + self.action_low,
                                name='actions')(raw_actions)

        # Create Keras model
        self.model = models.Model(inputs=states, outputs=actions)

        # Define loss function using action value (Q value) gradients
        action_gradients = layers.Input(shape=(self.action_size, ))
        loss = K.mean(-action_gradients * actions)

        # Incorporate any additional losses here (e.g. from regularizers)

        # Define optimizer and training function
        optimizer = optimizers.Adam(lr=1e-6)
        updates_op = optimizer.get_updates(params=self.model.trainable_weights,
                                           loss=loss)
        self.train_fn = K.function(
            inputs=[self.model.input, action_gradients,
                    K.learning_phase()],
            outputs=[],
            updates=updates_op)
Пример #4
0
def res_block(inputs, size):
    kernel_l2_reg = 1e-3
    net = layers.Dense(size,
                       activation=None,
                       kernel_regularizer=regularizers.l2(kernel_l2_reg),
                       kernel_initializer=initializers.RandomUniform(
                           minval=-5e-3, maxval=5e-3))(inputs)
    net = layers.BatchNormalization()(net)
    net = layers.LeakyReLU(1e-2)(net)

    net = layers.Dense(size,
                       activation=None,
                       kernel_regularizer=regularizers.l2(kernel_l2_reg),
                       kernel_initializer=initializers.RandomUniform(
                           minval=-5e-3, maxval=5e-3))(net)
    net = layers.BatchNormalization()(net)
    net = layers.LeakyReLU(1e-2)(net)
    net = layers.add([inputs, net])
    return net
Пример #5
0
    def build_model(self):
        """Build a critic (value) network that maps (state, action) pairs -> Q-values."""
        # Define input layers
        states = layers.Input(shape=(self.state_size, ), name='states')
        actions = layers.Input(shape=(self.action_size, ), name='actions')

        # Add hidden layer(s) for state pathway
        net_states = layers.Dense(
            units=300, kernel_regularizer=regularizers.l2(0.00001))(states)
        net_states = layers.BatchNormalization()(net_states)
        net_states = layers.LeakyReLU(0.01)(net_states)
        net_states = layers.Dense(
            units=400, kernel_regularizer=regularizers.l2(0.00001))(net_states)
        net_states = layers.BatchNormalization()(net_states)
        net_states = layers.LeakyReLU(0.01)(net_states)

        # Add hidden layer(s) for action pathway
        net_actions = layers.Dense(
            units=400, kernel_regularizer=regularizers.l2(0.00001))(actions)
        net_actions = layers.BatchNormalization()(net_actions)
        net_actions = layers.LeakyReLU(0.01)(net_actions)

        # Try different layer sizes, activations, add batch normalization, regularizers, etc.

        # Combine state and action pathways
        net = layers.Add()([net_states, net_actions])
        net = layers.Activation('relu')(net)

        # Add more layers to the combined network if needed
        net = layers.Dense(units=256,
                           kernel_regularizer=regularizers.l2(0.00001))(net)
        net = layers.BatchNormalization()(net)
        net = layers.Activation(activation='relu')(net)
        # Add final output layer to prduce action values (Q values)
        Q_values = layers.Dense(units=1,
                                activation=None,
                                name='q_values',
                                kernel_initializer=initializers.RandomUniform(
                                    minval=-3e-4, maxval=3e-4))(net)

        # Create Keras model
        self.model = models.Model(inputs=[states, actions], outputs=Q_values)

        # Define optimizer and compile model for training with built-in loss function
        optimizer = optimizers.Adam()
        self.model.compile(optimizer=optimizer, loss='mse')

        # Compute action gradients (derivative of Q values w.r.t. to actions)
        action_gradients = K.gradients(Q_values, actions)

        # Define an additional function to fetch action gradients (to be used by actor model)
        self.get_action_gradients = K.function(
            inputs=[*self.model.input, K.learning_phase()],
            outputs=action_gradients)
    def build_model(self):
        """Build an actor (policy) network that maps states -> actions."""
        # Define input layer (states)
        states = layers.Input(shape=(self.state_size, ), name='states')

        #--------- copy from DDPG quadcopter -----------
        net = layers.Dense(units=400)(states)
        # net = layers.BatchNormalization()(net)
        net = layers.Activation("relu")(net)
        net = layers.Dense(units=200)(net)
        # net = layers.BatchNormalization()(net)
        net = layers.Activation("relu")(net)
        actions = layers.Dense(units=self.action_size,
                               activation='softmax',
                               name='actions',
                               kernel_initializer=initializers.RandomUniform(
                                   minval=-1, maxval=1))(net)

        # actions = layers.Dense(units=self.action_size, activation='sigmoid', name='actions',
        # 		kernel_initializer=initializers.RandomUniform(minval=-0.001, maxval=0.001))(net)

        # Add hidden layers
        # net = layers.Dense(units=16,activation=activations.sigmoid)(states)
        # net = layers.BatchNormalization()(net)

        # net = layers.Dense(units=16,activation=activations.sigmoid)(net)
        # net = layers.BatchNormalization()(net)

        # net = layers.Dense(units=128,activation=activations.relu)(net)
        # net = layers.BatchNormalization()(net)

        # Add final output layer with sigmoid activation
        # actions = layers.Dense(units=self.action_size, activation='linear', # sigmoid
        # 	name='raw_actions' )(net)

        # Scale [0, 1] output for each action dimension to proper range
        #         actions = layers.Lambda(lambda x: (x * self.action_range) + self.action_low,
        #             name='actions')(raw_actions)

        # Create Keras model
        self.model = models.Model(inputs=states, outputs=actions)
        action_gradients = layers.Input(shape=(self.action_size, ))
        loss = K.mean(-action_gradients * actions)

        # Define optimizer and training function
        optimizer = optimizers.Adam(lr=.0001)
        updates_op = optimizer.get_updates(params=self.model.trainable_weights,
                                           loss=loss)
        self.train_fn = K.function(
            inputs=[self.model.input, action_gradients,
                    K.learning_phase()],
            outputs=[],
            updates=updates_op)
    def build_model(self):

        # Define input layers
        input_states = layers.Input(shape=(self.state_size, ),
                                    name='input_states')
        input_actions = layers.Input(shape=(self.action_size, ),
                                     name='input_actions')

        #---------- copy from DDPG quadcopter ---------
        # Add hidden layer(s) for state pathway
        net_states = layers.Dense(units=400)(input_states)
        # net_states = layers.BatchNormalization()(net_states)
        net_states = layers.Activation("relu")(net_states)

        net_states = layers.Dense(units=300)(net_states)
        net_states = layers.Activation("relu")(net_states)

        # Add hidden layer(s) for action pathway
        net_actions = layers.Dense(units=300)(input_actions)
        net_actions = layers.Activation("relu")(net_actions)

        # net_actions = layers.Dense(units=250,kernel_regularizer=regularizers.l2(1e-7))(net_actions)
        # net_actions = layers.BatchNormalization()(net_actions)
        # net_actions = layers.Activation("relu")(net_actions)

        # Combine state and action pathways
        net = layers.Add()([net_states, net_actions])
        net = layers.Activation('relu')(net)

        net = layers.Dense(units=200,
                           kernel_initializer=initializers.RandomUniform(
                               minval=-0.5, maxval=0.5))(net)
        net = layers.Activation('relu')(net)

        # Add final output layer to prduce action values (Q values)
        Q_values = layers.Dense(units=1, name='q_values')(net)

        # ---------------- Hidden layers for states ----------------
        # model_states = layers.Dense(units=32, activation=activations.sigmoid)(input_states)
        # # model_states = layers.BatchNormalization()(model_states)

        # model_states = layers.Dense(units=16, activation=activations.sigmoid)(model_states)
        # # model_states = layers.BatchNormalization()(model_states)

        # # model_states = layers.Dense(units=64)(model_states)
        # # model_states = layers.BatchNormalization()(model_states)

        # # ---------------- Hidden layers for actions ----------------
        # model_actions = layers.Dense(units=16, activation=activations.sigmoid)(input_actions)
        # # model_actions = layers.BatchNormalization()(model_actions)

        # model_actions = layers.Dense(units=16, activation=activations.sigmoid)(model_actions)
        # # model_actions = layers.BatchNormalization()(model_actions)

        # # Both models merge here
        # model = layers.add([model_states, model_actions])

        # # Fully connected and batch normalization
        # model = layers.Dense(units=8, activation=activations.sigmoid)(model)
        # # model = layers.BatchNormalization()(model)

        # # model = layers.Dense(units=64, activation=activations.relu)(model)
        # # model = layers.BatchNormalization()(model)

        # # Q values / output layer
        # Q_values = layers.Dense(units=1, name='Q_s_a')(model)
        # # model = layers.BatchNormalization()(model)

        #  Keras wrap the model
        self.model = models.Model(inputs=[input_states, input_actions],
                                  outputs=Q_values)

        optimizer = optimizers.Adam(lr=0.0001)
        self.model.compile(optimizer=optimizer, loss='mse')

        action_gradients = K.gradients(Q_values, input_actions)
        self.get_action_gradients = K.function(
            inputs=[*self.model.input, K.learning_phase()],
            outputs=action_gradients)
Пример #8
0
    def build_model(self):
        kernel_l2_reg = 1e-5

        # Dense Options
        # units = 200,
        # activation='relu',
        # activation = None,
        # activity_regularizer=regularizers.l2(0.01),
        # kernel_regularizer=regularizers.l2(kernel_l2_reg),
        # bias_initializer=initializers.Constant(1e-2),
        # use_bias = True
        # use_bias=False
        """Build a critic (value) network that maps (state, action) pairs -> Q-values."""
        # Define input layers
        states = layers.Input(shape=(self.state_size, ), name='states')
        actions = layers.Input(shape=(self.action_size, ), name='actions')

        # size_repeat = 30
        # state_size = size_repeat*self.state_size
        # action_size = size_repeat*self.action_size
        # block_size = size_repeat*self.state_size + size_repeat*self.action_size
        # print("Critic block size = {}".format(block_size))
        #
        # net_states = layers.concatenate(size_repeat * [states])
        # net_states = layers.BatchNormalization()(net_states)
        # net_states = layers.Dropout(0.2)(net_states)
        #
        # net_actions = layers.concatenate(size_repeat * [actions])
        # net_actions = layers.BatchNormalization()(net_actions)
        # net_actions = layers.Dropout(0.2)(net_actions)
        #
        # # State pathway
        # for _ in range(3):
        #     net_states = res_block(net_states, state_size)
        #
        # # Action pathway
        # for _ in range(2):
        #     net_actions = res_block(net_actions, action_size)
        #
        # # Merge state and action pathways
        # net = layers.concatenate([net_states, net_actions])
        #
        # # Final blocks
        # for _ in range(3):
        #     net = res_block(net, block_size)

        # Add hidden layer(s) for state pathway
        net_states = layers.Dense(
            units=300,
            kernel_regularizer=regularizers.l2(kernel_l2_reg))(states)
        net_states = layers.BatchNormalization()(net_states)
        net_states = layers.LeakyReLU(1e-2)(net_states)

        net_states = layers.Dense(
            units=400,
            kernel_regularizer=regularizers.l2(kernel_l2_reg))(net_states)
        net_states = layers.BatchNormalization()(net_states)
        net_states = layers.LeakyReLU(1e-2)(net_states)

        # Add hidden layer(s) for action pathway
        net_actions = layers.Dense(
            units=400,
            kernel_regularizer=regularizers.l2(kernel_l2_reg))(actions)
        net_actions = layers.BatchNormalization()(net_actions)
        net_actions = layers.LeakyReLU(1e-2)(net_actions)

        # Merge state and action pathways
        net = layers.add([net_states, net_actions])

        net = layers.Dense(
            units=200, kernel_regularizer=regularizers.l2(kernel_l2_reg))(net)
        net = layers.BatchNormalization()(net)
        net = layers.LeakyReLU(1e-2)(net)

        # Add final output layer to prduce action values (Q values)
        Q_values = layers.Dense(
            units=1,
            activation=None,
            kernel_regularizer=regularizers.l2(kernel_l2_reg),
            kernel_initializer=initializers.RandomUniform(minval=-5e-3,
                                                          maxval=5e-3),
            # bias_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3),
            name='q_values')(net)

        # Create Keras model
        self.model = models.Model(inputs=[states, actions], outputs=Q_values)

        # Define optimizer and compile model for training with built-in loss function
        optimizer = optimizers.Adam(lr=1e-2)

        self.model.compile(optimizer=optimizer, loss='mse')

        # Compute action gradients (derivative of Q values w.r.t. to actions)
        action_gradients = K.gradients(Q_values, actions)

        # Define an additional function to fetch action gradients (to be used by actor model)
        self.get_action_gradients = K.function(
            inputs=[*self.model.input, K.learning_phase()],
            outputs=action_gradients)
Пример #9
0
    def build_model(self):
        kernel_l2_reg = 1e-5
        """Build an actor (policy) network that maps states -> actions."""
        # Define input layer (states)
        states = layers.Input(shape=(self.state_size, ), name='states')

        # size_repeat = 30
        # block_size = size_repeat*self.state_size
        # print("Actor block size = {}".format(block_size))
        #
        # net = layers.concatenate([states]*size_repeat)
        # # net = layers.Dense(block_size,
        # #                    # kernel_initializer=initializers.RandomNormal(mean=1.0, stddev=0.1),
        # #                    #  bias_initializer=initializers.RandomNormal(mean=0.0, stddev=0.01),
        # #                    activation=None,
        # #                    use_bias=False)(states)
        # net = layers.BatchNormalization()(net)
        # net = layers.Dropout(0.2)(net)
        # # net = layers.LeakyReLU(1e-2)(net)
        #
        # for _ in range(5):
        #     net = res_block(net, block_size)

        # Add hidden layers
        net = layers.Dense(
            units=300,
            kernel_regularizer=regularizers.l2(kernel_l2_reg))(states)
        net = layers.BatchNormalization()(net)
        net = layers.LeakyReLU(1e-2)(net)

        net = layers.Dense(
            units=400, kernel_regularizer=regularizers.l2(kernel_l2_reg))(net)
        net = layers.BatchNormalization()(net)
        net = layers.LeakyReLU(1e-2)(net)

        net = layers.Dense(
            units=200, kernel_regularizer=regularizers.l2(kernel_l2_reg))(net)
        net = layers.BatchNormalization()(net)
        net = layers.LeakyReLU(1e-2)(net)

        # Try different layer sizes, activations, add batch normalization, regularizers, etc.

        # # Add final output layer with sigmoid activation
        # raw_actions = layers.Dense(units=self.action_size,
        #                            activation='sigmoid',
        #                            # kernel_regularizer=regularizers.l2(kernel_l2_reg),
        #                            kernel_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3),
        #                            # bias_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3),
        #                            name='raw_actions')(net)
        #
        # # Scale [0, 1] output for each action dimension to proper range
        # actions = layers.Lambda(lambda x: (x * self.action_range) + self.action_low, name='actions')(raw_actions)

        actions = layers.Dense(
            units=self.action_size,
            activation='tanh',
            kernel_regularizer=regularizers.l2(kernel_l2_reg),
            kernel_initializer=initializers.RandomUniform(minval=-3e-3,
                                                          maxval=3e-3),
            name='actions')(net)

        # Create Keras model
        self.model = models.Model(inputs=states, outputs=actions)

        # Define loss function using action value (Q value) gradients
        action_gradients = layers.Input(shape=(self.action_size, ))
        loss = K.mean(-action_gradients * actions)

        # Incorporate any additional losses here (e.g. from regularizers)

        # Define optimizer and training function
        optimizer = optimizers.Adam(lr=1e-4)

        updates_op = optimizer.get_updates(params=self.model.trainable_weights,
                                           loss=loss)
        self.train_fn = K.function(
            inputs=[self.model.input, action_gradients,
                    K.learning_phase()],
            outputs=[],
            updates=updates_op)