Пример #1
0
    def _build_module(self, input_layer):
        """Build the future-measurements prediction graph and store it in ``self.output``.

        Two fully connected towers are applied to ``input_layer``:

        * the expectation stream (E): a 256-unit Rectlin layer followed by a
          linear layer with ``multistep_measurements_size`` outputs;
        * the action stream (A): a 256-unit Rectlin layer followed by a linear
          layer with ``num_actions * multistep_measurements_size`` outputs,
          reshaped to ``(num_actions, multistep_measurements_size)``.

        ``self.output`` is the sum of the (sliced and expanded) expectation
        stream and the action stream after ``ng.mean`` of the action stream
        has been subtracted from it.

        NOTE(review): ``output_axis`` is used below but is never assigned in
        this method; unless it is defined at module level (not visible here)
        this raises ``NameError`` -- confirm and define it.
        """
        # This is almost exactly the same as Dueling Network but we predict the future measurements for each action

        # total number of predicted values per action:
        # (first entry of measurements_size) * (number of predicted future steps)
        multistep_measurements_size = self.measurements_size[0] * self.num_predicted_steps_ahead

        # actions expectation tower (expectation stream) - E
        with name_scope("expectation_stream"):
            expectation_stream = neon.Sequential([
                neon.Affine(nout=256, activation=neon.Rectlin(),
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Affine(nout=multistep_measurements_size,
                            weight_init=self.weights_init, bias_init=self.biases_init)
            ])(input_layer)

        # action fine differences tower (action stream) - A
        with name_scope("action_stream"):
            action_stream_unnormalized = neon.Sequential([
                neon.Affine(nout=256, activation=neon.Rectlin(),
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Affine(nout=self.num_actions * multistep_measurements_size,
                            weight_init=self.weights_init, bias_init=self.biases_init),
                neon.Reshape((self.num_actions, multistep_measurements_size))
            ])(input_layer)
            # ng.mean is called without explicit reduction axes.
            # NOTE(review): presumably the mean should be taken over the action
            # axis only -- confirm which axes the default reduces over.
            action_stream = action_stream_unnormalized - ng.mean(action_stream_unnormalized)

        # NOTE(review): this indexes position 0 along the first axis of the
        # expectation stream, which was built with multistep_measurements_size
        # outputs; it looks like the remaining entries are dropped here -- confirm
        # this is intended and not a leftover from a single-output value tower.
        repeated_expectation_stream = ng.slice_along_axis(expectation_stream, expectation_stream.axes[0], 0)
        # NOTE(review): output_axis is undefined in this scope (see docstring).
        repeated_expectation_stream = ng.expand_dims(repeated_expectation_stream, output_axis, 0)

        # merge to future measurements predictions
        self.output = repeated_expectation_stream + action_stream
Пример #2
0
    def _build_module(self, input_layer):
        """Assemble the dueling-network head and store the result in ``self.output``.

        A state-value tower (V, single output) and an action-advantage tower
        (A, one output per entry of the ``q_values`` axis) are both applied to
        ``input_layer``. The advantage has its ``ng.mean`` subtracted, the
        value is expanded along the ``q_values`` axis, and the two are summed
        to give the state-action value function Q.
        """
        # Axis enumerating the actions in the head's output.
        output_axis = ng.make_axis(self.num_actions, name='q_values')

        # state value tower - V
        value_tower = neon.Sequential([
            neon.Affine(nout=256, activation=neon.Rectlin(),
                        weight_init=self.weights_init, bias_init=self.biases_init),
            neon.Affine(nout=1,
                        weight_init=self.weights_init, bias_init=self.biases_init),
        ])
        state_value = value_tower(input_layer)

        # action advantage tower - A
        advantage_tower = neon.Sequential([
            neon.Affine(nout=256, activation=neon.Rectlin(),
                        weight_init=self.weights_init, bias_init=self.biases_init),
            neon.Affine(axes=output_axis,
                        weight_init=self.weights_init, bias_init=self.biases_init),
        ])
        raw_advantage = advantage_tower(input_layer)
        action_advantage = raw_advantage - ng.mean(raw_advantage)

        # Drop the leading axis of V (position 0), then re-insert the
        # q_values axis in front so V can be added to the advantages.
        sliced_value = ng.slice_along_axis(state_value, state_value.axes[0], 0)
        repeated_state_value = ng.expand_dims(sliced_value, output_axis, 0)

        # merge to state-action value function Q
        self.output = repeated_state_value + action_advantage
Пример #3
0
 def __init__(self, input_size, batch_size=None, activation_function=neon.Rectlin(), name="embedder"):
     """Initialise the embedder by delegating to ``InputEmbedder.__init__``.

     All four arguments are forwarded positionally and unchanged.
     """
     InputEmbedder.__init__(self, input_size, batch_size, activation_function, name)
Пример #4
0
 def __init__(self, activation_function=neon.Rectlin(), name="middleware_embedder"):
     """Store the configuration and create the default initializers.

     :param activation_function: activation object kept on the instance.
     :param name: name kept on the instance.
     """
     # caller-supplied configuration
     self.name = name
     self.activation_function = activation_function

     # graph endpoints; left unset by the constructor
     self.input = self.output = None

     # default parameter initializers
     self.weights_init = neon.GlorotInit()
     self.biases_init = neon.ConstantInit()
Пример #5
0
def model(action_axes):
    """Return a ``neon.Sequential`` of four batch-normalised Affine layers.

    The first three layers have 20 outputs each with a Tanh activation; the
    last layer outputs onto ``action_axes`` with a Rectlin activation. Every
    layer uses its own ``XavierInit`` weight initializer.
    """
    def hidden_layer():
        # One 20-unit Tanh layer; a fresh initializer/activation per call.
        return neon.Affine(
            nout=20,
            weight_init=neon.XavierInit(),
            activation=neon.Tanh(),
            batch_norm=True,
        )

    layers = [hidden_layer() for _ in range(3)]
    layers.append(
        neon.Affine(
            axes=action_axes,
            weight_init=neon.XavierInit(),
            activation=neon.Rectlin(),
            batch_norm=True,
        )
    )
    return neon.Sequential(layers)
Пример #6
0
 def __init__(self, input_size, batch_size=None, activation_function=neon.Rectlin(), name="embedder"):
     """Store the embedder configuration and create the default initializers.

     :param input_size: kept on the instance as ``input_size``.
     :param batch_size: kept on the instance as ``batch_size``.
     :param activation_function: activation object kept on the instance.
     :param name: name kept on the instance.
     """
     # caller-supplied configuration
     self.name = name
     self.input_size, self.batch_size = input_size, batch_size
     self.activation_function = activation_function

     # default parameter initializers
     self.weights_init = neon.GlorotInit()
     self.biases_init = neon.ConstantInit()

     # graph endpoints; left unset by the constructor
     self.input = self.output = None
Пример #7
0
def model(action_axes):
    """
    Build the observation-to-action model used by the dqn agent.

    The input is divided by 255.0, passed through three Rectlin convolutions
    and a 512-unit Rectlin Affine layer, and finally projected onto
    ``action_axes`` by an Affine layer with no activation.
    """
    # (filter shape, stride) for each convolution, in order of application
    conv_specs = (
        ((8, 8, 32), 4),
        ((4, 4, 64), 2),
        ((3, 3, 64), 1),
    )

    layers = [neon.Preprocess(lambda x: x / 255.0)]
    layers.extend(
        neon.Convolution(
            filter_shape,
            neon.XavierInit(),
            strides=stride,
            activation=neon.Rectlin(),
        )
        for filter_shape, stride in conv_specs
    )
    layers.append(
        neon.Affine(
            nout=512,
            weight_init=neon.XavierInit(),
            bias_init=neon.ConstantInit(),
            activation=neon.Rectlin(),
        )
    )
    layers.append(
        neon.Affine(
            weight_init=neon.XavierInit(),
            bias_init=neon.ConstantInit(),
            activation=None,
            axes=(action_axes, ),
        )
    )
    return neon.Sequential(layers)
Пример #8
0
def baselines_model(action_axes):
    """Return a two-layer ``neon.Sequential`` model.

    A 64-unit Rectlin Affine layer (batch norm disabled) is followed by an
    Affine layer with no activation that outputs onto ``action_axes``.
    """
    hidden = neon.Affine(
        nout=64,
        weight_init=neon.XavierInit(),
        bias_init=neon.ConstantInit(),
        activation=neon.Rectlin(),
        batch_norm=False,
    )
    head = neon.Affine(
        axes=action_axes,
        weight_init=neon.XavierInit(),
        bias_init=neon.ConstantInit(),
        activation=None,
    )
    return neon.Sequential([hidden, head])