Пример #1
0
    def __init__(self, dim_state: int, dim_action: int, hidden_sizes: List[int], normalizer: GaussianNormalizer,
                 init_std=1.):
        """Create the placeholders, network, log-std parameter and derived ops.

        Args:
            dim_state: width of the ``states`` placeholder and of the first layer's input.
            dim_action: width of the ``actions`` placeholder and of the network's output.
            hidden_sizes: hidden layer widths; every hidden layer is followed by ``nn.Tanh``.
            normalizer: stored on ``self.normalizer``; not used further in this constructor.
            init_std: ``np.log(init_std)`` fills the initial ``op_log_std`` vector.
        """
        super().__init__()
        self.dim_state = dim_state
        self.dim_action = dim_action
        self.hidden_sizes = hidden_sizes
        self.init_std = init_std
        self.normalizer = normalizer

        with self.scope:
            # Inputs fed at run time.
            self.op_states = tf.placeholder(tf.float32, shape=[None, dim_state], name='states')
            self.op_actions_ = tf.placeholder(tf.float32, shape=[None, dim_action], name='actions')

            # Hidden stack: (FCLayer, Tanh) per consecutive pair of widths.
            widths = [dim_state, *self.hidden_sizes]
            modules = []
            for fan_in, fan_out in zip(widths, widths[1:]):
                modules += [FCLayer(fan_in, fan_out), nn.Tanh()]
            # Output layer uses a small init scale and has no activation.
            modules.append(FCLayer(widths[-1], dim_action, init_scale=0.01))
            self.net = nn.Sequential(*modules)

            # One log-std entry per action dimension.
            log_std_init = tf.constant(np.log(self.init_std), shape=[self.dim_action], dtype=tf.float32)
            self.op_log_std = nn.Parameter(log_std_init, name='log_std')

        # Calling the module yields an object used below as a distribution.
        self.distribution = self(self.op_states)
        self.op_actions = self.distribution.sample()
        self.op_actions_mean = self.distribution.mean()
        self.op_actions_std = self.distribution.stddev()

        # Mean squared difference between the fed actions and the distribution mean.
        action_error = self.op_actions_ - self.op_actions_mean
        self.op_mse_loss = tf.reduce_mean(tf.square(action_error))

        # Negated log-probability of the fed actions, summed over axis 1.
        log_probs = self.distribution.log_prob(self.op_actions_)
        self.op_nlls_ = -log_probs.reduce_sum(axis=1)
    def __init__(self, state_spec, action_spec, hidden_sizes=(64, 64)):
        """Create placeholders, a shared tanh MLP with policy/Q heads, and derived ops.

        Args:
            state_spec: object providing ``dtype`` and ``shape`` for the state placeholder;
                ``shape[0]`` is the input width of the first layer.
            action_spec: object providing ``dtype``, ``shape`` and ``n``; ``n`` is the
                output width of both heads.
            hidden_sizes: widths of the hidden layers of the shared MLP.
        """
        super().__init__()
        self.state_spec = state_spec
        self.action_spec = action_spec
        self.hidden_sizes = hidden_sizes

        state_shape = [None, *state_spec.shape]
        self.op_states = tf.placeholder(state_spec.dtype, state_shape, 'states')
        action_shape = [None, *action_spec.shape]
        self.op_actions_ = tf.placeholder(action_spec.dtype, action_shape, 'actions')

        # Shared trunk: (FCLayer, Tanh) for every consecutive pair of widths.
        widths = [state_spec.shape[0], *hidden_sizes]
        trunk = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            trunk.extend((FCLayer(fan_in, fan_out, init_scale=np.sqrt(2)), nn.Tanh()))
        self.mlp_net = nn.Sequential(*trunk)

        # Two heads on top of the trunk's last width.
        trunk_width = widths[-1]
        self.pi_net = FCLayer(trunk_width, action_spec.n, init_scale=0.01)
        self.q_net = FCLayer(trunk_width, action_spec.n)

        logits, q_all = self.forward(self.op_states)
        self.pd = CategoricalPd(logits)
        self.op_actions = self.pd.sample()
        self.op_actions_mean = self.pd.mode()
        self.op_mus = tf.nn.softmax(logits)
        # Softmax-weighted sum of the Q head's outputs over the last axis.
        weighted_q = self.op_mus * q_all
        self.op_v_values = tf.reduce_sum(weighted_q, axis=-1)
        self.op_nlls = self.pd.neglogp(self.op_actions)
        # Q outputs indexed by the sampled actions and by the fed actions.
        self.op_q_values = get_by_index(q_all, self.op_actions)
        self.op_q_values_ = get_by_index(q_all, self.op_actions_)
    def __init__(self, dim_state, dim_action, hidden_sizes):
        """Create the placeholders and two identically shaped ReLU networks.

        Args:
            dim_state: width of the state placeholder.
            dim_action: width of the action placeholder.
            hidden_sizes: widths of the hidden layers used by both networks.

        Both networks take ``dim_state + dim_action`` inputs and produce a single
        output; ``self.forward`` turns the placeholders into ``op_q1`` and ``op_q2``.
        """
        super().__init__()
        self.dim_state = dim_state
        self.dim_action = dim_action
        self.hidden_sizes = hidden_sizes

        def build_net():
            # One (FCLayer, ReLU) pair per hidden width, then a 1-unit output layer.
            sizes = [dim_state + dim_action, *self.hidden_sizes]
            layers = []
            for in_features, out_features in zip(sizes[:-1], sizes[1:]):
                layers.append(FCLayer(in_features, out_features))
                layers.append(nn.ReLU())
            layers.append(FCLayer(sizes[-1], 1))
            return nn.Sequential(*layers)

        with self.scope:
            self.op_states = tf.placeholder(tf.float32, shape=[None, dim_state])
            self.op_actions = tf.placeholder(tf.float32, shape=[None, dim_action])
            # Same architecture, separately created layers; net1 is built before net2
            # so the layer creation order matches the previous duplicated code.
            self.net1 = build_net()
            self.net2 = build_net()

        self.op_q1, self.op_q2 = self.forward(self.op_states, self.op_actions)
Пример #4
0
    def __init__(self,
                 dim_state: int,
                 dim_action: int,
                 hidden_sizes: List[int],
                 normalizer: Normalizers,
                 save_normalizer=False):
        """Create placeholders, a ReLU network over state+action inputs, and reward op.

        Args:
            dim_state: width of the ``state`` placeholder.
            dim_action: width of the ``action`` placeholder.
            hidden_sizes: widths of the hidden layers.
            normalizer: its ``state`` method is wrapped as ``state_process_fn``.
            save_normalizer: when true, ``normalizer`` is also stored on ``self.normalizer``.
        """
        super().__init__()
        self.dim_state = dim_state
        self.dim_action = dim_action
        self.hidden_sizes = hidden_sizes

        # Per the original author's note, holding the normalizer only through plain
        # functions keeps it out of self.state_dict.
        def normalize_states(states_):
            return normalizer.state(states_)

        def passthrough_actions(actions_):
            return actions_

        self.state_process_fn = normalize_states
        self.action_process_fn = passthrough_actions
        if save_normalizer:
            self.normalizer = normalizer

        with self.scope:
            self.op_states = tf.placeholder(tf.float32, [None, dim_state], "state")
            self.op_actions = tf.placeholder(tf.float32, [None, dim_action], "action")

            # (FCLayer, ReLU) per hidden width, then a single-output layer.
            widths = [dim_state + dim_action, *self.hidden_sizes]
            modules = []
            for fan_in, fan_out in zip(widths, widths[1:]):
                modules.extend((FCLayer(fan_in, fan_out), nn.ReLU()))
            modules.append(FCLayer(widths[-1], 1))
            self.net = nn.Sequential(*modules)

            self.op_logits = self(self.op_states, self.op_actions)
            # -log(1 - sigmoid(logits) + 1e-6); the constant keeps the log argument positive.
            prob = tf.nn.sigmoid(self.op_logits)
            self.op_rewards = -tf.log(1 - prob + 1e-6)
    def __init__(self, state_spec, action_spec):
        """Create placeholders, a NatureCNN trunk with policy/Q heads, and derived ops.

        Args:
            state_spec: object providing ``dtype`` and ``shape`` for the state placeholder;
                ``shape[-1]`` is passed to ``NatureCNN``.
            action_spec: object providing ``dtype``, ``shape`` and ``n``; ``n`` is the
                output width of both heads.
        """
        super().__init__()

        self.state_spec = state_spec
        self.action_spec = action_spec

        state_shape = [None, *state_spec.shape]
        self.op_states = tf.placeholder(state_spec.dtype, state_shape, 'states')
        action_shape = [None, *action_spec.shape]
        self.op_actions_ = tf.placeholder(action_spec.dtype, action_shape, 'actions')

        self.cnn_net = NatureCNN(state_spec.shape[-1])
        # Both heads take 512 inputs; only the policy head uses the small init scale.
        self.pi_net = FCLayer(nin=512, nh=self.action_spec.n, init_scale=0.01)
        self.q_net = FCLayer(nin=512, nh=self.action_spec.n)

        logits, q_all = self.forward(self.op_states)
        self.pd = CategoricalPd(logits)
        self.op_actions = self.pd.sample()
        self.op_actions_mean = self.pd.mode()
        self.op_mus = tf.nn.softmax(logits)
        # Softmax-weighted sum of the Q head's outputs over the last axis.
        weighted_q = self.op_mus * q_all
        self.op_v_values = tf.reduce_sum(weighted_q, axis=-1)
        self.op_nlls = self.pd.neglogp(self.op_actions)
        # Q outputs indexed by the sampled actions and by the fed actions.
        self.op_q_values = get_by_index(q_all, self.op_actions)
        self.op_q_values_ = get_by_index(q_all, self.op_actions_)
Пример #6
0
    def __init__(self, dim_state, hidden_sizes, normalizer=None):
        """Create a tanh network with a single output and its placeholder/op.

        Args:
            dim_state: width of the state placeholder and of the first layer's input.
            hidden_sizes: widths of the hidden layers.
            normalizer: stored on ``self.normalizer``; not used further in this constructor.
        """
        super().__init__()
        self.hidden_sizes = hidden_sizes

        # (FCLayer, Tanh) per hidden width, then a single-output layer.
        sizes = [dim_state, *self.hidden_sizes]
        stack = []
        for n_in, n_out in zip(sizes, sizes[1:]):
            stack.extend((FCLayer(n_in, n_out), nn.Tanh()))
        stack.append(FCLayer(sizes[-1], 1))
        self.net = nn.Sequential(*stack)

        self.normalizer = normalizer
        self.op_states = tf.placeholder(tf.float32, shape=[None, dim_state])
        self.op_values = self.forward(self.op_states)
Пример #7
0
    def __init__(self,
                 dim_state: int,
                 dim_action: int,
                 hidden_sizes: List[int],
                 state_process_fn,
                 action_process_fn,
                 activ_fn='none'):
        """Create placeholders, a ReLU network over (state, action, next state), and reward op.

        Args:
            dim_state: width of the ``state`` and ``next_state`` placeholders.
            dim_action: width of the ``action`` placeholder.
            hidden_sizes: widths of the hidden layers.
            state_process_fn: stored on ``self.state_process_fn``; not called here.
            action_process_fn: stored on ``self.action_process_fn``; not called here.
            activ_fn: output activation, one of ``'none'``, ``'sigmoid'`` or ``'tanh'``.

        Raises:
            ValueError: if ``activ_fn`` is not one of the supported names.
        """
        super().__init__()
        self.dim_state = dim_state
        self.dim_action = dim_action
        self.hidden_sizes = hidden_sizes
        # Per the original author's note, taking callables (instead of a normalizer
        # object) avoids saving a normalizer into self.state_dict.
        self.state_process_fn = state_process_fn
        self.action_process_fn = action_process_fn

        with self.scope:
            self.op_states = tf.placeholder(tf.float32, [None, dim_state], "state")
            self.op_actions = tf.placeholder(tf.float32, [None, dim_action], "action")
            self.op_next_states = tf.placeholder(tf.float32, [None, dim_state], "next_state")

            # Input width covers two state vectors plus one action vector.
            widths = [dim_state * 2 + dim_action, *self.hidden_sizes]
            modules = []
            for fan_in, fan_out in zip(widths, widths[1:]):
                modules += [FCLayer(fan_in, fan_out), nn.ReLU()]
            modules.append(FCLayer(widths[-1], 1))

            # Optional activation appended after the single-output layer.
            if activ_fn == 'sigmoid':
                modules.append(nn.Sigmoid())
            elif activ_fn == 'tanh':
                modules.append(nn.Tanh())
            elif activ_fn != 'none':
                raise ValueError('%s is not supported' % activ_fn)
            self.net = nn.Sequential(*modules)

            self.op_logits = self(self.op_states, self.op_actions, self.op_next_states)
            # -log(1 - sigmoid(logits) + 1e-6); the constant keeps the log argument positive.
            prob = tf.nn.sigmoid(self.op_logits)
            self.op_rewards = -tf.log(1 - prob + 1e-6)
Пример #8
0
    def __init__(self, dim_state, dim_action, hidden_sizes: List[int]):
        """Create the state placeholder, a ReLU network ending in Tanh, and the action op.

        Args:
            dim_state: width of the ``states`` placeholder and of the first layer's input.
            dim_action: output width of the last ``FCLayer``.
            hidden_sizes: widths of the hidden layers.
        """
        super().__init__()
        self.dim_state = dim_state
        self.dim_action = dim_action
        self.hidden_sizes = hidden_sizes

        with self.scope:
            self.op_states = tf.placeholder(tf.float32, shape=[None, dim_state], name='states')

            # (FCLayer, ReLU) per hidden width.
            widths = [dim_state, *self.hidden_sizes]
            modules = []
            for fan_in, fan_out in zip(widths, widths[1:]):
                modules.extend((FCLayer(fan_in, fan_out), nn.ReLU()))
            # Output layer with a small init scale, followed by Tanh.
            modules.extend((FCLayer(widths[-1], dim_action, init_scale=0.01), nn.Tanh()))
            self.net = nn.Sequential(*modules)

            self.op_actions = self(self.op_states)
Пример #9
0
    def __init__(self, dim_state: int, dim_action: int, hidden_sizes: List[int]):
        """Create placeholders, a ReLU network with ``2 * dim_action`` outputs, and derived ops.

        Args:
            dim_state: width of the ``states`` placeholder and of the first layer's input.
            dim_action: width of the ``actions`` placeholder; the network emits twice this.
            hidden_sizes: widths of the hidden layers.
        """
        super().__init__()
        self.dim_state = dim_state
        self.dim_action = dim_action
        self.hidden_sizes = hidden_sizes

        with self.scope:
            self.op_states = tf.placeholder(tf.float32, shape=[None, dim_state], name='states')
            self.op_actions_ = tf.placeholder(tf.float32, shape=[None, dim_action], name='actions')

            # (FCLayer, ReLU) per hidden width, then a linear layer of width 2 * dim_action.
            widths = [dim_state, *self.hidden_sizes]
            modules = []
            for fan_in, fan_out in zip(widths, widths[1:]):
                modules += [FCLayer(fan_in, fan_out), nn.ReLU()]
            modules.append(FCLayer(widths[-1], dim_action*2))
            self.net = nn.Sequential(*modules)

        # Calling the module returns five values; the third is used as a distribution below.
        outputs = self(self.op_states)
        self.op_actions, self.op_log_density, pd, self.op_dist_mean, self.op_dist_log_std = outputs
        self.op_actions_mean = tf.tanh(self.op_dist_mean)

        # Log density of the fed actions: map them through atanh (clipped just inside
        # (-1, 1) so atanh stays finite), score under `pd`, then subtract
        # sum(log(1 - a^2 + EPS)).
        # NOTE(review): looks like the change-of-variables term for a tanh-squashed
        # distribution — confirm against the module's forward pass.
        bounded_actions = clip_but_pass_gradient(self.op_actions_, -1+EPS, 1-EPS)
        pre_squash = tf.atanh(bounded_actions)
        log_density = pd.log_prob(pre_squash).reduce_sum(axis=1)
        log_density -= tf.reduce_sum(tf.log(1 - self.op_actions_ ** 2 + EPS), axis=1)
        self.op_log_density_ = log_density
Пример #10
0
    def __init__(self,
                 dim_state: int,
                 dim_action: int,
                 hidden_sizes: List[int],
                 normalizers: Normalizers,
                 output_diff=False,
                 init_std=1.):
        """Create placeholders, a tanh network, a log-std parameter and next-state ops.

        Args:
            dim_state: width of the ``states``/``next_states`` placeholders, of the
                network output and of ``op_log_std``.
            dim_action: width of the ``actions`` placeholder.
            hidden_sizes: widths of the hidden layers.
            normalizers: object whose ``state`` and ``diff`` methods are applied to
                the distribution's outputs (with ``inverse=True``) and in the loss.
            output_diff: when true, the distribution's outputs are passed through
                ``normalizers.diff(..., inverse=True)`` and added to ``op_states``;
                otherwise they are passed through ``normalizers.state(..., inverse=True)``.
            init_std: ``np.log(init_std)`` fills the initial ``op_log_std`` vector.
        """
        super().__init__()
        self.dim_state = dim_state
        self.dim_action = dim_action
        self.hidden_sizes = hidden_sizes
        self.output_diff = output_diff
        self.init_std = init_std
        self.normalizers = normalizers
        with self.scope:
            # Inputs fed at run time.
            self.op_states = tf.placeholder(tf.float32,
                                            shape=[None, dim_state],
                                            name='states')
            self.op_actions = tf.placeholder(tf.float32,
                                             shape=[None, dim_action],
                                             name='actions')
            self.op_next_states_ = tf.placeholder(tf.float32,
                                                  shape=[None, dim_state],
                                                  name='next_states')

            # (FCLayer, Tanh) per hidden width, then a dim_state-wide output layer
            # with a small init scale.
            layers = []
            all_sizes = [dim_state + dim_action, *self.hidden_sizes]
            for i, (in_features, out_features) in enumerate(
                    zip(all_sizes[:-1], all_sizes[1:])):
                layers.append(FCLayer(in_features, out_features))
                layers.append(nn.Tanh())
            layers.append(FCLayer(all_sizes[-1], dim_state, init_scale=0.01))
            self.net = nn.Sequential(*layers)

            # One log-std entry per state dimension.
            self.op_log_std = nn.Parameter(tf.constant(np.log(self.init_std),
                                                       shape=[self.dim_state],
                                                       dtype=tf.float32),
                                           name='log_std')

            # Calling the module yields an object used below as a distribution.
            self.distribution = self(self.op_states, self.op_actions)
            self.op_next_states_std = self.distribution.stddev()
            # NOTE(review): mean()/stddev()/sample() are invoked several times below;
            # each call presumably adds its own ops to the graph — keep the call
            # pattern intact unless graph equivalence has been checked.
            if self.output_diff:
                # Distribution outputs go through normalizers.diff(inverse=True) and
                # are added to the current states.
                self.op_next_states_mean = self.op_states + self.normalizers.diff(
                    self.distribution.mean(), inverse=True)
                # The sample is clipped to mean +/- 3 * stddev before the transform.
                self.op_next_states = self.op_states + self.normalizers.diff(
                    tf.clip_by_value(
                        self.distribution.sample(),
                        self.distribution.mean() -
                        3 * self.distribution.stddev(),
                        self.distribution.mean() +
                        3 * self.distribution.stddev()),
                    inverse=True)
            else:
                # Distribution outputs go through normalizers.state(inverse=True) directly.
                self.op_next_states_mean = self.normalizers.state(
                    self.distribution.mean(), inverse=True)
                # The sample is clipped to mean +/- 3 * stddev before the transform.
                self.op_next_states = self.normalizers.state(tf.clip_by_value(
                    self.distribution.sample(),
                    self.distribution.mean() - 3 * self.distribution.stddev(),
                    self.distribution.mean() + 3 * self.distribution.stddev()),
                                                             inverse=True)
            # Mean squared difference between normalizers.state(fed next states) and
            # normalizers.state(op_next_states_mean); the same expression is used in
            # both output_diff modes.
            self.op_mse_loss = tf.reduce_mean(
                tf.square(
                    self.normalizers.state(self.op_next_states_) -
                    self.normalizers.state(self.op_next_states_mean), ))