Example #1
    def __init__(self, dim_state: int, dim_action: int, hidden_sizes: List[int], normalizer: GaussianNormalizer,
                 init_std=1.):
        super().__init__()
        self.dim_state = dim_state
        self.dim_action = dim_action
        self.hidden_sizes = hidden_sizes
        self.init_std = init_std
        self.normalizer = normalizer
        with self.scope:
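            # placeholders: a batch of states to act on, and externally supplied actions used when evaluating log-likelihoods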
            self.op_states = tf.placeholder(tf.float32, shape=[None, dim_state], name='states')
            self.op_actions_ = tf.placeholder(tf.float32, shape=[None, dim_action], name='actions')

            layers = []
            # tanh MLP over the state; the output layer uses a small (0.01) init so the initial action mean is near zero
            all_sizes = [dim_state, *self.hidden_sizes]
            for i, (in_features, out_features) in enumerate(zip(all_sizes[:-1], all_sizes[1:])):
                layers.append(nn.Linear(in_features, out_features, weight_initializer=normc_initializer(1)))
                layers.append(nn.Tanh())
            layers.append(nn.Linear(all_sizes[-1], dim_action, weight_initializer=normc_initializer(0.01)))
            self.net = nn.Sequential(*layers)

            self.op_log_std = nn.Parameter(
                tf.constant(np.log(self.init_std), shape=[self.dim_action], dtype=tf.float32), name='log_std')

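        # the forward pass builds the action distribution; expose ops for sampling, the mean/std, and per-sample negative log-likelihoods (summed over action dimensions)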
        self.distribution = self(self.op_states)
        self.op_actions = self.distribution.sample()
        self.op_actions_mean = self.distribution.mean()
        self.op_actions_std = self.distribution.stddev()
        self.op_nlls_ = -self.distribution.log_prob(self.op_actions_).reduce_sum(axis=1)
Example #2
    def __init__(self, dim_state: int, dim_action: int,
                 normalizers: Normalizers, *, arch: FLAGS.arch):
        super().__init__()
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=1e-5)

        self.dim_state = dim_state
        self.dim_action = dim_action
        self.op_states = tf.placeholder(tf.float32,
                                        shape=[None, self.dim_state],
                                        name='states')
        self.op_actions = tf.placeholder(tf.float32,
                                         shape=[None, self.dim_action],
                                         name='actions')
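        # MLP over concatenated (state, action) inputs: a ReLU input layer, a stack of Fixup residual blocks, and a linear head producing a dim_state-sized output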
        self.mlp = nn.Sequential(
            nn.Linear(dim_state + dim_action,
                      arch.n_units,
                      weight_initializer=initializer),
            nn.ReLU(),
            make_blocks(FixupResBlock, arch.n_units, arch.n_blocks,
                        arch.n_blocks),
            nn.Linear(arch.n_units, dim_state, weight_initializer=initializer),
        )

        self.normalizers = normalizers
        self.build()
Example #3
    def __init__(self,
                 blocks,
                 activation=nn.ReLU,
                 squeeze=False,
                 weight_initializer=None,
                 build=True):
        super().__init__()

        self._blocks = blocks
        if build:
            self.op_inputs = tf.placeholder(tf.float32,
                                            [None, self._blocks[0]])

        with self.scope:
            kwargs = {}
            if weight_initializer is not None:
                kwargs['weight_initializer'] = weight_initializer
            layers = []
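            # alternate Linear layers with the activation over consecutive block sizes (no activation before the first Linear)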
            for in_features, out_features in zip(blocks[:-1], blocks[1:]):
                if layers:
                    layers.append(activation())
                layers.append(nn.Linear(in_features, out_features, **kwargs))
            if squeeze:
                layers.append(nn.Squeeze(axis=1))
            self.net = nn.Sequential(*layers)

        self._squeeze = squeeze
        self._activation = activation

        if build:
            self.build()
Example #4
    def __init__(self,
                 blocks,
                 activation,
                 squeeze=False,
                 weight_initializer=None,
                 output_activation=None):
        super().__init__()

        self._blocks = blocks

        with self.scope:
            kwargs = {}
            if weight_initializer is not None:
                kwargs['weight_initializer'] = weight_initializer
            layers = []
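            # alternate Linear layers and activations over consecutive block sizes, optionally squeezing the last axis and applying an output activation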
            for in_features, out_features in zip(blocks[:-1], blocks[1:]):
                if layers:
                    layers.append(activation())
                layers.append(nn.Linear(in_features, out_features, **kwargs))
            if squeeze:
                layers.append(nn.Squeeze(axis=-1))
            if output_activation:
                layers.append(output_activation())
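            # register the layers by index instead of wrapping them in nn.Sequential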
            self._modules = {i: module for i, module in enumerate(layers)}

        self._squeeze = squeeze
        self._activation = activation
        self._built = False
Example #5
    def __init__(self, x, n_total_blocks):
        super().__init__()
        # Fixup-style residual block: scalar biases around each linear layer and a learnable
        # scale, with no normalization layers; the first layer's init std shrinks with the
        # total number of blocks.
        std = np.sqrt(2. / x / n_total_blocks)
        self.bias1a = nn.Parameter(tf.zeros(1), name='bias1a')
        self.fc1 = nn.Linear(x,
                             x,
                             bias=False,
                             weight_initializer=tf.initializers.random_normal(
                                 0, stddev=std))
        self.bias1b = nn.Parameter(tf.zeros(1), name='bias1b')
        self.relu = nn.ReLU()
        self.bias2a = nn.Parameter(tf.zeros(1), name='bias2a')
        # the second linear layer is zero-initialized so the block starts as an identity mapping
        self.fc2 = nn.Linear(x,
                             x,
                             bias=False,
                             weight_initializer=tf.initializers.zeros())
        self.scale = nn.Parameter(tf.ones(1), name='scale')
        self.bias2b = nn.Parameter(tf.zeros(1), name='bias2b')
Example #6
    def __init__(self, n_params):
        '''
        self.goal_velocity: in [-1, 1]
        '''
        super().__init__()
        with self.scope:
            layers = []
            layers.append(nn.Linear(1, n_params, bias=False, weight_initializer=normc_initializer(1.0)))
            if FLAGS.task.scaler == 'tanh':
                layers.append(nn.Tanh())
        self.net = nn.Sequential(*layers)
        # feed a constant scalar input so the output depends only on the trainable weights
        c = tf.constant(1, shape=[1, 1], dtype=tf.float32)
        # self.net(c) has shape [1, n_params]; take the first row as the goal velocity
        self.goal_velocity = self.net(c)[0]
        print(self.goal_velocity)