def DiagonalwiseRefactorization(x, in_channels, stride=1, groups=4, is_training=True,
                                scope='depthwise', kernel_size=3):
    with tf.variable_scope(scope):
        channels = int(in_channels / groups)
        # Channel-diagonal mask: zeroes every cross-channel tap so the dense
        # grouped convolution computes the same result as a depthwise one.
        mask = tf.constant(get_mask(channels, kernel_size).tolist(), dtype=tf.float32,
                           shape=(kernel_size, kernel_size, channels, channels))
        splitw = [tf.get_variable('weights_%d' % i,
                                  (kernel_size, kernel_size, channels, channels),
                                  initializer=ortho_init(init_scale))
                  for i in range(groups)]
        splitw = [tf.multiply(w, mask) for w in splitw]
        # Run one masked dense convolution per channel group, then re-assemble.
        splitx = tf.split(x, groups, 1)
        splitx = [tf.nn.conv2d(xi, wi, (1, 1, stride, stride), 'SAME', data_format='NCHW')
                  for xi, wi in zip(splitx, splitw)]
        x = tf.concat(splitx, 1)
        x = tf.nn.relu(x)
        return x
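# `get_mask` is referenced above but not defined in these snippets. A minimal
# sketch of what it presumably computes, assuming the standard diagonalwise
# construction: a (K, K, C, C) array that is 1 only where the input and output
# channel indices coincide, so the masked kernel acts per channel.
import numpy as np

def get_mask(channels, kernel_size):
    mask = np.zeros((kernel_size, kernel_size, channels, channels), dtype=np.float32)
    for c in range(channels):
        mask[:, :, c, c] = 1.0  # keep only the channel-diagonal taps
    return mask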
def Pointwise(x, in_channels, out_channels, is_training=True, scope='pointwise'):
    with tf.variable_scope(scope):
        w = tf.get_variable('weights', (1, 1, in_channels, out_channels),
                            initializer=ortho_init(init_scale))
        x = tf.nn.conv2d(x, w, (1, 1, 1, 1), 'SAME', data_format='NCHW')
        x = tf.nn.relu(x)
        return x
def linear(input_tensor, scope, n_hidden, *, init_scale=1.0, init_bias=0.0):
    """
    Creates a fully connected layer for TensorFlow

    :param input_tensor: (TensorFlow Tensor) The input tensor for the fully connected layer
    :param scope: (str) The TensorFlow variable scope
    :param n_hidden: (int) The number of hidden neurons
    :param init_scale: (float) The initialization scale
    :param init_bias: (float) The initialization offset bias
    :return: (TensorFlow Tensor) fully connected layer
    """
    with tf.variable_scope(scope):
        n_input = input_tensor.get_shape()[1].value
        weight = tf.get_variable("w", [n_input, n_hidden], initializer=ortho_init(init_scale))
        bias = tf.get_variable("b", [n_hidden], initializer=tf.constant_initializer(init_bias))
        return tf.matmul(input_tensor, weight) + bias
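# Hypothetical usage sketch (not part of the original snippet): a two-layer
# tanh trunk built with `linear`, assuming TF 1.x graph mode.
import numpy as np

def _demo_linear():
    obs_ph = tf.placeholder(tf.float32, [None, 8], name='obs')
    h1 = tf.tanh(linear(obs_ph, 'fc1', n_hidden=64, init_scale=np.sqrt(2)))
    h2 = tf.tanh(linear(h1, 'fc2', n_hidden=64, init_scale=np.sqrt(2)))
    return h2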
def Depthwise(x, in_channels, stride=1, is_training=True, scope='depthwise', kernel_size=3):
    with tf.variable_scope(scope):
        w = tf.get_variable('weights', (kernel_size, kernel_size, in_channels, 1),
                            initializer=ortho_init(init_scale))
        x = tf.nn.depthwise_conv2d(x, w, (1, 1, stride, stride), 'SAME', data_format='NCHW')
        x = tf.nn.relu(x)
        return x
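# Hypothetical composition sketch (not in the original snippets): a
# MobileNet-style depthwise-separable block built from the `Depthwise` and
# `Pointwise` helpers above. Assumes NCHW inputs and the module-level
# `init_scale` those helpers rely on.
def separable_block(x, in_channels, out_channels, stride=1, scope='sep_block'):
    with tf.variable_scope(scope):
        x = Depthwise(x, in_channels, stride=stride, scope='dw')  # per-channel spatial filtering
        x = Pointwise(x, in_channels, out_channels, scope='pw')   # 1x1 cross-channel mixing
        return x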
def build_linear_layer(self, input_tensor, scope, n_hidden, *, init_scale=1.0, init_bias=0.0):
    """
    Creates a fully connected layer for TensorFlow

    :param input_tensor: (TensorFlow Tensor) The input tensor for the fully connected layer
    :param scope: (str) The TensorFlow variable scope
    :param n_hidden: (int) The number of hidden neurons
    :param init_scale: (float) The initialization scale
    :param init_bias: (float) The initialization offset bias
    :return: (TensorFlow Tensor) fully connected layer
    """
    with tf.variable_scope(scope):
        n_input = input_tensor.get_shape()[1].value
        weight = tf.get_variable("w", [n_input, n_hidden],
                                 initializer=ortho_init(init_scale),
                                 regularizer=(tf.keras.regularizers.l2(cfg.l2_coef)
                                              if cfg.is_mod(cfg.MOD_L2_REG) else None))
        bias = tf.get_variable("b", [n_hidden], initializer=tf.constant_initializer(init_bias))
        return tf.matmul(input_tensor, weight) + bias
def Conv3x3(x, in_channels, out_channels, stride=1, is_training=True, scope='convolution',
            kernel_size=3):
    with tf.variable_scope(scope):
        w = tf.get_variable('weight',
                            shape=(kernel_size, kernel_size, in_channels, out_channels),
                            initializer=ortho_init(init_scale))
        x = tf.nn.conv2d(x, w, (1, 1, stride, stride), 'SAME', data_format='NCHW')
        x = tf.nn.relu(x)
        return x
enemies = [(train_idx + 1) % 4, (train_idx + 3) % 4]
enemies.sort()
random_explore = True
update_eps = 0.2
pgn = False
n_actions = 122

# replay_buffer
hindsight = False
her_size = 30
her_K = 4

# dfp_policy
import tensorflow as tf
from stable_baselines.a2c.utils import ortho_init
init_scale = 1
conv_init = ortho_init(init_scale)
# conv_init = tf.glorot_normal_initializer()
linear_init = tf.glorot_normal_initializer()

# dfp
buffer_size = 50000
learning_starts = 10000
exploration_final_eps = 0.2
exploration_fraction = 0.05
gamma = 0.99
batch_size = 64
lr_decay_step = 5e5
decay_rate = 0.3
def init_weights(scale, shape):
    init_function = ortho_init(scale)
    return init_function(shape)
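# Hypothetical sanity check (not in the original snippet), assuming the
# stable_baselines `ortho_init`, which builds the matrix in NumPy: for a square
# shape the initializer returns a scaled orthogonal matrix, so W.T @ W should be
# close to scale**2 * I.
import numpy as np

def _check_orthogonality(scale=1.0, size=64):
    w = init_weights(scale, (size, size))  # NumPy float32 array from the initializer
    gram = np.matmul(w.T, w)
    np.testing.assert_allclose(gram, (scale ** 2) * np.eye(size), atol=1e-4)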
def __init__(self, num_processing_steps=None, latent_size=None, n_layers=None,
             edge_output_size=None, node_output_size=None, global_output_size=None,
             reducer=None, out_init_scale=5.0, name="AggregationNet"):
    super(AggregationDiffNet, self).__init__(name=name)
    if num_processing_steps is None:
        self._proc_hops = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    else:
        self._proc_hops = num_processing_steps

    if reducer is None or reducer == 'max':
        reducer = unsorted_segment_max_or_zero
    elif reducer == 'logsumexp':
        reducer = segment_logsumexp
    elif reducer == 'softmax':
        reducer = segment_transformer
    elif reducer == 'sum':
        reducer = tf.math.unsorted_segment_sum
    else:
        raise ValueError('Unknown reducer!')

    if latent_size is None:
        latent_size = 16
    if n_layers is None:
        n_layers = 2

    self._num_processing_steps = len(self._proc_hops)
    self._n_stacked = latent_size * self._num_processing_steps

    def make_mlp():
        return snt.nets.MLP([latent_size] * n_layers, activate_final=True)

    # def make_linear():
    #     return snt.nets.MLP([latent_size], activate_final=False)

    self._core = modules.GraphNetwork(edge_model_fn=make_mlp,
                                      node_model_fn=make_mlp,
                                      global_model_fn=make_mlp,
                                      edge_block_opt={'use_globals': False},
                                      node_block_opt={'use_globals': False,
                                                      'use_sent_edges': False},
                                      name="graph_net",
                                      reducer=reducer)
    self._encoder = modules.GraphIndependent(make_mlp, make_mlp, make_mlp, name="encoder")
    self._decoder = modules.GraphIndependent(make_mlp, make_mlp, make_mlp, name="decoder")
    inits = {'w': ortho_init(out_init_scale), 'b': tf.constant_initializer(0.0)}

    # Transforms the outputs into the appropriate shapes.
    edge_fn = None if edge_output_size is None else lambda: snt.Linear(
        edge_output_size, initializers=inits, name="edge_output")
    node_fn = None if node_output_size is None else lambda: snt.Linear(
        node_output_size, initializers=inits, name="node_output")
    global_fn = None if global_output_size is None else lambda: snt.Linear(
        global_output_size, initializers=inits, name="global_output")
    with self._enter_variable_scope():
        self._output_transform = modules.GraphIndependent(edge_fn, node_fn, global_fn,
                                                          name="output")
def __init__(self, num_processing_steps=None, latent_size=None, n_layers=None,
             edge_output_size=None, node_output_size=None, global_output_size=None,
             reducer=None, out_init_scale=5.0, name="AggregationNet"):
    super(AggregationNet, self).__init__(name=name)
    if num_processing_steps is None:
        self._num_processing_steps = 5
    else:
        self._num_processing_steps = num_processing_steps

    if reducer is None or reducer == 'max':
        reducer = unsorted_segment_max_or_zero
    elif reducer == 'mean':
        reducer = tf.math.unsorted_segment_mean
    elif reducer == 'sum':
        reducer = tf.math.unsorted_segment_sum
    else:
        raise ValueError('Unknown reducer!')

    if latent_size is None:
        latent_size = 16
    if n_layers is None:
        n_layers = 2

    def make_mlp():
        return snt.nets.MLP([latent_size] * n_layers, activate_final=True)

    if self._num_processing_steps > 0:
        # Edge block copies the node features onto the edges.
        core_a = blocks.EdgeBlock(edge_model_fn=lambda: Identity(),
                                  use_edges=False,
                                  use_receiver_nodes=False,
                                  use_sender_nodes=True,
                                  use_globals=False,
                                  name='LinearNodeAggGCN_core_a')
        # Then, edge data is aggregated onto the node by the reducer function.
        core_b = blocks.NodeBlock(node_model_fn=lambda: Identity(),
                                  use_received_edges=True,
                                  use_sent_edges=False,
                                  use_nodes=False,
                                  use_globals=False,
                                  received_edges_reducer=reducer,
                                  name='LinearNodeAggGCN_core_b')
        self._cores = [core_a, core_b]

    self._encoder = modules.GraphIndependent(make_mlp, make_mlp, make_mlp, name="encoder")
    self._decoder = modules.GraphIndependent(make_mlp, make_mlp, make_mlp, name="decoder")
    inits = {'w': ortho_init(out_init_scale), 'b': tf.constant_initializer(0.0)}

    # Transforms the outputs into the appropriate shapes.
    edge_fn = None if edge_output_size is None else lambda: snt.Linear(
        edge_output_size, initializers=inits, name="edge_output")
    node_fn = None if node_output_size is None else lambda: snt.Linear(
        node_output_size, initializers=inits, name="node_output")
    global_fn = None if global_output_size is None else lambda: snt.Linear(
        global_output_size, initializers=inits, name="global_output")
    with self._enter_variable_scope():
        self._output_transform = modules.GraphIndependent(edge_fn, node_fn, global_fn,
                                                          name="output")
def __init__(self, num_processing_steps=None, latent_size=None, n_layers=None,
             edge_output_size=None, node_output_size=None, global_output_size=None,
             reducer=None, out_init_scale=5.0, name="AggregationNet"):
    super(NonLinearGraphNet, self).__init__(name=name)
    if num_processing_steps is None:
        self._num_processing_steps = 5
    else:
        self._num_processing_steps = num_processing_steps

    if reducer is None or reducer == 'max':
        reducer = unsorted_segment_max_or_zero
    elif reducer == 'mean':
        reducer = tf.math.unsorted_segment_mean
    elif reducer == 'sum':
        reducer = tf.math.unsorted_segment_sum
    else:
        raise ValueError('Unknown reducer!')

    if latent_size is None:
        latent_size = 16
    if n_layers is None:
        n_layers = 2

    def make_mlp():
        return snt.nets.MLP([latent_size] * n_layers, activate_final=False)

    if self._num_processing_steps > 0:
        # Edge model f^e(v_sender, v_receiver, e); in the linear model, f^e = v_sender.
        # The received edge features are averaged by the reducer to give e'.
        # Node model f^v(v, e'); in the linear model it was just f^v = e'.
        self._core = modules.GraphNetwork(edge_model_fn=make_mlp,
                                          node_model_fn=make_mlp,
                                          global_model_fn=make_mlp,
                                          edge_block_opt={'use_globals': False},
                                          node_block_opt={'use_globals': False,
                                                          'use_sent_edges': False},
                                          name="graph_net",
                                          reducer=reducer)

    self._encoder = modules.GraphIndependent(make_mlp, make_mlp, make_mlp, name="encoder")
    self._decoder = modules.GraphIndependent(make_mlp, make_mlp, make_mlp, name="decoder")
    inits = {'w': ortho_init(out_init_scale), 'b': tf.constant_initializer(0.0)}

    # Transforms the outputs into the appropriate shapes.
    edge_fn = None if edge_output_size is None else lambda: snt.Linear(
        edge_output_size, initializers=inits, name="edge_output")
    node_fn = None if node_output_size is None else lambda: snt.Linear(
        node_output_size, initializers=inits, name="node_output")
    global_fn = None if global_output_size is None else lambda: snt.Linear(
        global_output_size, initializers=inits, name="global_output")
    with self._enter_variable_scope():
        self._output_transform = modules.GraphIndependent(edge_fn, node_fn, global_fn,
                                                          name="output")
from itertools import zip_longest

import numpy as np
import tensorflow as tf

def mlp_extractor(flat_observations, net_arch, act_fun, layer_norm=False):
    """
    Constructs an MLP that receives observations as an input and outputs a latent
    representation for the policy and a value network. The ``net_arch`` parameter
    specifies the number and sizes of the hidden layers and how many of them are
    shared between the policy network and the value network. It is assumed to be
    a list with the following structure:

    1. An arbitrary number (zero allowed) of integers, each specifying the number
       of units in a shared layer. If the number of ints is zero, there will be
       no shared layers.
    2. An optional dict, to specify the following non-shared layers for the value
       network and the policy network. It is formatted like
       ``dict(vf=[<value layer sizes>], pi=[<policy layer sizes>])``. If it is
       missing any of the keys (pi or vf), zero non-shared layers (empty list)
       is assumed.

    For example, to construct a network with one shared layer of size 55 followed
    by two non-shared layers of size 255 for the value network and a single
    non-shared layer of size 128 for the policy network, the following
    ``net_arch`` would be used: ``[55, dict(vf=[255, 255], pi=[128])]``. A simple
    shared network topology with two layers of size 128 would be specified as
    ``[128, 128]``.

    :param flat_observations: (tf.Tensor) The observations to base policy and value function on.
    :param net_arch: ([int or dict]) The specification of the policy and value networks.
        See above for details on its formatting.
    :param act_fun: (tf function) The activation function to use for the networks.
    :param layer_norm: (bool) Whether to apply layer normalization to the policy branch.
    :return: (tf.Tensor, tf.Tensor) latent_policy, latent_value of the specified network.
        If all layers are shared, then ``latent_policy == latent_value``
    """
    latent = flat_observations
    policy_only_layers = []  # Layer sizes of the network that only belongs to the policy network
    value_only_layers = []  # Layer sizes of the network that only belongs to the value network

    # Iterate through the shared layers and build the shared parts of the network
    for idx, layer in enumerate(net_arch):
        if isinstance(layer, int):  # Check that this is a shared layer
            layer_size = layer
            latent = act_fun(linear(latent, "shared_fc{}".format(idx), layer_size,
                                    init_scale=np.sqrt(2)))
        else:
            assert isinstance(layer, dict), "Error: the net_arch list can only contain ints and dicts"
            if 'pi' in layer:
                assert isinstance(layer['pi'], list), "Error: net_arch[-1]['pi'] must contain a list of integers."
                policy_only_layers = layer['pi']
            if 'vf' in layer:
                assert isinstance(layer['vf'], list), "Error: net_arch[-1]['vf'] must contain a list of integers."
                value_only_layers = layer['vf']
            break  # From here on the network splits up in policy and value network

    # Build the non-shared part of the network
    latent_policy = latent
    latent_value = latent
    for idx, (pi_layer_size, vf_layer_size) in enumerate(zip_longest(policy_only_layers,
                                                                     value_only_layers)):
        if pi_layer_size is not None:
            assert isinstance(pi_layer_size, int), "Error: net_arch[-1]['pi'] must only contain integers."
            if layer_norm:
                with tf.variable_scope("pi_fc{}".format(idx)):
                    n_input = latent_policy.get_shape()[1].value
                    weight = tf.get_variable("w", [n_input, pi_layer_size],
                                             initializer=ortho_init(np.sqrt(2)))
                    bias = tf.get_variable("b", [pi_layer_size],
                                           initializer=tf.constant_initializer(np.sqrt(2)))
                    pi_h = tf.matmul(latent_policy, weight) + bias
                    pi_h = tf.contrib.layers.layer_norm(pi_h, center=True, scale=True)
                latent_policy = act_fun(pi_h)
            else:
                latent_policy = act_fun(linear(latent_policy, "pi_fc{}".format(idx),
                                               pi_layer_size, init_scale=np.sqrt(2)))

        if vf_layer_size is not None:
            assert isinstance(vf_layer_size, int), "Error: net_arch[-1]['vf'] must only contain integers."
            latent_value = act_fun(linear(latent_value, "vf_fc{}".format(idx),
                                          vf_layer_size, init_scale=np.sqrt(2)))

    return latent_policy, latent_value
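# Hypothetical usage sketch for `mlp_extractor` (not in the original snippet):
# one shared layer of 55 units, then a 255-255 value branch and a 128-unit
# policy branch, matching the docstring example. Assumes TF 1.x graph mode.
def _demo_mlp_extractor():
    obs_ph = tf.placeholder(tf.float32, [None, 10], name='obs')
    latent_pi, latent_vf = mlp_extractor(obs_ph,
                                         net_arch=[55, dict(vf=[255, 255], pi=[128])],
                                         act_fun=tf.nn.relu)
    return latent_pi, latent_vf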
def separable_conv(input_tensor, scope, *, n_filters, filter_size, stride, pad='VALID',
                   channel_multiplier=1, init_scale=1.0, data_format='NHWC',
                   one_dim_bias=False):
    """
    Creates a 2d depthwise-separable convolutional layer for TensorFlow

    :param input_tensor: (TensorFlow Tensor) The input tensor for the convolution
    :param scope: (str) The TensorFlow variable scope
    :param n_filters: (int) The number of filters
    :param filter_size: (Union[int, [int], tuple<int, int>]) The filter size for the squared
        kernel matrix, or the height and width of the kernel filter if the input is a list or tuple
    :param stride: (int) The stride of the convolution
    :param pad: (str) The padding type ('VALID' or 'SAME')
    :param channel_multiplier: (int) The number of depthwise outputs per input channel
    :param init_scale: (float) The initialization scale
    :param data_format: (str) The data format for the convolution weights
    :param one_dim_bias: (bool) If the bias should be one dimensional or not
    :return: (TensorFlow Tensor) 2d separable convolutional layer
    """
    if isinstance(filter_size, (list, tuple)):
        assert len(filter_size) == 2, \
            "Filter size must have 2 elements (height, width), {} were given".format(len(filter_size))
        filter_height = filter_size[0]
        filter_width = filter_size[1]
    else:
        filter_height = filter_size
        filter_width = filter_size
    if data_format == 'NHWC':
        channel_ax = 3
        strides = [1, stride, stride, 1]
        bshape = [1, 1, 1, n_filters]
    elif data_format == 'NCHW':
        channel_ax = 1
        strides = [1, 1, stride, stride]
        bshape = [1, n_filters, 1, 1]
    else:
        raise NotImplementedError
    bias_var_shape = [n_filters] if one_dim_bias else [1, n_filters, 1, 1]
    n_input = input_tensor.get_shape()[channel_ax].value
    with tf.variable_scope(scope):
        depthwise_filter = tf.get_variable(
            shape=(filter_height, filter_width, n_input, channel_multiplier),
            name="depthwise_filter", initializer=ortho_init(init_scale))
        pointwise_filter = tf.get_variable(
            shape=[1, 1, channel_multiplier * n_input, n_filters],
            name="pointwise_filter", initializer=ortho_init(init_scale))
        bias = tf.get_variable("b", bias_var_shape, initializer=tf.constant_initializer(0.0))
        if not one_dim_bias and data_format == 'NHWC':
            bias = tf.reshape(bias, bshape)
        output = tf.nn.separable_conv2d(input_tensor, depthwise_filter, pointwise_filter,
                                        strides=strides, padding=pad, data_format=data_format)
        return bias + output
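# Hypothetical usage sketch for `separable_conv` (not in the original snippet):
# an 8x8, stride-4 separable stem over an NHWC image batch, in the style of a
# typical Atari CNN. Assumes TF 1.x graph mode.
def _demo_separable_conv():
    images = tf.placeholder(tf.float32, [None, 84, 84, 4], name='images')
    out = tf.nn.relu(separable_conv(images, 'sep1', n_filters=32, filter_size=8,
                                    stride=4, pad='SAME', init_scale=np.sqrt(2)))
    return out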
def proba_distribution_from_latent(self, pi_latent_vector, vf_latent_vector,
                                   init_scale=1.0, init_bias=0.0):
    master_W, master_b = get_aggregation_var(pi_latent_vector, 'master',
                                             self.sources_actions.get_shape()[1],
                                             self.n_cat,
                                             no_bias=self.no_bias, SDW=self.SDW,
                                             bias_layer_initializer=ortho_init(init_scale),
                                             summary=self.summary)
    pdparam = affine_transformation(self.sources_actions, master_W, master_b,
                                    summary=self.summary)
    q_values = linear(vf_latent_vector, 'q', self.n_cat,
                      init_scale=init_scale, init_bias=init_bias)
    return self.proba_distribution_from_flat(pdparam), pdparam, q_values
def proba_distribution_from_latent(self, pi_latent_vector, vf_latent_vector,
                                   init_scale=1.0, init_bias=0.0):
    master_W, master_b = get_aggregation_var(pi_latent_vector, 'master',
                                             self.sources_actions.get_shape()[1],
                                             self.sources_actions.get_shape()[2],
                                             no_bias=self.no_bias, SDW=self.SDW,
                                             bias_layer_initializer=ortho_init(init_scale),
                                             summary=self.summary)
    mean = affine_transformation(self.sources_actions, master_W, master_b,
                                 summary=self.summary)
    logstd = tf.get_variable(name='pi/logstd', shape=[1, self.size],
                             initializer=tf.zeros_initializer())
    pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
    q_values = linear(vf_latent_vector, 'q', self.size,
                      init_scale=init_scale, init_bias=init_bias)
    return self.proba_distribution_from_flat(pdparam), mean, q_values