Example #1
File: singh_q.py  Project: namch29/dca
 def _build_net(self, inp, name):
     with tf.variable_scope('model/' + name) as scope:
         pad = tf.keras.layers.ZeroPadding2D((1, 1))
         out = pad(inp)
         conv2 = tf.keras.layers.LocallyConnected2D(
             filters=70,
             kernel_size=3,
             padding="valid",
             kernel_initializer=self.kern_init_conv(),
             use_bias=self.pp['conv_bias'],
             activation=None)(out)
         value = tf.layers.dense(inputs=conv2,
                                 units=1,
                                 kernel_initializer=self.kern_init_dense(),
                                 use_bias=False,
                                 name="value")
         assert (value.shape[-1] == 1)
         advantages = tf.layers.dense(
             inputs=conv2,
             units=self.n_channels,
             use_bias=False,
             kernel_initializer=self.kern_init_dense(),
             name="advantages")
         # Average dueling: Q = V + (A - mean(A))
         q_vals = value + (
             advantages - tf.reduce_mean(advantages, axis=1, keepdims=True))
         trainable_vars = get_trainable_vars(scope)
         print(q_vals.shape)
         return q_vals, trainable_vars
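A note on the padding dance above: Keras' LocallyConnected2D (at least in the TF 1.x line used here) only supports "valid" padding, so the explicit ZeroPadding2D is what keeps the 3x3 locally connected layer from shrinking the grid. A quick arithmetic check (a sketch; the 7x7 grid size is an assumption, not from this snippet):

def valid_out_size(in_size, pad, kernel):
    # Output size of a "valid" k x k convolution applied after p pixels
    # of zero padding on each side: in + 2p - k + 1.
    return in_size + 2 * pad - kernel + 1

print(valid_out_size(7, 1, 3))  # 7: ZeroPadding2D((1, 1)) + 3x3 "valid" == "same"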
Example #2
 def _build_net(self, grid, name):
     with tf.variable_scope(name) as scope:
         conv1 = tf.layers.conv2d(
             inputs=grid,
             filters=self.n_channels,
             kernel_size=4,
             padding="same",
             kernel_initializer=self.kern_init_conv(),
             kernel_regularizer=self.regularizer,
             use_bias=True,  # Default setting
             activation=self.act_fn)
         conv2 = tf.layers.conv2d(
             inputs=conv1,
             filters=70,
             kernel_size=3,
             padding="same",
             kernel_initializer=self.kern_init_conv(),
             kernel_regularizer=self.regularizer,
             use_bias=True,
             activation=self.act_fn)
         flat = tf.layers.flatten(conv2)
         q_vals = tf.layers.dense(
             inputs=flat,
             units=self.n_channels,
             kernel_initializer=self.kern_init_dense(),
             kernel_regularizer=self.regularizer,
             use_bias=False,
             name="q_vals")
         trainable_vars_by_name = get_trainable_vars(scope)
     return q_vals, trainable_vars_by_name
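Both convolutions use "same" padding, so the spatial dimensions survive to the flatten, and the dense layer sees rows * cols * 70 inputs per sample. A NumPy sanity check of that shape flow (batch and grid sizes are illustrative):

import numpy as np

batch, rows, cols = 32, 7, 7
conv2_out = np.zeros((batch, rows, cols, 70))  # "same" padding keeps rows x cols
flat = conv2_out.reshape(batch, -1)            # what tf.layers.flatten computes
print(flat.shape)                              # (32, 3430) == (32, 7 * 7 * 70)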
Example #3
 def _build_net(self, inp, name):
     with tf.variable_scope('model/' + name) as scope:
         if self.pp['dueling_qnet']:
             value = tf.layers.dense(
                 inputs=inp,
                 units=1,
                 kernel_initializer=self.kern_init_dense(),
                 use_bias=False,
                 name="value")
             assert (value.shape[-1] == 1)
             advantages = tf.layers.dense(
                 inputs=inp,
                 units=self.n_channels,
                 use_bias=False,
                 kernel_initializer=self.kern_init_dense(),
                 name="advantages")
             q_vals = value + (
                 advantages - tf.reduce_mean(advantages, axis=1, keepdims=True))
             print("Dueling q-out shape:", q_vals.shape)
         else:
             q_vals = tf.layers.dense(
                 inputs=inp,
                 units=self.n_channels,
                 use_bias=False,
                 kernel_initializer=self.kern_init_dense(),
                 name="qvals")
         trainable_vars = get_trainable_vars(scope)
         return q_vals, trainable_vars
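The mean subtraction in the dueling branch is the identifiability trick from the dueling-DQN paper: without it, a constant could be shifted freely between V and A. Subtracting the mean pins the advantages so that the mean Q-value recovers V. A minimal NumPy check:

import numpy as np

value = np.array([[0.5]])                 # (batch, 1)
advantages = np.array([[1.0, 2.0, 3.0]])  # (batch, n_channels)
q_vals = value + (advantages - advantages.mean(axis=1, keepdims=True))
print(q_vals)         # [[-0.5  0.5  1.5]]
print(q_vals.mean())  # 0.5 == value: the mean Q-value recovers V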
Example #4
    def build(self):
        frepshape = [None, self.rows, self.cols, self.n_channels + 1]
        self.freps = tf.placeholder(tf.float32, frepshape, "feature_reps")
        self.next_freps = tf.placeholder(tf.float32, frepshape, "next_feature_reps")
        self.rewards = tf.placeholder(tf.float32, [None], "rewards")
        self.discount = tf.placeholder(tf.float32, [None], "discount")
        self.dot = tf.placeholder(tf.float32, [None, 1], "dot")
        freps_rowvec = tf.layers.flatten(self.freps)
        next_freps_rowvec = tf.layers.flatten(self.next_freps)
        with tf.variable_scope('model/' + self.name) as scope:
            dense = tf.layers.Dense(
                units=1,
                kernel_initializer=tf.zeros_initializer(),
                kernel_regularizer=None,
                bias_initializer=tf.zeros_initializer(),
                use_bias=False,
                activation=None,
                name="vals")
            self.value = dense.apply(freps_rowvec)
            self.next_value = dense.apply(next_freps_rowvec)
            # online_vars = tuple(get_trainable_vars(scope).values())
            online_vars = get_trainable_vars(scope)

        self.td_err = self.rewards + self.discount * self.next_value - self.value

        trainer, self.lr, global_step = build_default_trainer(**self.pp)
        grads, trainable_vars = zip(*trainer.compute_gradients(self.td_err, online_vars))
        # grads = grads * self.dot  #
        grads = [grad * self.dot for grad in grads]
        self.do_train = trainer.apply_gradients(
            zip(grads, trainable_vars), global_step=global_step)
        return None, None
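Two details are easy to miss here: compute_gradients is called on the raw TD error rather than a squared loss, and since value and next_value share the same dense layer, both terms contribute to the gradient, which is then rescaled elementwise by the dot placeholder. For a single sample and a linear value function, the update the graph performs reduces to the following sketch (NumPy; names are illustrative, and any momentum or adaptive terms in the actual trainer are ignored):

import numpy as np

def td_err_step(w, x, x_next, reward, discount, lr, dot):
    # v(s) = w . x(s); v(s) and v(s') use the same weights w, so
    # d(td_err)/dw = discount * x_next - x.
    td_err = reward + discount * (w @ x_next) - (w @ x)
    grad = (discount * x_next - x) * dot  # grads *= dot, as in the graph
    return w - lr * grad                  # what apply_gradients then does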
Example #5
File: afterstate.py  Project: namch29/dca
    def build(self):
        # depth = self.n_channels * 2 if self.pp['grid_split'] else self.n_channels
        # depth = self.n_channels + 1
        depth = self.n_channels
        self.freps = tf.placeholder(
            tf.float32, [None, self.pp['rows'], self.pp['cols'], depth], "grids")
        self.value_target = tf.placeholder(tf.float32, [None, 1], "value_target")

        if self.pp['scale_freps']:
            frepshape = [None, self.rows, self.cols, self.n_channels + 1]
            mult1 = np.ones(frepshape[1:], np.float32)  # Scaling feature reps
            mult1[:, :, :-1] /= 43
            mult1[:, :, -1] /= 70
            inp = self.freps * tf.constant(mult1)
        else:
            inp = self.freps

        with tf.variable_scope('model/' + self.name) as scope:
            conv1 = tf.layers.conv2d(
                inputs=inp,
                filters=70,
                kernel_size=8,
                padding="same",
                kernel_initializer=self.kern_init_conv(),
                kernel_regularizer=self.regularizer,
                use_bias=True,
                activation=self.act_fn)
            # conv2 = tf.layers.conv2d(
            #     inputs=conv1,
            #     filters=140,
            #     kernel_size=4,
            #     kernel_initializer=self.kern_init_conv(),
            #     kernel_regularizer=self.regularizer,
            #     use_bias=True,
            #     activation=self.act_fn)
            self.value = tf.layers.dense(
                inputs=tf.layers.flatten(conv1),
                units=1,
                kernel_initializer=tf.zeros_initializer(),
                kernel_regularizer=None,
                bias_initializer=tf.zeros_initializer(),
                use_bias=True,
                activation=None,
                name="vals")
            online_vars = get_trainable_vars(scope)

        self.err = self.value_target - self.value
        self.loss = tf.losses.mean_squared_error(
            labels=self.value_target, predictions=self.value)
        return online_vars
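The scale_freps branch divides the first n_channels feature planes by 43 and the last plane by 70, presumably their maximum attainable values, to bring the inputs onto a comparable scale. Note that mult1 is built for depth n_channels + 1 while the freps placeholder above is created with depth = n_channels, so the two only line up when the commented-out depth line is used. The broadcast itself, in NumPy (grid size illustrative):

import numpy as np

rows, cols, n_channels = 7, 7, 70
mult1 = np.ones((rows, cols, n_channels + 1), np.float32)
mult1[:, :, :-1] /= 43   # scale the first n_channels feature planes
mult1[:, :, -1] /= 70    # scale the last plane
freps = np.random.randint(0, 44, (1, rows, cols, n_channels + 1))
scaled = freps * mult1   # broadcasts across the batch dimension
print(scaled.shape)      # (1, 7, 7, 71)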
Example #6
File: qnet.py  Project: namch29/dca
 def _build_net(self, top_inp, cell, name):
     inp = self._build_base_net(top_inp, cell, name)
     with tf.variable_scope('model/' + name) as scope:
         if self.pp['dueling_qnet']:
             h1 = inp
             # h1 = tf.layers.dense(
             #     inputs=base_net,
             #     units=140,
             #     kernel_initializer=self.kern_init_dense(),
             #     use_bias=False,
             #     name="h1")
             value = tf.layers.dense(
                 inputs=h1,
                 units=1,
                 kernel_initializer=self.kern_init_dense(),
                 use_bias=False,
                 name="value")
             assert (value.shape[-1] == 1)
             advantages = tf.layers.dense(
                 inputs=h1,
                 units=self.n_channels,
                 use_bias=False,
                 kernel_initializer=self.kern_init_dense(),
                 name="advantages")
             # Avg. dueling supposedly more stable than max according to paper
             # Max Dueling
             # q_vals = value + (advantages - tf.reduce_max(
             #     advantages, axis=1, keepdims=True))
             # Average Dueling
             q_vals = value + (advantages - tf.reduce_mean(
                 advantages, axis=1, keepdims=True))
             if "online" in name:
                 self.online_advantages = advantages
             if "target" in name:
                 self.target_value = value
         elif self.pp['bighead']:
             q_vals = inp
         else:
             q_valsd = tf.layers.Dense(
                 units=self.n_channels,
                 kernel_initializer=self.kern_init_dense(),
                 kernel_regularizer=self.dense_regularizer,
                 use_bias=False,
                 name="q_vals")
             q_vals = q_valsd.apply(inp)
             self.weight_vars.append(q_valsd.kernel)
             self.weight_names.append(q_valsd.name)
         # Also includes vars from base net
         trainable_vars_by_name = get_trainable_vars(scope)
     return q_vals, trainable_vars_by_name
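On the max-vs-average comment above: with max dueling the best action's Q-value equals V exactly, while average dueling only constrains the mean, which the dueling paper found more stable in practice. The two aggregations side by side (NumPy sketch):

import numpy as np

value = np.array([[0.5]])
advantages = np.array([[1.0, 2.0, 3.0]])
q_avg = value + (advantages - advantages.mean(axis=1, keepdims=True))
q_max = value + (advantages - advantages.max(axis=1, keepdims=True))
print(q_avg)  # [[-0.5  0.5  1.5]]
print(q_max)  # [[-1.5 -0.5  0.5]], the best action's Q equals V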
Example #7
File: singh.py  Project: namch29/dca
 def _build_net(self, top_inp, name):
     dense_inp = self._build_pre_conv(top_inp,
                                      name) if self.pre_conv else top_inp
     with tf.variable_scope('model/' + name) as scope:
         value_layer = tf.layers.Dense(
             units=1,
             kernel_initializer=self.kern_init_dense,
             use_bias=False,
             activation=None)
         value = value_layer.apply(tf.layers.flatten(dense_inp))
         self.weight_vars.append(value_layer.kernel)
         self.weight_names.append(value_layer.name)
         trainable_vars = get_trainable_vars(scope)
     return value, trainable_vars
Example #8
File: singh_ppo.py  Project: namch29/dca
 def _build_vnet(self, freps, name):
     with tf.variable_scope('model/' + name) as scope:
         value_layer = tf.layers.Dense(
             units=1,
             kernel_initializer=tf.zeros_initializer(),
             kernel_regularizer=self.dense_regularizer,
             use_bias=False,
             activation=None)
         value = value_layer.apply(tf.layers.flatten(freps))
         self.weight_vars.append(value_layer.kernel)
         self.weight_names.append(value_layer.name)
         # NOTE TODO either gotta have 7x7x70 outputs, or input cell
         # also gotta think about a hidden layer before value/policy
         trainable_vars = get_trainable_vars(scope)
     return value, trainable_vars
Example #9
 def _build_net(self, top_inp, ncells, name):
     with tf.variable_scope('model/' + name) as scope:
         # print(top_inp.shape)
         # conv1 = self.add_conv_layer(top_inp, self.pp['conv_nfilters'][0],
         #                             self.pp['conv_kernel_sizes'][0])
         # conv1 = SeparableSplit(
         # conv1 = InPlaneSplit(
         #     kernel_size=self.pp['conv_kernel_sizes'][0],
         #     stride=1,
         #     use_bias=self.pp['conv_bias'],
         #     padding="SAME",
         #     kernel_initializer=self.kern_init_conv()).apply(top_inp)
         # conv2 = InPlaneSplit(
         #     kernel_size=self.pp['conv_kernel_sizes'][1],
         #     stride=1,
         #     use_bias=self.pp['conv_bias'],
         #     padding="SAME",
         #     kernel_initializer=self.kern_init_conv()).apply(conv1)
         # conv = separable_conv2d(
         #     inp=top_inp,
         #     kernel_size=self.pp['conv_kernel_sizes'][0],
         #     stride=1,
         #     padding="SAME",
         #     kernel_initializer=self.kern_init_conv())
         pad = tf.keras.layers.ZeroPadding2D((1, 1))
         out = pad(top_inp)
         conv1 = tf.keras.layers.LocallyConnected2D(
             filters=70,
             kernel_size=self.pp['conv_kernel_sizes'][0],
             padding="valid",
             kernel_initializer=self.kern_init_dense(),
             use_bias=False,
             activation=tf.nn.relu)(out)
         pad = tf.keras.layers.ZeroPadding2D((1, 1))
         out = pad(conv1)
         conv3 = tf.keras.layers.LocallyConnected2D(
             filters=70,
             kernel_size=self.pp['conv_kernel_sizes'][-1],
             padding="valid",
             kernel_initializer=self.kern_init_dense(),
             use_bias=False,
             activation=tf.nn.relu)(out)
         print(conv3.shape)
         q_vals = tf.gather_nd(conv3, ncells)
         trainable_vars_by_name = get_trainable_vars(scope)
     return q_vals, trainable_vars_by_name
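The tf.gather_nd call selects one 70-vector of Q-values per sample, so ncells is expected to hold (batch_index, row, col) index triples. The NumPy equivalent of that indexing (shapes illustrative):

import numpy as np

conv3 = np.random.rand(32, 7, 7, 70)       # (batch, rows, cols, filters)
ncells = np.array([[0, 3, 2], [1, 0, 6]])  # (batch_idx, row, col) per lookup
q_vals = conv3[ncells[:, 0], ncells[:, 1], ncells[:, 2]]
print(q_vals.shape)                        # (2, 70): one Q-vector per triple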
Example #10
 def _build_net(self, grid, cell, name):
     base_net = self._build_base_net(grid, cell, name)
     with tf.variable_scope(name) as scope:
         hidden = tf.layers.dense(base_net, units=128, activation=tf.nn.relu)
         # Output layers for policy and value estimations
         policy = tf.layers.dense(
             hidden,
             units=self.n_channels,
             activation=tf.nn.softmax,
             kernel_initializer=nutils.normalized_columns_initializer(0.01))
         value = tf.layers.dense(
             hidden,
             units=1,
             activation=None,
             kernel_initializer=nutils.normalized_columns_initializer(1.0))
         trainable_vars_by_name = get_trainable_vars(scope)
     return policy, value, trainable_vars_by_name
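nutils.normalized_columns_initializer is not shown in this listing; the A3C-style initializer commonly used under that name draws a Gaussian matrix and rescales each column to a fixed L2 norm (small for the policy head, 1.0 for the value head). A sketch of that common definition, assuming this repo follows it:

import numpy as np

def normalized_columns_initializer(std=1.0):
    # Rescales each column of a Gaussian draw to L2 norm `std`.
    def init(shape, dtype=np.float32):
        out = np.random.randn(*shape).astype(dtype)
        out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
        return out
    return init

w = normalized_columns_initializer(0.01)((128, 70))
print(np.linalg.norm(w, axis=0)[:3])  # ~[0.01 0.01 0.01]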
Example #11
File: singh_ac.py  Project: namch29/dca
 def _build_net(self, freps, name):
     with tf.variable_scope('model/' + name) as scope:
         if self.pre_conv:
             dense_inp = self.add_conv_layer(
                 freps, self.pp['conv_nfilters'][0],
                 self.pp['conv_kernel_sizes'][0])
         else:
             dense_inp = freps
         h = self.add_dense_layer(dense_inp, 70,
                                  normalized_columns_initializer(0.01))
         value = self.add_dense_layer(h, 1,
                                      normalized_columns_initializer(0.01))
         policy = self.add_dense_layer(h, 70,
                                       normalized_columns_initializer(0.01),
                                       tf.nn.softmax)
         trainable_vars = get_trainable_vars(scope)
         # Output layers for policy and value estimations
     return value, policy, trainable_vars
Example #12
File: rnn_qnet.py  Project: namch29/dca
 def _build_head(self, inp, name):
     with tf.variable_scope('model/' + name) as scope:
         if self.pp['dueling_qnet']:
             h1 = inp
             # h1 = tf.layers.dense(
             #     inputs=base_net,
             #     units=140,
             #     kernel_initializer=self.kern_init_dense(),
             #     use_bias=False,
             #     name="h1")
             value = tf.layers.dense(
                 inputs=h1,
                 units=1,
                 kernel_initializer=self.kern_init_dense(),
                 use_bias=False,
                 name="value")
             advantages = tf.layers.dense(
                 inputs=h1,
                 units=self.n_channels,
                 use_bias=False,
                 kernel_initializer=self.kern_init_dense(),
                 name="advantages")
             # Avg. dueling supposedly more stable than max according to paper
             # Max Dueling
             # q_vals = value + (advantages - tf.reduce_max(
             #     advantages, axis=1, keepdims=True))
             # Average Dueling
             q_vals = value + (advantages - tf.reduce_mean(
                 advantages, axis=1, keepdims=True))
             if "online" in name:
                 self.advantages = advantages
             # if "target" in name:
             #     self.value = value
         else:
             q_vals = tf.layers.dense(
                 inputs=inp,
                 units=self.n_channels,
                 kernel_initializer=self.kern_init_dense(),
                 kernel_regularizer=self.regularizer,
                 use_bias=False,
                 name="q_vals")
         trainable_vars_by_name = get_trainable_vars(scope)
     return q_vals, trainable_vars_by_name
Example #13
    def _build_net(self, top_inps):
        with tf.variable_scope('model/' + self.name) as scope:
            # conv = DepthwiseConv2D(self.depth, self.pp['conv_kernel_sizes'][0])
            conv1 = tf.layers.Conv2D(
                filters=self.pp['conv_nfilters'][0],
                kernel_size=self.pp['conv_kernel_sizes'][0],
                padding='SAME',
                kernel_initializer=self.kern_init_conv(),
                kernel_regularizer=self.conv_regularizer,
                use_bias=self.pp['conv_bias'],
                bias_initializer=tf.constant_initializer(0.1),
                activation=self.act_fn,
                name="vconv",
                _reuse=False)
            conv2 = tf.layers.Conv2D(
                filters=self.pp['conv_nfilters'][1],
                kernel_size=self.pp['conv_kernel_sizes'][1],
                padding='SAME',
                kernel_initializer=self.kern_init_conv(),
                kernel_regularizer=self.conv_regularizer,
                use_bias=self.pp['conv_bias'],
                bias_initializer=tf.constant_initializer(0.1),
                activation=self.act_fn,
                name="vconv2",
                _reuse=False)
            value_layer = tf.layers.Dense(
                units=1,
                kernel_initializer=self.kern_init_dense(),
                use_bias=False,
                activation=None)

            val = value_layer.apply(
                tf.layers.flatten(conv2.apply(conv1.apply(top_inps[0]))))
            nval = value_layer.apply(
                tf.layers.flatten(conv2.apply(conv1.apply(top_inps[1]))))

            self.weight_vars.append(value_layer.kernel)
            self.weight_names.append(value_layer.name)
            # self.weight_vars.append(conv.filters)
            # self.weight_names.append(conv.name)
            trainable_vars_by_name = get_trainable_vars(scope)
        return val, nval, trainable_vars_by_name
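Weight sharing here comes from instantiating each tf.layers layer once and calling .apply on both inputs: the kernels live on the layer objects, so val and nval are computed with identical weights. Calling the functional tf.layers.conv2d twice would instead create two independent sets of variables. A toy stand-in showing why object reuse ties the weights:

import numpy as np

class Dense:
    # Toy stand-in for tf.layers.Dense: the kernel belongs to the layer
    # object, so every .apply call reuses the same weights.
    def __init__(self, in_dim, units):
        self.kernel = np.random.randn(in_dim, units)
    def apply(self, x):
        return x @ self.kernel

layer = Dense(16, 1)
val = layer.apply(np.ones((4, 16)))    # same kernel evaluated on
nval = layer.apply(np.zeros((4, 16)))  # both input batches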
Example #14
File: singh_ppo.py  Project: namch29/dca
 def _build_pnet(self, freps, name):
     with tf.variable_scope('model/' + name) as scope:
         # policy = tf.keras.layers.LocallyConnected2D(
         #     filters=70,
         #     kernel_size=1,
         #     padding="valid",
         #     kernel_initializer=tf.zeros_initializer(),
         #     use_bias=self.pp['conv_bias'],
         #     activation=None)(freps)
         # print(policy.shape)
         policy_layer = tf.layers.Dense(
             units=70,
             kernel_initializer=tf.zeros_initializer(),
             kernel_regularizer=self.dense_regularizer,
             use_bias=False,
             activation=None)
         policy = policy_layer.apply(tf.layers.flatten(freps))
         # self.weight_vars.append(policy_layer.kernel)
         # self.weight_names.append(policy_layer.name)
         trainable_vars = get_trainable_vars(scope)
     return policy, trainable_vars
Example #15
    def build(self):
        # frepshape = [None, self.rows, self.cols, self.n_channels * 3 + 1]
        self.frep = tf.placeholder(tf.int32, [None, *self.frepshape],
                                   "feature_reps")
        self.grads = tf.placeholder(tf.float32, [self.wdim, 1], "grad_corr")

        frep = tf.cast(self.frep, tf.float32)
        if self.grid_inp:
            grid_depth = 2 * self.n_channels
            self.grid = tf.placeholder(
                tf.bool, [None, self.rows, self.cols, grid_depth], "grid")
            grid = tf.cast(self.grid, tf.float32)
            top_inp = tf.concat([grid, frep], axis=3)
            self.depth = self.frepshape[-1] + grid_depth
        else:
            top_inp = frep
            self.depth = self.frepshape[-1]

        with tf.variable_scope('model/' + self.name) as scope:
            self.value = tf.layers.dense(
                inputs=tf.layers.flatten(top_inp),
                units=1,
                kernel_initializer=tf.zeros_initializer(),
                kernel_regularizer=None,
                bias_initializer=tf.zeros_initializer(),
                use_bias=False,
                activation=None,
                name="vals")
            online_vars = tuple(get_trainable_vars(scope).values())
        # Shadows the placeholder assigned to self.grads above; apply_gradients
        # expects a list of (gradient, variable) pairs.
        self.grads = [(tf.placeholder(tf.float32,
                                      [self.wdim, 1]), online_vars[0])]

        trainer, self.lr, global_step = build_default_trainer(**self.pp)
        self.do_train = trainer.apply_gradients(self.grads,
                                                global_step=global_step)
        return None, None
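Unlike the other builds, this graph never differentiates anything itself: the gradient is computed outside the graph, fed through the placeholder, and handed straight to apply_gradients. Ignoring any momentum or adaptive terms in the actual trainer, the applied update is plain SGD with a caller-supplied gradient (NumPy sketch; wdim is illustrative):

import numpy as np

wdim, lr = 3430, 1e-3
w = np.zeros((wdim, 1))
external_grad = np.random.randn(wdim, 1)  # stands in for the fed placeholder
w -= lr * external_grad                   # what apply_gradients performs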