def _build_net(self, inp, name):
    with tf.variable_scope('model/' + name) as scope:
        pad = tf.keras.layers.ZeroPadding2D((1, 1))
        out = pad(inp)
        conv2 = tf.keras.layers.LocallyConnected2D(
            filters=70,
            kernel_size=3,
            padding="valid",
            kernel_initializer=self.kern_init_conv(),
            use_bias=self.pp['conv_bias'],
            activation=None)(out)
        value = tf.layers.dense(
            inputs=conv2,
            units=1,
            kernel_initializer=self.kern_init_dense(),
            use_bias=False,
            name="value")
        assert (value.shape[-1] == 1)
        advantages = tf.layers.dense(
            inputs=conv2,
            units=self.n_channels,
            use_bias=False,
            kernel_initializer=self.kern_init_dense(),
            name="advantages")
        # Average dueling: subtract the mean advantage over the channel (last)
        # axis so that the advantages are zero-mean. conv2 is 4D here, so the
        # mean must be taken over the last axis rather than axis 1 (the rows).
        q_vals = value + (
            advantages - tf.reduce_mean(advantages, axis=-1, keepdims=True))
        trainable_vars = get_trainable_vars(scope)
        print(q_vals.shape)
        return q_vals, trainable_vars
def _build_net(self, grid, name):
    with tf.variable_scope(name) as scope:
        conv1 = tf.layers.conv2d(
            inputs=grid,
            filters=self.n_channels,
            kernel_size=4,
            padding="same",
            kernel_initializer=self.kern_init_conv(),
            kernel_regularizer=self.regularizer,
            use_bias=True,  # Default setting
            activation=self.act_fn)
        conv2 = tf.layers.conv2d(
            inputs=conv1,
            filters=70,
            kernel_size=3,
            padding="same",
            kernel_initializer=self.kern_init_conv(),
            kernel_regularizer=self.regularizer,
            use_bias=True,
            activation=self.act_fn)
        flat = tf.layers.flatten(conv2)
        q_vals = tf.layers.dense(
            inputs=flat,
            units=self.n_channels,
            kernel_initializer=self.kern_init_dense(),
            kernel_regularizer=self.regularizer,
            use_bias=False,
            name="q_vals")
        trainable_vars_by_name = get_trainable_vars(scope)
        return q_vals, trainable_vars_by_name
def _build_net(self, inp, name):
    with tf.variable_scope('model/' + name) as scope:
        if self.pp['dueling_qnet']:
            value = tf.layers.dense(
                inputs=inp,
                units=1,
                kernel_initializer=self.kern_init_dense(),
                use_bias=False,
                name="value")
            assert (value.shape[-1] == 1)
            advantages = tf.layers.dense(
                inputs=inp,
                units=self.n_channels,
                use_bias=False,
                kernel_initializer=self.kern_init_dense(),
                name="advantages")
            q_vals = value + (
                advantages - tf.reduce_mean(advantages, axis=1, keepdims=True))
            print("Dueling q-out shape:", q_vals.shape)
        else:
            q_vals = tf.layers.dense(
                inputs=inp,
                units=self.n_channels,
                use_bias=False,
                kernel_initializer=self.kern_init_dense(),
                name="qvals")
        trainable_vars = get_trainable_vars(scope)
        return q_vals, trainable_vars
def build(self):
    frepshape = [None, self.rows, self.cols, self.n_channels + 1]
    self.freps = tf.placeholder(tf.float32, frepshape, "feature_reps")
    self.next_freps = tf.placeholder(tf.float32, frepshape, "next_feature_reps")
    self.rewards = tf.placeholder(tf.float32, [None], "rewards")
    self.discount = tf.placeholder(tf.float32, [None], "discount")
    self.dot = tf.placeholder(tf.float32, [None, 1], "dot")
    freps_rowvec = tf.layers.flatten(self.freps)
    next_freps_rowvec = tf.layers.flatten(self.next_freps)
    with tf.variable_scope('model/' + self.name) as scope:
        dense = tf.layers.Dense(
            units=1,
            kernel_initializer=tf.zeros_initializer(),
            kernel_regularizer=None,
            bias_initializer=tf.zeros_initializer(),
            use_bias=False,
            activation=None,
            name="vals")
        self.value = dense.apply(freps_rowvec)
        self.next_value = dense.apply(next_freps_rowvec)
        # online_vars = tuple(get_trainable_vars(scope).values())
        online_vars = get_trainable_vars(scope)
        # Expand rewards/discount to [None, 1] so the TD error broadcasts
        # element-wise against the [None, 1] value estimates instead of
        # producing an [None, None] matrix.
        self.td_err = tf.expand_dims(self.rewards, axis=1) \
            + tf.expand_dims(self.discount, axis=1) * self.next_value - self.value
        trainer, self.lr, global_step = build_default_trainer(**self.pp)
        grads, trainable_vars = zip(*trainer.compute_gradients(self.td_err, online_vars))
        # grads = grads * self.dot
        # grads = [grad * self.dot for grad in grads]
        self.do_train = trainer.apply_gradients(
            zip(grads, trainable_vars), global_step=global_step)
        return None, None
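# A minimal standalone sketch (assumption, not the original training code) of the
# idea behind the commented-out gradient scaling above: multiply every
# (gradient, variable) pair from compute_gradients() by a run-time correction
# factor before apply_gradients(). The names `dot_corr` and `w_demo` are
# hypothetical; the original uses a [None, 1] "dot" placeholder instead of a scalar.
import tensorflow as tf

dot_corr = tf.placeholder(tf.float32, [], "dot_corr")  # scalar correction factor
w_demo = tf.get_variable("w_demo", [4, 1], initializer=tf.zeros_initializer())
loss = tf.reduce_sum(tf.square(w_demo))
trainer = tf.train.GradientDescentOptimizer(1e-2)
grads_and_vars = trainer.compute_gradients(loss, [w_demo])
scaled = [(g * dot_corr, v) for g, v in grads_and_vars]  # scale each gradient
do_train = trainer.apply_gradients(scaled)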
def build(self):
    # depth = self.n_channels * 2 if self.pp['grid_split'] else self.n_channels
    # depth = self.n_channels + 1
    depth = self.n_channels
    self.freps = tf.placeholder(
        tf.float32, [None, self.pp['rows'], self.pp['cols'], depth], "grids")
    self.value_target = tf.placeholder(tf.float32, [None, 1], "value_target")
    if self.pp['scale_freps']:
        frepshape = [None, self.rows, self.cols, self.n_channels + 1]
        mult1 = np.ones(frepshape[1:], np.float32)  # Scaling feature reps
        mult1[:, :, :-1] /= 43
        mult1[:, :, -1] /= 70
        inp = self.freps * tf.constant(mult1)
    else:
        inp = self.freps
    with tf.variable_scope('model/' + self.name) as scope:
        conv1 = tf.layers.conv2d(
            inputs=inp,
            filters=70,
            kernel_size=8,
            padding="same",
            kernel_initializer=self.kern_init_conv(),
            kernel_regularizer=self.regularizer,
            use_bias=True,
            activation=self.act_fn)
        # conv2 = tf.layers.conv2d(
        #     inputs=conv1,
        #     filters=140,
        #     kernel_size=4,
        #     kernel_initializer=self.kern_init_conv(),
        #     kernel_regularizer=self.regularizer,
        #     use_bias=True,
        #     activation=self.act_fn)
        self.value = tf.layers.dense(
            inputs=tf.layers.flatten(conv1),
            units=1,
            kernel_initializer=tf.zeros_initializer(),
            kernel_regularizer=None,
            bias_initializer=tf.zeros_initializer(),
            use_bias=True,
            activation=None,
            name="vals")
        online_vars = get_trainable_vars(scope)
        self.err = self.value_target - self.value
        self.loss = tf.losses.mean_squared_error(
            labels=self.value_target, predictions=self.value)
        return online_vars
def _build_net(self, top_inp, cell, name):
    inp = self._build_base_net(top_inp, cell, name)
    with tf.variable_scope('model/' + name) as scope:
        if self.pp['dueling_qnet']:
            h1 = inp
            # h1 = tf.layers.dense(
            #     inputs=base_net,
            #     units=140,
            #     kernel_initializer=self.kern_init_dense(),
            #     use_bias=False,
            #     name="h1")
            value = tf.layers.dense(
                inputs=h1,
                units=1,
                kernel_initializer=self.kern_init_dense(),
                use_bias=False,
                name="value")
            assert (value.shape[-1] == 1)
            advantages = tf.layers.dense(
                inputs=h1,
                units=self.n_channels,
                use_bias=False,
                kernel_initializer=self.kern_init_dense(),
                name="advantages")
            # Avg. dueling supposedly more stable than max according to paper
            # Max Dueling
            # q_vals = value + (advantages - tf.reduce_max(
            #     advantages, axis=1, keepdims=True))
            # Average Dueling
            q_vals = value + (advantages - tf.reduce_mean(
                advantages, axis=1, keepdims=True))
            if "online" in name:
                self.online_advantages = advantages
            if "target" in name:
                self.target_value = value
        elif self.pp['bighead']:
            q_vals = inp
        else:
            q_valsd = tf.layers.Dense(
                units=self.n_channels,
                kernel_initializer=self.kern_init_dense(),
                kernel_regularizer=self.dense_regularizer,
                use_bias=False,
                name="q_vals")
            q_vals = q_valsd.apply(inp)
            self.weight_vars.append(q_valsd.kernel)
            self.weight_names.append(q_valsd.name)
        # Also includes vars from base net
        trainable_vars_by_name = get_trainable_vars(scope)
        return q_vals, trainable_vars_by_name
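# Minimal numpy sketch (illustration, not part of the original code) of the average
# dueling aggregation used above: subtracting the mean advantage makes the
# advantages zero-mean, so the mean Q-value over channels equals the state value.
import numpy as np

value = np.array([[2.0]])                  # shape (batch, 1)
advantages = np.array([[1.0, 3.0, -1.0]])  # shape (batch, n_channels)
q_vals = value + (advantages - advantages.mean(axis=1, keepdims=True))
print(q_vals)                              # [[2. 4. 0.]]
assert np.allclose(q_vals.mean(axis=1, keepdims=True), value)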
def _build_net(self, top_inp, name):
    dense_inp = self._build_pre_conv(top_inp, name) if self.pre_conv else top_inp
    with tf.variable_scope('model/' + name) as scope:
        value_layer = tf.layers.Dense(
            units=1,
            kernel_initializer=self.kern_init_dense(),
            use_bias=False,
            activation=None)
        value = value_layer.apply(tf.layers.flatten(dense_inp))
        self.weight_vars.append(value_layer.kernel)
        self.weight_names.append(value_layer.name)
        trainable_vars = get_trainable_vars(scope)
        return value, trainable_vars
def _build_vnet(self, freps, name):
    with tf.variable_scope('model/' + name) as scope:
        value_layer = tf.layers.Dense(
            units=1,
            kernel_initializer=tf.zeros_initializer(),
            kernel_regularizer=self.dense_regularizer,
            use_bias=False,
            activation=None)
        value = value_layer.apply(tf.layers.flatten(freps))
        self.weight_vars.append(value_layer.kernel)
        self.weight_names.append(value_layer.name)
        # NOTE TODO either gotta have 7x7x70 outputs, or input cell
        # also gotta think about a hidden layer before value/policy
        trainable_vars = get_trainable_vars(scope)
        return value, trainable_vars
def _build_net(self, top_inp, ncells, name):
    with tf.variable_scope('model/' + name) as scope:
        # print(top_inp.shape)
        # conv1 = self.add_conv_layer(top_inp, self.pp['conv_nfilters'][0],
        #                             self.pp['conv_kernel_sizes'][0])
        # conv1 = SeparableSplit(
        # conv1 = InPlaneSplit(
        #     kernel_size=self.pp['conv_kernel_sizes'][0],
        #     stride=1,
        #     use_bias=self.pp['conv_bias'],
        #     padding="SAME",
        #     kernel_initializer=self.kern_init_conv()).apply(top_inp)
        # conv2 = InPlaneSplit(
        #     kernel_size=self.pp['conv_kernel_sizes'][1],
        #     stride=1,
        #     use_bias=self.pp['conv_bias'],
        #     padding="SAME",
        #     kernel_initializer=self.kern_init_conv()).apply(conv1)
        # conv = separable_conv2d(
        #     inp=top_inp,
        #     kernel_size=self.pp['conv_kernel_sizes'][0],
        #     stride=1,
        #     padding="SAME",
        #     kernel_initializer=self.kern_init_conv())
        pad = tf.keras.layers.ZeroPadding2D((1, 1))
        out = pad(top_inp)
        conv1 = tf.keras.layers.LocallyConnected2D(
            filters=70,
            kernel_size=self.pp['conv_kernel_sizes'][0],
            padding="valid",
            kernel_initializer=self.kern_init_dense(),
            use_bias=False,
            activation=tf.nn.relu)(out)
        pad = tf.keras.layers.ZeroPadding2D((1, 1))
        out = pad(conv1)
        conv3 = tf.keras.layers.LocallyConnected2D(
            filters=70,
            kernel_size=self.pp['conv_kernel_sizes'][-1],
            padding="valid",
            kernel_initializer=self.kern_init_dense(),
            use_bias=False,
            activation=tf.nn.relu)(out)
        print(conv3.shape)
        q_vals = tf.gather_nd(conv3, ncells)
        trainable_vars_by_name = get_trainable_vars(scope)
        return q_vals, trainable_vars_by_name
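# Shape sketch (assumptions: a 7x7 grid, 3x3 kernels, and ncells holding
# (batch_index, row, col) triples). Zero-padding by one cell and then applying a
# 3x3 LocallyConnected2D with "valid" padding preserves the 7x7 spatial size,
# so tf.gather_nd can pick one cell's 70 filter outputs per example as Q-values.
import tensorflow as tf

inp = tf.placeholder(tf.float32, [None, 7, 7, 71])
padded = tf.keras.layers.ZeroPadding2D((1, 1))(inp)                         # (None, 9, 9, 71)
lconv = tf.keras.layers.LocallyConnected2D(70, 3, padding="valid")(padded)  # (None, 7, 7, 70)
cells = tf.placeholder(tf.int32, [None, 3], "cells")                        # (batch, row, col)
q_vals = tf.gather_nd(lconv, cells)                                         # (None, 70)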
def _build_net(self, grid, cell, name):
    base_net = self._build_base_net(grid, cell, name)
    with tf.variable_scope(name) as scope:
        hidden = tf.layers.dense(base_net, units=128, activation=tf.nn.relu)
        # Output layers for policy and value estimations
        policy = tf.layers.dense(
            hidden,
            units=self.n_channels,
            activation=tf.nn.softmax,
            kernel_initializer=nutils.normalized_columns_initializer(0.01))
        value = tf.layers.dense(
            hidden,
            units=1,
            activation=None,
            kernel_initializer=nutils.normalized_columns_initializer(1.0))
        trainable_vars_by_name = get_trainable_vars(scope)
        return policy, value, trainable_vars_by_name
def _build_net(self, freps, name):
    with tf.variable_scope('model/' + name) as scope:
        if self.pre_conv:
            dense_inp = self.add_conv_layer(freps, self.pp['conv_nfilters'][0],
                                            self.pp['conv_kernel_sizes'][0])
        else:
            dense_inp = freps
        h = self.add_dense_layer(dense_inp, 70, normalized_columns_initializer(0.01))
        # Output layers for policy and value estimations
        value = self.add_dense_layer(h, 1, normalized_columns_initializer(0.01))
        policy = self.add_dense_layer(h, 70, normalized_columns_initializer(0.01),
                                      tf.nn.softmax)
        trainable_vars = get_trainable_vars(scope)
        return value, policy, trainable_vars
def _build_head(self, inp, name):
    with tf.variable_scope('model/' + name) as scope:
        if self.pp['dueling_qnet']:
            h1 = inp
            # h1 = tf.layers.dense(
            #     inputs=base_net,
            #     units=140,
            #     kernel_initializer=self.kern_init_dense(),
            #     use_bias=False,
            #     name="h1")
            value = tf.layers.dense(
                inputs=h1,
                units=1,
                kernel_initializer=self.kern_init_dense(),
                use_bias=False,
                name="value")
            advantages = tf.layers.dense(
                inputs=h1,
                units=self.n_channels,
                use_bias=False,
                kernel_initializer=self.kern_init_dense(),
                name="advantages")
            # Avg. dueling supposedly more stable than max according to paper
            # Max Dueling
            # q_vals = value + (advantages - tf.reduce_max(
            #     advantages, axis=1, keepdims=True))
            # Average Dueling
            q_vals = value + (advantages - tf.reduce_mean(
                advantages, axis=1, keepdims=True))
            if "online" in name:
                self.advantages = advantages
            # if "target" in name:
            #     self.value = value
        else:
            q_vals = tf.layers.dense(
                inputs=inp,
                units=self.n_channels,
                kernel_initializer=self.kern_init_dense(),
                kernel_regularizer=self.regularizer,
                use_bias=False,
                name="q_vals")
        trainable_vars_by_name = get_trainable_vars(scope)
        return q_vals, trainable_vars_by_name
def _build_net(self, top_inps):
    with tf.variable_scope('model/' + self.name) as scope:
        # conv = DepthwiseConv2D(self.depth, self.pp['conv_kernel_sizes'][0])
        conv1 = tf.layers.Conv2D(
            filters=self.pp['conv_nfilters'][0],
            kernel_size=self.pp['conv_kernel_sizes'][0],
            padding='SAME',
            kernel_initializer=self.kern_init_conv(),
            kernel_regularizer=self.conv_regularizer,
            use_bias=self.pp['conv_bias'],
            bias_initializer=tf.constant_initializer(0.1),
            activation=self.act_fn,
            name="vconv",
            _reuse=False)
        conv2 = tf.layers.Conv2D(
            filters=self.pp['conv_nfilters'][1],
            kernel_size=self.pp['conv_kernel_sizes'][1],
            padding='SAME',
            kernel_initializer=self.kern_init_conv(),
            kernel_regularizer=self.conv_regularizer,
            use_bias=self.pp['conv_bias'],
            bias_initializer=tf.constant_initializer(0.1),
            activation=self.act_fn,
            name="vconv2",
            _reuse=False)
        value_layer = tf.layers.Dense(
            units=1,
            kernel_initializer=self.kern_init_dense(),
            use_bias=False,
            activation=None)
        val = value_layer.apply(
            tf.layers.flatten(conv2.apply(conv1.apply(top_inps[0]))))
        nval = value_layer.apply(
            tf.layers.flatten(conv2.apply(conv1.apply(top_inps[1]))))
        self.weight_vars.append(value_layer.kernel)
        self.weight_names.append(value_layer.name)
        # self.weight_vars.append(conv.filters)
        # self.weight_names.append(conv.name)
        trainable_vars_by_name = get_trainable_vars(scope)
        return val, nval, trainable_vars_by_name
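# Sketch (illustration, not original code) of the weight sharing used above:
# constructing tf.layers.Conv2D / Dense as layer objects and calling .apply() on
# both inputs reuses one set of weights for both forward passes, whereas calling
# the functional tf.layers.dense twice would create a second kernel unless
# reuse=True were passed.
import tensorflow as tf

x1 = tf.placeholder(tf.float32, [None, 7, 7, 3])
x2 = tf.placeholder(tf.float32, [None, 7, 7, 3])
shared = tf.layers.Dense(units=1, use_bias=False, name="shared_value")
v1 = shared.apply(tf.layers.flatten(x1))
v2 = shared.apply(tf.layers.flatten(x2))  # reuses the same kernel as v1
print(len(tf.trainable_variables()))      # 1 in a fresh graph: one shared kernel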
def _build_pnet(self, freps, name):
    with tf.variable_scope('model/' + name) as scope:
        # policy = tf.keras.layers.LocallyConnected2D(
        #     filters=70,
        #     kernel_size=1,
        #     padding="valid",
        #     kernel_initializer=tf.zeros_initializer(),
        #     use_bias=self.pp['conv_bias'],
        #     activation=None)(freps)
        # print(policy.shape)
        policy_layer = tf.layers.Dense(
            units=70,
            kernel_initializer=tf.zeros_initializer(),
            kernel_regularizer=self.dense_regularizer,
            use_bias=False,
            activation=None)
        policy = policy_layer.apply(tf.layers.flatten(freps))
        # self.weight_vars.append(policy_layer.kernel)
        # self.weight_names.append(policy_layer.name)
        trainable_vars = get_trainable_vars(scope)
        return policy, trainable_vars
def build(self):
    # frepshape = [None, self.rows, self.cols, self.n_channels * 3 + 1]
    self.frep = tf.placeholder(tf.int32, [None, *self.frepshape], "feature_reps")
    self.grads = tf.placeholder(tf.float32, [self.wdim, 1], "grad_corr")
    frep = tf.cast(self.frep, tf.float32)
    if self.grid_inp:
        grid_depth = 2 * self.n_channels
        self.grid = tf.placeholder(
            tf.bool, [None, self.rows, self.cols, grid_depth], "grid")
        grid = tf.cast(self.grid, tf.float32)
        top_inp = tf.concat([grid, frep], axis=3)
        self.depth = self.frepshape[-1] + grid_depth
    else:
        top_inp = frep
        self.depth = self.frepshape[-1]
    with tf.variable_scope('model/' + self.name) as scope:
        self.value = tf.layers.dense(
            inputs=tf.layers.flatten(top_inp),
            units=1,
            kernel_initializer=tf.zeros_initializer(),
            kernel_regularizer=None,
            bias_initializer=tf.zeros_initializer(),
            use_bias=False,
            activation=None,
            name="vals")
        online_vars = tuple(get_trainable_vars(scope).values())
        # Apply the externally supplied (corrected) gradient, fed through the
        # "grad_corr" placeholder above, to the single weight matrix.
        grads_and_vars = [(self.grads, online_vars[0])]
        trainer, self.lr, global_step = build_default_trainer(**self.pp)
        self.do_train = trainer.apply_gradients(grads_and_vars, global_step=global_step)
        return None, None
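# Usage sketch (assumption about the surrounding code): the "grad_corr" placeholder
# is meant to receive an externally computed gradient of shape (wdim, 1), so a
# training step would look roughly like
#     sess.run(net.do_train, feed_dict={net.grads: external_grad})
# with `net` an instance of the class above and `external_grad` a hypothetical
# numpy array. A self-contained version of the same pattern:
import numpy as np
import tensorflow as tf

w = tf.get_variable("w_sketch", [5, 1], initializer=tf.zeros_initializer())
grad_ph = tf.placeholder(tf.float32, [5, 1], "external_grad")
do_train = tf.train.GradientDescentOptimizer(0.1).apply_gradients([(grad_ph, w)])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(do_train, {grad_ph: np.ones((5, 1), np.float32)})
    print(sess.run(w))  # each weight moved by -0.1 * the supplied gradient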