def linear(x, out_size, do_bias=True, alpha=1.0, identity_if_possible=False,
           normalized=False, name=None, collections=None):
    """Linear (affine) transformation, y = x W + b, for a variety of
    configurations.

    Args:
      x: The input tensor to transform.
      out_size: The integer size of the non-batch output dimension.
      do_bias (optional): Add a learnable bias vector to the operation.
      alpha (optional): A multiplicative scaling for the weight initialization
        of the matrix, in the form \alpha * 1/\sqrt{x.shape[1]}.
      identity_if_possible (optional): Just return identity if
        x.shape[1] == out_size.
      normalized (optional): Option to divide out by the norms of the rows of W.
      name (optional): The name prefix to add to variables.
      collections (optional): List of additional collections. (Placed in
        tf.GraphKeys.GLOBAL_VARIABLES already, so no need for that.)

    Returns:
      In the equation y = x W + b, the tensorflow op that yields y.
    """
    in_size = int(x.get_shape()[1])  # from Dimension(10) -> 10
    stddev = alpha / np.sqrt(float(in_size))
    mat_init = tf.random_normal_initializer(0.0, stddev)  # unused; init_linear builds its own
    wname = (name + "/W") if name else "/W"

    if identity_if_possible and in_size == out_size:
        # Sometimes linear layers are nothing more than size adapters.
        return tf.identity(x, name=(wname + '_ident'))

    W, b = init_linear(in_size, out_size, do_bias=do_bias, alpha=alpha,
                       normalized=normalized, name=name, collections=collections)
    if do_bias:
        return tf.matmul(x, W) + b
    else:
        return tf.matmul(x, W)
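# --- Illustrative sketch (not from the original source). A minimal
# self-contained version of the same affine map, with the alpha / sqrt(fan_in)
# weight initialization spelled out inline; the original delegates variable
# creation to an external init_linear() helper that is not shown here.
import numpy as np
import tensorflow as tf

def linear_sketch(x, out_size, alpha=1.0):
    in_size = int(x.get_shape()[1])
    stddev = alpha / np.sqrt(float(in_size))  # scale weights by 1/sqrt(fan-in)
    W = tf.get_variable("W", [in_size, out_size],
                        initializer=tf.random_normal_initializer(0.0, stddev))
    b = tf.get_variable("b", [out_size], initializer=tf.zeros_initializer())
    return tf.matmul(x, W) + b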
def build(self, image):
    image = self.image_conversion_scaling(image)

    conv1_1 = self.conv2d_depth_or_not(image, "conv1_1", nonlinearity=tf.nn.relu)
    conv1_2 = self.conv2d_depth_or_not(conv1_1, "conv1_2", nonlinearity=tf.nn.relu)
    pool1 = tf.nn.max_pool(conv1_2, ksize=cnn_param.pool_window,
                           strides=cnn_param.pool_stride, padding='SAME',
                           name='pool1')

    conv2_1 = self.conv2d_depth_or_not(pool1, "conv2_1", nonlinearity=tf.nn.relu)
    conv2_2 = self.conv2d_depth_or_not(conv2_1, "conv2_2", nonlinearity=tf.nn.relu)
    pool2 = tf.nn.max_pool(conv2_2, ksize=cnn_param.pool_window,
                           strides=cnn_param.pool_stride, padding='SAME',
                           name='pool2')

    conv3_1 = self.conv2d_depth_or_not(pool2, "conv3_1", nonlinearity=tf.nn.relu)
    conv3_2 = self.conv2d_depth_or_not(conv3_1, "conv3_2", nonlinearity=tf.nn.relu)
    conv3_3 = self.conv2d_depth_or_not(conv3_2, "conv3_3", nonlinearity=tf.nn.relu)
    pool3 = tf.nn.max_pool(conv3_3, ksize=cnn_param.pool_window,
                           strides=cnn_param.pool_stride, padding='SAME',
                           name='pool3')

    conv4_1 = self.conv2d_depth_or_not(pool3, "conv4_1", nonlinearity=tf.nn.relu)
    conv4_2 = self.conv2d_depth_or_not(conv4_1, "conv4_2", nonlinearity=tf.nn.relu)
    conv4_3 = self.conv2d_depth_or_not(conv4_2, "conv4_3", nonlinearity=tf.nn.relu)
    pool4 = tf.nn.max_pool(conv4_3, ksize=cnn_param.pool_window,
                           strides=cnn_param.pool_stride, padding='SAME',
                           name='pool4')

    conv5_1 = self.conv2d_depth_or_not(pool4, "conv5_1", nonlinearity=tf.nn.relu)
    conv5_2 = self.conv2d_depth_or_not(conv5_1, "conv5_2", nonlinearity=tf.nn.relu)
    conv5_3 = self.conv2d_depth_or_not(conv5_2, "conv5_3", nonlinearity=tf.nn.relu)

    # Feature-wise convolution layers, no nonlinearity.
    conv_depth_1 = self.conv2d_depth_or_not(conv5_3, "conv6_1")
    # Two layers of feature-wise convolution: a cubic feature transformation.
    conv_depth = self.conv2d_depth_or_not(conv_depth_1, "depth")
    # This is a replacement for the last FC layer from VGG (common in GAP &
    # GMP models); this layer does not have a nonlinearity.
    conv_last = self.conv2d_depth_or_not(conv_depth, "conv6")

    gap = tf.reduce_mean(conv_last, [1, 2])
    with tf.variable_scope("GAP", reuse=tf.AUTO_REUSE):
        gap_w = tf.get_variable(
            "W", shape=cnn_param.layer_shapes['GAP/W'],
            initializer=tf.random_normal_initializer(stddev=hyper.stddev))
    class_prob = tf.matmul(gap, gap_w)
    # print_model_params()
    return conv_last, gap, class_prob
def nn_layer(input_tensor, input_dim, output_dim, act=_tf.nn.tanh,
             initial_bias=None, name='layer', precision=_tf.float32):
    with _tf.variable_scope(name):
        weights = _tf.get_variable(
            'w', dtype=precision, shape=[input_dim, output_dim],
            initializer=_tf.random_normal_initializer(
                stddev=1. / _np.sqrt(input_dim), dtype=precision),
            collections=[_tf.GraphKeys.MODEL_VARIABLES,
                         _tf.GraphKeys.REGULARIZATION_LOSSES,
                         _tf.GraphKeys.GLOBAL_VARIABLES])
        # A missing initial_bias is equivalent to initializing the biases to 0.
        biases = _tf.get_variable(
            'b', dtype=precision, shape=[output_dim],
            initializer=_tf.constant_initializer(
                0.0 if initial_bias is None else initial_bias, dtype=precision),
            collections=[_tf.GraphKeys.MODEL_VARIABLES,
                         _tf.GraphKeys.GLOBAL_VARIABLES])
        preactivate = _tf.nn.xw_plus_b(input_tensor, weights, biases)
        activations = preactivate if act is None else act(preactivate)
        _tf.summary.histogram('weights', weights)
        _tf.summary.histogram('biases', biases)
        _tf.summary.histogram('activations', activations)
        return activations, weights, biases
def _build_net(self, s, a, scope, trainable):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        init_weights = tf.random_normal_initializer(0., 0.01)
        init_bias = tf.constant_initializer(0.1)
        n_l1 = 1024
        w1_s = tf.get_variable('w1_s', [self.s_dim, n_l1],
                               initializer=init_weights, trainable=trainable)
        w1_a = tf.get_variable('w1_a', [self.a_dim, n_l1],
                               initializer=init_weights, trainable=trainable)
        b1 = tf.get_variable('b1', [1, n_l1], initializer=init_bias,
                             trainable=trainable)
        net_1 = tf.nn.leaky_relu(
            tf.matmul(s, w1_s) + tf.matmul(a, w1_a) + b1, alpha=0.01)
        net_2 = tf.layers.dense(net_1, 512, kernel_initializer=init_weights,
                                bias_initializer=init_bias, name='l2',
                                trainable=trainable)
        net_2 = tf.nn.leaky_relu(net_2, alpha=0.01)
        net_3 = tf.layers.dense(net_2, 256, kernel_initializer=init_weights,
                                bias_initializer=init_bias, name='l3',
                                trainable=trainable)
        net_3 = tf.nn.leaky_relu(net_3, alpha=0.01)
        net_4 = tf.layers.dense(net_3, 128, kernel_initializer=init_weights,
                                bias_initializer=init_bias, name='l4',
                                trainable=trainable)
        net_4 = tf.nn.leaky_relu(net_4, alpha=0.01)
        with tf.variable_scope('q', reuse=tf.AUTO_REUSE):
            q = tf.layers.dense(net_4, 1, kernel_initializer=init_weights,
                                bias_initializer=init_bias, trainable=trainable)
    return q
def __init__(self):
    self.sess = tf.Session()
    self.tfs = tf.placeholder(tf.float32, [None, S_DIM], 'state')

    # critic
    w_init = tf.random_normal_initializer(0., .1)
    lc = tf.layers.dense(self.tfs, 200, tf.nn.relu,
                         kernel_initializer=w_init, name='lc')
    self.v = tf.layers.dense(lc, 1)
    self.tfdc_r = tf.placeholder(tf.float32, [None, 1], 'discounted_r')
    self.advantage = self.tfdc_r - self.v
    self.closs = tf.reduce_mean(tf.square(self.advantage))
    self.ctrain_op = tf.train.AdamOptimizer(C_LR).minimize(self.closs)

    # actor
    self.pi, pi_params = self._build_anet('pi', trainable=True)
    oldpi, oldpi_params = self._build_anet('oldpi', trainable=False)
    self.update_oldpi_op = [
        oldp.assign(p) for p, oldp in zip(pi_params, oldpi_params)
    ]
    self.tfa = tf.placeholder(tf.int32, [None], 'action')
    self.tfadv = tf.placeholder(tf.float32, [None, 1], 'advantage')
    a_indices = tf.stack(
        [tf.range(tf.shape(self.tfa)[0], dtype=tf.int32), self.tfa], axis=1)
    pi_prob = tf.gather_nd(params=self.pi, indices=a_indices)    # shape=(None,)
    oldpi_prob = tf.gather_nd(params=oldpi, indices=a_indices)   # shape=(None,)
    ratio = pi_prob / oldpi_prob
    surr = ratio * self.tfadv  # surrogate loss
    # Clipped surrogate objective.
    self.aloss = -tf.reduce_mean(
        tf.minimum(
            surr,
            tf.clip_by_value(ratio, 1. - EPSILON, 1. + EPSILON) * self.tfadv))
    self.atrain_op = tf.train.AdamOptimizer(A_LR).minimize(self.aloss)

    self.sess.run(tf.global_variables_initializer())
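# --- Illustrative sketch (not from the original source). A toy NumPy check of
# the clipped surrogate above, assuming EPSILON = 0.2: the objective takes
# min(ratio * adv, clip(ratio, 1-eps, 1+eps) * adv), which caps how much a
# single update can exploit a large probability ratio.
import numpy as np

eps = 0.2
ratio = np.array([0.5, 1.0, 1.5])   # pi(a|s) / pi_old(a|s)
adv = np.array([1.0, 1.0, 1.0])     # positive advantages
surr = ratio * adv
clipped = np.clip(ratio, 1 - eps, 1 + eps) * adv
loss = -np.mean(np.minimum(surr, clipped))  # -> -(0.5 + 1.0 + 1.2) / 3 = -0.9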
def class_net(images, level, num_classes, num_anchors, num_filters,
              is_training, separable_conv=True, repeats=4, survival_prob=None):
    """Class prediction network."""
    if separable_conv:
        conv_op = functools.partial(
            tf.layers.separable_conv2d,
            depth_multiplier=1,
            pointwise_initializer=tf.initializers.variance_scaling(),
            depthwise_initializer=tf.initializers.variance_scaling())
    else:
        conv_op = functools.partial(
            tf.layers.conv2d,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01))

    for i in range(repeats):
        orig_images = images
        images = conv_op(images, num_filters, kernel_size=3,
                         bias_initializer=tf.zeros_initializer(),
                         activation=None, padding='same', name='class-%d' % i)
        images = utils.batch_norm_relu(images, is_training, relu=True,
                                       init_zero=False,
                                       name='class-%d-bn-%d' % (i, level))
        # Skip connection with optional stochastic depth.
        if i > 0 and survival_prob:
            images = utils.drop_connect(images, is_training, survival_prob)
            images = images + orig_images

    classes = conv_op(
        images, num_classes * num_anchors, kernel_size=3,
        bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
        padding='same', name='class-predict')
    return classes
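# --- Illustrative sketch (not from the original source). Why the final bias
# is initialized to -log((1 - 0.01) / 0.01): it makes the class head's initial
# sigmoid output roughly 0.01, encoding a rare-foreground prior. Quick check:
import numpy as np

b = -np.log((1 - 0.01) / 0.01)   # ~ -4.595
p = 1.0 / (1.0 + np.exp(-b))     # sigmoid(b)
assert abs(p - 0.01) < 1e-12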
def _batch_norm(self, x, name="bnorm"):
    """Batch normalization."""
    with tf.variable_scope(name):
        params_shape = [x.get_shape()[-1]]
        moving_mean = tf.get_variable(
            "moving_mean", params_shape, tf.float32,
            initializer=tf.constant_initializer(0.0), trainable=False)
        moving_variance = tf.get_variable(
            "moving_variance", params_shape, tf.float32,
            initializer=tf.constant_initializer(self._bnorm_init_var),
            trainable=False)
        self.variables_list.append(moving_mean)
        self.variables_list.append(moving_variance)
        gamma = tf.get_variable(
            "gamma", params_shape, tf.float32,
            initializer=tf.random_normal_initializer(
                stddev=self._bnorm_init_gamma))
        self.variables_list.append(gamma)
        self.trainable_list.append(gamma)

        local_mean, local_variance = tf.nn.moments(x, [0, 1, 2], name="moments")
        # Use batch statistics at training time, moving averages at eval time.
        mean, variance = tf.cond(self.flag_train,
                                 lambda: (local_mean, local_variance),
                                 lambda: (moving_mean, moving_variance))
        self.extra_train.append(
            moving_mean.assign_sub(
                (1.0 - self._bnorm_decay) * (moving_mean - local_mean)))
        self.extra_train.append(
            moving_variance.assign_sub(
                (1.0 - self._bnorm_decay) * (moving_variance - local_variance)))
        y = tf.nn.batch_normalization(x, mean, variance, None, gamma,
                                      self._bnorm_epsilon)
        y.set_shape(x.get_shape())
        return y
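# --- Illustrative sketch (not from the original source). The assign_sub
# update above is the usual exponential moving average in disguise:
# m - (1 - decay) * (m - local) == decay * m + (1 - decay) * local.
import numpy as np

decay, m, local = 0.99, 1.0, 3.0
assert np.isclose(m - (1 - decay) * (m - local),
                  decay * m + (1 - decay) * local)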
def _build_net(self):
    # ------------------ build evaluate_net ------------------
    self.s = tf.placeholder(tf.float32, [None, self.n_features],
                            name='s')  # input
    self.q_target = tf.placeholder(tf.float32, [None, self.n_actions],
                                   name='Q_target')  # for calculating loss
    with tf.variable_scope('eval_net'):
        # c_names (collections_names) are the collections to store variables.
        c_names = ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
        n_l1 = 10
        w_initializer = tf.random_normal_initializer(0., 0.3)
        b_initializer = tf.constant_initializer(0.1)

        # with tf.variable_scope('embedding'):
        #     embeddings = tf.Variable(tf.random_uniform([VOCAB_LEN, EMBED_SIZE]))
        #     embed = tf.nn.embedding_lookup(embeddings, words_ids)

        # First layer. collections is used later when assigning to the target net.
        with tf.variable_scope('l1'):
            w1 = tf.get_variable('w1', [self.n_features, n_l1],
                                 initializer=w_initializer, collections=c_names)
            b1 = tf.get_variable('b1', [1, n_l1],
                                 initializer=b_initializer, collections=c_names)
            l1 = tf.nn.relu(tf.matmul(self.s, w1) + b1)

        # Second layer.
        with tf.variable_scope('l2'):
            w2 = tf.get_variable('w2', [n_l1, self.n_actions],
                                 initializer=w_initializer, collections=c_names)
            b2 = tf.get_variable('b2', [1, self.n_actions],
                                 initializer=b_initializer, collections=c_names)
            self.q_eval = tf.matmul(l1, w2) + b2

    with tf.variable_scope('loss'):
        self.loss = tf.reduce_mean(
            tf.squared_difference(self.q_target, self.q_eval))
    with tf.variable_scope('train'):
        self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize(self.loss)

    # ------------------ build target_net ------------------
    self.s_ = tf.placeholder(tf.float32, [None, self.n_features],
                             name='s_')  # input
    with tf.variable_scope('target_net'):
        c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]

        # First layer.
        with tf.variable_scope('l1'):
            w1 = tf.get_variable('w1', [self.n_features, n_l1],
                                 initializer=w_initializer, collections=c_names)
            b1 = tf.get_variable('b1', [1, n_l1],
                                 initializer=b_initializer, collections=c_names)
            l1 = tf.nn.relu(tf.matmul(self.s_, w1) + b1)

        # Second layer.
        with tf.variable_scope('l2'):
            w2 = tf.get_variable('w2', [n_l1, self.n_actions],
                                 initializer=w_initializer, collections=c_names)
            b2 = tf.get_variable('b2', [1, self.n_actions],
                                 initializer=b_initializer, collections=c_names)
            self.q_next = tf.matmul(l1, w2) + b2
def _build_net(self):
    def build_layers(s, c_names, n_l1, w_initializer, b_initializer):
        with tf.variable_scope('l1'):
            w1 = tf.get_variable('w1', [self.n_features, n_l1],
                                 initializer=w_initializer, collections=c_names)
            b1 = tf.get_variable('b1', [1, n_l1],
                                 initializer=b_initializer, collections=c_names)
            l1 = tf.nn.relu(tf.matmul(s, w1) + b1)
        with tf.variable_scope('Q'):
            w2 = tf.get_variable('w2', [n_l1, self.n_actions],
                                 initializer=w_initializer, collections=c_names)
            b2 = tf.get_variable('b2', [1, self.n_actions],
                                 initializer=b_initializer, collections=c_names)
            out = tf.matmul(l1, w2) + b2
        return out

    # -------------- Build the eval net, whose parameters are updated every step --------------
    self.s = tf.placeholder(tf.float32, [None, self.n_features],
                            name='s')  # receives the observation
    self.q_target = tf.placeholder(tf.float32, [None, self.n_actions],
                                   name='Q_target')  # receives the q_target values computed later
    # c_names (collections_names) are used when updating target_net parameters.

    if self.prioritized:
        # Importance-sampling weights for prioritized experience replay.
        self.ISWeights = tf.placeholder(tf.float32, [None, 1], name='IS_weights')

    with tf.variable_scope('eval_net'):
        c_names, n_l1, w_initializer, b_initializer = \
            ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES], 10, \
            tf.random_normal_initializer(0., 0.3), tf.constant_initializer(0.1)
        self.q_eval = build_layers(self.s, c_names, n_l1, w_initializer,
                                   b_initializer)

    with tf.variable_scope('loss'):
        if self.prioritized:
            self.abs_errors = tf.reduce_sum(tf.abs(self.q_target - self.q_eval),
                                            axis=1)  # for updating the SumTree
            # Multiply the loss by the IS weights so it is rescaled according
            # to each sample's selection probability.
            self.loss = tf.reduce_mean(
                self.ISWeights *
                tf.squared_difference(self.q_target, self.q_eval))
        else:
            self.loss = tf.reduce_mean(
                tf.squared_difference(self.q_target, self.q_eval))
    with tf.variable_scope('train'):
        self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize(self.loss)

    # ---------------- Build the target net, which provides the target Q ----------------
    self.s_ = tf.placeholder(tf.float32, [None, self.n_features],
                             name='s_')  # receives the next observation
    with tf.variable_scope('target_net'):
        c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
        # Note: the target net must be fed the *next* observation, s_.
        self.q_next = build_layers(self.s_, c_names, n_l1, w_initializer,
                                   b_initializer)
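# --- Illustrative sketch (not from the original source). How the IS weights
# fed into ISWeights are typically computed in prioritized replay: samples
# drawn with high probability get smaller weights, correcting the sampling
# bias. The exponent beta is an assumption here, following the PER paper.
import numpy as np

probs = np.array([0.5, 0.3, 0.2])           # per-sample selection probabilities
beta = 0.4
weights = (len(probs) * probs) ** (-beta)   # unnormalized IS weights
weights /= weights.max()                    # normalize by the max, as in the paper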
def _atrous_conv(self, name, x, filter_size, in_filters, out_filters,
                 dilation, pad='SAME'):
    """2D dilated (atrous) convolution."""
    # Define the dilated convolution for the given input and kernel.
    convolve = lambda i, k: tf.nn.atrous_conv2d(i, k, dilation, padding=pad)
    with tf.variable_scope(name) as scope:
        n = filter_size * filter_size * out_filters
        # Get or create the kernel, initialized from a normal distribution.
        kernel = tf.get_variable(
            'DW', [filter_size, filter_size, in_filters, out_filters],
            tf.float32,
            initializer=tf.random_normal_initializer(stddev=0.01))  # or np.sqrt(2.0 / n)
        output = convolve(x, kernel)
        return output
def __init__(self, input_, units, activation=tf.nn.relu, keep_prob=None,
             w_init=tf.random_normal_initializer(stddev=0.01),
             b_init=tf.constant_initializer(0.), name=""):
    self.activation = activation
    self.keep_prob = keep_prob
    self.w = tf.get_variable(shape=(int(input_.shape[-1]), units),
                             name='w{}'.format(name), initializer=w_init)
    self.b = tf.get_variable(shape=(units,), name='b{}'.format(name),
                             initializer=b_init)
def keyboard_conv(x, n_rot, n_p, name='keyboard_conv', activation=None):
    x = tf.layers.conv2d(
        x,
        n_rot * n_p,
        (x.shape.as_list()[1], 3),
        name=name,
        padding='valid',
        activation=None,
        kernel_initializer=tf.zeros_initializer(),
        bias_initializer=tf.random_normal_initializer(0, 0.00001),
    )
    # Split the channel dimension into n_rot groups of n_p features and stack
    # the groups along the height axis.
    groups = [x[:, :, :, p * n_p:(p + 1) * n_p] for p in range(n_rot)]
    x = tf.concat(groups, axis=1)
    if activation is not None:
        x = activation(x)
    return x
def conv2d_specnorm(input_,
                    num_outputs,
                    kernel_size=[4, 4],
                    stride=[1, 1],
                    pad='SAME',
                    if_bias=True,
                    trainable=True,
                    reuse=False,
                    scope='conv2d',
                    weight_initializer=None,
                    bias_initializer=None,
                    u_weight=None,
                    weight_initializer_type=tf.random_normal_initializer(stddev=0.02)):
    print(scope)
    with tf.variable_scope(scope, reuse=reuse):
        if weight_initializer is None:
            print("Initializing weights")
            w = tf.get_variable(
                name='weights',
                shape=kernel_size + [input_.get_shape()[-1]] + [num_outputs],
                initializer=weight_initializer_type,
                dtype=tf.float32,
                trainable=trainable)
        else:
            print("Loading weights")
            w = tf.get_variable(name='weights',
                                initializer=weight_initializer,
                                dtype=tf.float32,
                                trainable=trainable)
        conv = tf.nn.conv2d(input_,
                            spectral_norm(w, 1, u_weight=u_weight),
                            padding=pad,
                            strides=[1] + stride + [1])
        if if_bias:
            if bias_initializer is None:
                print("Initializing biases")
                conv = conv + bias_variable([num_outputs], trainable=trainable)
            else:
                print("Loading biases")
                conv = conv + bias_variable([num_outputs], trainable=trainable,
                                            bias_initializer=bias_initializer)
    return conv
def discriminator(self, x, is_training, reuse=False):
    """Discriminator architecture based on InfoGAN.

    Args:
      x: input images, shape [bs, h, w, channels].
      is_training: boolean, whether we are in train or eval mode.
      reuse: boolean, whether params should be reused.

    Returns:
      net: the output logits (before sigmoid).
    """
    hparams = self.hparams
    with tf.variable_scope(
        "discriminator", reuse=reuse,
        initializer=tf.random_normal_initializer(stddev=0.02)):
        batch_size, height, width = common_layers.shape_list(x)[:3]
        # Mapping x from [bs, h, w, c] to [bs, 1].
        net = tf.layers.conv2d(x, 64, (4, 4), strides=(2, 2), padding="SAME",
                               name="d_conv1")  # [bs, h/2, w/2, 64]
        net = lrelu(net)
        net = tf.layers.conv2d(net, 128, (4, 4), strides=(2, 2), padding="SAME",
                               name="d_conv2")  # [bs, h/4, w/4, 128]
        if hparams.discriminator_batchnorm:
            net = tf.layers.batch_normalization(net, training=is_training,
                                                momentum=0.999, name="d_bn2")
        net = lrelu(net)
        size = height * width
        net = tf.reshape(net, [batch_size, size * 8])  # [bs, h * w * 8]
        net = tf.layers.dense(net, 1024, name="d_fc3")  # [bs, 1024]
        if hparams.discriminator_batchnorm:
            net = tf.layers.batch_normalization(net, training=is_training,
                                                momentum=0.999, name="d_bn3")
        net = lrelu(net)
        return net
def spectral_normed_weight(w, name, lower_bound=False, iteration=1, fc=False):
    if fc:
        iteration = 2
    w_shape = w.shape.as_list()
    w = tf.reshape(w, [-1, w_shape[-1]])
    # Note: both `iteration` settings above are overridden by the flag.
    iteration = FLAGS.spec_iter
    sigma_new = FLAGS.spec_norm_val

    u = tf.get_variable(name + "_u", [1, w_shape[-1]],
                        initializer=tf.random_normal_initializer(),
                        trainable=False)

    u_hat = u
    v_hat = None
    # Power iteration; usually iteration = 1 is enough because u is reused
    # across training steps.
    for i in range(iteration):
        v_ = tf.matmul(u_hat, tf.transpose(w))
        v_hat = tf.nn.l2_normalize(v_)
        u_ = tf.matmul(v_hat, w)
        u_hat = tf.nn.l2_normalize(u_)

    u_hat = tf.stop_gradient(u_hat)
    v_hat = tf.stop_gradient(v_hat)

    sigma = tf.matmul(tf.matmul(v_hat, w), tf.transpose(u_hat))

    if FLAGS.spec_eval:
        dep = []
    else:
        dep = [u.assign(u_hat)]

    with tf.control_dependencies(dep):
        if lower_bound:
            sigma = sigma + 1e-6
            w_norm = w / sigma * tf.minimum(sigma, 1) * sigma_new
        else:
            w_norm = w / sigma * sigma_new

    w_norm = tf.reshape(w_norm, w_shape)
    return w_norm
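# --- Illustrative sketch (not from the original source). A NumPy check that
# the power iteration above approximates the largest singular value
# sigma_1(W), which is what the TF code divides out of the weight. The TF
# version uses only 1-2 steps because u persists across training updates.
import numpy as np

rng = np.random.RandomState(0)
W = rng.randn(64, 32)          # e.g. a flattened kernel, [fan_in, fan_out]
u = rng.randn(1, 32)
for _ in range(50):
    v = u @ W.T
    v /= np.linalg.norm(v)
    u = v @ W
    u /= np.linalg.norm(u)
sigma = (v @ W @ u.T).item()
assert np.isclose(sigma, np.linalg.svd(W, compute_uv=False)[0], rtol=1e-3)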
def _build_net(self, s):
    # ------------------ build evaluate_net ------------------
    # self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s')
    # self.q_target = tf.placeholder(tf.float32, [None, self.n_actions], name='Q_target')
    with tf.variable_scope('eval_net'):
        # c_names (collections_names) are the collections to store variables.
        c_names = ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
        n_l1, n_l2, n_l3 = 200, 100, 50
        w_initializer = tf.random_normal_initializer(0., 0.3)
        b_initializer = tf.constant_initializer(0.1)

        # First layer. collections is used later when assigning to the target net.
        with tf.variable_scope('l1'):
            w1 = tf.get_variable('w1', [self.n_features, n_l1],
                                 initializer=w_initializer, collections=c_names)
            b1 = tf.get_variable('b1', [1, n_l1],
                                 initializer=b_initializer, collections=c_names)
            l1 = tf.nn.relu(tf.matmul(s, w1) + b1)

        # Second layer.
        with tf.variable_scope('l2'):
            w2 = tf.get_variable('w2', [n_l1, n_l2],
                                 initializer=w_initializer, collections=c_names)
            b2 = tf.get_variable('b2', [1, n_l2],
                                 initializer=b_initializer, collections=c_names)
            l2 = tf.nn.relu(tf.matmul(l1, w2) + b2)

        # Third layer.
        with tf.variable_scope('l3'):
            w3 = tf.get_variable('w3', [n_l2, n_l3],
                                 initializer=w_initializer, collections=c_names)
            b3 = tf.get_variable('b3', [1, n_l3],
                                 initializer=b_initializer, collections=c_names)
            l3 = tf.nn.relu(tf.matmul(l2, w3) + b3)

        # Output layer.
        with tf.variable_scope('l4'):
            w4 = tf.get_variable('w4', [n_l3, self.n_actions],
                                 initializer=w_initializer, collections=c_names)
            b4 = tf.get_variable('b4', [1, self.n_actions],
                                 initializer=b_initializer, collections=c_names)
            self.q_eval = tf.matmul(l3, w4) + b4
def dense(name, x, n_out, dtype=tf.float32, init_w=0.05):
    """Dense layer."""
    n_in = common_layers.shape_list(x)[2]
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        w = tf.get_variable("w", [n_in, n_out], dtype,
                            initializer=tf.random_normal_initializer(0.0, init_w),
                            trainable=True)
        b = tf.get_variable("b", [n_out], dtype,
                            initializer=tf.zeros_initializer(),
                            trainable=True)
        x = tf.matmul(x, w) + b
        return x
def make_convolutions(inp):
    with tf.variable_scope('CNN') as cur_scope:
        convolutions = []
        for i_f, (width, num) in enumerate(filters):
            if cnn_options['activation'] == 'relu':
                # He initialization for ReLU activation
                # with char embeddings init between -1 and 1
                # w_init = tf.random_normal_initializer(
                #     mean=0.0,
                #     stddev=np.sqrt(2.0 / (width * char_embed_dim))
                # )

                # Kim et al 2015, +/- 0.05
                w_init = tf.random_uniform_initializer(minval=-0.05,
                                                       maxval=0.05)
            elif cnn_options['activation'] == 'tanh':
                # Glorot init
                w_init = tf.random_normal_initializer(
                    mean=0.0,
                    stddev=np.sqrt(1.0 / (width * char_embed_dim)))
            w = tf.get_variable(
                "W_cnn_%s" % i_f,
                [1, width, char_embed_dim, num],
                initializer=w_init,
                dtype=DTYPE)
            b = tf.get_variable(
                "b_cnn_%s" % i_f, [num], dtype=DTYPE,
                initializer=tf.constant_initializer(0.0))

            conv = tf.nn.conv2d(inp, w, strides=[1, 1, 1, 1],
                                padding="VALID") + b
            # now max pool
            conv = tf.nn.max_pool(conv, [1, 1, max_chars - width + 1, 1],
                                  [1, 1, 1, 1], 'VALID')
            # activation
            conv = activation(conv)
            conv = tf.squeeze(conv, axis=[2])
            convolutions.append(conv)
    return tf.concat(convolutions, 2)
def _build_net(self):
    # ------------------ build evaluate_net ------------------
    # State s as input.
    self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s')
    self.q_target = tf.placeholder(tf.float32, [None, self.n_actions],
                                   name='q_target')
    with tf.variable_scope('eval_net'):
        collection_names = ['evaluate_net_parameters',
                            tf.GraphKeys.GLOBAL_VARIABLES]
        n_l1 = self.n_hidden
        w_initializer = tf.random_normal_initializer(0., 0.3)
        b_initializer = tf.constant_initializer(0.1)

        # First layer. collections is used later when assigning to the target net.
        with tf.variable_scope('l1'):
            w1 = tf.get_variable('w1', [self.n_features, n_l1],
                                 initializer=w_initializer,
                                 collections=collection_names)
            b1 = tf.get_variable('b1', [1, n_l1],
                                 initializer=b_initializer,
                                 collections=collection_names)
            l1 = tf.nn.relu(tf.matmul(self.s, w1) + b1)

        # Second layer.
        with tf.variable_scope('l2'):
            w2 = tf.get_variable('w2', [n_l1, self.n_actions],
                                 initializer=w_initializer,
                                 collections=collection_names)
            b2 = tf.get_variable('b2', [1, self.n_actions],
                                 initializer=b_initializer,
                                 collections=collection_names)
            self.q_eval = tf.matmul(l1, w2) + b2

    with tf.variable_scope('loss'):
        self.loss = tf.reduce_mean(
            tf.squared_difference(self.q_target, self.q_eval))
    with tf.variable_scope('train'):
        self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize(self.loss)

    # ------------------ build target_net ------------------
    # Next state s_ as input.
    self.s_ = tf.placeholder(tf.float32, [None, self.n_features], name='s_')
    with tf.variable_scope('target_net'):
        collection_names = ['target_net_parameters',
                            tf.GraphKeys.GLOBAL_VARIABLES]

        # First layer.
        with tf.variable_scope('l1'):
            w1 = tf.get_variable('w1', [self.n_features, n_l1],
                                 initializer=w_initializer,
                                 collections=collection_names)
            b1 = tf.get_variable('b1', [1, n_l1],
                                 initializer=b_initializer,
                                 collections=collection_names)
            l1 = tf.nn.relu(tf.matmul(self.s_, w1) + b1)

        # Second layer.
        with tf.variable_scope('l2'):
            w2 = tf.get_variable('w2', [n_l1, self.n_actions],
                                 initializer=w_initializer,
                                 collections=collection_names)
            b2 = tf.get_variable('b2', [1, self.n_actions],
                                 initializer=b_initializer,
                                 collections=collection_names)
            self.q_next = tf.matmul(l1, w2) + b2
def dense(x, n1, n2, name):
    """Used to create a dense layer.

    :param x: input tensor to the dense layer
    :param n1: number of input neurons
    :param n2: number of output neurons
    :param name: name of the entire dense layer, i.e. the variable scope name
    :return: tensor with shape [batch_size, n2]
    """
    with tf.variable_scope(name, reuse=None):
        # np.random.seed(1)
        tf.set_random_seed(1)
        weights = tf.get_variable(
            "weights", shape=[n1, n2],
            initializer=tf.random_normal_initializer(mean=0., stddev=0.01))
        bias = tf.get_variable("bias", shape=[n2],
                               initializer=tf.constant_initializer(0.0))
        out = tf.add(tf.matmul(x, weights), bias, name='matmul')
        return out
def feed_forward_gaussian(config, action_size, observations, unused_length,
                          state=None):
    """Independent feed forward networks for policy and value.

    The policy network outputs the mean action; the log standard deviation
    is learned as an independent parameter vector.

    Args:
      config: Configuration object.
      action_size: Length of the action vector.
      observations: Sequences of observations.
      unused_length: Batch of sequence lengths.
      state: Batch of initial recurrent states.

    Returns:
      NetworkOutput tuple.
    """
    mean_weights_initializer = tf.contrib.layers.variance_scaling_initializer(
        factor=config.init_mean_factor)
    logstd_initializer = tf.random_normal_initializer(config.init_logstd, 1e-10)
    flat_observations = tf.reshape(observations, [
        tf.shape(observations)[0],
        tf.shape(observations)[1],
        functools.reduce(operator.mul, observations.shape.as_list()[2:], 1)
    ])
    with tf.variable_scope('policy'):
        x = flat_observations
        for size in config.policy_layers:
            x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
        mean = tf.contrib.layers.fully_connected(
            x, action_size, tf.tanh,
            weights_initializer=mean_weights_initializer)
        logstd = tf.get_variable('logstd', mean.shape[2:], tf.float32,
                                 logstd_initializer)
        logstd = tf.tile(
            logstd[None, None],
            [tf.shape(mean)[0], tf.shape(mean)[1]] + [1] * (mean.shape.ndims - 2))
    with tf.variable_scope('value'):
        x = flat_observations
        for size in config.value_layers:
            x = tf.contrib.layers.fully_connected(x, size, tf.nn.relu)
        value = tf.contrib.layers.fully_connected(x, 1, None)[..., 0]
    mean = tf.check_numerics(mean, 'mean')
    logstd = tf.check_numerics(logstd, 'logstd')
    value = tf.check_numerics(value, 'value')
    policy = tf.contrib.distributions.MultivariateNormalDiag(mean,
                                                             tf.exp(logstd))
    return NetworkOutput(policy, mean, logstd, value, state)
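# --- Illustrative sketch (not from the original source). The policy above is
# a diagonal Gaussian: mean from the network, a state-independent learned log
# standard deviation. Its log density, written out in NumPy for one action:
import numpy as np

def diag_gaussian_logp(a, mean, logstd):
    std = np.exp(logstd)
    return np.sum(-0.5 * ((a - mean) / std) ** 2 - logstd
                  - 0.5 * np.log(2.0 * np.pi))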
def linear(input_, output_size, scope=None, stddev=0.02, bias_start=0.0,
           with_w=False):
    shape = input_.get_shape().as_list()

    with tf.variable_scope(scope or "Linear"):
        matrix = tf.get_variable("Matrix", [shape[1], output_size], tf.float32,
                                 tf.random_normal_initializer(stddev=stddev))
        bias = tf.get_variable("bias", [output_size],
                               initializer=tf.constant_initializer(bias_start))
        if with_w:
            return tf.matmul(input_, matrix) + bias, matrix, bias
        else:
            return tf.matmul(input_, matrix) + bias
def _conv(self, x, filter_size, out_filters, stride, name='conv'):
    """Convolution."""
    with tf.variable_scope(name):
        in_filters = int(x.get_shape()[-1])
        n = filter_size * filter_size * np.maximum(in_filters, out_filters)
        kernel = tf.get_variable(
            'weights', [filter_size, filter_size, in_filters, out_filters],
            tf.float32,
            initializer=tf.random_normal_initializer(stddev=np.sqrt(2.0 / n)))
        self.variables_list.append(kernel)
        self.trainable_list.append(kernel)
        self.decay_list.append(kernel)
        y = tf.nn.conv2d(x, kernel, [1, stride, stride, 1],
                         padding=self.padding)
        return y
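# --- Illustrative sketch (not from the original source). The stddev above is
# He initialization, sqrt(2 / n), here with n = k*k*max(fan_in, fan_out);
# most references use fan_in or fan_out alone, so max() is a conservative
# variant. A worked example with assumed filter counts:
import numpy as np

k, fan_in, fan_out = 3, 64, 128
n = k * k * max(fan_in, fan_out)  # 1152
stddev = np.sqrt(2.0 / n)         # ~ 0.0417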
def new_fc_layer(self, bottom, output_size, name):
    shape = bottom.get_shape().as_list()
    dim = np.prod(shape[1:])
    x = tf.reshape(bottom, [-1, dim])
    input_size = dim

    with tf.variable_scope(name):
        w = tf.get_variable(
            "W", shape=[input_size, output_size],
            initializer=tf.random_normal_initializer(0., 0.005))
        b = tf.get_variable("b", shape=[output_size],
                            initializer=tf.constant_initializer(0.))
        fc = tf.nn.bias_add(tf.matmul(x, w), b)
    return fc
def layer(self, tf_input, num_hidden_units, variable_name, trainable=True):
    # tf_input: batch_size x n_features
    # num_hidden_units: number of hidden units
    tf_weight_initializer = tf.random_normal_initializer(mean=0, stddev=0.01)
    num_features = tf_input.get_shape()[1]
    W = tf.get_variable(name=variable_name + '_W',
                        dtype=tf.float32,
                        shape=[num_features, num_hidden_units],
                        initializer=tf_weight_initializer,
                        trainable=trainable)
    # Note: b falls back to tf.get_variable's default (Glorot uniform)
    # initializer since none is specified.
    b = tf.get_variable(name=variable_name + '_b',
                        dtype=tf.float32,
                        shape=[num_hidden_units],
                        trainable=trainable)
    out = tf.add(tf.matmul(tf_input, W), b)
    return out
def __init__(self,
             feature_dims=(2048, 128),
             activation=tf.nn.relu,
             normalize_output=True,
             kernel_initializer=tf.random_normal_initializer(stddev=.01),
             bias_initializer=tf.zeros_initializer(),
             use_batch_norm=False,
             batch_norm_momentum=blocks.BATCH_NORM_MOMENTUM,
             use_batch_norm_beta=False,
             use_global_batch_norm=True,
             name='ProjectionHead',
             **kwargs):
    super(ProjectionHead, self).__init__(name=name, **kwargs)
    self.normalize_output = normalize_output
    self.num_layers = len(feature_dims)
    for layer_idx, layer_dim in enumerate(feature_dims):
        is_last_layer = (layer_idx + 1) == len(feature_dims)
        # We can't just add all layers to a list, since keras.Layer uses
        # __setattr__ to monitor for sublayers that it needs to track, but it
        # doesn't handle lists of sublayers. We use setattr to enable dynamic
        # variable naming, given that the number of sublayers is not
        # statically known.
        setattr(
            self, f'dense_{layer_idx}',
            tf.layers.Dense(layer_dim,
                            activation=None,
                            use_bias=not is_last_layer and not use_batch_norm,
                            kernel_initializer=kernel_initializer,
                            bias_initializer=bias_initializer))
        if not is_last_layer:
            if use_batch_norm:
                # Cross-replica TPU BatchNorm.
                setattr(
                    self, f'batchnorm_{layer_idx}',
                    blocks.batch_norm(
                        use_trainable_beta=use_batch_norm_beta,
                        batch_norm_momentum=batch_norm_momentum,
                        use_global_batch_norm=use_global_batch_norm))
            setattr(self, f'activation_{layer_idx}',
                    tf.keras.layers.Activation(activation))
def __init__(self,
             min_level,
             max_level,
             anchors_per_location,
             num_convs=2,
             num_filters=256,
             use_separable_conv=False,
             use_batch_norm=True,
             batch_norm_relu=nn_ops.BatchNormRelu()):
    """Initialize params to build a Region Proposal Network head.

    Args:
      min_level: `int` number of the minimum feature level.
      max_level: `int` number of the maximum feature level.
      anchors_per_location: `int` number of anchors per pixel location.
      num_convs: `int` number of intermediate conv layers before the
        prediction.
      num_filters: `int` number of filters of the intermediate conv layers.
      use_separable_conv: `bool` indicating whether separable conv layers
        are used.
      use_batch_norm: `bool` indicating whether batchnorm layers are added.
      batch_norm_relu: an operation that includes a batch normalization
        layer followed by an optional relu layer.
    """
    self._min_level = min_level
    self._max_level = max_level
    self._anchors_per_location = anchors_per_location
    self._num_convs = num_convs
    self._num_filters = num_filters
    if use_separable_conv:
        self._conv2d_op = functools.partial(
            tf.layers.separable_conv2d,
            depth_multiplier=1,
            bias_initializer=tf.zeros_initializer())
    else:
        self._conv2d_op = functools.partial(
            tf.layers.conv2d,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01),
            bias_initializer=tf.zeros_initializer())
    self._use_batch_norm = use_batch_norm
    self._batch_norm_relu = batch_norm_relu
def transform(input_,
              alpha=1.0,
              normalizer_fn=ops.conditional_instance_norm,
              normalizer_params=None,
              reuse=False):
    """Maps content images to stylized images.

    Args:
      input_: Tensor. Batch of input images.
      alpha: Float. Width multiplier to reduce the number of filters in the
        model and slim it down. Defaults to 1.0, which results in the
        hyper-parameters used in the published paper.
      normalizer_fn: normalization layer function. Defaults to
        ops.conditional_instance_norm.
      normalizer_params: dict of parameters to pass to the conditional
        instance normalization op.
      reuse: bool. Whether to reuse model parameters. Defaults to False.

    Returns:
      Tensor. The output of the transformer network.
    """
    if normalizer_params is None:
        normalizer_params = {'center': True, 'scale': True}

    with tf.variable_scope('transformer', reuse=reuse):
        with slim.arg_scope(
            [slim.conv2d],
            activation_fn=tf.nn.relu,
            normalizer_fn=normalizer_fn,
            normalizer_params=normalizer_params,
            weights_initializer=tf.random_normal_initializer(0.0, 0.01),
            biases_initializer=tf.constant_initializer(0.0)):
            with tf.variable_scope('contract'):
                h = conv2d(input_, 9, 1, int(alpha * 32), 'conv1')
                h = conv2d(h, 3, 2, int(alpha * 64), 'conv2')
                h = conv2d(h, 3, 2, int(alpha * 128), 'conv3')
            with tf.variable_scope('residual'):
                h = residual_block(h, 3, 'residual1')
                h = residual_block(h, 3, 'residual2')
                h = residual_block(h, 3, 'residual3')
                h = residual_block(h, 3, 'residual4')
                h = residual_block(h, 3, 'residual5')
            with tf.variable_scope('expand'):
                h = upsampling(h, 3, 2, int(alpha * 64), 'conv1')
                h = upsampling(h, 3, 2, int(alpha * 32), 'conv2')
                return upsampling(h, 9, 1, 3, 'conv3',
                                  activation_fn=tf.nn.sigmoid)
def conv2d(inputs, num_filters_out, kernel_size, stride=1, scope=None,
           reuse=None):
    """Adds a 2D convolution.

    conv2d creates a variable called 'weights', representing the
    convolutional kernel, that is convolved with the input.

    Args:
      inputs: a 4D tensor in NHWC format.
      num_filters_out: the number of output filters.
      kernel_size: an int specifying the kernel height and width size.
      stride: an int specifying the height and width stride.
      scope: Optional scope for variable_scope.
      reuse: whether or not the layer and its variables should be reused.

    Returns:
      a tensor that is the result of a convolution being applied to `inputs`.
    """
    with tf.variable_scope(scope, 'Conv', [inputs], reuse=reuse):
        num_filters_in = int(inputs.shape[3])
        weights_shape = [kernel_size, kernel_size,
                         num_filters_in, num_filters_out]

        # Initialization: He-style scaling based on kernel area and fan-out.
        n = int(weights_shape[0] * weights_shape[1] * weights_shape[3])
        weights_initializer = tf.random_normal_initializer(
            stddev=np.sqrt(2.0 / n))

        weights = variable(name='weights',
                           shape=weights_shape,
                           dtype=tf.float32,
                           initializer=weights_initializer,
                           trainable=True)
        strides = stride_arr(stride, stride)
        outputs = tf.nn.conv2d(inputs, weights, strides, padding='SAME',
                               data_format='NHWC')
        return outputs
def tfdense(layer_number, imsize, givesize, take, resize, scope):
    with tf.variable_scope(scope or "Linear"):
        take_size = take.shape[-1]
        shape_in = take_size
        if resize:
            # Flatten an [N, imsize, imsize, C] feature map.
            shape_in = shape_in * imsize * imsize
        take = tf.reshape(take, [-1, shape_in])
        W_fc = tf.get_variable(
            "W_fc" + str(layer_number),
            shape=[shape_in, givesize],
            initializer=tf.random_normal_initializer(stddev=0.02))
        # b_fc uses tf.get_variable's default initializer, since none is given.
        b_fc = tf.get_variable("b_fc" + str(layer_number), shape=[givesize])
        h_fc = tf.matmul(take, W_fc) + b_fc
    return h_fc