def testCreateMulticlone(self):
    """Checks variable sharing, naming and placement when four clones are built."""
    clone_count = 4
    graph = tf.Graph()
    with graph.as_default():
        tf.set_random_seed(0)
        inputs = tf.constant(self._inputs, dtype=tf.float32)
        labels = tf.constant(self._labels, dtype=tf.float32)
        config = model_deploy.DeploymentConfig(num_clones=clone_count)

        # The graph must start without any variables.
        self.assertEqual(framework.get_variables(), [])
        clones = model_deploy.create_clones(
            config, BatchNormClassifier, (inputs, labels))

        # Five variables in total, all expected on the CPU.
        self.assertEqual(len(framework.get_variables()), 5)
        for variable in framework.get_variables():
            self.assertDeviceEqual(variable.device, 'CPU:0')
            self.assertDeviceEqual(variable.value().device, 'CPU:0')

        self.assertEqual(len(clones), clone_count)
        for index, clone in enumerate(clones):
            expected_scope = 'clone_%d/' % index
            self.assertEqual(
                clone.outputs.op.name,
                'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % index)
            clone_update_ops = tf.get_collection(
                tf.GraphKeys.UPDATE_OPS, clone.scope)
            self.assertEqual(len(clone_update_ops), 2)
            self.assertEqual(clone.scope, expected_scope)
            self.assertDeviceEqual(clone.device, 'GPU:%d' % index)
def testCreateOnecloneWithPS(self):
    """Checks gradient/variable placement for one clone with one PS task."""
    graph = tf.Graph()
    with graph.as_default():
        tf.set_random_seed(0)
        inputs = tf.constant(self._inputs, dtype=tf.float32)
        labels = tf.constant(self._labels, dtype=tf.float32)
        config = model_deploy.DeploymentConfig(num_clones=1, num_ps_tasks=1)

        self.assertEqual(framework.get_variables(), [])
        clones = model_deploy.create_clones(
            config, BatchNormClassifier, (inputs, labels))
        self.assertEqual(len(framework.get_variables()), 5)
        self.assertEqual(len(tf.get_collection(tf.GraphKeys.UPDATE_OPS)), 2)

        sgd = tf.train.GradientDescentOptimizer(learning_rate=1.0)
        total_loss, grads_and_vars = model_deploy.optimize_clones(clones, sgd)
        self.assertEqual(len(grads_and_vars), len(tf.trainable_variables()))
        self.assertEqual(total_loss.op.name, 'total_loss')

        # Gradients are expected on the worker GPU, variables on the PS CPU.
        for gradient, variable in grads_and_vars:
            self.assertDeviceEqual(gradient.device, '/job:worker/device:GPU:0')
            self.assertDeviceEqual(variable.device, '/job:ps/task:0/CPU:0')
def testCreateLogisticClassifier(self):
    """Checks a single LogisticClassifier clone: variables, names, devices, losses."""
    graph = tf.Graph()
    with graph.as_default():
        tf.set_random_seed(0)
        inputs = tf.constant(self._inputs, dtype=tf.float32)
        labels = tf.constant(self._labels, dtype=tf.float32)
        config = model_deploy.DeploymentConfig(num_clones=1)

        self.assertEqual(framework.get_variables(), [])
        clones = model_deploy.create_clones(
            config, LogisticClassifier, (inputs, labels))
        only_clone = clones[0]

        self.assertEqual(len(framework.get_variables()), 2)
        for variable in framework.get_variables():
            self.assertDeviceEqual(variable.device, 'CPU:0')
            self.assertDeviceEqual(variable.value().device, 'CPU:0')

        # With one clone the scope is empty, so the op name has no prefix.
        self.assertEqual(only_clone.outputs.op.name,
                         'LogisticClassifier/fully_connected/Sigmoid')
        self.assertEqual(only_clone.scope, '')
        self.assertDeviceEqual(only_clone.device, 'GPU:0')

        self.assertEqual(len(tf.losses.get_losses()), 1)
        self.assertEqual(tf.get_collection(tf.GraphKeys.UPDATE_OPS), [])
def testCreateOnecloneWithPS(self):
    """Checks clone naming and placement for one clone with one PS task."""
    graph = tf.Graph()
    with graph.as_default():
        tf.set_random_seed(0)
        inputs = tf.constant(self._inputs, dtype=tf.float32)
        labels = tf.constant(self._labels, dtype=tf.float32)
        config = model_deploy.DeploymentConfig(num_clones=1, num_ps_tasks=1)

        self.assertEqual(framework.get_variables(), [])
        clones = model_deploy.create_clones(
            config, BatchNormClassifier, (inputs, labels))

        self.assertEqual(len(clones), 1)
        only_clone = clones[0]
        self.assertEqual(only_clone.outputs.op.name,
                         'BatchNormClassifier/fully_connected/Sigmoid')
        self.assertDeviceEqual(only_clone.device, '/job:worker/device:GPU:0')
        self.assertEqual(only_clone.scope, '')

        # Variables are expected on the PS task, read on the same device.
        self.assertEqual(len(framework.get_variables()), 5)
        for variable in framework.get_variables():
            self.assertDeviceEqual(variable.device, '/job:ps/task:0/CPU:0')
            self.assertDeviceEqual(variable.device, variable.value().device)
def testCreateMulticloneWithPS(self):
    """Checks placement for two clones whose variables alternate over two PS tasks."""
    graph = tf.Graph()
    with graph.as_default():
        tf.set_random_seed(0)
        inputs = tf.constant(self._inputs, dtype=tf.float32)
        labels = tf.constant(self._labels, dtype=tf.float32)
        config = model_deploy.DeploymentConfig(num_clones=2, num_ps_tasks=2)

        self.assertEqual(framework.get_variables(), [])
        clones = model_deploy.create_clones(
            config, BatchNormClassifier, (inputs, labels))

        self.assertEqual(len(framework.get_variables()), 5)
        for position, variable in enumerate(framework.get_variables()):
            # The expected PS task alternates with the variable's position.
            ps_task = position % 2
            self.assertDeviceEqual(
                variable.device, '/job:ps/task:%d/device:CPU:0' % ps_task)
            self.assertDeviceEqual(variable.device, variable.value().device)

        self.assertEqual(len(clones), 2)
        for index, clone in enumerate(clones):
            self.assertEqual(
                clone.outputs.op.name,
                'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % index)
            self.assertEqual(clone.scope, 'clone_%d/' % index)
            self.assertDeviceEqual(
                clone.device, '/job:worker/device:GPU:%d' % index)
def testNonReuseVars(self):
    """Two unscoped conv2d calls each add their own pair of variables."""
    height = 7
    width = 9
    with self.cached_session():
        images = tf.random_uniform((5, height, width, 3), seed=1)
        # The variable count is checked after each call: 2, then 4.
        for expected_count in (2, 4):
            conv2d_ws.conv2d(images, 32, [3, 3])
            self.assertEqual(
                len(contrib_framework.get_variables()), expected_count)
def testCreateConvCreatesWeightsAndBiasesVarsWithRateTwo(self):
    """conv2d with rate=2 creates 'conv1/weights' and 'conv1/biases'."""
    height = 7
    width = 9
    images = tf.random_uniform((5, height, width, 3), seed=1)
    weights_name = 'conv1/weights'
    biases_name = 'conv1/biases'
    with self.cached_session():
        # Neither variable exists before the layer is built.
        self.assertFalse(contrib_framework.get_variables(weights_name))
        self.assertFalse(contrib_framework.get_variables(biases_name))
        conv2d_ws.conv2d(images, 32, [3, 3], rate=2, scope='conv1')
        self.assertTrue(contrib_framework.get_variables(weights_name))
        self.assertTrue(contrib_framework.get_variables(biases_name))
def testReuseConvWithBatchNorm(self):
    """Reusing scope 'Conv' with batch norm must not create a second variable set."""
    height = 7
    width = 9
    with self.cached_session():
        images = tf.random_uniform((5, height, width, 32), seed=1)
        batch_norm_scope = contrib_framework.arg_scope(
            [conv2d_ws.conv2d],
            normalizer_fn=contrib_layers.batch_norm,
            normalizer_params={'decay': 0.9})
        with batch_norm_scope:
            net = conv2d_ws.conv2d(images, 32, [3, 3], scope='Conv')
            net = conv2d_ws.conv2d(net, 32, [3, 3], scope='Conv', reuse=True)

        # Expect 4 variables overall, 3 under Conv/BatchNorm, none under Conv_1.
        self.assertEqual(len(contrib_framework.get_variables()), 4)
        self.assertEqual(
            len(contrib_framework.get_variables('Conv/BatchNorm')), 3)
        self.assertEqual(
            len(contrib_framework.get_variables('Conv_1/BatchNorm')), 0)
def testReuseConvWithWD(self):
    """Reusing a regularized conv layer must not add variables or losses."""
    height = 7
    width = 9
    with self.cached_session():
        images = tf.random_uniform((5, height, width, 3), seed=1)
        weight_decay = contrib_layers.l2_regularizer(0.01)
        with contrib_framework.arg_scope([conv2d_ws.conv2d],
                                         weights_regularizer=weight_decay):
            # First build creates the layer; second build reuses it. The
            # counts must be identical after both calls.
            for extra_kwargs in ({}, {'reuse': True}):
                conv2d_ws.conv2d(images, 32, [3, 3], scope='conv1',
                                 **extra_kwargs)
                self.assertEqual(len(contrib_framework.get_variables()), 2)
                self.assertEqual(
                    len(tf.get_collection(
                        tf.GraphKeys.REGULARIZATION_LOSSES)), 1)
def _build(self):
    """Builds the Q network: placeholders, layers, loss, train op and summaries.

    The state `x` passes through a 400-unit layer, is concatenated with the
    action `a`, then goes through a 300-unit layer and a linear one-unit
    output that is squeezed into `self.q`.
    """
    cfg = self.cfg
    self.x = tf.placeholder(
        dtype=tf.float32, shape=(None, cfg.status_size), name='x')
    self.a = tf.placeholder(
        dtype=tf.float32, shape=(None, cfg.action_size), name='a')
    self.y = tf.placeholder(dtype=tf.float32, shape=(None, ), name='y')

    state_features = tfl.fully_connected(self.x, 400)
    # The action enters the network after the first hidden layer.
    state_action = tf.concat_v2([state_features, self.a], 1)
    hidden = tfl.fully_connected(state_action, 300)
    q_out = tfl.fully_connected(
        hidden,
        1,
        activation_fn=None,
        weights_regularizer=tfl.l2_regularizer(1e-2))
    self.q = tf.squeeze(q_out)
    self.network_params = get_variables(self.scope)

    # Mean squared error between the target `y` and the predicted Q value.
    self.loss = tf.reduce_mean(tf.squared_difference(self.y, self.q))
    optimizer = tf.train.AdamOptimizer(cfg.learning_rate)
    self.train_op = optimizer.minimize(
        self.loss, global_step=get_or_create_global_step())

    # Gradient of Q with respect to the action, divided by the batch size.
    batch_size = tf.cast(tf.shape(self.a)[0], tf.float32)
    self.action_gradient = tf.div(tf.gradients(self.q, self.a), batch_size)

    loss_summary = tf.summary.scalar("loss", self.loss)
    q_summary = tf.summary.histogram("q", self.q)
    self.summaries = tf.summary.merge([loss_summary, q_summary])
def make_actor(self, states, dout, bounds, name='online', reuse=False):
    """Build an actor network mu, the policy function approximator.

    Two ReLU layers (`self.h1`, `self.h2`) feed a tanh output layer whose
    result is rescaled from (-1, 1) to `bounds`. When
    `self.actor_batch_normalization` is set, batch normalization is applied
    before each of the three dense layers.

    Returns:
      A `Network` holding the scaled output, the scope's variables, its
      update ops and its regularization losses.
    """
    is_batch = tf.shape(states)[0] > 1
    dout = np.prod(dout)
    # Shared keyword arguments for every batch-normalization layer.
    bn_kwargs = dict(training=is_batch, epsilon=1e-7, momentum=.95)

    with tf.variable_scope(name, reuse=reuse) as scope:
        hidden = states
        for layer_name, width in (('0', self.h1), ('1', self.h2)):
            if self.actor_batch_normalization:
                hidden = tf.layers.batch_normalization(hidden, **bn_kwargs)
            hidden = self.dense(layer_name, hidden, width, tf.nn.relu)
        if self.actor_batch_normalization:
            hidden = tf.layers.batch_normalization(hidden, **bn_kwargs)

        squashed = self.dense('2', hidden, dout, tf.nn.tanh, minmax=1e-4)
        scaled = self.scale(squashed, bounds_in=(-1, 1), bounds_out=bounds)

        update_ops = scope.get_collection(tf.GraphKeys.UPDATE_OPS)
        reg_losses = scope.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        return Network(scaled, get_variables(scope), update_ops, reg_losses)
def _build_model(self):
    """Builds the convolutional Q network, its loss, train op and summaries.

    Inputs are uint8 frames of shape [None, 84, 84, 4], scaled to [0, 1].
    The network outputs `self.nA` values per example; the loss compares the
    value of the chosen action `a` with the target `y`.
    """
    self.X = tf.placeholder(
        dtype=tf.uint8, shape=[None, 84, 84, 4], name="X")
    self.y = tf.placeholder(dtype=tf.float32, shape=[None], name="y")
    self.a = tf.placeholder(dtype=tf.int32, shape=[None], name="a")

    net = tf.to_float(self.X) / 255.0
    # (num_outputs, kernel_size, stride) for the three conv layers.
    for num_outputs, kernel_size, stride in ((32, 8, 4), (64, 4, 2), (64, 3, 1)):
        net = tfl.conv2d(net, num_outputs, kernel_size, stride)
    net = tfl.flatten(net)
    net = tfl.fully_connected(net, 512)
    self.predictions = tfl.fully_connected(net, self.nA, activation_fn=None)

    # Pick, for every row, the prediction of the action that was taken.
    batch_size = tf.shape(self.a)[0]
    row_action_pairs = tf.pack([tf.range(batch_size), self.a], axis=1)
    self.action_predictions = tf.gather_nd(self.predictions, row_action_pairs)
    self.network_params = get_variables(self.scope)

    self.loss = tf.reduce_mean(
        tf.squared_difference(self.y, self.action_predictions))
    optimizer = tf.train.RMSPropOptimizer(0.0025, 0.99, 0.0, 1e-6)
    self.train_op = optimizer.minimize(
        self.loss, global_step=get_or_create_global_step())

    loss_summary = tf.summary.scalar("loss", self.loss)
    action_summary = tf.summary.histogram(
        "a", tf.argmax(self.predictions, axis=1))
    max_q_summary = tf.summary.histogram(
        "max_q", tf.reduce_max(self.predictions))
    self.summaries = tf.summary.merge(
        [loss_summary, action_summary, max_q_summary])
def default_init_from_checkpoint_fn(checkpoint, allow_partial_restore=False):
    """Initializes the graph's variables from a TF checkpoint.

    Every variable returned by `contrib_framework.get_variables()` is looked
    up in the checkpoint under its op name and, when present, mapped for
    initialization.

    Args:
      checkpoint: String pointing to path of TF checkpoint.
      allow_partial_restore: If True, variables missing from the checkpoint
        are skipped with a warning; otherwise a missing variable is an error.

    Raises:
      ValueError: If a variable is not in the checkpoint and partial restore
        is not explicitly enabled.
    """
    logging.info('Initializing model weights from %s', checkpoint)
    reader = tf.train.load_checkpoint(checkpoint)
    assignment_map = {}
    for variable in contrib_framework.get_variables():
        name = variable.op.name
        if reader.has_tensor(name):
            logging.info('Loading variable %s from checkpoint', name)
            assignment_map[name] = variable
            continue
        if not allow_partial_restore:
            raise ValueError('Attempting to restore variable {} which is '
                             'not in the checkpoint.'.format(name))
        logging.warning('Variable %s is not in the checkpoint, skipping.',
                        name)
    tf.train.init_from_checkpoint(checkpoint, assignment_map)
def Trainer_Graph(self):
    """Builds the trainer graph and session, then restores the latest checkpoint.

    Creates a fresh graph holding two copies of the network, under the
    'Main_net' and 'Target_net' variable scopes, then the training and
    target-update ops. Opens a session, initializes all variables and, if a
    checkpoint exists under `self.model_name`, restores it and sets
    `self.episode` from the checkpoint path.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        with tf.device(self.device):
            with tf.variable_scope('Main_net'):
                (self.imageIn, self.conv1, self.conv2, self.conv3,
                 self.pool1, self.conv4, self.Advantage, self.Value,
                 self.Qout, self.predict, self.actions, self.LSTM_state,
                 self.init_lstim_state, _) = self.__create_graph()
            with tf.variable_scope('Target_net'):
                # Only the input, value and LSTM state tensors of the target
                # network are kept.
                (self.imageInT, _, _, _, _, _, _, self.ValueT, _, _, _,
                 self.LSTM_stateT, self.init_lstim_stateT,
                 _) = self.__create_graph()
            self.MainNet_vars = get_variables('Main_net')
            self.TargetNet_vars = get_variables('Target_net')
            self.createTrainingMethod()
            self.createupdateTargetNetOp()

        self.sess = tf.Session(
            graph=self.graph,
            config=tf.ConfigProto(
                allow_soft_placement=True,
                log_device_placement=False,
                gpu_options=tf.GPUOptions(allow_growth=True)))
        self.sess.run(tf.global_variables_initializer())
        if configure.TENSORBOARD:
            self._create_tensor_board()

        self.saver = tf.train.Saver()
        checkpoint = tf.train.get_checkpoint_state(self.model_name)
        if checkpoint and checkpoint.model_checkpoint_path:
            self.saver.restore(self.sess, checkpoint.model_checkpoint_path)
            # Single-argument print() emits the same text on Python 2 and 3.
            print("Successfully loaded: %s" % checkpoint.model_checkpoint_path)
            mypath = str(checkpoint.model_checkpoint_path)
            # The step is read from the third '-'-separated field of the path.
            stepmatch = re.split('-', mypath)[2]
            self.episode = int(stepmatch)
        else:
            print("Could not find old network weights")
def testDefaults(self):
    """A default DeploymentConfig uses GPU:0 for the clone and CPU:0 elsewhere."""
    config = model_deploy.DeploymentConfig()

    self.assertEqual(framework.get_variables(), [])
    self.assertEqual(config.caching_device(), None)

    # Clone 0 has an empty scope and is placed on the first GPU.
    self.assertDeviceEqual(config.clone_device(0), 'GPU:0')
    self.assertEqual(config.clone_scope(0), '')

    # Optimizer, inputs and variables are all placed on the CPU.
    self.assertDeviceEqual(config.optimizer_device(), 'CPU:0')
    self.assertDeviceEqual(config.inputs_device(), 'CPU:0')
    self.assertDeviceEqual(config.variables_device(), 'CPU:0')
def Agent_Graph(self):
    """Builds this agent's network under `self.model_name` and initializes it.

    The network is created inside `self.graph` on `self.device`; only the
    variables under the `self.model_name` scope are initialized.
    """
    scope_name = self.model_name
    with self.graph.as_default():
        with tf.device(self.device):
            with tf.variable_scope(scope_name):
                (self.imageIn, self.conv1, self.conv2, self.conv3,
                 self.pool1, self.conv4, self.Advantage, self.Value,
                 self.Qout, self.predict) = self.__create_graph()
        self.MainNet_vars = get_variables(self.model_name)
        init_op = tf.variables_initializer(self.MainNet_vars)
        self.sess.run(init_op)
def get_temporal_mean_pooled_feats(inputs, is_training=True):
    """Average-pools `inputs` with a (14, 1) window and flattens the result.

    `is_training` is accepted but not used by this function.

    Returns:
      A tuple `(features, tvars)`: the flattened pooled features and the
      variables found under the 'temporal_mean_pool' scope.
    """
    scope_name = 'temporal_mean_pool'
    with tf.variable_scope(scope_name):
        pooled = slim.avg_pool2d(
            inputs,
            (14, 1),
            stride=1,
            padding='VALID',
            scope='AvgPool_8x1')
        flattened = slim.flatten(pooled)
    return flattened, framework.get_variables(scope_name)
def mlp_normalized_advantage_function(x,
                                      act_dim,
                                      hidden_sizes=(100, 100),
                                      activation=tf.tanh,
                                      output_activation=tf.tanh,
                                      action_space=None,
                                      weight_init=None,
                                      act_multiplier=1,
                                      scope=None):
    """Builds a normalized advantage function (NAF) network.

    Q is formed as V + A, where A = -0.5 * (a - mu)^T P (a - mu) and
    P = L L^T. L is assembled from the flat output `l`, with its diagonal
    entries exponentiated.

    Args:
      x: Iterable used as the observation shape (excluding the batch axis).
      act_dim: Indexable whose first element is the action dimensionality.
      hidden_sizes: Widths of the shared hidden layers.
      activation, output_activation, action_space, weight_init,
        act_multiplier: Accepted but not used by this function.
      scope: Variable scope the network is built in and variables are
        collected from.

    Returns:
      A tuple `(x_ph, a_ph, mu, V, Q, P, A, vars)`: the two placeholders, the
      network outputs and the variables found under `scope`.
    """
    with tf.variable_scope(scope):
        act_dim = act_dim[0]
        x_ph = tf.placeholder(
            tf.float32, (None, ) + tuple(x), name='observations')
        a_ph = tf.placeholder(tf.float32, (None, act_dim), name='actions')

        # Hidden layers shared by the value and advantage heads.
        with tf.name_scope('hidden'):
            h = x_ph
            for idx, hidden_dim in enumerate(hidden_sizes):
                h = fc(h, hidden_dim, scope='hid%d' % idx)

        with tf.name_scope('value'):
            V = fc(h, 1, scope='V')

        with tf.name_scope('advantage'):
            # Number of entries in a triangular act_dim x act_dim matrix.
            # Floor division keeps the layer width an int on Python 3; the
            # product of consecutive integers is even, so it is exact.
            l = fc(h, (act_dim * (act_dim + 1)) // 2, scope='l')
            mu = fc(h, act_dim, scope='mu')

            pivot = 0
            rows = []
            for idx in range(act_dim):
                count = act_dim - idx
                # First entry of each row is the diagonal; exp makes it > 0.
                diag_elem = tf.exp(tf.slice(l, (0, pivot), (-1, 1)))
                non_diag_elems = tf.slice(l, (0, pivot + 1), (-1, count - 1))
                row = tf.pad(
                    tf.concat((diag_elem, non_diag_elems), 1),
                    ((0, 0), (idx, 0)))
                rows.append(row)
                pivot += count

            L = tf.transpose(tf.stack(rows, axis=1), (0, 2, 1))
            P = tf.matmul(L, tf.transpose(L, (0, 2, 1)))

            tmp = tf.expand_dims(a_ph - mu, -1)
            A = -tf.matmul(tf.transpose(tmp, [0, 2, 1]),
                           tf.matmul(P, tmp)) / 2
            A = tf.reshape(A, [-1, 1])

        with tf.name_scope('Q'):
            Q = A + V

        variables = get_variables(scope)
        return x_ph, a_ph, mu, V, Q, P, A, variables
def get_pretrained_model_feats(inputs, scopename='', is_training=True):
    """Runs VGG-19 on `inputs` and returns its conv5_1 activations.

    `is_training` is accepted but not used by this function.

    Returns:
      A tuple `(features, restore_vars, tvars)`: the
      '<scopename>/vgg_19/conv5/conv5_1' end point, the variables found under
      `scopename`, and an empty list.
    """
    with slim.arg_scope(vgg_arg_scope()):
        with tf.variable_scope(scopename):
            _, end_points = vgg_19(inputs)
    endpoint_key = scopename + '/vgg_19/conv5/conv5_1'
    conv5_1 = end_points[endpoint_key]  # 14 x 14 x 512
    return conv5_1, framework.get_variables(scopename), []
def testLocalTrainOp(self):
    """Deploys two CPU clones, trains ten steps, and checks loss and moving stats."""
    graph = tf.Graph()
    with graph.as_default():
        tf.set_random_seed(0)
        inputs = tf.constant(self._inputs, dtype=tf.float32)
        labels = tf.constant(self._labels, dtype=tf.float32)
        config = model_deploy.DeploymentConfig(num_clones=2,
                                               clone_on_cpu=True)
        sgd = tf.train.GradientDescentOptimizer(learning_rate=1.0)

        self.assertEqual(framework.get_variables(), [])
        model = model_deploy.deploy(
            config, BatchNormClassifier, (inputs, labels), optimizer=sgd)

        # Four update ops are expected across the two clones.
        self.assertEqual(len(tf.get_collection(tf.GraphKeys.UPDATE_OPS)), 4)
        self.assertEqual(len(model.clones), 2)
        self.assertEqual(model.total_loss.op.name, 'total_loss')
        self.assertEqual(model.summary_op.op.name, 'summary_op/summary_op')
        self.assertEqual(model.train_op.op.name, 'train_op')

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            moving_mean = framework.get_variables_by_name('moving_mean')[0]
            moving_variance = framework.get_variables_by_name(
                'moving_variance')[0]
            moving_stats = [moving_mean, moving_variance]

            loss_before = sess.run(model.total_loss)
            mean_before, variance_before = sess.run(moving_stats)
            self.assertAllClose(mean_before, [0.0, 0.0, 0.0, 0.0])
            self.assertAllClose(variance_before, [1.0, 1.0, 1.0, 1.0])

            for _ in range(10):
                sess.run(model.train_op)

            loss_after = sess.run(model.total_loss)
            self.assertLess(loss_after, loss_before / 5.0)

            mean_after, variance_after = sess.run(moving_stats)
            expected_mean = np.array([0.125, 0.25, 0.375, 0.25])
            expected_var = self._addBesselsCorrection(
                16, np.array([0.109375, 0.1875, 0.234375, 0.1875]))
            self.assertAllClose(mean_after, expected_mean)
            self.assertAllClose(variance_after, expected_var)
def resnet_init_from_checkpoint_fn(checkpoint):
    """Initializes the ResNet's trainable variables from a checkpoint.

    Trainable variables under the scope returned by `_get_resnet_scope()` are
    mapped to checkpoint names in which that scope prefix is replaced by
    'resnet_model/'. Variables whose op name contains 'dense' are left out.

    Args:
      checkpoint: String pointing to path of TF checkpoint.
    """
    logging.info('Initializing model weights from %s', checkpoint)
    resnet_scope = _get_resnet_scope()
    resnet_variables = contrib_framework.get_variables(
        scope=resnet_scope, collection=tf.GraphKeys.TRAINABLE_VARIABLES)
    assignment_map = {}
    for var in resnet_variables:
        if 'dense' in var.op.name:
            continue
        # Swap only the leading scope prefix: count=1 keeps any later
        # occurrence of the same substring in the name untouched.
        name_in_ckpt = var.op.name.replace(resnet_scope, 'resnet_model/', 1)
        assignment_map[name_in_ckpt] = var
    tf.train.init_from_checkpoint(checkpoint, assignment_map)
def build_model(self):
    """Builds placeholders, the network, loss, accuracy, summaries and saver.

    The loss is the mean softmax cross-entropy between `self.res` and
    `self.y_vec`; accuracy is the mean top-1 hit rate against the argmax of
    `self.y_vec`. The saver covers the variables under the
    "net_<dataset_name>" scope.
    """
    image_shape = [self.batch_size] + [self.image_size, self.image_size,
                                       self.c_dim]
    self.images = tf.placeholder(tf.float32, image_shape, name='images')
    self.y_vec = tf.placeholder(
        tf.float32, [self.batch_size, self.y_dim], name='y_vec')

    self.res, self.feats = self.net(self.images)
    self.feature_sum = tf.histogram_summary("feature", self.feats)

    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        self.res, self.y_vec)
    self.loss = tf.reduce_mean(cross_entropy)
    self.loss_sum = tf.scalar_summary("loss", self.loss)

    # Class ids come from the argmax over axis 1 of the label vectors.
    label_ids = tf.cast(tf.argmax(self.y_vec, dimension=1), dtype=tf.int32)
    top1_hits = tf.nn.in_top_k(self.res, label_ids, 1)
    self.accuracy = tf.reduce_mean(tf.cast(top1_hits, tf.float32))
    self.accuracy_sum = tf.scalar_summary("accuracy", self.accuracy)

    self.all_sum = tf.merge_summary(
        [self.feature_sum, self.loss_sum, self.accuracy_sum])
    self.train_vars = get_variables(scope="net_" + self.dataset_name)
    self.saver = tf.train.Saver(var_list=self.train_vars)
def default_init_from_checkpoint_fn(checkpoint,
                                    allow_partial_restore=False,
                                    filter_restorables_fn=None):
    """Initializes the graph's variables from a TF checkpoint.

    Args:
      checkpoint: String pointing to path of TF checkpoint.
      allow_partial_restore: If True, graph variables that are missing from
        the checkpoint are skipped with a warning; otherwise a missing
        variable raises an error.
      filter_restorables_fn: (Optional) A function that takes a TensorFlow
        variable and returns whether it should be restored. Variables it
        rejects are skipped before the checkpoint is consulted. By default,
        all variables are considered.

    Raises:
      ValueError: If a variable that passed the filter is not in the
        checkpoint and partial restore is not explicitly enabled.
    """
    logging.info('Initializing model weights from %s', checkpoint)
    reader = tf.train.load_checkpoint(checkpoint)
    assignment_map = {}
    for variable in contrib_framework.get_variables():
        rejected = (filter_restorables_fn is not None and
                    not filter_restorables_fn(variable))
        if rejected:
            continue
        name = variable.op.name
        if reader.has_tensor(name):
            logging.info('Loading variable %s from checkpoint', name)
            assignment_map[name] = variable
            continue
        if not allow_partial_restore:
            raise ValueError('Attempting to restore variable {} which is '
                             'not in the checkpoint.'.format(name))
        logging.warning('Variable %s is not in the checkpoint, skipping.',
                        name)
    tf.train.init_from_checkpoint(checkpoint, assignment_map)
def make_critic(self, states, actions, name='online', reuse=False):
    """Build a critic network q, the value function approximator.

    The states pass through a first ReLU layer (`self.h1`), are concatenated
    with the actions, and then go through a second ReLU layer (`self.h2`)
    and a single-unit output. When `self.critic_batch_normalization` is set,
    batch normalization is applied before and after the first dense layer.

    Returns:
      A `Network` holding the squeezed output, the scope's variables, its
      update ops and its regularization losses.
    """
    is_batch = tf.shape(states)[0] > 1
    # Shared keyword arguments for both batch-normalization layers.
    bn_kwargs = dict(training=is_batch, epsilon=1e-7, momentum=.95)

    with tf.variable_scope(name, reuse=reuse) as scope:
        hidden = states
        if self.critic_batch_normalization:
            hidden = tf.layers.batch_normalization(hidden, **bn_kwargs)
        hidden = self.dense('0', hidden, self.h1, tf.nn.relu, decay=True)
        if self.critic_batch_normalization:
            hidden = tf.layers.batch_normalization(hidden, **bn_kwargs)

        # Actions enter the net after the first hidden layer.
        hidden = tf.concat([hidden, actions], axis=1)
        hidden = self.dense('1', hidden, self.h2, tf.nn.relu, decay=True)
        q_value = self.dense('2_q', hidden, 1, decay=True, minmax=1e-4)

        update_ops = scope.get_collection(tf.GraphKeys.UPDATE_OPS)
        reg_losses = scope.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        return Network(
            tf.squeeze(q_value), get_variables(scope), update_ops, reg_losses)
def _build(self):
    """Builds the policy network, its train op and its summaries.

    The state `x` goes through 400- and 300-unit layers and a tanh output of
    size `cfg.action_size`. Training applies the gradients of the action with
    respect to the network parameters, weighted by the negated
    `action_gradient` placeholder.
    """
    cfg = self.cfg
    self.x = tf.placeholder(
        dtype=tf.float32, shape=(None, cfg.status_size), name='x')
    self.action_gradient = tf.placeholder(
        dtype=tf.float32,
        shape=(None, cfg.action_size),
        name='action_gradient')

    hidden = tfl.fully_connected(self.x, 400)
    hidden = tfl.fully_connected(hidden, 300)
    # tanh keeps the action within [-1, 1].
    self.a = tfl.fully_connected(
        hidden, cfg.action_size, activation_fn=tf.nn.tanh)
    self.network_params = get_variables(self.scope)

    # Combine the gradients: the negated action gradient is fed in as the
    # initial gradient for d(action)/d(params).
    negated_action_gradient = -self.action_gradient
    param_gradients = tf.gradients(
        self.a, self.network_params, negated_action_gradient)
    optimizer = tf.train.AdamOptimizer(cfg.learning_rate)
    self.train_op = optimizer.apply_gradients(
        zip(param_gradients, self.network_params))

    action_summary = tf.summary.histogram("a", self.a)
    self.summaries = tf.summary.merge([action_summary])
def get_classifier_logits(inputs,
                          num_classes,
                          is_training=True,
                          lscope='',
                          reuse=None):
    """Builds the primary classifier head and returns its logits.

    The head is a 2048-unit ReLU layer, dropout with keep probability 0.8,
    and a linear layer of `num_classes` outputs, built under `lscope`.

    Returns:
      A tuple `(logits, tvars)`: the final logits and the variables found
      under `lscope`.
    """
    with slim.arg_scope(common_arg_scope()):
        with tf.variable_scope(lscope, reuse=reuse):
            pre_logits = slim.fully_connected(
                inputs, 2048, activation_fn=tf.nn.relu, scope='PreLogits')
            dropped = slim.dropout(
                pre_logits,
                0.8,
                is_training=is_training,
                scope='Logits_dropout')
            logits = slim.fully_connected(
                dropped, num_classes, activation_fn=None, scope='Final_Logits')
    return logits, framework.get_variables(lscope)
def resnet_model(images,
                 is_training,
                 num_classes,
                 resnet_size=50,
                 return_intermediate_values=False,
                 film_generator_fn=None,
                 film_generator_input=None,
                 pretrain_checkpoint=None):
    """Returns resnet model, optionally returning intermediate endpoint tensors.

    Args:
      images: A Tensor representing a batch [N,H,W,C] of input images.
      is_training: A boolean. Set to True to add operations required only when
        training the classifier.
      num_classes: Dimensionality of output logits emitted by final dense layer.
      resnet_size: Size of resnet. One of [18, 34, 50, 101, 152, 200].
      return_intermediate_values: If True, returns a dictionary of output and
        intermediate activation values.
      film_generator_fn: Callable that returns a List (for each block layer) of
        Lists (per ResNet block) of FiLM conditioning vectors.
      film_generator_input: Embedding tensor to be passed to film_generator_fn.
      pretrain_checkpoint: String to initialize model weights from. Does *NOT*
        initialize final logits layer. ResNet checkpoints can be found here:
        https://github.com/tensorflow/models/tree/master/official/r1/resnet.

    Returns:
      `resnet_endpoints(model)` when `return_intermediate_values` is true,
      otherwise the output of the final dense layer.
    """
    # For bigger models, we want to use "bottleneck" layers.
    bottleneck = resnet_size >= 50
    model = resnet_lib.Model(
        resnet_size=resnet_size,
        bottleneck=bottleneck,
        num_classes=num_classes,
        num_filters=64,
        kernel_size=7,
        conv_stride=2,
        first_pool_size=3,
        first_pool_stride=2,
        block_sizes=_get_block_sizes(resnet_size),
        block_strides=[1, 2, 2, 2],
        resnet_version=resnet_lib.DEFAULT_VERSION,
        data_format='channels_last',
        dtype=resnet_lib.DEFAULT_DTYPE)
    final_dense = model(images, is_training, film_generator_fn,
                        film_generator_input)

    if pretrain_checkpoint:
        # Initialize variables in ResNet, excluding the final dense layer and
        # any optimization-specific variables (e.g. Momentum, Adam Beta).
        assignment_map = {}
        resnet_scope = _get_resnet_scope()
        for var in contrib_framework.get_variables(
                scope=resnet_scope,
                collection=tf.GraphKeys.TRAINABLE_VARIABLES):
            if 'dense' in var.op.name:
                continue
            # Swap only the leading scope prefix: count=1 keeps any later
            # occurrence of the same substring in the name untouched.
            name_in_ckpt = var.op.name.replace(
                resnet_scope, 'resnet_model/', 1)
            assignment_map[name_in_ckpt] = var
        tf.train.init_from_checkpoint(pretrain_checkpoint, assignment_map)

    if return_intermediate_values:
        return resnet_endpoints(model)
    return final_dense
def __init__(self, input_producer, embed_mat, config, is_train):
    """Builds the controllable-VAE graph: VAE, generator, discriminator, losses.

    Args:
      input_producer: Provides the input tensors `x_enc`, `x_dec`, `y_dec`,
        `len_enc`, `len_dec`, `answ_disc`, plus `seq_max_length` and
        `vocab_num`.
      embed_mat: Passed through to `CtrlVAEModelingHelper`.
      config: Configuration object; updated in place with `max_len` and
        `vocab_num`, and read for `batch_size`, `hidden_size`,
        `learning_rate` and `max_grad_norm`.
      is_train: Not referenced in this constructor.
    """
    x_enc = input_producer.x_enc
    x_dec = input_producer.x_dec
    y_dec = input_producer.y_dec
    len_enc = input_producer.len_enc
    len_dec = input_producer.len_dec
    self.answer = input_producer.answ_disc
    max_len = input_producer.seq_max_length
    vocab_num = input_producer.vocab_num
    config.update(**dict(max_len=max_len, vocab_num=vocab_num))

    # NOTE(review): in TF 1.x the second positional parameter of tf.Variable
    # appears to be `trainable`, not `name`, so "KL_weight" is probably not
    # used as the variable's name here — confirm before changing, since
    # renaming would affect existing checkpoints.
    self.kl_weight = tf.Variable(0.0, "KL_weight")
    self.input_ids = y_dec
    modeler = CtrlVAEModelingHelper(config, embed_mat)

    with tf.variable_scope("CtrlVAE"):
        ### VAE ############################################################
        # encoder
        x_enc_onehot = tf.one_hot(x_enc, vocab_num)
        out_tuple = modeler.encoder(x_enc_onehot=x_enc_onehot,
                                    len_enc=len_enc)
        (vae_z, vae_mu, vae_logvar) = out_tuple

        # holistic representation: latent z concatenated with the embedded
        # answer code c
        with tf.device("/cpu:0"):
            vae_c = embedding_lookup(modeler.embed, self.answer)
        vae_c = tf.reshape(vae_c, [config.batch_size, -1])
        vae_represent = tf.concat([vae_z, vae_c], axis=1)

        # decoder
        x_dec_onehot = tf.one_hot(x_dec, config.vocab_num)
        out_tuple = modeler.decoder(initial_state=vae_represent,
                                    x_dec_onehot=x_dec_onehot,
                                    len_dec=len_dec,
                                    is_teacher_forcing=True)
        (vae_outputs, vae_state, vae_outputs_len) = out_tuple

        # final
        (self.vae_output, self.vae_sample) = vae_outputs

        ### Generator ######################################################
        # random z from the prior; c is the same code the VAE branch used
        self.gen_z = tf.random_normal(
            [config.batch_size, config.hidden_size])
        self.gen_c = vae_c
        gen_represent = tf.concat([self.gen_z, self.gen_c], axis=1)

        # generator (decoder), sharing weights with the VAE decoder
        x_dec_onehot = tf.one_hot(x_dec, config.vocab_num)
        out_tuple = modeler.decoder(initial_state=gen_represent,
                                    x_dec_onehot=x_dec_onehot,
                                    len_dec=len_dec,
                                    is_teacher_forcing=True,
                                    reuse=True)
        (gen_outputs, gen_state, gen_outputs_len) = out_tuple

        # final
        (self.gen_output, self.gen_sample) = gen_outputs
        # presumably ALMOST_ZERO is a small temperature that sharpens the
        # softmax towards one-hot — verify against its definition
        gen_outputs_onehot = softmax(self.gen_output / ALMOST_ZERO)

        # discriminator (for c code)
        out_tuple = modeler.discriminator(inputs=gen_outputs_onehot,
                                          inputs_length=gen_outputs_len)
        (self.gen_c_output, self.gen_c_sample) = out_tuple

        # encoder again (for z code ; additional discriminator)
        out_tuple = modeler.encoder(x_enc_onehot=gen_outputs_onehot,
                                    len_enc=gen_outputs_len,
                                    reuse=True)
        (gen_z, dis_mu, dis_logvar) = out_tuple

        ### Discriminator ##################################################
        # discriminator (for training)
        # NOTE(review): the inputs come from x_enc but the length passed is
        # gen_outputs_len rather than len_enc — confirm this is intended.
        x_dis_onehot = tf.one_hot(x_enc, config.vocab_num)
        out_tuple = modeler.discriminator(inputs=x_dis_onehot,
                                          inputs_length=gen_outputs_len,
                                          reuse=True)
        (self.dis_outputs, self.dis_sample) = out_tuple

    ########################################################################
    # get all the variables in this scope
    self.vars = get_variables("CtrlVAE")
    self.enc_vars = get_variables("CtrlVAE/encoder")
    self.gen_vars = get_variables("CtrlVAE/decoder")
    self.dis_vars = get_variables("CtrlVAE/discriminator")
    self.vae_vars = self.enc_vars + self.gen_vars

    ########################################################################
    # compute AE loss (reconstruction)
    len_out = tf.reduce_max(vae_outputs_len)
    targets = y_dec[:, :len_out]
    weights = tf.sequence_mask(vae_outputs_len, dtype=tf.float32)

    softmax_loss = sequence_loss(logits=self.vae_output,
                                 targets=targets,
                                 weights=weights,
                                 average_across_timesteps=False,
                                 average_across_batch=False)

    # NOTE: fix later!
    # Per-example loss is summed over axis 1, then averaged over axis 0;
    # ae_loss and ae_loss_mean are the same tensor.
    loss_sum = tf.reduce_sum(softmax_loss, axis=1)
    self.ae_loss = self.ae_loss_mean = tf.reduce_mean(loss_sum, axis=0)
    #self.ae_loss_mean = tf.reduce_mean(softmax_loss)

    # compute KL loss (regularization)
    KL_term = 1 + vae_logvar - tf.pow(vae_mu, 2) - tf.exp(vae_logvar)
    self.kl_loss = -0.5 * tf.reduce_sum(KL_term, reduction_indices=1)
    self.kl_loss_mean = tf.reduce_mean(self.kl_loss)

    # VAE total loss: reconstruction plus KL scaled by the KL weight
    self.vae_loss = self.ae_loss + self.kl_weight * self.kl_loss_mean

    ########################################################################
    # c code loss
    answer_labels = tf.one_hot(self.answer, config.vocab_num)
    c_loss = softmax_cross_entropy_with_logits(labels=answer_labels,
                                               logits=self.gen_c_output)
    self.c_loss = tf.reduce_mean(c_loss)

    # z code loss
    mu_loss = mean_pairwise_squared_error(vae_mu, dis_mu)
    logvar_loss = mean_pairwise_squared_error(vae_logvar, dis_logvar)
    self.z_loss = (mu_loss + logvar_loss) / 2

    # generator total loss
    self.gen_loss = self.c_loss + self.z_loss

    ########################################################################
    # discriminator training loss
    dis_loss = softmax_cross_entropy_with_logits(labels=answer_labels,
                                                 logits=self.dis_outputs)
    self.dis_loss = tf.reduce_mean(dis_loss)

    ########################################################################
    # optimization
    lr = config.learning_rate
    self.vae_lr = tf.Variable(lr, trainable=False, name="vae_lr")
    # The generator learning rate starts at 0.0; it is changed through
    # gen_lr_update below.
    self.gen_lr = tf.Variable(0.0, trainable=False, name="gen_lr")
    self.dis_lr = tf.Variable(lr, trainable=False, name="dis_lr")

    vae_optim = tf.train.AdamOptimizer(self.vae_lr)
    gen_optim = tf.train.AdamOptimizer(self.gen_lr)
    dis_optim = tf.train.AdamOptimizer(self.dis_lr)

    vae_grads = tf.gradients(self.vae_loss, self.vae_vars)
    gen_grads = tf.gradients(self.gen_loss, self.gen_vars)
    dis_grads = tf.gradients(self.dis_loss, self.dis_vars)

    vae_grads, _ = tf.clip_by_global_norm(vae_grads, config.max_grad_norm)
    gen_grads, _ = tf.clip_by_global_norm(gen_grads, config.max_grad_norm)
    dis_grads, _ = tf.clip_by_global_norm(dis_grads, config.max_grad_norm)

    self.global_step = get_or_create_global_step()
    self.vae_train = vae_optim.apply_gradients(
        zip(vae_grads, self.vae_vars))
    self.gen_train = gen_optim.apply_gradients(
        zip(gen_grads, self.gen_vars))
    # Only the discriminator train op is given the global step.
    self.dis_train = dis_optim.apply_gradients(
        zip(dis_grads, self.dis_vars), self.global_step)

    # learning_rate update
    self.new_gen_lr = tf.placeholder(tf.float32, shape=[],
                                     name="new_gen_lr")
    self.gen_lr_update = tf.assign(self.gen_lr, self.new_gen_lr)

    # KL weight update
    self.new_kl_weight = tf.placeholder(tf.float32, shape=[],
                                        name="new_kl")
    self.kl_weight_update = tf.assign(self.kl_weight, self.new_kl_weight)

    # summaries
    tf.summary.scalar("Loss/ae_mean", self.ae_loss_mean)
    tf.summary.scalar("Loss/kl_mean", self.kl_loss_mean)
    # "Loss/Total" is the unweighted sum; kl_weight is not applied here.
    tf.summary.scalar("Loss/Total", self.ae_loss_mean + self.kl_loss_mean)
    tf.summary.scalar("Misc/kl_weight", self.kl_weight)
    tf.summary.scalar("Misc/mu_mean", tf.reduce_mean(vae_mu))
    tf.summary.scalar("Misc/logvar_mean", tf.reduce_mean(vae_logvar))
    tf.summary.scalar("Misc/gen_lr", self.gen_lr)
    self.summary_op = tf.summary.merge_all()
def begin(self):
    """Records the variables that will be read out during the session run."""
    variables_in_graph = contrib_framework.get_variables()
    self._variables_to_log = variables_in_graph
def __init__(self, sess, input_shape, action_size, hidden_dims,
             use_batch_norm, use_seperate_networks, hidden_w, action_w,
             hidden_fn, action_fn, w_reg, scope='NAF'):
    """Build the NAF graph: hidden layers, V, mu, L/P, advantage A and Q.

    Args:
        sess: session object; only stored on `self.sess` here.
        input_shape: per-sample observation shape; the observation
            placeholder has shape `(None,) + tuple(input_shape)`.
        action_size: number of action dimensions.
        hidden_dims: iterable of hidden layer widths, applied in order.
        use_batch_norm: when true, `batch_norm` is applied to the input and
            the flag is forwarded to every `fc` call.
        use_seperate_networks: when true, v, l and mu each get their own
            hidden stack in variable scopes 'v', 'l', 'mu'; otherwise one
            hidden stack is shared by all three heads.
        hidden_w: forwarded positionally to `fc` for hidden, V and l layers
            (presumably a weight initializer -- confirm against `fc`).
        action_w: same role as `hidden_w`, but for the mu layer.
        hidden_fn: `activation_fn` for the hidden layers.
        action_fn: `activation_fn` for the mu layer.
        w_reg: `weight_reg` passed to the hidden layers.
        scope: variable scope wrapping the whole network; also the scope
            used to collect `self.variables`.
    """
    self.sess = sess

    with tf.variable_scope(scope):
        x = tf.placeholder(tf.float32, (None,) + tuple(input_shape),
                           name='observations')
        u = tf.placeholder(tf.float32, (None, action_size), name='actions')
        is_train = tf.placeholder(tf.bool, name='is_train')

        def hidden_stack():
            # Optional input batch-norm followed by the fc hidden layers.
            h = batch_norm(x, is_training=is_train) if use_batch_norm else x
            for idx, hidden_dim in enumerate(hidden_dims):
                h = fc(h, hidden_dim, is_train, hidden_w, weight_reg=w_reg,
                       activation_fn=hidden_fn,
                       use_batch_norm=use_batch_norm, scope='hid%d' % idx)
            return h

        hid_outs = {}
        with tf.name_scope('hidden'):
            if use_seperate_networks:
                logger.info("Creating seperate networks for v, l, and mu")
                # The loop variable must not be named `scope`: rebinding the
                # parameter would make get_variables() below query 'mu'
                # instead of this network's own scope.
                for head in ['v', 'l', 'mu']:
                    with tf.variable_scope(head):
                        hid_outs[head] = hidden_stack()
            else:
                logger.info("Creating shared networks for v, l, and mu")
                shared = hidden_stack()
                hid_outs['v'], hid_outs['l'], hid_outs['mu'] = \
                    shared, shared, shared

        with tf.name_scope('value'):
            V = fc(hid_outs['v'], 1, is_train, hidden_w,
                   use_batch_norm=use_batch_norm, scope='V')

        with tf.name_scope('advantage'):
            # One output per entry of a triangular action_size x action_size
            # matrix. Floor division keeps the width an int on Python 3.
            l = fc(hid_outs['l'], (action_size * (action_size + 1)) // 2,
                   is_train, hidden_w, use_batch_norm=use_batch_norm,
                   scope='l')
            mu = fc(hid_outs['mu'], action_size, is_train, action_w,
                    activation_fn=action_fn, use_batch_norm=use_batch_norm,
                    scope='mu')

            # Unpack the flat vector `l` row by row: row `idx` takes
            # `action_size - idx` entries, exponentiates the first one (the
            # diagonal element) and is left-padded with `idx` zeros.
            pivot = 0
            rows = []
            for idx in range(action_size):
                count = action_size - idx
                diag_elem = tf.exp(tf.slice(l, (0, pivot), (-1, 1)))
                non_diag_elems = tf.slice(l, (0, pivot + 1), (-1, count - 1))
                row = tf.pad(tf.concat((diag_elem, non_diag_elems), 1),
                             ((0, 0), (idx, 0)))
                rows.append(row)
                pivot += count

            # Stacked rows are upper-triangular; transposing the last two
            # axes gives the lower-triangular L, and P = L * L^T.
            L = tf.transpose(tf.stack(rows, axis=1), (0, 2, 1))
            P = tf.matmul(L, tf.transpose(L, (0, 2, 1)))

            # A = -1/2 * (u - mu)^T P (u - mu), reshaped to one column.
            tmp = tf.expand_dims(u - mu, -1)
            A = -tf.matmul(tf.transpose(tmp, [0, 2, 1]),
                           tf.matmul(P, tmp)) / 2
            A = tf.reshape(A, [-1, 1])

        with tf.name_scope('Q'):
            Q = A + V

        with tf.name_scope('optimization'):
            self.target_y = tf.placeholder(tf.float32, [None],
                                           name='target_y')
            self.loss = tf.reduce_mean(
                tf.squared_difference(self.target_y, tf.squeeze(Q)),
                name='loss')

    self.is_train = is_train
    self.variables = get_variables(scope)

    self.x, self.u, self.mu, self.V, self.Q, self.P, self.A = \
        x, u, mu, V, Q, P, A
def create_network(self, name):
    """Build a NAF-style Q network under variable scope `name`.

    The tensors and ops are collected in a dict stored at
    `self.network[name]` with keys 'x', 'u', 'V', 'mu', 'mu_out', 'L', 'P',
    'A', 'Q', 'y_', 'loss', 'gdo' and 'vars'.

    Reads `self.states`, `self.actions`, `self.hidden_nodes`,
    `self.hidden_layers` and `self.alpha`; `self.network` must already
    exist.

    Args:
        name: variable scope name; also the key in `self.network` and the
            scope passed to `get_variables`.
    """
    networks = {}
    with tf.variable_scope(name):
        # Input parameters
        networks['x'] = tf.placeholder(tf.float32,
                                       shape=[None, self.states],
                                       name='states')
        networks['u'] = tf.placeholder(tf.float32,
                                       shape=[None, self.actions],
                                       name='actions')

        # Hidden layers: the first uses tanh, the remaining ones relu.
        init = 1. / self.hidden_nodes / self.actions
        hid = fully_connected(
            networks['x'], self.hidden_nodes,
            weights_initializer=tf.random_normal_initializer(init, init / 5),
            biases_initializer=tf.random_normal_initializer(init, init / 5),
            activation_fn=tf.tanh)
        for _ in range(self.hidden_layers - 1):
            hid = fully_connected(
                hid, self.hidden_nodes,
                weights_initializer=tf.random_normal_initializer(init, init / 5),
                biases_initializer=tf.random_normal_initializer(init, init / 5),
                activation_fn=tf.nn.relu)

        # Output parameters
        # NOTE(review): V has `self.actions` outputs, whereas the value term
        # is usually a single scalar per sample -- confirm this is intended.
        networks['V'] = fully_connected(
            hid, self.actions,
            weights_initializer=tf.random_normal_initializer(1., 0.1),
            biases_initializer=tf.random_normal_initializer(0., 0.1))
        networks['mu'] = fully_connected(
            hid, self.actions,
            weights_initializer=tf.random_normal_initializer(1., 0.1),
            biases_initializer=tf.random_normal_initializer(0., 0.1))
        networks['mu_out'] = tf.nn.softmax(networks['mu'])

        # Flat output holding the entries of the triangular matrix L(x).
        l = fully_connected(
            hid, int(self.actions * (self.actions + 1) // 2),
            weights_initializer=tf.random_normal_initializer(1., 0.1),
            biases_initializer=tf.random_normal_initializer(0., 0.1))

        # Build A(x, u): unpack `l` row by row.
        axis_T = 0
        rows = []
        for i in range(self.actions):
            count = self.actions - i
            # First entry of the row is the diagonal element, exponentiated.
            diag = tf.exp(tf.slice(l, (0, axis_T), (-1, 1)))
            # Remaining `count - 1` entries of the row.
            others = tf.slice(l, (0, axis_T + 1), (-1, count - 1))
            # Left-pad with `i` zeros to get a full-width row.
            row = tf.pad(tf.concat((diag, others), axis=1), ((0, 0), (i, 0)))
            rows.append(row)
            axis_T += count

        # Assemble L(x) and multiply by its transpose to get P.
        networks['L'] = tf.transpose(tf.stack(rows, axis=1), (0, 2, 1))
        networks['P'] = P = tf.matmul(
            networks['L'], tf.transpose(networks['L'], (0, 2, 1)))

        # A = -1/2 * (u - mu)^T P (u - mu), shape [batch, 1, 1].
        mu_u = tf.expand_dims(networks['u'] - networks['mu'], -1)
        p_mu_u = tf.matmul(P, mu_u, name='Pxmu_u')
        p_mess = tf.matmul(tf.transpose(mu_u, [0, 2, 1]), p_mu_u,
                           name='mu_u_TxPxmu_u')
        networks['A'] = tf.multiply(-1. / 2., p_mess, name='A')

        # Flatten A to [batch, 1] before adding V. Adding the rank-3 A to
        # the rank-2 V directly would broadcast across the batch axis and
        # mix values from different samples.
        advantage = tf.reshape(networks['A'], [-1, 1])
        networks['Q'] = tf.add(advantage, networks['V'], name='Q_func')

        # Loss: mean squared error between the target and Q. Q is not
        # squeezed: y_ is [batch, 1], and comparing it with a rank-1
        # [batch] tensor would broadcast to a [batch, batch] matrix.
        networks['y_'] = tf.placeholder(tf.float32, [None, 1], name='y_i')
        networks['loss'] = tf.reduce_mean(
            tf.squared_difference(networks['y_'], networks['Q']),
            name='loss')

        # Adam optimizer step on the loss.
        networks['gdo'] = tf.train.AdamOptimizer(
            learning_rate=self.alpha, epsilon=0.5).minimize(networks['loss'])

    self.network[name] = networks
    self.network[name]['vars'] = get_variables(name)