def inference_conv_test(images):
  conv1 = _conv(images, 3, 64, 7, 7, 2, 2, 'SAME')
  resh1 = tf.reshape(conv1, [-1, 147456])
  affn = _affine(resh1, 147456, 128)  # Affine layer not needed to reproduce the error
  return affn
def imagenet_inputs(batch_size, image_size, num_readers=1,
                    num_preprocess_threads=4):
  """Loads a batch of imagenet inputs.

  Used as a replacement for inception.image_processing.inputs in
  tensorflow/models in order to get around the use of hard-coded flags in the
  image_processing module.

  Args:
    batch_size: int, batch size.
    image_size: int. The images will be resized bilinearly to shape
        [image_size, image_size].
    num_readers: int, number of parallel readers. Must be at least 1.
    num_preprocess_threads: int, number of preprocessing threads per tower.
        Must be a multiple of 4.

  Returns:
    4-D tensor of images of shape [batch_size, image_size, image_size, 3], with
    values in [0, 1].

  Raises:
    IOError: If ImageNet data files cannot be found.
    ValueError: If `num_preprocess_threads` is not a multiple of 4 or
        `num_readers` is less than 1.
  """
  imagenet = imagenet_data.ImagenetData('train')

  with tf.name_scope('batch_processing'):
    data_files = imagenet.data_files()
    if data_files is None:
      raise IOError('No ImageNet data files found')

    # Create filename_queue.
    filename_queue = tf.train.string_input_producer(data_files,
                                                    shuffle=True,
                                                    capacity=16)

    if num_preprocess_threads % 4:
      raise ValueError('Please make num_preprocess_threads a multiple '
                       'of 4 (%d %% 4 != 0).' % num_preprocess_threads)

    if num_readers < 1:
      raise ValueError('Please make num_readers at least 1')

    # Approximate number of examples per shard.
    examples_per_shard = 1024
    # Size the random shuffle queue to balance between good global
    # mixing (more examples) and memory use (fewer examples).
    # 1 image uses 299*299*3*4 bytes = 1MB
    # The default input_queue_memory_factor is 16 implying a shuffling queue
    # size: examples_per_shard * 16 * 1MB = 17.6GB
    input_queue_memory_factor = 16
    min_queue_examples = examples_per_shard * input_queue_memory_factor
    examples_queue = tf.RandomShuffleQueue(
        capacity=min_queue_examples + 3 * batch_size,
        min_after_dequeue=min_queue_examples,
        dtypes=[tf.string])

    # Create multiple readers to populate the queue of examples.
    enqueue_ops = []
    for _ in range(num_readers):
      reader = imagenet.reader()
      _, value = reader.read(filename_queue)
      enqueue_ops.append(examples_queue.enqueue([value]))

    tf.train.queue_runner.add_queue_runner(
        tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops))
    example_serialized = examples_queue.dequeue()

    images_and_labels = []
    for _ in range(num_preprocess_threads):
      # Parse a serialized Example proto to extract the image and metadata.
      image_buffer, label_index, _, _ = _parse_example_proto(
          example_serialized)

      image = tf.image.decode_jpeg(image_buffer, channels=3)

      # pylint: disable=protected-access
      image = _aspect_preserving_resize(image, image_size + 2)
      image = _central_crop([image], image_size, image_size)[0]
      # pylint: enable=protected-access

      image.set_shape([image_size, image_size, 3])
      image = tf.to_float(image) / 255.0

      images_and_labels.append([image, label_index])

    images, label_index_batch = tf.train.batch_join(
        images_and_labels,
        batch_size=batch_size,
        capacity=2 * num_preprocess_threads * batch_size)

    images = tf.reshape(images, shape=[batch_size, image_size,
                                       image_size, 3])

    # Display the training images in the visualizer.
    tf.summary.image('images', images)

    return images, tf.reshape(label_index_batch, [batch_size])
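# Minimal TF1 usage sketch (not from the original source): the batch tensors
# returned above are fed by input queues, so a Coordinator and queue runners
# must be started before calling sess.run. Sizes below are illustrative only.
import tensorflow as tf

images, labels = imagenet_inputs(batch_size=32, image_size=224)
with tf.Session() as sess:
  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(sess=sess, coord=coord)
  try:
    image_batch, label_batch = sess.run([images, labels])  # one batch
  finally:
    coord.request_stop()
    coord.join(threads)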
def __init__(
    self,
    num_unique_documents,
    vocab_size,
    num_topics,
    freqs,
    embedding_size=128,
    num_sampled=40,
    learning_rate=1e-3,
    lmbda=150.0,
    alpha=None,
    power=0.75,
    batch_size=32,
    clip_gradients=5.0,
    **kwargs
):
    device = get_device(**kwargs)
    _graph = tf.Graph()
    with _graph.as_default():
        with tf.device(device):
            moving_avgs = tf.train.ExponentialMovingAverage(0.9)
            self.batch_size = batch_size
            self.freqs = freqs

            self.X = tf.placeholder(tf.int32, shape=[None])
            self.Y = tf.placeholder(tf.int64, shape=[None])
            self.DOC = tf.placeholder(tf.int32, shape=[None])
            self.switch_loss = tf.Variable(0, trainable=False)
            train_labels = tf.reshape(self.Y, [-1, 1])
            sampler = tf.nn.fixed_unigram_candidate_sampler(
                train_labels,
                num_true=1,
                num_sampled=num_sampled,
                unique=True,
                range_max=vocab_size,
                distortion=power,
                unigrams=self.freqs,
            )
            self.word_embedding = tf.Variable(
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0)
            )
            self.nce_weights = tf.Variable(
                tf.truncated_normal(
                    [vocab_size, embedding_size],
                    stddev=tf.sqrt(1 / embedding_size),
                )
            )
            self.nce_biases = tf.Variable(tf.zeros([vocab_size]))
            scalar = 1 / np.sqrt(num_unique_documents + num_topics)
            self.doc_embedding = tf.Variable(
                tf.random_normal(
                    [num_unique_documents, num_topics],
                    mean=0,
                    stddev=50 * scalar,
                )
            )
            self.topic_embedding = tf.get_variable(
                'topic_embedding',
                shape=[num_topics, embedding_size],
                dtype=tf.float32,
                initializer=tf.orthogonal_initializer(gain=scalar),
            )
            pivot = tf.nn.embedding_lookup(self.word_embedding, self.X)
            proportions = tf.nn.embedding_lookup(self.doc_embedding, self.DOC)
            doc = tf.matmul(proportions, self.topic_embedding)

            doc_context = doc
            word_context = pivot
            context = tf.add(word_context, doc_context)
            loss_word2vec = tf.reduce_mean(
                tf.nn.nce_loss(
                    weights=self.nce_weights,
                    biases=self.nce_biases,
                    labels=self.Y,
                    inputs=context,
                    num_sampled=num_sampled,
                    num_classes=vocab_size,
                    num_true=1,
                    sampled_values=sampler,
                )
            )
            self.fraction = tf.Variable(1, trainable=False, dtype=tf.float32)

            n_topics = self.doc_embedding.get_shape()[1].value
            log_proportions = tf.nn.log_softmax(self.doc_embedding)
            if alpha is None:
                alpha = 1.0 / n_topics
            loss = (alpha - 1) * log_proportions
            prior = tf.reduce_sum(loss)
            loss_lda = lmbda * self.fraction * prior

            global_step = tf.Variable(0, trainable=False, name='global_step')
            self.cost = tf.cond(
                global_step < self.switch_loss,
                lambda: loss_word2vec,
                lambda: loss_word2vec + loss_lda,
            )
            loss_avgs_op = moving_avgs.apply(
                [loss_lda, loss_word2vec, self.cost]
            )
            with tf.control_dependencies([loss_avgs_op]):
                optimizer = tf.train.AdamOptimizer(
                    learning_rate=learning_rate
                )
                gvs = optimizer.compute_gradients(self.cost)
                capped_gvs = [
                    (
                        tf.clip_by_value(
                            grad, -clip_gradients, clip_gradients
                        ),
                        var,
                    )
                    for grad, var in gvs
                ]
                self.optimizer = optimizer.apply_gradients(capped_gvs)
        self.sess = generate_session(_graph, **kwargs)
        self.sess.run(tf.global_variables_initializer())
def simulate(self, action):
  with tf.name_scope("environment/simulate"):
    actions = tf.concat([tf.expand_dims(action, axis=1)] * self._num_frames,
                        axis=1)
    history = self.history_buffer.get_all_elements()
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
      # We only need 1 target frame here, set it.
      hparams_target_frames = self._model.hparams.video_num_target_frames
      self._model.hparams.video_num_target_frames = 1
      model_output = self._model.infer({
          "inputs": history,
          "input_action": actions,
          "reset_internal_states": self._reset_model.read_value()
      })
      self._model.hparams.video_num_target_frames = hparams_target_frames

    observ = tf.cast(tf.squeeze(model_output["targets"], axis=1),
                     self.observ_dtype)

    reward = tf.to_float(model_output["target_reward"])
    reward = tf.reshape(reward, shape=(self.batch_size,)) + self._min_reward

    if self._intrinsic_reward_scale:
      # Use the model's uncertainty about its prediction as an intrinsic
      # reward. The uncertainty is measured by the log probability of the
      # predicted pixel value.
      if "targets_logits" not in model_output:
        raise ValueError("The use of intrinsic rewards requires access to "
                         "the logits. Ensure that model.infer returns "
                         "'targets_logits'")
      uncertainty_reward = compute_uncertainty_reward(
          model_output["targets_logits"], model_output["targets"])
      uncertainty_reward = tf.minimum(
          1., self._intrinsic_reward_scale * uncertainty_reward)
      uncertainty_reward = tf.Print(uncertainty_reward, [uncertainty_reward],
                                    message="uncertainty_reward", first_n=1,
                                    summarize=8)
      reward += uncertainty_reward

    done = tf.constant(False, tf.bool, shape=(self.batch_size,))

    with tf.control_dependencies([observ]):
      dump_frame_op = tf.cond(
          self._video_condition,
          lambda: tf.py_func(self._video_dump_frame,  # pylint: disable=g-long-lambda
                             [observ, reward], []),
          tf.no_op)
      with tf.control_dependencies([
          self._observ.assign(observ),
          self.history_buffer.move_by_one_element(observ), dump_frame_op
      ]):
        clear_reset_model_op = tf.assign(self._reset_model, tf.constant(0.0))
        with tf.control_dependencies([clear_reset_model_op]):
          return tf.identity(reward), tf.identity(done)
def GetProjectLastDim(cls, inputs, weight, input_dim, output_dim, proj_obj):
  """Linear projection on the last dim of the input tensor along with pruning.

  This is a TPU efficient implementation to avoid reshaping inputs to Rank-2
  tensor by using Einsum for the compute.

  Args:
    inputs: An input Tensor, the last dimension of which is input_dim.
    weight: A weight matrix with shape [input_dim, output_dim].
    input_dim: An integer or a symbolic dim, the last dimension of the inputs.
    output_dim: An integer or a symbolic dim, the last dimension of the
      outputs.
    proj_obj: a ProjectionLayer object.

  Returns:
    An output Tensor of the same rank as inputs, the last dimension is
    output_dim.
  """
  theta = proj_obj.theta
  p = proj_obj.params
  input_dim = int(
      symbolic.ToStatic(input_dim) if symbolic.IsExpr(input_dim)
      else input_dim)
  output_dim = int(
      symbolic.ToStatic(output_dim) if symbolic.IsExpr(output_dim)
      else output_dim)
  if (py_utils.use_tpu() and inputs.shape is not None and
      inputs.shape.rank is not None and inputs.shape.rank < 26):
    # Avoids reshape if feasible and uses Einsum.
    if inputs.shape.rank == 2:
      outputs = tf.matmul(inputs, weight)
    else:
      outputs = cls.GetEinSumResult(inputs, proj_obj)
  else:
    if p.pruning_hparams_dict[
        'compression_option'] == 9 and p.pruning_hparams_dict[
            'compress_input']:
      blocked_inputs = tf.reshape(
          inputs,
          py_utils.ToStaticShape(
              [-1, p.pruning_hparams_dict['input_block_size']]))
      compressed_inputs = tf.reshape(
          py_utils.Matmul(blocked_inputs, theta.b_matrix_tfvar),
          py_utils.ToStaticShape([
              -1, input_dim //
              p.pruning_hparams_dict['input_compression_factor']
          ]))
    else:
      compressed_inputs = tf.reshape(inputs,
                                     py_utils.ToStaticShape([-1, input_dim]))

    if p.pruning_hparams_dict['compression_option'] == 10:
      if p.pruning_hparams_dict['block_method'] == 'mask':
        intermediate_result = py_utils.Matmul(
            compressed_inputs,
            tf.multiply(theta.c_matrix_tfvar, theta.c_mask_tfvar))
      elif p.pruning_hparams_dict['block_method'] == 'loop':
        num_blocks = p.pruning_hparams_dict['block_compression_factor']
        input_splitted = tf.split(compressed_inputs, num_blocks, axis=-1)
        output_splitted = []
        for i, input_i in enumerate(input_splitted):
          output_splitted.append(
              py_utils.Matmul(input_i, theta.c_matrix_tfvar[i, :, :]))
        intermediate_result = tf.concat(output_splitted, axis=-1)
    else:
      intermediate_result = py_utils.Matmul(compressed_inputs,
                                            theta.c_matrix_tfvar)

    if p.pruning_hparams_dict[
        'compression_option'] == 9 and p.pruning_hparams_dict[
            'compress_output']:
      blocked_intermediate_result = tf.reshape(
          intermediate_result,
          py_utils.ToStaticShape([
              -1, p.pruning_hparams_dict['output_block_size'] //
              p.pruning_hparams_dict['output_compression_factor']
          ]))
      outputs = py_utils.Matmul(blocked_intermediate_result,
                                theta.d_matrix_tfvar)
    else:
      outputs = intermediate_result

    outputs = tf.reshape(
        outputs,
        tf.concat([
            tf.cast(py_utils.GetShape(inputs)[:-1], tf.int32),
            py_utils.ToStaticShape([output_dim])
        ], axis=0))
  return outputs
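# Minimal sketch (not from the original source) of the Einsum path that
# cls.GetEinSumResult stands in for: projecting the last dimension of a
# rank-3 tensor without first flattening it to rank 2.
import tensorflow as tf

inputs = tf.random_normal([8, 10, 16])   # [batch, time, input_dim]
weight = tf.random_normal([16, 32])      # [input_dim, output_dim]
via_einsum = tf.einsum('bti,io->bto', inputs, weight)
via_reshape = tf.reshape(
    tf.matmul(tf.reshape(inputs, [-1, 16]), weight), [8, 10, 32])
# Both yield the same [8, 10, 32] result; Einsum avoids the two reshapes,
# which is friendlier to TPU memory layouts.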
def detection_loss(cls_outputs, box_outputs, labels, params):
  """Computes total detection loss.

  Computes total detection loss including box and class loss from all levels.

  Args:
    cls_outputs: an OrderedDict with keys representing levels and values
      representing logits in [batch_size, height, width, num_anchors].
    box_outputs: an OrderedDict with keys representing levels and values
      representing box regression targets in
      [batch_size, height, width, num_anchors * 4].
    labels: the dictionary returned from the dataloader that includes
      groundtruth targets.
    params: the dictionary including training parameters specified in
      default_hparams function in this file.

  Returns:
    total_loss: a float tensor representing total loss reduced from class and
      box losses from all levels.
    cls_loss: a float tensor representing total class loss.
    box_loss: a float tensor representing total box regression loss.
    box_iou_loss: a float tensor representing total box iou loss.
  """
  # Sum all positives in a batch for normalization and avoid zero
  # num_positives_sum, which would lead to inf loss during training
  num_positives_sum = tf.reduce_sum(labels['mean_num_positives']) + 1.0
  levels = cls_outputs.keys()

  cls_losses = []
  box_losses = []
  box_iou_losses = []
  for level in levels:
    if params['data_format'] == 'channels_first':
      labels['cls_targets_%d' % level] = tf.transpose(
          labels['cls_targets_%d' % level], [0, 3, 1, 2])
      labels['box_targets_%d' % level] = tf.transpose(
          labels['box_targets_%d' % level], [0, 3, 1, 2])
    # Onehot encoding for classification labels.
    cls_targets_at_level = tf.one_hot(
        labels['cls_targets_%d' % level], params['num_classes'])

    if params['data_format'] == 'channels_first':
      bs, _, width, height, _ = cls_targets_at_level.get_shape().as_list()
      cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                        [bs, -1, width, height])
    else:
      bs, width, height, _, _ = cls_targets_at_level.get_shape().as_list()
      cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                        [bs, width, height, -1])
    box_targets_at_level = labels['box_targets_%d' % level]

    cls_loss = _classification_loss(
        cls_outputs[level],
        cls_targets_at_level,
        num_positives_sum,
        alpha=params['alpha'],
        gamma=params['gamma'])
    if params['data_format'] == 'channels_first':
      cls_loss = tf.reshape(cls_loss,
                            [bs, -1, width, height, params['num_classes']])
    else:
      cls_loss = tf.reshape(cls_loss,
                            [bs, width, height, -1, params['num_classes']])
    cls_loss *= tf.cast(
        tf.expand_dims(tf.not_equal(labels['cls_targets_%d' % level], -2), -1),
        tf.float32)
    cls_losses.append(tf.reduce_sum(cls_loss))

    box_losses.append(
        _box_loss(
            box_outputs[level],
            box_targets_at_level,
            num_positives_sum,
            delta=params['delta']))

    if params['iou_loss_type']:
      box_iou_losses.append(
          _box_iou_loss(box_outputs[level], box_targets_at_level,
                        num_positives_sum, params['iou_loss_type']))

  # Sum per level losses to total loss.
  cls_loss = tf.add_n(cls_losses)
  box_loss = tf.add_n(box_losses)
  box_iou_loss = tf.add_n(box_iou_losses) if box_iou_losses else 0.0
  total_loss = (
      cls_loss +
      params['box_loss_weight'] * box_loss +
      params['iou_loss_weight'] * box_iou_loss)
  return total_loss, cls_loss, box_loss, box_iou_loss
def _model_fn(features, labels, mode, params, model, variable_filter_fn=None):
  """Model definition entry.

  Args:
    features: the input image tensor with shape [batch_size, height, width, 3].
      The height and width are fixed and equal.
    labels: the input labels in a dictionary. The labels include class targets
      and box targets which are dense label maps. The labels are generated from
      get_input_fn function in data/dataloader.py
    mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
    params: the dictionary defines hyperparameters of model. The default
      settings are in default_hparams function in this file.
    model: the model outputs class logits and box regression outputs.
    variable_filter_fn: the filter function that takes trainable_variables and
      returns the variable list after applying the filter rule.

  Returns:
    tpu_spec: the TPUEstimatorSpec to run training, evaluation, or prediction.

  Raises:
    RuntimeError: if both ckpt and backbone_ckpt are set.
  """
  # Convert params (dict) to Config for easier access.
  if params['data_format'] == 'channels_first':
    features = tf.transpose(features, [0, 3, 1, 2])

  def _model_outputs(inputs):
    return model(inputs, config=hparams_config.Config(params))

  cls_outputs, box_outputs = utils.build_model_with_precision(
      params['precision'], _model_outputs, features)

  levels = cls_outputs.keys()
  for level in levels:
    cls_outputs[level] = tf.cast(cls_outputs[level], tf.float32)
    box_outputs[level] = tf.cast(box_outputs[level], tf.float32)

  # First check if it is in PREDICT mode.
  if mode == tf.estimator.ModeKeys.PREDICT:
    predictions = {
        'image': features,
    }
    for level in levels:
      predictions['cls_outputs_%d' % level] = cls_outputs[level]
      predictions['box_outputs_%d' % level] = box_outputs[level]
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  # Set up training loss and learning rate.
  update_learning_rate_schedule_parameters(params)
  global_step = tf.train.get_or_create_global_step()
  learning_rate = learning_rate_schedule(params, global_step)

  # cls_loss and box_loss are for logging. only total_loss is optimized.
  det_loss, cls_loss, box_loss, box_iou_loss = detection_loss(
      cls_outputs, box_outputs, labels, params)
  l2loss = reg_l2_loss(params['weight_decay'])
  total_loss = det_loss + l2loss

  if mode == tf.estimator.ModeKeys.TRAIN:
    utils.scalar('lrn_rate', learning_rate)
    utils.scalar('trainloss/cls_loss', cls_loss)
    utils.scalar('trainloss/box_loss', box_loss)
    utils.scalar('trainloss/box_iou_loss', box_iou_loss)
    utils.scalar('trainloss/det_loss', det_loss)
    utils.scalar('trainloss/l2_loss', l2loss)
    utils.scalar('trainloss/loss', total_loss)

  moving_average_decay = params['moving_average_decay']
  if moving_average_decay:
    ema = tf.train.ExponentialMovingAverage(
        decay=moving_average_decay, num_updates=global_step)
    ema_vars = utils.get_ema_vars()

  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.MomentumOptimizer(
        learning_rate, momentum=params['momentum'])
    if params['use_tpu']:
      optimizer = tf.tpu.CrossShardOptimizer(optimizer)

    # Batch norm requires update_ops to be added as a train_op dependency.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    var_list = tf.trainable_variables()
    if variable_filter_fn:
      var_list = variable_filter_fn(var_list)

    if params.get('clip_gradients_norm', 0) > 0:
      logging.info('clip gradients norm by %f', params['clip_gradients_norm'])
      grads_and_vars = optimizer.compute_gradients(total_loss, var_list)
      with tf.name_scope('clip'):
        grads = [gv[0] for gv in grads_and_vars]
        tvars = [gv[1] for gv in grads_and_vars]
        clipped_grads, gnorm = tf.clip_by_global_norm(
            grads, params['clip_gradients_norm'])
        utils.scalar('gnorm', gnorm)
        grads_and_vars = list(zip(clipped_grads, tvars))

      with tf.control_dependencies(update_ops):
        train_op = optimizer.apply_gradients(grads_and_vars, global_step)
    else:
      with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(
            total_loss, global_step, var_list=var_list)

    if moving_average_decay:
      with tf.control_dependencies([train_op]):
        train_op = ema.apply(ema_vars)
  else:
    train_op = None

  eval_metrics = None
  if mode == tf.estimator.ModeKeys.EVAL:

    def metric_fn(**kwargs):
      """Returns a dictionary that has the evaluation metrics."""
      batch_size = params['batch_size']
      if params['use_tpu']:
        batch_size = params['batch_size'] * params['num_shards']
      eval_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                     params['num_scales'],
                                     params['aspect_ratios'],
                                     params['anchor_scale'],
                                     params['image_size'])
      anchor_labeler = anchors.AnchorLabeler(eval_anchors,
                                             params['num_classes'])
      cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
      box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])

      if params.get('testdev_dir', None):
        logging.info('Eval testdev_dir %s', params['testdev_dir'])
        coco_metrics = coco_metric_fn(
            batch_size,
            anchor_labeler,
            params['val_json_file'],
            testdev_dir=params['testdev_dir'],
            disable_pyfun=params.get('disable_pyfun', None),
            **kwargs)
      else:
        logging.info('Eval val with groundtruths %s.', params['val_json_file'])
        coco_metrics = coco_metric_fn(batch_size, anchor_labeler,
                                      params['val_json_file'], **kwargs)

      # Add metrics to output.
      output_metrics = {
          'cls_loss': cls_loss,
          'box_loss': box_loss,
      }
      output_metrics.update(coco_metrics)
      return output_metrics

    cls_loss_repeat = tf.reshape(
        tf.tile(tf.expand_dims(cls_loss, 0), [params['batch_size'],]),
        [params['batch_size'], 1])
    box_loss_repeat = tf.reshape(
        tf.tile(tf.expand_dims(box_loss, 0), [params['batch_size'],]),
        [params['batch_size'], 1])
    metric_fn_inputs = {
        'cls_loss_repeat': cls_loss_repeat,
        'box_loss_repeat': box_loss_repeat,
        'source_ids': labels['source_ids'],
        'groundtruth_data': labels['groundtruth_data'],
        'image_scales': labels['image_scales'],
    }
    add_metric_fn_inputs(params, cls_outputs, box_outputs, metric_fn_inputs)
    eval_metrics = (metric_fn, metric_fn_inputs)

  checkpoint = params.get('ckpt') or params.get('backbone_ckpt')
  if checkpoint and mode == tf.estimator.ModeKeys.TRAIN:
    # Initialize the model from an EfficientDet or backbone checkpoint.
    if params.get('ckpt') and params.get('backbone_ckpt'):
      raise RuntimeError(
          '--backbone_ckpt and --checkpoint are mutually exclusive')
    if params.get('backbone_ckpt'):
      var_scope = params['backbone_name'] + '/'
      if params['ckpt_var_scope'] is None:
        # Use backbone name as default checkpoint scope.
        ckpt_scope = params['backbone_name'] + '/'
      else:
        ckpt_scope = params['ckpt_var_scope'] + '/'
    else:
      # Load every var in the given checkpoint.
      var_scope = ckpt_scope = '/'

    def scaffold_fn():
      """Loads pretrained model through scaffold function."""
      logging.info('restore variables from %s', checkpoint)
      var_map = utils.get_ckpt_var_map(
          ckpt_path=checkpoint,
          ckpt_scope=ckpt_scope,
          var_scope=var_scope,
          var_exclude_expr=params.get('var_exclude_expr', None))
      tf.train.init_from_checkpoint(checkpoint, var_map)
      return tf.train.Scaffold()
  elif mode == tf.estimator.ModeKeys.EVAL and moving_average_decay:

    def scaffold_fn():
      """Load moving average variables for eval."""
      logging.info('Load EMA vars with ema_decay=%f', moving_average_decay)
      restore_vars_dict = ema.variables_to_restore(ema_vars)
      saver = tf.train.Saver(restore_vars_dict)
      return tf.train.Scaffold(saver=saver)
  else:
    scaffold_fn = None

  return tf.estimator.tpu.TPUEstimatorSpec(
      mode=mode,
      loss=total_loss,
      train_op=train_op,
      eval_metrics=eval_metrics,
      host_call=utils.get_tpu_host_call(global_step, params),
      scaffold_fn=scaffold_fn)
def DenseAR(x,
            h=None,
            hidden_layers=[],
            activation=tf.nn.relu,
            log_scale_clip=None,
            log_scale_clip_pre=None,
            train=False,
            dropout_rate=0.0,
            sigmoid_scale=False,
            log_scale_factor=1.0,
            log_scale_reg=0.0,
            shift_only=False,
            *args,
            **kwargs):
  input_depth = x.shape.with_rank_at_least(1)[-1].value
  if input_depth is None:
    raise NotImplementedError(
        "Rightmost dimension must be known prior to graph execution.")
  input_shape = (np.int32(x.shape.as_list())
                 if x.shape.is_fully_defined() else tf.shape(x))
  for i, units in enumerate(hidden_layers):
    x = tfb.masked_dense(
        inputs=x,
        units=units,
        num_blocks=input_depth,
        exclusive=True if i == 0 else False,
        activation=activation,
        *args,
        **kwargs)
    if h is not None:
      x += tf.layers.dense(h, units, use_bias=False, *args, **kwargs)
    if dropout_rate > 0:
      x = tf.layers.dropout(x, dropout_rate, training=train)

  if shift_only:
    shift = tfb.masked_dense(
        inputs=x,
        units=input_depth,
        num_blocks=input_depth,
        activation=None,
        *args,
        **kwargs)
    return shift, None
  else:
    if (log_scale_factor == 1.0 and log_scale_reg == 0.0 and
        not log_scale_clip_pre):
      x = tfb.masked_dense(
          inputs=x,
          units=2 * input_depth,
          num_blocks=input_depth,
          activation=None,
          *args,
          **kwargs)
      if h is not None:
        x += tf.layers.dense(h, 2 * input_depth, use_bias=False, *args,
                             **kwargs)
      x = tf.reshape(x, shape=tf.concat([input_shape, [2]], axis=0))
      shift, log_scale = tf.unstack(x, num=2, axis=-1)
    else:
      shift = tfb.masked_dense(
          inputs=x,
          units=input_depth,
          num_blocks=input_depth,
          activation=None,
          *args,
          **kwargs)
      if log_scale_reg > 0.0:
        regularizer = lambda w: log_scale_reg * 2.0 * tf.nn.l2_loss(w)
      else:
        regularizer = None
      log_scale = tfb.masked_dense(
          inputs=x,
          units=input_depth,
          num_blocks=input_depth,
          activation=None,
          use_bias=False,
          kernel_regularizer=regularizer,
          *args,
          **kwargs)
      log_scale *= log_scale_factor
      if log_scale_clip_pre:
        log_scale = log_scale_clip_pre * tf.nn.tanh(
            log_scale / log_scale_clip_pre)
      log_scale += tf.get_variable(
          "log_scale_bias", [1, input_depth],
          initializer=tf.zeros_initializer())
      if h is not None:
        shift += tf.layers.dense(h, input_depth, use_bias=False, *args,
                                 **kwargs)
        log_scale += tf.layers.dense(h, input_depth, use_bias=False, *args,
                                     **kwargs)

    if sigmoid_scale:
      log_scale = tf.log_sigmoid(log_scale)

    if log_scale_clip:
      log_scale = log_scale_clip * tf.nn.tanh(log_scale / log_scale_clip)

    return shift, log_scale
def _predict(self, image_features, proposal_boxes, **kwargs):
  """Computes encoded object locations and corresponding confidences.

  Args:
    image_features: A list of float tensors of shape [batch_size, height_i,
      width_i, channels_i] containing features for a batch of images.
    proposal_boxes: A float tensor of shape [batch_size, num_proposals,
      box_code_size].
    **kwargs: Unused Keyword args

  Returns:
    box_encodings: A list of float tensors of shape
      [batch_size, num_anchors_i, q, code_size] representing the location of
      the objects, where q is 1 or the number of classes. Each entry in the
      list corresponds to a feature map in the input `image_features` list.
    class_predictions_with_background: A list of float tensors of shape
      [batch_size, num_anchors_i, num_classes + 1] representing the class
      predictions for the proposals. Each entry in the list corresponds to a
      feature map in the input `image_features` list.

  Raises:
    ValueError: if num_predictions_per_location is not 1 or if
      len(image_features) is not 1.
  """
  if len(image_features) != 1:
    raise ValueError('length of `image_features` must be 1. Found {}'.format(
        len(image_features)))
  image_feature = image_features[0]
  batch_size = tf.shape(proposal_boxes)[0]
  num_boxes = tf.shape(proposal_boxes)[1]
  net = image_feature
  for layer in self._shared_conv_layers:
    net = layer(net)

  # Location predictions.
  box_net = net
  for layer in self._box_encoder_layers:
    box_net = layer(box_net)
  box_encodings = ops.batch_position_sensitive_crop_regions(
      box_net,
      boxes=proposal_boxes,
      crop_size=self._crop_size,
      num_spatial_bins=self._num_spatial_bins,
      global_pool=True)
  box_encodings = tf.squeeze(box_encodings, axis=[2, 3])
  box_encodings = tf.reshape(box_encodings,
                             [batch_size * num_boxes, 1, self.num_classes,
                              self._box_code_size])

  # Class predictions.
  class_net = net
  for layer in self._class_predictor_layers:
    class_net = layer(class_net)
  class_predictions_with_background = (
      ops.batch_position_sensitive_crop_regions(
          class_net,
          boxes=proposal_boxes,
          crop_size=self._crop_size,
          num_spatial_bins=self._num_spatial_bins,
          global_pool=True))
  class_predictions_with_background = tf.squeeze(
      class_predictions_with_background, axis=[2, 3])
  class_predictions_with_background = tf.reshape(
      class_predictions_with_background,
      [batch_size * num_boxes, 1, self._total_classes])

  return {BOX_ENCODINGS: [box_encodings],
          CLASS_PREDICTIONS_WITH_BACKGROUND:
              [class_predictions_with_background]}
def DecodeLabelAndImage(r):
  r = tf.decode_raw(r, tf.uint8)
  return tf.to_float(
      tf.transpose(tf.reshape(r[1:], [3, 32, 32]),
                   [1, 2, 0])) / 255.0, tf.to_int32(r[0])
def EffectiveSampleSize(states,
                        filter_beyond_lag=300,
                        filter_threshold=0.05,
                        center=True,
                        normalize=True):
  """ESS computation for one single Tensor argument."""

  def _axis_size(x, axis=None):
    """Get number of elements of `x` in `axis`, as type `x.dtype`."""
    if axis is None:
      return tf.cast(tf.size(x), x.dtype)
    return tf.cast(tf.reduce_prod(tf.gather(tf.shape(x), axis)), x.dtype)

  with tf.name_scope(
      "effective_sample_size_single_state",
      values=[states, filter_beyond_lag, filter_threshold]):
    states = tf.convert_to_tensor(states, name="states")
    dt = states.dtype

    # filter_beyond_lag == None ==> auto_corr is the full sequence.
    auto_corr = SanitizedAutoCorrelation(
        states,
        axis=0,
        center=center,
        normalize=normalize,
        max_lags=filter_beyond_lag)
    auto_corr = tf.reduce_mean(auto_corr, 1)
    if filter_threshold is not None:
      filter_threshold = tf.convert_to_tensor(
          filter_threshold, dtype=dt, name="filter_threshold")
      # Get a binary mask to zero out values of auto_corr below the threshold.
      #   mask[i, ...] = 1 if auto_corr[j, ...] > threshold for all j <= i,
      #   mask[i, ...] = 0, otherwise.
      # So, along dimension zero, the mask will look like [1, 1, ..., 0, 0,...]
      # Building step by step,
      #   Assume auto_corr = [1, 0.5, 0.0, 0.3], and filter_threshold = 0.2.
      # Step 1:  mask = [False, False, True, False]
      mask = tf.abs(auto_corr) < filter_threshold
      # Step 2:  mask = [0, 0, 1, 0]
      mask = tf.cast(mask, dtype=dt)
      # Step 3:  mask = [0, 0, 1, 1]
      mask = tf.cumsum(mask, axis=0)
      # Step 4:  mask = [1, 1, 0, 0]
      mask = tf.maximum(1. - mask, 0.)
      auto_corr *= mask

    # With R[k] := auto_corr[k, ...],
    # ESS = N / {1 + 2 * Sum_{k=1}^N (N - k) / N * R[k]}
    #     = N / {-1 + 2 * Sum_{k=0}^N (N - k) / N * R[k]} (since R[0] = 1)
    #     approx N / {-1 + 2 * Sum_{k=0}^M (N - k) / N * R[k]}
    # where M is the filter_beyond_lag truncation point chosen above.

    # Get the factor (N - k) / N, and give it shape [M, 1,...,1], having total
    # ndims the same as auto_corr.
    n = _axis_size(states, axis=0)
    k = tf.range(0., _axis_size(auto_corr, axis=0))
    nk_factor = (n - k) / n
    if auto_corr.shape.ndims is not None:
      new_shape = [-1] + [1] * (auto_corr.shape.ndims - 1)
    else:
      new_shape = tf.concat(
          ([-1], tf.ones([tf.rank(auto_corr) - 1], dtype=tf.int32)), axis=0)
    nk_factor = tf.reshape(nk_factor, new_shape)

    return n / (1.0 + 2 * tf.reduce_sum(
        nk_factor[1:, Ellipsis] * auto_corr[1:, Ellipsis], axis=0))
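# A minimal NumPy sketch (not from the original source) of the ESS formula in
# the comments above: ESS = N / (1 + 2 * Sum_{k>=1} (N - k) / N * R[k]).
# For simplicity it treats the autocorrelation array length as the chain
# length N, i.e. it assumes no truncation (M == N).
import numpy as np

def ess_from_autocorr(auto_corr):
  """auto_corr: 1-D array R[0..N-1] with R[0] = 1, already thresholded."""
  n = float(len(auto_corr))
  k = np.arange(len(auto_corr))
  nk_factor = (n - k) / n
  return n / (1.0 + 2.0 * np.sum(nk_factor[1:] * auto_corr[1:]))

# An uncorrelated chain, R = [1, 0, 0, ...], gives ESS == N.
print(ess_from_autocorr(np.array([1.0, 0.0, 0.0, 0.0])))  # -> 4.0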
def DecodeLabel(label):
  label = tf.decode_raw(label, tf.uint8)
  label = tf.reshape(label, [])
  return tf.to_int32(label)
def DecodeImage(image):
  image = tf.decode_raw(image, tf.uint8)
  image = tf.cast(image, tf.float32)
  image = tf.reshape(image, [28, 28, 1])
  return image / 255.0
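# Minimal usage sketch (not from the original source): wiring the decode
# helpers above into a tf.data pipeline. The record layout (raw 28x28 uint8
# images and 1-byte labels in separate files, no headers) is an assumption
# for illustration; adjust record_bytes and any header offsets to your files.
import tensorflow as tf

image_ds = tf.data.FixedLengthRecordDataset('images.bin', record_bytes=28 * 28)
label_ds = tf.data.FixedLengthRecordDataset('labels.bin', record_bytes=1)
dataset = tf.data.Dataset.zip(
    (image_ds.map(DecodeImage), label_ds.map(DecodeLabel))).batch(32)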
    if activation is None:
        result = layer
    else:
        result = activation(layer, name='layer')
    return result


def CalcMistake(labels, logits):
    return tf.abs(tf.subtract(labels, logits))


with tf.name_scope('Input'):
    xs = tf.placeholder(tf.float32, [None, stepSize, inputSize], name='inputX')
    ys = tf.placeholder(tf.float32, [None, stepSize, inputSize], name='inputY')

with tf.name_scope('Layer01'):
    input01 = tf.reshape(xs, [-1, inputSize], name='dim2Input01')
    weight01 = GetWeight([inputSize, hiddenSize])
    bias01 = GetBias([hiddenSize])
    layer01 = GetLayer(input01, weight01, bias01, nLayer=1)

with tf.name_scope('RnnLayer'):
    input02 = tf.reshape(layer01, [-1, stepSize, hiddenSize],
                         name='dim3Input02')
    rnnFrame = tf.nn.rnn_cell.BasicLSTMCell(hiddenSize, forget_bias=1.0,
                                            state_is_tuple=True)
    theState = rnnFrame.zero_state(batch_size=batchSize, dtype=tf.float32)
    outputs, finalState = tf.nn.dynamic_rnn(rnnFrame, input02,
                                            initial_state=theState,
                                            time_major=False)

with tf.name_scope('Layer02'):
    input03 = tf.reshape(outputs, [-1, hiddenSize], name='dim2Input03')
    weight02 = GetWeight([hiddenSize, outputSize])
    bias02 = GetBias([outputSize])
    prediction = GetLayer(input03, weight02, bias02, nLayer=2)
tf_x = tf.placeholder(tf.float32,
                      [None, TIME_STEP, INPUT_SIZE])  # shape(batch, 5, 1)
tf_y = tf.placeholder(tf.float32, [None, TIME_STEP, INPUT_SIZE])  # input y

# RNN
rnn_cell = tf.nn.rnn_cell.LSTMCell(num_units=CELL_SIZE)
init_s = rnn_cell.zero_state(batch_size=BATCH_SIZE, dtype=tf.float32)  # very first hidden state
outputs, final_s = tf.nn.dynamic_rnn(
    rnn_cell,               # cell you have chosen
    tf_x,                   # input
    initial_state=init_s,   # the initial hidden state
    time_major=False,       # False: (batch, time step, input); True: (time step, batch, input)
)
outs2D = tf.reshape(outputs, [-1, CELL_SIZE])  # reshape 3D output to 2D for fully connected layer
net_outs2D = tf.layers.dense(outs2D, INPUT_SIZE)
outs = tf.reshape(net_outs2D, [-1, TIME_STEP, INPUT_SIZE])  # reshape back to 3D
loss = tf.losses.mean_squared_error(labels=tf_y, predictions=outs)  # compute cost
train_op = tf.train.AdamOptimizer(LR).minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())  # initialize var in graph

plt.figure(1, figsize=(12, 5))
plt.ion()  # continuously plot
def test_reshape(self):
    input = tf.placeholder(shape=(4, 32, 32, 3), dtype=tf.float32)
    output = tf.reshape(input, shape=(4, 32 * 32 * 3))
    self._test_conversion('reshape')
def loss(model, cartpoleUtil, t_interior, X_interior, t_terminal, X_terminal):
    '''
    Compute total loss for training.

    Args:
        model: DGM model object
        cartpoleUtil: utility object providing the system dynamics f(X)
        t_interior: sampled time points in the interior of the function's domain
        X_interior: sampled space points in the interior of the function's domain
        t_terminal: sampled time points at terminal point (vector of terminal times)
        X_terminal: sampled space points at terminal time
    '''
    # Loss term #1: PDE
    # compute function value and derivatives at current sampled points
    # \frac{\partial u}{\partial t}(t, x) + \Delta u(t, x) - \lambda \| \nabla u(t, x) \|^2 = 0
    # => V_t + V_xx - lambda * L2_norm(V_x)^2
    matmul, multiply, rowsum = getTFUtils()

    V = model(t_interior, X_interior)
    V_t = tf.gradients(V, t_interior)[0]
    print('V_t=%s' % V_t)

    # f = phi1 + phi2
    const = tf.constant
    print('X_interior=%s' % X_interior)
    phi1 = tf.constant(0.5) * quadraticForm(X_interior, Q)
    A, B = cartpoleUtil.f(X_interior)
    print('A=%s' % A)
    print('B=%s' % B)
    V_x = tf.gradients(V, X_interior)[0]
    print('V_x=%s' % V_x)
    Bt_gradV = BtXgradV(B, V_x)
    print('Bt_gradV=%s' % Bt_gradV)
    phi2 = const(0.5) * tf.square(Bt_gradV) / const(R * 1.0)
    print('phi1=%s' % phi1)
    print('phi2=%s' % phi2)
    f = phi1 + phi2

    # mu^T
    uinput = const(1.0 / R) * Bt_gradV
    inputs = tf.repeat(tf.reshape(uinput, (-1, 1)), repeats=D, axis=1)
    print('inputs=%s' % inputs)
    mu_t = A - multiply(inputs, B)

    V_xx = tf.gradients(V_x, X_interior)[0]
    print('V_t=%s' % V_t)
    print('f=%s' % f)
    print('mu_t=%s' % mu_t)
    print('V_x=%s' % V_x)
    print('snoise=%s' % snoise)
    print('V_xx=%s' % V_xx)
    mul = rowsum(multiply(mu_t, V_x))
    diff_V = V_t + f + mul + 0.5 * (snoise**2) * tf.linalg.trace(V_xx)

    # compute average L2-norm of differential operator
    L1 = tf.reduce_mean(tf.square(diff_V))

    # Loss term #2: boundary condition
    # no boundary condition for this problem

    # Loss term #3: initial/terminal condition
    target_terminal = u(X_terminal)
    fitted_terminal = model(t_terminal, X_terminal)
    L3 = tf.reduce_mean(tf.square(fitted_terminal - target_terminal))

    return L1, L3
def test_flatten(self):
    input = tf.placeholder(shape=(4, 32, 32, 3), dtype=tf.float32)
    output = tf.reshape(input, shape=(4, -1))
    self._test_conversion('flatten')
def add_metric_fn_inputs(params,
                         cls_outputs,
                         box_outputs,
                         metric_fn_inputs,
                         max_detection_points=anchors.MAX_DETECTION_POINTS):
  """Selects top-k predictions and adds the selected to metric_fn_inputs.

  Args:
    params: a parameter dictionary that includes `min_level`, `max_level`,
      `batch_size`, and `num_classes`.
    cls_outputs: an OrderedDict with keys representing levels and values
      representing logits in [batch_size, height, width, num_anchors].
    box_outputs: an OrderedDict with keys representing levels and values
      representing box regression targets in
      [batch_size, height, width, num_anchors * 4].
    metric_fn_inputs: a dictionary that will hold the top-k selections.
    max_detection_points: an integer specifying the maximum detection points
      to keep before NMS. Keep all anchors if max_detection_points <= 0.
  """
  batch_size = params['batch_size']
  num_classes = params['num_classes']
  cls_outputs_all = []
  box_outputs_all = []
  # Concatenates class and box of all levels into one tensor.
  for level in range(params['min_level'], params['max_level'] + 1):
    if params['data_format'] == 'channels_first':
      cls_outputs[level] = tf.transpose(cls_outputs[level], [0, 2, 3, 1])
      box_outputs[level] = tf.transpose(box_outputs[level], [0, 2, 3, 1])

    cls_outputs_all.append(
        tf.reshape(cls_outputs[level], [batch_size, -1, num_classes]))
    box_outputs_all.append(
        tf.reshape(box_outputs[level], [batch_size, -1, 4]))
  cls_outputs_all = tf.concat(cls_outputs_all, 1)
  box_outputs_all = tf.concat(box_outputs_all, 1)

  if max_detection_points > 0:
    # Prune anchors and detections to only keep max_detection_points.
    # Due to some issues, top_k is currently slow in graph mode.
    cls_outputs_all_reshape = tf.reshape(cls_outputs_all, [batch_size, -1])
    _, cls_topk_indices = tf.math.top_k(
        cls_outputs_all_reshape, k=max_detection_points, sorted=False)
    indices = cls_topk_indices // num_classes
    classes = cls_topk_indices % num_classes
    cls_indices = tf.stack([indices, classes], axis=2)
    cls_outputs_all_after_topk = tf.gather_nd(
        cls_outputs_all, cls_indices, batch_dims=1)
    box_outputs_all_after_topk = tf.gather_nd(
        box_outputs_all, tf.expand_dims(indices, 2), batch_dims=1)
  else:
    # Keep all anchors, but for each anchor, just keep the max probability for
    # each class.
    cls_outputs_idx = tf.math.argmax(cls_outputs_all, axis=-1)
    num_anchors = cls_outputs_all.shape[1]

    classes = cls_outputs_idx
    indices = tf.tile(
        tf.expand_dims(tf.range(num_anchors), axis=0), [batch_size, 1])
    cls_outputs_all_after_topk = tf.reduce_max(cls_outputs_all, -1)
    box_outputs_all_after_topk = box_outputs_all

  metric_fn_inputs['cls_outputs_all'] = cls_outputs_all_after_topk
  metric_fn_inputs['box_outputs_all'] = box_outputs_all_after_topk
  metric_fn_inputs['indices_all'] = indices
  metric_fn_inputs['classes_all'] = classes
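# Quick NumPy sketch (not from the original source) of the index arithmetic
# above: a top-k index into the flattened [num_anchors * num_classes] scores
# splits into an anchor index and a class index via // and %.
import numpy as np

num_classes = 3
scores = np.array([[0.1, 0.9, 0.2],   # anchor 0
                   [0.8, 0.3, 0.4]])  # anchor 1
flat_topk = np.argsort(scores.ravel())[::-1][:2]  # -> [1, 3]
anchor_idx = flat_topk // num_classes             # -> [0, 1]
class_idx = flat_topk % num_classes               # -> [1, 0]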
def __call__(self,
             images_saccader,
             images_classnet,
             num_times,
             is_training_saccader=False,
             is_training_classnet=False,
             policy="learned",
             stop_gradient_after_representation=False):

  logits, locations_t, best_locations_t, endpoints = Saccader.__call__(
      self,
      images_saccader,
      num_times,
      is_training=is_training_saccader,
      policy=policy,
      stop_gradient_after_representation=stop_gradient_after_representation)

  self.glimpse_shape_saccader = self.glimpse_shape
  image_size_saccader = images_saccader.shape.as_list()[1]
  image_size_classnet = images_classnet.shape.as_list()[1]
  if self.glimpse_shape_classnet[0] < 0:
    self.glimpse_shape_classnet = tuple([
        int(image_size_classnet / image_size_saccader * self.glimpse_shape[0])
    ] * 2)
  self.glimpse_shape = self.glimpse_shape_classnet

  images_glimpse_t = []
  for locations in locations_t:
    images_glimpse = utils.extract_glimpse(
        images_classnet, size=self.glimpse_shape_classnet, offsets=locations)
    images_glimpse_t.append(images_glimpse)

  batch_size = tf.shape(images_classnet)[0]
  images_glimpse_t = tf.concat(images_glimpse_t, axis=0)

  variables_before = set(tf.global_variables())
  reuse = True if self.var_list_classnet else False
  with tf.variable_scope(self.variable_scope_classnet, reuse=reuse):
    if self.classnet_type == "nasnet":
      classnet_config = nasnet.large_imagenet_config()
      classnet_config.use_aux_head = 0
      classnet_config.drop_path_keep_prob = 1.0
      with slim.arg_scope(nasnet.nasnet_large_arg_scope()):
        classnet_logits, endpoints_ = nasnet.build_nasnet_large(
            images_glimpse_t,
            self.num_classes,
            is_training=is_training_classnet,
            config=classnet_config)
    elif self.classnet_type == "resnet_v2_50":
      network = nets_factory.get_network_fn(
          "resnet_v2_50", self.num_classes, is_training=is_training_classnet)
      classnet_logits, endpoints_ = network(images_glimpse_t)

  endpoints["classnet"] = endpoints_
  variables_after = set(tf.global_variables())
  logits_t = tf.reshape(classnet_logits, (num_times, batch_size, -1))
  logits = tf.reduce_mean(logits_t, axis=0)
  if not reuse:
    self.var_list_saccader = (
        self.var_list_classification + self.var_list_location)
    self.var_list_classnet = [
        v for v in list(variables_after - variables_before)
        if "global_step" not in v.op.name
    ]
    self.var_list.extend(self.var_list_classnet)
    self.init_op = tf.variables_initializer(var_list=self.var_list)

  return logits, locations_t, best_locations_t, endpoints
def tf_nn(nx, nt, num_hidden_neurons, activations, num_iter=100000, eta=0.01):
    tf.reset_default_graph()

    # Set a seed to ensure getting the same results from every run
    tf.set_random_seed(4155)

    nx = 10  # NOTE: overrides the nx argument
    nt = 10  # NOTE: overrides the nt argument
    x_np = np.linspace(0, 1, nx)
    t_np = np.linspace(0, 1, nt)

    X, T = np.meshgrid(x_np, t_np)

    x = X.ravel()
    t = T.ravel()

    ## The construction phase
    zeros = tf.reshape(tf.convert_to_tensor(np.zeros(x.shape)), shape=(-1, 1))
    x = tf.reshape(tf.convert_to_tensor(x), shape=(-1, 1))
    t = tf.reshape(tf.convert_to_tensor(t), shape=(-1, 1))

    pts = tf.concat([x, t], 1)  # input layer

    num_hidden_layers = len(num_hidden_neurons)

    X = tf.convert_to_tensor(X)
    T = tf.convert_to_tensor(T)

    # Define layer structure
    with tf.name_scope('dnn'):
        num_hidden_layers = np.size(num_hidden_neurons)
        previous_layer = pts
        for l in range(num_hidden_layers):
            current_layer = tf.layers.dense(previous_layer,
                                            num_hidden_neurons[l],
                                            name=('hidden%d' % (l + 1)),
                                            activation=activations[l])
            previous_layer = current_layer
        dnn_output = tf.layers.dense(previous_layer, 1, name='output',
                                     activation=None)

    # Define loss function
    # trial function satisfies boundary conditions and initial condition
    with tf.name_scope('loss'):
        g_t = (1 - t) * u(x) + x * (1 - x) * t * dnn_output
        g_t_d2x = tf.gradients(tf.gradients(g_t, x), x)
        g_t_dt = tf.gradients(g_t, t)
        loss = tf.losses.mean_squared_error(zeros, g_t_dt[0] - g_t_d2x[0])

    # Define optimizer
    with tf.name_scope('train'):
        optimizer = tf.train.AdamOptimizer(eta)
        training_op = optimizer.minimize(loss)

    init = tf.global_variables_initializer()

    g_e = u_e(x, t)

    with tf.Session() as sess:
        init.run()
        for i in range(num_iter):
            sess.run(training_op)
            if i % 1000 == 0:
                print(loss.eval())

        g_e = g_e.eval()    # analytical solution
        g_dnn = g_t.eval()  # NN solution

    diff = np.abs(g_e - g_dnn)
    print('Max absolute difference between analytical solution and '
          'TensorFlow DNN ', np.max(diff))

    G_e = g_e.reshape((nt, nx))
    G_dnn = g_dnn.reshape((nt, nx))
    diff = diff.reshape((nt, nx))

    # Plot the results
    X, T = np.meshgrid(x_np, t_np)

    fig = plt.figure(figsize=(10, 10))
    ax = fig.gca(projection='3d')
    ax.set_title('Solution from the deep neural network w/ %d layer'
                 % len(num_hidden_neurons))
    s = ax.plot_surface(X, T, G_dnn, linewidth=0, antialiased=False,
                        cmap=cm.viridis)
    ax.set_ylabel('Time $t$')
    ax.set_xlabel('Position $x$')

    fig = plt.figure(figsize=(10, 10))
    ax = fig.gca(projection='3d')
    ax.set_title('Analytical solution')
    s = ax.plot_surface(X, T, G_e, linewidth=0, antialiased=False,
                        cmap=cm.viridis)
    ax.set_ylabel('Time $t$')
    ax.set_xlabel('Position $x$')

    fig = plt.figure(figsize=(10, 10))
    ax = fig.gca(projection='3d')
    ax.set_title('Difference')
    s = ax.plot_surface(X, T, diff, linewidth=0, antialiased=False,
                        cmap=cm.viridis)
    ax.set_ylabel('Time $t$')
    ax.set_xlabel('Position $x$')

    ## Take some 3D slices
    indx1 = 0
    indx2 = int(nt / 2)
    indx3 = nt - 1
    t1 = t_np[indx1]
    t2 = t_np[indx2]
    t3 = t_np[indx3]

    # Slice the results from the DNN
    res1 = G_dnn[indx1, :]
    res2 = G_dnn[indx2, :]
    res3 = G_dnn[indx3, :]

    # Slice the analytical results
    res_analytical1 = G_e[indx1, :]
    res_analytical2 = G_e[indx2, :]
    res_analytical3 = G_e[indx3, :]

    # Plot the slices
    plt.figure(figsize=(10, 10))
    plt.title("Computed solutions at time = %g" % t1)
    plt.plot(x_np, res1)
    plt.plot(x_np, res_analytical1)
    plt.legend(['dnn', 'analytical'])

    plt.figure(figsize=(10, 10))
    plt.title("Computed solutions at time = %g" % t2)
    plt.plot(x_np, res2)
    plt.plot(x_np, res_analytical2)
    plt.legend(['dnn', 'analytical'])

    plt.figure(figsize=(10, 10))
    plt.title("Computed solutions at time = %g" % t3)
    plt.plot(x_np, res3)
    plt.plot(x_np, res_analytical3)
    plt.legend(['dnn', 'analytical'])

    plt.show()

    return diff
def test_padded_image_result_dict(self):
  input_data_fields = fields.InputDataFields
  detection_fields = fields.DetectionResultFields
  key = tf.constant([str(i) for i in range(2)])

  detection_boxes = np.array([[[0., 0., 1., 1.]], [[0.0, 0.0, 0.5, 0.5]]],
                             dtype=np.float32)
  detection_keypoints = np.array([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]],
                                 dtype=np.float32)
  detections = {
      detection_fields.detection_boxes:
          tf.constant(detection_boxes),
      detection_fields.detection_scores:
          tf.constant([[1.], [1.]]),
      detection_fields.detection_classes:
          tf.constant([[1], [2]]),
      detection_fields.num_detections:
          tf.constant([1, 1]),
      detection_fields.detection_keypoints:
          tf.tile(
              tf.reshape(tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
              multiples=[2, 1, 1, 1])
  }

  gt_boxes = detection_boxes
  groundtruth = {
      input_data_fields.groundtruth_boxes:
          tf.constant(gt_boxes),
      input_data_fields.groundtruth_classes:
          tf.constant([[1.], [1.]]),
      input_data_fields.groundtruth_keypoints:
          tf.tile(
              tf.reshape(tf.constant(detection_keypoints), shape=[1, 1, 3, 2]),
              multiples=[2, 1, 1, 1])
  }

  image = tf.zeros((2, 100, 100, 3), dtype=tf.float32)

  true_image_shapes = tf.constant([[100, 100, 3], [50, 100, 3]])
  original_image_spatial_shapes = tf.constant([[200, 200], [150, 300]])

  result = eval_util.result_dict_for_batched_example(
      image,
      key,
      detections,
      groundtruth,
      scale_to_absolute=True,
      true_image_shapes=true_image_shapes,
      original_image_spatial_shapes=original_image_spatial_shapes,
      max_gt_boxes=tf.constant(1))

  with self.test_session() as sess:
    result = sess.run(result)
    self.assertAllEqual(
        [[[0., 0., 200., 200.]], [[0.0, 0.0, 150., 150.]]],
        result[input_data_fields.groundtruth_boxes])
    self.assertAllClose(
        [[[[0., 0.], [100., 100.], [200., 200.]]],
         [[[0., 0.], [150., 150.], [300., 300.]]]],
        result[input_data_fields.groundtruth_keypoints])

    # Predictions from the model are not scaled.
    self.assertAllEqual(
        [[[0., 0., 200., 200.]], [[0.0, 0.0, 75., 150.]]],
        result[detection_fields.detection_boxes])
    self.assertAllClose(
        [[[[0., 0.], [100., 100.], [200., 200.]]],
         [[[0., 0.], [75., 150.], [150., 300.]]]],
        result[detection_fields.detection_keypoints])
def multilevel_roi_align(features,
                         boxes,
                         box_levels,
                         output_size,
                         num_samples_per_cell_y=1,
                         num_samples_per_cell_x=1,
                         align_corners=False,
                         extrapolation_value=0.0,
                         scope=None):
  """Applies RoI Align op and returns feature for boxes.

  Given multiple features maps indexed by different levels, and a set of boxes
  where each box is mapped to a certain level, this function selectively crops
  and resizes boxes from the corresponding feature maps.

  We follow the RoI Align technique in https://arxiv.org/pdf/1703.06870.pdf
  figure 3. Specifically, each box is subdivided uniformly into a grid
  consisting of output_size[0] x output_size[1] rectangular cells. Within each
  cell we select `num_points` points uniformly and compute feature values
  using bilinear interpolation. Finally, we average pool the interpolated
  values in each cell to obtain a [output_size[0], output_size[1], channels]
  feature.

  If `align_corners` is true, sampling points are uniformly spread such that
  corner points exactly overlap corners of the boxes.

  In this function we also follow the convention of treating feature pixels as
  point objects with no spatial extent.

  Args:
    features: A list of 4D float tensors of shape [batch_size, max_height,
      max_width, channels] containing features. Note that each feature map
      must have the same number of channels.
    boxes: A 3D float tensor of shape [batch_size, num_boxes, 4] containing
      boxes of the form [ymin, xmin, ymax, xmax] in normalized coordinates.
    box_levels: A 2D int32 tensor of shape [batch_size, num_boxes]
      representing the feature level index for each box.
    output_size: A list of two integers [size_y, size_x] indicating the output
      feature size for each box.
    num_samples_per_cell_y: Number of grid points to sample along y axis in
      each cell.
    num_samples_per_cell_x: Number of grid points to sample along x axis in
      each cell.
    align_corners: Whether to align the corner grid points exactly with box
      corners.
    extrapolation_value: a float value to use for extrapolation.
    scope: Scope name to use for this op.

  Returns:
    A 5D float tensor of shape [batch_size, num_boxes, output_size[0],
    output_size[1], channels] representing the cropped features.
  """
  with tf.name_scope(scope, 'MultiLevelRoIAlign'):
    features, true_feature_shapes = pad_to_max_size(features)
    batch_size = tf.shape(features)[0]
    num_levels = features.get_shape().as_list()[1]
    max_feature_height = tf.shape(features)[2]
    max_feature_width = tf.shape(features)[3]
    num_filters = features.get_shape().as_list()[4]
    num_boxes = tf.shape(boxes)[1]

    # Convert boxes to absolute co-ordinates.
    true_feature_shapes = tf.cast(true_feature_shapes, dtype=boxes.dtype)
    true_feature_shapes = tf.gather(true_feature_shapes, box_levels)
    boxes *= tf.concat([true_feature_shapes - 1] * 2, axis=-1)

    size_y = output_size[0] * num_samples_per_cell_y
    size_x = output_size[1] * num_samples_per_cell_x
    box_grid_y, box_grid_x = box_grid_coordinate_vectors(
        boxes, size_y=size_y, size_x=size_x, align_corners=align_corners)
    (feature_grid_y0, feature_grid_x0, feature_grid_y1,
     feature_grid_x1) = feature_grid_coordinate_vectors(box_grid_y, box_grid_x)
    feature_grid_y = tf.reshape(
        tf.stack([feature_grid_y0, feature_grid_y1], axis=3),
        [batch_size, num_boxes, -1])
    feature_grid_x = tf.reshape(
        tf.stack([feature_grid_x0, feature_grid_x1], axis=3),
        [batch_size, num_boxes, -1])
    feature_coordinates = ravel_indices(feature_grid_y, feature_grid_x,
                                        num_levels, max_feature_height,
                                        max_feature_width, box_levels)
    valid_indices = _valid_indicator(feature_grid_y, feature_grid_x,
                                     true_feature_shapes)
    feature_coordinates = tf.where(valid_indices, feature_coordinates,
                                   -1 * tf.ones_like(feature_coordinates))
    flattened_features = tf.reshape(features, [-1, num_filters])
    flattened_feature_values = _gather_valid_indices(flattened_features,
                                                     feature_coordinates,
                                                     extrapolation_value)
    features_per_box = tf.reshape(
        flattened_feature_values,
        [batch_size, num_boxes, size_y * 2, size_x * 2, num_filters])

    # Cast tensors into dtype of features.
    box_grid_y = tf.cast(box_grid_y, dtype=features_per_box.dtype)
    box_grid_x = tf.cast(box_grid_x, dtype=features_per_box.dtype)
    feature_grid_y0 = tf.cast(feature_grid_y0, dtype=features_per_box.dtype)
    feature_grid_x0 = tf.cast(feature_grid_x0, dtype=features_per_box.dtype)

    # RoI Align operation is a bilinear interpolation of four
    # neighboring feature points f0, f1, f2, and f3 onto point y, x given by
    #
    # f(y, x) = [hy, ly] * [[f00, f01],  * [hx, lx]^T
    #                       [f10, f11]]
    #
    # Unrolling the matrix multiplies gives us:
    # f(y, x) = (hy * hx) f00 + (hy * lx) f01 + (ly * hx) f10 + (lx * ly) f11
    # f(y, x) = w00 * f00 + w01 * f01 + w10 * f10 + w11 * f11
    #
    # This can be computed by applying pointwise multiplication and sum_pool in
    # a 2x2 window.
    ly = box_grid_y - feature_grid_y0
    lx = box_grid_x - feature_grid_x0
    hy = 1.0 - ly
    hx = 1.0 - lx

    kernel_y = tf.reshape(
        tf.stack([hy, ly], axis=3), [batch_size, num_boxes, size_y * 2, 1])
    kernel_x = tf.reshape(
        tf.stack([hx, lx], axis=3), [batch_size, num_boxes, 1, size_x * 2])

    # Multiplier 4 is to make tf.nn.avg_pool behave like sum_pool.
    interpolation_kernel = kernel_y * kernel_x * 4

    # Interpolate the gathered features with computed interpolation kernels.
    features_per_box *= tf.expand_dims(interpolation_kernel, axis=4)
    features_per_box = tf.reshape(
        features_per_box,
        [batch_size * num_boxes, size_y * 2, size_x * 2, num_filters])

    # This combines the two pooling operations - sum_pool to perform bilinear
    # interpolation and avg_pool to pool the values in each bin.
    features_per_box = tf.nn.avg_pool(
        features_per_box,
        [1, num_samples_per_cell_y * 2, num_samples_per_cell_x * 2, 1],
        [1, num_samples_per_cell_y * 2, num_samples_per_cell_x * 2, 1],
        'VALID')
    features_per_box = tf.reshape(
        features_per_box,
        [batch_size, num_boxes, output_size[0], output_size[1], num_filters])

  return features_per_box
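# Small NumPy check (not from the original source) of the trick above:
# multiplying by 4 turns a 2x2 average pool into a 2x2 sum pool, so the
# pooled value equals the full bilinear combination w00*f00 + ... + w11*f11.
import numpy as np

f = np.array([[1.0, 2.0], [3.0, 4.0]])    # four neighboring feature points
ly, lx = 0.25, 0.75                        # fractional offsets of the sample
w = np.outer([1 - ly, ly], [1 - lx, lx])   # bilinear weights, sum to 1
bilinear = np.sum(w * f)
avg_pool_of_weighted = np.mean(w * f * 4)  # what avg_pool(x * kernel * 4) computes
assert np.isclose(bilinear, avg_pool_of_weighted)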
# function to declare easily the bias only by shape
def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


# input variable
x = tf.placeholder(tf.float32, [None, vectorSize])
# keep probability to change from dropout 0.50 to 1.0 in validation and test
keep_prob = tf.placeholder(tf.float32)
# expected outputs variable
y_ = tf.placeholder(tf.float32, [None, labelSize])

# arrange the tensor as an image (1*31029), 1 channel
x_image0 = tf.reshape(x, [-1, 1, vectorSize, 1])
# rearrange the tensor into 1 channel (1*31029)
x_image = tf.transpose(x_image0, perm=[0, 3, 2, 1])

# 1 LAYER*************************************************************************************
# 1 Convolutional Layer. Explicit for regularization of the weights.
# weight, first layer: 1 input channel, 12 output channels, 1x21 filter window size
W_conv1 = weight_variable([1, wd1, 1, w1])
# bias declaration; the size has to be the same as the output channels (12)
b_conv1 = bias_variable([w1])
# convolution (input, weights) moving 1 step each time with a relu
h_conv1 = tf.nn.relu(
    tf.nn.conv2d(x_image, W_conv1, strides=[1, 1, 1, 1], padding='SAME')
    + b_conv1)
# max pooling with a 148 width window size, moving 148 in width by step
h_pool1 = tf.nn.max_pool(h_conv1,
def define_loss(self, features, outputs):
  """Obtain the loss of the model."""
  # Intents.
  # Shape: (batch_size, max_num_intents + 1).
  intent_logits = outputs["logit_intent_status"]
  # Shape: (batch_size, max_num_intents).
  intent_labels = features["intent_status"]
  # Add label corresponding to NONE intent.
  num_active_intents = tf.expand_dims(
      tf.reduce_sum(intent_labels, axis=1), axis=1)
  none_intent_label = tf.ones_like(num_active_intents) - num_active_intents
  # Shape: (batch_size, max_num_intents + 1).
  onehot_intent_labels = tf.concat([none_intent_label, intent_labels], axis=1)
  intent_loss = tf.losses.softmax_cross_entropy(
      onehot_intent_labels,
      intent_logits,
      weights=features["is_real_example"])

  # Requested slots.
  # Shape: (batch_size, max_num_slots).
  requested_slot_logits = outputs["logit_req_slot_status"]
  requested_slot_labels = features["req_slot_status"]
  max_num_requested_slots = requested_slot_labels.get_shape().as_list()[-1]
  weights = tf.sequence_mask(
      features["req_slot_num"], maxlen=max_num_requested_slots)
  # Sigmoid cross entropy is used because more than one slots can be requested
  # in a single utterance.
  requested_slot_loss = tf.losses.sigmoid_cross_entropy(
      requested_slot_labels, requested_slot_logits, weights=weights)

  # Categorical slot status.
  # Shape: (batch_size, max_num_cat_slots, 3).
  cat_slot_status_logits = outputs["logit_cat_slot_status"]
  cat_slot_status_labels = features["cat_slot_status"]
  max_num_cat_slots = cat_slot_status_labels.get_shape().as_list()[-1]
  one_hot_labels = tf.one_hot(cat_slot_status_labels, 3, dtype=tf.int32)
  cat_weights = tf.sequence_mask(
      features["cat_slot_num"], maxlen=max_num_cat_slots, dtype=tf.float32)
  cat_slot_status_loss = tf.losses.softmax_cross_entropy(
      tf.reshape(one_hot_labels, [-1, 3]),
      tf.reshape(cat_slot_status_logits, [-1, 3]),
      weights=tf.reshape(cat_weights, [-1]))

  # Categorical slot values.
  # Shape: (batch_size, max_num_cat_slots, max_num_slot_values).
  cat_slot_value_logits = outputs["logit_cat_slot_value"]
  cat_slot_value_labels = features["cat_slot_value"]
  max_num_slot_values = cat_slot_value_logits.get_shape().as_list()[-1]
  one_hot_labels = tf.one_hot(
      cat_slot_value_labels, max_num_slot_values, dtype=tf.int32)
  # Zero out losses for categorical slot value when the slot status is not
  # active.
  cat_loss_weight = tf.cast(
      tf.equal(cat_slot_status_labels, data_utils.STATUS_ACTIVE), tf.float32)
  cat_slot_value_loss = tf.losses.softmax_cross_entropy(
      tf.reshape(one_hot_labels, [-1, max_num_slot_values]),
      tf.reshape(cat_slot_value_logits, [-1, max_num_slot_values]),
      weights=tf.reshape(cat_weights * cat_loss_weight, [-1]))

  # Non-categorical slot status.
  # Shape: (batch_size, max_num_noncat_slots, 3).
  noncat_slot_status_logits = outputs["logit_noncat_slot_status"]
  noncat_slot_status_labels = features["noncat_slot_status"]
  max_num_noncat_slots = noncat_slot_status_labels.get_shape().as_list()[-1]
  one_hot_labels = tf.one_hot(noncat_slot_status_labels, 3, dtype=tf.int32)
  noncat_weights = tf.sequence_mask(
      features["noncat_slot_num"],
      maxlen=max_num_noncat_slots,
      dtype=tf.float32)
  # Logits for padded (invalid) values are already masked.
  noncat_slot_status_loss = tf.losses.softmax_cross_entropy(
      tf.reshape(one_hot_labels, [-1, 3]),
      tf.reshape(noncat_slot_status_logits, [-1, 3]),
      weights=tf.reshape(noncat_weights, [-1]))

  # Non-categorical slot spans.
  # Shape: (batch_size, max_num_noncat_slots, max_num_tokens).
  span_start_logits = outputs["logit_noncat_slot_start"]
  span_start_labels = features["noncat_slot_value_start"]
  max_num_tokens = span_start_logits.get_shape().as_list()[-1]
  onehot_start_labels = tf.one_hot(
      span_start_labels, max_num_tokens, dtype=tf.int32)
  # Shape: (batch_size, max_num_noncat_slots, max_num_tokens).
  span_end_logits = outputs["logit_noncat_slot_end"]
  span_end_labels = features["noncat_slot_value_end"]
  onehot_end_labels = tf.one_hot(
      span_end_labels, max_num_tokens, dtype=tf.int32)
  # Zero out losses for non-categorical slot spans when the slot status is not
  # active.
  noncat_loss_weight = tf.cast(
      tf.equal(noncat_slot_status_labels, data_utils.STATUS_ACTIVE),
      tf.float32)
  span_start_loss = tf.losses.softmax_cross_entropy(
      tf.reshape(onehot_start_labels, [-1, max_num_tokens]),
      tf.reshape(span_start_logits, [-1, max_num_tokens]),
      weights=tf.reshape(noncat_weights * noncat_loss_weight, [-1]))
  span_end_loss = tf.losses.softmax_cross_entropy(
      tf.reshape(onehot_end_labels, [-1, max_num_tokens]),
      tf.reshape(span_end_logits, [-1, max_num_tokens]),
      weights=tf.reshape(noncat_weights * noncat_loss_weight, [-1]))

  losses = {
      "intent_loss": intent_loss,
      "requested_slot_loss": requested_slot_loss,
      "cat_slot_status_loss": cat_slot_status_loss,
      "cat_slot_value_loss": cat_slot_value_loss,
      "noncat_slot_status_loss": noncat_slot_status_loss,
      "span_start_loss": span_start_loss,
      "span_end_loss": span_end_loss,
  }
  for loss_name, loss in losses.items():
    tf.summary.scalar(loss_name, loss)
  return sum(losses.values()) / len(losses)
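# Tiny sketch (not from the original source) of the tf.sequence_mask weighting
# used throughout define_loss: positions past each example's true slot count
# get weight 0, so padded slots contribute nothing to the loss.
import tensorflow as tf

mask = tf.sequence_mask([1, 3], maxlen=4, dtype=tf.float32)
# mask == [[1., 0., 0., 0.],
#          [1., 1., 1., 0.]]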
def decode_image(image):
    image = tf.decode_raw(image, tf.uint8)  # tf.string -> [tf.uint8]
    image = tf.cast(image, tf.float32)
    image = tf.reshape(image, [784])  # flattened 28x28 image
    return image / 255.0  # normalize from [0, 255] to [0.0, 1.0]
# del_Y formation: gradient of the cross-entropy loss w.r.t. the prediction.
del_Y = -tf.divide(Y, Y_pred)

# del_W3 calculation.
# delz3_delW3 formation: block layout of H2, one block per output unit
# (1 + 1 + 8 = 10 blocks, one per class).
delz3_delW3_elem = tf.concat([H2, tf.zeros_like(H2)], 1)
for k in range(8):
    delz3_delW3_elem = tf.concat([delz3_delW3_elem, tf.zeros_like(H2)], 1)
delz3_delW3_list = []
for k in range(10):
    delz3_delW3_list.append(tf.roll(delz3_delW3_elem, k, 1))
delz3_delW3 = tf.stack(delz3_delW3_list, 2)

# dely_delz3 formation: Jacobian of the softmax, diag(y) - y y^T.
temp = -tf.matmul(Y_pred, Y_pred, transpose_b=True)
temp_diag = tf.reshape(tf.diag(Y_pred), [Y_pred.shape[0], Y_pred.shape[0]])
dely_delz3 = tf.add(temp, temp_diag)
tempz3 = tf.matmul(dely_delz3, del_Y)
del_W3 = tf.reshape(tf.matmul(delz3_delW3, tempz3), W3.shape)

# del_H2 calculation.
del_H2 = tf.reshape(tf.matmul(W3, tempz3), H2.shape)

# del_W3_0 (bias) calculation.
del_W3_0 = tempz3

# ----------- Backward propagation for second hidden layer ---------------
# del_W2 calculation.
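# Sanity check -- a minimal sketch, not part of the original code, assuming
# Y, Y_pred, and W3 are defined as above and that the forward pass computes
# Y_pred as a softmax over the logits that W3 produces. The hand-derived
# gradient can be compared against TensorFlow's autodiff:
loss_check = -tf.reduce_sum(Y * tf.log(Y_pred))  # cross-entropy loss
auto_del_W3 = tf.gradients(loss_check, W3)[0]    # autodiff gradient
max_abs_diff = tf.reduce_max(tf.abs(auto_del_W3 - del_W3))
# Running max_abs_diff in a session should yield a value near zero
# (up to floating-point error) if the manual derivation is correct.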
def decode_label(label):
    label = tf.decode_raw(label, tf.uint8)  # tf.string -> [tf.uint8]
    label = tf.reshape(label, [])  # label is a scalar
    return tf.to_int32(label)
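# For context -- a minimal sketch of how the decode_image and decode_label
# helpers above could be wired into a tf.data pipeline. The file names and
# pipeline parameters are hypothetical, assuming fixed-length records of one
# flattened 28x28 uint8 image and one uint8 label:
images_ds = tf.data.FixedLengthRecordDataset('images.bin', 784).map(decode_image)
labels_ds = tf.data.FixedLengthRecordDataset('labels.bin', 1).map(decode_label)
dataset = tf.data.Dataset.zip((images_ds, labels_ds)).shuffle(1000).batch(32)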
def _build_loss(self):
    """Builds the loss tensor, to be minimized by the optimizer."""
    self.reader = reader.DataReader(
        self.data_dir, self.batch_size, self.img_height, self.img_width,
        SEQ_LENGTH, 1, self.file_extension, self.random_scale_crop,
        reader.FLIP_RANDOM, self.random_color, self.imagenet_norm,
        self.shuffle, self.input_file, queue_size=self.queue_size)
    (self.image_stack, self.image_stack_norm, self.seg_stack,
     self.intrinsic_mat, _) = self.reader.read_data()
    if self.learn_intrinsics:
        self.intrinsic_mat = None
    if self.intrinsic_mat is None and not self.learn_intrinsics:
        raise RuntimeError(
            'Could not read intrinsic matrix. Turn learn_intrinsics on to '
            'learn it instead of loading it.')
    self.export('self.image_stack', self.image_stack)

    object_masks = []
    for i in range(self.batch_size):
        object_ids = tf.unique(tf.reshape(self.seg_stack[i], [-1]))[0]
        object_masks_i = []
        for j in range(SEQ_LENGTH):
            current_seg = self.seg_stack[i, :, :, j * 3]  # (H, W)

            def process_obj_mask(obj_id):
                """Create a mask for obj_id, skipping the background mask."""
                mask = tf.logical_and(
                    tf.equal(current_seg, obj_id),  # pylint: disable=cell-var-from-loop
                    tf.not_equal(tf.cast(0, tf.uint8), obj_id))
                # Leave out very small masks, which are most often errors.
                size = tf.reduce_sum(tf.to_int32(mask))
                mask = tf.logical_and(mask, tf.greater(size, MIN_OBJECT_AREA))
                if not self.boxify:
                    return mask
                # Complete the mask to its bounding box.
                binary_obj_masks_y = tf.reduce_any(mask, axis=1, keepdims=True)
                binary_obj_masks_x = tf.reduce_any(mask, axis=0, keepdims=True)
                return tf.logical_and(binary_obj_masks_y, binary_obj_masks_x)

            object_mask = tf.map_fn(  # (N, H, W)
                process_obj_mask, object_ids, dtype=tf.bool)
            object_mask = tf.reduce_any(object_mask, axis=0)
            object_masks_i.append(object_mask)
        object_masks.append(tf.stack(object_masks_i, axis=-1))
    self.seg_stack = tf.to_float(tf.stack(object_masks, axis=0))
    tf.summary.image('Masks', self.seg_stack)

    with tf.variable_scope(DEPTH_SCOPE):
        # Organized by ...[i][scale]. Note that the order is flipped in
        # variables in build_loss() below.
        self.disp = {}
        self.depth = {}

        # Parabolic ramp-up of the noise over LAYER_NORM_NOISE_RAMPUP_STEPS
        # steps. We stop at 0.5 because this is the value above which the
        # multiplicative noise we use can become negative. Further
        # experimentation is needed to find if non-negativity is indeed
        # needed.
        noise_stddev = 0.5 * tf.square(
            tf.minimum(
                tf.to_float(self.global_step) /
                float(LAYER_NORM_NOISE_RAMPUP_STEPS), 1.0))

        def _normalizer_fn(x, is_train, name='bn'):
            return randomized_layer_normalization.normalize(
                x, is_train=is_train, name=name, stddev=noise_stddev)

        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            for i in range(SEQ_LENGTH):
                image = self.image_stack_norm[:, :, :, 3 * i:3 * (i + 1)]
                self.depth[i] = (
                    depth_prediction_net.depth_prediction_resnet18unet(
                        image, True, self.weight_reg, _normalizer_fn))
                self.disp[i] = 1.0 / self.depth[i]

    with tf.name_scope('compute_loss'):
        self.reconstr_loss = 0
        self.smooth_loss = 0
        self.ssim_loss = 0
        self.depth_consistency_loss = 0

        # Smoothness.
        if self.smooth_weight > 0:
            for i in range(SEQ_LENGTH):
                disp_smoothing = self.disp[i]
                # Perform depth normalization, dividing by the mean.
                mean_disp = tf.reduce_mean(
                    disp_smoothing, axis=[1, 2, 3], keep_dims=True)
                disp_input = disp_smoothing / mean_disp
                self.smooth_loss += _depth_smoothness(
                    disp_input, self.image_stack[:, :, :, 3 * i:3 * (i + 1)])

        self.rot_loss = 0.0
        self.trans_loss = 0.0

        def add_result_to_loss_and_summaries(endpoints, i, j):
            tf.summary.image(
                'valid_mask%d%d' % (i, j),
                tf.expand_dims(endpoints['depth_proximity_weight'], -1))
            self.depth_consistency_loss += endpoints['depth_error']
            self.reconstr_loss += endpoints['rgb_error']
            self.ssim_loss += 0.5 * endpoints['ssim_error']
            self.rot_loss += endpoints['rotation_error']
            self.trans_loss += endpoints['translation_error']

        self.motion_smoothing = 0.0
        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            for i in range(SEQ_LENGTH - 1):
                j = i + 1
                depth_i = self.depth[i][:, :, :, 0]
                depth_j = self.depth[j][:, :, :, 0]
                image_j = self.image_stack[:, :, :, 3 * j:3 * (j + 1)]
                image_i = self.image_stack[:, :, :, i * 3:(i + 1) * 3]
                # We select a pair of consecutive images (and their
                # respective predicted depth maps), and have the network
                # predict a motion field that connects the two. The pair of
                # images is fed into the network twice, once in forward
                # order and once in reverse order, and the results enter the
                # loss calculation. The following losses are calculated:
                # - RGB and SSIM photometric consistency.
                # - Cycle consistency of rotations and translations for
                #   every pixel.
                # - L1 smoothness of the disparity and the motion field.
                # - Depth consistency.
                rot, trans, trans_res, mat = (
                    motion_prediction_net.motion_field_net(
                        images=tf.concat([image_i, image_j], axis=-1),
                        weight_reg=self.weight_reg))
                inv_rot, inv_trans, inv_trans_res, inv_mat = (
                    motion_prediction_net.motion_field_net(
                        images=tf.concat([image_j, image_i], axis=-1),
                        weight_reg=self.weight_reg))

                if self.learn_intrinsics:
                    intrinsic_mat = 0.5 * (mat + inv_mat)
                else:
                    intrinsic_mat = self.intrinsic_mat[:, 0, :, :]

                def dilate(x):
                    # Dilation by n pixels is roughly max pooling by
                    # 2 * n + 1.
                    p = self.foreground_dilation * 2 + 1
                    return tf.nn.max_pool(x, [1, p, p, 1], [1] * 4, 'SAME')

                trans += trans_res * dilate(self.seg_stack[:, :, :, j:j + 1])
                inv_trans += inv_trans_res * dilate(
                    self.seg_stack[:, :, :, i:i + 1])

                tf.summary.image('trans%d%d' % (i, i + 1), trans)
                tf.summary.image('trans%d%d' % (i + 1, i), inv_trans)
                tf.summary.image('trans_res%d%d' % (i + 1, i), inv_trans_res)
                tf.summary.image('trans_res%d%d' % (i, i + 1), trans_res)

                self.motion_smoothing += _smoothness(trans)
                self.motion_smoothing += _smoothness(inv_trans)
                tf.summary.scalar(
                    'trans_stdev',
                    tf.sqrt(0.5 * tf.reduce_mean(
                        tf.square(trans) + tf.square(inv_trans))))

                transformed_depth_j = transform_depth_map.using_motion_vector(
                    depth_j, trans, rot, intrinsic_mat)
                add_result_to_loss_and_summaries(
                    consistency_losses.rgbd_and_motion_consistency_loss(
                        transformed_depth_j, image_j, depth_i, image_i,
                        rot, trans, inv_rot, inv_trans), i, j)

                transformed_depth_i = transform_depth_map.using_motion_vector(
                    depth_i, inv_trans, inv_rot, intrinsic_mat)
                add_result_to_loss_and_summaries(
                    consistency_losses.rgbd_and_motion_consistency_loss(
                        transformed_depth_i, image_i, depth_j, image_j,
                        inv_rot, inv_trans, rot, trans), j, i)

        # Build the total loss as composed of L1 reconstruction, SSIM,
        # smoothing and object size constraint loss as appropriate.
        self.reconstr_loss *= self.reconstr_weight
        self.export('self.reconstr_loss', self.reconstr_loss)
        self.total_loss = self.reconstr_loss
        if self.smooth_weight > 0:
            self.smooth_loss *= self.smooth_weight
            self.total_loss += self.smooth_loss
            self.export('self.smooth_loss', self.smooth_loss)
        if self.ssim_weight > 0:
            self.ssim_loss *= self.ssim_weight
            self.total_loss += self.ssim_loss
            self.export('self.ssim_loss', self.ssim_loss)
        if self.motion_smoothing_weight > 0:
            self.motion_smoothing *= self.motion_smoothing_weight
            self.total_loss += self.motion_smoothing
            self.export('self.motion_sm_loss', self.motion_smoothing)
        if self.depth_consistency_loss_weight:
            self.depth_consistency_loss *= self.depth_consistency_loss_weight
            self.total_loss += self.depth_consistency_loss
            self.export('self.depth_consistency_loss',
                        self.depth_consistency_loss)
        self.rot_loss *= self.rotation_consistency_weight
        self.trans_loss *= self.translation_consistency_weight
        self.export('rot_loss', self.rot_loss)
        self.export('trans_loss', self.trans_loss)
        self.total_loss += self.rot_loss
        self.total_loss += self.trans_loss
        self.export('self.total_loss', self.total_loss)
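# The docstring says total_loss is "to be minimized by the optimizer"; the
# training op itself is not shown here. A minimal sketch of one way to build
# it -- the choice of AdamOptimizer and the learning rate are assumptions,
# not taken from the original code; self.global_step is assumed to be the
# same variable referenced in the noise ramp-up above:
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
train_op = optimizer.minimize(self.total_loss, global_step=self.global_step)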
def _split_heads(x, length, num_heads, depth):
    """Splits the last dimension into (num_heads, depth).

    Input shape: (bs, length, num_heads * depth).
    Output shape: (bs, num_heads, length, depth).
    """
    x = tf.reshape(x, (-1, length, num_heads, depth))
    return tf.transpose(x, perm=[0, 2, 1, 3])
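# The inverse operation is not shown in the original; a minimal sketch of a
# hypothetical _combine_heads that undoes the transpose and merges the heads
# back into the last dimension could look like this:
def _combine_heads(x, length, num_heads, depth):
    """Inverse of _split_heads.

    Input shape: (bs, num_heads, length, depth).
    Output shape: (bs, length, num_heads * depth).
    """
    x = tf.transpose(x, perm=[0, 2, 1, 3])  # (bs, length, num_heads, depth)
    return tf.reshape(x, (-1, length, num_heads * depth))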