def prune_completely_outside_window(boxlist, window, scope=None):
    """Drops the boxes that lie entirely outside the window, without clipping.

    clip_to_window also removes boxes that fall completely outside the window,
    but it additionally clips boxes that partially overflow. Here partially
    overflowing boxes are returned untouched.

    Args:
      boxlist: a BoxList holding M_in boxes.
      window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
        of the window.
      scope: name scope.

    Returns:
      pruned_boxlist: a new BoxList with all bounding boxes partially or fully
        in the window.
      valid_indices: a tensor with shape [M_out] indexing the valid bounding
        boxes in the input tensor.
    """
    with tf.name_scope(scope, 'PruneCompleteleyOutsideWindow'):
        box_y_min, box_x_min, box_y_max, box_x_max = tf.split(
            value=boxlist.get(), num_or_size_splits=4, axis=1)
        win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)

        # One violated edge test is enough to place the box outside. The
        # comparisons are inclusive, so a box that merely touches the window
        # border is pruned as well.
        starts_past_y_max = tf.greater_equal(box_y_min, win_y_max)
        starts_past_x_max = tf.greater_equal(box_x_min, win_x_max)
        ends_before_y_min = tf.less_equal(box_y_max, win_y_min)
        ends_before_x_min = tf.less_equal(box_x_max, win_x_min)
        violations = tf.concat(
            [starts_past_y_max, starts_past_x_max,
             ends_before_y_min, ends_before_x_min], 1)

        is_outside = tf.reduce_any(violations, 1)
        kept_positions = tf.where(tf.logical_not(is_outside))
        valid_indices = tf.reshape(kept_positions, [-1])
        return gather(boxlist, valid_indices), valid_indices
def _get_input_filter(width, width_threshold, length, length_threshold): """Boolean op for discarding input data based on string or image size Input: width : Tensor representing the image width width_threshold : Python numerical value (or None) representing the maximum allowable input image width length : Tensor representing the ground truth string length length_threshold : Python numerical value (or None) representing the maximum allowable input string length Returns: keep_input : Boolean Tensor indicating whether to keep a given input with the specified image width and string length """ keep_input = None if width_threshold!=None: keep_input = tf.less_equal(width, width_threshold) if length_threshold!=None: length_filter = tf.less_equal(length, length_threshold) if keep_input==None: keep_input = length_filter else: keep_input = tf.logical_and( keep_input, length_filter) if keep_input==None: keep_input = True else: keep_input = tf.reshape( keep_input, [] ) # explicitly make a scalar return keep_input
def get_mask(gt, num_classes, ignore_label):
    """Return the indices of the entries of `gt` that carry a usable label.

    An entry is kept when its value is at most `num_classes - 1` and differs
    from `ignore_label`. Axis 1 of the `tf.where` output is squeezed away, so
    the result is a flat vector of positions into `gt`.
    """
    is_known_class = tf.less_equal(gt, num_classes - 1)
    is_not_ignored = tf.not_equal(gt, ignore_label)
    keep = tf.logical_and(is_known_class, is_not_ignored)
    return tf.squeeze(tf.where(keep), 1)
def getReward_touch(objCoordinates, sampled_locs, numObjsPresented, objSize, batch_size):
    """Build a boolean tensor telling whether every presented object was touched.

    Only the first example of the batch (index 0) is evaluated; the loop over
    the batch is commented out and `batch_size` is not used.

    An object counts as touched when at least one glimpse lies within
    `objSize` of the object (as measured by `l2distance`) while also being at
    least `objSize` away from the all-zero `corner` point.

    Relies on names that are not defined in this function: `maxNumObj`,
    `nGlimpses`, `img_size`, `toMnistCoordinates_tf` and `l2distance`.

    Returns:
      The result of comparing the number of touched objects with
      `numObjsPresented[0]` cast to int32.
    """
    # reference point used to reject glimpses that sit at the origin corner
    corner = tf.zeros((2,), dtype=tf.float32, name=None)
    # reward = np.zeros(batch_size)

    # loop over all examples in the batch (disabled: only b = 0 is handled)
    # for b in xrange(batch_size):
    b = 0
    objCoords_b = objCoordinates[b,:,:]
    sampled_locs_b = sampled_locs[b,:,:]
    numObjsPres_b = numObjsPresented[b]

    nObjTouched = 0
    # for the b-th example in the batch, loop over all objects
    for j in xrange(maxNumObj):
        objCoords_cur = objCoords_b[j,:]
        nTimesObjTouched = 0
        # for the j-th object, loop over all glimpses to determine if it is fixated
        for i in xrange(nGlimpses):
            sampledCoord_cur = toMnistCoordinates_tf(sampled_locs_b[i,:], img_size)
            l2Diff_obj = l2distance(objCoords_cur, sampledCoord_cur)
            l2Diff_corner = l2distance(corner, sampledCoord_cur)
            isTouchingObj = tf.less_equal(l2Diff_obj, objSize)
            isNotTouchingCorner = tf.greater_equal(l2Diff_corner, objSize)
            # true if the current glimpse is fixated on an object
            tempTouchFlag = tf.cast(tf.logical_and(isTouchingObj, isNotTouchingCorner), tf.int32)
            nTimesObjTouched = nTimesObjTouched + tempTouchFlag
        # an object contributes one to the count as soon as any glimpse touched it
        nObjTouched = nObjTouched + tf.cast(tf.greater_equal(nTimesObjTouched,1), tf.int32)

    # for the b-th example in the batch, the result is true only if the number
    # of touched objects equals the number of presented objects
    R_bth = tf.equal(nObjTouched, tf.cast(numObjsPres_b, tf.int32))
    return R_bth
def ImageSample(inputs, borderMode='repeat'):
    """
    Sample the images using the given coordinates, by bilinear interpolation.
    This was described in the paper:
    `Spatial Transformer Networks <http://arxiv.org/abs/1506.02025>`_.

    This is equivalent to `torch.nn.functional.grid_sample`,
    up to some non-trivial coordinate transformation.

    This implementation returns pixel value at pixel (1, 1) for a floating
    point coordinate (1.0, 1.0). Note that this may not be what you need.

    Args:
        inputs (list): [images, coords]. images has shape NHWC.
            coords has shape (N, H', W', 2), where each pair of the last
            dimension is a (y, x) real-value coordinate.
        borderMode: either "repeat" or "constant" (zero-filled)

    Returns:
        tf.Tensor: a tensor named ``output`` of shape (N, H', W', C).
    """
    log_deprecated("ImageSample", "Please implement it in your own code instead!", "2018-12-01")
    image, mapping = inputs
    assert image.get_shape().ndims == 4 and mapping.get_shape().ndims == 4
    input_shape = image.get_shape().as_list()[1:]
    assert None not in input_shape, \
        "Images in ImageSample layer must have fully-defined shape"
    assert borderMode in ['repeat', 'constant']

    # Keep the unclamped coordinates around for the 'constant' border mask.
    raw_coords = mapping
    coords = tf.maximum(mapping, 0.0)
    lower = tf.floor(coords)
    upper = lower + 1
    frac = coords - lower
    inv_frac = 1.0 - frac  # bxh2xw2x2

    # Split every (y, x) pair so the two mixed corners can be assembled.
    lower_y, lower_x = tf.split(lower, 2, 3)
    upper_y, upper_x = tf.split(upper, 2, 3)
    lower_y_upper_x = tf.concat([lower_y, upper_x], 3)
    upper_y_lower_x = tf.concat([upper_y, lower_x], 3)
    frac_y, frac_x = tf.split(frac, 2, 3)
    inv_frac_y, inv_frac_x = tf.split(inv_frac, 2, 3)

    # Bilinear blend of the four neighbouring pixels.
    corner_terms = [
        sample(image, lower) * inv_frac_x * inv_frac_y,
        sample(image, upper) * frac_x * frac_y,
        sample(image, lower_y_upper_x) * inv_frac_y * frac_x,
        sample(image, upper_y_lower_x) * frac_y * inv_frac_x,
    ]
    ret = tf.add_n(corner_terms, name='sampled')

    if borderMode == 'constant':
        # Zero out every location whose original coordinate is out of range.
        max_coor = tf.constant([input_shape[0] - 1, input_shape[1] - 1], dtype=tf.float32)
        not_below = tf.greater_equal(raw_coords, 0.0)
        not_above = tf.less_equal(raw_coords, max_coor)
        in_range = tf.logical_and(not_below, not_above)  # bxh2xw2x2
        in_range = tf.reduce_all(in_range, [3])  # bxh2xw2 boolean
        in_range = tf.expand_dims(in_range, 3)
        ret = ret * tf.cast(in_range, tf.float32)
    return tf.identity(ret, name='output')
def __init__(self, embedding=None, hidden_state_d=100, max_length=80, learning_rate=0.001, dropout_rate=0.5, vocab_size=400001, embedding_d=300, num_classes=2):
    """Build the two-direction LSTM classifier graph and start a session.

    Creates the input placeholders, the embedding lookup, one LSTM per
    direction, a softmax output layer, the Adam training op and an accuracy
    score, then opens a `tf.Session` and runs the variable initializer.

    Args:
      embedding: forwarded to `self.init_embedding`.
      hidden_state_d: number of units of each LSTM cell.
      max_length: number of token ids per example in `self.data`; sequence
        lengths are clamped to this value before the sequence is reversed.
      learning_rate: learning rate of the Adam optimizer.
      dropout_rate: value passed as `keep_prob` to `tf.nn.dropout`.
      vocab_size: forwarded to `self.init_embedding`.
      embedding_d: forwarded to `self.init_embedding`.
      num_classes: number of output units of the softmax layer.
    """
    # Inputs: token ids, true sequence lengths and a float label per example.
    self.data = tf.placeholder(dtype=tf.int32, shape=[None, max_length])
    self.len = tf.placeholder(dtype=tf.int32, shape=[None])
    self.label = tf.placeholder(dtype=tf.float32, shape=[None])
    self.neg_label = 1 - self.label
    # Two-column target: column 0 holds `label`, column 1 holds `1 - label`.
    # NOTE(review): the target always has two columns while the logits have
    # `num_classes` columns - confirm that only num_classes=2 is supported.
    self.co_label = tf.transpose(tf.reshape(tf.concat(0, [self.label, self.neg_label]), [2, -1]))

    self.init_embedding(embedding, vocab_size, embedding_d)

    # filter len to maxlength: lengths above max_length are replaced by max_length
    self.maxlen = tf.cast(tf.fill([tf.shape(self.len)[0]], max_length), tf.int64)
    self.filter = tf.less_equal(tf.cast(self.len, tf.int64), self.maxlen)
    self.clean_len = tf.select(self.filter, tf.cast(self.len, tf.int64), self.maxlen)

    self.vec_data = tf.nn.embedding_lookup(self.embedding, self.data)
    # Reverse only the first `clean_len` steps of every sequence.
    self.reversed_vec_data = tf.reverse_sequence(self.vec_data, seq_dim=1, seq_lengths=self.clean_len)

    # NOTE(review): both RNNs receive the unclamped `self.len` rather than
    # `self.clean_len` - confirm this is intended.
    with tf.variable_scope('left2right'):
        left2right_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_state_d, state_is_tuple=True)
        self.output, self.state = tf.nn.dynamic_rnn(
            left2right_lstm_cell,
            self.vec_data,
            dtype=tf.float32,
            sequence_length=self.len,
        )

    with tf.variable_scope('right2left'):
        right2left_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_state_d, state_is_tuple=True)
        self.reversed_output, self.reversed_state = tf.nn.dynamic_rnn(
            right2left_lstm_cell,
            self.reversed_vec_data,
            dtype=tf.float32,
            sequence_length=self.len,
        )

    # Concatenate the selected output of each direction along axis 1.
    self.last = BiLSTM.last_relevant(self.output, self.len)
    self.reversed_last = BiLSTM.last_relevant(self.reversed_output, self.len)
    self.final_output = tf.concat(1, [self.last, self.reversed_last])

    # NOTE(review): the argument named `dropout_rate` is used as the KEEP
    # probability here - confirm which meaning callers expect.
    self.dropout_last = tf.nn.dropout(self.final_output, keep_prob=dropout_rate)

    self.weight = tf.Variable(tf.truncated_normal([hidden_state_d * 2, num_classes], stddev=0.1))
    self.bias = tf.Variable(tf.constant(0.1, shape=[num_classes]))

    # Prediction uses the features without dropout; the cost uses the
    # dropped-out features. Both share the same weight and bias.
    self.prediction = tf.nn.softmax(tf.matmul(self.final_output, self.weight) + self.bias)
    # The cost tensor is handed to the optimizer without an explicit reduction.
    self.cost = tf.nn.softmax_cross_entropy_with_logits(tf.matmul(self.dropout_last, self.weight) + self.bias, self.co_label)
    self.train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.cost)
    self.init_op = tf.initialize_all_variables()

    self.prediction_a = tf.argmax(self.prediction, dimension=1)
    self.prediction_b = tf.argmax(self.co_label, dimension=1)
    # NOTE(review): both operands of `/` are integer tensors; whether this
    # yields a fraction or an integer quotient depends on the Python / TF
    # division semantics in use - verify.
    self.score = tf.reduce_sum(tf.cast(tf.equal(self.prediction_a, self.prediction_b), dtype=tf.int32)) / tf.size(self.label)

    # Side effect: a session is opened and the variables are initialized here.
    self.sess = tf.Session()
    self.sess.run(self.init_op)
def test_setup(self):
    """Build the evaluation graph: input reader, network, metrics and loader.

    Sets `self.coord`, `self.image_batch`, `self.label_batch`, `self.pred`,
    the streaming accuracy / mean-IoU tensors with their update ops,
    `self.confusion_matrix` and `self.loader`. Exits the process with status
    -1 when `self.conf.encoder_name` is not one of the supported encoders.
    """
    # Create queue coordinator.
    self.coord = tf.train.Coordinator()

    # Load reader
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            self.conf.data_dir,
            self.conf.valid_data_list,
            None,  # the images have different sizes
            False,  # no data-aug
            False,  # no data-aug
            self.conf.ignore_label,
            IMG_MEAN,
            self.coord)
        image, label = reader.image, reader.label  # [h, w, 3 or 1]
    # Add one batch dimension [1, h, w, 3 or 1]
    self.image_batch, self.label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0)

    # Create network
    encoder_name = self.conf.encoder_name
    if encoder_name not in ['res101', 'res50', 'deeplab']:
        print('encoder_name ERROR!')
        print("Please input: res101, res50, or deeplab")
        sys.exit(-1)
    if encoder_name == 'deeplab':
        net = Deeplab_v2(self.image_batch, self.conf.num_classes, False)
    else:
        net = ResNet_segmentation(self.image_batch, self.conf.num_classes, False, encoder_name)

    # predictions: upsample the logits to the input size, then take the argmax
    logits = net.outputs
    upsampled = tf.image.resize_bilinear(logits, tf.shape(self.image_batch)[1:3,])
    class_map = tf.argmax(upsampled, axis=3)
    pred = tf.expand_dims(class_map, dim=3)
    self.pred = tf.reshape(pred, [-1,])

    # labels
    flat_labels = tf.reshape(self.label_batch, [-1,])
    # Ignoring all labels greater than or equal to n_classes.
    is_valid = tf.less_equal(flat_labels, self.conf.num_classes - 1)
    weights = tf.cast(is_valid, tf.int32)

    # fix for tf 1.3.0: replace the ignored labels by the cast of the mask
    gt = tf.where(is_valid, flat_labels, tf.cast(is_valid, tf.uint8))

    # Pixel accuracy
    self.accu, self.accu_update_op = tf.contrib.metrics.streaming_accuracy(
        self.pred, gt, weights=weights)

    # mIoU
    self.mIoU, self.mIou_update_op = tf.contrib.metrics.streaming_mean_iou(
        self.pred, gt, num_classes=self.conf.num_classes, weights=weights)

    # confusion matrix
    self.confusion_matrix = tf.contrib.metrics.confusion_matrix(
        self.pred, gt, num_classes=self.conf.num_classes, weights=weights)

    # Loader for loading the checkpoint
    self.loader = tf.train.Saver(var_list=tf.global_variables())
def ImageSample(inputs, borderMode='repeat'):
    """
    Sample the template image using the given coordinate, by bilinear
    interpolation. This was described in the paper:
    `Spatial Transformer Networks <http://arxiv.org/abs/1506.02025>`_.

    Args:
        inputs (list): [template, coords]. template has shape NHWC.
            coords has shape (N,H',W',2), where each pair of the last
            dimension is a (y, x) real-value coordinate.
        borderMode: either "repeat" or "constant" (zero-filled)

    Returns:
        tf.Tensor: a tensor named ``output`` of shape (N,H',W',C).
    """
    # TODO borderValue
    template, mapping = inputs
    assert template.get_shape().ndims == 4 and mapping.get_shape().ndims == 4
    input_shape = template.get_shape().as_list()[1:]
    assert None not in input_shape, \
        "Images in ImageSample layer must have fully-defined shape"
    assert borderMode in ['repeat', 'constant']

    # The unclamped coordinates are only needed for the 'constant' mask.
    unclamped = mapping
    clamped = tf.maximum(mapping, 0.0)
    floor_yx = tf.floor(clamped)
    ceil_yx = floor_yx + 1
    weight_hi = clamped - floor_yx
    weight_lo = 1.0 - weight_hi  # bxh2xw2x2

    floor_y, floor_x = tf.split(floor_yx, 2, 3)
    ceil_y, ceil_x = tf.split(ceil_yx, 2, 3)
    floor_y_ceil_x = tf.concat([floor_y, ceil_x], 3)
    ceil_y_floor_x = tf.concat([ceil_y, floor_x], 3)

    weight_hi_y, weight_hi_x = tf.split(weight_hi, 2, 3)
    weight_lo_y, weight_lo_x = tf.split(weight_lo, 2, 3)

    # Weighted sum over the four pixels surrounding each coordinate.
    top_left = sample(template, floor_yx) * weight_lo_x * weight_lo_y
    bottom_right = sample(template, ceil_yx) * weight_hi_x * weight_hi_y
    top_right = sample(template, floor_y_ceil_x) * weight_lo_y * weight_hi_x
    bottom_left = sample(template, ceil_y_floor_x) * weight_hi_y * weight_lo_x
    ret = tf.add_n([top_left, bottom_right, top_right, bottom_left], name='sampled')

    if borderMode == 'constant':
        # Locations whose original (y, x) falls outside the template become 0.
        max_coor = tf.constant([input_shape[0] - 1, input_shape[1] - 1], dtype=tf.float32)
        lower_ok = tf.greater_equal(unclamped, 0.0)
        upper_ok = tf.less_equal(unclamped, max_coor)
        inside = tf.logical_and(lower_ok, upper_ok)  # bxh2xw2x2
        inside = tf.reduce_all(inside, [3])  # bxh2xw2 boolean
        inside = tf.expand_dims(inside, 3)
        ret = ret * tf.cast(inside, tf.float32)
    return tf.identity(ret, name='output')
def example_to_bucket_id(example_input, example_target):
    """Return int64 bucket id for this example, calculated based on length.

    The id is the smallest index `k` for which
    `buckets_min[k] <= length < buckets_max[k]`.
    """
    seq_length = _get_example_length((example_input, example_target))

    # TODO: investigate whether removing code branching improves performance.
    reaches_lower_bound = tf.less_equal(buckets_min, seq_length)
    below_upper_bound = tf.less(seq_length, buckets_max)
    fits_bucket = tf.logical_and(reaches_lower_bound, below_upper_bound)
    return tf.reduce_min(tf.where(fits_bucket))
def losses(self, targets, logits, seq_len, scope='ctc_losses'):
    """Define the network losses.

    Returns the mean CTC loss over the batch. The loss is computed
    batch-major (`time_major=False`) and with
    `ignore_longer_outputs_than_inputs=True`.
    """
    longest_target = targets.dense_shape[1]
    longest_input = tf.reduce_max(tf.cast(seq_len, tf.int64))
    # NOTE: this comparison is only attached as a control dependency; it is
    # not wrapped in an assert op.
    length_check = tf.less_equal(longest_target, longest_input)
    with tf.control_dependencies([length_check]):
        with tf.name_scope(scope):
            per_example_loss = tf.nn.ctc_loss(
                targets, logits, seq_len,
                time_major=False,
                ignore_longer_outputs_than_inputs=True)
            cost = tf.reduce_mean(per_example_loss)
    return cost
def _log_prob_single(tensor):
    """Return the negated Gaussian log-density of `tensor`, summed over elements.

    The standard deviation is `sqrt(scale_factor / variance_factor)`, where
    the variance factor comes from `calculate_variance_factor`. When
    `truncated` is set, the log of `TRUNCATED_NORMALIZER` is subtracted and
    entries whose standardized value is outside the open interval (-2, 2)
    select `-inf`.
    """
    variance_factor = calculate_variance_factor(tensor.shape, mode)
    stddev = tf.sqrt(scale_factor / variance_factor)
    z = (tensor - mean) / stddev
    elementwise_log_prob = -(z ** 2 + tf.log(2 * pi)) / 2
    log_prob = tf.reduce_sum(elementwise_log_prob)
    if truncated:
        from numpy import inf
        log_prob = log_prob - tf.log(TRUNCATED_NORMALIZER)
        too_small = tf.less_equal(z, -2)
        too_large = tf.greater_equal(z, 2)
        out_of_support = tf.logical_or(too_small, too_large)
        log_prob = tf.where(out_of_support, -inf, log_prob)
    # Return negative as this is a regularizer
    return -log_prob
def _log_prob_single(tensor):
    """Return the negated log of the summed densities of two shifted Gaussians.

    Two Gaussian log-densities are evaluated for `tensor`: one centred at
    `mean + 3 * stddev` and one centred at `mean - 3 * stddev`, both with the
    same `stddev`. Their element-summed log-probabilities are combined with a
    shifted log-sum-exp and the result is negated.

    Uses `scale_factor`, `mode`, `mean`, `pi`, `truncated`,
    `TRUNCATED_NORMALIZER` and `calculate_variance_factor` from outside this
    function.
    """
    stddev = tf.sqrt(scale_factor / calculate_variance_factor(tensor.shape, mode))

    # Component 1: standardized distance to (mean + 3 * stddev).
    z1 = (tensor - mean - 3 * stddev) / stddev
    log_prob_z1 = - (z1 ** 2 + tf.log(2 * pi)) / 2
    log_prob1 = tf.reduce_sum(log_prob_z1)

    # Component 2: standardized distance to (mean - 3 * stddev).
    z2 = (tensor - mean + 3 * stddev) / stddev
    log_prob_z2 = - (z2 ** 2 + tf.log(2 * pi)) / 2
    log_prob2 = tf.reduce_sum(log_prob_z2)

    if truncated:
        from numpy import inf
        # Each component is renormalized and selects -inf wherever its own
        # standardized value is outside the open interval (-2, 2).
        log_prob1 -= tf.log(TRUNCATED_NORMALIZER)
        invalid = tf.logical_or(tf.less_equal(z1, -2), tf.greater_equal(z1, 2))
        log_prob1 = tf.where(invalid, -inf, log_prob1)
        log_prob2 -= tf.log(TRUNCATED_NORMALIZER)
        invalid = tf.logical_or(tf.less_equal(z2, -2), tf.greater_equal(z2, 2))
        log_prob2 = tf.where(invalid, -inf, log_prob2)

    # Shifted log-sum-exp: `m` is subtracted before exponentiating and added
    # back afterwards, so algebraically it cancels and the value equals
    # log(exp(log_prob1) + exp(log_prob2)).
    m = tf.maximum(log_prob1, log_prob2) - tf.log(2.0)
    log_prob1 -= m
    log_prob2 -= m
    log_prob = m + tf.log(tf.exp(log_prob1) + tf.exp(log_prob2))
    # Return negative as this is a regularizer
    return - log_prob
def example_to_bucket_id(example):
    """Return int64 id of the length bucket for this example.

    Bucket `k` covers lengths in `[lower[k], upper[k])`, where the lower
    bounds are the int32 minimum followed by the boundaries and the upper
    bounds are the boundaries followed by the int32 maximum.
    """
    seq_length = example_length_fn(example)

    int32_limits = np.iinfo(np.int32)
    boundaries = list(bucket_boundaries)
    lower_bounds = [int32_limits.min] + boundaries
    upper_bounds = boundaries + [int32_limits.max]

    at_or_above_lower = tf.less_equal(lower_bounds, seq_length)
    below_upper = tf.less(seq_length, upper_bounds)
    matching_buckets = tf.where(tf.logical_and(at_or_above_lower, below_upper))
    return tf.reduce_min(matching_buckets)
def _subsample_selection_to_desired_neg_pos_ratio(self, indices, match, max_negatives_per_positive, min_negatives_per_image=0):
    """Subsample a collection of selected indices to a desired neg:pos ratio.

    Takes a subset of M indices (indexing into a larger collection of N
    anchors, M<N) which are labeled positive/negative through a Match object
    (matched indices are positive, unmatched indices are negative). Returns a
    subset of the provided indices that retains all positives as well as up
    to the first K negatives, where:
      K = max(min_negatives_per_image,
              floor(max_negatives_per_positive * num_positives)).

    For example, if indices=[2, 4, 5, 7, 9, 10] (indexing into 12 anchors),
    with positives=[2, 5], negatives=[4, 7, 9, 10] and
    max_negatives_per_positive=1, then the returned subset of indices is
    [2, 4, 5, 7].

    Args:
      indices: An integer tensor of shape [M] representing a collection of
        selected anchor indices.
      match: A matcher.Match object encoding the match between anchors and
        groundtruth boxes for a given image, with rows of the Match objects
        corresponding to groundtruth boxes and columns corresponding to
        anchors.
      max_negatives_per_positive: (float) maximum number of negatives for
        each positive anchor.
      min_negatives_per_image: minimum number of negative anchors for a given
        image. Allows sampling negatives in an image without any positive
        anchors.

    Returns:
      selected_indices: An integer tensor of shape [M'] representing a
        collection of selected anchor indices with M' <= M.
      num_positives: An integer tensor representing the number of positive
        examples in the selected set of indices.
      num_negatives: An integer tensor representing the number of negative
        examples in the selected set of indices.
    """
    is_positive = tf.gather(match.matched_column_indicator(), indices)
    is_negative = tf.gather(match.unmatched_column_indicator(), indices)

    num_positives = tf.reduce_sum(tf.to_int32(is_positive))
    ratio_budget = tf.to_int32(
        max_negatives_per_positive * tf.to_float(num_positives))
    max_negatives = tf.maximum(min_negatives_per_image, ratio_budget)

    # Running count of negatives: positions stay eligible for as long as the
    # count has not exceeded the budget.
    negatives_so_far = tf.cumsum(tf.to_int32(is_negative))
    within_budget = tf.less_equal(negatives_so_far, max_negatives)

    kept_positions = tf.where(tf.logical_or(is_positive, within_budget))
    num_negatives = tf.size(kept_positions) - num_positives
    selected_indices = tf.reshape(tf.gather(indices, kept_positions), [-1])
    return (selected_indices, num_positives, num_negatives)
def _length_constraints(length, maximum_length): # Work with lists of lengths which correspond to the general multi source case. if not isinstance(length, list): length = [length] if not isinstance(maximum_length, list): maximum_length = [maximum_length] # Unset maximum lengths are set to None (i.e. no constraint). maximum_length += [None] * (len(length) - len(maximum_length)) constraints = [] for l, maxlen in zip(length, maximum_length): constraints.append(tf.greater(l, 0)) if maxlen is not None: constraints.append(tf.less_equal(l, maxlen)) return constraints
def sampling_loop(prev_state, i):
    """
    Loop function performing the scheduled sampling
    (http://arxiv.org/pdf/1506.03099v3.pdf) with the inverse sigmoid decay.

    A uniform random tensor shaped like the first ground-truth embedding is
    compared with the decayed threshold to choose, element by element,
    between the ground-truth embedding at step `i` and the output of `loop`.
    """
    step = tf.to_float(self.learning_step)
    decay = tf.exp(step / scheduled_sampling)
    threshold = scheduled_sampling / (scheduled_sampling + decay)

    draw = tf.random_uniform(tf.shape(embedded_gt_inputs[0]))
    use_ground_truth = tf.less_equal(draw, threshold)

    ground_truth_input = embedded_gt_inputs[i]
    generated_input = loop(prev_state, i)
    return tf.select(use_ground_truth, ground_truth_input, generated_input)
def _resize_aux(image, new_shorter_edge_tensor):
    """Bilinearly resize `image` so its shorter edge equals the given tensor.

    The longer edge is obtained from `_compute_longer_edge`. When height and
    width are equal, the height is treated as the shorter edge.
    """
    dims = tf.shape(image)
    height = dims[0]
    width = dims[1]
    height_is_shorter = tf.less_equal(height, width)

    def _height_is_short_edge():
        new_width = _compute_longer_edge(height, width, new_shorter_edge_tensor)
        return (new_shorter_edge_tensor, new_width)

    def _width_is_short_edge():
        new_height = _compute_longer_edge(width, height, new_shorter_edge_tensor)
        return (new_height, new_shorter_edge_tensor)

    target_size = cf.cond(
        height_is_shorter, _height_is_short_edge, _width_is_short_edge)

    # workaround since tf.image.resize_images() does not work
    batched = tf.expand_dims(image, 0)
    resized = tf.image.resize_bilinear(batched, tf.pack(target_size))
    return tf.squeeze(resized, [0])
def tf_logaddexp(t):
    """Compute log(sum(exp(t - c))) + c for a per-row shift `c`.

    For every row of `t` (reductions run over axis 1) the shift is the row
    maximum when the largest absolute value exceeds the maximum, and the row
    minimum otherwise.

    NOTE(review): `c` is built by concatenating the shifts of the first group
    of rows followed by those of the second group, so its entries are ordered
    by group rather than by original row position - confirm this lines up
    with the rows of `t`.
    NOTE(review): `tf.reduce_sum` is called without an axis, so the sum runs
    over all elements of `exp(t - c)` - confirm this is intended.
    """
    tmax = tf.reduce_max(t, 1)
    tabsmax = tf.reduce_max(tf.abs(t), 1)
    tmin = tf.reduce_min(t, 1)
    # `bools` is assigned but not read afterwards in this function.
    bools = tf.where(tf.greater(tabsmax, tmax))
    c = tf.expand_dims(
        tf.concat(
            0,
            [
                # rows whose largest magnitude is larger than their maximum
                tf.gather(tmax, tf.where(tf.greater(tabsmax, tmax))),
                # all remaining rows
                tf.gather(tmin, tf.where(tf.less_equal(tabsmax, tmax))),
            ],
        ),
        -1,
    )
    return tf.log(tf.reduce_sum(tf.exp(t - c))) + c
def gather_nstep(self, num_steps, indices, keys=None):
    """Returns elements at the specified indices from the buffer.

    Every start index is expanded into `num_steps` consecutive positions,
    wrapped around the buffer size, and those positions are gathered from
    each requested tensor.

    Args:
      num_steps: (integer) length of trajectories to return.
      indices: (list of rank num_steps int Tensor) indices in the buffer to
        retrieve elements from for multiple trajectories. Each Tensor in the
        list represents the indices for a trajectory.
      keys: List of keys of tensors to retrieve. If None retrieve all.

    Returns:
      A list of list-of-tensors, where each element in the list is obtained
      by indexing one of the tensors in the buffer.

    Raises:
      ValueError: If gather is called before calling the add function.
      tf.errors.InvalidArgumentError: If indices are bigger than the number
        of items in the buffer.
    """
    if not self._tensors:
        raise ValueError('The add function must be called before calling gather.')
    selected_keys = self._tensors.keys() if keys is None else keys

    with tf.name_scope('Gather'):
        furthest_index = tf.to_int64(tf.reduce_max(indices) + num_steps)
        within_bounds = tf.less_equal(furthest_index, self.get_num_adds())
        index_bound_assert = tf.Assert(
            within_bounds, ['Trajectory indices go out of bounds.'])

        def _expand_to_trajectory(start):
            return tf.mod(tf.range(start, start + num_steps), self._buffer_size)

        with tf.control_dependencies([index_bound_assert]):
            indices = tf.map_fn(_expand_to_trajectory, indices, dtype=tf.int64)

        batch = []
        for key in selected_keys:

            # `key` and `num_steps` are bound as defaults so each closure
            # keeps the values of its own loop iteration.
            def SampleTrajectories(trajectory_indices, key=key, num_steps=num_steps):
                trajectory_indices.set_shape([num_steps])
                return tf.gather(self._tensors[key], trajectory_indices, name=key)

            sampled = tf.map_fn(
                SampleTrajectories, indices, dtype=self._tensors[key].dtype)
            batch.append(sampled)
        return batch
def fast_rcnn_find_positive_negative_samples(self, reference_boxes):
    '''
    Assign a ground-truth box and a label to every reference box for the
    second training stage.

    A reference box is positive (an object) when its best IoU with any
    ground-truth box reaches the positive IoU threshold; otherwise it is
    background. When the angle condition is enabled, the last column of the
    reference box and of its matched ground-truth box must additionally
    differ by no more than the angle threshold.

    :param reference_boxes: [num_of_input_boxes, 5]
    :return:
    reference_boxes_mattached_gtboxes: the ground-truth box with the highest
        IoU for each reference box, shape: [num_of_input_boxes, 5]
    object_mask: float mask per reference box, 1 for object and 0 for
        background, shape: [num_of_input_boxes, ]
    label: int32 class of the matched ground-truth box, multiplied by the
        positive flag so that background is 0, shape: [num_of_input_boxes, ]
    '''
    with tf.variable_scope('fast_rcnn_find_positive_negative_samples'):
        gt_coordinates = tf.reshape(self.gtboxes_and_label[:, :-1], [-1, 5])
        gtboxes = tf.cast(gt_coordinates, tf.float32)  # [M, 5]

        ious = tf_wrapper.get_iou_matrix_tf(
            reference_boxes, gtboxes, use_gpu=cfgs.IOU_USE_GPU, gpu_id=0)

        # Best matching ground-truth box per reference box.
        matchs = tf.cast(tf.argmax(ious, axis=1), tf.int32)  # [N, ]
        matched_gtboxes = tf.gather(gtboxes, matchs)  # [N, 5]
        best_iou = tf.reduce_max(ious, axis=1)  # [N, ]

        is_positive = tf.greater_equal(
            best_iou, self.fast_rcnn_positives_iou_threshold)
        if self.use_angle_condition:
            # angle condition
            gt_angles = matched_gtboxes[:, -1]
            reference_angles = reference_boxes[:, -1]
            angle_is_close = tf.less_equal(
                tf.abs(gt_angles - reference_angles), self.boxes_angle_threshold)
            is_positive = tf.logical_and(is_positive, angle_is_close)
        positives = tf.cast(is_positive, tf.int32)

        object_mask = tf.cast(positives, tf.float32)  # [N, ]

        # Background boxes get label 0 through the multiplication below.
        matched_labels = tf.gather(self.gtboxes_and_label[:, -1], matchs)  # [N, ]
        label = tf.cast(matched_labels, tf.int32) * positives

        return matched_gtboxes, object_mask, label
def sub():
    """Return `w` multiplied by mask `i`, refreshing the mask when pruning.

    With `do_prune` set, the mask is reassigned twice: entries equal to 1
    become 0 where `w_abs` is at most 0.9 times the threshold, then entries
    equal to 0 become 1 where `w_abs` exceeds 1.1 times the threshold. The
    threshold is `max(w_abs_mean + beta * w_abs_std, eps)`.
    """
    if do_prune:
        def scaled_threshold(factor):
            return factor * tf.maximum(w_abs_mean + self.beta * w_abs_std, self.eps)

        # Step 1: switch off active entries that fell below the lower band.
        current = self.masks[i]
        is_active = tf.equal(current, 1)
        is_small = tf.less_equal(w_abs, scaled_threshold(0.9))
        switch_off = tf.logical_and(is_active, is_small)
        self.masks[i] = tf.assign(
            current, tf.where(switch_off, tf.zeros_like(current), current))

        # Step 2: switch pruned entries back on above the upper band.
        current = self.masks[i]
        is_pruned = tf.equal(current, 0)
        is_large = tf.greater(w_abs, scaled_threshold(1.1))
        switch_on = tf.logical_and(is_pruned, is_large)
        self.masks[i] = tf.assign(
            current, tf.where(switch_on, tf.ones_like(current), current))
    return w * self.masks[i]
def accum_grad_multiple_step(grad, var, train_step, num_accum_steps):
    """
    Accumulates the gradient in a non-trainable variable. The accumulator is
    overwritten with `grad` whenever `train_step % num_accum_steps <= 0`,
    otherwise `grad` is added to it.

    :param tf.Tensor|tf.IndexedSlices grad:
    :param tf.Variable var:
    :param tf.Tensor train_step: int, scalar
    :param int num_accum_steps:
    :return: modified grad
    :rtype: tf.Tensor
    """
    from TFUtil import reuse_name_scope_of_tensor, get_base_name

    scope_postfix = "/%s_accum_grad" % get_base_name(grad)
    with reuse_name_scope_of_tensor(grad, postfix=scope_postfix):
        accumulator = tf.get_variable(
            name="var_accum_grad",
            shape=var.get_shape().as_list(),
            dtype=grad.dtype,
            initializer=tf.zeros_initializer(),
            trainable=False)
        starts_new_cycle = tf.less_equal(tf.mod(train_step, num_accum_steps), 0)

        def _overwrite():
            return tf.assign(accumulator, grad)

        def _add():
            return tf.assign_add(accumulator, grad)

        return tf.cond(starts_new_cycle, _overwrite, _add)
def make_dense_examples_and_variables_dicts(dense_features_values, weights, labels):
    """Creates examples and variables dictionaries for dense features.

    Variables shapes are inferred from the list of dense feature values
    passed as argument.

    Args:
      dense_features_values: The values of the dense features
      weights: The example weights.
      labels: The example labels.

    Returns:
      One dictionary for the examples and one for the variables.
    """
    dense_tensors, dense_weights = [], []
    for feature_values in dense_features_values:
        feature = tf.convert_to_tensor(feature_values, dtype=tf.float32)
        rank_is_valid = tf.less_equal(tf.rank(feature), 2)
        check_shape_op = tf.Assert(
            rank_is_valid,
            ['dense_tensor shape must be [batch_size, dimension] or [batch_size]'])
        # Reshape to [batch_size, dense_column_dimension].
        with tf.control_dependencies([check_shape_op]):
            batch_size = feature.get_shape().as_list()[0]
            feature = tf.reshape(feature, [batch_size, -1])
        dense_tensors.append(feature)

        # Add variables of shape [feature_column_dimension].
        column_dimension = feature.get_shape().as_list()[1]
        dense_weights.append(
            tf.Variable(tf.zeros([column_dimension], dtype=tf.float32)))

    examples_dict = {
        'sparse_features': [],
        'dense_features': dense_tensors,
        'example_weights': weights,
        'example_labels': labels,
        'example_ids': ['%d' % i for i in range(len(labels))],
    }
    variables_dict = {
        'sparse_features_weights': [],
        'dense_features_weights': dense_weights,
    }
    return examples_dict, variables_dict
def assert_box_normalized(boxes, maximum_normalized_coordinate=1.1):
    """Asserts the input box tensor is normalized.

    The boxes count as normalized when their smallest coordinate is at least
    0 and their largest coordinate is at most `maximum_normalized_coordinate`.

    Args:
      boxes: a tensor of shape [N, 4] where N is the number of boxes.
      maximum_normalized_coordinate: Maximum coordinate value to be considered
        as normalized, default to 1.1.

    Returns:
      a tf.Assert op which fails when the input box tensor is not normalized.
      The op carries `boxes` as the data reported on failure.
    """
    smallest_coordinate = tf.reduce_min(boxes)
    largest_coordinate = tf.reduce_max(boxes)
    below_upper_limit = tf.less_equal(
        largest_coordinate, maximum_normalized_coordinate)
    non_negative = tf.greater_equal(smallest_coordinate, 0)
    is_normalized = tf.logical_and(below_upper_limit, non_negative)
    return tf.Assert(is_normalized, [boxes])
def flip_dim(tensor_list, prob=0.5, dim=1):
    """Randomly flips a dimension of the given tensors.

    The decision to flip is made once for all tensors: either all of them or
    none of them are flipped. tf.random_flip_left_right and
    tf.random_flip_up_down are not used so that the probability can be
    controlled and the same decision is applied across the tensors.

    Args:
      tensor_list: A list of `Tensors` with the same number of dimensions.
      prob: The probability of flipping.
      dim: The dimension to flip, 0, 1, ..

    Returns:
      outputs: A list of the possibly flipped `Tensors` as well as an
        indicator `Tensor` at the end whose value is `True` if the inputs
        were flipped and `False` otherwise.

    Raises:
      ValueError: If dim is negative or greater than the dimension of a
        `Tensor`.
    """
    random_value = tf.random_uniform([])

    def flip():
        reversed_tensors = []
        for tensor in tensor_list:
            rank = len(tensor.get_shape().as_list())
            if not 0 <= dim < rank:
                raise ValueError('dim must represent a valid dimension.')
            reversed_tensors.append(tf.reverse_v2(tensor, [dim]))
        return reversed_tensors

    def keep():
        return tensor_list

    is_flipped = tf.less_equal(random_value, prob)
    outputs = tf.cond(is_flipped, flip, keep)
    # Wrap a non-sequence result so the indicator can be appended.
    if not isinstance(outputs, (list, tuple)):
        outputs = [outputs]
    outputs.append(is_flipped)
    return outputs
def provide_dataset(self):
    """Provides dataset (audio, labels) of nsynth.

    Records are parsed, restricted to instrument source 1 and to pitches in
    [24, 84], and finally reduced to (wave, one-hot pitch label) pairs.
    """
    length = 64000
    channels = 1

    pitch_counts = self.get_pitch_counts()
    pitches = sorted(pitch_counts.keys())
    label_index_table = tf.contrib.lookup.index_table_from_tensor(
        sorted(pitches), dtype=tf.int64)

    def _parse_nsynth(record):
        """Parsing function for NSynth dataset."""
        features = {
            'pitch': tf.FixedLenFeature([1], dtype=tf.int64),
            'audio': tf.FixedLenFeature([length], dtype=tf.float32),
            'qualities': tf.FixedLenFeature([10], dtype=tf.int64),
            'instrument_source': tf.FixedLenFeature([1], dtype=tf.int64),
            'instrument_family': tf.FixedLenFeature([1], dtype=tf.int64),
        }
        example = tf.parse_single_example(record, features)
        wave, label = example['audio'], example['pitch']
        # Add leading and trailing axes for crop_or_pad, then take element 0.
        expanded_wave = wave[tf.newaxis, :, tf.newaxis]
        wave = spectral_ops.crop_or_pad(expanded_wave, length, channels)[0]
        label_index = label_index_table.lookup(label)
        one_hot_label = tf.one_hot(label_index, depth=len(pitches))[0]
        return wave, one_hot_label, label, example['instrument_source']

    def _is_acoustic(w, l, p, s):
        return tf.equal(s, 1)[0]

    def _pitch_at_least_24(w, l, p, s):
        return tf.greater_equal(p, 24)[0]

    def _pitch_at_most_84(w, l, p, s):
        return tf.less_equal(p, 84)[0]

    def _wave_and_label(w, l, p, s):
        return (w, l)

    dataset = self._get_dataset_from_path()
    dataset = dataset.map(_parse_nsynth, num_parallel_calls=4)

    # Filter just acoustic instruments (as in the paper)
    dataset = dataset.filter(_is_acoustic)
    # Filter just pitches 24-84
    dataset = dataset.filter(_pitch_at_least_24)
    dataset = dataset.filter(_pitch_at_most_84)
    return dataset.map(_wave_and_label)
def get_train_choice(state_ph,var_dict,random_t,mask,dropout_keep_prob):
    """Build the score, the sampling weights and two action choices.

    Returns:
      score: the output of `get_q`.
      weight: masked sampling weights derived from the score clipped to
        [-2.5, 1.5], shifted, normalized and squeezed into [0.05, 1].
      train_choice: index sampled per row in proportion to `weight`, found by
        locating a uniform draw in the normalized cumulative weights.
      cal_choice: per-row argmax of the masked `score + random_t`.
    """
    score = get_q(state_ph,var_dict,dropout_keep_prob)

    # --- sampling weights ---
    clipped = tf.minimum(tf.maximum(score, -2.5), 1.5)
    shifted = clipped - tf.reduce_min(clipped)
    shifted = shifted + 0.00001 * mask
    normalized = shifted / tf.reduce_max(shifted)
    squeezed = normalized * (1-0.05)
    squeezed = squeezed + 0.05
    weight = squeezed * mask

    # --- sample one index per row in proportion to the weights ---
    weight_sum = tf.reduce_sum(weight,reduction_indices=[1])
    upper_edges = tf.cumsum(weight, axis=1, exclusive=False)
    lower_edges = tf.cumsum(weight, axis=1, exclusive=True)
    row_totals = tf.reshape(weight_sum,[-1,1])
    upper_fraction = upper_edges / row_totals
    lower_fraction = lower_edges / row_totals
    draw = tf.random_uniform(tf.shape(row_totals), dtype=tf.float32)
    below_upper = tf.less(draw, upper_fraction)
    above_lower = tf.less_equal(lower_fraction, draw)
    hit = tf.to_float(tf.logical_and(below_upper, above_lower))
    train_choice = tf.argmax(hit, dimension=1)

    # --- greedy choice on the perturbed score ---
    perturbed = score + random_t
    perturbed = perturbed - tf.reduce_min(perturbed)
    perturbed = perturbed * mask
    perturbed = perturbed + mask
    cal_choice = tf.argmax(perturbed, dimension=1)

    return score, weight, train_choice, cal_choice
def precision_recall_values(xvals, precision, recall, name=None):
    """Compute values on the precision/recall curve.

    Precision is padded with 0 at both ends and recall with 0 and 1, then the
    reversed cumulative maximum of precision is taken. For every x, the
    result is the minimum of that precision over the points whose recall is
    at most x.

    Args:
      xvals: Python list of floats;
      precision: 1D Tensor decreasing.
      recall: 1D Tensor increasing.
    Return:
      list of precision values.
    """
    with ops.name_scope(name, "precision_recall_values",
                        [precision, recall]) as name:
        # Add bounds values to precision and recall.
        precision = tf.concat([[0.], precision, [0.]], axis=0)
        recall = tf.concat([[0.], recall, [1.]], axis=0)
        precision = tfe_math.cummax(precision, reverse=True)

        def _precision_at(x):
            recall_not_above_x = tf.less_equal(recall, x)
            return tf.reduce_min(tf.boolean_mask(precision, recall_not_above_x))

        return tf.tuple([_precision_at(x) for x in xvals])
def RadialCutoff(R, rc):
    """Calculates radial cutoff matrix.

    The value is 0.5 * (cos(pi * R / rc) + 1) where R <= rc and 0 elsewhere.

    B = batch_size, N = max_num_atoms, M = max_num_neighbors

    Parameters
    ----------
      R [B, N, M]: tf.Tensor
        Distance matrix.
      rc: tf.Variable
        Interaction cutoff [Angstrom].

    Returns
    -------
      FC [B, N, M]: tf.Tensor
        Radial cutoff matrix.
    """
    cosine_taper = 0.5 * (tf.cos(np.pi * R / (rc)) + 1)
    beyond_cutoff = tf.zeros_like(cosine_taper)
    within_cutoff = tf.less_equal(R, rc)
    return tf.where(within_cutoff, cosine_taper, beyond_cutoff)
def main():
    """Create the model and start the training.

    Reads command-line arguments, builds the input queue and a PSPNet50
    graph, sets up a masked softmax cross-entropy loss with L2 weight decay,
    trains with three momentum optimizers (1x / 10x / 20x learning rate),
    restores initial weights from ``RESTORE_FROM`` and runs
    ``args.num_steps`` training steps, saving a snapshot every
    ``args.save_pred_every`` steps.
    """
    args = get_arguments()

    # input_size is given on the command line as "height,width".
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    tf.set_random_seed(args.random_seed)

    coord = tf.train.Coordinator()

    with tf.name_scope("create_inputs"):
        reader = ImageReader(args.data_dir, args.data_list, input_size,
                             args.random_scale, args.random_mirror,
                             args.ignore_label, IMG_MEAN, coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    net = PSPNet50({'data': image_batch},
                   is_training=True,
                   num_classes=args.num_classes)

    raw_output = net.layers['conv6']

    # According to the Caffe prototxt, the learning rate must be multiplied
    # by 10.0 in the pyramid module; these are the scopes treated that way.
    fc_list = [
        'conv5_3_pool1_conv', 'conv5_3_pool2_conv', 'conv5_3_pool3_conv',
        'conv5_3_pool6_conv', 'conv6', 'conv5_4'
    ]
    # Snapshot of the variables that exist at this point (i.e. before any
    # optimizer is created below); only these are restored from the
    # checkpoint.
    restore_var = [v for v in tf.global_variables()]
    # Batch-norm beta/gamma are trained only when args.train_beta_gamma is set.
    all_trainable = [
        v for v in tf.trainable_variables()
        if ('beta' not in v.name and 'gamma' not in v.name)
        or args.train_beta_gamma
    ]
    fc_trainable = [
        v for v in all_trainable if v.name.split('/')[0] in fc_list
    ]
    conv_trainable = [
        v for v in all_trainable if v.name.split('/')[0] not in fc_list
    ]  # lr * 1.0
    fc_w_trainable = [v for v in fc_trainable
                      if 'weights' in v.name]  # lr * 10.0
    fc_b_trainable = [v for v in fc_trainable
                      if 'biases' in v.name]  # lr * 20.0
    # Sanity checks: the three groups must partition all_trainable.
    assert (len(all_trainable) == len(fc_trainable) + len(conv_trainable))
    assert (len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))

    # Predictions: ignoring all predictions with labels greater or equal than
    # n_classes
    raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
    label_proc = prepare_label(label_batch,
                               tf.stack(raw_output.get_shape()[1:3]),
                               num_classes=args.num_classes,
                               one_hot=False)  # [batch_size, h, w]
    raw_gt = tf.reshape(label_proc, [
        -1,
    ])
    # Keep only pixels whose label is a valid class id.
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)),
                         1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    prediction = tf.gather(raw_prediction, indices)

    # Pixel-wise softmax loss.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction,
                                                          labels=gt)
    # L2 weight decay over every trainable variable whose name contains
    # 'weights'.
    l2_losses = [
        args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if 'weights' in v.name
    ]
    reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

    # Using Poly learning rate policy:
    # lr = base_lr * (1 - step / num_steps) ** power, with the step fed in.
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(
        base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))

    # Gets moving_mean and moving_variance update operations from
    # tf.GraphKeys.UPDATE_OPS
    if args.update_mean_var == False:
        update_ops = None
    else:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
        opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0,
                                              args.momentum)
        opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0,
                                              args.momentum)

        # One gradient pass over all groups; the flat result is sliced back
        # into per-group lists in the same order as the concatenation.
        grads = tf.gradients(reduced_loss,
                             conv_trainable + fc_w_trainable + fc_b_trainable)
        grads_conv = grads[:len(conv_trainable)]
        grads_fc_w = grads[len(conv_trainable):(len(conv_trainable) +
                                                len(fc_w_trainable))]
        grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]

        train_op_conv = opt_conv.apply_gradients(
            zip(grads_conv, conv_trainable))
        train_op_fc_w = opt_fc_w.apply_gradients(
            zip(grads_fc_w, fc_w_trainable))
        train_op_fc_b = opt_fc_b.apply_gradients(
            zip(grads_fc_b, fc_b_trainable))

        train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)

    # NOTE(review): ckpt is looked up but not used below; the resume-from-
    # checkpoint logic is disabled and weights always come from RESTORE_FROM.
    ckpt = tf.train.get_checkpoint_state(SNAPSHOT_DIR)
    loader = tf.train.Saver(var_list=restore_var)
    load(loader, sess, RESTORE_FROM)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()

        feed_dict = {step_ph: step}
        # Both branches run the same training step; the first additionally
        # writes a snapshot.
        if step % args.save_pred_every == 0:
            loss_value, _ = sess.run([reduced_loss, train_op],
                                     feed_dict=feed_dict)
            save(saver, sess, args.snapshot_dir, step)
        else:
            loss_value, _ = sess.run([reduced_loss, train_op],
                                     feed_dict=feed_dict)
        duration = time.time() - start_time
        print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(
            step, loss_value, duration))

    coord.request_stop()
    coord.join(threads)
def Reward(self, states):
    """Compute the per-row reward for a batch of states.

    Element-wise, a state inside ``[LOW_BOUND(), HIGH_BOUND()]`` contributes
    ``self.zero``; a state below the lower bound is penalised by
    ``-5 * (low - state)`` and a state above the upper bound by
    ``-100 * (state - high)``. Every element additionally receives
    ``-0.1 * |midpoint - state|`` where ``midpoint = (high + low) / 2``.

    Args:
        states: tensor of states; reduced over axis 1.

    Returns:
        Tensor of summed rewards with axis 1 kept as size 1.
    """
    low = self.LOW_BOUND()
    high = self.HIGH_BOUND()

    in_range = tf.logical_and(tf.greater_equal(states, low),
                              tf.less_equal(states, high))
    # tf.where replaces tf.select, which was removed in TensorFlow 1.0.
    out_of_range_penalty = tf.where(tf.less(states, low),
                                    -5 * (low - states),
                                    -100 * (states - high))
    new_rewards = tf.where(in_range, self.zero, out_of_range_penalty)

    # Mild pull towards the centre of the allowed band.
    new_rewards += tf.abs(((high + low) / 2.0) - states) * (-0.1)
    return tf.reduce_sum(new_rewards, 1, keep_dims=True)
def seek_queue_many_device(ids, hashes, outdir, blacklist, hashes_diff,
                           devices, device):
    """Compare hashes pairwise through a TF queue and record close pairs.

    Enqueue threads (``check_batch_many``) feed ``(index, hash)`` items into
    a FIFO queue. For each dequeued item ``i`` the hash is compared with all
    hashes after it; every pair whose number of differing bits is at most
    ``DISTANCE_THRESHOLD`` is stored in ``hashes_diff`` under the key
    ``"<ids[i]>-<ids[j]>"``.

    Args:
        ids: sequence of string identifiers, parallel to ``hashes``.
        hashes: sequence of 64-element boolean hashes.
        outdir: base path of the result file; machine and device suffixes
            are appended.
        blacklist: collection whose length is subtracted from the task count.
        hashes_diff: dict updated in place with the distances found.
        devices: list of devices taking part in the computation.
        device: the device handled by this call; must be in ``devices``.

    Side effects:
        Writes ``<outdir>.tmp.<machine>.<device>`` and the matching
        ``progress.*`` files periodically, and ``<outdir>.<machine>.<device>``
        at the end; resets the progress text file to ``0``.
    """
    len_hashes = len(hashes)
    num_devices = len(devices)
    device_index = devices.index(device)
    num_threads = 8
    # At least 1 so that the range() step below is never zero.
    batch_size = max(1, int(len_hashes / num_threads))
    total_tasks = len_hashes - 1 - len(blacklist)
    pbar = tf.contrib.keras.utils.Progbar(total_tasks)

    # Feed data into our queue
    queue_i = tf.placeholder(tf.int32, shape=[None])
    queue_hash_i = tf.placeholder(tf.bool, shape=[None, 64])
    queue = tf.FIFOQueue(capacity=100,
                         dtypes=[tf.int32, tf.bool],
                         shapes=[[], [64]])
    enqueue_op = queue.enqueue_many([queue_i, queue_hash_i])
    dequeue_op = queue.dequeue()

    # Hamming distance of one hash against many, and the indices of those
    # within the threshold.
    diff_hash_i = tf.placeholder(tf.bool, shape=[64])
    diff_hashes_j = tf.placeholder(tf.bool, shape=[None, 64])
    diff_op_many = tf.count_nonzero(tf.not_equal(diff_hash_i, diff_hashes_j),
                                    1)
    filter_op = tf.less_equal(diff_op_many, DISTANCE_THRESHOLD)
    where_op = tf.where(filter_op)

    # start the threads for our FIFOQueue and batch
    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)

    # Device 1 starts enqueueing at index 1; every other device at index 0.
    first_index = 1 if device_index == 1 else 0

    enqueue_threads = [
        threading.Thread(target=check_batch_many,
                         args=[
                             sess, hashes, enqueue_op, init_i, batch_size,
                             queue_i, queue_hash_i, blacklist, num_devices
                         ])
        for init_i in range(first_index, len_hashes, batch_size)
    ]

    for t in enqueue_threads:
        # The original called t.isDaemon(), which only queries the flag.
        t.daemon = True
        t.start()

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    pbar.update(0)
    seen_images = []
    suffix = '.' + str(settings.distributed_machine) + '.' + str(device_index)
    outdir_tmp = outdir + '.tmp' + suffix
    print(outdir_tmp)

    # Computed up front so the final reset works even if no progress was
    # ever stored inside the loop.
    progress_file = os.path.join(os.path.dirname(outdir_tmp),
                                 'progress.' + os.path.basename(outdir_tmp))

    # Fetch the data from the pipeline and compare each item with the rest.
    for task in range(device_index, total_tasks, num_devices):
        # Computing diff
        i, hash_i = sess.run(dequeue_op)
        diff, where = sess.run([diff_op_many, where_op],
                               feed_dict={
                                   diff_hash_i: hash_i,
                                   diff_hashes_j: hashes[i + 1:]
                               })

        for j in where:
            j_rel = j[0]
            # Offsets are relative to hashes[i + 1:].
            j_abs = i + j_rel + 1
            key_id = ids[i] + '-' + ids[j_abs]
            hashes_diff[key_id] = diff[j_rel]
        seen_images.append(i)

        # Store progress.
        # NOTE(review): task advances in steps of num_devices from
        # device_index, so for some device indices this is never true.
        if task % 1000 == 0:
            with open(outdir_tmp, 'w') as outfile:
                json.dump(hashes_diff, outfile, default=default)
            with open(progress_file + '.txt', 'w') as outfile:
                outfile.write(str(i) + '\n')
            with open(progress_file + '.json', 'w') as outfile:
                json.dump(str(seen_images), outfile, default=default)
        pbar.update(task)

    # Consolidate results
    with open(outdir + suffix, 'w') as outfile:
        json.dump(hashes_diff, outfile, default=default)

    # Reset progress (same file the loop writes the current index to).
    with open(progress_file + '.txt', 'w') as outfile:
        outfile.write('0\n')

    # Shutdown everything to avoid zombies
    sess.run(queue.close(cancel_pending_enqueues=True))
    coord.request_stop()
    coord.join(enqueue_threads)
    coord.join(threads)
    sess.close()
def _cond(inputs, batch_inputs, step, receptive_field):
    """Loop predicate: true while a full window still fits in ``inputs``.

    Returns a boolean tensor that is true when ``step + receptive_field``
    does not exceed the size of the first dimension of ``inputs``.
    ``batch_inputs`` is accepted but not used.
    """
    window_end = step + receptive_field
    num_rows = tf.shape(inputs)[0]
    return tf.less_equal(window_end, num_rows)
def isclose(x, y, rtol=1e-05, atol=1e-08):
    """Element-wise test ``|x - y| <= atol + rtol * |y|``.

    Args:
        x: first tensor.
        y: second tensor; the relative tolerance is scaled by its magnitude.
        rtol: relative tolerance (Python number).
        atol: absolute tolerance (Python number).

    Returns:
        Boolean tensor, true where the two inputs are within tolerance.
    """
    tolerance = tf.constant(atol) + tf.constant(rtol) * tf.abs(y)
    difference = tf.abs(tf.subtract(x, y))
    return tf.less_equal(difference, tolerance)
def step(self,
         low,
         high,
         close,
         signal,
         previous_stop_price=None,
         previous_limit_price=None,
         previous_buy_price=None,
         previous_bet_duration=None,
         previous_successful_bets=None,
         previous_total_bets=None,
         previous_volatility_state=None):
    """Advance the stop/limit betting state machine by one bar.

    A stop price of 0 means "no open position". With an open position the
    bet is closed when ``low`` reaches the stop, when ``high`` reaches the
    limit, or (if ``self.max_bet_duration`` is set) when the bet has lasted
    that many steps. Without a position a ``signal`` of 1 opens one with
    stop/limit placed ``stop_factor`` / ``limit_factor`` volatilities around
    ``close``. Otherwise the state is carried over and the duration grows.

    Returns:
        Three tuples:
        ``(successful_bets, total_bets, volatility, action)`` where action is
        1 for buy, -1 for sell and 0 for wait;
        the next state ``(stop, limit, buy_price, duration, successful_bets,
        total_bets, volatility_state)``;
        and the (defaulted) previous state in the same layout.
    """
    # Missing pieces of state default to "flat, nothing bet so far".
    if previous_stop_price is None:
        previous_stop_price = tf.constant(0.0)
    if previous_limit_price is None:
        previous_limit_price = tf.constant(0.0)
    if previous_buy_price is None:
        previous_buy_price = tf.constant(0.0)
    if previous_bet_duration is None:
        previous_bet_duration = tf.constant(0)
    if previous_successful_bets is None:
        previous_successful_bets = tf.constant(0)
    if previous_total_bets is None:
        previous_total_bets = tf.constant(0)

    volatility, next_volatility_state, volatility_init = self.msd_op(
        inputs=close, state=previous_volatility_state, streamable=False)
    if previous_volatility_state is None:
        previous_volatility_state = volatility_init

    no_position = tf.equal(previous_stop_price, 0)
    has_position = tf.logical_not(no_position)
    stop_hit = tf.logical_and(has_position,
                              tf.less_equal(low, previous_stop_price))
    limit_hit = tf.logical_and(has_position,
                               tf.greater_equal(high, previous_limit_price))
    buy_signal = tf.logical_and(no_position, tf.equal(signal, 1))

    def closed(successful_bets):
        # State after selling: prices and duration reset, one more bet made.
        return (tf.constant(0.0), tf.constant(0.0), tf.constant(0.0), 0,
                successful_bets, previous_total_bets + 1, -1)

    def buy():
        return (close - self.stop_factor * volatility,
                close + self.limit_factor * volatility, close, 1,
                previous_successful_bets, previous_total_bets, 1)

    def wait():
        return (previous_stop_price, previous_limit_price,
                previous_buy_price, previous_bet_duration + 1,
                previous_successful_bets, previous_total_bets, 0)

    def sell_after_stop_hit():
        return closed(previous_successful_bets)

    def sell_after_limit_hit():
        return closed(previous_successful_bets + 1)

    def sell_after_max_bet_duration_reached():
        # A timed-out bet counts as a success only if it closes in profit.
        return closed(
            tf.where(tf.greater(close, previous_buy_price),
                     previous_successful_bets + 1, previous_successful_bets))

    # Order matters: with exclusive=False the first true predicate wins.
    cases = [(stop_hit, sell_after_stop_hit),
             (limit_hit, sell_after_limit_hit), (buy_signal, buy)]
    if self.max_bet_duration is not None:
        max_bet_duration_reached = tf.logical_and(
            has_position,
            tf.equal(previous_bet_duration, self.max_bet_duration))
        cases.append((max_bet_duration_reached,
                      sell_after_max_bet_duration_reached))

    next_state = tf.case(cases, default=wait, exclusive=False)

    outputs = (next_state[4], next_state[5], volatility, next_state[6])
    new_state = (*next_state[0:6], next_volatility_state)
    initial_state = (previous_stop_price, previous_limit_price,
                     previous_buy_price, previous_bet_duration,
                     previous_successful_bets, previous_total_bets,
                     previous_volatility_state)
    return outputs, new_state, initial_state
def assert_in_range(x, min_value, max_value):
    """Return a ``tf.Assert`` op checking ``min_value <= x <= max_value``.

    The check is done on the minimum and maximum of ``x``; ``x`` itself is
    the data attached to the assertion.
    """
    lower_ok = tf.greater_equal(tf.reduce_min(x), min_value)
    upper_ok = tf.less_equal(tf.reduce_max(x), max_value)
    return tf.Assert(tf.logical_and(lower_ok, upper_ok), [x])
def step(self, time, inputs, states, name=None):
    """Run one decoding step.

    Args:
        time: index of the current step.
        inputs: inputs for this step.
        states: decoder state from the previous step.
        name: unused.

    Returns:
        ``(outputs, states, next_input, finished)``.
    """
    # Run the decoder cell, then project its outputs with decoder_layer.
    cell_outputs, next_states = rnn_decoder(inputs, states)
    projected = decoder_layer(cell_outputs)
    # The next input is the target embedding at index `time`.
    following_input = embedded_target_seqs[:, time]
    # True once time + 1 has reached the target length, False before that.
    done = tf.less_equal(target_lens, time + 1)
    return projected, next_states, following_input, done
def __init__(self, sess, config, api, log_dir, forward, scope=None):
    """Build the conditional-VAE dialog model graph.

    Args:
        sess: TensorFlow session; stored and passed to ``self.optimize``.
        config: hyper-parameter object; the fields read are visible below
            (cell sizes, embedding sizes, ``use_hcf``, ``sent_type``,
            ``keep_prob``, ``dec_keep_prob``, ``latent_size``,
            ``num_layer``, ``full_kl_step``, ...).
        api: corpus object providing ``vocab``, ``rev_vocab``,
            ``topic_vocab`` and ``dialog_act_vocab``.
        log_dir: passed to ``self.optimize``; when not None the KL weight is
            annealed linearly over ``config.full_kl_step`` steps, otherwise
            it is fixed to 1.
        forward: True builds the inference decoder only; False builds the
            training decoder, the losses and the optimizer.
        scope: stored on the instance.
    """
    self.vocab = api.vocab
    self.rev_vocab = api.rev_vocab
    self.vocab_size = len(self.vocab)
    self.topic_vocab = api.topic_vocab
    self.topic_vocab_size = len(self.topic_vocab)
    self.da_vocab = api.dialog_act_vocab
    self.da_vocab_size = len(self.da_vocab)
    self.sess = sess
    self.scope = scope
    self.max_utt_len = config.max_utt_len
    self.go_id = self.rev_vocab["<s>"]
    self.eos_id = self.rev_vocab["</s>"]
    self.context_cell_size = config.cxt_cell_size
    self.sent_cell_size = config.sent_cell_size
    self.dec_cell_size = config.dec_cell_size

    with tf.name_scope("io"):
        # all dialog context and known attributes
        self.input_contexts = tf.placeholder(
            dtype=tf.int32, shape=(None, None, self.max_utt_len),
            name="dialog_context")
        self.floors = tf.placeholder(dtype=tf.int32, shape=(None, None),
                                     name="floor")
        self.context_lens = tf.placeholder(dtype=tf.int32, shape=(None,),
                                           name="context_lens")
        self.topics = tf.placeholder(dtype=tf.int32, shape=(None,),
                                     name="topics")
        self.my_profile = tf.placeholder(dtype=tf.float32, shape=(None, 4),
                                         name="my_profile")
        self.ot_profile = tf.placeholder(dtype=tf.float32, shape=(None, 4),
                                         name="ot_profile")

        # target response given the dialog context
        self.output_tokens = tf.placeholder(dtype=tf.int32,
                                            shape=(None, None),
                                            name="output_token")
        self.output_lens = tf.placeholder(dtype=tf.int32, shape=(None,),
                                          name="output_lens")
        self.output_das = tf.placeholder(dtype=tf.int32, shape=(None,),
                                         name="output_dialog_acts")

        # optimization related variables
        self.learning_rate = tf.Variable(float(config.init_lr),
                                         trainable=False,
                                         name="learning_rate")
        self.learning_rate_decay_op = self.learning_rate.assign(
            tf.multiply(self.learning_rate, config.lr_decay))
        self.global_t = tf.placeholder(dtype=tf.int32, name="global_t")
        self.use_prior = tf.placeholder(dtype=tf.bool, name="use_prior")

    # Dynamic sizes taken from the placeholders.
    max_dialog_len = array_ops.shape(self.input_contexts)[1]
    max_out_len = array_ops.shape(self.output_tokens)[1]
    batch_size = array_ops.shape(self.input_contexts)[0]

    with variable_scope.variable_scope("topicEmbedding"):
        t_embedding = tf.get_variable(
            "embedding", [self.topic_vocab_size, config.topic_embed_size],
            dtype=tf.float32)
        topic_embedding = embedding_ops.embedding_lookup(t_embedding,
                                                         self.topics)

    if config.use_hcf:
        with variable_scope.variable_scope("dialogActEmbedding"):
            d_embedding = tf.get_variable(
                "embedding", [self.da_vocab_size, config.da_embed_size],
                dtype=tf.float32)
            da_embedding = embedding_ops.embedding_lookup(d_embedding,
                                                          self.output_das)

    with variable_scope.variable_scope("wordEmbedding"):
        self.embedding = tf.get_variable(
            "embedding", [self.vocab_size, config.embed_size],
            dtype=tf.float32)
        # Row 0 of the embedding table is forced to zero.
        embedding_mask = tf.constant(
            [0 if i == 0 else 1 for i in range(self.vocab_size)],
            dtype=tf.float32, shape=[self.vocab_size, 1])
        embedding = self.embedding * embedding_mask

        input_embedding = embedding_ops.embedding_lookup(
            embedding, tf.reshape(self.input_contexts, [-1]))
        input_embedding = tf.reshape(
            input_embedding, [-1, self.max_utt_len, config.embed_size])
        output_embedding = embedding_ops.embedding_lookup(
            embedding, self.output_tokens)

        # Encode every utterance into a fixed-size vector.
        if config.sent_type == "bow":
            input_embedding, sent_size = get_bow(input_embedding)
            output_embedding, _ = get_bow(output_embedding)
        elif config.sent_type == "rnn":
            sent_cell = self.get_rnncell("gru", self.sent_cell_size,
                                         config.keep_prob, 1)
            input_embedding, sent_size = get_rnn_encode(
                input_embedding, sent_cell, scope="sent_rnn")
            output_embedding, _ = get_rnn_encode(
                output_embedding, sent_cell, self.output_lens,
                scope="sent_rnn", reuse=True)
        elif config.sent_type == "bi_rnn":
            fwd_sent_cell = self.get_rnncell("gru", self.sent_cell_size,
                                             keep_prob=1.0, num_layer=1)
            bwd_sent_cell = self.get_rnncell("gru", self.sent_cell_size,
                                             keep_prob=1.0, num_layer=1)
            input_embedding, sent_size = get_bi_rnn_encode(
                input_embedding, fwd_sent_cell, bwd_sent_cell,
                scope="sent_bi_rnn")
            output_embedding, _ = get_bi_rnn_encode(
                output_embedding, fwd_sent_cell, bwd_sent_cell,
                self.output_lens, scope="sent_bi_rnn", reuse=True)
        else:
            raise ValueError(
                "Unknown sent_type. Must be one of [bow, rnn, bi_rnn]")

        # reshape input into dialogs
        input_embedding = tf.reshape(input_embedding,
                                     [-1, max_dialog_len, sent_size])
        if config.keep_prob < 1.0:
            input_embedding = tf.nn.dropout(input_embedding,
                                            config.keep_prob)

        # convert floors into 1 hot
        floor_one_hot = tf.one_hot(tf.reshape(self.floors, [-1]), depth=2,
                                   dtype=tf.float32)
        floor_one_hot = tf.reshape(floor_one_hot, [-1, max_dialog_len, 2])

        joint_embedding = tf.concat([input_embedding, floor_one_hot], 2,
                                    "joint_embedding")

    with variable_scope.variable_scope("contextRNN"):
        enc_cell = self.get_rnncell(config.cell_type,
                                    self.context_cell_size, keep_prob=1.0,
                                    num_layer=config.num_layer)
        # sequence_length is given, so enc_last_state is the state at the
        # true last step of each dialog.
        _, enc_last_state = tf.nn.dynamic_rnn(
            enc_cell, joint_embedding, dtype=tf.float32,
            sequence_length=self.context_lens)

        if config.num_layer > 1:
            enc_last_state = tf.concat(enc_last_state, 1)

    # combine with other attributes
    if config.use_hcf:
        attribute_embedding = da_embedding
        attribute_fc1 = layers.fully_connected(attribute_embedding, 30,
                                               activation_fn=tf.tanh,
                                               scope="attribute_fc1")

    cond_list = [topic_embedding, self.my_profile, self.ot_profile,
                 enc_last_state]
    cond_embedding = tf.concat(cond_list, 1)

    with variable_scope.variable_scope("recognitionNetwork"):
        if config.use_hcf:
            recog_input = tf.concat(
                [cond_embedding, output_embedding, attribute_fc1], 1)
        else:
            recog_input = tf.concat([cond_embedding, output_embedding], 1)
        self.recog_mulogvar = recog_mulogvar = layers.fully_connected(
            recog_input, config.latent_size * 2, activation_fn=None,
            scope="muvar")
        recog_mu, recog_logvar = tf.split(recog_mulogvar, 2, axis=1)

    with variable_scope.variable_scope("priorNetwork"):
        # P(XYZ)=P(Z|X)P(X)P(Y|X,Z)
        prior_fc1 = layers.fully_connected(
            cond_embedding, np.maximum(config.latent_size * 2, 100),
            activation_fn=tf.tanh, scope="fc1")
        prior_mulogvar = layers.fully_connected(
            prior_fc1, config.latent_size * 2, activation_fn=None,
            scope="muvar")
        prior_mu, prior_logvar = tf.split(prior_mulogvar, 2, axis=1)

        # Sample z from the prior or from the posterior, chosen at run time.
        latent_sample = tf.cond(
            self.use_prior,
            lambda: sample_gaussian(prior_mu, prior_logvar),
            lambda: sample_gaussian(recog_mu, recog_logvar))

    with variable_scope.variable_scope("generationNetwork"):
        gen_inputs = tf.concat([cond_embedding, latent_sample], 1)

        # BOW loss
        bow_fc1 = layers.fully_connected(gen_inputs, 400,
                                         activation_fn=tf.tanh,
                                         scope="bow_fc1")
        if config.keep_prob < 1.0:
            bow_fc1 = tf.nn.dropout(bow_fc1, config.keep_prob)
        self.bow_logits = layers.fully_connected(bow_fc1, self.vocab_size,
                                                 activation_fn=None,
                                                 scope="bow_project")

        # Y loss
        if config.use_hcf:
            meta_fc1 = layers.fully_connected(gen_inputs, 400,
                                              activation_fn=tf.tanh,
                                              scope="meta_fc1")
            if config.keep_prob < 1.0:
                meta_fc1 = tf.nn.dropout(meta_fc1, config.keep_prob)
            self.da_logits = layers.fully_connected(meta_fc1,
                                                    self.da_vocab_size,
                                                    scope="da_project")
            da_prob = tf.nn.softmax(self.da_logits)
            pred_attribute_embedding = tf.matmul(da_prob, d_embedding)
            # Inference uses the predicted dialog act, training the true one.
            if forward:
                selected_attribute_embedding = pred_attribute_embedding
            else:
                selected_attribute_embedding = attribute_embedding
            dec_inputs = tf.concat(
                [gen_inputs, selected_attribute_embedding], 1)
        else:
            self.da_logits = tf.zeros((batch_size, self.da_vocab_size))
            dec_inputs = gen_inputs
            # Both decoder branches below read this name; without this
            # binding they raised NameError when use_hcf is disabled.
            # NOTE(review): assumes the decoder functions accept None as
            # the context vector -- confirm in decoder_fn_lib.
            selected_attribute_embedding = None

        # Decoder
        if config.num_layer > 1:
            dec_init_state = [
                layers.fully_connected(dec_inputs, self.dec_cell_size,
                                       activation_fn=None,
                                       scope="init_state-%d" % i)
                for i in range(config.num_layer)
            ]
            dec_init_state = tuple(dec_init_state)
        else:
            dec_init_state = layers.fully_connected(dec_inputs,
                                                    self.dec_cell_size,
                                                    activation_fn=None,
                                                    scope="init_state")

    with variable_scope.variable_scope("decoder"):
        dec_cell = self.get_rnncell(config.cell_type, self.dec_cell_size,
                                    config.keep_prob, config.num_layer)
        dec_cell = rnn_cell.OutputProjectionWrapper(dec_cell,
                                                    self.vocab_size)

        if forward:
            loop_func = decoder_fn_lib.context_decoder_fn_inference(
                None, dec_init_state, embedding,
                start_of_sequence_id=self.go_id,
                end_of_sequence_id=self.eos_id,
                maximum_length=self.max_utt_len,
                num_decoder_symbols=self.vocab_size,
                context_vector=selected_attribute_embedding)
            dec_input_embedding = None
            dec_seq_lens = None
        else:
            loop_func = decoder_fn_lib.context_decoder_fn_train(
                dec_init_state, selected_attribute_embedding)
            dec_input_embedding = embedding_ops.embedding_lookup(
                embedding, self.output_tokens)
            # Decoder inputs are the target tokens without the last one.
            dec_input_embedding = dec_input_embedding[:, 0:-1, :]
            dec_seq_lens = self.output_lens - 1

            if config.keep_prob < 1.0:
                dec_input_embedding = tf.nn.dropout(dec_input_embedding,
                                                    config.keep_prob)

            # apply word dropping. Set dropped word to 0
            if config.dec_keep_prob < 1.0:
                keep_mask = tf.less_equal(
                    tf.random_uniform((batch_size, max_out_len-1),
                                      minval=0.0, maxval=1.0),
                    config.dec_keep_prob)
                keep_mask = tf.expand_dims(tf.to_float(keep_mask), 2)
                dec_input_embedding = dec_input_embedding * keep_mask
                dec_input_embedding = tf.reshape(
                    dec_input_embedding,
                    [-1, max_out_len-1, config.embed_size])

        dec_outs, _, final_context_state = dynamic_rnn_decoder(
            dec_cell, loop_func, inputs=dec_input_embedding,
            sequence_length=dec_seq_lens)

        if final_context_state is not None:
            final_context_state = final_context_state[
                :, 0:array_ops.shape(dec_outs)[1]]
            mask = tf.to_int32(tf.sign(tf.reduce_max(dec_outs, axis=2)))
            self.dec_out_words = tf.multiply(
                tf.reverse(final_context_state, axis=[1]), mask)
        else:
            self.dec_out_words = tf.arg_max(dec_outs, 2)

    if not forward:
        with variable_scope.variable_scope("loss"):
            # Targets are the output tokens shifted by one; label id 0 is
            # masked out of the losses.
            labels = self.output_tokens[:, 1:]
            label_mask = tf.to_float(tf.sign(labels))

            rc_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=dec_outs, labels=labels)
            rc_loss = tf.reduce_sum(rc_loss * label_mask,
                                    reduction_indices=1)
            self.avg_rc_loss = tf.reduce_mean(rc_loss)
            # used only for perplexity calculation. Not used for optimization
            self.rc_ppl = tf.exp(tf.reduce_sum(rc_loss) /
                                 tf.reduce_sum(label_mask))

            """ as n-trial multimodal distribution. """
            tile_bow_logits = tf.tile(tf.expand_dims(self.bow_logits, 1),
                                      [1, max_out_len - 1, 1])
            bow_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=tile_bow_logits, labels=labels) * label_mask
            bow_loss = tf.reduce_sum(bow_loss, reduction_indices=1)
            self.avg_bow_loss = tf.reduce_mean(bow_loss)

            # reconstruct the meta info about X
            if config.use_hcf:
                da_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.da_logits, labels=self.output_das)
                self.avg_da_loss = tf.reduce_mean(da_loss)
            else:
                self.avg_da_loss = 0.0

            kld = gaussian_kld(recog_mu, recog_logvar, prior_mu,
                               prior_logvar)
            self.avg_kld = tf.reduce_mean(kld)
            # Linear KL annealing up to 1.0 when a log_dir is given.
            if log_dir is not None:
                kl_weights = tf.minimum(
                    tf.to_float(self.global_t)/config.full_kl_step, 1.0)
            else:
                kl_weights = tf.constant(1.0)

            self.kl_w = kl_weights
            self.elbo = self.avg_rc_loss + kl_weights * self.avg_kld
            aug_elbo = self.avg_bow_loss + self.avg_da_loss + self.elbo

            tf.summary.scalar("da_loss", self.avg_da_loss)
            tf.summary.scalar("rc_loss", self.avg_rc_loss)
            tf.summary.scalar("elbo", self.elbo)
            tf.summary.scalar("kld", self.avg_kld)
            tf.summary.scalar("bow_loss", self.avg_bow_loss)

            self.summary_op = tf.summary.merge_all()

            self.log_p_z = norm_log_liklihood(latent_sample, prior_mu,
                                              prior_logvar)
            self.log_q_z_xy = norm_log_liklihood(latent_sample, recog_mu,
                                                 recog_logvar)
            self.est_marginal = tf.reduce_mean(
                rc_loss + bow_loss - self.log_p_z + self.log_q_z_xy)

        self.optimize(sess, config, aug_elbo, log_dir)

    self.saver = tf.train.Saver(tf.global_variables(),
                                write_version=tf.train.SaverDef.V2)
def d_tf_elu(x):
    """Derivative of ``tf_elu`` evaluated at ``x``.

    Returns 1 where ``x > 0`` and ``tf_elu(x) + 1`` where ``x <= 0``.

    The previous version added the ``tf_elu(...) + 1.0`` term everywhere, so
    positive inputs produced ``1 + tf_elu(0) + 1`` instead of 1. The term is
    now restricted to the non-positive region.
    """
    positive = tf.cast(tf.greater(x, 0), tf.float32)
    non_positive = tf.cast(tf.less_equal(x, 0), tf.float32)
    # tf_elu still sees the masked input (0 for positive entries), exactly as
    # before; only its contribution is masked.
    return positive + non_positive * (tf_elu(non_positive * x) + 1.0)


def tf_softmax(x):
    """Return ``tf.nn.softmax(x)``."""
    return tf.nn.softmax(x)
# coding:utf-8 ''' TensorFlow 中与 Assert 相关的函数进行具体的举例说明,断言给定条件的真假与条件中持有的元素 created on 2019/4/8 @author:sunyihuan ''' import tensorflow as tf X = tf.constant([2., 3]) assert_op = tf.Assert(tf.less_equal(tf.reduce_max(X), 1.), [X]) with tf.control_dependencies([assert_op]): with tf.Session() as sess: print(sess.run(X)) with tf.control_dependencies([tf.assert_positive(X)]): output = tf.reduce_sum(X) with tf.Session() as sess1: print(sess1.run(output))
sample_mean_z = tf.reduce_mean( element_wise_product_z, axis = 0 )[0] # Update variational parameters lambda_pi = tf.assign(lambda_pi, tf.add(lambda_pi, tf.multiply(rho, sample_mean_pi)) ) lambda_mu = tf.assign(lambda_mu, tf.add(lambda_mu, tf.multiply(rho, sample_mean_mu)) ) lambda_z = tf.assign(lambda_z, tf.add(lambda_z, tf.multiply(rho, sample_mean_z)) ) # Care Values _lambda_pi = [] _lambda_pi.append( tf.split(lambda_pi, K, 0) ) k=0 while(k < K): _lambda_pi[0][k] = tf.cond( tf.less_equal( _lambda_pi[0][k][0], 0.0 ), lambda: tf.abs( tf.multiply(0.5, _lambda_pi[0][k]) ), lambda: _lambda_pi[0][k] ) k = k + 1 if(k == K): lambda_pi = tf.concat(_lambda_pi[0], 0) del _lambda_pi[:] gc.collect() _lambda_z = [] _lambda_z.append( tf.split(lambda_z, N, 0) ) n=0 while(n < N): k=0 while(k < K): #tf.less_equal( lambda_z[0][n][0][k], 0.0 ) _lambda_z[0][n] = tf.cond( tf.less_equal( _lambda_z[0][n][0][k], 0.0 ), lambda: PGMethod(_lambda_z[0][n], [1, K]), lambda: _lambda_z[0][n] )
# Depth-prediction network; `state` is its input.
net = models.ResNet50UpProj({'data': state}, BATCH, 1, True)
depth_predict = net.get_output()
# Reference depth and a mask of the same shape, both fed at run time.
depth_kinect = tf.placeholder(
    "float", [None, DEPTH_IMAGE_HEIGHT, DEPTH_IMAGE_WIDTH, 1])
img_mask = tf.placeholder("float",
                          [None, DEPTH_IMAGE_HEIGHT, DEPTH_IMAGE_WIDTH, 1])

print('Loading initial network param')
# The saver is created here, before the optimizer and the loss-tracking
# variables below exist.
# NOTE(review): presumably so that it only covers the network's own
# variables -- keep this ordering.
init_saver = tf.train.Saver()
init_saver.restore(sess, '../init_network/NYU_FCRN.ckpt')

# Masked difference between the prediction and the reference depth.
d_show = tf.subtract(tf.multiply(depth_predict, img_mask),
                     tf.multiply(depth_kinect, img_mask))
abs_d_show = tf.abs(d_show)
# Threshold: one fifth of the largest absolute error in the batch.
c = tf.divide(tf.reduce_max(abs_d_show), 5.)
# Absolute error up to the threshold, squared error above it.
berHu = tf.where(tf.less_equal(abs_d_show, c), abs_d_show, tf.square(d_show))
loss = tf.reduce_mean(tf.reduce_mean(berHu, 1))
train_step = tf.train.AdamOptimizer(5e-5).minimize(loss)

# Non-trainable variables used only to expose the losses as summaries.
train_loss_var = tf.Variable(0., trainable=False)
train_loss_sum = tf.summary.scalar('training_loss', train_loss_var)
test_loss_var = tf.Variable(0., trainable=False)
test_loss_sum = tf.summary.scalar('testing_loss', test_loss_var)
merged_summary = tf.summary.merge_all()
summary_writer = tf.summary.FileWriter('./logs', sess.graph)

print('Initializing var')
uninitialized_vars = []
start_time = time.time()
def __init__(self, model, shape, mode, AE, batch_size, kappa,
             init_learning_rate, binary_search_steps, max_iterations,
             initial_const, beta, gamma):
    """
    Constructor method: builds the TensorFlow graph for the
    pertinent-positive / pertinent-negative search.

    Args:
        model: KerasClassifier classification model
        shape: Shape of the image variables and placeholders
        mode(str): 'PP' or 'PN'
        AE: Auto-encoder model; only used when it is callable
        batch_size(int): Number of samples in a batch
        kappa(double): Confidence gap between desired class and other classes
        init_learning_rate(double): Initial learning rate
        binary_search_steps(int): Number of search steps
        max_iterations(int): For each weighting of loss function number of
            iterations to search
        initial_const(double): Initial weighting of loss function
        beta (double): Weighting of L1 loss
        gamma (double): Weighting of auto-encoder
    """
    num_classes = model._nb_classes
    # Every axis except the first; distances are reduced over these.
    tf_sum = list(range(1, len(shape)))

    self.sess = K.get_session()
    self.INIT_LEARNING_RATE = init_learning_rate
    self.MAX_ITERATIONS = max_iterations
    self.BINARY_SEARCH_STEPS = binary_search_steps
    self.kappa = kappa
    self.init_const = initial_const
    self.batch_size = batch_size
    self.AE = AE
    self.mode = mode
    self.beta = beta
    self.gamma = gamma

    # these are variables to be more efficient in sending data to tf
    self.orig_img = tf.Variable(np.zeros(shape), dtype=tf.float32)
    self.adv_img = tf.Variable(np.zeros(shape), dtype=tf.float32)
    self.adv_img_s = tf.Variable(np.zeros(shape), dtype=tf.float32)
    self.target_lab = tf.Variable(np.zeros((batch_size, num_classes)),
                                  dtype=tf.float32)
    self.const = tf.Variable(np.zeros(batch_size), dtype=tf.float32)
    self.global_step = tf.Variable(0.0, trainable=False)

    # and here's what we use to assign them
    # (assign_adv_img and assign_adv_img_s are rebound to computed tensors
    # further down, so these two placeholders are replaced).
    self.assign_orig_img = tf.placeholder(tf.float32, shape)
    self.assign_adv_img = tf.placeholder(tf.float32, shape)
    self.assign_adv_img_s = tf.placeholder(tf.float32, shape)
    self.assign_target_lab = tf.placeholder(tf.float32,
                                            (batch_size, num_classes))
    self.assign_const = tf.placeholder(tf.float32, [batch_size])

    """Fast Iterative Soft Thresholding"""
    """--------------------------------"""
    # Momentum coefficient step / (step + 3).
    self.zt = tf.divide(self.global_step,
                        self.global_step + tf.cast(3, tf.float32))

    # Soft-thresholding masks on (adv_img_s - orig_img):
    # cond1: above +beta, cond2: within [-beta, beta], cond3: below -beta.
    cond1 = tf.cast(
        tf.greater(tf.subtract(self.adv_img_s, self.orig_img), self.beta),
        tf.float32)
    cond2 = tf.cast(
        tf.less_equal(tf.abs(tf.subtract(self.adv_img_s, self.orig_img)),
                      self.beta), tf.float32)
    cond3 = tf.cast(
        tf.less(tf.subtract(self.adv_img_s, self.orig_img),
                tf.negative(self.beta)), tf.float32)
    # Shrink by beta and clip to the [-0.5, 0.5] range.
    upper = tf.minimum(tf.subtract(self.adv_img_s, self.beta),
                       tf.cast(0.5, tf.float32))
    lower = tf.maximum(tf.add(self.adv_img_s, self.beta),
                       tf.cast(-0.5, tf.float32))
    self.assign_adv_img = tf.multiply(cond1, upper) + tf.multiply(
        cond2, self.orig_img) + tf.multiply(cond3, lower)

    # Projection: PP keeps entries not above the original, PN keeps entries
    # above it; everything else falls back to the original image.
    cond4 = tf.cast(
        tf.greater(tf.subtract(self.assign_adv_img, self.orig_img), 0),
        tf.float32)
    cond5 = tf.cast(
        tf.less_equal(tf.subtract(self.assign_adv_img, self.orig_img), 0),
        tf.float32)
    if self.mode == "PP":
        self.assign_adv_img = tf.multiply(
            cond5, self.assign_adv_img) + tf.multiply(
                cond4, self.orig_img)
    elif self.mode == "PN":
        self.assign_adv_img = tf.multiply(
            cond4, self.assign_adv_img) + tf.multiply(
                cond5, self.orig_img)

    # Momentum step for the slack variable, followed by the same projection.
    self.assign_adv_img_s = self.assign_adv_img + tf.multiply(
        self.zt, self.assign_adv_img - self.adv_img)
    cond6 = tf.cast(
        tf.greater(tf.subtract(self.assign_adv_img_s, self.orig_img), 0),
        tf.float32)
    cond7 = tf.cast(
        tf.less_equal(tf.subtract(self.assign_adv_img_s, self.orig_img), 0),
        tf.float32)
    if self.mode == "PP":
        self.assign_adv_img_s = tf.multiply(
            cond7, self.assign_adv_img_s) + tf.multiply(
                cond6, self.orig_img)
    elif self.mode == "PN":
        self.assign_adv_img_s = tf.multiply(
            cond6, self.assign_adv_img_s) + tf.multiply(
                cond7, self.orig_img)

    self.adv_updater = tf.assign(self.adv_img, self.assign_adv_img)
    self.adv_updater_s = tf.assign(self.adv_img_s, self.assign_adv_img_s)
    """--------------------------------"""

    # prediction BEFORE-SOFTMAX of the model
    self.delta_img = self.orig_img - self.adv_img
    self.delta_img_s = self.orig_img - self.adv_img_s
    # %%change%%
    # PP scores the difference images, PN scores the perturbed images.
    if self.mode == "PP":
        # self.ImgToEnforceLabel_Score = model.predict(self.delta_img)
        # self.ImgToEnforceLabel_Score_s = model.predict(self.delta_img_s)
        self.ImgToEnforceLabel_Score = model.predictsym(self.delta_img)
        self.ImgToEnforceLabel_Score_s = model.predictsym(self.delta_img_s)
    elif self.mode == "PN":
        # self.ImgToEnforceLabel_Score = model.predict(self.adv_img)
        # self.ImgToEnforceLabel_Score_s = model.predict(self.adv_img_s)
        self.ImgToEnforceLabel_Score = model.predictsym(self.adv_img)
        self.ImgToEnforceLabel_Score_s = model.predictsym(self.adv_img_s)

    # distance to the input data
    self.L2_dist = tf.reduce_sum(tf.square(self.delta_img), axis=tf_sum)
    self.L2_dist_s = tf.reduce_sum(tf.square(self.delta_img_s), axis=tf_sum)
    self.L1_dist = tf.reduce_sum(tf.abs(self.delta_img), axis=tf_sum)
    self.L1_dist_s = tf.reduce_sum(tf.abs(self.delta_img_s), axis=tf_sum)
    # Elastic-net distance: L2 + beta * L1.
    self.EN_dist = self.L2_dist + tf.multiply(self.L1_dist, self.beta)
    self.EN_dist_s = self.L2_dist_s + tf.multiply(self.L1_dist_s, self.beta)

    # compute the probability of the label class versus the maximum other
    self.target_lab_score = tf.reduce_sum(
        (self.target_lab) * self.ImgToEnforceLabel_Score, 1)
    target_lab_score_s = tf.reduce_sum(
        (self.target_lab) * self.ImgToEnforceLabel_Score_s, 1)
    # Subtracting 10000 at the target position excludes it from the max.
    self.max_nontarget_lab_score = tf.reduce_max(
        (1 - self.target_lab) * self.ImgToEnforceLabel_Score -
        (self.target_lab * 10000), 1)
    max_nontarget_lab_score_s = tf.reduce_max(
        (1 - self.target_lab) * self.ImgToEnforceLabel_Score_s -
        (self.target_lab * 10000), 1)
    # Hinge losses with margin kappa; the sign flips between PP and PN.
    if self.mode == "PP":
        Loss_Attack = tf.maximum(
            0.0, self.max_nontarget_lab_score - self.target_lab_score +
            self.kappa)
        Loss_Attack_s = tf.maximum(
            0.0,
            max_nontarget_lab_score_s - target_lab_score_s + self.kappa)
    elif self.mode == "PN":
        Loss_Attack = tf.maximum(
            0.0, -self.max_nontarget_lab_score + self.target_lab_score +
            self.kappa)
        Loss_Attack_s = tf.maximum(
            0.0,
            -max_nontarget_lab_score_s + target_lab_score_s + self.kappa)

    # sum up the losses
    self.Loss_L1Dist = tf.reduce_sum(self.L1_dist)
    self.Loss_L1Dist_s = tf.reduce_sum(self.L1_dist_s)
    self.Loss_L2Dist = tf.reduce_sum(self.L2_dist)
    self.Loss_L2Dist_s = tf.reduce_sum(self.L2_dist_s)
    self.Loss_Attack = tf.reduce_sum(self.const * Loss_Attack)
    self.Loss_Attack_s = tf.reduce_sum(self.const * Loss_Attack_s)
    # Auto-encoder reconstruction penalty (only if AE is callable).
    if self.mode == "PP" and callable(self.AE):
        self.Loss_AE_Dist = self.gamma * tf.square(
            tf.norm(self.AE(self.delta_img) - self.delta_img))
        # NOTE(review): this encodes delta_img rather than delta_img_s,
        # unlike the PN branch which encodes adv_img_s -- confirm whether
        # the asymmetry is intended.
        self.Loss_AE_Dist_s = self.gamma * tf.square(
            tf.norm(self.AE(self.delta_img) - self.delta_img_s))
    elif self.mode == "PN" and callable(self.AE):
        self.Loss_AE_Dist = self.gamma * tf.square(
            tf.norm(self.AE(self.adv_img) - self.adv_img))
        self.Loss_AE_Dist_s = self.gamma * tf.square(
            tf.norm(self.AE(self.adv_img_s) - self.adv_img_s))
    else:
        self.Loss_AE_Dist = tf.constant(0.)
        self.Loss_AE_Dist_s = tf.constant(0.)

    # The optimised loss uses the slack variable and leaves out the L1 term;
    # the overall loss adds beta * L1.
    self.Loss_ToOptimize = self.Loss_Attack_s + self.Loss_L2Dist_s + self.Loss_AE_Dist_s
    self.Loss_Overall = self.Loss_Attack + self.Loss_L2Dist + self.Loss_AE_Dist + tf.multiply(
        self.beta, self.Loss_L1Dist)

    self.learning_rate = tf.train.polynomial_decay(self.INIT_LEARNING_RATE,
                                                   self.global_step,
                                                   self.MAX_ITERATIONS,
                                                   0,
                                                   power=0.5)
    optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    # Record the variable names before minimize() so that variables it
    # creates can be identified and initialised below.
    start_vars = set(x.name for x in tf.global_variables())
    self.train = optimizer.minimize(self.Loss_ToOptimize,
                                    var_list=[self.adv_img_s],
                                    global_step=self.global_step)
    end_vars = tf.global_variables()
    new_vars = [x for x in end_vars if x.name not in start_vars]

    # these are the variables to initialize when we run
    self.setup = []
    self.setup.append(self.orig_img.assign(self.assign_orig_img))
    self.setup.append(self.target_lab.assign(self.assign_target_lab))
    self.setup.append(self.const.assign(self.assign_const))
    self.setup.append(self.adv_img.assign(self.assign_adv_img))
    self.setup.append(self.adv_img_s.assign(self.assign_adv_img_s))

    self.init = tf.variables_initializer(var_list=[self.global_step] +
                                         [self.adv_img_s] + [self.adv_img] +
                                         new_vars)
def dense_resample(im, flow_im, output_valid_mask, name='dense_resample'):
    """Bilinearly resample `im` at the absolute locations given by `flow_im`.

    Args:
      im: ...xHxWxC tensor to sample from.
      flow_im: ...xHxWx2 tensor; the last axis is unstacked as (x, y) absolute
        sampling coordinates.
      output_valid_mask: if truthy, also compute a boolean mask that is True
        where 0 <= x <= W - 1 and 0 <= y <= H - 1.
      name: name scope for the created ops.

    Returns:
      output: float32 tensor reshaped to the shape of `im`.
      valid_mask: boolean tensor shaped like `im` with a trailing dimension of
        1, or None when `output_valid_mask` is falsy.
    """
    with tf.name_scope(name):
        valid_mask = None

        # Flatten the sampling coordinates to 1-D float vectors.
        coord_x, coord_y = tf.unstack(flow_im, axis=-1)
        x = tf.cast(tf.reshape(coord_x, [-1]), tf.float32)
        y = tf.cast(tf.reshape(coord_y, [-1]), tf.float32)

        # Dynamic image dimensions.
        shape = tf.unstack(tf.shape(im))
        channels, width, height = shape[-1], shape[-2], shape[-3]
        num_batch = tf.cast(tf.reduce_prod(tf.stack(shape[:-3])), 'int32')
        zero = tf.constant(0, dtype=tf.int32)

        # Integer corners surrounding every sample location.
        x0 = tf.cast(tf.floor(x), 'int32')
        y0 = tf.cast(tf.floor(y), 'int32')
        x1 = x0 + 1
        y1 = y0 + 1

        if output_valid_mask:
            x_in_range = tf.logical_and(
                tf.less_equal(x, tf.cast(width, tf.float32) - 1.),
                tf.greater_equal(x, 0.))
            y_in_range = tf.logical_and(
                tf.less_equal(y, tf.cast(height, tf.float32) - 1.),
                tf.greater_equal(y, 0.))
            valid_mask = tf.logical_and(x_in_range, y_in_range)
            valid_mask = tf.reshape(valid_mask, shape=shape[:-1] + [1])

        # Keep every corner inside the image.
        x0 = tf.clip_by_value(x0, zero, width - 1)
        x1 = tf.clip_by_value(x1, zero, width - 1)
        y0 = tf.clip_by_value(y0, zero, height - 1)
        y1 = tf.clip_by_value(y1, zero, height - 1)

        row_stride = width
        image_stride = width * height

        # Offset of the first pixel of each image, repeated once per pixel.
        base = tf.reshape(tf.range(num_batch) * image_stride, shape=[-1, 1])
        base = tf.reshape(tf.tile(base, [1, height * width]), shape=[-1])
        row0 = base + y0 * row_stride
        row1 = base + y1 * row_stride

        # Look the four corners up in the flattened [pixels, channels] image.
        flat_shape = tf.stack([tf.constant(-1, dtype=tf.int32), channels])
        im_flat = tf.cast(tf.reshape(im, flat_shape), dtype=tf.float32)
        pixel_a = tf.gather(im_flat, row0 + x0)
        pixel_b = tf.gather(im_flat, row1 + x0)
        pixel_c = tf.gather(im_flat, row0 + x1)
        pixel_d = tf.gather(im_flat, row1 + x1)

        # Interpolation weights, measured from the (clipped) upper corner.
        frac_x0 = tf.to_float(x1) - x
        frac_y0 = tf.to_float(y1) - y
        frac_x1 = 1.0 - frac_x0
        frac_y1 = 1.0 - frac_y0
        wa = tf.expand_dims(frac_x0 * frac_y0, 1)
        wb = tf.expand_dims(frac_x0 * frac_y1, 1)
        wc = tf.expand_dims(frac_x1 * frac_y0, 1)
        wd = tf.expand_dims(frac_x1 * frac_y1, 1)

        output = tf.add_n(
            [wa * pixel_a, wb * pixel_b, wc * pixel_c, wd * pixel_d])
        output = tf.reshape(output, shape=tf.shape(im))
        return output, valid_mask
def rpn_train(x, rpn_cls, rpn_box, result, input_img=None):
    """Build the RPN losses and either train for 300 steps or restore a checkpoint.

    Builds a softmax cross-entropy loss on `rpn_cls` and a smooth-L1 style
    loss on `rpn_box`, minimizes their sum with Adam, then draws the final
    predictions with `draw_image`.

    Args:
      x: placeholder that `input_img` is fed into.
      rpn_cls: classification output tensor, used as logits.
      rpn_box: box regression output tensor.
      result: ground truth passed to `cal_iou` and `box_regression`.
      input_img: value fed for `x` on every `sess.run`.

    Relies on module-level names not visible here: `box_num`, `rs_num`,
    `initial_lr`, `save_ckpt`, `cal_iou`, `box_regression`, `draw_image`.
    """
    # First predict, over the [14,14] grid of boxes, which boxes are foreground;
    # then compute iou(actual, predicted) and treat a box as foreground when it
    # exceeds the threshold.
    # Once the foreground is predicted, box_regression fine-tunes the
    # (x,y,w,h) of the foreground boxes to obtain the object's final (x,y,w,h).
    # NOTE(review): Is_iou and Is_box are fed below but do not appear in either
    # active loss expression.
    Is_iou = tf.placeholder(tf.float32)
    Is_box = tf.placeholder(tf.float32)
    IOU = tf.placeholder(tf.float32, [None, 14, 14, box_num * rs_num, 2])
    bounding_box = tf.placeholder(tf.float32,
                                  [None, 14, 14, box_num * rs_num, 4])
    #loss = (-tf.reduce_sum(IOU*tf.log(tf.clip_by_value(rpn_cls,1e-8,1.0)),reduction_indices=[1]))# loss
    #loss = tf.div(tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits_v2(labels=IOU,logits=rpn_cls)),Is_iou)
    loss = tf.reduce_mean(
        (tf.nn.softmax_cross_entropy_with_logits_v2(labels=IOU,
                                                    logits=rpn_cls)))
    sm1_sub = tf.add(bounding_box, -1 * rpn_box)  # loss2 uses the smooth L1 approach
    sm1_sub = tf.abs(sm1_sub)
    # 1.0 where |diff| <= 1 (quadratic branch), 0.0 otherwise (linear branch).
    sm1_bool = tf.cast(tf.less_equal(sm1_sub, 1.0), tf.float32)
    # 1.0 where the target coordinate is exactly 0; those entries are masked
    # out of loss2 below.
    sm1_bool2 = tf.cast(tf.equal(bounding_box, 0), tf.float32)
    print(rpn_box, bounding_box, sm1_sub, sm1_bool, sm1_bool2)
    #loss2 = (tf.reduce_sum((1-sm1_bool2)*(sm1_bool*(0.5*sm1_sub*sm1_sub)+(1-sm1_bool)*(sm1_sub-0.5))))
    loss2 = tf.reduce_mean(
        (1 - sm1_bool2) * (sm1_bool * (0.5 * sm1_sub * sm1_sub) +
                           (1 - sm1_bool) * (sm1_sub - 0.5)))
    total_loss = tf.add(loss, loss2)
    optimizer = tf.train.AdamOptimizer(initial_lr).minimize(total_loss)
    #ls_optimizer = tf.train.AdamOptimizer(1e-3).minimize(loss)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        if not os.path.exists(save_ckpt):
            # No checkpoint on disk: train from scratch.
            for i in range(300):
                # Forward pass first; the targets are derived from the current
                # predictions.
                predict_box, archor_box = sess.run([rpn_cls, rpn_box],
                                                   feed_dict={x: input_img})
                result_iou, is_iou = cal_iou(predict_box, result)
                up_box, is_box = box_regression(archor_box, result_iou, result)
                # Add a leading batch axis to match the placeholders.
                up_box = up_box[np.newaxis, :, :, :, :]
                result_iou = result_iou[np.newaxis, :, :, :, :]
                _, total_ls, ls, ls2 = sess.run(
                    [optimizer, total_loss, loss, loss2],
                    feed_dict={
                        Is_iou: is_iou,
                        x: input_img,
                        IOU: result_iou,
                        bounding_box: up_box,
                        Is_box: is_box
                    })
                #_,ls = sess.run([ls_optimizer,loss],feed_dict={x:input_img,IOU:result_iou,bounding})
                #_,ls = sess.run([ls_optimizer,loss],feed_dict={x:input_img,IOU:result_iou,Is_iou:is_iou})
                if (i % 10 == 0):
                    #print(ls,predict_box[0][7][7][0],result_iou[0][7][7][0])
                    print('step', i, ' ', total_ls, ' ', ls, ' ', ls2,
                          predict_box[0][7][7][0], result_iou[0][7][7][0])
            iou, ar_box = sess.run([rpn_cls, rpn_box],
                                   feed_dict={x: input_img})
            print(iou.shape, ar_box.shape)
            draw_image(iou[0], ar_box[0])
            #saver.save(sess,save_meta)
        else:
            # A checkpoint exists: import the saved graph and fetch the output
            # tensors by their hard-coded names.
            saver = tf.train.import_meta_graph(
                './checkpoint1_dir/MyModel.meta')
            saver.restore(sess,
                          tf.train.latest_checkpoint('./checkpoint1_dir'))
            graph = tf.get_default_graph()
            rpn_cls = graph.get_tensor_by_name('Reshape_3:0')
            rpn_box = graph.get_tensor_by_name('Reshape_4:0')
            iou, ar_box = sess.run([rpn_cls, rpn_box],
                                   feed_dict={x: input_img})
            draw_image(iou[0], ar_box[0])
def has_converged(model):
    """Return a boolean tensor telling whether `model` has saturated.

    The result is True when every element of `model` is >= 0.99, or when
    every element is <= 0.01.

    Args:
      model: tensor comparable with constants of dtype `MODEL_DTYPE`.

    Returns:
      A scalar boolean tensor.
    """
    high = tf.constant(0.99, dtype=MODEL_DTYPE)
    low = tf.constant(0.01, dtype=MODEL_DTYPE)
    all_high = tf.reduce_all(tf.less_equal(high, model))
    all_low = tf.reduce_all(tf.less_equal(model, low))
    # Combine with a graph op: Python `or` would try to coerce a tensor to
    # bool, which raises in graph mode and short-circuits in eager mode.
    return tf.logical_or(all_high, all_low)
def create_path_drop_masks(self, p_img, p_bev, random_values):
    """Decide, from three coin flips, which of the two branches to keep.

    Args:
        p_img: A tensor of float32, probability of keeping image branch
        p_bev: A tensor of float32, probability of keeping bev branch
        random_values: A tensor of float32 of shape [3], the results of
            coin flips, values should range from 0.0 - 1.0.

    Returns:
        final_img_mask: A scalar tensor that is either 1.0 or 0.0, the final
            keep/kill decision for the image branch.
        final_bev_mask: A scalar tensor that is either 1.0 or 0.0, the final
            keep/kill decision for the bev branch.
    """

    def keep_branch():
        return tf.constant(1.0)

    def kill_branch():
        return tf.constant(0.0)

    def flip(condition):
        # 1.0 when `condition` holds, 0.0 otherwise.
        return tf.case([(condition, keep_branch)], default=kill_branch)

    # Coins 0 and 1 decide each branch independently. Coin 2 is only
    # consulted when both branches were dropped, and then keeps exactly one.
    img_chances = flip(tf.less(random_values[0], p_img))
    bev_chances = flip(tf.less(random_values[1], p_bev))

    # 1.0 when at least one branch survived the independent flips.
    any_kept = tf.logical_or(tf.cast(img_chances, dtype=tf.bool),
                             tf.cast(bev_chances, dtype=tf.bool))
    third_flip = tf.cast(any_kept, dtype=tf.float32)

    # 50/50 tie-break: above 0.5 keeps the image, otherwise keeps bev.
    img_second_flip = flip(tf.greater(random_values[2], 0.5))
    bev_second_flip = flip(tf.less_equal(random_values[2], 0.5))

    # tf.case needs callables, hence the lambdas around existing tensors.
    survivors_exist = tf.equal(third_flip, 1)
    final_img_mask = tf.case([(survivors_exist, lambda: img_chances)],
                             default=lambda: img_second_flip)
    final_bev_mask = tf.case([(survivors_exist, lambda: bev_chances)],
                             default=lambda: bev_second_flip)

    return final_img_mask, final_bev_mask
def box_matching(boxes, gt_boxes, gt_classes):
    """Assign every box the groundtruth box it overlaps most.

    For each proposal the groundtruth with the highest IoU is selected. A
    proposal whose best IoU is not positive is treated as background.

    Args:
      boxes: a tensor of shape [batch_size, N, 4] with the box coordinates to
        be matched to groundtruth boxes.
      gt_boxes: a tensor of shape [batch_size, MAX_INSTANCES, 4] with the
        groundtruth box coordinates, padded with -1s for invalid boxes.
      gt_classes: a tensor of shape [batch_size, MAX_INSTANCES] with the
        groundtruth classes, padded with -1s for invalid classes.

    Returns:
      matched_gt_boxes: [batch_size, N, 4] matched groundtruth coordinates;
        all 0s for background boxes.
      matched_gt_classes: [batch_size, N] matched groundtruth classes; 0 (the
        background class) for background boxes.
      matched_gt_indices: [batch_size, N] index of the matched groundtruth in
        `gt_boxes`; -1 for background boxes.
      matched_iou: [batch_size, N] the maximum IoU of each box over all
        groundtruth boxes.
      iou: [batch_size, N, K] the full IoU matrix. The IoU against an invalid
        groundtruth box ([-1, -1, -1, -1]) is -1.
    """
    # [batch_size, N, K] pairwise overlaps.
    iou = bbox_overlap(boxes, gt_boxes)

    # Best groundtruth per box: its score and its position.
    # A score of 0.0 means no overlap; -1.0 means only padding was available.
    matched_iou = tf.reduce_max(iou, axis=-1)
    best_gt_indices = tf.argmax(iou, axis=-1, output_type=tf.int32)

    # [batch_size, N] True where the box matched nothing real.
    is_background = tf.less_equal(matched_iou, 0.0)

    best_gt_boxes, best_gt_classes = gather_instances(
        best_gt_indices, gt_boxes, gt_classes)

    # Repeat the mask over the 4 coordinates so it lines up with the boxes.
    is_background_per_coord = tf.tile(
        tf.expand_dims(is_background, axis=-1), [1, 1, 4])
    matched_gt_boxes = tf.where(
        is_background_per_coord,
        tf.zeros_like(best_gt_boxes, dtype=best_gt_boxes.dtype),
        best_gt_boxes)
    matched_gt_classes = tf.where(
        is_background, tf.zeros_like(best_gt_classes), best_gt_classes)
    matched_gt_indices = tf.where(
        is_background, -tf.ones_like(best_gt_indices), best_gt_indices)

    return (matched_gt_boxes, matched_gt_classes, matched_gt_indices,
            matched_iou, iou)
def __init__(self,
             model,
             pop_size=6,
             mutation_rate=0.001,
             eps=0.15,
             max_steps=10000,
             alpha=0.20,
             resize_dim=None,
             adaptive=False):
    """Store the attack hyper-parameters and build the attack loop graph.

    Args:
      model: stored on the instance as `self.model`.
      pop_size: number of population members; first dimension of the
        population variables.
      mutation_rate: stored as `self.mutation_rate`.
      eps: half-width of the box around the input image used for
        `box_min` / `box_max`.
      max_steps: the loop runs while the step counter is <= this value.
      alpha: stored as `self.alpha`.
      resize_dim: spatial size of the noise variable; 299 is used when this
        is None (or otherwise falsy).
      adaptive: stored as `self.adaptive`.
    """
    self.eps = eps
    self.pop_size = pop_size
    self.model = model
    self.alpha = alpha
    self.max_steps = max_steps
    self.mutation_rate = mutation_rate
    self.resize_dim = resize_dim
    # Noise is kept at the reduced resolution when one is given.
    noise_dim = self.resize_dim or 299
    self.adaptive = adaptive
    # Summaries are written to the current working directory.
    self.writer = tf.summary.FileWriter(logdir='.')
    self.input_img = tf.Variable(np.zeros((1, 299, 299, 3), dtype=np.float32), name='x', dtype=tf.float32)
    # copies of original image
    self.pop_orig = tf.Variable(np.zeros((self.pop_size, 299, 299, 3), dtype=np.float32), name='pop_orig', dtype=tf.float32)
    self.pop_noise = tf.Variable(np.zeros(
        (self.pop_size, noise_dim, noise_dim, 3), dtype=np.float32), name='pop_noise', dtype=tf.float32)
    self.target = tf.Variable(0, dtype=tf.int64, name='target')
    self.init_success = tf.Variable(0, dtype=tf.int32, name='success')
    # Per-member bounds: within eps of the input and inside [-0.5, 0.5].
    self.box_min = tf.tile(tf.maximum(self.input_img - eps, -0.5), (self.pop_size, 1, 1, 1))
    self.box_max = tf.tile(tf.minimum(self.input_img + eps, 0.5), (self.pop_size, 1, 1, 1))
    # validate_shape=False plus set_shape((None, 1)) lets the number of rows
    # differ from the (1, 1) initial value.
    self.margin_log = tf.Variable(initial_value=np.zeros((1, 1), dtype=np.float32), validate_shape=False, name='margin_log', dtype=tf.float32)
    self.margin_log.set_shape((None, 1))
    self.tlab = tf.contrib.layers.one_hot_encoding([self.target], num_classes=1001)
    self.i = tf.Variable(0, dtype=tf.int64, name='step')
    # Variables to detect plateau
    self.best_win_margin = tf.Variable(-1, dtype=tf.float32, name='cur_margin')
    self.cur_plateau_count = tf.Variable(0, dtype=tf.int32, name='plateau')
    self.num_plateaus = tf.Variable(0, dtype=tf.int32, name='num_plateaus')
    # Keep iterating while the step budget is not exhausted and the success
    # flag is still 0.
    cond = lambda i, success, pop_orig, pop_noise, cur_elite, margin_log, best_win_margin, cur_plateau_count, num_plateaus: tf.logical_and(
        tf.less_equal(i, self.max_steps), tf.equal(success, 0))
    # One loop iteration simply forwards every loop variable to attack_step.
    attack_body = lambda i, success, pop_orig, pop_noise, cur_elite, margin_log, best_win_margin, cur_plateau_count, num_plateaus: self.attack_step(
        i, success, pop_orig, pop_noise, cur_elite, margin_log, best_win_margin, cur_plateau_count, num_plateaus)
    # The first population member seeds the `cur_elite` loop variable.
    self.attack_main = tf.while_loop(cond, attack_body, [
        self.i, self.init_success, self.pop_orig, self.pop_noise, self.pop_noise[0], self.margin_log, self.best_win_margin, self.cur_plateau_count, self.num_plateaus
    ])
    self.summary_op = tf.summary.merge_all()
def Compile():
    """Build the CTC training graph and run the train / periodic-test loop.

    Creates the input placeholders, the CTC loss, a beam-search decoder and
    the label error rate, restores weights through `load_weights`, then
    trains for `params['num_epochs']` epochs. Progress is printed every 100
    batches. Every 1000 iterations the model is saved and evaluated on the
    whole test set.

    Relies on module-level names not visible here: `params`, `sess`,
    `Architecture`, `load_weights`, `save`, `get_file_paths`, `load_img`,
    `word2Sparse`, `Sparse2Word`.
    """
    Input = tf.placeholder(dtype=tf.float32,
                           shape=[params['Batch_Size'], 32, 100, 3],
                           name='Input')
    Target = tf.sparse_placeholder(tf.int32, name='Target')
    Seq_Len = tf.placeholder(dtype=tf.int32,
                             shape=[params['Batch_Size']],
                             name='Seq_len')
    logits = Architecture(Input)

    # NOTE(review): this control dependency only forces the comparison to be
    # evaluated; it does not assert on the result.
    with tf.control_dependencies([
            tf.less_equal(Target.dense_shape[1],
                          tf.reduce_max(tf.cast(Seq_Len, tf.int64)))
    ]):
        loss_ctc = tf.reduce_mean(
            tf.nn.ctc_loss(Target,
                           logits,
                           Seq_Len,
                           ignore_longer_outputs_than_inputs=True))
    decoded, log_prob = tf.nn.ctc_beam_search_decoder(logits,
                                                      Seq_Len,
                                                      merge_repeated=False,
                                                      beam_width=100,
                                                      top_paths=2)
    ler = tf.reduce_mean(
        tf.edit_distance(tf.cast(decoded[0], tf.int32), Target))
    optimizer = tf.train.MomentumOptimizer(params['lr'],
                                           0.9).minimize(loss_ctc)

    init = tf.global_variables_initializer()
    table_init = tf.tables_initializer()
    sess.run([init, table_init])

    saver = tf.train.Saver(max_to_keep=5)
    itr, _ = load_weights(saver, params['model_path'])
    # Start counting from 0 when no iteration number came back. `not itr`
    # covers False and 0 like the previous `== False`, and also None.
    if not itr:
        itr = 0

    tf.summary.scalar('CTC_Loss_Value', loss_ctc)
    tf.summary.scalar('Label_Error_Rate', ler)
    print('Stteing up summary op...')
    summary_op = tf.summary.merge_all()
    print('Setting Up Saver...')
    Train_summary_writer = tf.summary.FileWriter('./log_dir/Train/',
                                                 sess.graph)
    Test_summary_writer = tf.summary.FileWriter('./log_dir/Test/', sess.graph)

    trainpaths, testpaths, wordTrain, wordTest = get_file_paths()
    Total_Data = len(trainpaths)
    for i in range(params['num_epochs']):
        # Reshuffle paths and labels together at the start of every epoch.
        index = np.random.permutation(Total_Data)
        trainpaths = trainpaths[index]
        wordTrain = wordTrain[index]
        for idx in range(Total_Data // params['Batch_Size']):
            batch_paths = trainpaths[idx * params['Batch_Size']:(idx + 1) *
                                     params['Batch_Size']]
            batch_Words = wordTrain[idx * params['Batch_Size']:(idx + 1) *
                                    params['Batch_Size']]
            batch_data = np.array([load_img(path) for path in batch_paths])
            batch_target = word2Sparse(batch_Words)
            # Every sample is given the same fixed sequence length of 49.
            batch_SeqL = np.ones(params['Batch_Size']) * 49
            feed_dict = {
                Input: batch_data[:, :32, :, :],
                Target: batch_target,
                Seq_Len: batch_SeqL
            }
            _, Train_loss_ctc, Train_summary_str, Train_decoded, Train_ler = sess.run(
                [optimizer, loss_ctc, summary_op, decoded, ler], feed_dict)
            Train_summary_writer.add_summary(Train_summary_str, itr)
            itr = itr + 1

            if idx % 100 == 0:
                print('Epoch:' + str(i) + ' Step:' + str(itr) +
                      ' train_CTC_loss:' + str(Train_loss_ctc) +
                      ' Label_Error_Rate:' + str(Train_ler))
                Predicted_Words = Sparse2Word(
                    Train_decoded[0],
                    np.bincount(Train_decoded[0].indices[:, 0],
                                minlength=params['Batch_Size']))
                print(
                    pandas.DataFrame([batch_Words[:1], Predicted_Words[:1]],
                                     ['Label', 'Predicted']))

            if itr % 1000 == 0:
                save(saver, params['model_path'], itr)
                print('Model Saved!!')

                # Evaluate on the full test set right after each save.
                Total_test_Data = len(testpaths)
                tes_accu = []
                tes_ctc = []
                tes_ler = []
                for itdx in range(Total_test_Data // params['Batch_Size']):
                    batch_paths = testpaths[itdx *
                                            params['Batch_Size']:(itdx + 1) *
                                            params['Batch_Size']]
                    batch_Words = wordTest[itdx *
                                           params['Batch_Size']:(itdx + 1) *
                                           params['Batch_Size']]
                    batch_data = np.array(
                        [load_img(path) for path in batch_paths])
                    batch_target = word2Sparse(batch_Words)
                    batch_SeqL = np.ones(params['Batch_Size']) * 49
                    feed_dict = {
                        Input: batch_data[:, :32, :, :],
                        Target: batch_target,
                        Seq_Len: batch_SeqL
                    }
                    Test_loss_ctc, Test_summary_str, Test_decoded, Test_ler = sess.run(
                        [loss_ctc, summary_op, decoded, ler], feed_dict)
                    Predicted_Words = Sparse2Word(
                        Test_decoded[0],
                        np.bincount(Test_decoded[0].indices[:, 0],
                                    minlength=params['Batch_Size']))
                    # Exact-match accuracy. The loop names are distinct from
                    # the epoch counter `i`, which the print below reads.
                    res = np.array([
                        label == predicted
                        for label, predicted in zip(batch_Words,
                                                    Predicted_Words)
                    ]).astype(np.int8)
                    accuracy = np.mean(res)
                    tes_accu.append(accuracy)
                    tes_ctc.append(np.mean(Test_loss_ctc))
                    tes_ler.append(np.mean(Test_ler))
                print('### Testing Results: Epoch:' + str(i) + ' Step:' +
                      str(itr) + ' Test_CTC_loss:' + str(np.mean(tes_ctc)) +
                      ' Label_Error_Rate:' + str(np.mean(tes_ler)) +
                      ' Test Acuracy: ' + str(np.mean(tes_accu)))
def auto_canny_tf(img, sigma):
    """Canny-style edge detection with thresholds derived from the median.

    Steps: Gaussian smoothing, Sobel gradients, non-maximum suppression along
    the quantized gradient direction, double thresholding, hysteresis
    (weak edges connected to strong edges are promoted), and finally a
    fractional score for the remaining sub-threshold responses.

    Args:
      img: gray-scale image batch [b, h, w, 1]. All kernels are shaped
        [k, k, 1, 1], so a single channel is required.
      sigma: scalar controlling the threshold band around the median:
        lower = (1 - sigma) * median, upper = (1 + sigma) * median, both
        clipped to [0, 255].

    Returns:
      A float32 tensor: 1.0 on detected edges, plus `gradient / upper` on
      pixels with a positive suppressed gradient that are not edges.
    """
    # step 0: thresholds from the median
    # assumes get_median returns one value per batch element (shape [b]),
    # since it is expanded to [b, 1, 1, 1] below -- TODO confirm
    v = get_median(img)
    lower = tf.clip_by_value(tf.to_float((1.0 - sigma) * v), 0, 255)
    upper = tf.clip_by_value(tf.to_float((1.0 + sigma) * v), 0, 255)
    lower = lower[:, tf.newaxis, tf.newaxis, tf.newaxis]
    upper = upper[:, tf.newaxis, tf.newaxis, tf.newaxis]

    # step 1: Gaussian filtering (symmetric padding keeps the spatial size)
    gauss_kernel = gaussian_kernel(sigma=1.0)
    gauss_kernel = gauss_kernel[:, :, tf.newaxis, tf.newaxis]
    [_, w, _, __] = gauss_kernel.shape
    padding_hw = int(int(w) / 2)
    img = tf.pad(
        img,
        [[0, 0], [padding_hw, padding_hw], [padding_hw, padding_hw], [0, 0]],
        mode='SYMMETRIC')
    img = tf.nn.conv2d(img,
                       gauss_kernel,
                       strides=[1, 1, 1, 1],
                       padding="VALID")

    # step 2: gradient magnitude and direction (Sobel)
    gradient_kernel_x = tf.constant([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]],
                                    tf.float32)
    gradient_kernel_y = tf.constant([[1, 2, 1], [0, 0, 0], [-1, -2, -1]],
                                    tf.float32)
    gradient_kernel_x = gradient_kernel_x[:, :, tf.newaxis, tf.newaxis]
    gradient_kernel_y = gradient_kernel_y[:, :, tf.newaxis, tf.newaxis]
    img = tf.pad(img, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='SYMMETRIC')
    gradient_x = tf.nn.conv2d(img,
                              gradient_kernel_x,
                              strides=[1, 1, 1, 1],
                              padding="VALID")
    gradient_y = tf.nn.conv2d(img,
                              gradient_kernel_y,
                              strides=[1, 1, 1, 1],
                              padding="VALID")
    magnitude = tf.sqrt(tf.square(gradient_x) + tf.square(gradient_y))
    theta = tf.atan2(gradient_y, gradient_x)
    thetaQ = (tf.round(theta * (5.0 / np.pi)) + 5) % 5  # Quantize direction
    thetaQ = thetaQ % 4

    # step 3: non-maximum suppression
    gradSup = tf.identity(magnitude)

    # One-hot 3x3 kernels: convolving with one of them reads the value of
    # the neighbour in that compass direction.
    E_MATRIX = tf.constant([[0, 0, 0], [0, 0, 1], [0, 0, 0]], tf.float32)
    E_MATRIX = E_MATRIX[:, :, tf.newaxis, tf.newaxis]
    W_MATRIX = tf.constant([[0, 0, 0], [1, 0, 0], [0, 0, 0]], tf.float32)
    W_MATRIX = W_MATRIX[:, :, tf.newaxis, tf.newaxis]
    N_MATRIX = tf.constant([[0, 1, 0], [0, 0, 0], [0, 0, 0]], tf.float32)
    N_MATRIX = N_MATRIX[:, :, tf.newaxis, tf.newaxis]
    S_MATRIX = tf.constant([[0, 0, 0], [0, 0, 0], [0, 1, 0]], tf.float32)
    S_MATRIX = S_MATRIX[:, :, tf.newaxis, tf.newaxis]
    NE_MATRIX = tf.constant([[0, 0, 1], [0, 0, 0], [0, 0, 0]], tf.float32)
    NE_MATRIX = NE_MATRIX[:, :, tf.newaxis, tf.newaxis]
    NW_MATRIX = tf.constant([[1, 0, 0], [0, 0, 0], [0, 0, 0]], tf.float32)
    NW_MATRIX = NW_MATRIX[:, :, tf.newaxis, tf.newaxis]
    SE_MATRIX = tf.constant([[0, 0, 0], [0, 0, 0], [0, 0, 1]], tf.float32)
    SE_MATRIX = SE_MATRIX[:, :, tf.newaxis, tf.newaxis]
    SW_MATRIX = tf.constant([[0, 0, 0], [0, 0, 0], [1, 0, 0]], tf.float32)
    SW_MATRIX = SW_MATRIX[:, :, tf.newaxis, tf.newaxis]

    E_VAL = tf.nn.conv2d(gradSup, E_MATRIX, strides=[1, 1, 1, 1],
                         padding="SAME")
    W_VAL = tf.nn.conv2d(gradSup, W_MATRIX, strides=[1, 1, 1, 1],
                         padding="SAME")
    N_VAL = tf.nn.conv2d(gradSup, N_MATRIX, strides=[1, 1, 1, 1],
                         padding="SAME")
    S_VAL = tf.nn.conv2d(gradSup, S_MATRIX, strides=[1, 1, 1, 1],
                         padding="SAME")
    NE_VAL = tf.nn.conv2d(gradSup, NE_MATRIX, strides=[1, 1, 1, 1],
                          padding="SAME")
    SW_VAL = tf.nn.conv2d(gradSup, SW_MATRIX, strides=[1, 1, 1, 1],
                          padding="SAME")
    NW_VAL = tf.nn.conv2d(gradSup, NW_MATRIX, strides=[1, 1, 1, 1],
                          padding="SAME")
    SE_VAL = tf.nn.conv2d(gradSup, SE_MATRIX, strides=[1, 1, 1, 1],
                          padding="SAME")

    # True where the pixel is not strictly greater than both neighbours
    # along the given axis.
    NE_SW_LOGIC = tf.logical_or(tf.less_equal(gradSup, NE_VAL),
                                tf.less_equal(gradSup, SW_VAL))
    NW_SE_LOGIC = tf.logical_or(tf.less_equal(gradSup, NW_VAL),
                                tf.less_equal(gradSup, SE_VAL))
    EW_LOGIC = tf.logical_or(tf.less_equal(gradSup, E_VAL),
                             tf.less_equal(gradSup, W_VAL))
    NS_LOGIC = tf.logical_or(tf.less_equal(gradSup, N_VAL),
                             tf.less_equal(gradSup, S_VAL))

    # Zero every non-maximum along its own quantized direction.
    EW_POS = tf.equal(thetaQ, 0)
    EW_ZERO_POSITION = tf.logical_and(EW_POS, EW_LOGIC)
    gradSup = tf.where(EW_ZERO_POSITION, tf.zeros_like(gradSup), gradSup)

    NE_SW_POS = tf.equal(thetaQ, 1)
    NE_SW_ZERO_POSITION = tf.logical_and(NE_SW_POS, NE_SW_LOGIC)
    gradSup = tf.where(NE_SW_ZERO_POSITION, tf.zeros_like(gradSup), gradSup)

    NS_POS = tf.equal(thetaQ, 2)
    NS_ZERO_POSITION = tf.logical_and(NS_POS, NS_LOGIC)
    gradSup = tf.where(NS_ZERO_POSITION, tf.zeros_like(gradSup), gradSup)

    NW_SE_POS = tf.equal(thetaQ, 3)
    NW_SE_ZERO_POSITION = tf.logical_and(NW_SE_POS, NW_SE_LOGIC)
    gradSup = tf.where(NW_SE_ZERO_POSITION, tf.zeros_like(gradSup), gradSup)

    # VALID identity convolution followed by zero padding clears the
    # one-pixel border.
    CENTER_MATRIX = tf.constant([[0, 0, 0], [0, 1, 0], [0, 0, 0]], tf.float32)
    CENTER_MATRIX = CENTER_MATRIX[:, :, tf.newaxis, tf.newaxis]
    gradSup = tf.nn.conv2d(gradSup,
                           CENTER_MATRIX,
                           strides=[1, 1, 1, 1],
                           padding="VALID")
    gradSup = tf.pad(gradSup, [[0, 0], [1, 1], [1, 1], [0, 0]])

    # step 4: double thresholding (2.0 = strong, 1.0 = weak, 0.0 = none)
    strongEdges = gradSup > upper  # highThreshold
    thresholdedEdges = tf.to_float(strongEdges) + tf.to_float(gradSup > lower)
    finalEdges = tf.cast(tf.identity(strongEdges), tf.float32)

    # Weak pixels with a strong pixel in their 3x3 neighbourhood.
    patchMax = tf.nn.max_pool(thresholdedEdges, [1, 3, 3, 1], [1, 1, 1, 1],
                              padding="VALID")
    patchMax = tf.pad(patchMax, [[0, 0], [1, 1], [1, 1], [0, 0]])
    weak_strong_bind = tf.logical_and(tf.equal(patchMax, 2.0),
                                      tf.equal(thresholdedEdges, 1.0))
    finalEdges = tf.where(weak_strong_bind, tf.ones_like(finalEdges),
                          finalEdges)

    # step 5: hysteresis -- keep promoting weak pixels next to the pixels
    # promoted in the previous round until a round promotes nothing.
    def cond(wk_bj, te, fe):
        # Return the boolean tensor itself. Comparing it with `is True`
        # always gives Python False, so the loop body would never run.
        return tf.reduce_any(wk_bj)

    def body(wk_bj, te, fe):
        currentPixels = tf.to_float(wk_bj)
        targetPixels = tf.nn.max_pool(currentPixels, [1, 3, 3, 1],
                                      [1, 1, 1, 1],
                                      padding="SAME")
        targetPixels = targetPixels > 0
        new_weak_strong_bind = tf.logical_and(
            tf.logical_and(tf.equal(fe, 0), tf.equal(te, 1.0)), targetPixels)
        fe = tf.where(new_weak_strong_bind, tf.ones_like(fe), fe)
        return [new_weak_strong_bind, te, fe]

    weak_strong_bind, thresholdedEdges, finalEdges = tf.while_loop(
        cond, body, [weak_strong_bind, thresholdedEdges, finalEdges])

    # Scored edges: non-edge pixels with a positive gradient contribute
    # their gradient relative to the upper threshold.
    all_grad_index = tf.cast(gradSup > 0, tf.float32)
    very_weak = tf.abs(all_grad_index - finalEdges)
    gradSup = gradSup * very_weak
    gradSup = gradSup / upper
    finalEdges += gradSup
    return tf.to_float(finalEdges)
def _example_too_big(example, max_length):
    """Compare the example's length against `max_length`.

    Returns a boolean tensor that is True when
    `_example_length(example) <= max_length`.

    NOTE(review): despite the name, the result is True for examples that
    are NOT longer than `max_length` -- confirm how callers use it before
    changing either the name or the comparison.
    """
    length = _example_length(example)
    return tf.less_equal(length, max_length)
def d_leaky_tf_relu(x):
    """Return a float32 tensor that is 1.0 where x > 0 and 0.2 where x <= 0."""
    positive_mask = tf.cast(tf.greater(x, 0), dtype=tf.float32)
    non_positive_mask = tf.cast(tf.less_equal(x, 0), dtype=tf.float32)
    return positive_mask + non_positive_mask * 0.2
def build_model(data_batch, data, step):
    """Build the encoders, decoders, losses and training ops.

    Two teacher-forced decoders share one RNN cell: 'MLE' decodes with
    y-reference 1 and x-reference 0, 'REC' with y-reference 1 and
    x-reference 1. Their losses are mixed into 'joint' with a weight that is
    annealed on `step`. A beam-search decoder is built for inference.

    Args:
      data_batch: mapping from field names (e.g. 'x_value_text_ids',
        'y_aux_length') to batch tensors.
      data: dataset object exposing `vocab(name)`.
      step: global step; cast to float32 to drive the weight schedule.

    Returns:
      train_ops: dict mapping each name in `config_train.train` to its
        training op.
      bs_outputs: outputs of the beam-search decoder.

    Relies on module-level names not visible here: `config_model`,
    `config_train`, `FLAGS`, `copy_flag`, `x_fields`, `x_strs`, `y_strs`,
    `CopyNetWrapper`, `tile_batch`, `get_train_op`.
    """
    batch_size, num_steps = [
        tf.shape(data_batch["x_value_text_ids"])[d] for d in range(2)
    ]
    vocab = data.vocab('y_aux')

    id2str = '<{}>'.format
    bos_str, eos_str = map(id2str, (vocab.bos_token_id, vocab.eos_token_id))

    def single_bleu(ref, hypo):
        # Unknown tokens in the reference become '<-1>'.
        ref = [id2str(u if u != vocab.unk_token_id else -1) for u in ref]
        hypo = [id2str(u) for u in hypo]

        ref = tx.utils.strip_special_tokens(' '.join(ref),
                                            strip_bos=bos_str,
                                            strip_eos=eos_str)
        hypo = tx.utils.strip_special_tokens(' '.join(hypo),
                                             strip_eos=eos_str)

        return 0.01 * tx.evals.sentence_bleu(references=[ref],
                                             hypothesis=hypo)

    def batch_bleu(refs, hypos):
        return np.array(
            [single_bleu(ref, hypo) for ref, hypo in zip(refs, hypos)],
            dtype=np.float32)

    # losses
    losses = {}

    # embedders
    embedders = {
        name: tx.modules.WordEmbedder(vocab_size=data.vocab(name).size,
                                      hparams=hparams)
        for name, hparams in config_model.embedders.items()
    }

    # encoders
    y_encoder = tx.modules.BidirectionalRNNEncoder(
        hparams=config_model.y_encoder)
    x_encoder = tx.modules.BidirectionalRNNEncoder(
        hparams=config_model.x_encoder)

    def concat_encoder_outputs(outputs):
        return tf.concat(outputs, -1)

    def encode(ref_flag):
        # Encode the y sentence and the x record selected by `ref_flag`.
        y_str = y_strs[ref_flag]
        y_ids = data_batch['{}_text_ids'.format(y_str)]
        y_embeds = embedders['y_aux'](y_ids)
        y_sequence_length = data_batch['{}_length'.format(y_str)]
        y_enc_outputs, _ = y_encoder(y_embeds,
                                     sequence_length=y_sequence_length)
        y_enc_outputs = concat_encoder_outputs(y_enc_outputs)

        x_str = x_strs[ref_flag]
        # The first and last positions of every x field are dropped, hence
        # the `- 2` on the length below.
        x_ids = {
            field: data_batch['{}_{}_text_ids'.format(x_str, field)][:, 1:-1]
            for field in x_fields
        }
        x_embeds = tf.concat([
            embedders['x_{}'.format(field)](x_ids[field])
            for field in x_fields
        ],
                             axis=-1)
        x_sequence_length = data_batch['{}_{}_length'.format(
            x_str, x_fields[0])] - 2
        x_enc_outputs, _ = x_encoder(x_embeds,
                                     sequence_length=x_sequence_length)
        x_enc_outputs = concat_encoder_outputs(x_enc_outputs)

        return y_ids, y_embeds, y_enc_outputs, y_sequence_length, \
            x_ids, x_embeds, x_enc_outputs, x_sequence_length

    # Each name below becomes a 2-tuple indexed by the reference flag.
    encode_results = [encode(ref_str) for ref_str in range(2)]
    y_ids, y_embeds, y_enc_outputs, y_sequence_length, \
        x_ids, x_embeds, x_enc_outputs, x_sequence_length = \
        zip(*encode_results)

    # get rnn cell
    rnn_cell = tx.core.layers.get_rnn_cell(config_model.rnn_cell)

    def get_decoder(cell,
                    y__ref_flag,
                    x_ref_flag,
                    tgt_ref_flag,
                    beam_width=None):
        output_layer_params = \
            {'output_layer': tf.identity} if copy_flag else \
            {'vocab_size': vocab.size}

        # attention
        memory = tf.concat(
            [y_enc_outputs[y__ref_flag], x_enc_outputs[x_ref_flag]], axis=1)
        memory_sequence_length = None
        attention_decoder = tx.modules.AttentionRNNDecoder(
            cell=cell,
            memory=memory,
            memory_sequence_length=memory_sequence_length,
            hparams=config_model.attention_decoder,
            **output_layer_params)
        if not copy_flag:
            return attention_decoder
        cell = attention_decoder.cell if beam_width is None else \
            attention_decoder._get_beam_search_cell(beam_width)

        if copy_flag:  # copynet
            kwargs = {
                'y__ids': y_ids[y__ref_flag][:, 1:],
                'y__states': y_enc_outputs[y__ref_flag][:, 1:],
                'y__lengths': y_sequence_length[y__ref_flag] - 1,
                'x_ids': x_ids[x_ref_flag]['value'],
                'x_states': x_enc_outputs[x_ref_flag],
                'x_lengths': x_sequence_length[x_ref_flag],
            }

            if tgt_ref_flag is not None:
                kwargs.update({
                    'input_ids':
                    data_batch['{}_text_ids'.format(
                        y_strs[tgt_ref_flag])][:, :-1]
                })

            # Prefixes select the '<prefix>_ids/_states/_lengths' entries of
            # `kwargs` that form the copy memories.
            memory_prefixes = []
            if FLAGS.copy_y_:
                memory_prefixes.append('y_')
            if FLAGS.copy_x:
                memory_prefixes.append('x')

            if beam_width is not None:
                kwargs = {
                    name: tile_batch(value, beam_width)
                    for name, value in kwargs.items()
                }

            def get_get_copy_scores(memory_ids_states_lengths, output_size):
                memory_copy_states = [
                    tf.layers.dense(memory_states,
                                    units=output_size,
                                    activation=None,
                                    use_bias=False)
                    for _, memory_states, _ in memory_ids_states_lengths
                ]

                def get_copy_scores(query, coverities=None):
                    # `len(ret)` indexes the next memory: y_ first (when
                    # enabled), then x.
                    ret = []

                    if FLAGS.copy_y_:
                        memory = memory_copy_states[len(ret)]
                        if coverities is not None:
                            memory = memory + tf.layers.dense(
                                coverities[len(ret)],
                                units=output_size,
                                activation=None,
                                use_bias=False)
                        memory = tf.nn.tanh(memory)
                        ret_y_ = tf.einsum("bim,bm->bi", memory, query)
                        ret.append(ret_y_)

                    if FLAGS.copy_x:
                        memory = memory_copy_states[len(ret)]
                        if coverities is not None:
                            memory = memory + tf.layers.dense(
                                coverities[len(ret)],
                                units=output_size,
                                activation=None,
                                use_bias=False)
                        memory = tf.nn.tanh(memory)
                        ret_x = tf.einsum("bim,bm->bi", memory, query)
                        ret.append(ret_x)

                    return ret

                return get_copy_scores

            cell = CopyNetWrapper(
                cell=cell,
                vocab_size=vocab.size,
                memory_ids_states_lengths=[
                    tuple(kwargs['{}_{}'.format(prefix, s)]
                          for s in ('ids', 'states', 'lengths'))
                    for prefix in memory_prefixes
                ],
                input_ids=(kwargs['input_ids']
                           if tgt_ref_flag is not None else None),
                get_get_copy_scores=get_get_copy_scores,
                coverity_dim=config_model.coverage_state_dim
                if FLAGS.coverage else None,
                coverity_rnn_cell_hparams=config_model.coverage_rnn_cell
                if FLAGS.coverage else None,
                disabled_vocab_size=FLAGS.disabled_vocab_size,
                eps=FLAGS.eps)

        decoder = tx.modules.BasicRNNDecoder(cell=cell,
                                             hparams=config_model.decoder,
                                             **output_layer_params)
        return decoder

    def get_decoder_and_outputs(cell,
                                y__ref_flag,
                                x_ref_flag,
                                tgt_ref_flag,
                                params,
                                beam_width=None):
        decoder = get_decoder(cell,
                              y__ref_flag,
                              x_ref_flag,
                              tgt_ref_flag,
                              beam_width=beam_width)
        if beam_width is None:
            ret = decoder(**params)
        else:
            ret = tx.modules.beam_search_decode(decoder_or_cell=decoder,
                                                beam_width=beam_width,
                                                **params)
        return (decoder, ) + ret

    # Wrapped in a template so repeated calls go through the same template
    # object.
    get_decoder_and_outputs = tf.make_template('get_decoder_and_outputs',
                                               get_decoder_and_outputs)

    def teacher_forcing(cell, y__ref_flag, x_ref_flag, loss_name):
        # The decoding target is the y sentence paired with the x reference.
        tgt_ref_flag = x_ref_flag
        tgt_str = y_strs[tgt_ref_flag]
        sequence_length = data_batch['{}_length'.format(tgt_str)] - 1
        decoder, tf_outputs, final_state, _ = get_decoder_and_outputs(
            cell, y__ref_flag, x_ref_flag, tgt_ref_flag, {
                'decoding_strategy': 'train_greedy',
                'inputs': y_embeds[tgt_ref_flag],
                'sequence_length': sequence_length
            })

        tgt_y_ids = data_batch['{}_text_ids'.format(tgt_str)][:, 1:]
        loss = tx.losses.sequence_sparse_softmax_cross_entropy(
            labels=tgt_y_ids,
            logits=tf_outputs.logits,
            sequence_length=sequence_length,
            average_across_batch=False)
        loss = tf.reduce_mean(loss, 0)

        if copy_flag and FLAGS.exact_cover_w != 0:
            sum_copy_probs = list(
                map(lambda t: tf.cast(t, tf.float32),
                    final_state.sum_copy_probs))
            memory_lengths = [
                lengths
                for _, _, lengths in decoder.cell.memory_ids_states_lengths
            ]
            # Penalize summed copy probabilities that deviate from 1 on
            # every valid memory position.
            exact_coverage_losses = [
                tf.reduce_mean(
                    tf.reduce_sum(
                        tx.utils.mask_sequences(
                            tf.square(sum_copy_prob - 1.), memory_length),
                        1)) for sum_copy_prob, memory_length in zip(
                            sum_copy_probs, memory_lengths)
            ]
            print_xe_loss_op = tf.print(loss_name, 'xe loss:', loss)
            with tf.control_dependencies([print_xe_loss_op]):
                for i, exact_coverage_loss in enumerate(
                        exact_coverage_losses):
                    print_op = tf.print(loss_name,
                                        'exact coverage loss {:d}:'.format(i),
                                        exact_coverage_loss)
                    with tf.control_dependencies([print_op]):
                        loss += FLAGS.exact_cover_w * exact_coverage_loss

        losses[loss_name] = loss

        return decoder, tf_outputs, loss

    def beam_searching(cell, y__ref_flag, x_ref_flag, beam_width):
        start_tokens = tf.ones_like(data_batch['y_aux_length']) * \
            vocab.bos_token_id
        end_token = vocab.eos_token_id
        # Use the caller-supplied beam width instead of reading the config
        # directly, so the parameter takes effect.
        decoder, bs_outputs, _, _ = get_decoder_and_outputs(
            cell,
            y__ref_flag,
            x_ref_flag,
            None, {
                'embedding': embedders['y_aux'],
                'start_tokens': start_tokens,
                'end_token': end_token,
                'max_decoding_length': config_train.infer_max_decoding_length
            },
            beam_width=beam_width)
        return decoder, bs_outputs

    decoder, tf_outputs, loss = teacher_forcing(rnn_cell, 1, 0, 'MLE')
    rec_decoder, _, rec_loss = teacher_forcing(rnn_cell, 1, 1, 'REC')

    # Weight schedule over step_stage = step / 600:
    #   step_stage <= 1     -> 1.0
    #   step_stage >  2     -> FLAGS.rec_w
    #   otherwise           -> 1.0 - (step_stage - 1) * (1 - FLAGS.rec_w)
    step_stage = tf.cast(step, tf.float32) / tf.constant(600.0)
    rec_weight = tf.case(
        [(tf.less_equal(step_stage, tf.constant(1.0)),
          lambda: tf.constant(1.0)),
         (tf.greater(step_stage, tf.constant(2.0)), lambda: FLAGS.rec_w)],
        default=lambda: tf.constant(1.0) - (step_stage - 1) *
        (1 - FLAGS.rec_w))
    joint_loss = (1 - rec_weight) * loss + rec_weight * rec_loss
    losses['joint'] = joint_loss

    tiled_decoder, bs_outputs = beam_searching(rnn_cell, 1, 0,
                                               config_train.infer_beam_width)

    train_ops = {
        name: get_train_op(losses[name], hparams=config_train.train[name])
        for name in config_train.train
    }

    return train_ops, bs_outputs
def project_points_with_depth_visibility_check(point_positions,
                                               camera_intrinsics,
                                               camera_rotation_matrix,
                                               camera_translation,
                                               image_width,
                                               image_height,
                                               depth_image,
                                               depth_intrinsics=None,
                                               depth_threshold=0.1):
    """Projects 3D points into an image and tests them against a depth map.

    A point is reported visible when all of the following hold: its camera
    frame z is positive, its integer projection lies inside the color image,
    its integer projection lies inside the depth image, and its camera frame
    z differs from the depth image value at that pixel by at most
    `depth_threshold`.

    Args:
      point_positions: A tf.float32 tensor of shape [N, 3] with N 3D points.
      camera_intrinsics: A tf.float32 tensor of shape [3, 3].
      camera_rotation_matrix: A tf.float32 tensor of shape [3, 3].
      camera_translation: A tf.float32 tensor of shape [3].
      image_width: Width of the color image.
      image_height: Height of the color image.
      depth_image: Depth image as a 2D tensor.
      depth_intrinsics: A tf.float32 tensor of shape [3, 3]. When None,
        `camera_intrinsics` is used for the depth image as well.
      depth_threshold: Maximum allowed absolute depth difference.

    Returns:
      points_in_image_frame: A tf.int32 tensor of shape [N, 2] holding the
        x, y pixel location of every point (visible or not).
      visibility: A tf.bool tensor of shape [N].
    """
    if depth_intrinsics is None:
        depth_intrinsics = camera_intrinsics

    def _to_pixels(intrinsics, camera_points):
        # Apply the intrinsics, divide by the third coordinate, cast to int32.
        homogeneous = tf.linalg.einsum('ij,nj->ni', intrinsics, camera_points)
        return tf.cast(
            homogeneous[:, :2] / homogeneous[:, 2:3], dtype=tf.int32)

    def _inside(pixels, width, height):
        # True for rows with 0 <= x < width and 0 <= y < height.
        upper_bound = tf.expand_dims(tf.stack([width, height]), axis=0)
        not_negative = tf.math.reduce_all(
            tf.greater_equal(pixels, 0), axis=1)
        below_bound = tf.math.reduce_all(
            tf.less(pixels, upper_bound), axis=1)
        return not_negative & below_bound

    color_height = tf.convert_to_tensor(image_height, dtype=tf.int32)
    color_width = tf.convert_to_tensor(image_width, dtype=tf.int32)
    depth_shape = tf.shape(depth_image)
    depth_height = depth_shape[0]
    depth_width = depth_shape[1]

    # World frame -> camera frame: R * p + t for every point.
    rotated = tf.linalg.einsum('ij,nj->ni', camera_rotation_matrix,
                               point_positions)
    camera_points = rotated + tf.expand_dims(camera_translation, axis=0)

    points_in_image_frame = _to_pixels(camera_intrinsics, camera_points)
    depth_pixels = _to_pixels(depth_intrinsics, camera_points)

    # In front of the camera, inside the color image, inside the depth image.
    visibility = tf.greater(camera_points[:, 2], 0.0)
    visibility = visibility & _inside(points_in_image_frame, color_width,
                                      color_height)
    visibility = visibility & _inside(depth_pixels, depth_width, depth_height)

    # Only the surviving points are looked up in the depth image, so every
    # flattened index below is within bounds.
    surviving_pixels = tf.boolean_mask(depth_pixels, visibility)
    flat_indices = (
        surviving_pixels[:, 1] * depth_width + surviving_pixels[:, 0])
    surviving_depth = tf.boolean_mask(camera_points, visibility)[:, 2]
    measured_depth = tf.gather(tf.reshape(depth_image, [-1]), flat_indices)
    depth_agrees = tf.less_equal(
        tf.abs(surviving_depth - measured_depth), depth_threshold)

    # Scatter the per-survivor result back to length N; points that were
    # already rejected receive 0 and stay rejected.
    surviving_rows = tf.cast(tf.where(visibility), dtype=tf.int32)
    depth_agrees_full = tf.scatter_nd(
        indices=surviving_rows,
        updates=tf.cast(depth_agrees, dtype=tf.int32),
        shape=tf.shape(visibility))
    visibility = visibility & tf.cast(depth_agrees_full, dtype=tf.bool)
    return points_in_image_frame, visibility
def seek_queue_many(ids, hashes, outdir, blacklist, hashes_diff):
    """Compare each queued hash with all hashes from its own index onwards.

    Producer threads running `check_batch_many` feed `(index, hash)` pairs
    into a FIFO queue. For every dequeued pair `(i, hash_i)` the number of
    differing positions between `hash_i` and each row of `hashes[i:]` is
    counted; pairs whose count is at most `DISTANCE_THRESHOLD` are stored in
    `hashes_diff` under the key `ids[i] + '-' + ids[j]`.

    Intermediate results are checkpointed every 100000 tasks to
    `<outdir>.tmp.<machine>` plus two `progress.*` files, and the final
    result is written to `outdir`. The checkpoint files are deleted at the
    end.

    Args:
        ids: Sequence of string identifiers, indexed like `hashes`.
        hashes: Sequence of hashes; each row is fed into a bool placeholder
            of width 64.
        outdir: Path of the final JSON output file.
        blacklist: Collection passed through to `check_batch_many`; its
            length is subtracted from the number of tasks, which presumes
            blacklisted entries are never enqueued (verify against
            `check_batch_many`).
        hashes_diff: Dict updated in place with the matching pairs.
    """
    len_hashes = len(hashes)
    last_index = 0
    num_threads = 5
    # Never let the step drop to 0: with fewer hashes than threads the
    # original integer division produced 0 and `range` raised ValueError.
    batch_size = max(1, len_hashes // num_threads)
    total_tasks = len_hashes - len(blacklist)
    print(batch_size)
    print(total_tasks)
    pbar = tf.keras.utils.Progbar(total_tasks)

    # Placeholders used to feed data into the queue.
    queue_i = v1.placeholder(tf.int32, shape=[None])
    queue_hash_i = v1.placeholder(tf.bool, shape=[None, 64])

    queue = tf.queue.FIFOQueue(capacity=50,
                               dtypes=[tf.int32, tf.bool],
                               shapes=[[], [64]])
    enqueue_op = queue.enqueue_many([queue_i, queue_hash_i])
    dequeue_op = queue.dequeue()

    # Per-row count of differing positions, then the rows under threshold.
    diff_hash_i = v1.placeholder(tf.bool, shape=[64])
    diff_hashes_j = v1.placeholder(tf.bool, shape=[None, 64])
    diff_op_many = tf.math.count_nonzero(
        tf.not_equal(diff_hash_i, diff_hashes_j), 1)
    filter_op = tf.less_equal(diff_op_many, DISTANCE_THRESHOLD)
    where_op = tf.where(filter_op)

    # Start the producer threads for the FIFO queue.
    sess = v1.Session(config=config)
    enqueue_threads = [
        threading.Thread(target=check_batch_many,
                         args=[
                             sess, hashes, enqueue_op, init_i, batch_size,
                             queue_i, queue_hash_i, blacklist
                         ])
        for init_i in range(last_index, len_hashes, batch_size)
    ]
    for t in enqueue_threads:
        # The original called `t.isDaemon()`, which only reads the flag.
        # Mark the producers as daemons so a failure in the consumer loop
        # cannot leave the process hanging on a blocked enqueue.
        t.daemon = True
        t.start()

    coord = tf.train.Coordinator()
    threads = v1.train.start_queue_runners(coord=coord, sess=sess)

    pbar.update(0)
    seen_images = []
    outdir_tmp = outdir + '.tmp' + '.' + str(settings.distributed_machine)
    # Bound before the loop so the cleanup below works even when the loop
    # body never runs.
    progress_file = 'progress.' + outdir_tmp
    checkpoint_interval = 100000

    for step in range(total_tasks):
        i, hash_i = sess.run(dequeue_op)
        diff, where = sess.run([diff_op_many, where_op],
                               feed_dict={
                                   diff_hash_i: hash_i,
                                   diff_hashes_j: hashes[i:]
                               })

        for j in where:
            # `where` holds positions relative to the slice hashes[i:].
            j_rel = j[0]
            j_abs = i + j_rel
            key_id = ids[i] + '-' + ids[j_abs]
            hashes_diff[key_id] = diff[j_rel]
        seen_images.append(i)

        if step % checkpoint_interval == 0:
            with open(outdir_tmp, 'w') as outfile:
                json.dump(hashes_diff, outfile, default=default)
            with open(progress_file + '.txt', 'w') as outfile:
                outfile.write(str(i) + '\n')
            with open(progress_file + '.json', 'w') as outfile:
                json.dump(str(seen_images), outfile, default=default)

        # Report completed tasks (1-based) so the bar reaches its total.
        pbar.update(step + 1)

    with open(outdir, 'w') as outfile:
        json.dump(hashes_diff, outfile, default=default)

    # Shut everything down to avoid zombie threads.
    sess.run(queue.close(cancel_pending_enqueues=True))
    coord.request_stop()
    coord.join(enqueue_threads)
    coord.join(threads)
    sess.close()

    # Checkpoint files exist only if at least one task was processed.
    for stale_path in (outdir_tmp, progress_file + '.txt',
                       progress_file + '.json'):
        if os.path.exists(stale_path):
            os.remove(stale_path)
def main():
    """Create the model and start the evaluation process.

    Builds the network on the input image at scales 1.0, 0.75 and 0.5,
    takes the element-wise maximum of the three score maps, upsamples it to
    the input size and accumulates the streaming mean IoU over
    `args.num_steps` images. Labels greater than `args.num_classes - 1` are
    excluded from the metric.
    """
    args = get_arguments()

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_dir,
            args.data_list,
            None,  # No defined input size.
            False,  # No random scale.
            False,  # No random mirror.
            args.ignore_label,
            IMG_MEAN,
            coord)
        image, label = reader.image, reader.label
    # Add one batch dimension. `axis` replaces the deprecated `dim` keyword.
    image_batch = tf.expand_dims(image, axis=0)
    label_batch = tf.expand_dims(label, axis=0)

    h_orig = tf.cast(tf.shape(image_batch)[1], tf.float32)
    w_orig = tf.cast(tf.shape(image_batch)[2], tf.float32)
    image_batch075 = tf.image.resize_images(
        image_batch,
        tf.stack([
            tf.cast(tf.multiply(h_orig, 0.75), tf.int32),
            tf.cast(tf.multiply(w_orig, 0.75), tf.int32)
        ]))
    image_batch05 = tf.image.resize_images(
        image_batch,
        tf.stack([
            tf.cast(tf.multiply(h_orig, 0.5), tf.int32),
            tf.cast(tf.multiply(w_orig, 0.5), tf.int32)
        ]))

    # Create network. The two rescaled copies reuse the first copy's
    # variables.
    with tf.variable_scope('', reuse=False):
        net = DeepLabResNetModel({'data': image_batch},
                                 is_training=False,
                                 num_classes=args.num_classes)
    with tf.variable_scope('', reuse=True):
        net075 = DeepLabResNetModel({'data': image_batch075},
                                    is_training=False,
                                    num_classes=args.num_classes)
    with tf.variable_scope('', reuse=True):
        net05 = DeepLabResNetModel({'data': image_batch05},
                                   is_training=False,
                                   num_classes=args.num_classes)

    # Which variables to load.
    restore_var = tf.global_variables()

    # Predictions: bring the smaller score maps to the full-scale map's
    # size, then fuse by element-wise maximum.
    raw_output100 = net.layers['fc1_voc12']
    raw_output075 = tf.image.resize_images(net075.layers['fc1_voc12'],
                                           tf.shape(raw_output100)[1:3])
    raw_output05 = tf.image.resize_images(net05.layers['fc1_voc12'],
                                          tf.shape(raw_output100)[1:3])

    raw_output = tf.reduce_max(
        tf.stack([raw_output100, raw_output075, raw_output05]), axis=0)
    raw_output = tf.image.resize_bilinear(raw_output,
                                          tf.shape(image_batch)[1:3])
    # `axis` replaces the deprecated `dimension` keyword.
    raw_output = tf.argmax(raw_output, axis=3)
    pred = tf.expand_dims(raw_output, axis=3)  # Create 4-d tensor.

    # mIoU
    pred = tf.reshape(pred, [-1])
    gt = tf.reshape(label_batch, [-1])
    # tensorflow 1.3.0 conflict: `weights` cannot be used here, so pixels
    # to ignore are removed with a gather instead.
    # Ignore all labels >= num_classes.
    indices = tf.squeeze(tf.where(tf.less_equal(gt, args.num_classes - 1)), 1)
    gt = tf.cast(tf.gather(gt, indices), tf.int32)
    pred = tf.gather(pred, indices)
    mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(
        pred, gt, num_classes=args.num_classes)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)
    sess.run(tf.local_variables_initializer())

    # Load weights.
    loader = tf.train.Saver(var_list=restore_var)
    if args.restore_from is not None:
        load(loader, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over evaluation steps. Only the metric update is run; the
    # per-pixel predictions were previously fetched to the host and
    # discarded.
    for step in range(args.num_steps):
        sess.run(update_op)
        if step % 100 == 0:
            print('step {:d}'.format(step))
    print('Mean IoU: {:.3f}'.format(mIoU.eval(session=sess)))
    coord.request_stop()
    coord.join(threads)
def __init__(self, sess, config, api, log_dir, forward, scope=None, name=None):
    """Build the full model graph and, in training mode, its losses.

    The graph consists of: input placeholders, a shared word embedding with
    a sentence encoder, a multi-hop persona memory, a context RNN,
    recognition and prior networks producing a Gaussian latent, a persona
    selection head, a bag-of-words head, and a decoder with an additional
    position predictor.

    NOTE(review): the nesting of the variable scopes below was reconstructed
    from a whitespace-collapsed source. Scope nesting determines variable
    names in checkpoints -- confirm against an existing checkpoint before
    relying on it.

    Args:
        sess: Session; stored as `self.sess` and passed to `self.optimize`.
        config: Hyper-parameter object; the attributes read are visible in
            the body (e.g. `max_utt_len`, `sent_type`, `latent_size`).
        api: Corpus object providing `vocab`, `rev_vocab`, `index2idf`,
            `gen_vocab_size`, `topic_vocab` and `dialog_act_vocab`.
        log_dir: When not None the KL weight is
            `min(global_t / config.full_kl_step, 1.0)`, otherwise it is the
            constant 1.0. Also forwarded to `self.optimize`.
        forward: When true, the decoder uses the inference loop function
            and no loss/optimizer is built. When false, the decoder is fed
            the target tokens and the losses plus optimizer are built.
        scope: Stored as `self.scope`.
        name: Not used by this method.

    Raises:
        ValueError: If `config.sent_type` is not "bow", "rnn" or "bi_rnn".
    """
    self.vocab = api.vocab
    self.rev_vocab = api.rev_vocab
    self.vocab_size = len(self.vocab)
    self.idf = api.index2idf
    self.gen_vocab_size = api.gen_vocab_size
    self.topic_vocab = api.topic_vocab
    self.topic_vocab_size = len(self.topic_vocab)
    self.da_vocab = api.dialog_act_vocab
    self.da_vocab_size = len(self.da_vocab)
    self.sess = sess
    self.scope = scope
    self.max_utt_len = config.max_utt_len
    self.max_per_len = config.max_per_len
    self.max_per_line = config.max_per_line
    self.max_per_words = config.max_per_words
    self.go_id = self.rev_vocab["<s>"]
    self.eos_id = self.rev_vocab["</s>"]
    self.context_cell_size = config.cxt_cell_size
    self.sent_cell_size = config.sent_cell_size
    self.memory_cell_size = config.memory_cell_size
    self.dec_cell_size = config.dec_cell_size
    self.hops = config.hops
    self.batch_size = config.batch_size
    self.test_samples = config.test_samples
    self.balance_factor = config.balance_factor

    with tf.name_scope("io"):
        # The batch dimension of every placeholder is fixed to batch_size.
        self.first_dimension_size = self.batch_size
        self.input_contexts = tf.placeholder(
            dtype=tf.int32,
            shape=(self.first_dimension_size, None, self.max_utt_len),
            name="dialog_context")
        self.floors = tf.placeholder(dtype=tf.int32,
                                     shape=(self.first_dimension_size, None),
                                     name="floor")
        self.context_lens = tf.placeholder(
            dtype=tf.int32,
            shape=(self.first_dimension_size, ),
            name="context_lens")
        self.topics = tf.placeholder(dtype=tf.int32,
                                     shape=(self.first_dimension_size, ),
                                     name="topics")
        self.personas = tf.placeholder(dtype=tf.int32,
                                       shape=(self.first_dimension_size,
                                              self.max_per_line,
                                              self.max_per_len),
                                       name="personas")
        self.persona_words = tf.placeholder(
            dtype=tf.int32,
            shape=(self.first_dimension_size, self.max_per_line,
                   self.max_per_len),
            name="persona_words")
        self.persona_position = tf.placeholder(
            dtype=tf.int32,
            shape=(self.first_dimension_size, None),
            name="persona_position")
        self.selected_persona = tf.placeholder(
            dtype=tf.int32,
            shape=(self.first_dimension_size, 1),
            name="selected_persona")
        self.query = tf.placeholder(dtype=tf.int32,
                                    shape=(self.first_dimension_size,
                                           self.max_utt_len),
                                    name="query")

        # target response given the dialog context
        self.output_tokens = tf.placeholder(
            dtype=tf.int32,
            shape=(self.first_dimension_size, None),
            name="output_token")
        self.output_lens = tf.placeholder(
            dtype=tf.int32,
            shape=(self.first_dimension_size, ),
            name="output_lens")

        # optimization related variables
        self.learning_rate = tf.Variable(float(config.init_lr),
                                         trainable=False,
                                         name="learning_rate")
        self.learning_rate_decay_op = self.learning_rate.assign(
            tf.multiply(self.learning_rate, config.lr_decay))
        self.global_t = tf.placeholder(dtype=tf.int32, name="global_t")
        self.use_prior = tf.placeholder(dtype=tf.bool, name="use_prior")

    max_context_lines = array_ops.shape(self.input_contexts)[1]
    max_out_len = array_ops.shape(self.output_tokens)[1]
    batch_size = array_ops.shape(self.input_contexts)[0]

    with variable_scope.variable_scope("wordEmbedding"):
        self.embedding = tf.get_variable(
            "embedding", [self.vocab_size, config.embed_size],
            dtype=tf.float32)
        # Multiplying by this mask zeroes row 0 of the embedding table.
        embedding_mask = tf.constant(
            [0 if i == 0 else 1 for i in range(self.vocab_size)],
            dtype=tf.float32,
            shape=[self.vocab_size, 1])
        embedding = self.embedding * embedding_mask

        input_embedding = embedding_ops.embedding_lookup(
            embedding, tf.reshape(self.input_contexts, [-1]))
        input_embedding = tf.reshape(
            input_embedding, [-1, self.max_utt_len, config.embed_size])
        output_embedding = embedding_ops.embedding_lookup(
            embedding, self.output_tokens)
        persona_input_embedding = embedding_ops.embedding_lookup(
            embedding, tf.reshape(self.personas, [-1]))
        persona_input_embedding = tf.reshape(
            persona_input_embedding,
            [-1, self.max_per_len, config.embed_size])

        # Only the "bi_rnn" branch produces per-step outputs. Without this
        # default, the assignment to self.input_step_embedding below raised
        # NameError for sent_type "bow" and "rnn".
        input_step_embedding = None
        if config.sent_type == "bow":
            input_embedding, sent_size = get_bow(input_embedding)
            output_embedding, _ = get_bow(output_embedding)
            persona_input_embedding, _ = get_bow(persona_input_embedding)

        elif config.sent_type == "rnn":
            sent_cell = self.get_rnncell("gru", self.sent_cell_size,
                                         config.keep_prob, 1)
            _, input_embedding, sent_size = get_rnn_encode(
                input_embedding, sent_cell, scope="sent_rnn")
            _, output_embedding, _ = get_rnn_encode(output_embedding,
                                                    sent_cell,
                                                    self.output_lens,
                                                    scope="sent_rnn",
                                                    reuse=True)
            _, persona_input_embedding, _ = get_rnn_encode(
                persona_input_embedding,
                sent_cell,
                scope="sent_rnn",
                reuse=True)

        elif config.sent_type == "bi_rnn":
            fwd_sent_cell = self.get_rnncell("gru",
                                             self.sent_cell_size,
                                             keep_prob=1.0,
                                             num_layer=1)
            bwd_sent_cell = self.get_rnncell("gru",
                                             self.sent_cell_size,
                                             keep_prob=1.0,
                                             num_layer=1)
            input_step_embedding, input_embedding, sent_size = get_bi_rnn_encode(
                input_embedding,
                fwd_sent_cell,
                bwd_sent_cell,
                scope="sent_bi_rnn")
            _, output_embedding, _ = get_bi_rnn_encode(output_embedding,
                                                       fwd_sent_cell,
                                                       bwd_sent_cell,
                                                       self.output_lens,
                                                       scope="sent_bi_rnn",
                                                       reuse=True)
            _, persona_input_embedding, _ = get_bi_rnn_encode(
                persona_input_embedding,
                fwd_sent_cell,
                bwd_sent_cell,
                scope="sent_bi_rnn",
                reuse=True)
        else:
            raise ValueError(
                "Unknown sent_type. Must be one of [bow, rnn, bi_rnn]")

        # reshape input into dialogs
        input_embedding = tf.reshape(input_embedding,
                                     [-1, max_context_lines, sent_size])
        self.input_step_embedding = input_step_embedding
        self.encoder_state_size = sent_size
        if config.keep_prob < 1.0:
            input_embedding = tf.nn.dropout(input_embedding, config.keep_prob)

    with variable_scope.variable_scope("personaMemory"):
        embedding_mask = tf.constant(
            [0 if i == 0 else 1 for i in range(self.vocab_size)],
            dtype=tf.float32,
            shape=[self.vocab_size, 1])
        A = tf.get_variable("persona_embedding_A",
                            [self.vocab_size, self.memory_cell_size],
                            dtype=tf.float32)
        A = A * embedding_mask
        C = []
        for hopn in range(self.hops):
            C.append(
                tf.get_variable("persona_embedding_C_hop_{}".format(hopn),
                                [self.vocab_size, self.memory_cell_size],
                                dtype=tf.float32) * embedding_mask)

        # Initial controller state: sum of the query's word embeddings.
        q_emb = tf.nn.embedding_lookup(A, self.query)
        u_0 = tf.reduce_sum(q_emb, 1)
        u = [u_0]
        for hopn in range(self.hops):
            # Input memory: A for the first hop, the previous hop's C after.
            if hopn == 0:
                m_emb_A = tf.nn.embedding_lookup(A, self.personas)
                m_A = tf.reshape(m_emb_A, [
                    -1, self.max_per_len * self.max_per_line,
                    self.memory_cell_size
                ])
            else:
                with tf.variable_scope('persona_hop_{}'.format(hopn)):
                    m_emb_A = tf.nn.embedding_lookup(C[hopn - 1],
                                                     self.personas)
                    m_A = tf.reshape(m_emb_A, [
                        -1, self.max_per_len * self.max_per_line,
                        self.memory_cell_size
                    ])

            # Attention over memory slots: dot product with the state,
            # followed by a softmax.
            u_temp = tf.transpose(tf.expand_dims(u[-1], -1), [0, 2, 1])
            dotted = tf.reduce_sum(m_A * u_temp, 2)
            probs = tf.nn.softmax(dotted)
            probs_temp = tf.transpose(tf.expand_dims(probs, -1), [0, 2, 1])

            with tf.variable_scope('persona_hop_{}'.format(hopn)):
                m_emb_C = tf.nn.embedding_lookup(
                    C[hopn],
                    tf.reshape(self.personas,
                               [-1, self.max_per_len * self.max_per_line]))
            m_emb_C = tf.expand_dims(m_emb_C, -2)
            m_C = tf.reduce_sum(m_emb_C, axis=2)
            c_temp = tf.transpose(m_C, [0, 2, 1])
            # Attention-weighted output memory, added to the running state.
            o_k = tf.reduce_sum(c_temp * probs_temp, axis=2)
            u_k = u[-1] + o_k
            u.append(u_k)
        persona_memory = u[-1]

    with variable_scope.variable_scope("contextEmbedding"):
        context_layers = 2
        enc_cell = self.get_rnncell(config.cell_type,
                                    self.context_cell_size,
                                    keep_prob=1.0,
                                    num_layer=context_layers)
        _, enc_last_state = tf.nn.dynamic_rnn(
            enc_cell,
            input_embedding,
            dtype=tf.float32,
            sequence_length=self.context_lens)

        # Multi-layer: concatenate the per-layer final states (hidden part
        # only for LSTM cells).
        if context_layers > 1:
            if config.cell_type == 'lstm':
                enc_last_state = [temp.h for temp in enc_last_state]
            enc_last_state = tf.concat(enc_last_state, 1)
        else:
            if config.cell_type == 'lstm':
                enc_last_state = enc_last_state.h

    cond_embedding = tf.concat([persona_memory, enc_last_state], 1)

    with variable_scope.variable_scope("recognitionNetwork"):
        recog_input = tf.concat(
            [cond_embedding, output_embedding, persona_memory], 1)
        self.recog_mulogvar = recog_mulogvar = layers.fully_connected(
            recog_input,
            config.latent_size * 2,
            activation_fn=None,
            scope="muvar")
        recog_mu, recog_logvar = tf.split(recog_mulogvar, 2, axis=1)

    with variable_scope.variable_scope("priorNetwork"):
        prior_fc1 = layers.fully_connected(cond_embedding,
                                           np.maximum(
                                               config.latent_size * 2, 100),
                                           activation_fn=tf.tanh,
                                           scope="fc1")
        prior_mulogvar = layers.fully_connected(prior_fc1,
                                                config.latent_size * 2,
                                                activation_fn=None,
                                                scope="muvar")
        prior_mu, prior_logvar = tf.split(prior_mulogvar, 2, axis=1)

        # Sample from the prior or the recognition distribution, selected
        # at run time by the use_prior placeholder.
        latent_sample = tf.cond(
            self.use_prior, lambda: sample_gaussian(prior_mu, prior_logvar),
            lambda: sample_gaussian(recog_mu, recog_logvar))

    with variable_scope.variable_scope("personaSelecting"):
        condition = tf.concat([persona_memory, latent_sample], 1)
        self.persona_dist = tf.nn.log_softmax(
            layers.fully_connected(condition,
                                   self.max_per_line,
                                   activation_fn=tf.tanh,
                                   scope="persona_dist"))
        # (batch index, arg-max persona line) pairs for gather_nd.
        select_temp = tf.expand_dims(
            tf.argmax(self.persona_dist, 1, output_type=tf.int32), 1)
        index_temp = tf.expand_dims(
            tf.range(0, self.first_dimension_size, dtype=tf.int32), 1)
        persona_select = tf.concat([index_temp, select_temp], 1)
        selected_words_ordered = tf.reshape(
            tf.gather_nd(self.persona_words, persona_select),
            [self.max_per_len * self.first_dimension_size])
        self.selected_words = tf.gather_nd(self.persona_words,
                                           persona_select)
        label = tf.reshape(
            selected_words_ordered,
            [self.max_per_len * self.first_dimension_size, 1])
        index = tf.reshape(
            tf.range(self.first_dimension_size, dtype=tf.int32),
            [self.first_dimension_size, 1])
        index = tf.reshape(
            tf.tile(index, [1, self.max_per_len]),
            [self.max_per_len * self.first_dimension_size, 1])
        # (batch index, word id) pairs of the selected persona line,
        # keeping only word ids > 0.
        concated = tf.concat([index, label], 1)
        true_labels = tf.where(selected_words_ordered > 0)
        concated = tf.gather_nd(concated, true_labels)
        # Two complementary [batch, vocab] masks: one weights the selected
        # persona words, the other weights every remaining word.
        self.persona_word_mask = tf.sparse_to_dense(
            concated, [self.first_dimension_size, self.vocab_size],
            config.perw_weight, 0.0)
        self.other_word_mask = tf.sparse_to_dense(
            concated, [self.first_dimension_size, self.vocab_size], 0.0,
            config.othw_weight)
        self.persona_word_mask = self.persona_word_mask * self.idf

    with variable_scope.variable_scope("generationNetwork"):
        gen_inputs = tf.concat([cond_embedding, latent_sample], 1)

        # BOW loss
        bow_fc1 = layers.fully_connected(gen_inputs,
                                         400,
                                         activation_fn=tf.tanh,
                                         scope="bow_fc1")
        if config.keep_prob < 1.0:
            bow_fc1 = tf.nn.dropout(bow_fc1, config.keep_prob)
        self.bow_logits = layers.fully_connected(bow_fc1,
                                                 self.vocab_size,
                                                 activation_fn=None,
                                                 scope="bow_project")

        # Y loss
        dec_inputs = gen_inputs
        selected_attribute_embedding = None
        self.da_logits = tf.zeros((batch_size, self.da_vocab_size))

        # Decoder initial state: one projection per layer; LSTM cells use
        # the same tensor for both parts of the state tuple.
        if config.num_layer > 1:
            dec_init_state = []
            for i in range(config.num_layer):
                temp_init = layers.fully_connected(dec_inputs,
                                                   self.dec_cell_size,
                                                   activation_fn=None,
                                                   scope="init_state-%d" % i)
                if config.cell_type == 'lstm':
                    temp_init = rnn_cell.LSTMStateTuple(
                        temp_init, temp_init)

                dec_init_state.append(temp_init)

            dec_init_state = tuple(dec_init_state)
        else:
            dec_init_state = layers.fully_connected(dec_inputs,
                                                    self.dec_cell_size,
                                                    activation_fn=None,
                                                    scope="init_state")
            if config.cell_type == 'lstm':
                dec_init_state = rnn_cell.LSTMStateTuple(
                    dec_init_state, dec_init_state)

    with variable_scope.variable_scope("decoder"):
        dec_cell = self.get_rnncell(config.cell_type, self.dec_cell_size,
                                    config.keep_prob, config.num_layer)
        dec_cell = OutputProjectionWrapper(dec_cell, self.vocab_size)
        pos_cell = self.get_rnncell(config.cell_type, self.dec_cell_size,
                                    config.keep_prob, config.num_layer)
        pos_cell = OutputProjectionWrapper(pos_cell, self.vocab_size)

        with variable_scope.variable_scope("position"):
            self.pos_w_1 = tf.get_variable("pos_w_1",
                                           [self.dec_cell_size, 2],
                                           dtype=tf.float32)
            self.pos_b_1 = tf.get_variable("pos_b_1", [2], dtype=tf.float32)

            def position_function(states, logp=False):
                # Two-way (log-)softmax over a linear map of each state,
                # reshaped to [first_dimension_size, -1, 2].
                states = tf.reshape(states, [-1, self.dec_cell_size])
                if logp:
                    return tf.reshape(
                        tf.nn.log_softmax(
                            tf.matmul(states, self.pos_w_1) + self.pos_b_1),
                        [self.first_dimension_size, -1, 2])
                return tf.reshape(
                    tf.nn.softmax(
                        tf.matmul(states, self.pos_w_1) + self.pos_b_1),
                    [self.first_dimension_size, -1, 2])

        if forward:
            loop_func = self.context_decoder_fn_inference(
                position_function,
                self.persona_word_mask,
                self.other_word_mask,
                None,
                dec_init_state,
                embedding,
                start_of_sequence_id=self.go_id,
                end_of_sequence_id=self.eos_id,
                maximum_length=self.max_utt_len,
                num_decoder_symbols=self.vocab_size,
                context_vector=selected_attribute_embedding,
            )
            dec_input_embedding = None
            dec_seq_lens = None
        else:
            loop_func = self.context_decoder_fn_train(
                dec_init_state, selected_attribute_embedding)
            # Decoder inputs are the target tokens without the last step.
            dec_input_embedding = embedding_ops.embedding_lookup(
                embedding, self.output_tokens)
            dec_input_embedding = dec_input_embedding[:, 0:-1, :]
            dec_seq_lens = self.output_lens - 1

            if config.keep_prob < 1.0:
                dec_input_embedding = tf.nn.dropout(
                    dec_input_embedding, config.keep_prob)

            # Word dropping: zero the embedding of randomly chosen steps.
            if config.dec_keep_prob < 1.0:
                keep_mask = tf.less_equal(
                    tf.random_uniform((batch_size, max_out_len - 1),
                                      minval=0.0,
                                      maxval=1.0), config.dec_keep_prob)
                keep_mask = tf.expand_dims(tf.to_float(keep_mask), 2)
                dec_input_embedding = dec_input_embedding * keep_mask
                dec_input_embedding = tf.reshape(
                    dec_input_embedding,
                    [-1, max_out_len - 1, config.embed_size])

        with variable_scope.variable_scope("dec_state"):
            dec_outs, _, final_context_state, rnn_states = dynamic_rnn_decoder(
                dec_cell,
                loop_func,
                inputs=dec_input_embedding,
                sequence_length=dec_seq_lens)

        with variable_scope.variable_scope("pos_state"):
            _, _, _, pos_states = dynamic_rnn_decoder(
                pos_cell,
                loop_func,
                inputs=dec_input_embedding,
                sequence_length=dec_seq_lens)

        self.position_dist = position_function(pos_states, logp=True)

        if final_context_state is not None:
            final_context_state = final_context_state[:, 0:array_ops.
                                                      shape(dec_outs)[1]]
            mask = tf.to_int32(tf.sign(tf.reduce_max(dec_outs, axis=2)))
            self.dec_out_words = tf.multiply(
                tf.reverse(final_context_state, axis=[1]), mask)
        else:
            self.dec_out_words = tf.argmax(dec_outs, 2)

    if not forward:
        with variable_scope.variable_scope("loss"):
            # Targets are the output tokens shifted by one; positions with
            # token id 0 are masked out.
            labels = self.output_tokens[:, 1:]
            label_mask = tf.to_float(tf.sign(labels))

            rc_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=dec_outs, labels=labels)
            rc_loss = tf.reduce_sum(rc_loss * label_mask,
                                    reduction_indices=1)
            self.avg_rc_loss = tf.reduce_mean(rc_loss)
            self.rc_ppl = tf.exp(
                tf.reduce_sum(rc_loss) / tf.reduce_sum(label_mask))

            per_select_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=tf.reshape(self.persona_dist,
                                  [self.first_dimension_size, 1, -1]),
                labels=self.selected_persona)
            per_select_loss = tf.reduce_sum(per_select_loss,
                                            reduction_indices=1)
            self.avg_per_select_loss = tf.reduce_mean(per_select_loss)

            position_labels = self.persona_position[:, 1:]
            per_pos_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.position_dist, labels=position_labels)
            per_pos_loss = tf.reduce_sum(per_pos_loss, reduction_indices=1)
            self.avg_per_pos_loss = tf.reduce_mean(per_pos_loss)

            # The same bag-of-words logits are scored at every step.
            tile_bow_logits = tf.tile(tf.expand_dims(self.bow_logits, 1),
                                      [1, max_out_len - 1, 1])
            bow_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=tile_bow_logits, labels=labels) * label_mask
            bow_loss = tf.reduce_sum(bow_loss, reduction_indices=1)
            self.avg_bow_loss = tf.reduce_mean(bow_loss)

            kld = gaussian_kld(recog_mu, recog_logvar, prior_mu,
                               prior_logvar)
            self.avg_kld = tf.reduce_mean(kld)
            # Linear KL annealing, only when a log directory is given.
            if log_dir is not None:
                kl_weights = tf.minimum(
                    tf.to_float(self.global_t) / config.full_kl_step, 1.0)
            else:
                kl_weights = tf.constant(1.0)

            self.kl_w = kl_weights
            self.elbo = self.avg_rc_loss + kl_weights * self.avg_kld
            aug_elbo = self.elbo + self.avg_bow_loss + 0.1 * self.avg_per_select_loss + 0.05 * self.avg_per_pos_loss

            tf.summary.scalar("rc_loss", self.avg_rc_loss)
            tf.summary.scalar("elbo", self.elbo)
            tf.summary.scalar("kld", self.avg_kld)
            tf.summary.scalar("per_pos_loss", self.avg_per_pos_loss)

            self.summary_op = tf.summary.merge_all()

            self.log_p_z = norm_log_liklihood(latent_sample, prior_mu,
                                              prior_logvar)
            self.log_q_z_xy = norm_log_liklihood(latent_sample, recog_mu,
                                                 recog_logvar)
            self.est_marginal = tf.reduce_mean(rc_loss + bow_loss -
                                               self.log_p_z +
                                               self.log_q_z_xy)

        self.optimize(sess, config, aug_elbo, log_dir)

    self.saver = tf.train.Saver(tf.global_variables(),
                                write_version=tf.train.SaverDef.V2)
def lesser_equal(x, y):
    """Element-wise ``x <= y`` comparison.

    Thin wrapper that forwards both arguments to ``tf.less_equal``.

    Args:
        x: Left-hand operand.
        y: Right-hand operand.

    Returns:
        The bool tensor produced by ``tf.less_equal(x, y)``.
    """
    comparison = tf.less_equal(x, y)
    return comparison
def main():
    """Create the model and start the training.

    Builds a multi-scale segmentation graph: the same network (variables
    shared through `reuse=True`) is applied to the input batch at scales
    1.0, 0.75 and 0.5, and the three `fc1_voc12` outputs are fused with an
    element-wise maximum. The loss is the sum of the mean pixel-wise softmax
    cross-entropy of the fused output and of each individual scale, plus L2
    weight decay. Gradients are accumulated over `args.grad_update_every`
    runs before every optimiser step. Summaries and checkpoints are written
    to `args.snapshot_dir` every `args.save_pred_every` steps.

    All configuration comes from `get_arguments()`; nothing is returned.
    """
    args = get_arguments()

    # Order GPUs by PCI bus id so that CUDA_VISIBLE_DEVICES indices are stable.
    # (The key was previously misspelled 'CUDA_DEVIDE_ORDER' and had no effect.)
    os.environ['CUDA_DEVICE_ORDER'] = "PCI_BUS_ID"
    os.environ['CUDA_VISIBLE_DEVICES'] = args.GPU

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    tf.set_random_seed(args.random_seed)

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(args.data_dir, args.data_list, input_size,
                             args.random_scale, args.random_mirror,
                             args.ignore_label, IMG_MEAN, coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)
        image_batch075 = tf.image.resize_images(
            image_batch, [int(h * 0.75), int(w * 0.75)])
        image_batch05 = tf.image.resize_images(
            image_batch, [int(h * 0.5), int(w * 0.5)])

    # Create network.
    with tf.variable_scope('', reuse=False):
        net = DeepLabResNetModel_50({'data': image_batch},
                                    is_training=args.is_training,
                                    num_classes=args.num_classes)
    with tf.variable_scope('', reuse=True):
        net075 = DeepLabResNetModel_50({'data': image_batch075},
                                       is_training=args.is_training,
                                       num_classes=args.num_classes)
    with tf.variable_scope('', reuse=True):
        net05 = DeepLabResNetModel_50({'data': image_batch05},
                                      is_training=args.is_training,
                                      num_classes=args.num_classes)
    # For a small batch size, it is better to keep
    # the statistics of the BN layers (running means and variances)
    # frozen, and to not update the values provided by the pre-trained model.
    # If is_training=True, the statistics will be updated during the training.
    # Note that is_training=False still updates BN parameters gamma (scale)
    # and beta (offset) if they are presented in var_list of the optimiser
    # definition.

    # Predictions.
    raw_output100 = net.layers['fc1_voc12']
    raw_output075 = net075.layers['fc1_voc12']
    raw_output05 = net05.layers['fc1_voc12']
    # Fuse the scales: resize the smaller outputs to the full-scale output
    # size and take the element-wise maximum.
    raw_output = tf.reduce_max(tf.stack([
        raw_output100,
        tf.image.resize_images(raw_output075,
                               tf.shape(raw_output100)[1:3, ]),
        tf.image.resize_images(raw_output05,
                               tf.shape(raw_output100)[1:3, ])
    ]), axis=0)

    # Which variables to load. Running means and variances are not trainable,
    # thus all_variables() should be restored.
    restore_var = [
        v for v in tf.global_variables()
        if 'fc' not in v.name or not args.not_restore_last
    ]
    all_trainable = [
        v for v in tf.trainable_variables()
        if 'beta' not in v.name and 'gamma' not in v.name
    ]
    fc_trainable = [v for v in all_trainable if 'fc' in v.name]
    # NOTE(review): the "lr * N" remarks below describe per-group learning-rate
    # multipliers, but the three optimisers created further down all receive
    # the same `learning_rate`; no multiplier is applied in this function.
    conv_trainable = [v for v in all_trainable
                      if 'fc' not in v.name]  # lr * 1.0
    fc_w_trainable = [v for v in fc_trainable
                      if 'weights' in v.name]  # lr * 10.0
    fc_b_trainable = [v for v in fc_trainable
                      if 'biases' in v.name]  # lr * 20.0
    assert (len(all_trainable) == len(fc_trainable) + len(conv_trainable))
    assert (len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))

    # Predictions: ignoring all predictions with labels greater or equal than
    # n_classes
    raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
    raw_prediction100 = tf.reshape(raw_output100, [-1, args.num_classes])
    raw_prediction075 = tf.reshape(raw_output075, [-1, args.num_classes])
    raw_prediction05 = tf.reshape(raw_output05, [-1, args.num_classes])

    label_proc = prepare_label(label_batch,
                               tf.stack(raw_output.get_shape()[1:3]),
                               num_classes=args.num_classes,
                               one_hot=False)  # [batch_size, h, w]
    label_proc075 = prepare_label(label_batch,
                                  tf.stack(raw_output075.get_shape()[1:3]),
                                  num_classes=args.num_classes,
                                  one_hot=False)
    label_proc05 = prepare_label(label_batch,
                                 tf.stack(raw_output05.get_shape()[1:3]),
                                 num_classes=args.num_classes,
                                 one_hot=False)

    raw_gt = tf.reshape(label_proc, [-1, ])
    raw_gt075 = tf.reshape(label_proc075, [-1, ])
    raw_gt05 = tf.reshape(label_proc05, [-1, ])

    # Indices of the pixels whose label is a valid class id.
    indices = tf.squeeze(
        tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1)
    indices075 = tf.squeeze(
        tf.where(tf.less_equal(raw_gt075, args.num_classes - 1)), 1)
    indices05 = tf.squeeze(
        tf.where(tf.less_equal(raw_gt05, args.num_classes - 1)), 1)

    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    gt075 = tf.cast(tf.gather(raw_gt075, indices075), tf.int32)
    gt05 = tf.cast(tf.gather(raw_gt05, indices05), tf.int32)

    prediction = tf.gather(raw_prediction, indices)
    prediction100 = tf.gather(raw_prediction100, indices)
    prediction075 = tf.gather(raw_prediction075, indices075)
    prediction05 = tf.gather(raw_prediction05, indices05)

    # Pixel-wise softmax loss.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=prediction, labels=gt)
    loss100 = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=prediction100, labels=gt)
    loss075 = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=prediction075, labels=gt075)
    loss05 = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=prediction05, labels=gt05)
    l2_losses = [
        args.weight_decay * tf.nn.l2_loss(v)
        for v in tf.trainable_variables() if 'weights' in v.name
    ]
    reduced_loss = (tf.reduce_mean(loss) + tf.reduce_mean(loss100) +
                    tf.reduce_mean(loss075) + tf.reduce_mean(loss05) +
                    tf.add_n(l2_losses))
    tf.summary.scalar('loss', reduced_loss)

    # Processed predictions: for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output,
                                             tf.shape(image_batch)[1:3, ])
    # `axis` replaces the deprecated `dimension` / `dim` keyword aliases.
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    pred = tf.expand_dims(raw_output_up, axis=3)

    # Image summary.
    images_summary = tf.py_func(
        inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN],
        tf.uint8)
    labels_summary = tf.py_func(
        decode_labels,
        [label_batch, args.save_num_images, args.num_classes], tf.uint8)
    preds_summary = tf.py_func(
        decode_labels, [pred, args.save_num_images, args.num_classes],
        tf.uint8)

    tf.summary.image(
        'images',
        tf.concat(axis=2,
                  values=[images_summary, labels_summary, preds_summary]),
        max_outputs=args.save_num_images)  # Concatenate row-wise.

    # Define loss and optimisation parameters.
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    # Polynomial decay driven by the step value fed through `step_ph`.
    learning_rate = tf.scalar_mul(
        base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))

    opt_conv = tf.train.AdamOptimizer(learning_rate)
    opt_fc_w = tf.train.AdamOptimizer(learning_rate)
    opt_fc_b = tf.train.AdamOptimizer(learning_rate)

    # Variables in the order used for gradient computation and slicing below.
    trainable_vars = conv_trainable + fc_w_trainable + fc_b_trainable

    # Define a variable to accumulate gradients.
    accum_grads = [
        tf.Variable(tf.zeros_like(v.initialized_value()), trainable=False)
        for v in trainable_vars
    ]

    # Define an operation to clear the accumulated gradients for next batch.
    zero_op = [v.assign(tf.zeros_like(v)) for v in accum_grads]

    # Compute gradients.
    grads = tf.gradients(reduced_loss, trainable_vars)

    # Accumulate and normalise the gradients.
    accum_grads_op = [
        accum.assign_add(grad / args.grad_update_every)
        for accum, grad in zip(accum_grads, grads)
    ]

    n_conv = len(conv_trainable)
    n_fc_w = len(fc_w_trainable)
    grads_conv = accum_grads[:n_conv]
    grads_fc_w = accum_grads[n_conv:(n_conv + n_fc_w)]
    grads_fc_b = accum_grads[(n_conv + n_fc_w):]

    # Apply the gradients.
    train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
    train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
    train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))

    train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)

    merged = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)

    # Load variables if the checkpoint is provided.
    if args.restore_from is not None:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over training steps. The coordinator shutdown lives in `finally`
    # so the queue-runner threads are stopped even when a step raises or the
    # run is interrupted.
    try:
        for step in range(args.num_steps):
            start_time = time.time()
            feed_dict = {step_ph: step}
            loss_value = 0

            # Clear the accumulated gradients.
            sess.run(zero_op, feed_dict=feed_dict)

            # Accumulate gradients.
            for _ in range(args.grad_update_every):
                _, l_val = sess.run([accum_grads_op, reduced_loss],
                                    feed_dict=feed_dict)
                loss_value += l_val

            # Normalise the loss.
            loss_value /= args.grad_update_every

            # Apply gradients.
            if step % args.save_pred_every == 0:
                # The fetch list is kept as-is; only the summary is consumed.
                _, _, summary, _ = sess.run(
                    [image_batch, label_batch, merged, train_op],
                    feed_dict=feed_dict)
                summary_writer.add_summary(summary, step)
                save(saver, sess, args.snapshot_dir, step)
            else:
                sess.run(train_op, feed_dict=feed_dict)

            duration = time.time() - start_time
            print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))
    finally:
        coord.request_stop()
        coord.join(threads)