def make_minibatch(self, valid_anchors):
    with tf.variable_scope('rpn_minibatch'):
        # in labels (shape is [N, ]): 1 is positive, 0 is negative, -1 is ignored
        labels, anchor_matched_gtboxes, object_mask = \
            self.rpn_find_positive_negative_samples(valid_anchors)  # [num_of_valid_anchors, ]

        positive_indices = tf.reshape(tf.where(tf.equal(labels, 1.0)), [-1])  # using labels here is the same as using object_mask

        num_of_positives = tf.minimum(tf.shape(positive_indices)[0],
                                      tf.cast(self.rpn_mini_batch_size * self.rpn_positives_ratio, tf.int32))

        # num of positives <= minibatch_size * 0.5
        positive_indices = tf.random_shuffle(positive_indices)
        positive_indices = tf.slice(positive_indices, begin=[0], size=[num_of_positives])
        # positive_anchors = tf.gather(self.anchors, positive_indices)

        negative_indices = tf.reshape(tf.where(tf.equal(labels, 0.0)), [-1])
        num_of_negatives = tf.minimum(self.rpn_mini_batch_size - num_of_positives,
                                      tf.shape(negative_indices)[0])

        negative_indices = tf.random_shuffle(negative_indices)
        negative_indices = tf.slice(negative_indices, begin=[0], size=[num_of_negatives])
        # negative_anchors = tf.gather(self.anchors, negative_indices)

        minibatch_indices = tf.concat([positive_indices, negative_indices], axis=0)
        minibatch_indices = tf.random_shuffle(minibatch_indices)

        minibatch_anchor_matched_gtboxes = tf.gather(anchor_matched_gtboxes, minibatch_indices)
        object_mask = tf.gather(object_mask, minibatch_indices)
        labels = tf.cast(tf.gather(labels, minibatch_indices), tf.int32)
        labels_one_hot = tf.one_hot(labels, depth=2)
        return minibatch_indices, minibatch_anchor_matched_gtboxes, object_mask, labels_one_hot
def __init__(self, tensors: List[tf.Tensor], cluster_indexes: tf.Tensor, n_splits, seed,
             train_sampling=1.0, test_sampling=1.0):
    size = tensors[0].shape[0].value
    self.seed = seed
    clustered_index = self.cluster_pages(cluster_indexes)
    index_len = tf.shape(clustered_index)[0]
    assert_op = tf.assert_equal(index_len, size,
                                message='n_pages is not equal to size of clustered index')
    with tf.control_dependencies([assert_op]):
        split_nitems = int(round(size / n_splits))
        split_size = [split_nitems] * n_splits
        split_size[-1] = size - (n_splits - 1) * split_nitems
        splits = tf.split(clustered_index, split_size)
        complements = [tf.random_shuffle(tf.concat(splits[:i] + splits[i + 1:], axis=0), seed)
                       for i in range(n_splits)]
        splits = [tf.random_shuffle(split, seed) for split in splits]

    def mk_name(prefix, tensor):
        return prefix + '_' + tensor.name[:-2]

    def prepare_split(i):
        test_size = split_size[i]
        train_size = size - test_size
        test_sampled_size = int(round(test_size * test_sampling))
        train_sampled_size = int(round(train_size * train_sampling))
        test_idx = splits[i][:test_sampled_size]
        train_idx = complements[i][:train_sampled_size]
        test_set = [tf.gather(tensor, test_idx, name=mk_name('test', tensor))
                    for tensor in tensors]
        tran_set = [tf.gather(tensor, train_idx, name=mk_name('train', tensor))
                    for tensor in tensors]
        return Split(test_set, tran_set, test_sampled_size, train_sampled_size)

    self.splits = [prepare_split(i) for i in range(n_splits)]
def fast_rcnn_minibatch(self, reference_boxes):
    with tf.variable_scope('fast_rcnn_minibatch'):
        reference_boxes_mattached_gtboxes, object_mask, label = \
            self.fast_rcnn_find_positive_negative_samples(reference_boxes)

        positive_indices = tf.reshape(tf.where(tf.not_equal(object_mask, 0.)), [-1])

        num_of_positives = tf.minimum(tf.shape(positive_indices)[0],
                                      tf.cast(self.fast_rcnn_minibatch_size * self.fast_rcnn_positives_ratio,
                                              tf.int32))

        positive_indices = tf.random_shuffle(positive_indices)
        positive_indices = tf.slice(positive_indices, begin=[0], size=[num_of_positives])

        negative_indices = tf.reshape(tf.where(tf.equal(object_mask, 0.)), [-1])
        num_of_negatives = tf.minimum(tf.shape(negative_indices)[0],
                                      self.fast_rcnn_minibatch_size - num_of_positives)

        negative_indices = tf.random_shuffle(negative_indices)
        negative_indices = tf.slice(negative_indices, begin=[0], size=[num_of_negatives])

        minibatch_indices = tf.concat([positive_indices, negative_indices], axis=0)
        minibatch_indices = tf.random_shuffle(minibatch_indices)

        minibatch_reference_boxes_mattached_gtboxes = tf.gather(reference_boxes_mattached_gtboxes,
                                                                minibatch_indices)
        object_mask = tf.gather(object_mask, minibatch_indices)
        label = tf.gather(label, minibatch_indices)
        label_one_hot = tf.one_hot(label, self.num_classes + 1)

        return minibatch_indices, minibatch_reference_boxes_mattached_gtboxes, object_mask, label_one_hot
def cifar_filename_queue(filename_list):
    # convert the list to a tensor
    string_tensor = tf.convert_to_tensor(filename_list, dtype=tf.string)
    # randomize the tensor; tf.random_shuffle returns a new tensor, so the
    # result must be kept (the original discarded it, leaving the queue unshuffled)
    string_tensor = tf.random_shuffle(string_tensor)
    # create the queue
    fq = tf.FIFOQueue(capacity=10, dtypes=tf.string)
    # create our enqueue_op for this q
    fq_enqueue_op = fq.enqueue_many([string_tensor])
    # create a QueueRunner and add to queue runner list
    # we only need one thread for this simple queue
    tf.train.add_queue_runner(tf.train.QueueRunner(fq, [fq_enqueue_op] * 1))
    return fq
def subsample_indicator(indicator, num_samples):
    """Subsample indicator vector.

    Given a boolean indicator vector with M elements set to `True`, the function
    assigns all but `num_samples` of these previously `True` elements to `False`.
    If `num_samples` is greater than M, the original indicator vector is returned.

    Args:
      indicator: a 1-dimensional boolean tensor indicating which elements
        are allowed to be sampled and which are not.
      num_samples: int32 scalar tensor

    Returns:
      a boolean tensor with the same shape as input (indicator) tensor
    """
    indices = tf.where(indicator)
    indices = tf.random_shuffle(indices)
    indices = tf.reshape(indices, [-1])

    num_samples = tf.minimum(tf.size(indices), num_samples)
    selected_indices = tf.slice(indices, [0], tf.reshape(num_samples, [1]))

    selected_indicator = ops.indices_to_dense_vector(selected_indices,
                                                     tf.shape(indicator)[0])

    return tf.equal(selected_indicator, 1)
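# Usage sketch (not part of the original source): subsample_indicator keeps at
# most `num_samples` of the True entries, chosen uniformly at random. Assumes
# TF 1.x and that `ops.indices_to_dense_vector` (TF Object Detection API,
# object_detection/utils/ops.py) is importable.
import tensorflow as tf

indicator = tf.constant([True, False, True, True, False, True])
sampled = subsample_indicator(indicator, tf.constant(2))
with tf.Session() as sess:
    print(sess.run(sampled))  # exactly two of the four True positions stay True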
def get_random_scale(min_scale_factor, max_scale_factor, step_size):
    """Gets a random scale value.

    Args:
      min_scale_factor: Minimum scale value.
      max_scale_factor: Maximum scale value.
      step_size: The step size from minimum to maximum value.

    Returns:
      A random scale value selected between minimum and maximum value.

    Raises:
      ValueError: min_scale_factor has unexpected value.
    """
    if min_scale_factor < 0 or min_scale_factor > max_scale_factor:
        raise ValueError('Unexpected value of min_scale_factor.')

    if min_scale_factor == max_scale_factor:
        return tf.cast(min_scale_factor, tf.float32)

    # When step_size = 0, we sample the value uniformly from [min, max).
    if step_size == 0:
        return tf.random_uniform([1],
                                 minval=min_scale_factor,
                                 maxval=max_scale_factor)

    # When step_size != 0, we randomly select one discrete value from [min, max].
    num_steps = int((max_scale_factor - min_scale_factor) / step_size + 1)
    scale_factors = tf.lin_space(min_scale_factor, max_scale_factor, num_steps)
    shuffled_scale_factors = tf.random_shuffle(scale_factors)
    return shuffled_scale_factors[0]
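# Usage sketch (not part of the original source, TF 1.x assumed): with a
# nonzero step_size the scale is drawn from the discrete grid built by
# tf.lin_space, then shuffled and the first element taken.
scale = get_random_scale(min_scale_factor=0.5, max_scale_factor=2.0, step_size=0.25)
# num_steps = (2.0 - 0.5) / 0.25 + 1 = 7, so `scale` is drawn uniformly from
# {0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0}.
with tf.Session() as sess:
    print(sess.run(scale))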
def get_svtcn_indices(seq_len, batch_size, num_views):
    """Gets a random window of contiguous time indices from a sequence.

    Args:
      seq_len: Int, number of timesteps in the image sequence.
      batch_size: Int, size of the batch to construct.
      num_views: Int, the number of simultaneous viewpoints at each
        timestep in the dataset.

    Returns:
      time_indices: 1-D Int `Tensor` with size [batch_size], holding the
        timestep for each batch image.
      view_indices: 1-D Int `Tensor` with size [batch_size], holding the
        view for each batch image. This is consistent across the batch.
    """
    # Get anchor, positive time indices.
    def f1():
        # Choose a random contiguous range from within the sequence.
        range_min = tf.random_shuffle(tf.range(seq_len - batch_size))[0]
        range_max = range_min + batch_size
        return tf.range(range_min, range_max)

    def f2():
        # Consider the full sequence.
        return tf.range(seq_len)

    time_indices = tf.cond(tf.greater(seq_len, batch_size), f1, f2)

    # Get opposing anchor, positive view indices.
    random_view = tf.random_shuffle(tf.range(num_views))[0]
    view_indices = tf.tile([random_view], (batch_size,))
    return time_indices, view_indices
def _build_graph(self):
    """Construct tensorflow nodes for round of clustering"""
    # N.B. without tf.Variable, makes awesome glitchy clustered images
    self.centroids_in = tf.Variable(
        tf.slice(tf.random_shuffle(self.arr), [0, 0], [self.k, -1]),
        name="centroids_in")
    # tiled should be shape(self.n_pixels, self.k, size_data = 2 + self.channels)
    tiled_pix = tf.tile(tf.expand_dims(self.arr, 1),
                        multiples=[1, self.k, 1], name="tiled_pix")

    # no need to take square root b/c positive reals and sqrt are isomorphic
    def radical_euclidean_dist(x, y):
        """Takes in 2 tensors and returns euclidean distance radical, i.e. dist**2"""
        with tf.name_scope("radical_euclidean"):
            return tf.square(tf.subtract(x, y))  # was tf.sub in pre-1.0 TF

    # should be shape(self.n_pixels, self.k)
    distances = tf.reduce_sum(radical_euclidean_dist(tiled_pix, self.centroids_in),
                              reduction_indices=2, name="distances")
    # should be shape(self.n_pixels)
    nearest = tf.to_int32(tf.argmin(distances, 1), name="nearest")

    # should be list of len self.k with tensors of shape(size_cluster, size_data)
    self.clusters = tf.dynamic_partition(self.arr, nearest, self.k)
    # should be shape(self.k, size_data)
    self.centroids = tf.stack(  # was tf.pack in pre-1.0 TF
        [tf.reduce_mean(cluster, 0) for cluster in self.clusters],
        name="centroids_out")
    self.update_roids = tf.assign(self.centroids_in, self.centroids)
def scheduled_sample_count(ground_truth_x, generated_x, batch_size, scheduled_sample_var):
    """Sample batch with specified mix of groundtruth and generated data points.

    Args:
      ground_truth_x: tensor of ground-truth data points.
      generated_x: tensor of generated data points.
      batch_size: batch size
      scheduled_sample_var: number of ground-truth examples to include in batch.

    Returns:
      New batch with num_ground_truth sampled from ground_truth_x and the rest
      from generated_x.
    """
    num_ground_truth = scheduled_sample_var
    idx = tf.random_shuffle(tf.range(batch_size))
    ground_truth_idx = tf.gather(idx, tf.range(num_ground_truth))
    generated_idx = tf.gather(idx, tf.range(num_ground_truth, batch_size))

    ground_truth_examps = tf.gather(ground_truth_x, ground_truth_idx)
    generated_examps = tf.gather(generated_x, generated_idx)

    output = tf.dynamic_stitch([ground_truth_idx, generated_idx],
                               [ground_truth_examps, generated_examps])
    # if batch size is known set it.
    if isinstance(batch_size, int):
        output.set_shape([batch_size] + common_layers.shape_list(output)[1:])
    return output
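# Usage sketch (not part of the original source): mix 3 ground-truth rows with
# 5 generated rows. Assumes TF 1.x; `common_layers` comes from tensor2tensor
# and is only touched when batch_size is a Python int, as here.
batch_size = 8
ground_truth_x = tf.ones([batch_size, 4])   # stand-in ground-truth batch
generated_x = tf.zeros([batch_size, 4])     # stand-in generated batch
mixed = scheduled_sample_count(ground_truth_x, generated_x, batch_size,
                               scheduled_sample_var=3)
# `mixed` holds 3 rows from ground_truth_x and 5 from generated_x, stitched
# back into their original (shuffled) batch positions by dynamic_stitch.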
def MiniminibatchLayer(name, n_in, dim_b, dim_c, group_size, inputs):
    inputs = tf.random_shuffle(inputs)
    inputs = tf.reshape(inputs, [-1, group_size, n_in])

    def f(a, x):
        return MinibatchLayer(name, n_in, dim_b, dim_c, x)

    outputs = tf.scan(f, inputs)
    return tf.reshape(outputs, [-1, n_in + dim_b])
def _add_gtboxes_as_first_stage_proposals(self, first_stage_proposals, first_stage_scores, gtboxes):

    # 1. jitter gtboxes
    ws = gtboxes[:, 2]
    hs = gtboxes[:, 3]
    thetas = gtboxes[:, 4]

    hs_offset = (tf.random_normal(shape=tf.shape(hs)) - 0.5) * 0.1 * hs
    ws_offset = (tf.random_normal(shape=tf.shape(ws)) - 0.5) * 0.1 * ws
    thetas_offset = (tf.random_normal(shape=tf.shape(thetas)) - 0.5) * 0.1 * thetas

    hs = hs + hs_offset
    ws = ws + ws_offset
    thetas = thetas + thetas_offset

    new_boxes = tf.transpose(tf.stack([gtboxes[:, 0], gtboxes[:, 1], ws, hs, thetas], axis=0))

    # 2. get needed added gtboxes
    num_needed_add = tf.minimum(
        tf.cast(cfgs.FAST_RCNN_MINIBATCH_SIZE * cfgs.FAST_RCNN_POSITIVE_RATE * 0.5, tf.int32),
        tf.shape(gtboxes)[0])
    added_boxes_indices = tf.random_shuffle(tf.range(start=0, limit=tf.shape(new_boxes)[0]))
    added_boxes_indices = tf.slice(added_boxes_indices, begin=[0], size=[num_needed_add])
    added_boxes = tf.gather(new_boxes, added_boxes_indices)

    # 3. add them
    all_boxes = tf.concat([first_stage_proposals, added_boxes], axis=0)
    all_scores = tf.concat([first_stage_scores,
                            tf.ones(shape=[tf.shape(added_boxes)[0]]) * 0.95],
                           axis=0)

    return all_boxes, all_scores
def disable_some_fgs():
    # We want to delete a randomly-selected subset of fg_inds of
    # size `fg_inds.shape[0] - max_fg`.
    # We shuffle along the dimension 0 and then we get the first
    # num_fg_inds - max_fg indices and we disable them.
    shuffled_inds = tf.random_shuffle(fg_inds, seed=self._seed)
    disable_place = (tf.shape(fg_inds)[0] - max_fg)
    # This function should never run if num_fg_inds <= max_fg, so we
    # add an assertion to catch the wrong behaviour if it happens.
    integrity_assertion = tf.assert_positive(
        disable_place,
        message="disable_place in disable_some_fgs is negative."
    )
    with tf.control_dependencies([integrity_assertion]):
        disable_inds = shuffled_inds[:disable_place]

    is_disabled = tf.sparse_to_dense(
        sparse_indices=disable_inds,
        sparse_values=True, default_value=False,
        output_shape=tf.cast(proposals_label_shape, tf.int64),
        # We are shuffling the indices, so they may not be ordered.
        validate_indices=False
    )
    return tf.where(
        condition=is_disabled,
        # We set it to -label for debugging purposes.
        x=tf.negative(proposals_label),
        y=proposals_label
    )
def generate_one(d):
    seed = stream()
    fn = lambda _: tf.random_shuffle(tf.range(d), seed=seed)
    return tf.map_fn(
        fn,
        sample_range,
        parallel_iterations=1 if seed is not None else 10)
def __init__(self, config):
    paths, meta = Input._collect(config.path)
    self.dimension_count = meta['dimension_count']
    self.sample_count = meta['sample_count']
    self.batch_size = config.get('batch_size', 1)
    if self.sample_count % self.batch_size > 0:
        raise Exception(
            ('expected the number of samples ({}) to be ' +
             'divisible by the batch size ({})').format(self.sample_count,
                                                        self.batch_size))
    with tf.variable_scope('state'):
        self.state = State()
    with tf.variable_scope('source'):
        paths = tf.Variable(paths, name='paths', dtype=tf.string, trainable=False)
        queue = tf.FIFOQueue(meta['path_count'], [tf.string])
        enqueue = queue.enqueue_many([tf.random_shuffle(paths)])
        tf.train.add_queue_runner(tf.train.QueueRunner(queue, [enqueue]))
        _, record = tf.TFRecordReader().read(queue)
    with tf.variable_scope('x'):
        features = tf.parse_single_example(record, {
            'data': tf.VarLenFeature(tf.float32),
        })
        data = tf.sparse_tensor_to_dense(features['data'])
        if self.batch_size == 1:
            self.x = tf.reshape(data, [1, -1, self.dimension_count])
        else:
            x = tf.reshape(data, [-1, self.dimension_count])
            _, outputs = tf.contrib.training.bucket_by_sequence_length(
                tf.shape(x)[0], [x], self.batch_size, config.buckets,
                dynamic_pad=True)
            self.x = outputs[0]
    with tf.variable_scope('y'):
        self.y = tf.pad(self.x[:, 1:, :], [[0, 0], [0, 1], [0, 0]])
def build(self, input_shape):
    input_dim = input_shape[1]

    # Per tree
    N_DECISION = (2 ** (self.n_depth)) - 1  # Number of decision nodes
    N_LEAF = 2 ** (self.n_depth + 1)  # Number of leaf nodes

    if self.randomize_training:
        # Construct a mask that lets N trees get trained per minibatch
        train_mask = np.zeros(self.n_trees, dtype=np.float32)
        for i in xrange(self.randomize_training):
            train_mask[i] = 1
        self.random_mask = tf.random_shuffle(tf.constant(train_mask))

    self.w_d_ensemble = []
    self.w_l_ensemble = []
    self.trainable_weights = []
    for i in xrange(self.n_trees):
        # str(i) is needed here; the original concatenated the int directly,
        # which raises a TypeError
        decision_weights = self.d_init((input_dim, N_DECISION),
                                       name=self.name + "_tree" + str(i) + "_dW")
        leaf_distributions = self.l_init((N_LEAF, self.output_classes),
                                         name=self.name + "_tree" + str(i) + "_lW")

        self.trainable_weights.append(decision_weights)
        self.trainable_weights.append(leaf_distributions)

        if self.randomize_training:
            do_gradient = self.random_mask[i]
            no_gradient = 1 - do_gradient

            # This should always allow inference, but block gradient flow when do_gradient = 0
            decision_weights = do_gradient * decision_weights + \
                no_gradient * tf.stop_gradient(decision_weights)
            leaf_distributions = do_gradient * leaf_distributions + \
                no_gradient * tf.stop_gradient(leaf_distributions)

        self.w_d_ensemble.append(decision_weights)
        self.w_l_ensemble.append(leaf_distributions)
def PreDiscriminator(inputs):
    outputs = []
    for n_rows in [784]:
        output = tf.reshape(inputs, [-1, n_rows, 1])
        output = tf.gather(output,
                           tf.random_shuffle(tf.range((784 / n_rows) * BATCH_SIZE))[:BATCH_SIZE])
        output = lib.ops.gru.GRU('Discriminator.GRU_{}'.format(1), 1, 256, output)
        outputs.append(output)
    return outputs
def generate_one(d):
    seed[0] = distributions_util.gen_new_seed(
        seed[0], salt='mcmc_sample_halton_sequence_4')
    fn = lambda _: tf.random_shuffle(tf.range(d), seed=seed[0])
    return tf.map_fn(
        fn,
        sample_range,
        parallel_iterations=1 if seed[0] is not None else 10)
def sample_fg_bg(iou):
    fg_mask = tf.reduce_max(iou, axis=1) >= cfg.FRCNN.FG_THRESH
    fg_inds = tf.reshape(tf.where(fg_mask), [-1])
    num_fg = tf.minimum(int(cfg.FRCNN.BATCH_PER_IM * cfg.FRCNN.FG_RATIO),
                        tf.size(fg_inds), name='num_fg')
    fg_inds = tf.random_shuffle(fg_inds)[:num_fg]

    bg_inds = tf.reshape(tf.where(tf.logical_not(fg_mask)), [-1])
    num_bg = tf.minimum(cfg.FRCNN.BATCH_PER_IM - num_fg,
                        tf.size(bg_inds), name='num_bg')
    bg_inds = tf.random_shuffle(bg_inds)[:num_bg]

    add_moving_summary(num_fg, num_bg)
    return fg_inds, bg_inds
def get_random_init(sess, batch):
    # returns (pseudo) random parameters for the algorithm
    points = tf.placeholder(tf.float32, shape=[n, p, 1])
    pi_init = sess.run(tf.random_uniform([1, k]), feed_dict={points: batch})
    mu_init = sess.run(tf.slice(tf.random_shuffle(points), [0, 0, 0], [k, p, 1]),
                       feed_dict={points: batch})
    seed = tf.random_uniform([k, p, p])
    # tf.batch_matmul was folded into tf.matmul in TF 1.0
    sigma_init = sess.run(tf.batch_matmul(seed, tf.transpose(seed, [0, 2, 1])),
                          feed_dict={points: batch})
    return pi_init, mu_init, sigma_init
def get_sparse_subset_tensor(shape, subset_sizes,
                             initializer=tf.random_normal_initializer(stddev=0.15)):
    """Gets the required bits and pieces for a sparse tensor bilinear product
    with random subsets of the inputs (as opposed to a totally randomly sparse
    tensor, this will have a kind of rectangular structure).

    Args:
        shape: the shape of the tensor. We can only make this work for
            3-tensors so len(shape) should be 3.
        subset_sizes: the number of random elements to take from each of the
            inputs. Should be a sequence of length 2.
        initializer: the initialiser to use for the elements of the tensor.

    Returns:
        (tensor, idcs_a, idcs_b): all that we need to do the bilinear product
            later -- the tensor elements as a dense matrix and the indices
            representing the indices into the input vectors.
    """
    # first let's make sure the inputs make sense
    if len(shape) != 3:
        raise ValueError(
            'Can do this with 3-way tensors, got shape {}'.format(shape))
    if len(subset_sizes) != 2:
        raise ValueError('subset_sizes needs to be of length two')
    if subset_sizes[0] > shape[0]:
        raise ValueError('first subset size greater than specified dimension: '
                         '{} > {}'.format(subset_sizes[0], shape[0]))
    # the original checked subset_sizes[2] against shape[1], which indexes past
    # the end of a length-2 sequence; the second subset indexes the last dimension
    if subset_sizes[1] > shape[2]:
        raise ValueError(
            'second subset size greater than specified dimension: '
            '{} > {}'.format(subset_sizes[1], shape[2]))

    with tf.name_scope('sparse_subset_tensor'):  # `name` was undefined in the original
        # now we need to make some random indices
        # potentially kinda gross for a little while
        a_idcs = tf.stack([tf.random_shuffle(tf.range(shape[0]))
                           for _ in range(shape[1])])
        b_idcs = tf.stack([tf.random_shuffle(tf.range(shape[2]))
                           for _ in range(shape[1])])
        # if we eval these they will be random every time
        # so we use them as the initialiser to a new variable
        a_idcs = tf.get_variable('a_idcs', initializer=a_idcs[:, :subset_sizes[0]])
        b_idcs = tf.get_variable('b_idcs', initializer=b_idcs[:, :subset_sizes[1]])
def init_k_means(samples, num_clusters, num_samples_per_cluster):
    # Start by choosing num_clusters random points
    sample_indices = tf.random_shuffle(tf.range(0, num_samples_per_cluster * num_clusters))
    start = [0, ]
    size = [num_clusters, ]
    centroids = tf.slice(sample_indices, start, size)
    chosen_centroids = tf.gather(samples, centroids)
    return chosen_centroids
def prepare_split(i):
    idx = tf.random_shuffle(tf.range(0, n_pages, dtype=tf.int32), seed + i)
    train_tensors = [tf.gather(tensor, idx, name=mk_name('shfl', tensor))
                     for tensor in tensors]
    if test_sampling < 1.0:
        sampled_idx = idx[:n_pages]
        test_tensors = [tf.gather(tensor, sampled_idx, name=mk_name('shfl_test', tensor))
                        for tensor in tensors]
    else:
        test_tensors = train_tensors
    return Split(test_tensors, train_tensors, n_pages, total_pages)
def get_ensemble_idx_info(self):
    if self.bayesian_config is not False:
        ensemble_idxs = tf.random_shuffle(tf.range(self.transition_predictor.ensemble_size))
        transition_ensemble_sample_n = self.transition_predictor.eval_sample_count
        reward_ensemble_sample_n = self.reward_predictor.eval_sample_count
        ensemble_idxs = ensemble_idxs[:transition_ensemble_sample_n]
        return ensemble_idxs, transition_ensemble_sample_n, reward_ensemble_sample_n
    else:
        return None, 1, 1
def choose_random_centroids(samples, n_clusters):
    n_samples = tf.shape(samples)[0]
    random_indices = tf.random_shuffle(tf.range(0, n_samples))
    begin = [0, ]
    size = [n_clusters, ]
    centroid_indices = tf.slice(random_indices, begin, size)
    initial_centroids = tf.gather(samples, centroid_indices)
    return initial_centroids
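# Usage sketch (not part of the original source, TF 1.x assumed): pick 3 random
# rows of a toy 2-D dataset as the initial k-means centroids.
import numpy as np

samples = tf.constant(np.random.rand(100, 2), dtype=tf.float32)
centroids = choose_random_centroids(samples, n_clusters=3)
with tf.Session() as sess:
    print(sess.run(centroids))  # 3 distinct rows drawn from `samples`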
def __call__(self, batch_size, **kwargs):
    """Sample a batch of context.

    Args:
      batch_size: Batch size.
    Returns:
      Two [batch_size, num_context_dims] tensors.
    """
    spec = self._context_spec
    context_range = self._context_range
    if isinstance(context_range[0], (int, float)):
        contexts = tf.random_uniform(
            shape=[batch_size, ] + spec.shape.as_list(),
            minval=context_range[0],
            maxval=context_range[1],
            dtype=spec.dtype)
    elif isinstance(context_range[0], (list, tuple, np.ndarray)):
        assert len(spec.shape.as_list()) == 1
        assert spec.shape.as_list()[0] == len(context_range[0])
        assert spec.shape.as_list()[0] == len(context_range[1])
        contexts = tf.concat(
            [tf.random_uniform(
                shape=[batch_size, 1, ] + spec.shape.as_list()[1:],
                minval=context_range[0][i],
                maxval=context_range[1][i],
                dtype=spec.dtype)
             for i in range(spec.shape.as_list()[0])],
            axis=1)
    else:
        raise NotImplementedError(context_range)
    self._validate_contexts(contexts)
    if 'sampler_fn' in kwargs:
        other_contexts = kwargs['sampler_fn']()
    else:
        other_contexts = contexts
    state, next_state = kwargs['state'], kwargs['next_state']
    if state is not None and next_state is not None:
        my_context_range = (np.array(context_range[1]) -
                            np.array(context_range[0])) / 2 * np.ones(spec.shape.as_list())
        contexts = tf.concat(
            [0.1 * my_context_range[:self._k] *
             tf.random_normal(tf.shape(state[:, :self._k]), dtype=state.dtype) +
             tf.random_shuffle(state[:, :self._k]) - state[:, :self._k],
             other_contexts[:, self._k:]], 1)
        # contexts = tf.Print(contexts,
        #                     [contexts, tf.reduce_max(contexts, 0),
        #                      tf.reduce_min(state, 0), tf.reduce_max(state, 0)],
        #                     'contexts', summarize=15)
        next_contexts = tf.concat(  # LALA
            [state[:, :self._k] + contexts[:, :self._k] - next_state[:, :self._k],
             other_contexts[:, self._k:]], 1)
        next_contexts = contexts  # LALA cosine
    else:
        next_contexts = contexts
    return tf.stop_gradient(contexts), tf.stop_gradient(next_contexts)
def tf_fastfood_transform(in_x, dd, DD, use_get=False, use_C=False):
    '''Transform from d to D. Pads as necessary.

    For now: assume dd and DD are known in python.'''

    # Tensor d and D
    #assert_D_big = tf.assert_greater_equal(DD, dd, message='d cannot be larger than D')
    #with tf.control_dependencies([assert_D_big]):
    #    ll = tf.cast(tf.round(tf.log(tf.to_float(DD)) / np.log(2)), 'int32')
    #    LL = tf.pow(2, ll)

    # Python d and D
    assert isinstance(dd, int), 'd should be int'
    assert isinstance(DD, int), 'D should be int'
    assert DD >= dd, 'd cannot be larger than D'
    assert dd > 0, 'd and D must be positive'
    ll = int(np.ceil(np.log(DD) / np.log(2)))
    LL = 2 ** ll

    # Make vars
    init_BB = tf.to_float(tf.random_uniform((LL,), 0, 2, dtype='int32')) * 2 - 1
    init_Pi = tf.random_shuffle(tf.range(LL))
    init_GG = tf.random_normal((LL,))
    init_divisor = lambda GG: tf.sqrt(LL * tf.reduce_sum(tf.pow(GG.initialized_value(), 2)))

    if use_get:
        BB = tf.get_variable('B', initializer=init_BB, trainable=False)
        Pi = tf.get_variable('Pi', initializer=init_Pi, trainable=False)
        GG = tf.get_variable('G', initializer=init_GG, trainable=False)
        divisor = tf.get_variable('divisor', initializer=init_divisor(GG), trainable=False)
    else:
        BB = tf.Variable(init_BB, name='B', trainable=False)
        Pi = tf.Variable(init_Pi, name='Pi', trainable=False)
        GG = tf.Variable(init_GG, name='G', trainable=False)
        divisor = tf.Variable(init_divisor(GG), name='divisor', trainable=False)
    fastfood_vars = [BB, Pi, GG, divisor]

    # Implement transform
    dd_pad = tf.pad(in_x, [[0, LL - dd]])
    mul_1 = tf.multiply(BB, dd_pad)
    if use_C:
        mul_2 = tf_fast_walsh_hadamard(mul_1, 0, method='c', normalize=True)
    else:
        mul_2 = tf_fast_walsh_hadamard(mul_1, 0, method='two', normalize=False)
    mul_3 = tf.gather(mul_2, Pi)
    mul_4 = tf.multiply(mul_3, GG)
    if use_C:
        mul_5 = tf_fast_walsh_hadamard(mul_4, 0, method='c', normalize=True)
        print '\nWARNING: check normalization on this next line more carefully\n'
        ret = tf.divide(tf.slice(mul_5, [0], [DD]), divisor * np.sqrt(float(DD) / LL / ll))
    else:
        mul_5 = tf_fast_walsh_hadamard(mul_4, 0, method='two', normalize=False)
        ret = tf.divide(tf.slice(mul_5, [0], [DD]), divisor * np.sqrt(float(DD) / LL))

    return fastfood_vars, ret
def choose_random_centroids(samples, n_clusters, seed):
    # Step 0: Initialisation: Select `n_clusters` number of random points
    n_samples = tf.shape(samples)[0]
    random_indices = tf.random_shuffle(tf.range(0, n_samples), seed=seed)
    begin = [0, ]
    size = [n_clusters, ]
    # size[0] = n_clusters
    centroid_indices = tf.slice(random_indices, begin, size)
    initial_centroids = tf.gather(samples, centroid_indices)
    return initial_centroids
def dequeue_batch():
    with tf.name_scope("sample_n_per_batch/dequeue/"):
        entries = []
        for q in queues:
            entries.append(q.dequeue_many(samples_per_class))
        flat_batch = [tf.concat(x, 0) for x in zip(*entries)]
        idx = tf.random_shuffle(tf.range(batch_size))
        flat_batch = [tf.gather(f, idx, axis=0) for f in flat_batch]
        return nest.pack_sequence_as(batch, flat_batch)
def random_column(columns):
    """Zeros out all except one of `columns`.

    Used for rounds with global drop path.

    Args:
      columns: the columns of a fractal block to be selected from.
    """
    num_columns = tensor_shape(columns)[0]
    mask = tf.random_shuffle([True] + [False] * (num_columns - 1))
    return apply_mask(mask, columns) * num_columns
def preprocess_for_train(image, output_height, output_width, mean_vals, out_dim_scale=1.0):
    """Preprocesses the given image for training.

    Note that the actual resizing scale is sampled from
    [`resize_size_min`, `resize_size_max`].

    Args:
      image: A `Tensor` representing an image of arbitrary size.
      output_height: The height of the image after preprocessing.
      output_width: The width of the image after preprocessing.

    Returns:
      A preprocessed image.
    """
    num_channels = image.get_shape().as_list()[-1]
    image = tf.image.resize_images(image, [_RESIZE_HT, _RESIZE_WD])
    # compute the crop size
    base_size = float(min(_RESIZE_HT, _RESIZE_WD))
    scale_ratio_h = tf.random_shuffle(tf.constant(_SCALE_RATIOS))[0]
    scale_ratio_w = tf.random_shuffle(tf.constant(_SCALE_RATIOS))[0]
    image = _random_crop([image],
                         tf.cast(output_height * scale_ratio_h, tf.int32),
                         tf.cast(output_width * scale_ratio_w, tf.int32))[0]
    image = tf.image.resize_images(
        image, [int(output_height * out_dim_scale),
                int(output_width * out_dim_scale)])
    image = tf.to_float(image)
    image = tf.image.random_flip_left_right(image)
    image.set_shape([int(output_height * out_dim_scale),
                     int(output_width * out_dim_scale), num_channels])
    image = _mean_image_subtraction(image, mean_vals)
    image = tf.expand_dims(image, 0)  # 1x... image, to be consistent with eval
    # Gets logged multiple times with NetVLAD, so gives an error.
    # I'm anyway logging from the train code, so removing it here.
    # tf.image_summary('final_distorted_image',
    #                  tf.expand_dims(image / 128.0, 0))
    return image
# -*- coding:utf-8 -*-
# @Time   : 2018/9/23 3:43 PM
# @Author : Coast Cao

import tensorflow as tf

# Create a tensor of shape [2, 3] consisting of random normal values, with mean
# -1 and standard deviation 4.
norm = tf.random_normal([2, 3], mean=-1, stddev=4)

# Shuffle the first dimension of a tensor
c = tf.constant([[1, 2], [3, 4], [5, 6]])
shuff = tf.random_shuffle(c)

# Each time we run these ops, different results are generated
sess = tf.Session()
print(sess.run(norm))
print(sess.run(norm))

# Set an op-level seed to generate repeatable sequences across sessions.
norm = tf.random_normal([2, 3], seed=1234)
sess = tf.Session()
print(sess.run(norm))
print(sess.run(norm))
sess = tf.Session()
print(sess.run(norm))
print(sess.run(norm))
def train():
    with tf.Graph().as_default(), tf.device('/cpu:0'):

        num_gpu = len(cfgs.GPU_GROUP.strip().split(','))
        global_step = slim.get_or_create_global_step()
        lr = warmup_lr(cfgs.LR, global_step, cfgs.WARM_SETP, num_gpu)
        tf.summary.scalar('lr', lr)

        with tf.name_scope('get_batch'):
            if cfgs.IMAGE_PYRAMID:
                shortside_len_list = tf.constant(cfgs.IMG_SHORT_SIDE_LEN)
                shortside_len = tf.random_shuffle(shortside_len_list)[0]
            else:
                shortside_len = cfgs.IMG_SHORT_SIDE_LEN

            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \
                next_batch(dataset_name=cfgs.DATASET_NAME,
                           batch_size=cfgs.BATCH_SIZE * num_gpu,
                           shortside_len=shortside_len,
                           is_training=True)

        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
        r3det = build_whole_network_r3det_efficientnet.DetectionNetwork(
            base_network_name=cfgs.NET_NAME, is_training=True)

        # data processing
        inputs_list = []
        for i in range(num_gpu):
            img = tf.expand_dims(img_batch[i], axis=0)
            if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
                img = img / tf.constant([cfgs.PIXEL_STD])

            gtboxes_and_label_r = tf.py_func(backward_convert,
                                             inp=[gtboxes_and_label_batch[i]],
                                             Tout=tf.float32)
            gtboxes_and_label_r = tf.reshape(gtboxes_and_label_r, [-1, 6])

            gtboxes_and_label_h = get_horizen_minAreaRectangle(gtboxes_and_label_batch[i])
            gtboxes_and_label_h = tf.reshape(gtboxes_and_label_h, [-1, 5])

            num_objects = num_objects_batch[i]
            num_objects = tf.cast(tf.reshape(num_objects, [-1, ]), tf.float32)

            img_h = img_h_batch[i]
            img_w = img_w_batch[i]

            inputs_list.append([img, gtboxes_and_label_h, gtboxes_and_label_r,
                                num_objects, img_h, img_w])

        tower_grads = []
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

        total_loss_dict = {
            'cls_loss': tf.constant(0., tf.float32),
            'reg_loss': tf.constant(0., tf.float32),
            'refine_cls_loss': tf.constant(0., tf.float32),
            'refine_reg_loss': tf.constant(0., tf.float32),
            'refine_cls_loss_stage3': tf.constant(0., tf.float32),
            'refine_reg_loss_stage3': tf.constant(0., tf.float32),
            'total_losses': tf.constant(0., tf.float32),
        }

        if cfgs.USE_SUPERVISED_MASK:
            total_loss_dict['mask_loss'] = tf.constant(0., tf.float32)

        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpu):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        with slim.arg_scope(
                                [slim.model_variable, slim.variable],
                                device='/device:CPU:0'):
                            with slim.arg_scope(
                                    [slim.conv2d, slim.conv2d_in_plane,
                                     slim.conv2d_transpose, slim.separable_conv2d,
                                     slim.fully_connected],
                                    weights_regularizer=weights_regularizer,
                                    biases_regularizer=biases_regularizer,
                                    biases_initializer=tf.constant_initializer(0.0)):

                                gtboxes_and_label_h, gtboxes_and_label_r = tf.py_func(
                                    get_gtboxes_and_label,
                                    inp=[inputs_list[i][1], inputs_list[i][2], inputs_list[i][3]],
                                    Tout=[tf.float32, tf.float32])
                                gtboxes_and_label_h = tf.reshape(gtboxes_and_label_h, [-1, 5])
                                gtboxes_and_label_r = tf.reshape(gtboxes_and_label_r, [-1, 6])

                                img = inputs_list[i][0]
                                img_shape = inputs_list[i][-2:]
                                img = tf.image.crop_to_bounding_box(
                                    image=img,
                                    offset_height=0,
                                    offset_width=0,
                                    target_height=tf.cast(img_shape[0], tf.int32),
                                    target_width=tf.cast(img_shape[1], tf.int32))

                                outputs = r3det.build_whole_detection_network(
                                    input_img_batch=img,
                                    gtboxes_batch_h=gtboxes_and_label_h,
                                    gtboxes_batch_r=gtboxes_and_label_r,
                                    gpu_id=i)

                                gtboxes_in_img_h = draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_h[:, :-1],
                                    labels=gtboxes_and_label_h[:, -1],
                                    method=0)
                                gtboxes_in_img_r = draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_r[:, :-1],
                                    labels=gtboxes_and_label_r[:, -1],
                                    method=1)
                                tf.summary.image('Compare/gtboxes_h_gpu:%d' % i, gtboxes_in_img_h)
                                tf.summary.image('Compare/gtboxes_r_gpu:%d' % i, gtboxes_in_img_r)

                                if cfgs.ADD_BOX_IN_TENSORBOARD:
                                    detections_in_img = draw_boxes_with_categories_and_scores(
                                        img_batch=img,
                                        boxes=outputs[0],
                                        scores=outputs[1],
                                        labels=outputs[2],
                                        method=1)
                                    tf.summary.image('Compare/final_detection_gpu:%d' % i,
                                                     detections_in_img)

                                loss_dict = outputs[-1]

                                total_losses = 0.0
                                for k in loss_dict.keys():
                                    total_losses += loss_dict[k]
                                    total_loss_dict[k] += loss_dict[k] / num_gpu

                                total_losses = total_losses / num_gpu
                                total_loss_dict['total_losses'] += total_losses

                                if i == num_gpu - 1:
                                    regularization_losses = tf.get_collection(
                                        tf.GraphKeys.REGULARIZATION_LOSSES)
                                    # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
                                    total_losses = total_losses + tf.add_n(regularization_losses)

                        tf.get_variable_scope().reuse_variables()
                        grads = optimizer.compute_gradients(total_losses)
                        if cfgs.GRADIENT_CLIPPING_BY_NORM is not None:
                            grads = slim.learning.clip_gradient_norms(
                                grads, cfgs.GRADIENT_CLIPPING_BY_NORM)
                        tower_grads.append(grads)

        for k in total_loss_dict.keys():
            tf.summary.scalar('{}/{}'.format(k.split('_')[0], k), total_loss_dict[k])

        if len(tower_grads) > 1:
            grads = sum_gradients(tower_grads)
        else:
            grads = tower_grads[0]

        if cfgs.MUTILPY_BIAS_GRADIENT is not None:
            final_gvs = []
            with tf.variable_scope('Gradient_Mult'):
                for grad, var in grads:
                    scale = 1.
                    if '/biases:' in var.name:
                        scale *= cfgs.MUTILPY_BIAS_GRADIENT
                    if 'conv_new' in var.name:
                        scale *= 3.
                    if not np.allclose(scale, 1.0):
                        grad = tf.multiply(grad, scale)
                    final_gvs.append((grad, var))
            apply_gradient_op = optimizer.apply_gradients(final_gvs, global_step=global_step)
        else:
            apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step)

        variable_averages = tf.train.ExponentialMovingAverage(0.9999, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())

        train_op = tf.group(apply_gradient_op, variables_averages_op)
        # train_op = optimizer.apply_gradients(final_gvs, global_step=global_step)
        summary_op = tf.summary.merge_all()

        restorer, restore_ckpt = r3det.get_restorer()
        saver = tf.train.Saver(max_to_keep=5)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        tfconfig = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        tfconfig.gpu_options.allow_growth = True
        with tf.Session(config=tfconfig) as sess:
            sess.run(init_op)
            # sess.run(tf.initialize_all_variables())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)

            summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
            tools.mkdir(summary_path)
            summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

            if not restorer is None:
                restorer.restore(sess, restore_ckpt)
                print('restore model')

            for step in range(cfgs.MAX_ITERATION // num_gpu):
                training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                              time.localtime(time.time()))

                if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                    _, global_stepnp = sess.run([train_op, global_step])
                else:
                    if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                        start = time.time()
                        _, global_stepnp, total_loss_dict_ = \
                            sess.run([train_op, global_step, total_loss_dict])
                        end = time.time()

                        print('***' * 20)
                        print("""%s: global_step:%d current_step:%d"""
                              % (training_time, (global_stepnp - 1) * num_gpu, step * num_gpu))
                        print("""per_cost_time:%.3fs""" % ((end - start) / num_gpu))
                        loss_str = ''
                        for k in total_loss_dict_.keys():
                            loss_str += '%s:%.3f\n' % (k, total_loss_dict_[k])
                        print(loss_str)

                        if np.isnan(total_loss_dict_['total_losses']):
                            sys.exit(0)
                    else:
                        if step % cfgs.SMRY_ITER == 0:
                            _, global_stepnp, summary_str = sess.run(
                                [train_op, global_step, summary_op])
                            summary_writer.add_summary(summary_str,
                                                       (global_stepnp - 1) * num_gpu)
                            summary_writer.flush()

                if (step > 0 and step % (cfgs.SAVE_WEIGHTS_INTE // num_gpu) == 0) \
                        or (step >= cfgs.MAX_ITERATION // num_gpu - 1):

                    save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                    if not os.path.exists(save_dir):
                        os.mkdir(save_dir)

                    save_ckpt = os.path.join(
                        save_dir,
                        '{}_'.format(cfgs.DATASET_NAME) +
                        str((global_stepnp - 1) * num_gpu) + 'model.ckpt')
                    saver.save(sess, save_ckpt)
                    print(' weights have been saved')

            coord.request_stop()
            coord.join(threads)
def rpn_targets_graph(gt_boxes, gt_cls, anchors, anchors_tag, rpn_train_anchors=None):
    """Build RPN classification and regression targets for a single image.

    a) Anchors with IoU > 0.7 are positive samples; anchors with IoU < 0.3 are
       negative; anchors in between are neutral and discarded.
    b) Every GT must be matched to some anchor, even when its IoU < 0.3.
    c) The positive/negative ratio is kept around 1:1.

    :param gt_boxes: GT box coordinates [MAX_GT_BOXs, (y1,x1,y2,x2,tag)], tag=0 means padding
    :param gt_cls: GT classes [MAX_GT_BOXs, 1+1]; the last position is the tag, tag=0 means padding
    :param anchors: [anchor_num, (y1,x1,y2,x2)]
    :param anchors_tag: [anchor_num], bool
    :param rpn_train_anchors: number of training samples (256)
    :return: deltas: [rpn_train_anchors, (dy,dx,dh,dw,tag)]: anchor regression targets;
                 tag=1 positive, tag=0 padding, tag=-1 negative
             class_ids: [rpn_train_anchors, 1+1]: anchor classes;
                 tag=1 positive, tag=0 padding, tag=-1 negative
             indices: [rpn_train_anchors, (indices,tag)]:
                 tag=1 positive, tag=0 padding, tag=-1 negative
    """
    # Strip the padding entries to get the real GTs
    gt_boxes = tf_utils.remove_pad(gt_boxes)
    gt_cls = tf_utils.remove_pad(gt_cls)[:, 0]  # [N,1] -> [N]
    # Keep only the valid anchors
    valid_anchor_indices = tf.where(anchors_tag)[:, 0]  # [valid_anchors_num]
    anchors = tf.gather(anchors, valid_anchor_indices)

    # Compute IoU
    iou = compute_iou(gt_boxes, anchors)
    # print("iou:{}".format(iou))

    # For each GT, the anchor with the highest IoU is a positive sample
    gt_iou_argmax = tf.argmax(iou, axis=1)
    positive_gt_indices_1 = tf.range(tf.shape(gt_boxes)[0])  # indices are simply 0..n-1
    positive_anchor_indices_1 = gt_iou_argmax

    # Anchors whose maximum IoU is above 0.7 are positive samples
    anchors_iou_max = tf.reduce_max(iou, axis=0)
    # indices of positive anchors (IoU > 0.7)
    positive_anchor_indices_2 = tf.where(anchors_iou_max > 0.7,
                                         name='rpn_target_positive_indices')  # [:, 0]
    # Find the GT box index matched to each positive anchor
    # anchors_iou_argmax = tf.argmax(iou, axis=0)  # GT index with max IoU for each anchor [n]
    anchors_iou_argmax = tf.cond(  # must handle the case of zero GTs
        tf.greater(tf.shape(gt_boxes)[0], 0),
        true_fn=lambda: tf.argmax(iou, axis=0),
        false_fn=lambda: tf.cast(tf.constant([]), tf.int64))
    positive_gt_indices_2 = tf.gather_nd(anchors_iou_argmax, positive_anchor_indices_2)

    # Merge the two groups of positive samples
    positive_gt_indices = tf.concat(
        [positive_gt_indices_1, tf.cast(positive_gt_indices_2, tf.int32)],
        axis=0, name='rpn_gt_boxes_concat')
    positive_anchor_indices = tf.concat(
        [positive_anchor_indices_1, positive_anchor_indices_2[:, 0]],
        axis=0, name='rpn_positive_anchors_concat')

    # Determine the final positives according to the positive/negative ratio
    positive_num = tf.minimum(tf.shape(positive_anchor_indices)[0],
                              int(rpn_train_anchors * 0.9))
    positive_anchor_indices, positive_gt_indices = shuffle_sample(
        [positive_anchor_indices, positive_gt_indices],
        tf.shape(positive_anchor_indices)[0],
        positive_num)

    # Select anchors and GTs by index
    positive_anchors = tf.gather(anchors, positive_anchor_indices)
    positive_gt_boxes = tf.gather(gt_boxes, positive_gt_indices)
    positive_gt_cls = tf.gather(gt_cls, positive_gt_indices)

    # Compute regression targets
    deltas = regress_target(positive_anchors, positive_gt_boxes)

    # Handle negative samples
    negative_indices = tf.where(anchors_iou_max < 0.3,
                                name='rpn_target_negative_indices')  # [:, 0]
    # keep negatives within the remaining sample budget
    negative_num = tf.minimum(rpn_train_anchors - positive_num,
                              tf.shape(negative_indices)[0],
                              name='rpn_target_negative_num')
    # negative_num = tf.minimum(int(rpn_train_anchors * 0.5), negative_num, name='rpn_target_negative_num_2')
    negative_indices = tf.random_shuffle(negative_indices)[:negative_num]
    negative_gt_cls = tf.zeros([negative_num])  # negative samples get class id 0
    negative_deltas = tf.zeros([negative_num, 4])

    # Merge positives and negatives
    deltas = tf.concat([deltas, negative_deltas], axis=0, name='rpn_target_deltas')
    class_ids = tf.concat([positive_gt_cls, negative_gt_cls], axis=0,
                          name='rpn_target_class_ids')
    indices = tf.concat([positive_anchor_indices, negative_indices[:, 0]],
                        axis=0, name='rpn_train_anchor_indices')

    # Map indices back to the original anchor indices
    indices = tf.gather(valid_anchor_indices, indices,
                        name='map_to_origin_anchor_indices')

    # Pad to fixed size
    deltas, class_ids = tf_utils.pad_list_to_fixed_size(
        [deltas, tf.expand_dims(class_ids, 1)], rpn_train_anchors)
    # Set the tag of negative samples to -1 to ease downstream processing
    indices = tf_utils.pad_to_fixed_size_with_negative(
        tf.expand_dims(indices, 1), rpn_train_anchors,
        negative_num=negative_num, data_type=tf.int64)

    # Other statistics
    gt_num = tf.shape(gt_cls)[0]  # number of GTs
    miss_match_gt_num = gt_num - tf.shape(
        tf.unique(positive_gt_indices)[0])[0]  # GTs not assigned an anchor
    rpn_gt_min_max_iou = tf.reduce_min(
        tf.reduce_max(iou, axis=1))  # smallest best-match IoU over all GTs

    return [deltas, class_ids, indices,
            tf_utils.scalar_to_1d_tensor(gt_num),
            tf_utils.scalar_to_1d_tensor(positive_num),
            tf_utils.scalar_to_1d_tensor(negative_num),
            tf_utils.scalar_to_1d_tensor(miss_match_gt_num),
            tf_utils.scalar_to_1d_tensor(rpn_gt_min_max_iou)]
def label_downsampler(n_labels, true_labels, n_extra_labels):
    lbls = tf.random_shuffle(tf.range(n_labels))[:n_extra_labels]
    lbls = tf.unique(tf.concat([lbls, true_labels], axis=0)).y
    return lambda logits: tf.gather(logits, lbls, axis=-1)
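# Usage sketch (not part of the original source, TF 1.x assumed): shrink a
# 1000-way softmax head to the true labels plus up to 50 random distractors.
true_labels = tf.constant([3, 17, 512], dtype=tf.int32)
downsample = label_downsampler(n_labels=1000, true_labels=true_labels,
                               n_extra_labels=50)
logits = tf.random_normal([8, 1000])  # hypothetical batch of logits
small_logits = downsample(logits)     # shape [8, <=53]; duplicates are merged by tf.unique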
        points_x.append(np.random.normal(0.0, 0.6))
        points_y.append(np.random.normal(0.0, 0.6))
    elif np.random.random() > 0.33333:
        points_x.append(np.random.normal(2.0, 0.57))
        points_y.append(np.random.normal(3.0, 0.6))
    else:
        points_x.append(np.random.normal(2.0, 0.33))
        points_y.append(np.random.normal(1.0, 0.2))

# Collect the 1000 points into a numpy array
points = np.array(np.transpose([points_x, points_y]))

# Convert the numpy array to a tensor
vectors = tf.constant(points)
centroides = tf.Variable(
    tf.slice(tf.random_shuffle(vectors), [0, 0], [num_clusters, -1]))  # initialize the centroids

# Expand the tensor dimensions
expanded_vectors = tf.expand_dims(vectors, 0)
expanded_centroides = tf.expand_dims(centroides, 1)

# Compute the distances
diff = tf.subtract(expanded_vectors, expanded_centroides)
distance = tf.reduce_sum(tf.square(diff), 2)
assignments = tf.argmin(distance, 0)

# Update the centroids
means = tf.concat([
    tf.reduce_mean(
        tf.gather(vectors,
                  tf.reshape(tf.where(tf.equal(assignments, c)), [1, -1])),
def tensorflow_categorical(count, seed):
    assert count > 0
    arr = [1.] + [.0 for _ in range(count - 1)]
    return tf.random_shuffle(arr, seed)
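# Usage sketch (not part of the original source, TF 1.x assumed): the returned
# tensor is a uniformly permuted one-hot vector, i.e. a single categorical draw
# with equal probability over `count` categories.
sample = tensorflow_categorical(count=4, seed=42)
with tf.Session() as sess:
    print(sess.run(sample))  # e.g. [0., 0., 1., 0.]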
def postproc_flag(self, images,
                  NOW_SIZE1=256,
                  NOW_SIZE2=256,
                  seed_random=0,
                  curr_batch_size=None,
                  with_noise=0,
                  noise_level=10,
                  with_flip=0,
                  is_normal=0,
                  eliprep=0,
                  thprep=0,
                  sub_mean=0,
                  mean_path=None,
                  color_norm=0,
                  size_vary_prep=0,
                  with_color_noise=0,
                  shape_undefined=0,
                  size_minval=0.08,
                  sm_full_size=0,  # sm_add
                  ):
    if curr_batch_size is None:
        curr_batch_size = self.batch_size

    orig_dtype = images.dtype
    norm = tf.cast(images, tf.float32)

    if eliprep == 1:
        def prep_each(norm_):
            _RESIZE_SIDE_MIN = 256
            _RESIZE_SIDE_MAX = 512
            if self.group == 'train':
                im = preprocess_image(norm_, self.crop_size, self.crop_size,
                                      is_training=True,
                                      resize_side_min=_RESIZE_SIDE_MIN,
                                      resize_side_max=_RESIZE_SIDE_MAX)
            else:
                im = preprocess_image(norm_, self.crop_size, self.crop_size,
                                      is_training=False,
                                      resize_side_min=_RESIZE_SIDE_MIN,
                                      resize_side_max=_RESIZE_SIDE_MAX)
            return im

        crop_images = tf.map_fn(prep_each, norm)
    elif thprep == 1:
        def prep_each(norm_):
            im = preprocessing_th(norm_, self.crop_size, self.crop_size,
                                  is_training=self.group == 'train',
                                  seed_random=seed_random)
            return im

        crop_images = prep_each(images)
        crop_images = tf.expand_dims(crop_images, axis=0)
    elif sm_full_size == 1:
        if with_color_noise == 1 and self.group == 'train':
            order_temp = tf.constant([0, 1, 2], dtype=tf.int32)
            order_rand = tf.random_shuffle(order_temp, seed=seed_random)

            fn_pred_fn_pairs = lambda x, image: [
                (tf.equal(x, order_temp[0]),
                 lambda: tf.image.random_saturation(image, 0.6, 1.4, seed=seed_random)),
                (tf.equal(x, order_temp[1]),
                 lambda: tf.image.random_brightness(image, 0.4, seed=seed_random)),
            ]
            default_fn = lambda image: tf.image.random_contrast(image, 0.6, 1.4,
                                                                seed=seed_random)

            def _color_jitter_one(_norm):
                orig_shape = _norm.get_shape().as_list()
                _norm = tf.case(fn_pred_fn_pairs(order_rand[0], _norm),
                                default=lambda: default_fn(_norm))
                _norm = tf.case(fn_pred_fn_pairs(order_rand[1], _norm),
                                default=lambda: default_fn(_norm))
                _norm = tf.case(fn_pred_fn_pairs(order_rand[2], _norm),
                                default=lambda: default_fn(_norm))
                _norm.set_shape(orig_shape)
                return _norm

            norm = tf.map_fn(_color_jitter_one, norm)

        if sub_mean == 1:
            IMAGENET_MEAN = tf.constant(
                np.load(mean_path).swapaxes(0, 1).swapaxes(1, 2)[:, :, ::-1],
                dtype=tf.float32)
            orig_dtype = tf.float32
            norm = norm - IMAGENET_MEAN

        if self.group == 'train':
            if self.withflip == 1 or with_flip == 1:
                def _postprocess_flip(im):
                    # Original way of flipping, changed to the random_uniform way
                    # to be more controllable:
                    #   im = tf.image.random_flip_left_right(im, seed=seed_random)
                    #   return im
                    do_flip = tf.random_uniform(shape=[1], minval=0, maxval=1,
                                                dtype=tf.float32, seed=seed_random)

                    def __left_right_flip(im):
                        flipped = tf.image.flip_left_right(im)
                        if is_normal == 1:
                            # flipped = 256 - flipped
                            flipped_x, flipped_y, flipped_z = tf.unstack(flipped, axis=2)
                            flipped = tf.stack([256 - flipped_x, flipped_y, flipped_z],
                                               axis=2)
                        return flipped

                    return tf.cond(tf.less(do_flip[0], 0.5),
                                   fn1=lambda: __left_right_flip(im),
                                   fn2=lambda: im)

                norm = tf.map_fn(_postprocess_flip, norm, dtype=norm.dtype)

            if with_noise == 1:
                def _postprocess_noise(im):
                    do_noise = tf.random_uniform(shape=[1], minval=0, maxval=1,
                                                 dtype=tf.float32, seed=None)

                    def __add_noise(im):
                        curr_level = tf.random_uniform(shape=[1], minval=0,
                                                       maxval=noise_level,
                                                       dtype=tf.float32, seed=None)
                        curr_noise = tf.random_normal(shape=tf.shape(im), mean=0.0,
                                                      stddev=curr_level,
                                                      dtype=tf.float32)
                        return tf.add(im, curr_noise)

                    # return tf.cond(tf.less(do_noise[0], 0.5), true_fn=lambda: __add_noise(im), false_fn=lambda: im)
                    return tf.cond(tf.less(do_noise[0], 0.5),
                                   fn1=lambda: __add_noise(im),
                                   fn2=lambda: im)

                norm = tf.map_fn(_postprocess_noise, norm, dtype=norm.dtype)

        crop_images = tf.cast(norm, orig_dtype)
    else:
        if with_color_noise == 1 and self.group == 'train':
            order_temp = tf.constant([0, 1, 2], dtype=tf.int32)
            order_rand = tf.random_shuffle(order_temp, seed=seed_random)

            fn_pred_fn_pairs = lambda x, image: [
                (tf.equal(x, order_temp[0]),
                 lambda: tf.image.random_saturation(image, 0.6, 1.4, seed=seed_random)),
                (tf.equal(x, order_temp[1]),
                 lambda: tf.image.random_brightness(image, 0.4, seed=seed_random)),
            ]
            default_fn = lambda image: tf.image.random_contrast(image, 0.6, 1.4,
                                                                seed=seed_random)

            def _color_jitter_one(_norm):
                orig_shape = _norm.get_shape().as_list()
                _norm = tf.case(fn_pred_fn_pairs(order_rand[0], _norm),
                                default=lambda: default_fn(_norm))
                _norm = tf.case(fn_pred_fn_pairs(order_rand[1], _norm),
                                default=lambda: default_fn(_norm))
                _norm = tf.case(fn_pred_fn_pairs(order_rand[2], _norm),
                                default=lambda: default_fn(_norm))
                _norm.set_shape(orig_shape)
                return _norm

            norm = tf.map_fn(_color_jitter_one, norm)

        if sub_mean == 1:
            IMAGENET_MEAN = tf.constant(
                np.load(mean_path).swapaxes(0, 1).swapaxes(1, 2)[:, :, ::-1],
                dtype=tf.float32)
            orig_dtype = tf.float32
            norm = norm - IMAGENET_MEAN

        if self.group == 'train':
            if self.size_vary_prep == 0 and size_vary_prep == 0:
                shape_tensor = norm.get_shape().as_list()
                if self.crop_each == 0:
                    crop_images = tf.random_crop(
                        norm,
                        [curr_batch_size, self.crop_size, self.crop_size, shape_tensor[3]],
                        seed=seed_random)
                else:
                    # original implementation is not useful, deleted, see the end of this file
                    crop_images = tf.random_crop(
                        norm,
                        [curr_batch_size, self.crop_size, self.crop_size, shape_tensor[3]],
                        seed=seed_random)
            else:  # self.size_vary_prep == 1
                if shape_undefined == 0:
                    channel_num = norm.get_shape().as_list()[-1]
                else:
                    channel_num = 3
                RandomSizedCrop_with_para = lambda image: RandomSizedCrop(
                    image=image,
                    out_height=self.crop_size,
                    out_width=self.crop_size,
                    seed_random=seed_random,
                    channel_num=channel_num,
                    fix_asp_ratio=self.fix_asp_ratio,
                    size_minval=size_minval,
                )
                if shape_undefined == 0:
                    crop_images = tf.map_fn(RandomSizedCrop_with_para, norm)
                    curr_shape = crop_images.get_shape().as_list()
                    crop_images.set_shape([curr_batch_size] + curr_shape[1:])
                else:
                    crop_images = RandomSizedCrop_with_para(norm)
                    crop_images = tf.expand_dims(crop_images, axis=0)

            if self.withflip == 1 or with_flip == 1:
                def _postprocess_flip(im):
                    # Original way of flipping, changed to the random_uniform way
                    # to be more controllable:
                    #   im = tf.image.random_flip_left_right(im, seed=seed_random)
                    #   return im
                    do_flip = tf.random_uniform(shape=[1], minval=0, maxval=1,
                                                dtype=tf.float32, seed=seed_random)

                    def __left_right_flip(im):
                        flipped = tf.image.flip_left_right(im)
                        if is_normal == 1:
                            # flipped = 256 - flipped
                            flipped_x, flipped_y, flipped_z = tf.unstack(flipped, axis=2)
                            flipped = tf.stack([256 - flipped_x, flipped_y, flipped_z],
                                               axis=2)
                        return flipped

                    return tf.cond(tf.less(do_flip[0], 0.5),
                                   fn1=lambda: __left_right_flip(im),
                                   fn2=lambda: im)

                crop_images = tf.map_fn(_postprocess_flip, crop_images,
                                        dtype=crop_images.dtype)

            if with_noise == 1:
                def _postprocess_noise(im):
                    do_noise = tf.random_uniform(shape=[1], minval=0, maxval=1,
                                                 dtype=tf.float32, seed=None)

                    def __add_noise(im):
                        curr_level = tf.random_uniform(shape=[1], minval=0,
                                                       maxval=noise_level,
                                                       dtype=tf.float32, seed=None)
                        curr_noise = tf.random_normal(shape=tf.shape(im), mean=0.0,
                                                      stddev=curr_level,
                                                      dtype=tf.float32)
                        return tf.add(im, curr_noise)

                    # return tf.cond(tf.less(do_noise[0], 0.5), true_fn=lambda: __add_noise(im), false_fn=lambda: im)
                    return tf.cond(tf.less(do_noise[0], 0.5),
                                   fn1=lambda: __add_noise(im),
                                   fn2=lambda: im)

                crop_images = tf.map_fn(_postprocess_noise, crop_images,
                                        dtype=crop_images.dtype)

        else:  # not self.group == 'train'
            if shape_undefined == 0:
                off = np.zeros(shape=[curr_batch_size, 4])
                off[:, 0] = int((NOW_SIZE1 - self.crop_size) / 2)
                off[:, 1] = int((NOW_SIZE2 - self.crop_size) / 2)
                off[:, 2:4] = off[:, :2] + self.crop_size

                off[:, 0] = off[:, 0] * 1.0 / (NOW_SIZE1 - 1)
                off[:, 2] = off[:, 2] * 1.0 / (NOW_SIZE1 - 1)
                off[:, 1] = off[:, 1] * 1.0 / (NOW_SIZE2 - 1)
                off[:, 3] = off[:, 3] * 1.0 / (NOW_SIZE2 - 1)

                box_ind = tf.constant(range(curr_batch_size))

                crop_images = tf.image.crop_and_resize(
                    norm, off, box_ind,
                    tf.constant([self.crop_size, self.crop_size]))
            else:
                image = _aspect_preserving_resize(norm, 256)
                image = _central_crop([image], self.crop_size, self.crop_size)[0]
                image.set_shape([self.crop_size, self.crop_size, 3])
                crop_images = image
                crop_images = tf.expand_dims(crop_images, axis=0)

        crop_images = tf.cast(crop_images, orig_dtype)

    if curr_batch_size == 1:
        crop_images = tf.squeeze(crop_images, axis=[0])

    return crop_images
def model_fn(features, labels, mode): """ the model_fn feeds into Estimator """ feature_columns = self.create_feature_columns(tf_transform_output) input_layer = tf.feature_column.input_layer( features=features, feature_columns=feature_columns) # Network structure # Batch norm after linear combination and before activation. Dropout after activation. h1 = tf.layers.Dense( units=MODEL_NUM_UNIT_SCALE * 4, activation=None, kernel_initializer=tf.glorot_normal_initializer(), bias_initializer=tf.zeros_initializer() )(input_layer) h1_bn = tf.layers.batch_normalization(h1, training=(mode == tf.estimator.ModeKeys.TRAIN)) h1_act = tf.nn.relu(h1_bn) h1_do = tf.layers.dropout( inputs=h1_act, rate=DROPOUT_PROB, training=(mode == tf.estimator.ModeKeys.TRAIN)) h2 = tf.layers.Dense( units=MODEL_NUM_UNIT_SCALE * 2, activation=None, kernel_initializer=tf.glorot_normal_initializer(), bias_initializer=tf.zeros_initializer() )(h1_do) h2_bn = tf.layers.batch_normalization(h2, training=(mode == tf.estimator.ModeKeys.TRAIN)) h2_act = tf.nn.relu(h2_bn) h2_do = tf.layers.dropout( inputs=h2_act, rate=DROPOUT_PROB, training=(mode == tf.estimator.ModeKeys.TRAIN)) # Head for label1 h30 = tf.layers.Dense( units=MODEL_NUM_UNIT_SCALE, activation=None, kernel_initializer=tf.glorot_normal_initializer(), bias_initializer=tf.zeros_initializer() )(h2_do) h3_bn0 = tf.layers.batch_normalization(h30, training=(mode == tf.estimator.ModeKeys.TRAIN)) h3_act0 = tf.nn.relu(h3_bn0) h3_do0 = tf.layers.dropout( inputs=h3_act0, rate=DROPOUT_PROB, training=(mode == tf.estimator.ModeKeys.TRAIN)) logits0 = tf.layers.Dense( units=2, activation=None, kernel_initializer=tf.glorot_normal_initializer(), bias_initializer=tf.zeros_initializer() )(h3_do0) softmax0 = tf.contrib.layers.softmax(logits0) q_values = tf.div(softmax0[:, 1] - tf.reduce_min(softmax0[:, 1]), tf.reduce_max(softmax0[:, 1]) - tf.reduce_min(softmax0[:, 1])) if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL: labels0 = labels # int64 Notice: use labels but not labels[0], because we only have 1 label now. onehot_labels0 = tf.one_hot(labels0, depth=2) # shape(2,0) should [batch_size, num_classes] , logit should [batch_size, num_classes] # logit(?,2) # `ror_20_days_bool` loss definition: weighting to correct for class imbalances. unweighted_losses0 = tf.losses.softmax_cross_entropy( onehot_labels=onehot_labels0, logits=logits0, reduction=Reduction.NONE) class_weights0 = tf.constant([[1., 1.]]) sample_weights0 = tf.reduce_sum(tf.multiply(onehot_labels0, class_weights0), 1) loss0 = tf.reduce_mean(unweighted_losses0 * sample_weights0) loss = loss0 # Metrics auroc0 = tf.metrics.auc(labels0, softmax0[:, 1], num_thresholds=10000, curve='ROC') prauc0 = tf.metrics.auc(labels0, softmax0[:, 1], num_thresholds=10000, curve='PR', summation_method='careful_interpolation') if mode == tf.estimator.ModeKeys.TRAIN: # MSE loss, optimized with Adam optimizer = tf.train.AdamOptimizer(FIX_LEARNING_RATE) # This is to make sure we also update the rolling mean/var for `tf.layers.batch_normalization` # (which is stored outside of the Estimator scope). update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step()) # TensorBoard performance metrics. with tf.name_scope('losses'): tf.summary.scalar('loss_ror_20', loss0) # TensorBoard model evolution over time. 
with tf.name_scope('layer_1'): weights = tf.get_default_graph().get_tensor_by_name(os.path.split(h1.name)[0] + '/kernel:0') biases = tf.get_default_graph().get_tensor_by_name(os.path.split(h1.name)[0] + '/bias:0') tf.summary.histogram('weights', weights) tf.summary.histogram('biases', biases) tf.summary.histogram('activations', h1_act) with tf.name_scope('layer_2'): weights = tf.get_default_graph().get_tensor_by_name(os.path.split(h2.name)[0] + '/kernel:0') biases = tf.get_default_graph().get_tensor_by_name(os.path.split(h2.name)[0] + '/bias:0') tf.summary.histogram('weights', weights) tf.summary.histogram('biases', biases) tf.summary.histogram('activations', h2_act) with tf.name_scope('layer_3_ror_20'): weights = tf.get_default_graph().get_tensor_by_name(os.path.split(h30.name)[0] + '/kernel:0') biases = tf.get_default_graph().get_tensor_by_name(os.path.split(h30.name)[0] + '/bias:0') tf.summary.histogram('weights', weights) tf.summary.histogram('biases', biases) tf.summary.histogram('activations', h3_act0) with tf.name_scope('logits_ror_20'): weights = tf.get_default_graph().get_tensor_by_name( os.path.split(logits0.name)[0] + '/kernel:0') biases = tf.get_default_graph().get_tensor_by_name(os.path.split(logits0.name)[0] + '/bias:0') tf.summary.histogram('weights', weights) tf.summary.histogram('biases', biases) tf.summary.histogram('activations', h3_act0) with tf.name_scope('q_values_ror_20'): tf.summary.histogram('q0', softmax0[:, 0]) tf.summary.histogram('q1', softmax0[:, 1]) # Log a few predictions.label0 : ror_xxx_days_bool # to watch the labels and softmax in training label_and_softmax0 = tf.stack([tf.cast(labels0, tf.float32), softmax0[:, 1]], axis=1) logging_hook = tf.train.LoggingTensorHook({ 'label_and_softmax0': label_and_softmax0[0:10, :], # label_and_softmax0 size is batch size in train_config "TRAIN_BATCH_SIZE" }, every_n_iter=LOG_FREQ_STEP) return tf.estimator.EstimatorSpec( mode=mode, loss=loss, train_op=train_op, training_hooks=[logging_hook]) elif mode == tf.estimator.ModeKeys.EVAL: return tf.estimator.EstimatorSpec( mode=mode, loss=loss, # These metrics are computed over the complete eval dataset. eval_metric_ops={ 'metrics_ror_20_days_bool/AUC_ROC': auroc0, 'metrics_ror_20_days_bool/AUC_PR': prauc0, }, predictions={SignatureKeys.PREDICTIONS: q_values}) elif mode == tf.estimator.ModeKeys.PREDICT: """ A policy derived from the Q-value network. This epsilon-greedy policy computes the seeds with the `TOP_SEEDS_K` values and replaces them according to a `epsilon_greedy_probability` probability with a random value in [0, 1000). """ # Indices of top `p.TOP_SEEDS_K` Q-values. top_q_idx = tf.nn.top_k(q_values, k=TOP_SEEDS_K)[1] sel_q_idx = tf.random_shuffle(top_q_idx)[0:SEEDS_K_FINAL] # Since seeds are in [1, `p.SEEDS_K_FINAL`], we have to add 1 to the index. predictions = sel_q_idx + 1 class_labels_ror_20 = tf.reshape( tf.tile(tf.constant(['0', '1']), (tf.shape(softmax0)[0],)), (tf.shape(softmax0)[0], 2)) export_outputs = { # Default output (used in serving-infra) # * output: Seed list. Requires using `SignatureKeys.OUTPUT` dict key, since this is # used by the downstream SRS. # * eps_rnd_selection: Boolean list of whether a random seed (with eps prob) # was recommend or a predicted seed. # * q_values: Q-values for all `SEED_LIST_LENGTH` seeds. 
            SignatureDefs.DEFAULT: tf.estimator.export.PredictOutput(
                {SignatureKeys.OUTPUT: predictions,
                 "q_values": tf.transpose(q_values)}),
            # Analysis output
            SignatureDefs.ANALYSIS_ROR_20: tf.estimator.export.ClassificationOutput(
                scores=softmax0,
                classes=class_labels_ror_20),
            SignatureDefs.ANALYSIS_Q: tf.estimator.export.RegressionOutput(
                value=q_values)
        }

        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={SignatureKeys.PREDICTIONS: q_values},
            export_outputs=export_outputs)
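For reference, the top-k-then-shuffle selection in the PREDICT branch can be exercised in isolation. A minimal sketch with assumed values for TOP_SEEDS_K and SEEDS_K_FINAL (both are config parameters not shown above):

import tensorflow as tf

q_values = tf.constant([0.1, 0.9, 0.4, 0.7, 0.3])
TOP_SEEDS_K = 3    # assumed value, for illustration only
SEEDS_K_FINAL = 2  # assumed value, for illustration only

top_q_idx = tf.nn.top_k(q_values, k=TOP_SEEDS_K)[1]        # indices of the 3 largest Q-values
sel_q_idx = tf.random_shuffle(top_q_idx)[0:SEEDS_K_FINAL]  # keep a random 2 of those 3
predictions = sel_q_idx + 1                                # seeds are 1-indexed

with tf.Session() as sess:
    print(sess.run(predictions))  # e.g. [4 2]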
def f1(): # Choose a random contiguous range from within the sequence. range_min = tf.random_shuffle(tf.range(seq_len - batch_size))[0] range_max = range_min + batch_size return tf.range(range_min, range_max)
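A standalone version of f1 with assumed seq_len and batch_size, to make the behavior concrete: the shuffle picks a random start offset, so the result is always a contiguous window of batch_size indices:

import tensorflow as tf

seq_len, batch_size = 10, 4  # assumed values
range_min = tf.random_shuffle(tf.range(seq_len - batch_size))[0]  # random start in [0, 6)
window = tf.range(range_min, range_min + batch_size)

with tf.Session() as sess:
    print(sess.run(window))  # e.g. [3 4 5 6]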
def make_data_tensor(self, train=True): if train: folders = self.metatrain_character_folders # number of tasks, not number of meta-iterations. (divide by metabatch size to measure) num_total_batches = 200000 else: folders = self.metaval_character_folders num_total_batches = 600 # make list of files print('Generating filenames') all_filenames = [] for _ in range(num_total_batches): sampled_character_folders = random.sample(folders, self.num_classes) random.shuffle(sampled_character_folders) labels_and_images = get_images( sampled_character_folders, range(self.num_classes), nb_samples=self.num_samples_per_class, shuffle=False) # make sure the above isn't randomized order labels = [li[0] for li in labels_and_images] filenames = [li[1] for li in labels_and_images] all_filenames.extend(filenames) # make queue for tensorflow to read from filename_queue = tf.train.string_input_producer( tf.convert_to_tensor(all_filenames), shuffle=False) print('Generating image processing ops') image_reader = tf.WholeFileReader() _, image_file = image_reader.read(filename_queue) if FLAGS.datasource == 'miniimagenet': image = tf.image.decode_jpeg(image_file, channels=3) image.set_shape((self.img_size[0], self.img_size[1], 3)) image = tf.reshape(image, [self.dim_input]) image = tf.cast(image, tf.float32) / 255.0 else: image = tf.image.decode_png(image_file) image.set_shape((self.img_size[0], self.img_size[1], 1)) image = tf.reshape(image, [self.dim_input]) image = tf.cast(image, tf.float32) / 255.0 image = 1.0 - image # invert num_preprocess_threads = 1 # TODO - enable this to be set to >1 min_queue_examples = 256 examples_per_batch = self.num_classes * self.num_samples_per_class batch_image_size = self.batch_size * examples_per_batch print('Batching images') images = tf.train.batch( [image], batch_size=batch_image_size, num_threads=num_preprocess_threads, capacity=min_queue_examples + 3 * batch_image_size, ) all_image_batches, all_label_batches = [], [] print('Manipulating image data to be right shape') for i in range(self.batch_size): image_batch = images[i * examples_per_batch:(i + 1) * examples_per_batch] if FLAGS.datasource == 'omniglot': # omniglot augments the dataset by rotating digits to create new classes # get rotation per class (e.g. 0,1,2,0,0 if there are 5 classes) rotations = tf.multinomial(tf.log([[1., 1., 1., 1.]]), self.num_classes) label_batch = tf.convert_to_tensor(labels) new_list, new_label_list = [], [] for k in range(self.num_samples_per_class): class_idxs = tf.range(0, self.num_classes) class_idxs = tf.random_shuffle(class_idxs) true_idxs = class_idxs * self.num_samples_per_class + k new_list.append(tf.gather(image_batch, true_idxs)) if FLAGS.datasource == 'omniglot': # and FLAGS.train: new_list[-1] = tf.stack([ tf.reshape( tf.image.rot90(tf.reshape( new_list[-1][ind], [self.img_size[0], self.img_size[1], 1]), k=tf.cast( rotations[0, class_idxs[ind]], tf.int32)), (self.dim_input, )) for ind in range(self.num_classes) ]) new_label_list.append(tf.gather(label_batch, true_idxs)) new_list = tf.concat( new_list, 0 ) # has shape [self.num_classes*self.num_samples_per_class, self.dim_input] new_label_list = tf.concat(new_label_list, 0) all_image_batches.append(new_list) all_label_batches.append(new_label_list) all_image_batches = tf.stack(all_image_batches) all_label_batches = tf.stack(all_label_batches) all_label_batches = tf.one_hot(all_label_batches, self.num_classes) return all_image_batches, all_label_batches
def process_dataset(self, *row_parts): row_parts = list(row_parts) word = row_parts[0] # (, ) if not self.is_evaluating and self.config.RANDOM_CONTEXTS: all_contexts = tf.stack(row_parts[1:]) all_contexts_padded = tf.concat([all_contexts, [self.context_pad]], axis=-1) index_of_blank_context = tf.where( tf.equal(all_contexts_padded, self.context_pad)) num_contexts_per_example = tf.reduce_min(index_of_blank_context) # if there are less than self.max_contexts valid contexts, still sample self.max_contexts safe_limit = tf.cast( tf.maximum(num_contexts_per_example, self.config.MAX_CONTEXTS), tf.int32) rand_indices = tf.random_shuffle( tf.range(safe_limit))[:self.config.MAX_CONTEXTS] contexts = tf.gather(all_contexts, rand_indices) # (max_contexts,) else: contexts = row_parts[1:(self.config.MAX_CONTEXTS + 1)] # (max_contexts,) # contexts: (max_contexts, ) split_contexts = tf.string_split(contexts, delimiter=",", skip_empty=False) sparse_split_contexts = tf.sparse.SparseTensor( indices=split_contexts.indices, values=split_contexts.values, dense_shape=[self.config.MAX_CONTEXTS, 3], ) dense_split_contexts = tf.reshape( tf.sparse.to_dense(sp_input=sparse_split_contexts, default_value=Common.PAD), shape=[self.config.MAX_CONTEXTS, 3], ) # (batch, max_contexts, 3) split_target_labels = tf.string_split(tf.expand_dims(word, -1), delimiter="|") target_dense_shape = [ 1, tf.maximum( tf.cast(self.config.MAX_TARGET_PARTS, tf.int64), # tf.to_int64(self.config.MAX_TARGET_PARTS), split_target_labels.dense_shape[1] + 1, ), ] sparse_target_labels = tf.sparse.SparseTensor( indices=split_target_labels.indices, values=split_target_labels.values, dense_shape=target_dense_shape, ) dense_target_label = tf.reshape( tf.sparse.to_dense(sp_input=sparse_target_labels, default_value=Common.PAD), [-1], ) index_of_blank = tf.where(tf.equal(dense_target_label, Common.PAD)) target_length = tf.reduce_min(index_of_blank) dense_target_label = dense_target_label[:self.config.MAX_TARGET_PARTS] clipped_target_lengths = tf.clip_by_value( target_length, clip_value_min=0, clip_value_max=self.config.MAX_TARGET_PARTS) target_word_labels = tf.concat( [self.target_table.lookup(dense_target_label), [0]], axis=-1) # (max_target_parts + 1) of int path_source_strings = tf.slice( dense_split_contexts, [0, 0], [self.config.MAX_CONTEXTS, 1]) # (max_contexts, 1) flat_source_strings = tf.reshape(path_source_strings, [-1]) # (max_contexts) split_source = tf.string_split( flat_source_strings, delimiter="|", skip_empty=False) # (max_contexts, max_name_parts) sparse_split_source = tf.sparse.SparseTensor( indices=split_source.indices, values=split_source.values, dense_shape=[ self.config.MAX_CONTEXTS, tf.maximum(tf.to_int64(self.config.MAX_NAME_PARTS), split_source.dense_shape[1]), ], ) dense_split_source = tf.sparse.to_dense( sp_input=sparse_split_source, default_value=Common.PAD) # (max_contexts, max_name_parts) dense_split_source = tf.slice(dense_split_source, [0, 0], [-1, self.config.MAX_NAME_PARTS]) path_source_indices = self.subtoken_table.lookup( dense_split_source) # (max_contexts, max_name_parts) path_source_lengths = tf.reduce_sum( tf.cast(tf.not_equal(dense_split_source, Common.PAD), tf.int32), -1) # (max_contexts) path_strings = tf.slice(dense_split_contexts, [0, 1], [self.config.MAX_CONTEXTS, 1]) flat_path_strings = tf.reshape(path_strings, [-1]) split_path = tf.string_split(flat_path_strings, delimiter="|", skip_empty=False) sparse_split_path = tf.sparse.SparseTensor( indices=split_path.indices, values=split_path.values, dense_shape=[ 
self.config.MAX_CONTEXTS, self.config.MAX_PATH_LENGTH ], ) dense_split_path = tf.sparse.to_dense( sp_input=sparse_split_path, default_value=Common.PAD) # (batch, max_contexts, max_path_length) node_indices = self.node_table.lookup( dense_split_path) # (max_contexts, max_path_length) path_lengths = tf.reduce_sum( tf.cast(tf.not_equal(dense_split_path, Common.PAD), tf.int32), -1) # (max_contexts) path_target_strings = tf.slice( dense_split_contexts, [0, 2], [self.config.MAX_CONTEXTS, 1]) # (max_contexts, 1) flat_target_strings = tf.reshape(path_target_strings, [-1]) # (max_contexts) split_target = tf.string_split( flat_target_strings, delimiter="|", skip_empty=False) # (max_contexts, max_name_parts) sparse_split_target = tf.sparse.SparseTensor( indices=split_target.indices, values=split_target.values, dense_shape=[ self.config.MAX_CONTEXTS, tf.maximum(tf.to_int64(self.config.MAX_NAME_PARTS), split_target.dense_shape[1]), ], ) dense_split_target = tf.sparse.to_dense( sp_input=sparse_split_target, default_value=Common.PAD) # (max_contexts, max_name_parts) dense_split_target = tf.slice(dense_split_target, [0, 0], [-1, self.config.MAX_NAME_PARTS]) path_target_indices = self.subtoken_table.lookup( dense_split_target) # (max_contexts, max_name_parts) path_target_lengths = tf.reduce_sum( tf.cast(tf.not_equal(dense_split_target, Common.PAD), tf.int32), -1) # (max_contexts) valid_contexts_mask = tf.cast( # tf.to_float tf.not_equal( tf.reduce_max(path_source_indices, -1) + tf.reduce_max(node_indices, -1) + tf.reduce_max(path_target_indices, -1), 0, ), dtype=tf.float32, ) return { TARGET_STRING_KEY: word, TARGET_INDEX_KEY: target_word_labels, TARGET_LENGTH_KEY: clipped_target_lengths, PATH_SOURCE_INDICES_KEY: path_source_indices, NODE_INDICES_KEY: node_indices, PATH_TARGET_INDICES_KEY: path_target_indices, VALID_CONTEXT_MASK_KEY: valid_contexts_mask, PATH_SOURCE_LENGTHS_KEY: path_source_lengths, PATH_LENGTHS_KEY: path_lengths, PATH_TARGET_LENGTHS_KEY: path_target_lengths, PATH_SOURCE_STRINGS_KEY: path_source_strings, PATH_STRINGS_KEY: path_strings, PATH_TARGET_STRINGS_KEY: path_target_strings, }
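The context-sampling trick near the top of process_dataset is easy to miss: the shuffled range runs to max(num_contexts_per_example, MAX_CONTEXTS), so examples with fewer valid contexts still yield MAX_CONTEXTS indices (the extra picks land on pad columns). A minimal sketch with assumed sizes and pad token:

import tensorflow as tf

MAX_CONTEXTS = 4                                     # assumed config value
all_contexts = tf.constant(['a', 'b', 'c', '', ''])  # 3 valid contexts + pad columns
num_valid = tf.constant(3, dtype=tf.int32)

safe_limit = tf.maximum(num_valid, MAX_CONTEXTS)
rand_indices = tf.random_shuffle(tf.range(safe_limit))[:MAX_CONTEXTS]
contexts = tf.gather(all_contexts, rand_indices)

with tf.Session() as sess:
    print(sess.run(contexts))  # e.g. [b'c' b'a' b'' b'b']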
x_values = []
y_values = []
vector_values = []
# generate random data from two 2-D Gaussian clusters
for i in range(num_vectors):
    if np.random.random() > 0.5:
        x_values.append(np.random.normal(0.4, 0.7))
        y_values.append(np.random.normal(0.2, 0.8))
    else:
        x_values.append(np.random.normal(0.6, 0.4))
        y_values.append(np.random.normal(0.8, 0.5))

# zip() returns an iterator in Python 3; materialize it before building tensors
vector_values = list(zip(x_values, y_values))
vectors = tf.constant(vector_values)

# pick num_clusters random points as the initial centroids
n_samples = tf.shape(vectors)[0]
random_indices = tf.random_shuffle(tf.range(0, n_samples))
begin = [0]
size = [num_clusters]
centroid_indices = tf.slice(random_indices, begin, size)
centroids = tf.Variable(tf.gather(vector_values, centroid_indices))

# broadcast vectors against centroids to get pairwise squared distances
expanded_vectors = tf.expand_dims(vectors, 0)
expanded_centroids = tf.expand_dims(centroids, 1)
vectors_subtraction = tf.subtract(expanded_vectors, expanded_centroids)  # tf.sub was renamed
euclidean_distances = tf.reduce_sum(tf.square(vectors_subtraction), 2)
assignments = tf.cast(tf.argmin(euclidean_distances, 0), tf.int32)

partitions = [0, 0, 1, 1, 0]
def detection_targets_graph(config, proposals, gt_class_ids, gt_boxes, **kwargs):
    # Assertions
    asserts = [
        tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals],
                  name="roi_assertion"),
    ]
    with tf.control_dependencies(asserts):
        proposals = tf.identity(proposals)

    # Remove zero padding
    proposals, _ = model.trim_zeros_graph(proposals, name="trim_proposals")
    gt_boxes, non_zeros = model.trim_zeros_graph(gt_boxes, name="trim_gt_boxes")
    gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros,
                                   name="trim_gt_class_ids")

    # Handle COCO crowds
    # A crowd box in COCO is a bounding box around several instances. Exclude
    # them from training. A crowd box is given a negative class ID.
    crowd_ix = tf.where(gt_class_ids < 0)[:, 0]
    non_crowd_ix = tf.where(gt_class_ids > 0)[:, 0]
    crowd_boxes = tf.gather(gt_boxes, crowd_ix)
    gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix)
    gt_boxes = tf.gather(gt_boxes, non_crowd_ix)

    # Compute overlaps matrix [proposals, gt_boxes]
    overlaps = model.overlaps_graph(proposals, gt_boxes)

    # Compute overlaps with crowd boxes [anchors, crowds]
    crowd_overlaps = model.overlaps_graph(proposals, crowd_boxes)
    crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1)
    no_crowd_bool = (crowd_iou_max < 0.001)

    # Determine positive and negative ROIs
    roi_iou_max = tf.reduce_max(overlaps, axis=1)
    # 1. Positive ROIs are those with >= 0.5 IoU with a GT box
    positive_roi_bool = (roi_iou_max >= 0.5)
    positive_indices = tf.where(positive_roi_bool)[:, 0]
    # 2. Negative ROIs are those with < 0.5 with every GT box. Skip crowds.
    negative_indices = tf.where(
        tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0]

    # Subsample ROIs. Aim for 33% positive
    # Positive ROIs
    positive_count = int(config.TRAIN_ROIS_PER_IMAGE * config.ROI_POSITIVE_RATIO)
    positive_indices = tf.random_shuffle(positive_indices)[:positive_count]
    positive_count = tf.shape(positive_indices)[0]
    # Negative ROIs. Add enough to maintain positive:negative ratio.
    r = 1.0 / config.ROI_POSITIVE_RATIO
    negative_count = tf.cast(r * tf.cast(positive_count, tf.float32),
                             tf.int32) - positive_count
    negative_indices = tf.random_shuffle(negative_indices)[:negative_count]
    # Gather selected ROIs
    positive_rois = tf.gather(proposals, positive_indices)
    negative_rois = tf.gather(proposals, negative_indices)

    # Assign positive ROIs to GT boxes.
    positive_overlaps = tf.gather(overlaps, positive_indices)
    roi_gt_box_assignment = tf.argmax(positive_overlaps, axis=1)
    roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment)
    roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment)

    # Compute bbox refinement for positive ROIs
    deltas = utils.box_refinement_graph(positive_rois, roi_gt_boxes)
    deltas /= config.BBOX_STD_DEV

    # Compute mask targets
    boxes = positive_rois

    # Append negative ROIs and pad bbox deltas and masks that
    # are not used for negative ROIs with zeros.
    rois = tf.concat([positive_rois, negative_rois], axis=0)
    N = tf.shape(negative_rois)[0]
    P = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0)
    rois = tf.pad(rois, [(0, P), (0, 0)])
    roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)])
    deltas = tf.pad(deltas, [(0, N + P), (0, 0)])

    if 'gt_rboxes' in kwargs:
        gt_rboxes = kwargs.get('gt_rboxes')
        gt_rboxes, _ = model.trim_zeros_graph(gt_rboxes, name="trim_gt_rboxes")
        gt_rboxes = tf.gather(gt_rboxes, non_crowd_ix)
        roi_gt_rboxes = tf.gather(gt_rboxes, roi_gt_box_assignment)

        if config.regressor == "deltas":
            rbox_deltas = line_to_deltas(roi_gt_boxes, roi_gt_rboxes)
            rbox_deltas /= 0.2
            rbox_deltas = tf.pad(rbox_deltas, [(0, N + P), (0, 0)])
            return rois, roi_gt_class_ids, deltas, rbox_deltas
        elif config.regressor == "rotdim":
            gt_angles = kwargs.get('gt_angles')
            gt_angles = tf.boolean_mask(gt_angles, non_zeros, name="trim_gt_angles")
            gt_angles = tf.gather(gt_angles, non_crowd_ix)
            roi_gt_angles = tf.gather(gt_angles, roi_gt_box_assignment)
            rbox_angles = roi_gt_angles / 0.2
            # point 1
            y1 = roi_gt_rboxes[:, 0]
            x1 = roi_gt_rboxes[:, 1]
            # point 2
            y2 = roi_gt_rboxes[:, 2]
            x2 = roi_gt_rboxes[:, 3]
            # point 3
            y3 = roi_gt_rboxes[:, 4]
            x3 = roi_gt_rboxes[:, 5]
            rw = tf.sqrt(tf.pow(x2 - x1, 2) + tf.pow(y2 - y1, 2))
            rh = tf.sqrt(tf.pow(x3 - x2, 2) + tf.pow(y3 - y2, 2))
            rbox_dim = tf.stack([rh, rw], axis=1)
            rbox_dim /= 0.1
            rbox_dim = tf.pad(rbox_dim, [(0, N + P), (0, 0)])
            rbox_angles = tf.pad(rbox_angles, [(0, N + P), (0, 0)])
            return rois, roi_gt_class_ids, deltas, rbox_angles, rbox_dim
        elif config.regressor == "verts":
            roi_gt_rboxes = tf.pad(roi_gt_rboxes, [(0, N + P), (0, 0)])
            return rois, roi_gt_class_ids, deltas, roi_gt_rboxes

    return rois, roi_gt_class_ids, deltas
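Stripped of the Mask R-CNN context, the positive/negative subsampling above reduces to: shuffle each index set, then slice. A toy sketch with assumed IoU values and an assumed ROI_POSITIVE_RATIO of 0.33:

import tensorflow as tf

roi_iou_max = tf.constant([0.7, 0.2, 0.6, 0.1, 0.55, 0.05])  # toy values
positive_indices = tf.where(roi_iou_max >= 0.5)[:, 0]
negative_indices = tf.where(roi_iou_max < 0.5)[:, 0]

positive_count = 2  # int(TRAIN_ROIS_PER_IMAGE * ROI_POSITIVE_RATIO), assumed
positive_indices = tf.random_shuffle(positive_indices)[:positive_count]
positive_count = tf.shape(positive_indices)[0]

r = 1.0 / 0.33      # assumed ROI_POSITIVE_RATIO
negative_count = tf.cast(r * tf.cast(positive_count, tf.float32), tf.int32) - positive_count
negative_indices = tf.random_shuffle(negative_indices)[:negative_count]

with tf.Session() as sess:
    pos, neg = sess.run([positive_indices, negative_indices])
    print(pos, neg)  # e.g. [4 0] [5 3 1]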
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

df = pd.DataFrame({'x': [v[0] for v in vectors_set],
                   'y': [v[1] for v in vectors_set]})
sns.lmplot('x', 'y', data=df, fit_reg=False, size=6)
plt.show()

import tensorflow as tf

vectors = tf.constant(vectors_set)
k = 4
# shuffle the points and take the first k as the initial centroids
centroides = tf.Variable(tf.slice(tf.random_shuffle(vectors), [0, 0], [k, -1]))

# expand to (1, n, 2) and (k, 1, 2) so the subtraction broadcasts to (k, n, 2)
expanded_vectors = tf.expand_dims(vectors, 0)
expanded_centroides = tf.expand_dims(centroides, 1)

assignments = tf.argmin(
    tf.reduce_sum(tf.square(tf.subtract(expanded_vectors, expanded_centroides)), 2), 0)
# new centroid = mean of the points assigned to each cluster
means = tf.concat([
    tf.reduce_mean(
        tf.gather(vectors, tf.reshape(tf.where(tf.equal(assignments, c)), [1, -1])),
        reduction_indices=[1]) for c in range(k)
], 0)
update_centroides = tf.assign(centroides, means)

init_op = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_op)
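# A plausible usage loop (assumed iteration count; not part of the original
# snippet): alternate cluster assignment and centroid updates.
num_steps = 100
for step in range(num_steps):
    _, centroid_values, assignment_values = sess.run(
        [update_centroides, centroides, assignments])
print(centroid_values)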
def build_model(self, is_training=True, inst_norm=False, no_target_source=False): real_data = tf.placeholder(tf.float32, [ self.batch_size, self.input_width, self.input_width, self.input_filters + self.output_filters ], name='real_A_and_B_images') embedding_ids = tf.placeholder(tf.int64, shape=None, name="embedding_ids") no_target_data = tf.placeholder(tf.float32, [ self.batch_size, self.input_width, self.input_width, self.input_filters + self.output_filters ], name='no_target_A_and_B_images') no_target_ids = tf.placeholder(tf.int64, shape=None, name="no_target_embedding_ids") # target images real_A = real_data[:, :, :, :self.input_filters] # source images real_B = real_data[:, :, :, self.input_filters:self.input_filters + self.output_filters] real_A_shuffle = tf.random_shuffle(real_A) embedding = init_embedding(self.embedding_num, self.embedding_dim) fake_target, target_gaussian1, target_gaussian2, source_e8, layers_source = self.generator( real_B, real_A, embedding_ids, is_training=is_training, inst_norm=inst_norm, reuse=False) fake_target_shuffle, target_gaussian1_shuffle, target_gaussian2_shuffle = self.generator_gaussian( layers_source, real_A_shuffle, embedding_ids, is_training=is_training, inst_norm=inst_norm, reuse=True) real_A_shuffle2 = tf.random_shuffle(real_A_shuffle) fake_target_shuffle2, target_gaussian1_shuffle2, target_gaussian2_shuffle2 = self.generator_gaussian( layers_source, real_A_shuffle2, embedding_ids, is_training=is_training, inst_norm=inst_norm, reuse=True) real_A_shuffle3 = tf.random_shuffle(real_A_shuffle2) fake_target_shuffle3, target_gaussian1_shuffle3, target_gaussian2_shuffle3 = self.generator_gaussian( layers_source, real_A_shuffle3, embedding_ids, is_training=is_training, inst_norm=inst_norm, reuse=True) real_A_shuffle4 = tf.random_shuffle(real_A_shuffle3) fake_target_shuffle4, target_gaussian1_shuffle4, target_gaussian2_shuffle4 = self.generator_gaussian( layers_source, real_A_shuffle4, embedding_ids, is_training=is_training, inst_norm=inst_norm, reuse=True) real_A_shuffle5 = tf.random_shuffle(real_A_shuffle4) fake_target_shuffle5, target_gaussian1_shuffle5, target_gaussian2_shuffle5 = self.generator_gaussian( layers_source, real_A_shuffle5, embedding_ids, is_training=is_training, inst_norm=inst_norm, reuse=True) source_fake_e8, layers_source_fake = self.encoder( fake_target_shuffle, is_training=is_training, reuse=True) target_gaussian1_fake = self.gaussion_encoder(fake_target_shuffle, layers_source, is_training=is_training, reuse=True) #real_AB = tf.concat([real_A, real_B], 3) #fake_AB = tf.concat([real_A, fake_B], 3) # Note it is not possible to set reuse flag back to False # initialize all variables before setting reuse to True #real_D, real_D_logits, real_category_logits = self.discriminator(real_AB, is_training=is_training, reuse=False) #fake_D, fake_D_logits, fake_category_logits = self.discriminator(fake_AB, is_training=is_training, reuse=True) # encoding constant loss # this loss assume that generated imaged and real image # should reside in the same space and close to each other #encoded_fake_B = self.encoder(fake_B, is_training, reuse=True)[0] #const_loss = (tf.reduce_mean(tf.square(encoded_real_A - encoded_fake_B))) * self.Lconst_penalty # category loss #true_labels = tf.reshape(tf.one_hot(indices=embedding_ids, depth=self.embedding_num), # shape=[self.batch_size, self.embedding_num]) #real_category_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=target_gaussian1_fake, # labels=target_gaussian1)) real_category_loss = 
tf.reduce_sum( tf.abs(target_gaussian1_fake[:, :self.latent_dim] - target_gaussian1_shuffle[:, :self.latent_dim])) #fake_category_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_category_logits, # labels=true_labels)) #category_loss = self.Lcategory_penalty * (real_category_loss + fake_category_loss) # binary real/fake loss #d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=real_D_logits, # labels=tf.ones_like(real_D))) #d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_D_logits, # labels=tf.zeros_like(fake_D))) # L1 loss between real and generated images l1_loss = self.L1_penalty * tf.reduce_mean( tf.abs(fake_target_shuffle - real_A), [1, 2, 3]) l1_loss2 = self.L1_penalty * tf.reduce_mean( tf.abs(fake_target_shuffle2 - real_A), [1, 2, 3]) l1_loss3 = self.L1_penalty * tf.reduce_mean( tf.abs(fake_target_shuffle3 - real_A), [1, 2, 3]) l1_loss4 = self.L1_penalty * tf.reduce_mean( tf.abs(fake_target_shuffle4 - real_A), [1, 2, 3]) l1_loss5 = self.L1_penalty * tf.reduce_mean( tf.abs(fake_target_shuffle5 - real_A), [1, 2, 3]) # total variation loss #width = self.output_width #tv_loss = (tf.nn.l2_loss(fake_B[:, 1:, :, :] - fake_B[:, :width - 1, :, :]) / width # + tf.nn.l2_loss(fake_B[:, :, 1:, :] - fake_B[:, :, :width - 1, :]) / width) * self.Ltv_penalty # maximize the chance generator fool the discriminator #cheat_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_D_logits, # labels=tf.ones_like(fake_D))) #d_loss = d_loss_real + d_loss_fake + category_loss / 2.0 q_z = distributions.Normal( loc=target_gaussian1_shuffle[:, :self.latent_dim], scale=tf.nn.softplus(target_gaussian1_shuffle[:, self.latent_dim:])) q_z2 = distributions.Normal( loc=target_gaussian1_shuffle2[:, :self.latent_dim], scale=tf.nn.softplus(target_gaussian1_shuffle2[:, self.latent_dim:])) q_z3 = distributions.Normal( loc=target_gaussian1_shuffle3[:, :self.latent_dim], scale=tf.nn.softplus(target_gaussian1_shuffle3[:, self.latent_dim:])) q_z4 = distributions.Normal( loc=target_gaussian1_shuffle4[:, :self.latent_dim], scale=tf.nn.softplus(target_gaussian1_shuffle4[:, self.latent_dim:])) q_z5 = distributions.Normal( loc=target_gaussian1_shuffle5[:, :self.latent_dim], scale=tf.nn.softplus(target_gaussian1_shuffle5[:, self.latent_dim:])) # print(output.get_shape()) p_z = distributions.Normal(loc=np.zeros(self.latent_dim, dtype=np.float32), scale=np.ones(self.latent_dim, dtype=np.float32)) kl_loss = tf.reduce_sum(distributions.kl_divergence(q_z, p_z), 1) kl_loss2 = tf.reduce_sum(distributions.kl_divergence(q_z2, p_z), 1) kl_loss3 = tf.reduce_sum(distributions.kl_divergence(q_z3, p_z), 1) kl_loss4 = tf.reduce_sum(distributions.kl_divergence(q_z4, p_z), 1) kl_loss5 = tf.reduce_sum(distributions.kl_divergence(q_z5, p_z), 1) kl_loss_1 = tf.reduce_sum( tf.reduce_sum(distributions.kl_divergence(q_z, p_z), 1)) #const_loss = (tf.reduce_mean(tf.reduce_sum(tf.square(lay_AB["e1"] - lay_A["e1"]))+tf.reduce_sum(tf.square(lay_AB["e2"] - lay_A["e2"]))+tf.reduce_sum(tf.square(lay_AB["e3"] - lay_A["e3"]))+tf.reduce_sum(tf.square(lay_AB["e4"] - lay_A["e4"]))+tf.reduce_sum(tf.square(lay_AB["e5"] - lay_A["e5"]))+tf.reduce_sum(tf.square(lay_AB["e6"] - lay_A["e6"]))+tf.reduce_sum(tf.square(lay_AB["e7"] - lay_A["e7"])))) * self.Lconst_penalty const_loss1 = tf.reduce_mean( tf.square(layers_source_fake["e8"] - layers_source["e8"]), [1, 2, 3]) const_loss2 = 10.0 * tf.reduce_mean( tf.square(layers_source_fake["e7"] - layers_source["e7"]), [1, 2, 3]) 
const_loss3 = 5.0 * tf.reduce_mean( tf.square(layers_source_fake["e6"] - layers_source["e6"]), [1, 2, 3]) const_loss4 = tf.reduce_mean( tf.square(layers_source_fake["e5"] - layers_source["e5"]), [1, 2, 3]) const_loss = tf.reduce_sum(const_loss1 + const_loss2 + const_loss3 + const_loss4) * self.Lconst_penalty const_loss = (tf.reduce_sum(const_loss1)) * self.Lconst_penalty T_loss = tf.reduce_sum(l1_loss + kl_loss) T_loss2 = tf.reduce_sum(l1_loss2 + kl_loss2) T_loss3 = tf.reduce_sum(l1_loss3 + kl_loss3) T_loss4 = tf.reduce_sum(l1_loss4 + kl_loss4) T_loss5 = tf.reduce_sum(l1_loss5 + kl_loss5) #d_loss_real_summary = tf.summary.scalar("d_loss_real", d_loss_real) #d_loss_fake_summary = tf.summary.scalar("d_loss_fake", d_loss_fake) #category_loss_summary = tf.summary.scalar("category_loss", category_loss) #cheat_loss_summary = tf.summary.scalar("cheat_loss", cheat_loss) #l1_loss_summary = tf.summary.scalar("l1_loss", l1_loss) real_category_loss_summary = tf.summary.scalar( "real_category_loss", tf.reduce_sum(real_category_loss)) #const_loss_summary = tf.summary.scalar("const_loss", const_loss) #d_loss_summary = tf.summary.scalar("d_loss", d_loss) T_loss_summary = tf.summary.scalar("T_loss", T_loss) T_loss2_summary = tf.summary.scalar("T_loss2", T_loss2) T_loss3_summary = tf.summary.scalar("T_loss3", T_loss3) T_loss4_summary = tf.summary.scalar("T_loss4", T_loss4) T_loss5_summary = tf.summary.scalar("T_loss5", T_loss5) l1_loss_summary = tf.summary.scalar("l1_loss", tf.reduce_sum(l1_loss)) l1_loss2_summary = tf.summary.scalar("l1_loss2", tf.reduce_sum(l1_loss2)) kl_loss_summary = tf.summary.scalar("kl_loss", kl_loss_1) #kl_loss2_summary = tf.summary.scalar("kl_loss2", kl_loss2_1) #kl_loss_fake_AB_summary = tf.summary.scalar("kl_loss_fake_AB", kl_loss_fake_AB_1) const_loss_summary = tf.summary.scalar("const_loss_loss", const_loss) All_merged_summary = tf.summary.merge([ l1_loss_summary, l1_loss2_summary, T_loss_summary, kl_loss_summary, const_loss_summary, real_category_loss_summary, T_loss2_summary, T_loss3_summary, T_loss4_summary, T_loss5_summary ]) #tv_loss_summary = tf.summary.scalar("tv_loss", tv_loss) #d_merged_summary = tf.summary.merge([d_loss_real_summary, d_loss_fake_summary, # category_loss_summary, d_loss_summary]) #g_merged_summary = tf.summary.merge([cheat_loss_summary, l1_loss_summary, # fake_category_loss_summary, # const_loss_summary, # g_loss_summary, tv_loss_summary]) # expose useful nodes in the graph as handles globally input_handle = InputHandle(real_data=real_data, embedding_ids=embedding_ids, no_target_data=no_target_data, no_target_ids=no_target_ids) loss_handle = LossHandle( T_loss=T_loss, T_loss2=T_loss2, T_loss3=T_loss3, T_loss4=T_loss4, T_loss5=T_loss5, #const_loss=const_loss, kl_loss=kl_loss, l1_loss=l1_loss #category_loss=category_loss, #cheat_loss=cheat_loss, ) eval_handle = EvalHandle(encoder=target_gaussian2, generator=fake_target, generator2=fake_target_shuffle, target=real_A, source=real_B, embedding=embedding, gaussian_params=target_gaussian1_shuffle) summary_handle = SummaryHandle(T_sum=All_merged_summary) # those operations will be shared, so we need # to make them visible globally setattr(self, "input_handle", input_handle) setattr(self, "loss_handle", loss_handle) setattr(self, "eval_handle", eval_handle) setattr(self, "summary_handle", summary_handle)
def detection_targets_graph(proposals, gt_class_ids, gt_boxes, config):
    """Generates detection targets for one image. Subsamples proposals and
    generates target class IDs and bounding box deltas for each.

    Inputs:
    proposals: [N, (y1, x1, y2, x2)] in normalized coordinates. Might
               be zero padded if there are not enough proposals.
    gt_class_ids: [MAX_GT_INSTANCES] int class IDs
    gt_boxes: [MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized coordinates.

    Returns: Target ROIs and corresponding class IDs and bounding box shifts
    rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized coordinates
    class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. Zero padded.
    deltas: [TRAIN_ROIS_PER_IMAGE, NUM_CLASSES, (dy, dx, log(dh), log(dw))]
            Class-specific bbox refinements.

    Note: Returned arrays might be zero padded if not enough target ROIs.
    """
    # Assertions
    asserts = [
        tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals],
                  name="roi_assertion"),
    ]
    with tf.control_dependencies(asserts):
        proposals = tf.identity(proposals)

    # Remove zero padding
    proposals, _ = trim_zeros_graph(proposals, name="trim_proposals")
    gt_boxes, non_zeros = trim_zeros_graph(gt_boxes, name="trim_gt_boxes")
    gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros,
                                   name="trim_gt_class_ids")

    # Compute overlaps matrix [proposals, gt_boxes]
    overlaps = overlaps_graph(proposals, gt_boxes)

    # Determine positive and negative ROIs
    roi_iou_max = tf.reduce_max(overlaps, axis=1)
    # 1. Positive ROIs are those with >= 0.5 IoU with a GT box
    positive_roi_bool = (roi_iou_max >= 0.5)
    positive_indices = tf.where(positive_roi_bool)[:, 0]
    # 2. Negative ROIs are those with < 0.5 with every GT box. Skip crowds.
    negative_indices = tf.where(roi_iou_max < 0.5)[:, 0]

    # Subsample ROIs. Aim for 33% positive
    # Positive ROIs
    positive_count = int(config.TRAIN_ROIS_PER_IMAGE * config.ROI_POSITIVE_RATIO)
    positive_indices = tf.random_shuffle(positive_indices)[:positive_count]
    positive_count = tf.shape(positive_indices)[0]
    # Negative ROIs. Add enough to maintain positive:negative ratio.
    r = 1.0 / config.ROI_POSITIVE_RATIO
    negative_count = tf.cast(r * tf.cast(positive_count, tf.float32),
                             tf.int32) - positive_count
    negative_indices = tf.random_shuffle(negative_indices)[:negative_count]
    # Gather selected ROIs
    positive_rois = tf.gather(proposals, positive_indices)
    negative_rois = tf.gather(proposals, negative_indices)

    # Assign positive ROIs to GT boxes.
    positive_overlaps = tf.gather(overlaps, positive_indices)
    roi_gt_box_assignment = tf.argmax(positive_overlaps, axis=1)
    roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment)
    roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment)

    # Compute bbox refinement for positive ROIs
    deltas = KerasRFCN.Utils.box_refinement_graph(positive_rois, roi_gt_boxes)
    deltas /= config.BBOX_STD_DEV

    # Append negative ROIs and pad bbox deltas and masks that
    # are not used for negative ROIs with zeros.
    rois = tf.concat([positive_rois, negative_rois], axis=0)
    N = tf.shape(negative_rois)[0]
    P = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0)
    rois = tf.pad(rois, [(0, P), (0, 0)])
    roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)])
    roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)])
    deltas = tf.pad(deltas, [(0, N + P), (0, 0)])

    return rois, roi_gt_class_ids, deltas
def _get_randomized_indices(self): """Generates randomized indices into a sequence of a specific length.""" indices = tf.range(0, self._dataset_info.sequence_size) indices = tf.random_shuffle(indices) indices = tf.slice(indices, begin=[0], size=[self._example_size]) return indices
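A standalone demo of the helper above with assumed sequence_size and _example_size values, showing the shuffle-then-slice idiom for sampling indices without replacement:

import tensorflow as tf

sequence_size, example_size = 10, 5  # assumed dataset/config values
indices = tf.random_shuffle(tf.range(0, sequence_size))
indices = tf.slice(indices, begin=[0], size=[example_size])

with tf.Session() as sess:
    print(sess.run(indices))  # e.g. [7 0 4 9 2], five distinct indices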
def crop_proposal():
    rand_vec = lambda minval, maxval: tf.random_uniform(
        shape=(ssd_constants.NUM_CROP_PASSES, 1), minval=minval, maxval=maxval,
        dtype=tf.float32)

    width, height = rand_vec(0.3, 1), rand_vec(0.3, 1)
    left, top = rand_vec(0, 1 - width), rand_vec(0, 1 - height)

    right = left + width
    bottom = top + height
    ltrb = tf.concat([left, top, right, bottom], axis=1)

    min_iou = tf.random_shuffle(ssd_constants.CROP_MIN_IOU_CHOICES)[0]
    ious = calc_iou_tensor(ltrb, boxes)

    # discard any bboxes whose center is not in the cropped image
    xc, yc = [
        tf.tile(0.5 * (boxes[:, i + 0] + boxes[:, i + 2])[tf.newaxis, :],
                (ssd_constants.NUM_CROP_PASSES, 1)) for i in range(2)
    ]
    masks = tf.reduce_all(tf.stack([
        tf.greater(xc, tf.tile(left, (1, num_boxes))),
        tf.less(xc, tf.tile(right, (1, num_boxes))),
        tf.greater(yc, tf.tile(top, (1, num_boxes))),
        tf.less(yc, tf.tile(bottom, (1, num_boxes))),
    ], axis=2), axis=2)

    # Checks whether a crop is valid: the aspect ratio must stay below 2
    # in both directions.
    valid_aspect = tf.logical_and(tf.less(height / width, 2),
                                  tf.less(width / height, 2))
    valid_ious = tf.reduce_all(tf.greater(ious, min_iou), axis=1, keepdims=True)
    valid_masks = tf.reduce_any(masks, axis=1, keepdims=True)

    valid_all = tf.cast(
        tf.reduce_all(tf.concat([valid_aspect, valid_ious, valid_masks], axis=1),
                      axis=1), tf.int32)

    # One indexed, as zero is needed for the case of no matches.
    index = tf.range(1, 1 + ssd_constants.NUM_CROP_PASSES, dtype=tf.int32)
    # Either one-hot, or zeros if there is no valid crop.
    selection = tf.equal(tf.reduce_max(index * valid_all), index)

    use_crop = tf.reduce_any(selection)
    output_ltrb = tf.reduce_sum(tf.multiply(
        ltrb, tf.tile(tf.cast(selection, tf.float32)[:, tf.newaxis], (1, 4))),
        axis=0)
    output_masks = tf.reduce_any(tf.logical_and(
        masks, tf.tile(selection[:, tf.newaxis], (1, num_boxes))), axis=0)

    return use_crop, output_ltrb, output_masks
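The min-IoU threshold above is drawn by shuffling a constant list and taking its first element, i.e. a uniform random choice. In isolation, with the choice list assumed:

import tensorflow as tf

CROP_MIN_IOU_CHOICES = (0.0, 0.1, 0.3, 0.5, 0.7, 0.9)  # assumed constants
min_iou = tf.random_shuffle(CROP_MIN_IOU_CHOICES)[0]

with tf.Session() as sess:
    print(sess.run(min_iou))  # e.g. 0.3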
def detection_targets_graph(proposals, gt_class_ids, gt_boxes, gt_masks, config): """Generates detection targets for one image. Subsamples proposals and generates target class IDs, bounding box deltas, and masks for each. Inputs: proposals: [N, (y1, x1, y2, x2)] in normalized coordinates. Might be zero padded if there are not enough proposals. gt_class_ids: [MAX_GT_INSTANCES] int class IDs gt_boxes: [MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized coordinates. gt_masks: [height, width, MAX_GT_INSTANCES] of boolean type. Returns: Target ROIs and corresponding class IDs, bounding box shifts, and masks. rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized coordinates class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. Zero padded. deltas: [TRAIN_ROIS_PER_IMAGE, NUM_CLASSES, (dy, dx, log(dh), log(dw))] Class-specific bbox refinements. masks: [TRAIN_ROIS_PER_IMAGE, height, width). Masks cropped to bbox boundaries and resized to neural network output size. Note: Returned arrays might be zero padded if not enough target ROIs. """ # Assertions asserts = [ tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals], name="roi_assertion"), ] with tf.control_dependencies(asserts): proposals = tf.identity(proposals) # Remove zero padding proposals, _ = trim_zeros_graph(proposals, name="trim_proposals") gt_boxes, non_zeros = trim_zeros_graph(gt_boxes, name="trim_gt_boxes") gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros, name="trim_gt_class_ids") gt_masks = tf.gather(gt_masks, tf.where(non_zeros)[:, 0], axis=2, name="trim_gt_masks") # Handle COCO crowds # A crowd box in COCO is a bounding box around several instances. Exclude # them from training. A crowd box is given a negative class ID. crowd_ix = tf.where(gt_class_ids < 0)[:, 0] non_crowd_ix = tf.where(gt_class_ids > 0)[:, 0] crowd_boxes = tf.gather(gt_boxes, crowd_ix) crowd_masks = tf.gather(gt_masks, crowd_ix, axis=2) gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix) gt_boxes = tf.gather(gt_boxes, non_crowd_ix) gt_masks = tf.gather(gt_masks, non_crowd_ix, axis=2) # Compute overlaps matrix [proposals, gt_boxes] overlaps = overlaps_graph(proposals, gt_boxes) # Compute overlaps with crowd boxes [anchors, crowds] crowd_overlaps = overlaps_graph(proposals, crowd_boxes) crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1) no_crowd_bool = (crowd_iou_max < 0.001) # Determine positive and negative ROIs roi_iou_max = tf.reduce_max(overlaps, axis=1) # 1. Positive ROIs are those with >= 0.5 IoU with a GT box positive_roi_bool = (roi_iou_max >= 0.5) positive_indices = tf.where(positive_roi_bool)[:, 0] # 2. Negative ROIs are those with < 0.5 with every GT box. Skip crowds. negative_indices = tf.where( tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0] # Subsample ROIs. Aim for 33% positive # Positive ROIs positive_count = int(config.TRAIN_ROIS_PER_IMAGE * config.ROI_POSITIVE_RATIO) positive_indices = tf.random_shuffle(positive_indices)[:positive_count] positive_count = tf.shape(positive_indices)[0] # Negative ROIs. Add enough to maintain positive:negative ratio. r = 1.0 / config.ROI_POSITIVE_RATIO negative_count = tf.cast(r * tf.cast(positive_count, tf.float32), tf.int32) - positive_count negative_indices = tf.random_shuffle(negative_indices)[:negative_count] # Gather selected ROIs positive_rois = tf.gather(proposals, positive_indices) negative_rois = tf.gather(proposals, negative_indices) # Assign positive ROIs to GT boxes. 
positive_overlaps = tf.gather(overlaps, positive_indices) roi_gt_box_assignment = tf.cond( tf.greater(tf.shape(positive_overlaps)[1], 0), true_fn=lambda: tf.argmax(positive_overlaps, axis=1), false_fn=lambda: tf.cast(tf.constant([]), tf.int64)) roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment) roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment) # Compute bbox refinement for positive ROIs deltas = utils.box_refinement_graph(positive_rois, roi_gt_boxes) deltas /= config.BBOX_STD_DEV # Assign positive ROIs to GT masks # Permute masks to [N, height, width, 1] transposed_masks = tf.expand_dims(tf.transpose(gt_masks, [2, 0, 1]), -1) # Pick the right mask for each ROI roi_masks = tf.gather(transposed_masks, roi_gt_box_assignment) # Compute mask targets boxes = positive_rois if config.USE_MINI_MASK: # Transform ROI coordinates from normalized image space # to normalized mini-mask space. y1, x1, y2, x2 = tf.split(positive_rois, 4, axis=1) gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(roi_gt_boxes, 4, axis=1) gt_h = gt_y2 - gt_y1 gt_w = gt_x2 - gt_x1 y1 = (y1 - gt_y1) / gt_h x1 = (x1 - gt_x1) / gt_w y2 = (y2 - gt_y1) / gt_h x2 = (x2 - gt_x1) / gt_w boxes = tf.concat([y1, x1, y2, x2], 1) box_ids = tf.range(0, tf.shape(roi_masks)[0]) masks = tf.image.crop_and_resize(tf.cast(roi_masks, tf.float32), boxes, box_ids, config.MASK_SHAPE) # Remove the extra dimension from masks. masks = tf.squeeze(masks, axis=3) # Threshold mask pixels at 0.5 to have GT masks be 0 or 1 to use with # binary cross entropy loss. masks = tf.round(masks) # Append negative ROIs and pad bbox deltas and masks that # are not used for negative ROIs with zeros. rois = tf.concat([positive_rois, negative_rois], axis=0) N = tf.shape(negative_rois)[0] P = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0) rois = tf.pad(rois, [(0, P), (0, 0)]) roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)]) roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)]) deltas = tf.pad(deltas, [(0, N + P), (0, 0)]) masks = tf.pad(masks, [[0, N + P], (0, 0), (0, 0)]) return rois, roi_gt_class_ids, deltas, masks
def _make_dataset(self, binaries_fname_pattern, data_augmentation=False,
                  shuffle=True):
    """Creates a CIFAR-100 data set (helper used by ``.make_*_dataset`` below).

    Args:
        binaries_fname_pattern (str): Pattern of the ``.bin`` files from which
            to load images and labels (e.g. ``some/path/data_batch_*.bin``).
        data_augmentation (bool): Whether to apply data augmentation operations.
        shuffle (bool): Switch to turn on or off shuffling of the data set.
            Defaults to ``True``.

    Returns:
        A tf.data.Dataset yielding batches of CIFAR-100 data.
    """
    # Set number of bytes to read.
    label_bytes = 1
    label_offset = 1
    num_classes = 100
    depth = 3
    image_size = 32
    image_bytes = image_size * image_size * depth
    record_bytes = label_bytes + label_offset + image_bytes

    def parse_func(raw_record):
        """Function parsing data from raw binary records."""
        # Decode raw_record.
        record = tf.reshape(tf.decode_raw(raw_record, tf.uint8), [record_bytes])
        label = tf.cast(tf.slice(record, [label_offset], [label_bytes]), tf.int32)
        depth_major = tf.reshape(tf.slice(record, [label_bytes], [image_bytes]),
                                 [depth, image_size, image_size])
        image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32)

        # Add image pre-processing.
        if data_augmentation:
            image = tf.image.resize_image_with_crop_or_pad(
                image, image_size + 4, image_size + 4)
            image = tf.random_crop(image, [32, 32, 3])
            image = tf.image.random_flip_left_right(image)
            image = tf.image.random_brightness(image, max_delta=63. / 255.)
            image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
            image = tf.image.random_contrast(image, lower=0.2, upper=1.8)
        else:
            image = tf.image.resize_image_with_crop_or_pad(image, 32, 32)

        image = tf.image.per_image_standardization(image)
        label = tf.squeeze(tf.one_hot(label, depth=num_classes))
        return image, label

    with tf.name_scope(self._name):
        with tf.device('/cpu:0'):
            filenames = tf.matching_files(binaries_fname_pattern)
            filenames = tf.random_shuffle(filenames)
            data = tf.data.FixedLengthRecordDataset(filenames=filenames,
                                                    record_bytes=record_bytes)
            data = data.map(parse_func,
                            num_parallel_calls=(8 if data_augmentation else 4))
            if shuffle:
                data = data.shuffle(buffer_size=20000)
            data = data.batch(self._batch_size, drop_remainder=True)
            data = data.prefetch(buffer_size=4)
            return data
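One behavioral note on the file-level shuffle above: tf.random_shuffle draws a fresh permutation each time it is evaluated. When reproducibility matters, a seed can be pinned at the op level (and optionally the graph level); a minimal sketch with assumed filenames:

import tensorflow as tf

tf.set_random_seed(42)                                # graph-level seed (optional)
filenames = tf.constant(['a.bin', 'b.bin', 'c.bin'])  # assumed file list
shuffled = tf.random_shuffle(filenames, seed=7)       # op-level seed

with tf.Session() as sess:
    print(sess.run(shuffled))  # same first order in every fresh session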
        else:
            x = np.random.normal(3.0, 0.6)
            y = np.random.normal(1.0, 0.6)
        xy.append([x, y])
    return xy

# Create the input data tensor
n = 1000
tf.reset_default_graph()
inputXY = tf.constant(createData(n))

# Set the initial centroid coordinates
k = 5
tmpXY = tf.random_shuffle(inputXY)          # shuffle the input data
tmpCent = tf.slice(tmpXY, [0, 0], [k, -1])  # take the first k points as centroids
initCent = tf.Variable(tmpCent)             # initial centroid tensor

# Align the dimensions for computing data-to-centroid distances: 3-D tensors.
# expXY.get_shape()   --> (D0, D1, D2) = (1, n, 2) : expanded along D0
# expCent.get_shape() --> (D0, D1, D2) = (k, 1, 2) : expanded along D1
expXY = tf.expand_dims(inputXY, 0)     # expand along the D0 axis
expCent = tf.expand_dims(initCent, 1)  # expand along the D1 axis

# Compute the distances between the data points and the centroids
tmpDist = tf.square(tf.subtract(expXY, expCent))
dist = tf.sqrt(tf.reduce_sum(tmpDist, 2))  # sum over the D2 axis
error = tf.reduce_sum(dist)  # define the total distance as the error

# Assign each data point to the centroid with the smallest distance.
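# A plausible next step (not part of the original snippet): dist has shape
# (k, n), so argmin over axis 0 yields one cluster id per data point.
assignments = tf.argmin(dist, axis=0)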
def main(): parser = argparse.ArgumentParser() parser.add_argument("-l", "--learner-hosts", type=str, required=True, help="Comma-separated list of hostname:port pairs") parser.add_argument("-e", "--explorer-hosts", type=str, required=True, help="Comma-separated list of hostname:port pairs") parser.add_argument("-j", "--job-name", type=str, default="learner", help="One of 'learner', 'explorer'") parser.add_argument("-t", "--task-index", type=int, default=0, help="Index of task within the job") parser.add_argument( "-s", "--steps-per-explorer", type=int, default=1000000, help= "Max. steps an explorer should make in its env (in total before stopping)." ) parser.add_argument( "-m", "--max-steps-per-episode", type=int, default=100, help="Max. steps an explorer should make in one episode.") parser.add_argument( "-b", "--buffer-size", type=int, default=1000, help= "Number of time steps to store (round robin) in the local buffers of each explorer. " "The size of the global buffer will be this number times the number of explorers." ) parser.add_argument( "--learn-batch-size", type=int, default=16, help= "Size of a batch (number of episodes) to pull randomly from the main buffer " "for each learner iteration.") parser.add_argument( "-f", "--upload-frequency", type=int, default=4, help= "Every how many episodes does an explorer upload its local buffer of episodes " "to the learners?") parser.add_argument("--num-hidden", type=int, default=10, help="Number of hidden nodes.") parser.add_argument("-g", "--gamma", type=float, default=0.97, help="The discount factor gamma (default 0.9).") parser.add_argument( "-a", "--learning-rate", type=float, default=0.0005, help="The learning rate (alpha) to use for optimizing the cost.") args = parser.parse_args() learner_hosts = args.learner_hosts.split(",") explorer_hosts = args.explorer_hosts.split(",") # Create a cluster from the given hosts (learners and explorers). cluster = tf.train.ClusterSpec({ "learner": learner_hosts, "explorer": explorer_hosts }) # Create and start a server for the local task. server = tf.train.Server(cluster, job_name=args.job_name, task_index=args.task_index) # simple 1 hidden layer, feed-forward network num_inputs = 1 # input is always [1.0] -> blind env num_hidden = args.num_hidden num_out = 2 + 1 # left and right actions + value function # 1 disc. 
return, 1 action (0=left, 1=right), len_buffer_record = 2 #config = tf.ConfigProto(log_device_placement=True) ##config.gpu_options.allow_growth = True # need to set this in order to be able to run locally with GPU #config.gpu_options.per_process_gpu_memory_fraction = 0.4 # the global main policy: pi (live on the learner(s)) # - explorers sync their own policies (mu) with this at the beginning of an episode with tf.device( tf.train.replica_device_setter( ps_tasks=len(learner_hosts), ps_device="/job:learner", worker_device="/job:explorer/task:0")): weights_1_pi = tf.Variable(tf.truncated_normal(shape=(num_inputs, num_hidden), stddev=0.01), name="pi-W1") biases_1_pi = tf.Variable(tf.zeros(shape=(num_hidden, )), name="pi-b1") weights_2_pi = tf.Variable(tf.truncated_normal(shape=(num_hidden, num_out), stddev=0.01), name="pi-W2") biases_2_pi = tf.Variable(tf.zeros(shape=(num_out, )), name="pi-b2") # main experience buffer (on central learner) # - create experience buffer in both explorer and learner, but host it on learner # - each explorer writes to a certain chunk in round robin fashion size_main_experience_buffer = args.buffer_size * len(explorer_hosts) with tf.device("/job:learner/task:0"): main_experience_buffer = tf.Variable(tf.zeros( [size_main_experience_buffer, len_buffer_record]), name="global-episode-buffer") global_step = tf.train.get_or_create_global_step() if args.job_name == "explorer": # local policy (mu) -> all zero; will be sync'd with pi anyway at start of each episode weights_1_mu = tf.Variable(tf.zeros(shape=(num_inputs, num_hidden)), name="mu-W1") biases_1_mu = tf.Variable(tf.zeros(shape=(num_hidden, )), name="mu-b1") weights_2_mu = tf.Variable(tf.zeros(shape=(num_hidden, num_out)), name="mu-W2") biases_2_mu = tf.Variable(tf.zeros(shape=(num_out, )), name="mu-b2") # ops that sync from the main policy (pi) (using locking) # - must fetch these after a reset of the env (before querying the first action in each episode) sync_ops = [ tf.assign(weights_1_mu, weights_1_pi, name="sync-W1"), tf.assign(biases_1_mu, biases_1_pi, name="sync-b1"), tf.assign(weights_2_mu, weights_2_pi, name="sync-W2"), tf.assign(biases_2_mu, biases_2_pi, name="sync-b2") ] # number of actions (log-action-probs) to sample num_action_samples = tf.placeholder(dtype=tf.int32, shape=()) # Buffer to store n (capacity) episodes of experiences (round-robin). # This one gets inserted into the learners global memory after each m (upload-frequency) episodes. # rank0=episode, rank1=step in episode, rank2=[action(0=left, 1=right), reward, mu(a)] experience_buffer = tf.Variable(tf.zeros( [args.buffer_size, len_buffer_record]), name="episode-buffer") experience_buffer_idx = tf.placeholder(dtype=tf.int32, shape=()) # In case we would like to use LSTM -> need to pass the initial internal state to learner # so it can replay the episode through pi (instead of mu). 
# init_internal_buffer = tf.Variable(tf.zeros([args.buffer_size, num_internal_state]), # name="init-internal-buffer") # upload op (from local experience buffer to global one) start = args.task_index * args.buffer_size stop = start + args.buffer_size experience_upload = tf.assign(main_experience_buffer[start:stop, :], experience_buffer) # forward pass -> let local explorer handle this (as it's needed right here for querying actions) hidden_out = tf.add( tf.matmul(tf.ones(shape=(num_action_samples, num_inputs)), weights_1_mu), biases_1_mu) logits = tf.add(tf.matmul(hidden_out, weights_2_mu), biases_2_mu) action_prob = tf.nn.softmax( logits[:, :2]) # first two outputs are action logits action_distr = tf.distributions.Bernoulli(probs=action_prob[:, 1]) actions = action_distr.sample(sample_shape=num_action_samples) # store incoming step (a, R) in local experience buffer a_in = tf.placeholder(dtype=tf.float32, shape=(None, ), name="a-in") # 0.0=left, 1.0=right returns_in = tf.placeholder( dtype=tf.float32, shape=(None, ), name="returns-in") # None=timesteps in the episode # concat returns and log_aps within each timestep episode = tf.concat( [tf.expand_dims(a_in, 1), tf.expand_dims(returns_in, 1)], 1) episode_len = tf.shape(episode)[0] stop = experience_buffer_idx + episode_len # don't have to lock as the only one that's ever touching the local buffer is ourselves add_episode = tf.cond( stop <= args.buffer_size, # true fn lambda: tf.group( tf.assign(experience_buffer[experience_buffer_idx:stop], episode, use_locking=False)), # false fn lambda: tf.group( tf.assign(experience_buffer[experience_buffer_idx:], episode[:args.buffer_size - experience_buffer_idx], use_locking=False), tf.assign(experience_buffer[:episode_len - ( args.buffer_size - experience_buffer_idx)], episode[args.buffer_size - experience_buffer_idx:], use_locking=False))) # create our own private env env = Env() total_steps = 0 num_episodes = 0 with tf.train.MonitoredTrainingSession( master=server.target, is_chief=(args.task_index == 0), #config=config, hooks=[]) as mon_sess: while total_steps < args.steps_per_explorer: rs = [] # discounted accum. 
rewards over one episode as_ = [] # the actual actions taken # reset the env env.reset() episode_steps = 0 buffer_idx = 0 # update our mu with pi from learner fetches = mon_sess.run(sync_ops) while episode_steps < args.max_steps_per_episode and total_steps < args.steps_per_explorer: s = env.state a = mon_sess.run(actions, feed_dict={num_action_samples: 1}) a = a[0][0] # a=0 for 'left', a=1 for 'right' as_.append(a) r, is_terminal = env.execute(a) rs.append(r) print( "{:03d} explorer {}: s={} action {} (1=right) s'={} is-term={}" .format(total_steps, args.task_index, s, a, env.state, is_terminal)) total_steps += 1 episode_steps += 1 if is_terminal: # calculate discounted accumulated rewards (returns) returns = discount(rs, args.gamma) # add episode to our buffer mon_sess.run( [add_episode], feed_dict={ returns_in: returns, a_in: as_, experience_buffer_idx: buffer_idx }) env.reset() buffer_idx = (buffer_idx + len(rs)) % args.buffer_size rs = [] as_ = [] episode_steps = 0 num_episodes += 1 fetches = sync_ops if num_episodes % args.upload_frequency == 0: print( "num_episodes={}: uploading local buffer to global buffer" .format(num_episodes)) fetches.append(experience_upload) fetches = mon_sess.run(fetches) print("Explorer {} is done!".format(args.task_index)) # - every learner iteration, it samples randomly from the main buffer and learns else: # build the pi-network (similar to mu-network above) #with tf.device("/gpu:0"): hidden_out = tf.add( tf.matmul(tf.ones(shape=(args.learn_batch_size, num_inputs)), weights_1_pi), biases_1_pi) logits = tf.add(tf.matmul(hidden_out, weights_2_pi), biases_2_pi) action_right_prob = tf.nn.softmax( logits[:, :2])[:, 1] # first two outputs are action logits (1=right) avg_action_right_prob = tf.reduce_mean(action_right_prob) values = logits[:, 2:3] # last output is the state-value # for now: do simple REINFORCE (add v-trace later or directly to tensorforce as it's not really different) # get a random batch from the main buffer indexes = tf.random_shuffle(tf.range(size_main_experience_buffer)) sample = tf.gather(main_experience_buffer, indexes[:args.learn_batch_size]) # separate action (0=left, 1=right) and discounted accum. reward actions, returns = tf.split(sample, num_or_size_splits=len_buffer_record, axis=1) # probability of the action actually taken action_prob = tf.abs( tf.subtract(actions, tf.ones(tf.shape(actions))) + action_right_prob) log_action_prob = tf.log(action_prob) advantage = (returns - values) # define our loss function alpha_1 = 0.5 # log-action-prob loss alpha_2 = 0.4 # value function loss alpha_3 = 0.1 # regularization loss = - alpha_1 * tf.reduce_mean(tf.multiply(log_action_prob, advantage)) + \ alpha_2 * tf.reduce_mean(0.5 * tf.square(advantage)) + \ alpha_3 * 0 # TODO: add regularization train_op = tf.train.AdagradOptimizer(args.learning_rate).minimize( loss, global_step=global_step) with tf.train.MonitoredTrainingSession(master=server.target, is_chief=(args.task_index == 0), hooks=[]) as mon_sess: while not mon_sess.should_stop(): sample_out, _, loss_out, g_step, avg_right_prob_out = mon_sess.run( [ sample, train_op, loss, global_step, avg_action_right_prob ]) print("task {} step {} loss {} avg-right-prob={}".format( args.task_index, g_step, loss_out, avg_right_prob_out)) print("Learner {} is done!".format(args.task_index))
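The learner's batch sampling above is the standard shuffle-indices-then-gather pattern for drawing a uniform random minibatch from a buffer variable. In isolation, with assumed sizes:

import tensorflow as tf

buffer_size, record_len, batch_size = 8, 2, 3  # assumed sizes
buffer_var = tf.Variable(tf.zeros([buffer_size, record_len]))
indexes = tf.random_shuffle(tf.range(buffer_size))
sample = tf.gather(buffer_var, indexes[:batch_size])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(sample).shape)  # (3, 2)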
def batch_inputs(self, dataset, train): """Contruct batches of training or evaluation examples from the image input_data. Args: dataset: instance of Dataset class specifying the input_data. See input_data.py for details. batch_size: integer train: boolean num_preprocess_threads: integer, total number of preprocessing threads num_readers: integer, number of parallel readers Returns: images: 4-D float Tensor of a batch of images labels: 1-D integer Tensor of [batch_size]. Raises: ValueError: if data is not found """ with tf.name_scope('batch_processing'): data_files = dataset.data_files() if data_files is None: raise ValueError('No data files found for this input_data') # Create filename_queue if train: filename_queue = tf.train.string_input_producer(data_files, shuffle=True, capacity=16) else: filename_queue = tf.train.string_input_producer(data_files, shuffle=False, capacity=1) # Approximate number of examples per shard. examples_per_shard = 1024 # Size the random shuffle queue to balance between good global # mixing (more examples) and memory use (fewer examples). # 1 image uses 299*299*3*4 bytes = 1MB # The default input_queue_memory_factor is 16 implying a shuffling queue # size: examples_per_shard * 16 * 1MB = 17.6GB min_queue_examples = examples_per_shard * self.input_queue_memory_factor if train: examples_queue = tf.RandomShuffleQueue( capacity=min_queue_examples + 3 * self.batch_size, min_after_dequeue=min_queue_examples, dtypes=[tf.string]) else: examples_queue = tf.FIFOQueue(capacity=examples_per_shard + 3 * self.batch_size, dtypes=[tf.string]) # Create multiple readers to populate the queue of examples. if self.num_readers > 1: enqueue_ops = [] for _ in range(self.num_readers): reader = dataset.reader() _, value = reader.read(filename_queue) enqueue_ops.append(examples_queue.enqueue([value])) tf.train.queue_runner.add_queue_runner( tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops)) example_serialized = examples_queue.dequeue() else: reader = dataset.reader() _, example_serialized = reader.read(filename_queue) pos_queue = None neg_queue = None if self.batch_size < 2: pos_queue = tf.RandomShuffleQueue( name="pos-queue", capacity=10, min_after_dequeue=5, dtypes=[tf.float32, tf.float32, tf.string]) neg_queue = tf.RandomShuffleQueue( name="neg-queue", capacity=10, min_after_dequeue=5, dtypes=[tf.float32, tf.float32, tf.string]) pos_queue_enq = [] neg_queue_enq = [] with tf.name_scope('split-merge'): if train and self.ensure_posneg_balance: images_and_masks = [] for thread_id in range(self.num_preprocess_threads): # Parse a serialized Example proto to extract the image and metadata. 
image_buffer, mask_buffer, img_name_ = self.parse_example_proto( example_serialized) image_ = self.image_preprocessing( image_buffer, img_size=(self.input_size[0], self.input_size[1]), num_channels=self.input_size[2]) mask_ = self.image_preprocessing( mask_buffer, img_size=(self.mask_size[0], self.mask_size[1]), num_channels=self.mask_size[2]) image_ = tf.expand_dims(image_, 0) mask_ = tf.expand_dims(mask_, 0) img_name_ = tf.expand_dims(img_name_, 0) img_shape = tf.TensorShape([ image_.shape[1], image_.shape[2], image_.shape[3] ]) mask_shape = tf.TensorShape( [mask_.shape[1], mask_.shape[2], mask_.shape[3]]) img_name_shape = tf.TensorShape([]) # initialize pos/neg queues with proper shape size on first if pos_queue is None or neg_queue is None: pos_queue = tf.RandomShuffleQueue( name="pos-queue", capacity=10, min_after_dequeue=5, dtypes=[tf.float32, tf.float32, tf.string], shapes=[img_shape, mask_shape, img_name_shape]) neg_queue = tf.RandomShuffleQueue( name="neg-queue", capacity=10, min_after_dequeue=5, dtypes=[tf.float32, tf.float32, tf.string], shapes=[img_shape, mask_shape, img_name_shape]) is_pos = tf.squeeze( tf.reduce_sum(mask_, [1, 2], keep_dims=False)) neg_mask = tf.less_equal(is_pos, 0) pos_idx = tf.reshape( tf.where([tf.logical_not(neg_mask)]), [-1]) neg_idx = tf.reshape(tf.where([neg_mask]), [-1]) pos_data = [ tf.gather(image_, pos_idx), tf.gather(mask_, pos_idx), tf.gather(img_name_, pos_idx) ] neg_data = [ tf.gather(image_, neg_idx), tf.gather(mask_, neg_idx), tf.gather(img_name_, neg_idx) ] pos_queue_enq.append(pos_queue.enqueue_many(pos_data)) neg_queue_enq.append(neg_queue.enqueue_many(neg_data)) tf.train.queue_runner.add_queue_runner( tf.train.queue_runner.QueueRunner( pos_queue, pos_queue_enq)) tf.train.queue_runner.add_queue_runner( tf.train.queue_runner.QueueRunner( neg_queue, neg_queue_enq)) if self.batch_size >= 2: if self.batch_size % 2 != 0: raise Exception( "'batch_size' mod 2 != 0 ! only even batch sizes supported at the moment" ) num_deque = int(self.batch_size / 2) pos_data = pos_queue.dequeue_many(num_deque) neg_data = neg_queue.dequeue_many(num_deque) concat_data = [ tf.concat([pos_data[0], neg_data[0]], axis=0, name='Concat-img'), tf.concat([pos_data[1], neg_data[1]], axis=0, name='Concat-mask'), tf.concat([pos_data[2], neg_data[2]], axis=0, name='Concat-img-name') ] # randomly permute within batch size (is this even necessary ??) 
idx = tf.Variable(range(0, self.batch_size), trainable=False, dtype=tf.int32) idx = tf.random_shuffle(idx) images = tf.gather(concat_data[0], idx) masks = tf.gather(concat_data[1], idx) img_names = tf.gather(concat_data[2], idx) else: # positive only #images, masks, img_names = pos_queue.dequeue() # negative only #images, masks, img_names = neg_queue.dequeue() # mix 50/50 counter = tf.Variable(initial_value=0, trainable=False, dtype=tf.int32) counter = tf.assign_add(counter, 1) condition_term = tf.equal(tf.mod(counter, 2), tf.constant(0)) images, masks, img_names = tf.cond( condition_term, lambda: pos_queue.dequeue(), lambda: neg_queue.dequeue()) if self.use_random_rotation: images.set_shape( tensor_shape.as_shape([None, None, 1])) masks.set_shape( tensor_shape.as_shape([None, None, 1])) # randomly rotate image by 90 degrees rot_factor = tf.random_uniform([1], minval=0, maxval=3, dtype=tf.int32) rot_factor = tf.gather(rot_factor, 0) images = tf.image.rot90(images, k=rot_factor) masks = tf.image.rot90(masks, k=rot_factor) images = tf.expand_dims(images, axis=0) masks = tf.expand_dims(masks, axis=0) img_names = tf.expand_dims(img_names, axis=0) else: # Parse a serialized Example proto to extract the image and metadata. image_buffer, mask_buffer, img_names = self.parse_example_proto( example_serialized) images = self.image_preprocessing( image_buffer, img_size=(self.input_size[0], self.input_size[1]), num_channels=self.input_size[2]) masks = self.image_preprocessing( mask_buffer, img_size=(self.mask_size[0], self.mask_size[1]), num_channels=1) images = tf.expand_dims(images, axis=0) masks = tf.expand_dims(masks, axis=0) img_names = tf.expand_dims(img_names, axis=0) # Reshape images into these desired dimensions. images = tf.cast(images, tf.float32) masks = tf.cast(masks, tf.float32) images.set_shape( tensor_shape.as_shape( [self.batch_size, None, None, self.input_size[2]])) masks.set_shape( tensor_shape.as_shape([ self.batch_size, self.input_size[0], self.input_size[1], self.mask_size[2] ])) # Display the training images in the visualizer. tf.summary.image('images', images) tf.summary.image('masks', masks) return images, masks, img_names
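On the within-batch permutation near the end of batch_inputs: shuffling a single index vector and gathering every tensor with it is what keeps images, masks, and names aligned. A plain tf.range would suffice; the tf.Variable wrapper in the snippet is not required for the shuffle. A minimal sketch with an assumed batch of 4:

import tensorflow as tf

batch_size = 4  # assumed
idx = tf.random_shuffle(tf.range(batch_size))
images = tf.zeros([batch_size, 8, 8, 1])  # stand-ins for the real batch tensors
masks = tf.ones([batch_size, 8, 8, 1])
images, masks = tf.gather(images, idx), tf.gather(masks, idx)  # same permutation for both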
for i in range(num_vectors):
    if np.random.random() > 0.5:
        x_values.append(np.random.normal(0.4, 0.7))
        y_values.append(np.random.normal(0.2, 0.8))
    else:
        x_values.append(np.random.normal(0.6, 0.4))
        y_values.append(np.random.normal(0.8, 0.5))

# Merge the x and y arrays into one array of vectors -> [[x0, y0], [x1, y1], ...]
vector_values = list(zip(x_values, y_values))
vectors = tf.constant(vector_values)

# Get the number of samples.
n_samples = tf.shape(vector_values)[0]
# Build an index array with n_samples entries, ranging over 0..n_samples-1.
sample_range = tf.range(0, n_samples)
# Shuffle the index array.
random_indices = tf.random_shuffle(sample_range)

begin = [0]
size = [num_clusters]
# Take num_clusters values from the shuffled index array: [a1, a2, a3, ...]
centroid_indices = tf.slice(random_indices, begin, size)
# Use those indices to pick num_clusters centroids from the original array:
# [[xa, ya], [xb, yb], ...]
centroids = tf.Variable(tf.gather(vector_values, centroid_indices))
# Add a dimension.
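The shuffle-and-slice idiom above is a common way to draw k distinct rows without replacement, e.g. for k-means centroid initialization. A condensed sketch (the helper name is mine):

    import tensorflow as tf

    def pick_random_centroids(vectors, num_clusters):
        # Shuffle all row indices and keep the first num_clusters of them,
        # which samples rows without replacement.
        indices = tf.random_shuffle(tf.range(tf.shape(vectors)[0]))
        return tf.gather(vectors, indices[:num_clusters])

Slicing the shuffled tf.range with Python syntax is equivalent to the tf.slice(random_indices, begin, size) call above.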
def model(params, examples, labels, epsilon):
    serp_len = params['serp_len']
    doc_emb_size = params['doc_emb'][-1]
    hidden_state_size = params['hidden_state_size']
    docs = examples['doc_tensors']
    batch_size = docs.shape[0].value
    batch_max_docs = tf.shape(docs)[1]
    docs_per_query = examples['n_docs']

    if params['context_input']:
        # Shuffle docs and labels together along the document axis by
        # concatenating the labels as an extra feature column.
        to_shuffle = tf.concat([tf.cast(labels[:, :, None], tf.float32), docs],
                               axis=2)
        shuffled = tf.random_shuffle(tf.transpose(to_shuffle, [1, 0, 2]))
        shuffled = tf.transpose(shuffled, [1, 0, 2])
        labels = tf.cast(tf.slice(shuffled, [0, 0, 0], [-1, -1, 1]), tf.int32)
        labels = labels[:, :, 0]
        docs = tf.slice(shuffled, [0, 0, 1], [-1, -1, -1])

    result = {'docs_per_query': docs_per_query}

    doc_emb = mu._shared_doc_embeddings(docs, params, '/main/doc_emb',
                                        inference=True)

    hidden_init = tf.zeros([batch_size, hidden_state_size])
    if params['context_input']:
        context_gru_fn = ru.get_gru_layer(params, '/main/gru/context',
                                          label_network=False,
                                          inference=True,
                                          reuse_variable_scope=False)
        scan_input = tf.transpose(doc_emb, [1, 0, 2])
        context = tf.scan(context_gru_fn, scan_input, hidden_init)
        ind_nd = tf.concat([docs_per_query - 1, tf.range(batch_size)[:, None]],
                           axis=1)
        hidden_init = tf.gather_nd(context, ind_nd)

    gru_fn = ru.get_gru_layer(params, '/main/gru',
                              label_network=False,
                              inference=True,
                              reuse_variable_scope=False)

    policy = mu.EpsilonGreedy(epsilon, batch_size, batch_max_docs,
                              docs_per_query)

    hidden_state = hidden_init
    serp = []
    serp_labels = []
    serp_ind = []
    for i in range(serp_len):
        hidden_states = tf.tile(hidden_state[:, None, :],
                                [1, batch_max_docs, 1])
        score_input = tf.concat([hidden_states, doc_emb], axis=2)
        scores = mu._create_subnetwork(score_input, params,
                                       subnetwork_name='/main/scoring',
                                       label_network=False,
                                       reuse_variable_scope=i > 0,
                                       inference=True)
        action = policy.choose(scores)
        serp_ind.append(action)
        nd_ind = tf.stack([tf.range(batch_size, dtype=tf.int64), action],
                          axis=1)
        select_doc = tf.gather_nd(docs, nd_ind)
        select_labels = tf.gather_nd(labels, nd_ind)[:, None]
        tf.summary.scalar('policy/scores/pos_%d' % i,
                          tf.reduce_mean(tf.gather_nd(scores, nd_ind)))
        serp_labels.append(
            tf.where(
                tf.less(i, docs_per_query),
                select_labels,
                tf.zeros([batch_size, 1], dtype=tf.int32),
            ))
        serp.append(select_doc)
        if i < serp_len - 1:
            select_emb = tf.gather_nd(doc_emb, nd_ind)
            hidden_state = gru_fn(hidden_state, select_emb)

    result['serp'] = tf.stack(serp, axis=1)
    result['serp_ind'] = tf.stack(serp_ind, axis=1)
    result['labels'] = tf.concat(serp_labels, axis=1)
    tf.summary.histogram("label/output", result['labels'])

    max_docs = params['max_docs']
    padding = tf.convert_to_tensor([[0, 0], [0, max_docs - batch_max_docs],
                                    [0, 0]])
    padded_docs = tf.pad(docs, padding, "CONSTANT")
    padded_docs = tf.reshape(padded_docs,
                             [batch_size, max_docs, docs.shape[2].value])
    result['docs'] = padded_docs
    return result
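tf.random_shuffle only permutes the first dimension, so the function above shuffles the document axis by transposing it to the front and keeps the labels aligned by riding them along as an extra feature column. The trick in isolation, with toy shapes, assuming TF 1.x:

    import tensorflow as tf

    labels = tf.constant([[0, 1, 2]])    # [batch, docs]
    docs = tf.random_normal([1, 3, 4])   # [batch, docs, feat]

    # Prepend the labels as a feature column so one shuffle moves both.
    joined = tf.concat([tf.cast(labels[:, :, None], tf.float32), docs], axis=2)
    # Move the doc axis to the front, shuffle it, and move it back.
    shuffled = tf.transpose(tf.random_shuffle(tf.transpose(joined, [1, 0, 2])),
                            [1, 0, 2])

    shuffled_labels = tf.cast(shuffled[:, :, 0], tf.int32)
    shuffled_docs = shuffled[:, :, 1:]

Note that this applies the same document permutation to every batch element, since only axis 0 is shuffled.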
def generate_curves(self):
    '''Generate the data.

    The generated function outputs floats, with inputs in [-2, 2].
    Returns: an NPRegressionDescription namedtuple.
    '''
    # Sample the number of context points uniformly at random.
    num_context = tf.random_uniform(shape=[], minval=3,
                                    maxval=self._max_num_context,
                                    dtype=tf.int32)

    # At test time, generate more points so the curve can be plotted.
    if self._testing:
        # This has to vary together with x_size.
        num_target = tf.cast(400 / self._x_size, dtype=tf.int32)
        num_total_points = num_target
        # tf.tile repeats along an existing dimension.
        # Generate data from -2.0 to 2.0 with a step of 0.01.
        x_values = tf.tile(
            tf.expand_dims(tf.range(-2., 2., 1. / 100, dtype=tf.float32),
                           axis=0),
            [self._batch_size, 1])
        x_values = tf.expand_dims(x_values, axis=-1)
        # Reshape to match x_size.
        x_values = tf.reshape(
            x_values, (self._batch_size, num_total_points, self._x_size))
    else:
        # During training, pick the number of target points and their
        # x coordinates at random.
        num_target = tf.random_uniform(shape=(), minval=0,
                                       maxval=self._max_num_context - num_context,
                                       dtype=tf.int32)
        num_total_points = num_context + num_target
        x_values = tf.random_uniform(
            [self._batch_size, num_total_points, self._x_size],
            minval=-2, maxval=2)

    # Set the kernel parameters.
    if self._random_kernel_parameters:
        # Sample the parameters at random.
        l1 = tf.random_uniform([self._batch_size, self._y_size, self._x_size],
                               0.1, self._l1_scale)
        sigma_f = tf.random_uniform([self._batch_size, self._y_size],
                                    0.1, self._sigma_scale)
    else:
        # Use fixed parameters.
        l1 = tf.ones([self._batch_size, self._y_size, self._x_size]) * self._l1_scale
        sigma_f = tf.ones([self._batch_size, self._y_size]) * self._sigma_scale

    # [B, y_size, num_total_points, num_total_points]
    kernel = self._gaussian_kernels(x_values, l1, sigma_f)

    # Cholesky decomposition.
    cholesky = tf.cast(tf.cholesky(tf.cast(kernel, tf.float64)), tf.float32)

    # Sample.
    # [B, y_size, num_total_points, 1]
    y_values = tf.matmul(
        cholesky,
        tf.random_uniform([self._batch_size, self._y_size, num_total_points, 1]))

    # tf.transpose permutes the dimensions according to perm;
    # tf.squeeze removes the given size-1 dimension.
    # [B, num_total_points, y_size]
    y_values = tf.transpose(tf.squeeze(y_values, 3), perm=[0, 2, 1])

    if self._testing:
        target_x = x_values
        target_y = y_values
        # Select the context observations from a shuffled index.
        idx = tf.random_shuffle(tf.range(num_target))
        # tf.gather reads entries of params at the given indices.
        context_x = tf.gather(params=x_values, indices=idx[:num_context], axis=1)
        context_y = tf.gather(params=y_values, indices=idx[:num_context], axis=1)
    else:
        target_x = x_values[:, :num_target + num_context, :]
        target_y = y_values[:, :num_target + num_context, :]
        context_x = x_values[:, :num_context, :]
        context_y = y_values[:, :num_context, :]

    query = ((context_x, context_y), target_x)

    return NPRegressionDescription(
        query=query,
        target_y=target_y,
        num_total_points=tf.shape(target_x)[1],
        num_context_points=num_context)
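For reference, the Cholesky step above is the standard way to draw correlated function values from a kernel matrix: factor K = LLᵀ and multiply L by a noise vector. A minimal standalone sketch with an RBF kernel (note that a true Gaussian-process draw uses tf.random_normal for the noise, whereas the code above feeds tf.random_uniform through the same factor):

    import tensorflow as tf

    num_points = 8
    x = tf.random_uniform([num_points, 1], minval=-2., maxval=2.)

    # Squared-exponential kernel plus a small jitter term for numerical
    # stability of the Cholesky factorization.
    sq_dist = tf.square(x - tf.transpose(x))
    kernel = tf.exp(-0.5 * sq_dist) + 1e-6 * tf.eye(num_points)

    # y = L z with K = L L^T gives y ~ N(0, K).
    chol = tf.cholesky(kernel)
    y = tf.matmul(chol, tf.random_normal([num_points, 1]))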
def _subsampling(self, normalized_rois, gt_bboxes, gt_labels,
                 pos_iou_thresh=0.5, exclusive_iou_tresh=0.1,
                 pos_ratio=0.25):
    """Sample RoIs based on their IoU with the ground truth.

    RoIs whose IoU is at least pos_iou_thresh count as objects.
    Objects make up at most 25% of the samples
    (i.e. at most n_samples_per_batch * pos_ratio).
    RoIs below pos_iou_thresh but at least exclusive_iou_thresh count as
    non-objects.
    RoIs below exclusive_iou_thresh are chance overlaps with no useful
    signal (hard to learn from), so they are ignored.
    (This is the point the paper describes as a heuristic for hard
    example mining.)
    Each batch holds at most n_samples_per_batch samples.
    (If there are fewer than n_samples_per_batch, the output is
    zero-padded up to n_samples_per_batch.)
    Also returns the ground-truth labels and bbox offsets matching the
    sampled RoIs.

    Args:
        normalized_rois (tensor): RoIs from the RegionProposalLayer.
            (N, n_rois, 4)
            The third axis holds the top-left and bottom-right
            coordinates, normalized to [0, 1] by dividing by the input
            image height and width. (y1, x1, y2, x2)
        gt_bboxes (ndarray): Ground-truth bboxes.
            (N, config.n_max_gt_objects_per_image, 4)
            Coordinates are not normalized.
        gt_labels (ndarray): Ground-truth labels.
            (N, config.n_max_gt_objects_per_image)
            ==0: background, >=1: object

    Returns:
        sample_rois (tensor): Sampled RoIs.
            (N, n_samples_per_batch, 4)
            Coordinates on the third axis are normalized to [0, 1].
        sample_gt_offset (tensor): Offsets between the sampled RoIs and
            their matching bboxes.
            (N, n_samples_per_batch, 4)
            The third axis holds the normalized coordinates, standardized
            by dividing by self.config.bbox_refinement_std.
        sample_gt_labels (tensor): Labels of the bboxes matching the
            sampled RoIs. (N, n_samples_per_batch)
    """
    pos_roi_per_batch = round(self.n_samples_per_batch * pos_ratio)

    # Normalize gt_bboxes the same way as normalized_rois so the IoU
    # between them can be evaluated.
    input_h = self.config.image_shape[0]
    input_w = self.config.image_shape[1]
    normalized_gt_bboxes = bbox.normalize_bbox(gt_bboxes, input_h, input_w)

    # Split the inputs per batch element.
    normalized_rois = tf.split(normalized_rois, self.config.batch_size)
    normalized_gt_bboxes = tf.split(normalized_gt_bboxes,
                                    self.config.batch_size)
    gt_labels = tf.split(gt_labels, self.config.batch_size)

    sample_rois = []
    sample_gt_offsets = []
    sample_gt_labels = []

    for roi, gt_bbox, gt_label in zip(normalized_rois,
                                      normalized_gt_bboxes, gt_labels):
        # Axis 0 (the batch dimension) is no longer needed, so drop it.
        roi = log.tfprint(roi, "roi: ")
        gt_bbox = log.tfprint(gt_bbox, "gt_bbox: ")
        gt_label = log.tfprint(gt_label, "gt_label: ")

        roi = K.squeeze(roi, 0)
        gt_bbox = K.squeeze(gt_bbox, 0)
        gt_label = K.squeeze(gt_label, 0)

        roi = log.tfprint(roi, "roi_squeezed: ")
        gt_bbox = log.tfprint(gt_bbox, "gt_bbox_squeezed: ")
        gt_label = log.tfprint(gt_label, "gt_label_squeezed: ")

        # Drop zero-padded rows.
        idx_roi_row = K.flatten(tf.where(K.any(roi, axis=1)))
        idx_gt_bbox = K.flatten(tf.where(K.any(gt_bbox, axis=1)))
        roi = K.gather(roi, idx_roi_row)
        # gt_bbox and gt_label have the same rows in the same order,
        # so the same indices apply to both.
        gt_bbox = K.gather(gt_bbox, idx_gt_bbox)
        gt_label = K.gather(gt_label, idx_gt_bbox)

        gt_bbox = log.tfprint(gt_bbox, "gt_bbox_gathered: ")
        gt_label = log.tfprint(gt_label, "gt_label_gathered: ")

        # Compute the IoU. (n_rois, )
        ious = bbox.get_iou_K(roi, gt_bbox)
        ious = log.tfprint(ious, "ious: ")

        # For each RoI, find the bbox with the highest IoU.
        idx_max_gt = K.argmax(ious, axis=1)
        idx_max_gt = log.tfprint(idx_max_gt, "idx_max_gt: ")

        max_iou = K.max(ious, axis=1)  # max_iou has as many rows as roi
        max_iou = log.tfprint(max_iou, "max_iou: ")

        idx_pos = K.flatten(tf.where(max_iou >= pos_iou_thresh))

        # Cap the number of positive samples at pos_roi_per_batch.
        limit_pos = K.minimum(pos_roi_per_batch, K.shape(idx_pos)[0])
        idx_pos = K.switch(K.shape(idx_pos)[0] > 0,
                           tf.random_shuffle(idx_pos)[:limit_pos],
                           idx_pos)
        limit_pos = log.tfprint(limit_pos, "limit_pos: ")
        idx_pos = log.tfprint(idx_pos, "idx_pos: ")

        # Cap the number of negative samples at
        # n_samples_per_batch - limit_pos (i.e. the remainder).
        idx_neg = K.flatten(
            tf.where((max_iou < pos_iou_thresh)
                     & (max_iou >= exclusive_iou_tresh)))
        limit_neg = self.n_samples_per_batch - limit_pos
        limit_neg = K.minimum(limit_neg, K.shape(idx_neg)[0])
        idx_neg = K.switch(K.shape(idx_neg)[0] > 0,
                           tf.random_shuffle(idx_neg)[:limit_neg],
                           idx_neg)
        limit_neg = log.tfprint(limit_neg, "limit_neg: ")
        idx_neg = log.tfprint(idx_neg, "idx_neg: ")

        # Collect the samples to return.
        # The GT offsets and labels are aligned with the RoIs, i.e.
        # stored at the same positions.
        idx_keep = K.concatenate((idx_pos, idx_neg))
        idx_keep = log.tfprint(idx_keep, "idx_keep: ")

        # For each RoI's max-IoU index, also keep only the returned samples.
        idx_gt_keep = K.gather(idx_max_gt, idx_keep)
        # Indices keeping only the samples counted as positives
        # (IoU above the threshold).
        idx_gt_keep_pos = K.gather(idx_max_gt, idx_pos)
        idx_gt_keep = log.tfprint(idx_gt_keep, "idx_gt_keep: ")

        sample_roi = K.gather(roi, idx_keep)
        sample_gt_offset = bbox.get_offset_K(
            sample_roi, K.gather(gt_bbox, idx_gt_keep))
        # Negative samples get label 0.
        sample_gt_label = K.concatenate((
            K.cast(K.gather(gt_label, idx_gt_keep_pos), dtype='int32'),
            # K.zeros does not accept a 0-d tensor, so wrap it in a list.
            K.zeros([limit_neg], dtype='int32')))

        # Zero-pad when there are fewer than n_samples_per_batch rows.
        remain = tf.maximum(
            self.n_samples_per_batch - tf.shape(sample_roi)[0], 0)
        sample_roi = tf.pad(sample_roi, [(0, remain), (0, 0)],
                            name='subsample_sample_roi')
        sample_gt_offset = tf.pad(sample_gt_offset, [(0, remain), (0, 0)],
                                  name='subsample_sample_gt_offset')
        sample_gt_offset /= self.config.bbox_refinement_std
        sample_gt_label = tf.pad(sample_gt_label, [(0, remain)],
                                 name='subsample_sample_gt_label')

        sample_roi = log.tfprint(sample_roi, "sample_roi: ")
        sample_gt_offset = log.tfprint(sample_gt_offset,
                                       "sample_gt_offset: ")
        sample_gt_label = log.tfprint(sample_gt_label,
                                      "sample_gt_label: ")

        sample_rois.append(sample_roi)
        sample_gt_offsets.append(sample_gt_offset)
        sample_gt_labels.append(sample_gt_label)

    return [
        K.stack(sample_rois),
        K.stack(sample_gt_offsets),
        K.stack(sample_gt_labels)
    ]
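Stripped of the logging, padding, and per-batch bookkeeping, the sampling logic above reduces to capping the positives at a fraction of the batch and filling the remainder with negatives. A condensed sketch (function and parameter names are mine, assuming TF 1.x):

    import tensorflow as tf

    def sample_pos_neg(max_iou, n_samples=64, pos_ratio=0.25,
                       pos_thresh=0.5, neg_low_thresh=0.1):
        # Candidate positives/negatives from each RoI's best IoU with the GT.
        pos_idx = tf.where(max_iou >= pos_thresh)[:, 0]
        neg_idx = tf.where((max_iou < pos_thresh)
                           & (max_iou >= neg_low_thresh))[:, 0]
        # At most n_samples * pos_ratio positives; negatives fill the rest.
        n_pos = tf.minimum(int(n_samples * pos_ratio), tf.shape(pos_idx)[0])
        pos_idx = tf.random_shuffle(pos_idx)[:n_pos]
        n_neg = tf.minimum(n_samples - n_pos, tf.shape(neg_idx)[0])
        neg_idx = tf.random_shuffle(neg_idx)[:n_neg]
        return pos_idx, neg_idx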
def main(self):
    with tf.Graph().as_default() as graph, tf.device('/cpu:0'):

        num_gpu = len(cfgs.GPU_GROUP.strip().split(','))
        global_step = slim.get_or_create_global_step()
        lr = self.warmup_lr(cfgs.LR, global_step, cfgs.WARM_SETP,
                            num_gpu * cfgs.BATCH_SIZE)
        tf.summary.scalar('lr', lr)

        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
        retinanet = build_whole_network_batch.DetectionNetworkRetinaNet(
            cfgs=self.cfgs, is_training=True)

        with tf.name_scope('get_batch'):
            if cfgs.IMAGE_PYRAMID:
                shortside_len_list = tf.constant(cfgs.IMG_SHORT_SIDE_LEN)
                shortside_len = tf.random_shuffle(shortside_len_list)[0]
            else:
                shortside_len = cfgs.IMG_SHORT_SIDE_LEN

            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \
                self.reader.next_batch(dataset_name=cfgs.DATASET_NAME,
                                       batch_size=cfgs.BATCH_SIZE * num_gpu,
                                       shortside_len=shortside_len,
                                       is_training=True)

        # data processing
        inputs_list = []
        for i in range(num_gpu):
            start = i * cfgs.BATCH_SIZE
            end = (i + 1) * cfgs.BATCH_SIZE
            img = img_batch[start:end, :, :, :]

            pretrain_zoo = PretrainModelZoo()
            if self.cfgs.NET_NAME in pretrain_zoo.pth_zoo or self.cfgs.NET_NAME in pretrain_zoo.mxnet_zoo:
                img = img / tf.constant([cfgs.PIXEL_STD])

            gtboxes_and_label_r = tf.py_func(
                backward_convert,
                inp=[tf.reshape(gtboxes_and_label_batch[start:end], [-1, 9])],
                Tout=tf.float32)
            gtboxes_and_label_r = tf.reshape(gtboxes_and_label_r,
                                             [cfgs.BATCH_SIZE, -1, 6])

            gtboxes_and_label_h = get_horizen_minAreaRectangle(
                tf.reshape(gtboxes_and_label_batch[start:end], [-1, 9]))
            gtboxes_and_label_h = tf.reshape(gtboxes_and_label_h,
                                             [cfgs.BATCH_SIZE, -1, 5])

            num_objects = num_objects_batch[start:end]
            num_objects = tf.cast(
                tf.reshape(num_objects, [cfgs.BATCH_SIZE, -1]), tf.float32)

            img_h = img_h_batch[start:end]
            img_w = img_w_batch[start:end]

            inputs_list.append([img, gtboxes_and_label_h, gtboxes_and_label_r,
                                num_objects, img_h, img_w])

        tower_grads = []
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(
            cfgs.WEIGHT_DECAY)

        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpu):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        with slim.arg_scope(
                                [slim.model_variable, slim.variable],
                                device='/device:CPU:0'):
                            with slim.arg_scope(
                                    [slim.conv2d, slim.conv2d_in_plane,
                                     slim.conv2d_transpose,
                                     slim.separable_conv2d,
                                     slim.fully_connected],
                                    weights_regularizer=weights_regularizer,
                                    biases_regularizer=biases_regularizer,
                                    biases_initializer=tf.constant_initializer(0.0)):

                                gtboxes_and_label_h, gtboxes_and_label_r = tf.py_func(
                                    self.get_gtboxes_and_label,
                                    inp=[inputs_list[i][1], inputs_list[i][2],
                                         inputs_list[i][3]],
                                    Tout=[tf.float32, tf.float32])
                                gtboxes_and_label_h = tf.reshape(
                                    gtboxes_and_label_h,
                                    [cfgs.BATCH_SIZE, -1, 5])
                                gtboxes_and_label_r = tf.reshape(
                                    gtboxes_and_label_r,
                                    [cfgs.BATCH_SIZE, -1, 6])

                                img = inputs_list[i][0]
                                img_shape = inputs_list[i][-2:]
                                h_crop = tf.reduce_max(img_shape[0])
                                w_crop = tf.reduce_max(img_shape[1])
                                img = tf.image.crop_to_bounding_box(
                                    image=img,
                                    offset_height=0,
                                    offset_width=0,
                                    target_height=tf.cast(h_crop, tf.int32),
                                    target_width=tf.cast(w_crop, tf.int32))

                                outputs = retinanet.build_whole_detection_network(
                                    input_img_batch=img,
                                    gtboxes_batch_h=gtboxes_and_label_h,
                                    gtboxes_batch_r=gtboxes_and_label_r,
                                    gpu_id=i)

                                gtboxes_in_img_h = self.drawer.draw_boxes_with_categories(
                                    img_batch=tf.expand_dims(img[0, :, :, :], axis=0),
                                    boxes=gtboxes_and_label_h[0, :, :-1],
                                    labels=gtboxes_and_label_h[0, :, -1],
                                    method=0)
                                gtboxes_in_img_r = self.drawer.draw_boxes_with_categories(
                                    img_batch=tf.expand_dims(img[0, :, :, :], axis=0),
                                    boxes=gtboxes_and_label_r[0, :, :-1],
                                    labels=gtboxes_and_label_r[0, :, -1],
                                    method=1)
                                tf.summary.image('Compare/gtboxes_h_gpu:%d' % i,
                                                 gtboxes_in_img_h)
                                tf.summary.image('Compare/gtboxes_r_gpu:%d' % i,
                                                 gtboxes_in_img_r)

                                if cfgs.ADD_BOX_IN_TENSORBOARD:
                                    detections_in_img = self.drawer.draw_boxes_with_categories_and_scores(
                                        img_batch=tf.expand_dims(img[0, :, :, :], axis=0),
                                        boxes=outputs[0],
                                        scores=outputs[1],
                                        labels=outputs[2],
                                        method=1)
                                    tf.summary.image(
                                        'Compare/final_detection_gpu:%d' % i,
                                        detections_in_img)

                                loss_dict = outputs[-1]
                                total_loss_dict, total_losses = self.loss_dict(
                                    loss_dict, num_gpu)

                                if i == num_gpu - 1:
                                    regularization_losses = tf.get_collection(
                                        tf.GraphKeys.REGULARIZATION_LOSSES)
                                    total_losses = total_losses + tf.add_n(
                                        regularization_losses)

                        tf.get_variable_scope().reuse_variables()
                        grads = optimizer.compute_gradients(total_losses)
                        if cfgs.GRADIENT_CLIPPING_BY_NORM is not None:
                            grads = slim.learning.clip_gradient_norms(
                                grads, cfgs.GRADIENT_CLIPPING_BY_NORM)
                        tower_grads.append(grads)

        self.log_printer(retinanet, optimizer, global_step, tower_grads,
                         total_loss_dict, num_gpu * cfgs.BATCH_SIZE, graph)
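The only tf.random_shuffle use in this training script is the image-pyramid branch: shuffling the list of candidate short-side lengths and taking the first element samples one training scale per step. In isolation, with hypothetical scale values, assuming TF 1.x:

    import tensorflow as tf

    # Candidate short-side lengths for multi-scale training (toy values).
    scales = tf.constant([600, 700, 800, 900, 1000])
    # Shuffling and taking element 0 picks one scale uniformly at random.
    short_side = tf.random_shuffle(scales)[0]

Drawing a random index with tf.random_uniform and gathering would work just as well; the shuffle-and-take-first form is simply the more compact idiom in TF 1.x graph code.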
def detect_targets_graph(gt_boxes, gt_class_ids, proposals,
                         train_rois_per_image, roi_positive_ratio):
    """Generate the detection network's classification and regression
    targets for a single image.

    Proposals with IoU >= 0.5 are positive samples; IoU < 0.5 are negative.

    :param gt_boxes: GT box coordinates [MAX_GT_BOXs, (y1,x1,y2,x2,tag)], tag=0 means padding
    :param gt_class_ids: GT classes [MAX_GT_BOXs, 1+1]; the last element is the tag, tag=0 means padding
    :param proposals: [N, (y1,x1,y2,x2,tag)], tag=0 means padding
    :param train_rois_per_image: number of proposals to train on per image
    :param roi_positive_ratio: positive/negative ratio among the proposals
    :return:
    """
    # Remove padding.
    gt_boxes = tf_utils.remove_pad(gt_boxes)
    gt_class_ids = tf_utils.remove_pad(gt_class_ids)[:, 0]  # from [N, 1] to [N]
    proposals = tf_utils.remove_pad(proposals)
    proposals_num = tf.shape(proposals)[0]

    # Compute the IoU.
    iou = compute_iou(gt_boxes, proposals)  # [gt_num, rois_num]

    # IoU >= 0.5 is positive.
    proposals_iou_max = tf.reduce_max(iou, axis=0)  # [rois_num]
    positive_indices = tf.where(
        tf.logical_and(tf.equal(iou, proposals_iou_max),
                       tf.greater_equal(iou, 0.5)))
    gt_pos_idx = positive_indices[:, 0]        # gt index (first axis)
    proposal_pos_idx = positive_indices[:, 1]  # roi index (second axis)

    # Number of matched GTs before shuffling.
    match_gt_num = tf.shape(tf.unique(gt_pos_idx)[0])[0]

    gt_boxes_pos = tf.gather(gt_boxes, gt_pos_idx)
    class_ids = tf.gather(gt_class_ids, gt_pos_idx)
    proposal_pos = tf.gather(proposals, proposal_pos_idx)

    # Fix the final number of positives from the positive ratio.
    positive_num = tf.minimum(
        tf.shape(proposal_pos)[0],
        int(train_rois_per_image * roi_positive_ratio))
    gt_boxes_pos, class_ids, proposal_pos, gt_pos_idx = shuffle_sample(
        [gt_boxes_pos, class_ids, proposal_pos, gt_pos_idx],
        tf.shape(proposal_pos)[0], positive_num)
    # Number of matched GTs after shuffling.
    match_gt_num_after_shuffle = tf.shape(tf.unique(gt_pos_idx)[0])[0]

    # Compute the regression targets.
    deltas = regress_target(proposal_pos, gt_boxes_pos)

    # Negatives: IoU < 0.5 with every GT, and IoU > 0.1.
    proposal_iou_max = tf.reduce_max(iou, axis=0)
    proposal_neg_idx = tf.cond(
        # Handle the case of zero GTs: everything is negative.
        tf.greater(tf.shape(gt_boxes)[0], 0),
        true_fn=lambda: tf.where(
            tf.logical_and(proposal_iou_max < 0.5,
                           proposal_iou_max > 0.1))[:, 0],
        false_fn=lambda: tf.cast(tf.range(proposals_num), dtype=tf.int64))

    # Fix the number of negatives.
    negative_num = tf.minimum(train_rois_per_image - positive_num,
                              tf.shape(proposal_neg_idx)[0])
    proposal_neg_idx = tf.random_shuffle(proposal_neg_idx)[:negative_num]

    # Gather the negative samples.
    proposal_neg = tf.gather(proposals, proposal_neg_idx)
    class_ids_neg = tf.zeros(shape=[negative_num])  # background class, id 0
    deltas_neg = tf.zeros(shape=[negative_num, 4])

    # Merge the positive and negative samples.
    train_rois = tf.concat([proposal_pos, proposal_neg], axis=0)
    deltas = tf.concat([deltas, deltas_neg], axis=0)
    class_ids = tf.concat([class_ids, class_ids_neg], axis=0)

    # Pad to a fixed size (class_ids gets an extra dimension).
    class_ids, train_rois = tf_utils.pad_list_to_fixed_size(
        [tf.expand_dims(class_ids, axis=1), train_rois],
        train_rois_per_image)
    # Tag the negatives with -1 to simplify later processing.
    deltas = tf_utils.pad_to_fixed_size_with_negative(
        deltas, train_rois_per_image, negative_num=negative_num)

    # Other statistics.
    gt_num = tf.shape(gt_class_ids)[0]  # number of GTs
    miss_gt_num = gt_num - match_gt_num
    # GTs left without an assigned roi after shuffling.
    miss_gt_num_shuffle = gt_num - match_gt_num_after_shuffle
    # Smallest best-match IoU over the GTs.
    gt_min_max_iou = tf.reduce_min(tf.reduce_max(iou, axis=1))

    return [
        deltas, class_ids, train_rois,
        tf_utils.scalar_to_1d_tensor(miss_gt_num),
        tf_utils.scalar_to_1d_tensor(miss_gt_num_shuffle),
        tf_utils.scalar_to_1d_tensor(gt_min_max_iou),
        tf_utils.scalar_to_1d_tensor(positive_num),
        tf_utils.scalar_to_1d_tensor(negative_num),
        tf_utils.scalar_to_1d_tensor(proposals_num)
    ]
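shuffle_sample is an external helper here and its definition is not shown. One plausible sketch, consistent with how it is called above (a list of row-aligned tensors, the total row count, and the number to keep), applies a single shared permutation so the tensors stay aligned row-for-row:

    import tensorflow as tf

    def shuffle_sample(tensor_list, total_num, keep_num):
        # One shared permutation keeps the tensors aligned row-for-row.
        keep_idx = tf.random_shuffle(tf.range(total_num))[:keep_num]
        return [tf.gather(t, keep_idx) for t in tensor_list]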
def f1():
    # Choose a random window-length range from the sequence.
    range_min = tf.random_shuffle(tf.range(seq_len - window))[0]
    range_max = range_min + window
    return tf.range(range_min, range_max)
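Dropped into a graph with concrete values (toy numbers, assuming TF 1.x), the helper reads as below. tf.random_uniform with maxval=seq_len - window would pick the start point more directly; the shuffle form simply reuses the idiom seen throughout this collection.

    import tensorflow as tf

    seq_len, window = 10, 4
    # Random start of a contiguous window of `window` steps.
    range_min = tf.random_shuffle(tf.range(seq_len - window))[0]
    window_idx = tf.range(range_min, range_min + window)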