def selective_crop_and_resize(features,
                              boxes,
                              box_levels,
                              boundaries,
                              output_size=7,
                              sample_offset=0.5):
    """Crop and resize boxes from a stack of multi-level feature maps.

    Each box is mapped to one feature level; its features are sampled from
    that level following the ROIAlign technique
    (see https://arxiv.org/pdf/1703.06870.pdf, figure 3). For every box an
    (output_size, output_size) grid of sample points is laid over the box and
    the feature value at each point is obtained by bilinear interpolation.

    For performance the gather and the interpolation are done for all levels
    in a single pass:

    1. The multi-level features are flattened and gathered into a
       [batch_size, num_boxes, output_size*2, output_size*2, num_filters]
       tensor holding the four neighboring feature points of every vertex of
       the output grid.
    2. An interpolation kernel of shape
       [batch_size, num_boxes, output_size*2, output_size*2] is computed. Its
       last two axes can be seen as the stacked 2x2 interpolation kernels of
       all vertices of the output grid.
    3. Gathered features and kernel are multiplied element-wise and reduced
       with a 2x2 average pooling to the final spatial size `output_size`.

    Args:
      features: a 5-D tensor of shape
        [batch_size, num_levels, max_height, max_width, num_filters] that the
        crops are taken from.
      boxes: a 3-D tensor of shape [batch_size, num_boxes, 4] describing each
        box w.r.t. its feature map. boxes[:, :, 0:2] is the (y, x) grid
        position (float) of the top-left corner, boxes[:, :, 2:4] is the box
        size (h, w) (float) in pixels of the corresponding feature map.
      box_levels: a 3-D tensor of shape [batch_size, num_boxes, 1] with the
        0-based feature level index of each box.
      boundaries: a 3-D tensor of shape [batch_size, num_boxes, 2] with the
        (y, x) boundary of the feature map of each box. Resampled grid points
        beyond the boundary are clipped to it.
      output_size: a scalar, the output crop size.
      sample_offset: a float in [0, 1], the sub-pixel sample offset from the
        grid point.

    Returns:
      features_per_box: a 5-D tensor of shape
        [batch_size, num_boxes, output_size, output_size, num_filters]
        holding the cropped features.
    """
    (batch_size, num_levels, max_feature_height, max_feature_width,
     num_filters) = features.get_shape().as_list()
    _, num_boxes, _ = boxes.get_shape().as_list()

    # Each output vertex contributes two neighbors along every spatial axis.
    gathered_size = output_size * 2

    # Sample positions of the output grid w.r.t. the feature map.
    box_y, box_x = boxes[:, :, 0], boxes[:, :, 1]
    box_h, box_w = boxes[:, :, 2], boxes[:, :, 3]
    steps = [i + sample_offset for i in range(output_size)]
    box_grid_x = tf.stack(
        [box_x + step * box_w / output_size for step in steps], axis=2)
    box_grid_y = tf.stack(
        [box_y + step * box_h / output_size for step in steps], axis=2)

    # Lower neighbors, kept non-negative.
    box_grid_y0 = tf.maximum(0., tf.floor(box_grid_y))
    box_grid_x0 = tf.maximum(0., tf.floor(box_grid_x))

    # Lower/upper neighbor pairs, clipped to the per-box boundary.
    limit_x = tf.expand_dims(boundaries[:, :, 1], -1)
    limit_y = tf.expand_dims(boundaries[:, :, 0], -1)
    box_gridx0x1 = tf.stack(
        [tf.minimum(box_grid_x0, limit_x),
         tf.minimum(box_grid_x0 + 1, limit_x)],
        axis=3)
    box_gridy0y1 = tf.stack(
        [tf.minimum(box_grid_y0, limit_y),
         tf.minimum(box_grid_y0 + 1, limit_y)],
        axis=3)

    x_indices = tf.cast(
        tf.reshape(box_gridx0x1, [batch_size, num_boxes, gathered_size]),
        dtype=tf.int32)
    y_indices = tf.cast(
        tf.reshape(box_gridy0y1, [batch_size, num_boxes, gathered_size]),
        dtype=tf.int32)

    # Strides of the flattened [batch, level, height, width] layout.
    height_dim_offset = max_feature_width
    level_dim_offset = max_feature_height * height_dim_offset
    batch_dim_offset = num_levels * level_dim_offset

    batch_term = tf.tile(
        tf.reshape(tf.range(batch_size) * batch_dim_offset,
                   [batch_size, 1, 1, 1]),
        [1, num_boxes, gathered_size, gathered_size])
    level_term = tf.tile(
        tf.reshape(box_levels * level_dim_offset,
                   [batch_size, num_boxes, 1, 1]),
        [1, 1, gathered_size, gathered_size])
    row_term = tf.tile(
        tf.reshape(y_indices * height_dim_offset,
                   [batch_size, num_boxes, gathered_size, 1]),
        [1, 1, 1, gathered_size])
    col_term = tf.tile(
        tf.reshape(x_indices, [batch_size, num_boxes, 1, gathered_size]),
        [1, 1, gathered_size, 1])
    indices = tf.reshape(batch_term + level_term + row_term + col_term, [-1])

    flat_features = tf.reshape(features, [-1, num_filters])
    features_per_box = tf.reshape(
        tf.gather(flat_features, indices),
        [batch_size, num_boxes, gathered_size, gathered_size, num_filters])

    # The RoIAlign feature f is the bilinear interpolation of its four
    # neighboring feature points:
    #   f(y, x) = [hy, ly] * [[f00, f01], * [hx, lx]^T
    #                         [f10, f11]]
    #   f(y, x) = (hy*hx)f00 + (hy*lx)f01 + (ly*hx)f10 + (lx*ly)f11
    #   f(y, x) = w00*f00 + w01*f01 + w10*f10 + w11*f11
    ly = box_grid_y - box_grid_y0
    lx = box_grid_x - box_grid_x0
    hy = 1.0 - ly
    hx = 1.0 - lx
    kernel_x = tf.reshape(
        tf.stack([hx, lx], axis=3),
        [batch_size, num_boxes, 1, gathered_size])
    kernel_y = tf.reshape(
        tf.stack([hy, ly], axis=3),
        [batch_size, num_boxes, gathered_size, 1])

    # Implicit broadcast builds the full kernel. The factor 4 cancels the
    # division done by the 2x2 average pooling below.
    interpolation_kernel = kernel_y * kernel_x * 4

    # Weight the gathered features, then pool each 2x2 neighborhood.
    weights = tf.cast(
        tf.expand_dims(interpolation_kernel, axis=4),
        dtype=features_per_box.dtype)
    features_per_box = features_per_box * weights
    features_per_box = tf.reshape(
        features_per_box,
        [batch_size * num_boxes, gathered_size, gathered_size, num_filters])
    features_per_box = tf.nn.avg_pool(
        features_per_box, [1, 2, 2, 1], [1, 2, 2, 1], 'VALID')
    return tf.reshape(
        features_per_box,
        [batch_size, num_boxes, output_size, output_size, num_filters])
def _generate_detections_v2(boxes,
                            scores,
                            max_total_size=100,
                            nms_iou_threshold=0.3,
                            score_threshold=0.05,
                            pre_nms_num_boxes=5000):
    """Generate the final detections given the model outputs.

    NMS is unrolled over the classes in a Python loop; for each class the
    padded, sorted NMS helper is applied to the whole batch at once.

    Args:
      boxes: a tensor of shape [batch_size, N, num_classes, 4] or
        [batch_size, N, 1, 4] with the box predictions on all feature levels.
        N is the total number of anchors on all levels.
      scores: a tensor of shape [batch_size, N, num_classes] stacking the
        class scores on all feature levels. N is the total number of anchors
        on all levels and num_classes the number of classes predicted by the
        model.
      max_total_size: a scalar, the maximum number of boxes retained over all
        classes.
      nms_iou_threshold: a float, the IOU threshold above which two boxes are
        considered to overlap too much.
      score_threshold: a float, the score threshold used to remove boxes.
      pre_nms_num_boxes: an int, the number of top candidate detections kept
        per class before NMS.

    Returns:
      nmsed_boxes: `float` Tensor of shape [batch_size, max_total_size, 4]
        with the top detected boxes in [y1, x1, y2, x2].
      nmsed_scores: `float` Tensor of shape [batch_size, max_total_size] with
        the sorted confidence scores of the detected boxes.
      nmsed_classes: `int` Tensor of shape [batch_size, max_total_size] with
        the classes of the detected boxes.
      valid_detections: `int` Tensor of shape [batch_size]; only the top
        `valid_detections` boxes are valid detections.
    """
    per_class_boxes = []
    per_class_scores = []
    per_class_labels = []
    with tf.name_scope('generate_detections'):
        batch_size, _, num_classes_for_box, _ = boxes.get_shape().as_list()
        _, total_anchors, num_classes = scores.get_shape().as_list()

        # Keep only the best-scoring candidates of every class before NMS.
        top_scores, top_indices = _select_top_k_scores(
            scores, min(total_anchors, pre_nms_num_boxes))

        for class_id in range(num_classes):
            # With class-agnostic boxes (size-1 class axis) every class reads
            # slot 0.
            box_slot = min(num_classes_for_box - 1, class_id)
            class_boxes = tf.gather(
                boxes[:, :, box_slot, :],
                top_indices[:, :, class_id],
                batch_dims=1,
                axis=1)
            class_scores = top_scores[:, :, class_id]

            # Filter out low scores.
            class_boxes, class_scores = box_utils.filter_boxes_by_scores(
                class_boxes, class_scores,
                min_score_threshold=score_threshold)

            kept_scores, kept_boxes = nms.sorted_non_max_suppression_padded(
                tf.cast(class_scores, tf.float32),
                tf.cast(class_boxes, tf.float32),
                max_total_size,
                iou_threshold=nms_iou_threshold)

            per_class_boxes.append(kept_boxes)
            per_class_scores.append(kept_scores)
            per_class_labels.append(
                tf.fill([batch_size, max_total_size], class_id))

    # Merge the classes and keep the overall best `max_total_size` boxes.
    merged_boxes = tf.concat(per_class_boxes, axis=1)
    merged_scores = tf.concat(per_class_scores, axis=1)
    merged_classes = tf.concat(per_class_labels, axis=1)

    nmsed_scores, order = tf.nn.top_k(
        merged_scores, k=max_total_size, sorted=True)
    nmsed_boxes = tf.gather(merged_boxes, order, batch_dims=1, axis=1)
    nmsed_classes = tf.gather(merged_classes, order, batch_dims=1)

    # Entries whose score is greater than -1 are counted as valid.
    is_valid = tf.cast(tf.greater(nmsed_scores, -1), tf.int32)
    valid_detections = tf.reduce_sum(input_tensor=is_valid, axis=1)
    return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
def generate_curves(self, num_context=None):
    """Builds the op delivering the data.

    Generated functions are `float32` with x values between -2 and 2.

    Args:
      num_context: Number of context points. If None, chosen randomly.

    Returns:
      A `NPRegressionDescription` with the fields `context_x`, `context_y`,
      `target_x` and `target_y`.
    """
    if num_context is None:
        num_context = tf.random_uniform(
            shape=[], minval=3, maxval=self._max_num_context, dtype=tf.int32)

    if self._testing:
        # For testing use more targets, evenly spaced, so that the function
        # can be plotted.
        num_target = 400
        num_total_points = num_target
        grid = tf.range(-2., 2., 1. / 100, dtype=tf.float32)
        x_values = tf.tile(
            tf.expand_dims(grid, axis=0), [self._batch_size, 1])
        x_values = tf.expand_dims(x_values, axis=-1)
    else:
        # For training, the number of target points and their x-positions
        # are drawn at random.
        num_target = tf.random_uniform(
            shape=(),
            minval=0,
            maxval=self._max_num_context - num_context,
            dtype=tf.int32)
        num_total_points = num_context + num_target
        x_values = tf.random_uniform(
            [self._batch_size, num_total_points, self._x_size], -2, 2)

    # Kernel parameters: either random per mini-batch, or the same fixed
    # values for all mini-batches.
    l1_shape = [self._batch_size, self._y_size, self._x_size]
    sigma_shape = [self._batch_size, self._y_size]
    if self._random_kernel_parameters:
        l1 = tf.random_uniform(l1_shape, 0.1, self._l1_scale)
        sigma_f = tf.random_uniform(sigma_shape, 0.1, self._sigma_scale)
    else:
        l1 = tf.ones(shape=l1_shape) * self._l1_scale
        sigma_f = tf.ones(shape=sigma_shape) * self._sigma_scale

    # Pass the x_values through the Gaussian kernel:
    # [batch_size, y_size, num_total_points, num_total_points]
    kernel = self._gaussian_kernel(x_values, l1, sigma_f)

    # The Cholesky factor is computed in double precision for stability.
    cholesky = tf.cast(tf.cholesky(tf.cast(kernel, tf.float64)), tf.float32)

    # Sample a curve: [batch_size, y_size, num_total_points, 1]
    noise = tf.random_normal(
        [self._batch_size, self._y_size, num_total_points, 1])
    y_values = tf.matmul(cholesky, noise)

    # [batch_size, num_total_points, y_size]
    y_values = tf.transpose(tf.squeeze(y_values, 3), [0, 2, 1])

    if self._testing:
        # All points are targets; the observations are a random subset.
        target_x = x_values
        target_y = y_values
        idx = tf.random_shuffle(tf.range(num_target))
        context_idx = idx[:num_context]
        context_x = tf.gather(x_values, context_idx, axis=1)
        context_y = tf.gather(y_values, context_idx, axis=1)
    else:
        # The targets are the context points plus some new target points.
        target_x = x_values[:, :num_target + num_context, :]
        target_y = y_values[:, :num_target + num_context, :]
        # The observations are the leading `num_context` points.
        context_x = x_values[:, :num_context, :]
        context_y = y_values[:, :num_context, :]

    return NPRegressionDescription(
        context_x=context_x,
        context_y=context_y,
        target_x=target_x,
        target_y=target_y)
def get_batch_(data_):
    """Gathers the entries of `data_` selected by `indices`.

    `indices` is not a parameter: it is resolved from the enclosing (or
    module) scope at call time.
    """
    batch = tf.gather(data_, indices)
    return batch
def _fixed_frame(signal, frame_length, frame_step, first_axis=False):
    """tflite-compatible tf.signal.frame for fixed-size input.

    Args:
      signal: Tensor containing signal(s).
      frame_length: Number of samples to put in each frame.
      frame_step: Sample advance between successive frames.
      first_axis: If true, framing is applied to first axis of tensor;
        otherwise, it is applied to last axis.

    Returns:
      A new tensor where the last axis (or first, if first_axis) of input
      signal has been replaced by a (num_frames, frame_length) array of
      individual frames where each frame is drawn frame_step samples after
      the previous one.

    Raises:
      ValueError: if signal has an undefined axis length. This routine only
        supports framing of signals whose shape is fixed at graph-build time.
    """
    # fractions.gcd was removed in Python 3.9; math.gcd (Python 3.5+) is its
    # replacement. The fallback keeps older interpreters working.
    try:
        from math import gcd
    except ImportError:
        from fractions import gcd

    signal_shape = signal.shape.as_list()
    length_samples = signal_shape[0] if first_axis else signal_shape[-1]
    # An undefined static dimension is reported as None. Comparing None with
    # an int raises TypeError on Python 3, so it is checked explicitly to
    # raise the documented ValueError.
    if length_samples is None or length_samples <= 0:
        raise ValueError(
            'fixed framing requires predefined constant signal length')

    num_frames = max(0, 1 + (length_samples - frame_length) // frame_step)
    if first_axis:
        inner_dimensions = signal_shape[1:]
        result_shape = [num_frames, frame_length] + inner_dimensions
        gather_axis = 0
    else:
        outer_dimensions = signal_shape[:-1]
        result_shape = outer_dimensions + [num_frames, frame_length]
        # Currently tflite's gather only supports axis==0, but that may still
        # work if we want the last of 1 axes.
        gather_axis = len(outer_dimensions)

    # Frames are assembled from "subframes" whose length divides both the
    # frame length and the hop, so every frame is a whole number of them.
    subframe_length = gcd(frame_length, frame_step)
    subframes_per_frame = frame_length // subframe_length
    subframes_per_hop = frame_step // subframe_length
    num_subframes = length_samples // subframe_length

    if first_axis:
        trimmed_input_size = (
            [num_subframes * subframe_length] + inner_dimensions)
        subframe_shape = [num_subframes, subframe_length] + inner_dimensions
    else:
        trimmed_input_size = (
            outer_dimensions + [num_subframes * subframe_length])
        subframe_shape = outer_dimensions + [num_subframes, subframe_length]

    # Drop trailing samples that do not fill a subframe, then split the
    # framed axis into (num_subframes, subframe_length).
    subframes = tf.reshape(
        tf.slice(
            signal,
            begin=np.zeros(len(signal_shape), np.int32),
            size=trimmed_input_size),
        subframe_shape)

    # frame_selector is a [num_frames, 1] array holding the index of the
    # first subframe of every frame; it broadcasts over the columns, e.g.
    # [[0, 0, 0, 0], [2, 2, 2, 2], [4, 4, 4, 4]].
    frame_selector = np.reshape(
        np.arange(num_frames) * subframes_per_hop, [num_frames, 1])

    # subframe_selector is a [1, subframes_per_frame] array holding the
    # offset of each subframe within a frame; it broadcasts over the rows,
    # e.g. [[0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 3]].
    subframe_selector = np.reshape(
        np.arange(subframes_per_frame), [1, subframes_per_frame])

    # Their sum is a [num_frames, subframes_per_frame] array of indices for
    # tf.gather, e.g. [[0, 1, 2, 3], [2, 3, 4, 5], [4, 5, 6, 7]]. Reshaping
    # the gathered subframes stitches them together into frames.
    selector = frame_selector + subframe_selector

    frames = tf.reshape(
        tf.gather(subframes, selector.astype(np.int32), axis=gather_axis),
        result_shape)
    return frames
def encode_labels(self, labels):
    """Generates label encodings.

    Looks up `labels` along axis 0 of `self.label_embs`.
    """
    label_encodings = tf.gather(self.label_embs, labels, axis=0)
    return label_encodings
def create_model(self):
    """Builds the graph: placeholders, memory, unrolled steps, loss, train op.

    Populates (among others) `self.q_data`, `self.qa_data`, `self.target`,
    `self.memory`, `self.pred_logits`, `self.pred`, `self.loss`, `self.lr`,
    `self.global_step`, `self.train_op`, `self.tr_vrbs` and `self.saver`.
    """
    args = self.args

    # 'seq_len' means question sequences.
    self.q_data = tf.placeholder(
        tf.int32, [args.batch_size, args.seq_len], name='q_data')
    self.qa_data = tf.placeholder(
        tf.int32, [args.batch_size, args.seq_len], name='qa_data')
    self.target = tf.placeholder(
        tf.float32, [args.batch_size, args.seq_len], name='target')

    # Initialize memory.
    # NOTE(review): the extent of the variable scopes was reconstructed from
    # collapsed source; only the get_variable calls are assumed to be inside.
    with tf.variable_scope('Memory'):
        init_memory_key = tf.get_variable(
            'key',
            [args.memory_size, args.memory_key_state_dim],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        init_memory_value = tf.get_variable(
            'value',
            [args.memory_size, args.memory_value_state_dim],
            initializer=tf.truncated_normal_initializer(stddev=0.1))

    # Broadcast the memory value to
    # [batch size, memory size, memory state dim]: add a leading axis and
    # tile it batch_size times (tf.tile needs one multiple per input axis).
    init_memory_value = tf.tile(
        tf.expand_dims(init_memory_value, 0),
        tf.stack([args.batch_size, 1, 1]))
    print(init_memory_value.get_shape())

    self.memory = DKVMN(
        args.memory_size,
        args.memory_key_state_dim,
        args.memory_value_state_dim,
        init_memory_key=init_memory_key,
        init_memory_value=init_memory_value,
        name='DKVMN')

    # Embedding matrices: A for questions, B for question-answer pairs.
    with tf.variable_scope('Embedding'):
        q_embed_mtx = tf.get_variable(
            'q_embed',
            [args.n_questions + 1, args.memory_key_state_dim],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        qa_embed_mtx = tf.get_variable(
            'qa_embed',
            [2 * args.n_questions + 1, args.memory_value_state_dim],
            initializer=tf.truncated_normal_initializer(stddev=0.1))

    # [batch size, seq_len, memory key state dim], then split into a list of
    # 'seq_len' tensors of shape [batch size, 1, memory key state dim].
    q_embed_data = tf.nn.embedding_lookup(q_embed_mtx, self.q_data)
    slice_q_embed_data = tf.split(q_embed_data, args.seq_len, 1)

    # [batch size, seq_len, memory value state dim], then split into a list
    # of 'seq_len' tensors of shape [batch size, 1, memory value state dim].
    qa_embed_data = tf.nn.embedding_lookup(qa_embed_mtx, self.qa_data)
    slice_qa_embed_data = tf.split(qa_embed_data, args.seq_len, 1)

    prediction = []
    for step in range(args.seq_len):
        # Every step after the first reuses the linear-layer variables.
        reuse_flag = step != 0

        # k_t : [batch size, memory key state dim]
        q = tf.squeeze(slice_q_embed_data[step], 1)
        # Attention, [batch size, memory size]
        self.correlation_weight = self.memory.attention(q)
        # Read process, [batch size, memory value state dim]
        self.read_content = self.memory.read(self.correlation_weight)

        # Write process, [batch size, memory size, memory value state dim]
        # qa : [batch size, memory value state dim]
        qa = tf.squeeze(slice_qa_embed_data[step], 1)
        # The attribute is overwritten each step; only the last one remains.
        self.new_memory_value = self.memory.write(
            self.correlation_weight, qa, reuse=reuse_flag)

        mastery_level_prior_difficulty = tf.concat(
            [self.read_content, q], 1)
        # f_t
        summary_vector = tf.tanh(
            operations.linear(
                mastery_level_prior_difficulty,
                args.final_fc_dim,
                name='Summary_Vector',
                reuse=reuse_flag))
        # p_t
        step_logits = operations.linear(
            summary_vector, 1, name='Prediction', reuse=reuse_flag)
        prediction.append(step_logits)

    # 'prediction' is a seq_len-long list of [batch size, 1] tensors;
    # tf.stack gives [batch size, seq_len, 1], reshaped to
    # [batch size, seq_len].
    self.pred_logits = tf.reshape(
        tf.stack(prediction, axis=1), [args.batch_size, args.seq_len])

    # Loss: sigmoid cross entropy over the entries whose label is not -1.
    target_1d = tf.reshape(self.target, [-1])
    pred_logits_1d = tf.reshape(self.pred_logits, [-1])
    index = tf.where(
        tf.not_equal(target_1d, tf.constant(-1., dtype=tf.float32)))
    filtered_target = tf.gather(target_1d, index)
    filtered_logits = tf.gather(pred_logits_1d, index)
    self.loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=filtered_logits, labels=filtered_target))
    self.pred = tf.sigmoid(self.pred_logits)

    # Optimizer: momentum SGD. The learning rate is fed through a
    # placeholder and gradients are clipped by their global norm.
    self.global_step = tf.Variable(0, trainable=False)
    self.lr = tf.placeholder(tf.float32, [], name='learning_rate')
    optimizer = tf.train.MomentumOptimizer(self.lr, args.momentum)
    grads, vrbs = zip(*optimizer.compute_gradients(self.loss))
    clipped_grads, _ = tf.clip_by_global_norm(grads, args.maxgradnorm)
    self.train_op = optimizer.apply_gradients(
        zip(clipped_grads, vrbs), global_step=self.global_step)

    self.tr_vrbs = tf.trainable_variables()
    for variable in self.tr_vrbs:
        print(variable.name)

    self.saver = tf.train.Saver()
k_max = 30 # number of test samples - to be able to reduce total execution time n = 5000 # TensorFlow Graph calculation model x_train_ph = tf.placeholder(tf.float32, shape=x_train.shape) y_train_ph = tf.placeholder(tf.float32, shape=y_train.shape) x_test_ph = tf.placeholder(tf.float32, shape=x_test.shape[1:]) # Calculate L1-distances as negative to allow picking first top K entries after DESC sorting distances = tf.negative( tf.reduce_sum(tf.reduce_sum(tf.abs(tf.subtract(x_train_ph, x_test_ph)), axis=1), axis=1)) # Find top K entries after DESC sorting top_k_values, top_k_indices = tf.nn.top_k(distances, k=k_max + 1, sorted=True) top_k_max_labels = tf.gather(y_train_ph, top_k_indices) predictions = [] # Calculate predictions for different k - [1, k_max] for k in range(1, k_max + 1): top_k_labels = tf.slice(top_k_max_labels, begin=[0], size=[k]) unique_classes, ids, top_k_labels_counts = tf.unique_with_counts( top_k_labels) prediction = tf.gather(unique_classes, tf.argmax(top_k_labels_counts)) predictions.append(prediction) predictions = tf.stack(predictions) # Start TensorFlow Session correct_predictions_nums = np.zeros(k_max) with tf.Session() as session: for i in tqdm(range(0, n)): predicted_values = session.run(predictions,
def build_graph(self):
    """Builds the LSTM regression graph.

    The model is fed three things for training:
      - inputs: training data X
      - targets: training label y
      - learning_rate

    Besides the placeholders it creates the prediction `self.pred`, the MSE
    loss `self.loss`, the RMSProp train op `self.optim`, the summaries and a
    saver.
    """
    self.learning_rate = tf.placeholder(
        tf.float32, None, name="learning_rate")
    # Stock symbols are mapped to integers.
    self.symbols = tf.placeholder(tf.int32, [None, 1], name='stock_labels')
    # inputs.shape = (number of examples, number of input, dimension of each
    # input).
    self.inputs = tf.placeholder(
        tf.float32, [None, self.num_steps, self.input_size], name="inputs")
    self.targets = tf.placeholder(
        tf.float32, [None, self.input_size], name="targets")

    def _create_one_cell():
        """Returns one LSTM cell, with output dropout if keep_prob < 1."""
        one_cell = tf.contrib.rnn.LSTMCell(
            self.lstm_size, state_is_tuple=True)
        if self.keep_prob < 1.0:
            one_cell = tf.contrib.rnn.DropoutWrapper(
                one_cell, output_keep_prob=self.keep_prob)
        return one_cell

    if self.num_layers > 1:
        cell = tf.contrib.rnn.MultiRNNCell(
            [_create_one_cell() for _ in range(self.num_layers)],
            state_is_tuple=True)
    else:
        cell = _create_one_cell()

    # Run dynamic RNN.
    val, _ = tf.nn.dynamic_rnn(
        cell, self.inputs, dtype=tf.float32, scope="dynamic_rnn")

    # Before transpose, val.get_shape() = (batch_size, num_steps, lstm_size)
    # After transpose, val.get_shape() = (num_steps, batch_size, lstm_size)
    val = tf.transpose(val, [1, 0, 2])
    final_step = int(val.get_shape()[0]) - 1
    last = tf.gather(val, final_step, name="lstm_state")

    ws = tf.Variable(
        tf.truncated_normal([self.lstm_size, self.input_size]), name="w")
    bias = tf.Variable(tf.constant(0.1, shape=[self.input_size]), name="b")
    self.pred = tf.matmul(last, ws) + bias

    self.last_sum = tf.summary.histogram("lstm_state", last)
    self.w_sum = tf.summary.histogram("w", ws)
    self.b_sum = tf.summary.histogram("b", bias)
    self.pred_summ = tf.summary.histogram("pred", self.pred)

    # Mean squared error between prediction and target.
    self.loss = tf.reduce_mean(
        tf.square(self.pred - self.targets), name="loss_mse")
    rmsprop = tf.train.RMSPropOptimizer(self.learning_rate)
    self.optim = rmsprop.minimize(self.loss, name="rmsprop_optim")

    self.loss_sum = tf.summary.scalar("loss_mse", self.loss)
    self.learning_rate_sum = tf.summary.scalar(
        "learning_rate", self.learning_rate)

    self.t_vars = tf.trainable_variables()
    self.saver = tf.train.Saver()
def _encode_final_chars(
        self,
        final_char_input_seq: tf.Tensor,
        char_attention_mask: tf.Tensor,
        full_molecules: tf.Tensor,
        char_to_molecule_attention_mask: tf.Tensor,
        molecule_seq_length: tf.Tensor,
        final_seq_char_positions: Optional[tf.Tensor]) -> tf.Tensor:
    """Run a shallow/low-dim transformer to get a final character encoding.

    The molecules are repeated to character length, concatenated with
    `final_char_input_seq`, passed through a 1-D convolution with layer norm
    (and dropout when training), and finally through a single transformer
    layer. When `final_seq_char_positions` is given, the transformer queries
    and the attention mask are restricted to those character positions.
    """
    _, char_seq_length, _ = bert_modeling.get_shape_list(
        final_char_input_seq)

    # `final_char_input_seq` is a projected version of the deep molecule BERT
    # stack with slice-wise resnet connections.
    # NOTE(review): the scope extent was reconstructed from collapsed source;
    # the transformer call is assumed to live inside it - confirm.
    with tf.variable_scope("final_char_encoder"):
        # `repeated_molecules`:
        #   [batch_size, char_seq_len, molecule_hidden_size]
        repeated_molecules = self._repeat_molecules(
            full_molecules,
            char_seq_length=char_seq_length,
            molecule_seq_length=molecule_seq_length)

        # `concat`:
        #   [batch_size, char_seq_len,
        #    molecule_hidden_size+char_hidden_final]
        concat = tf.concat(
            [final_char_input_seq, repeated_molecules], axis=-1)

        activation_fn = bert_modeling.get_activation(self.config.hidden_act)

        # `final_char_seq`: [batch_size, char_seq_len, hidden_size]
        final_char_seq = tf.layers.conv1d(
            inputs=concat,
            filters=self.config.hidden_size,
            kernel_size=self.config.upsampling_kernel_size,
            strides=1,
            padding="same",
            activation=activation_fn,
            name="conv")
        final_char_seq = bert_modeling.layer_norm(final_char_seq)
        if self._is_training:
            final_char_seq = bert_modeling.dropout(
                final_char_seq, self.config.hidden_dropout_prob)

        # By default every character position is a query.
        query_seq = final_char_seq
        if final_seq_char_positions is not None:
            # Limit transformer query seq and attention mask to these
            # character positions to greatly reduce the compute cost.
            # Typically, this is just done for the MLM training task.
            # `query_seq`: [batch, final_char_seq, char_dim]
            query_seq = tf.gather(
                final_char_seq, final_seq_char_positions, batch_dims=1)
            # `char_to_molecule_attention_mask`:
            #   [batch, final_len, molecule_seq]
            char_to_molecule_attention_mask = tf.gather(
                char_to_molecule_attention_mask,
                final_seq_char_positions,
                batch_dims=1)
            char_attention_mask = tf.gather(
                char_attention_mask, final_seq_char_positions, batch_dims=1)

        return bert_modeling.transformer_model(
            input_tensor=query_seq,
            input_kv_tensor=final_char_seq,
            attention_mask=char_attention_mask,
            hidden_size=self.config.hidden_size,
            num_hidden_layers=1,
            num_attention_heads=self.config.num_attention_heads,
            intermediate_size=self.config.intermediate_size,
            intermediate_act_fn=bert_modeling.get_activation(
                self.config.hidden_act),
            hidden_dropout_prob=self.config.hidden_dropout_prob,
            attention_probs_dropout_prob=(
                self.config.attention_probs_dropout_prob),
            initializer_range=self.config.initializer_range)
def non_max_suppression(scores_in,
                        boxes_in,
                        top_k_indices,
                        labels,
                        num_detections=ssd_constants.MAX_NUM_EVAL_BOXES):
    """Implement Non-maximum suppression.

    Args:
      scores_in: a Tensor with shape
        [batch_size, ssd_constants.MAX_NUM_EVAL_BOXES, num_classes]. The top
        ssd_constants.MAX_NUM_EVAL_BOXES box scores for each class.
      boxes_in: a Tensor with shape [batch_size, N, 4], which stacks box
        regression outputs on all feature levels. The N is the number of
        total anchors on all levels.
      top_k_indices: a Tensor with shape
        [batch_size, ssd_constants.MAX_NUM_EVAL_BOXES, num_classes]. The
        indices for these top boxes for each class.
      labels: labels tensor; indexed with ssd_constants.SOURCE_ID and
        ssd_constants.RAW_SHAPE.
      num_detections: maximum output length.

    Returns:
      A tensor stacking, along its last axis, seven values per detection:
      the source id, two box coordinates and two box extents scaled by the
      raw shape, the score and the class.
    """
    _, _, num_classes = scores_in.get_shape().as_list()
    out_dtype = scores_in.dtype

    # Repeat the per-image metadata once per detection slot.
    source_id = tf.cast(
        tf.tile(
            tf.expand_dims(labels[ssd_constants.SOURCE_ID], 1),
            [1, num_detections]),
        out_dtype)
    raw_shape = tf.cast(
        tf.tile(
            tf.expand_dims(labels[ssd_constants.RAW_SHAPE], 1),
            [1, num_detections, 1]),
        out_dtype)

    boxes_by_class = []
    scores_by_class = []
    classes_by_class = []

    # Index 0 is the background class and is skipped.
    for class_i in range(1, num_classes):
        candidate_boxes = tf.batch_gather(
            boxes_in, top_k_indices[:, :, class_i])
        candidate_scores = scores_in[:, :, class_i]
        candidate_scores, candidate_boxes = _filter_scores(
            candidate_scores, candidate_boxes)

        kept_scores, kept_boxes = ssd_architecture.non_max_suppression_padded(
            scores=tf.cast(candidate_scores, out_dtype),
            boxes=tf.cast(candidate_boxes, out_dtype),
            max_output_size=num_detections,
            iou_threshold=ssd_constants.OVERLAP_CRITERIA)
        kept_classes = tf.fill(
            tf.shape(kept_scores), ssd_constants.CLASS_INV_MAP[class_i])

        boxes_by_class.append(kept_boxes)
        scores_by_class.append(kept_scores)
        classes_by_class.append(kept_classes)

    post_nms_boxes = tf.concat(boxes_by_class, axis=1)
    post_nms_scores = tf.concat(scores_by_class, axis=1)
    post_nms_classes = tf.concat(classes_by_class, axis=1)

    # Sort all results and keep the best `num_detections`.
    post_nms_scores, sorted_indices = tf.nn.top_k(
        tf.cast(post_nms_scores, out_dtype), k=num_detections, sorted=True)
    post_nms_boxes = tf.gather(post_nms_boxes, sorted_indices, batch_dims=1)
    post_nms_classes = tf.gather(
        post_nms_classes, sorted_indices, batch_dims=1)

    # Presumably boxes are [ymin, xmin, ymax, xmax] and raw_shape starts with
    # (height, width) - TODO confirm against the caller.
    y_min = post_nms_boxes[:, :, 0]
    x_min = post_nms_boxes[:, :, 1]
    y_max = post_nms_boxes[:, :, 2]
    x_max = post_nms_boxes[:, :, 3]
    raw_h = raw_shape[:, :, 0]
    raw_w = raw_shape[:, :, 1]

    detections_result = tf.stack(
        [
            source_id,
            x_min * raw_w,
            y_min * raw_h,
            (x_max - x_min) * raw_w,
            (y_max - y_min) * raw_h,
            post_nms_scores,
            tf.cast(post_nms_classes, out_dtype),
        ],
        axis=2)
    return detections_result
def __init__(self, review_num_u, review_num_i, user_num, item_num, num_classes,
             n_latent, embedding_id, attention_size, embedding_size,
             l2_reg_lambda=0.0):
    """Build the TF1 graph of a review-attention rating predictor.

    Creates the input placeholders, the user/item id embedding tables, an
    attention block over the user's and the item's review embeddings, the
    fused user/item feature vectors, the rating prediction, and the loss and
    error metrics. Everything is exposed as attributes on `self`.

    Args:
        review_num_u: number of review slots per user (second dimension of
            `input_u` and `input_reuid`).
        review_num_i: number of review slots per item (second dimension of
            `input_i` and `input_reiid`).
        user_num: number of users; the user tables get `user_num + 2` rows.
        item_num: number of items; the item tables get `item_num + 2` rows.
        num_classes: not used anywhere in this constructor.
        n_latent: width of the fused user/item feature vectors.
        embedding_id: width of the user/item id embeddings.
        attention_size: width of the attention projections.
        embedding_size: width of each pre-computed review embedding.
        l2_reg_lambda: weight of the L2 penalty on the four attention
            projection matrices (Wau, Wru, Wri, Wai).
    """
    # input_u / input_i now receive pre-computed review embeddings directly
    # (changed from the original design), so no embedding lookup is needed.
    self.input_u = tf.placeholder(tf.float32, [None, review_num_u, embedding_size], name="input_u")
    self.input_i = tf.placeholder(tf.float32, [None, review_num_i, embedding_size], name="input_i")
    # Integer ids attached to each review slot; input_reuid indexes the item
    # table iidW and input_reiid indexes the user table uidW (see below).
    self.input_reuid = tf.placeholder(tf.int32, [None, review_num_u], name='input_reuid')
    self.input_reiid = tf.placeholder(tf.int32, [None, review_num_i], name='input_reiid')
    # Regression target and the ids of the (user, item) pair being scored.
    self.input_y = tf.placeholder(tf.float32, [None, 1], name="input_y")
    self.input_uid = tf.placeholder(tf.int32, [None, 1], name="input_uid")
    self.input_iid = tf.placeholder(tf.int32, [None, 1], name="input_iid")
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
    # NOTE(review): drop0 is created but not consumed in this constructor.
    self.drop0 = tf.placeholder(tf.float32, name="dropout0")

    # Id embedding tables, shared by the attention block and by "get_fea".
    iidW = tf.Variable(tf.random_uniform([item_num + 2, embedding_id], -0.1, 0.1), name="iidW")
    uidW = tf.Variable(tf.random_uniform([user_num + 2, embedding_id], -0.1, 0.1), name="uidW")
    l2_loss_x = tf.constant(0.0)

    with tf.name_scope("dropout"):
        # keep_prob is hard-coded to 1.0, so these are pass-throughs.
        self.h_drop_u = tf.nn.dropout(self.input_u, 1.0)
        self.h_drop_i = tf.nn.dropout(self.input_i, 1.0)

    with tf.name_scope("attention"):
        Wau = tf.Variable(tf.random_uniform(
            [embedding_size, attention_size], -0.1, 0.1), name='Wau')
        Wru = tf.Variable(tf.random_uniform([embedding_id, attention_size], -0.1, 0.1), name='Wru')
        # NOTE(review): Wpu, bau and bbu are only referenced by the
        # commented-out additive attention below; they still create
        # variables in the graph.
        Wpu = tf.Variable(tf.random_uniform([attention_size, 1], -0.1, 0.1), name='Wpu')
        bau = tf.Variable(tf.constant(0.1, shape=[attention_size]), name="bau")
        bbu = tf.Variable(tf.constant(0.1, shape=[1]), name="bbu")
        # self.iid_a = tf.nn.relu(tf.nn.embedding_lookup(iidW, self.input_reuid))
        self.iid_a = tf.nn.embedding_lookup(iidW, self.input_reuid)
        # Previous additive-attention formulation, kept for reference:
        # self.u_j = tf.einsum('ajk,kl->ajl',
        #     tf.nn.relu(
        #         tf.einsum('ajk,kl->ajl', self.h_drop_u, Wau) + tf.einsum('ajk,kl->ajl', self.iid_a, Wru) + bau),
        #     Wpu) + bbu  # None*u_len*1
        # Scaled dot-product score between the projected id embeddings and
        # the projected review embeddings.
        # NOTE(review): both operands are [None, review_num_u, attention_size]
        # and transpose_b=True, so the product is
        # [None, review_num_u, review_num_u], not None*u_len*1 as the
        # original comments said -- confirm this is intended.
        self.u_j = tf.matmul(
            tf.einsum('ajk,kl->ajl', self.iid_a, Wru),
            tf.einsum('ajk,kl->ajl', self.h_drop_u, Wau),
            transpose_b=True) / tf.sqrt(
                tf.constant(attention_size, dtype=tf.float32))
        # NOTE(review): this debug print is still active and emits u_j every
        # time the tensor is evaluated.
        self.u_j = tf.Print(
            self.u_j, ["u_j:", self.u_j, tf.shape(self.u_j)], summarize=50)
        # Softmax is taken over axis 1.
        self.u_a = tf.nn.softmax(self.u_j, 1)

        # Item side: mirrors the user side with the roles of the two
        # embedding tables swapped.
        Wai = tf.Variable(tf.random_uniform(
            [embedding_size, attention_size], -0.1, 0.1), name='Wai')
        Wri = tf.Variable(tf.random_uniform([embedding_id, attention_size], -0.1, 0.1), name='Wri')
        # NOTE(review): Wpi, bai and bbi are likewise unused by the active code.
        Wpi = tf.Variable(tf.random_uniform([attention_size, 1], -0.1, 0.1), name='Wpi')
        bai = tf.Variable(tf.constant(0.1, shape=[attention_size]), name="bai")
        bbi = tf.Variable(tf.constant(0.1, shape=[1]), name="bbi")
        # self.uid_a = tf.nn.relu(tf.nn.embedding_lookup(uidW, self.input_reiid))
        self.uid_a = tf.nn.embedding_lookup(uidW, self.input_reiid)
        # Previous additive-attention formulation, kept for reference:
        # self.i_j = tf.einsum('ajk,kl->ajl', tf.nn.relu(
        #     tf.einsum('ajk,kl->ajl', self.h_drop_i, Wai) + tf.einsum('ajk,kl->ajl', self.uid_a, Wri) + bai),
        #     Wpi) + bbi
        self.i_j = tf.matmul(tf.einsum('ajk,kl->ajl', self.uid_a, Wri),
                             tf.einsum('ajk,kl->ajl', self.h_drop_i, Wai),
                             transpose_b=True) / tf.sqrt(
                                 tf.constant(attention_size, dtype=tf.float32))
        self.i_a = tf.nn.softmax(self.i_j, 1)

        # Only the four projection matrices are L2-regularized.
        l2_loss_x += tf.nn.l2_loss(Wau)
        l2_loss_x += tf.nn.l2_loss(Wru)
        l2_loss_x += tf.nn.l2_loss(Wri)
        l2_loss_x += tf.nn.l2_loss(Wai)

    with tf.name_scope("add_reviews"):
        # Attention-weighted sum of the review embeddings over axis 1.
        # NOTE(review): u_a is [None, review_num_u, review_num_u] while
        # h_drop_u is [None, review_num_u, embedding_size]; the element-wise
        # multiply only broadcasts if those trailing sizes are compatible --
        # confirm.
        self.u_feas = tf.reduce_sum(tf.multiply(self.u_a, self.h_drop_u), 1)
        self.u_feas = tf.nn.dropout(self.u_feas, self.dropout_keep_prob)
        self.i_feas = tf.reduce_sum(tf.multiply(self.i_a, self.h_drop_i), 1)
        self.i_feas = tf.nn.dropout(self.i_feas, self.dropout_keep_prob)

    with tf.name_scope("get_fea"):
        # uidmf = tf.Variable(tf.random_uniform([user_num + 2, embedding_id], -0.1, 0.1), name="uidmf")
        # iidmf = tf.Variable(tf.random_uniform([item_num + 2, embedding_id], -0.1, 0.1), name="iidmf")
        # The id embeddings of the scored pair come from the same tables used
        # by the attention block.
        self.uid = tf.nn.embedding_lookup(uidW, self.input_uid)
        self.iid = tf.nn.embedding_lookup(iidW, self.input_iid)
        # Drop the singleton id axis: [None, 1, embedding_id] -> [None, embedding_id].
        self.uid = tf.reshape(self.uid, [-1, embedding_id])
        self.iid = tf.reshape(self.iid, [-1, embedding_id])
        Wu = tf.Variable(tf.random_uniform([embedding_size, n_latent], -0.1, 0.1), name='Wu')
        bu = tf.Variable(tf.constant(0.1, shape=[n_latent]), name="bu")
        # q_u (i.e. the uid embedding) + X_u
        # NOTE(review): adding self.uid ([None, embedding_id]) to a
        # [None, n_latent] product presumably requires
        # embedding_id == n_latent -- confirm.
        self.u_feas = tf.matmul(self.u_feas, Wu) + self.uid + bu
        Wi = tf.Variable(tf.random_uniform([embedding_size, n_latent], -0.1, 0.1), name='Wi')
        bi = tf.Variable(tf.constant(0.1, shape=[n_latent]), name="bi")
        # p_i + Y_i (W0 * O_i + b0)
        self.i_feas = tf.matmul(self.i_feas, Wi) + self.iid + bi

    with tf.name_scope('prediction'):
        # h0: element-wise interaction of the user and item features.
        self.FM = tf.multiply(self.u_feas, self.i_feas, name="h0")
        self.FM = tf.nn.relu(self.FM)
        self.FM = tf.nn.dropout(self.FM, self.dropout_keep_prob)
        # Wmul = tf.Variable(
        #     tf.random_uniform([n_latent, 1], -0.1, 0.1), name='wmul')
        # The output weights are a constant vector of ones (not trained), so
        # the matmul below sums the entries of h0.
        Wmul = tf.constant(1, shape=[n_latent, 1], name='wmul', dtype=tf.float32)
        # W1^T * h0
        self.mul = tf.matmul(self.FM, Wmul)
        self.score = tf.reduce_sum(self.mul, 1, keep_dims=True)
        # Per-user and per-item scalar biases plus one global bias.
        self.uidW2 = tf.Variable(tf.constant(0.1, shape=[user_num + 2]), name="uidW2")
        self.iidW2 = tf.Variable(tf.constant(0.1, shape=[item_num + 2]), name="iidW2")
        self.u_bias = tf.gather(self.uidW2, self.input_uid)
        self.i_bias = tf.gather(self.iidW2, self.input_iid)
        self.Feature_bias = self.u_bias + self.i_bias
        self.bised = tf.Variable(tf.constant(0.1), name='bias')
        self.predictions = self.score + self.Feature_bias + self.bised

    with tf.name_scope("loss"):
        # tf.nn.l2_loss is sum(x ** 2) / 2 over the batch (not a mean).
        losses = tf.nn.l2_loss(tf.subtract(self.predictions, self.input_y))
        self.loss = losses + l2_reg_lambda * l2_loss_x

    with tf.name_scope("accuracy"):
        # Mean absolute error.
        self.mae = tf.reduce_mean(
            tf.abs(tf.subtract(self.predictions, self.input_y)))
        # Despite the attribute name, this is the root-mean-square error.
        self.accuracy = tf.sqrt(
            tf.reduce_mean(
                tf.square(tf.subtract(self.predictions, self.input_y))))
def _parse_example(data):
    """Turn one decoded example into model inputs.

    Reads `self` and `params` from the enclosing scope.

    Args:
        data: a dict with 'source_id', 'image', 'groundtruth_boxes' and
            'groundtruth_classes' entries (plus `constants.IS_PADDED` when
            padded evaluation samples are in use).

    Returns:
        In training mode, an `(image, labels)` tuple where `labels` holds the
        encoded boxes, encoded classes and the number of matched boxes (and
        the decoded boxes when `params['visualize_dataloader']` is set).
        Otherwise a single dict holding the resized image, the ground-truth
        boxes/classes trimmed or zero-padded to
        `constants.MAX_NUM_EVAL_BOXES`, the numeric source id and the raw
        image shape.
    """
    with tf.name_scope('augmentation'):
        source_id = data['source_id']
        image = data['image']  # dtype uint8
        raw_shape = tf.shape(image)
        boxes = data['groundtruth_boxes']
        classes = tf.reshape(data['groundtruth_classes'], [-1, 1])

        # Only 80 of the 90 COCO classes are used; remap the raw ids.
        class_map = tf.convert_to_tensor(constants.CLASS_MAP)
        classes = tf.cast(tf.gather(class_map, classes), dtype=tf.float32)

        if self._is_training:
            image, boxes, classes = ssd_crop(image, boxes, classes)
            # ssd_crop resizes and returns a float32 image still in the
            # 0--255 range. Scaling to [0, 1] is done after the crop to avoid
            # an early dtype cast (and the extra memory copy it incurs).
            image = image / 255.0

            # random_horizontal_flip() is hard coded to flip with 50% chance.
            image, boxes = preprocessor.random_horizontal_flip(
                image=image, boxes=boxes)

            # TODO(shibow): Investigate the parameters for color jitter.
            image = color_jitter(
                image, brightness=0.125, contrast=0.5, saturation=0.5,
                hue=0.05)

            if params['dtype'] == 'bf16':
                image = tf.cast(image, dtype=tf.bfloat16)

            encoded_classes, encoded_boxes, num_matched_boxes = encode_labels(
                boxes, classes)

            # The transpose happens here in the dataloader rather than in
            # the topology to save time.
            encoded_classes, encoded_boxes = transpose_labels(
                encoded_classes, encoded_boxes)
            encoded_classes = tf.cast(encoded_classes, tf.int32)

            labels = {
                constants.NUM_MATCHED_BOXES: num_matched_boxes,
                constants.BOXES: encoded_boxes,
                constants.CLASSES: tf.squeeze(encoded_classes, axis=1),
            }

            # Only for dataloader visualization; the model does not use it.
            if params['visualize_dataloader']:
                box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
                    scale_factors=constants.BOX_CODER_SCALES)
                rel_codes = tf.squeeze(encoded_boxes)
                anchors = box_list.BoxList(
                    tf.convert_to_tensor(DefaultBoxes()('ltrb')))
                decoded = box_coder.decode(
                    rel_codes=rel_codes, anchors=anchors).get()
                decoded_boxes = tf.expand_dims(decoded, axis=0)
                labels['decoded_boxes'] = tf.squeeze(decoded_boxes)

            return image, labels

        # ---- Evaluation path ----
        image = tf.image.resize_images(
            image, size=(constants.IMAGE_SIZE, constants.IMAGE_SIZE))
        # resize_images returns float32 without changing the value range;
        # divide by 255 to land in [0, 1].
        image = image / 255.

        if params['dtype'] == 'bf16':
            image = tf.cast(image, dtype=tf.bfloat16)

        def trim_and_pad(inp_tensor, dim_1):
            """Limit the number of boxes, and pad if necessary."""
            limit = constants.MAX_NUM_EVAL_BOXES
            trimmed = inp_tensor[:limit]
            missing_rows = limit - tf.shape(trimmed)[0]
            padded = tf.pad(trimmed, [[0, missing_rows], [0, 0]])
            return tf.reshape(padded, [limit, dim_1])

        boxes = trim_and_pad(boxes, 4)
        classes = trim_and_pad(classes, 1)

        sample = {
            constants.IMAGE: image,
            constants.BOXES: boxes,
            constants.CLASSES: classes,
            constants.SOURCE_ID: tf.string_to_number(source_id, tf.int32),
            constants.RAW_SHAPE: raw_shape,
        }

        if not self._is_training and self._count > params['eval_samples']:
            sample[constants.IS_PADDED] = data[constants.IS_PADDED]
        return sample
def Evaluation(net, input, roi_box, target, weights, data_ids, checkpoint_dir,
               is_training=False):
    """Evaluate a restored classifier and compute its boosting update.

    Restores the latest checkpoint from `checkpoint_dir`, runs the network
    over the input queue until it is exhausted, accumulates the weighted
    error rate `Ek`, prints the binary confusion counts, and returns the
    classifier weight `alpha` together with the updated per-sample weights.

    Args:
        net: model object; `net._inference(input, roi_box, is_training)` is
            called to build the graph and `net.pred` is read as the predicted
            class per sample (presumably populated by `_inference` -- verify
            against the model class).
        input: input batch tensor fed to the network.
        roi_box: ROI tensor forwarded to `net._inference`.
        target: one-hot style target tensor; its argmax over axis 1 is used
            as the ground-truth class.
        weights: array of current per-sample weights, indexed by sample id.
        data_ids: tensor with the sample ids of the current batch.
        checkpoint_dir: directory containing the checkpoint to restore.
        is_training: forwarded to `net._inference`.

    Returns:
        A tuple `(alpha, new_weights)` where
        `alpha = 0.5 * log((1 - Ek) / Ek)` and `new_weights` is a NumPy array
        of length `weights.shape[0]` holding
        `exp(alpha) * w_i * exp(-y_i * G(x_i))` for every evaluated sample
        (zero for samples that were never seen).
    """
    new_weights = np.zeros(weights.shape[0])
    gt_classification = tf.argmax(target, dimension=1, name="gt_classification")
    # Called for its graph-building side effect; the returned logits are not
    # needed here.
    net._inference(input, roi_box, is_training)

    with tf.name_scope('calculation_accuracy'):
        correct_prediction = tf.equal(net.pred, gt_classification)
        misclassified = tf.reshape(
            tf.cast(tf.not_equal(net.pred, gt_classification),
                    dtype=tf.float32), [-1])
        corresponding_weights = tf.reshape(tf.gather(weights, data_ids), [-1])
        # Numerator of Ek: total weight of the misclassified samples.
        batch_err = tf.reduce_sum(
            tf.multiply(corresponding_weights, misclassified))
        # Denominator of Ek: total weight of the batch.
        batch_weight = tf.reduce_sum(corresponding_weights)
        # -y_i * G(x_i): -1 for a correct prediction, +1 for a wrong one.
        pred_result = tf.cast(correct_prediction, tf.int32)
        minus_yg = tf.cast(tf.add(tf.multiply(pred_result, -2), 1), tf.float32)
        # w_{k-1,i} * exp(-y_i * G_k(x_i))
        new_weight = tf.multiply(corresponding_weights, tf.exp(minus_yg))

    # Fetch only what the loop consumes; in particular do not copy the input
    # images and targets back to the host on every step.
    fetches = [net.pred, gt_classification, batch_err, batch_weight,
               new_weight, data_ids]

    saver = tf.train.Saver(max_to_keep=2)
    sess_init_op = tf.group(tf.global_variables_initializer(),
                            tf.local_variables_initializer())

    count = 0
    w = 0
    wI = 0
    tp = 0
    tn = 0
    fp = 0
    fn = 0
    with tf.Session() as sess:
        sess.run(sess_init_op)
        saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir + '/'))
        # optimistic_restore(session=sess, save_file=checkpoint_dir + '/-5')
        print("model restored!")

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            while True:
                pred, gt, bat_e, bat_w, bat_new_w, ids = sess.run(fetches)
                wI += bat_e
                w += bat_w
                ids = np.reshape(ids, [-1])
                count += len(ids)
                new_weights[ids] = bat_new_w
                progress = "evaluate %d/%d" % (count, len(new_weights))
                sys.stdout.write('\r' + progress)

                # Binary confusion counts; class 1 is the positive class.
                pred = np.reshape(pred, [-1])
                gt = np.reshape(gt, [-1])
                is_correct = pred == gt
                tn += int(np.count_nonzero(is_correct & (pred == 0)))
                tp += int(np.count_nonzero(is_correct & (pred == 1)))
                fn += int(np.count_nonzero(~is_correct & (pred == 0)))
                fp += int(np.count_nonzero(~is_correct & (pred == 1)))
        except tf.errors.OutOfRangeError:
            print('\nDone testing -- epoch limit reached')
        finally:
            coord.request_stop()
            coord.join(threads)
        print("tp, tn, fp, fn: %d, %d, %d, %d" % (tp, tn, fp, fn))

    Ek = wI / w
    # A weighted error of exactly 0 or 1 would make alpha infinite and turn
    # the returned weights into inf/nan or all zeros; keep Ek strictly inside
    # (0, 1) so the update stays finite.
    eps = 1e-10
    Ek = np.clip(Ek, eps, 1.0 - eps)
    alpha = 0.5 * np.log((1 - Ek) / Ek)
    new_weights = np.exp(alpha) * new_weights
    return alpha, new_weights
def compute_knowledge_selection_and_loss(self, features, encoder_output,
                                         fact_embedding, fact_lengths, margin,
                                         num_negative_samples):
    """Compute knowledge selection and loss.

    Scores every candidate fact (triple) against a summary of the encoded
    context, and computes a TransE-style margin loss over the subject,
    predicate and object embeddings of the facts.

    Args:
        features: dict of input features. Reads "inputs", "subject_mask",
            "predicate_mask" and "object_mask"; "triple_labels" is read only
            when `hparams.mode` is not PREDICT.
        encoder_output: <tf.float32>[batch_size, input_length, hidden_dim]
        fact_embedding: <tf.float32>[batch_size*triple_num, max_triple_length,
            emb_dim]
        fact_lengths: <tf.int32>[batch_size*triple_num]
        margin: margin value used in the TransE loss.
        num_negative_samples: number of shuffled negative samples drawn for
            the TransE loss. Must be at least 1, because the accumulated
            negative loss is divided by `2 * num_negative_samples`.

    Returns:
        A 4-tuple `(triple_logits, avg_triple_loss,
        original_knowledge_encoder_output, transe_loss)`:
            triple_logits: per-triple selection logits, one per
                (example, triple) pair.
            avg_triple_loss: mean weighted cross-entropy between
                `triple_logits` and `features["triple_labels"]`; the Python
                float 0.0 in PREDICT mode.
            original_knowledge_encoder_output: averaged fact embeddings
                reshaped to [batch_size, triple_num, hidden_dim].
            transe_loss: `margin + positive_loss - negative_loss`, clipped to
                [0, 100].

    Raises:
        ValueError: if `hparams.similarity_fuction` is neither "dot_product"
            nor "bilinear".
    """
    hparams = self._hparams
    encoder_hidden_dim = common_layers.shape_list(encoder_output)[-1]

    # Context lengths, recovered from the padding of the squeezed inputs.
    inputs = tf.squeeze(features["inputs"], 2)
    context_padding = common_attention.embedding_to_padding(inputs)
    context_lens = tf.to_float(
        common_attention.padding_to_length(context_padding))
    context_lens = tf.expand_dims(context_lens, -1)

    # Summary vector of the context, and one averaged vector per fact.
    context_vector_summary = compute_summary_embedding(
        encoder_output, context_lens, hparams)
    knowledge_encoder_output = compute_average_embedding(
        fact_embedding, fact_lengths)
    knowledge_encoder_output = tf.reshape(
        knowledge_encoder_output, [-1, self.triple_num, encoder_hidden_dim])
    original_knowledge_encoder_output = knowledge_encoder_output

    # NOTE: "similarity_fuction" (sic) is the hparam's actual name and
    # "knolwedge_final_mlp" (sic) is the layer name stored in checkpoints;
    # neither spelling may be corrected here.
    similarity = hparams.similarity_fuction
    if similarity == "dot_product":
        triple_logits = tf.squeeze(
            tf.matmul(knowledge_encoder_output,
                      tf.expand_dims(context_vector_summary, 2)), -1)
    elif similarity == "bilinear":
        # Repeat the context summary once per triple.
        tiled_context_vector = tf.tile(context_vector_summary,
                                       [1, self.triple_num])
        context_vector = tf.reshape(
            tiled_context_vector, [-1, self.triple_num, encoder_hidden_dim])
        # Outer product of the context and fact vectors, flattened and fed to
        # a single-unit dense layer.
        context_vector = tf.expand_dims(context_vector, -1)
        knowledge_encoder_output = tf.expand_dims(knowledge_encoder_output, 2)
        outer_product = tf.matmul(context_vector, knowledge_encoder_output)
        outer_product = tf.reshape(
            outer_product,
            [-1, self.triple_num, encoder_hidden_dim * encoder_hidden_dim])
        triple_logits = tf.squeeze(
            tf.layers.dense(outer_product, 1, name="knolwedge_final_mlp"), -1)
    else:
        # Previously this fell through and failed later with an unbound
        # `triple_logits`; fail early with an actionable message instead.
        raise ValueError(
            "Unknown similarity function: %r (expected 'dot_product' or "
            "'bilinear')." % (similarity,))

    def _flat_mask(key):
        """Reshape a token mask feature to [batch*triple_num, max_len]."""
        mask = tf.reshape(
            features[key], [-1, self.triple_num, hparams.max_triple_length])
        return tf.reshape(mask, [-1, hparams.max_triple_length])

    def _masked_mean(mask):
        """Average the fact token embeddings selected by `mask`."""
        length = tf.cast(
            tf.expand_dims(tf.reduce_sum(mask, -1), 1), tf.float32)
        # Add a trailing axis so the mask broadcasts over the embedding dim.
        token_weights = tf.cast(tf.expand_dims(mask, 2), tf.float32)
        summed = tf.reduce_sum(tf.multiply(fact_embedding, token_weights), 1)
        # The epsilon guards against a division by zero for empty masks.
        return summed / (length + 1e-5)

    subject_vect = _masked_mean(_flat_mask("subject_mask"))
    object_vect = _masked_mean(_flat_mask("object_mask"))
    predicate_vect = _masked_mean(_flat_mask("predicate_mask"))

    # Negative samples: the same vectors with their rows shuffled.
    shuffled_subject_vect = []
    shuffled_object_vect = []
    for _ in range(num_negative_samples):
        shuffled_subject_vect.append(
            tf.gather(
                subject_vect,
                tf.random.shuffle(tf.range(tf.shape(subject_vect)[0]))))
        shuffled_object_vect.append(
            tf.gather(
                object_vect,
                tf.random.shuffle(tf.range(tf.shape(object_vect)[0]))))

    # KB pretraining loss: subject + predicate should be close to object.
    positive_loss = tf.reduce_mean(
        tf.squared_difference(subject_vect + predicate_vect, object_vect))
    negative_loss = 0
    for neg_subject, neg_object in zip(shuffled_subject_vect,
                                       shuffled_object_vect):
        negative_loss += tf.reduce_mean(
            tf.squared_difference(neg_subject + predicate_vect, object_vect))
        negative_loss += tf.reduce_mean(
            tf.squared_difference(subject_vect + predicate_vect, neg_object))

    # TransE loss: average over the 2 * num_negative_samples corrupted terms.
    negative_loss = negative_loss / (2 * num_negative_samples)
    transe_loss = tf.clip_by_value(
        margin + positive_loss - negative_loss,
        clip_value_min=0,
        clip_value_max=100)

    avg_triple_loss = 0.0
    if hparams.mode != tf.estimator.ModeKeys.PREDICT:
        # Labels are only needed (and only looked up) when a loss is computed.
        triple_labels = features["triple_labels"]
        triple_losses = tf.nn.weighted_cross_entropy_with_logits(
            labels=triple_labels,
            logits=triple_logits,
            pos_weight=hparams.pos_weight)
        avg_triple_loss = tf.reduce_mean(triple_losses)
        tf.summary.scalar("triple_loss", avg_triple_loss)

    return (triple_logits, avg_triple_loss,
            original_knowledge_encoder_output, transe_loss)
def __call__(self, location_losses, cls_losses, decoded_boxlist_list,
             match_list=None):
    """Mine hard examples and return their summed losses.

    For every image, anchors are ranked by a per-anchor loss (classification,
    localization, or a weighted sum of both, depending on `self._loss_type`)
    and filtered with non-max suppression on the decoded boxes. When a match
    is available and `self._max_negatives_per_positive` is set, the selection
    is additionally subsampled to the configured negative:positive ratio.

    Args:
        location_losses: a float tensor of shape [num_images, num_anchors]
            with the anchorwise localization losses.
        cls_losses: a float tensor of shape [num_images, num_anchors] with
            the anchorwise classification losses.
        decoded_boxlist_list: a list with one decoded BoxList of location
            predictions per image.
        match_list: an optional list with one matcher.Match object per image
            describing which anchors are positive, negative or ignored. A
            falsy value is replaced by a list of `None`.

    Returns:
        A `(location_loss, cls_loss)` tuple of float scalars: the sums of the
        localization and classification losses over the selected examples of
        all images.

    Raises:
        ValueError: if location_losses, cls_losses and decoded_boxlist_list do
            not cover the same number of images.
        ValueError: if match_list is not a list, or its length differs from
            len(decoded_boxlist_list).
    """
    loc_per_image = tf.unstack(location_losses)
    cls_per_image = tf.unstack(cls_losses)
    num_images = len(decoded_boxlist_list)
    if not match_list:
        match_list = [None] * num_images

    if (len(loc_per_image) != num_images
            or num_images != len(cls_per_image)):
        raise ValueError(
            'location_losses, cls_losses and decoded_boxlist_list '
            'do not have compatible shapes.')
    if not isinstance(match_list, list):
        raise ValueError('match_list must be a list.')
    if len(match_list) != num_images:
        raise ValueError('match_list must either be None or have '
                         'length=len(decoded_boxlist_list).')

    mined_location_losses = []
    mined_cls_losses = []
    num_positives_list = []
    num_negatives_list = []
    for detection_boxlist, match, cls_loss_i, loc_loss_i in zip(
            decoded_boxlist_list, match_list, cls_per_image, loc_per_image):
        box_locations = detection_boxlist.get()

        # Choose the per-anchor score that drives the mining.
        if self._loss_type == 'loc':
            ranking_losses = loc_loss_i
        elif self._loss_type == 'both':
            ranking_losses = (cls_loss_i * self._cls_loss_weight
                              + loc_loss_i * self._loc_loss_weight)
        else:
            ranking_losses = cls_loss_i

        num_hard_examples = (
            self._num_hard_examples
            if self._num_hard_examples is not None
            else detection_boxlist.num_boxes())
        selected_indices = tf.image.non_max_suppression(
            box_locations, ranking_losses, num_hard_examples,
            self._iou_threshold)

        if self._max_negatives_per_positive is not None and match:
            subsampled = self._subsample_selection_to_desired_neg_pos_ratio(
                selected_indices, match, self._max_negatives_per_positive,
                self._min_negatives_per_image)
            selected_indices, num_positives, num_negatives = subsampled
            num_positives_list.append(num_positives)
            num_negatives_list.append(num_negatives)

        mined_location_losses.append(
            tf.reduce_sum(tf.gather(loc_loss_i, selected_indices)))
        mined_cls_losses.append(
            tf.reduce_sum(tf.gather(cls_loss_i, selected_indices)))

    location_loss = tf.reduce_sum(tf.stack(mined_location_losses))
    cls_loss = tf.reduce_sum(tf.stack(mined_cls_losses))

    # `match` is deliberately the value left over from the last image.
    if match and self._max_negatives_per_positive:
        self._num_positives_list = num_positives_list
        self._num_negatives_list = num_negatives_list
    return (location_loss, cls_loss)
f.close()

# Scatter-plot the raw 2-D points before clustering.
df = pd.DataFrame({"highest": [v[0] for v in vectors_set],
                   "lowest": [v[1] for v in vectors_set]})
# x/y are passed by keyword: newer seaborn releases reject them positionally.
sns.lmplot(x="highest", y="lowest", data=df, fit_reg=False, height=6)
plt.show()

# K-means with k clusters, run for a fixed number of iterations.
# A single session is used and closed by the `with` block (the previous code
# opened a second, never-closed session inside it).
with tf.Session() as sess:
    vectors = tf.constant(vectors_set)
    k = 6
    # Initial centroids: the first k rows of a random shuffle of the data.
    centroides = tf.Variable(
        tf.slice(tf.random_shuffle(vectors), [0, 0], [k, -1]))

    # Broadcast to [k, num_points, dims] to get all point/centroid pairs.
    expanded_vectors = tf.expand_dims(vectors, 0)
    expanded_centroides = tf.expand_dims(centroides, 1)

    # Index of the nearest centroid (squared Euclidean distance) per point.
    assignments = tf.argmin(
        tf.reduce_sum(
            tf.square(tf.subtract(expanded_vectors, expanded_centroides)), 2),
        0)

    # New centroid c = mean of the points currently assigned to cluster c.
    # NOTE(review): a cluster that receives no points yields a NaN centroid.
    means = tf.concat(
        [
            tf.reduce_mean(
                tf.gather(
                    vectors,
                    tf.reshape(tf.where(tf.equal(assignments, c)), [1, -1])),
                axis=[1])
            for c in range(k)
        ],
        axis=0)

    update_centroides = tf.assign(centroides, means)
    init_op = tf.global_variables_initializer()

    sess.run(init_op)
    for step in range(100):
        _, centroid_values, assignments_values = sess.run(
            [update_centroides, centroides, assignments])

data = {"highest": [], "lowest": [], "cluster": []}
one = []
two = []
three = []
def GetTargetSpec( name, num_dims = 100, t_dof = 1.0, regression_dataset = "covertype", regression_num_points = 0, regression_normalize = False, regression_hier_type = "none", # none, centered, non_centered regression_beta_prior = "normal", # normal, student_t regression_type = "regular", # regular, gamma_scales regression_use_beta_scales = True, eig_source = "linear", batch_size = 0, regression_stochastic_points = 0, gamma_shape = 0.5, precomputed_stats_path = None, **kwargs): if name == "funnel": spec = TargetSpec( name=name, num_dims=num_dims, x_min=-4.0, x_max=4.0, y_min=-10.0, y_max=10.0, stats=None, bijector=None) def funnel_forward(x): shift = tf.zeros_like(x) log_scale = tf.concat( [tf.zeros_like(x[Ellipsis, :1]), tf.tile(x[Ellipsis, :1], [1, num_dims - 1])], -1) return shift, log_scale mg = tfd.MultivariateNormalDiag( loc=tf.zeros(num_dims), scale_identity_multiplier=1.0) target = tfd.TransformedDistribution( mg, bijector=tfb.MaskedAutoregressiveFlow(funnel_forward)) elif name == "ill_cond_gaussian": # For backwards compatibility with earlier experiments. 
spec = TargetSpec( name=name, num_dims=num_dims, x_min=-5.0, x_max=5.0, y_min=-5.0, y_max=5.0, stats=None, bijector=None) rng = np.random.RandomState(seed=10) diag_precisions = np.linspace(1., 1000., num_dims)**-1 q, _ = np.linalg.qr(rng.randn(num_dims, num_dims)) scg_prec = (q * diag_precisions).dot(q.T) scg_prec = scg_prec.astype(np.float32) scg_var = np.linalg.inv(scg_prec) / 1000.0 target = tfd.MultivariateNormalFullCovariance( loc=tf.zeros(num_dims), covariance_matrix=scg_var) elif name == "new_ill_cond_gaussian": spec = TargetSpec( name=name, num_dims=num_dims, x_min=-5.0, x_max=5.0, y_min=-5.0, y_max=5.0, stats=None, bijector=None) rng = np.random.RandomState(seed=10) if eig_source == "linear": eigenvalues = np.linspace(1., 1000., num_dims)**-1 elif eig_source == "gamma": eigenvalues = np.sort( rng.gamma(shape=gamma_shape, scale=1., size=num_dims)).astype(np.float32) q, _ = np.linalg.qr(rng.randn(num_dims, num_dims)) covariance = (q * eigenvalues**-1).dot(q.T).astype(np.float32) target = tfd.MultivariateNormalFullCovariance( loc=tf.zeros(num_dims), covariance_matrix=covariance) elif name == "ill_cond_t": # For backwards compatibility with earlier experiments. 
spec = TargetSpec( name=name, num_dims=num_dims, x_min=-10.0, x_max=10.0, y_min=-10.0, y_max=10.0, stats=None, bijector=None) rng = np.random.RandomState(seed=10) diag_precisions = np.linspace(1., 1000., num_dims)**-1 q, _ = np.linalg.qr(rng.randn(num_dims, num_dims)) scg_prec = (q * diag_precisions).dot(q.T) scg_prec = scg_prec.astype(np.float32) scg_var = np.linalg.inv(scg_prec) / 1000.0 scale = tf.linalg.LinearOperatorFullMatrix(scg_var) target = tfd.MultivariateStudentTLinearOperator( loc=tf.zeros(num_dims), scale=scale, df=t_dof) elif name == "new_ill_cond_t": spec = TargetSpec( name=name, num_dims=num_dims, x_min=-5.0, x_max=5.0, y_min=-5.0, y_max=5.0, stats=None, bijector=None) rng = np.random.RandomState(seed=10) if eig_source == "linear": eigenvalues = np.linspace(1., 1000., num_dims)**-1 elif eig_source == "gamma": eigenvalues = np.sort(rng.gamma(shape=0.5, scale=1., size=num_dims)).astype(np.float32) q, _ = np.linalg.qr(rng.randn(num_dims, num_dims)) covariance = (q * eigenvalues**-1).dot(q.T).astype(np.float32) scale = tf.linalg.LinearOperatorFullMatrix(covariance) target = tfd.MultivariateStudentTLinearOperator( loc=tf.zeros(num_dims), scale=scale, df=t_dof) elif name == "logistic_reg": if regression_hier_type == "none": extra_dims = 0 else: extra_dims = 2 if regression_dataset == "covertype": x, y = utils.LoadCovertype() if regression_num_points > 0: rng = np.random.RandomState(seed=10) chosen_rows = rng.choice( x.shape[0], regression_num_points, replace=False) x = x[chosen_rows] y = y[chosen_rows] num_features = x.shape[-1] + 1 num_classes = 7 num_dims = num_features * num_classes + extra_dims x = tf.to_float(x) y = tf.to_int32(y) elif regression_dataset == "german": x, y = utils.LoadGerman() num_features = int(x.shape[-1]) + 1 num_classes = 2 num_dims = num_features * num_classes + extra_dims x = tf.to_float(x) y = tf.to_int32(y) if regression_num_points > 0: rng = np.random.RandomState(seed=10) chosen_rows = rng.choice( x.shape[0], 
regression_num_points, replace=False) x = tf.gather(x, chosen_rows) y = tf.gather(y, chosen_rows) if regression_stochastic_points > 0: chosen_rows = tf.random.uniform([int(regression_stochastic_points)], 0, int(x.shape[0]), dtype=tf.int32) x = tf.gather(x, chosen_rows) y = tf.gather(y, chosen_rows) if regression_normalize: x_min = tf.reduce_min(x, 0, keep_dims=True) x_max = tf.reduce_max(x, 0, keep_dims=True) x /= (x_max - x_min) x = 2.0 * x - 1.0 x = tf.concat([x, tf.ones([int(x.shape[0]), 1])], -1) def regular_log_prob_fn(params): if regression_hier_type == "none": beta = params beta_scaled = beta elif regression_hier_type == "centered": mu_0 = params[Ellipsis, -1] tau_0 = tf.nn.softplus(params[Ellipsis, -2]) beta = params[Ellipsis, :-2] beta_scaled = beta elif regression_hier_type == "non_centered": mu_0 = params[Ellipsis, -1] tau_0 = tf.nn.softplus(params[Ellipsis, -2]) beta = params[Ellipsis, :-2] beta_scaled = beta / tf.expand_dims(tau_0, -1) + tf.expand_dims( mu_0, -1) else: raise ValueError("Unknown regression_hier_type:" + regression_hier_type) if batch_size: def body(_, i): y_dist = tfd.Categorical( logits=tf.einsum( "ij,kjm->kim", x[i:i + batch_size], tf.reshape(beta_scaled, [-1, num_features, num_classes]))) return tf.reduce_sum(y_dist.log_prob(y[i:i + batch_size]), -1) log_prob = tf.reduce_sum( tf.scan( body, tf.range(0, x.shape[0], batch_size), initializer=tf.zeros(tf.shape(params)[:1]), parallel_iterations=1), 0) else: y_dist = tfd.Categorical( logits=tf.einsum( "ij,kjm->kim", x, tf.reshape(beta_scaled, [-1, num_features, num_classes]))) log_prob = tf.reduce_sum(y_dist.log_prob(y), -1) def make_beta_dist(loc, scale): if regression_beta_prior == "normal": return tfd.Normal(loc=loc, scale=scale) else: if tf.convert_to_tensor(loc).shape.ndims == 0: loc = tf.fill( tf.stack([tf.shape(params)[0], num_features * num_classes]), loc) if tf.convert_to_tensor(scale).shape.ndims == 0: scale = tf.fill( tf.stack([tf.shape(params)[0], num_features * num_classes]), 
scale) scale = tf.linalg.LinearOperatorDiag(scale) return tfd.MultivariateStudentTLinearOperator( loc=loc, scale=scale, df=t_dof) if regression_hier_type == "none": beta_dist = make_beta_dist(loc=0.0, scale=10.0) else: mu_0_dist = tfd.Normal(loc=0.0, scale=10.0) tau_0_dist = tfd.Gamma(2.0, 1.0) log_prob += mu_0_dist.log_prob(mu_0) + tau_0_dist.log_prob(tau_0) if regression_hier_type == "centered": mu_0 = tf.tile( tf.expand_dims(mu_0, -1), [1, num_features * num_classes]) tau_0 = tf.tile( tf.expand_dims(tau_0, -1), [1, num_features * num_classes]) beta_dist = make_beta_dist(loc=mu_0, scale=1.0 / tau_0) elif regression_hier_type == "non_centered": beta_dist = make_beta_dist(loc=0.0, scale=1.0) log_prob += tf.reduce_sum(beta_dist.log_prob(beta), -1) return log_prob def gamma_scales_log_prob_fn(params): assert num_classes == 2 def unmarshal(params): results = [] n_dimensions_used = 0 if regression_use_beta_scales: dim_list = [num_features, num_features, 1] else: dim_list = [num_features, 1] for n_to_add in dim_list: results.append( params[Ellipsis, n_dimensions_used:n_dimensions_used + n_to_add]) n_dimensions_used += n_to_add return tuple(results) log_prob = 0. 
if regression_use_beta_scales: beta, beta_log_scales, overall_log_scale = unmarshal(params) # p(per-variable scales) log_prob += tf.reduce_sum( tfd.TransformedDistribution( tfd.Gamma(0.5, 0.5), tfb.Invert(tfb.Exp())).log_prob(beta_log_scales), -1) else: beta, overall_log_scale = unmarshal(params) beta_log_scales = 0.0 # p(overall scale) log_prob += tf.reduce_sum( tfd.Normal(0., 10.).log_prob(overall_log_scale), -1) # p(beta) log_prob += tf.reduce_sum(tfd.Normal(0., 1.).log_prob(beta), -1) # p(y | x, beta) scaled_beta = beta * tf.exp(overall_log_scale) * tf.exp(beta_log_scales) if batch_size: def body(_, i): logits = tf.einsum("nd,md->mn", x[i:i + batch_size], scaled_beta) return tf.reduce_sum( tfd.Bernoulli(logits=logits).log_prob(y[i:i + batch_size]), -1) log_prob += tf.reduce_sum( tf.scan( body, tf.range(0, x.shape[0], batch_size), initializer=tf.zeros(tf.shape(params)[:1]), parallel_iterations=1), 0) else: logits = tf.einsum("nd,md->mn", x, scaled_beta) log_prob += tf.reduce_sum(tfd.Bernoulli(logits=logits).log_prob(y), -1) return log_prob def horseshoe_log_prob_fn(params): assert num_classes == 2 (z, r1_local, r2_local, r1_global, r2_global) = tf.split( params, [num_features, num_features, num_features, 1, 1], axis=-1) def indep(d): return tfd.Independent(d, 1) zero = tf.zeros(num_features) one = tf.ones(num_features) half = 0.5 * one p_z = indep(tfd.Normal(zero, one)) p_r1_local = indep(tfd.HalfNormal(one)) p_r2_local = indep(tfd.InverseGamma(half, half)) p_r1_global = indep(tfd.HalfNormal([1.])) p_r2_global = indep(tfd.InverseGamma([0.5], [0.5])) log_prob = ( p_z.log_prob(z) + p_r1_local.log_prob(r1_local) + p_r2_local.log_prob(r2_local) + p_r1_global.log_prob(r1_global) + p_r2_global.log_prob(r2_global)) lambda_ = r1_local * tf.sqrt(r2_local) tau = r1_global * tf.sqrt(r2_global) beta = z * lambda_ * tau if batch_size: def body(_, i): logits = tf.einsum("nd,md->mn", x[i:i + batch_size], beta) return tfd.Independen(tfd.Bernoulli(logits=logits), 
1).log_prob(y[i:i + batch_size]) log_prob += tf.reduce_sum( tf.scan( body, tf.range(0, x.shape[0], batch_size), initializer=tf.zeros(tf.shape(params)[:1]), parallel_iterations=1), 0) else: logits = tf.einsum("nd,md->mn", x, beta) log_prob += tfd.Independent(tfd.Bernoulli(logits=logits), 1).log_prob(y) return log_prob def gamma_scales2_log_prob_fn(params): assert num_classes == 2 (z, local_scale, global_scale) = tf.split( params, [num_features, num_features, 1], axis=-1) def indep(d): return tfd.Independent(d, 1) zero = tf.zeros(num_features) one = tf.ones(num_features) half = 0.5 * one p_z = indep(tfd.Normal(zero, one)) p_local_scale = indep(tfd.Gamma(half, half)) p_global_scale = indep(tfd.Gamma([0.5], [0.5])) log_prob = ( p_z.log_prob(z) + p_local_scale.log_prob(local_scale) + p_global_scale.log_prob(global_scale)) beta = z * local_scale * global_scale if batch_size: def body(_, i): logits = tf.einsum("nd,md->mn", x[i:i + batch_size], beta) return tfd.Independen(tfd.Bernoulli(logits=logits), 1).log_prob(y[i:i + batch_size]) log_prob += tf.reduce_sum( tf.scan( body, tf.range(0, x.shape[0], batch_size), initializer=tf.zeros(tf.shape(params)[:1]), parallel_iterations=1), 0) else: logits = tf.einsum("nd,md->mn", x, beta) log_prob += tfd.Independent(tfd.Bernoulli(logits=logits), 1).log_prob(y) return log_prob bijector = None if regression_type == "regular": log_prob_fn = regular_log_prob_fn elif regression_type == "gamma_scales": log_prob_fn = gamma_scales_log_prob_fn num_dims = num_features + 1 if regression_use_beta_scales: num_dims += num_features elif regression_type == "horseshoe": log_prob_fn = horseshoe_log_prob_fn num_dims = num_features * 3 + 2 bijector = tfb.Blockwise([tfb.Identity(), tfb.Exp()], [num_features, num_features * 2 + 2]) elif regression_type == "gamma_scales2": log_prob_fn = gamma_scales2_log_prob_fn num_dims = num_features * 2 + 1 bijector = tfb.Blockwise([tfb.Identity(), tfb.Exp()], [num_features, num_features + 1]) target = 
utils.LogProbDist(num_dims=num_dims, log_prob_fn=log_prob_fn) spec = TargetSpec( name=name, num_dims=num_dims, x_min=0.10, x_max=0.15, y_min=0.10, y_max=0.15, stats=None, bijector=bijector) elif name == "mog": comp_1 = tfd.MultivariateNormalDiag( loc=[-1., 1.] + [0.] * (num_dims - 2), scale_identity_multiplier=2.) comp_2 = tfd.MultivariateNormalDiag( loc=[1., 1.] + [0.] * (num_dims - 2), scale_identity_multiplier=4.) comp_3 = tfd.MultivariateNormalDiag( loc=[0., 0.] + [0.] * (num_dims - 2), scale_identity_multiplier=2.) cat = tfd.Categorical(logits=[0] * 3) target = tfd.Mixture(cat=cat, components=[comp_1, comp_2, comp_3]) spec = TargetSpec( name=name, num_dims=num_dims, x_min=-2., x_max=2., y_min=-2., y_max=2., stats=None, bijector=None) elif name == "easy_gaussian": spec = TargetSpec( name=name, num_dims=num_dims, x_min=-5.0, x_max=5.0, y_min=-5.0, y_max=5.0, stats=None, bijector=None) rng = np.random.RandomState(seed=10) eigenvalues = np.linspace(0.5, 2., num_dims)**-1 q, _ = np.linalg.qr(rng.randn(num_dims, num_dims)) covariance = (q * eigenvalues**-1).dot(q.T).astype(np.float32) target = tfd.MultivariateNormalFullCovariance( loc=tf.zeros(num_dims), covariance_matrix=covariance) elif name == "gp_reg": x, y = utils.LoadCloud() if regression_num_points > 0: rng = np.random.RandomState(seed=10) chosen_rows = rng.choice( x.shape[0], regression_num_points, replace=False) x = x[chosen_rows] y = y[chosen_rows] x = tf.convert_to_tensor(x, dtype=tf.float32) y = tf.convert_to_tensor(y, dtype=tf.float32) num_features = int(x.shape[-1]) num_dims = num_features + 2 def log_prob_fn(params): rho, alpha, sigma = tf.split(params, [num_features, 1, 1], -1) one = tf.ones(num_features) def indep(d): return tfd.Independent(d, 1) p_rho = indep(tfd.InverseGamma(5. * one, 5. 
* one)) p_alpha = indep(tfd.HalfNormal([1.])) p_sigma = indep(tfd.HalfNormal([1.])) rho_shape = tf.shape(rho) alpha_shape = tf.shape(alpha) x1 = tf.expand_dims(x, -2) x2 = tf.expand_dims(x, -3) exp = -0.5 * tf.squared_difference(x1, x2) exp /= tf.reshape(tf.square(rho), tf.concat([rho_shape[:1], [1, 1], rho_shape[1:]], 0)) exp = tf.reduce_sum(exp, -1, keep_dims=True) exp += 2. * tf.reshape(tf.log(alpha), tf.concat([alpha_shape[:1], [1, 1], alpha_shape[1:]], 0)) exp = tf.exp(exp[Ellipsis, 0]) exp += tf.matrix_diag(tf.tile(tf.square(sigma), [1, int(x.shape[0])]) + 1e-6) exp = tf.check_numerics(exp, "exp 2 has NaNs") with tf.control_dependencies([tf.print(exp[0], summarize=99999)]): exp = tf.identity(exp) p_y = tfd.MultivariateNormalFullCovariance( covariance_matrix=exp) log_prob = ( p_rho.log_prob(rho) + p_alpha.log_prob(alpha) + p_sigma.log_prob(sigma) + p_y.log_prob(y)) return log_prob bijector = tfb.Softplus()#tfb.Exp() target = utils.LogProbDist(num_dims=num_dims, log_prob_fn=log_prob_fn) spec = TargetSpec( name=name, num_dims=num_dims, x_min=0.10, x_max=0.15, y_min=0.10, y_max=0.15, stats=None, bijector=bijector) if precomputed_stats_path is not None: with tf.gfile.Open(precomputed_stats_path) as f: stats = simplejson.load(f) stats = {k: np.array(v) for k, v in stats.items()} spec = spec._replace(stats=stats) return target, spec._replace(**kwargs)
def fn(args):
    """Gather slices of a tensor at the given indices.

    Args:
      args: a two-element sequence `(y, index)`; the first element is the
        tensor to read from and the second holds the indices to look up.

    Returns:
      The result of `tf.gather(y, index)`.
    """
    params, lookup = args
    return tf.gather(params, lookup)
def get_predictions_and_loss(self, inputs):
    """Build the span-scoring graph and return its logits and loss.

    Word, character-CNN and (optionally) language-model embeddings are
    concatenated, contextualized by `self.lstm_contextualize`, projected into
    separate start/end representations and scored for every (start, end) pair
    with a bilinear classifier. Only pairs where both positions lie inside the
    sentence and `end >= start` are kept.

    Args:
      inputs: a 7-tuple `(tokens, context_word_emb, lm_emb, char_index,
        text_len, is_training, gold_labels)`.

    Returns:
      candidate_ner_scores: logits of the kept candidates, with
        `self.num_types + 1` entries per candidate.
      loss: the sparse softmax cross-entropy against `gold_labels`, summed
        over all kept candidates.
    """
    (tokens, context_word_emb, lm_emb, char_index, text_len, is_training,
     gold_labels) = inputs

    # Dropout settings are stored on the instance.
    self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
    self.lexical_dropout = self.get_dropout(
        self.config["lexical_dropout_rate"], is_training)
    self.lstm_dropout = self.get_dropout(
        self.config["lstm_dropout_rate"], is_training)

    num_sentences = tf.shape(tokens)[0]
    max_sentence_length = tf.shape(tokens)[1]

    # ---- Character-level embeddings aggregated by a CNN. ----
    char_table = tf.get_variable(
        "char_embeddings",
        [len(self.char_dict), self.config["char_embedding_size"]])
    # [num_sentences, max_sentence_length, max_word_length, emb]
    char_emb = tf.gather(char_table, char_index)
    # [num_sentences * max_sentence_length, max_word_length, emb]
    flat_char_emb = tf.reshape(
        char_emb,
        [num_sentences * max_sentence_length,
         util.shape(char_emb, 2),
         util.shape(char_emb, 3)])
    # [num_sentences * max_sentence_length, emb]
    flat_char_features = util.cnn(
        flat_char_emb,
        self.config["filter_widths"],
        self.config["filter_size"])
    # [num_sentences, max_sentence_length, emb]
    char_features = tf.reshape(
        flat_char_features,
        [num_sentences, max_sentence_length,
         util.shape(flat_char_features, 1)])

    embeddings = [context_word_emb, char_features]

    # ---- Optional contextualized (language model) embeddings. ----
    if self.lm_file is not None:
        lm_emb_size = util.shape(lm_emb, 2)
        lm_num_layers = util.shape(lm_emb, 3)
        with tf.variable_scope("lm_aggregation"):
            lm_scores = tf.get_variable(
                "lm_scores", [lm_num_layers],
                initializer=tf.constant_initializer(0.0))
            self.lm_weights = tf.nn.softmax(lm_scores)
            self.lm_scaling = tf.get_variable(
                "lm_scaling", [], initializer=tf.constant_initializer(1.0))
        # Mix the LM layers with the softmax weights via one matmul.
        flat_lm_emb = tf.reshape(
            lm_emb,
            [num_sentences * max_sentence_length * lm_emb_size,
             lm_num_layers])
        # [num_sentences * max_sentence_length * emb, 1]
        flat_lm_mix = tf.matmul(
            flat_lm_emb, tf.expand_dims(self.lm_weights, 1))
        lm_mix = tf.reshape(
            flat_lm_mix, [num_sentences, max_sentence_length, lm_emb_size])
        lm_mix *= self.lm_scaling
        embeddings.append(lm_mix)

    # [num_sentences, max_sentence_length, emb]
    context_emb = tf.concat(embeddings, 2)
    context_emb = tf.nn.dropout(context_emb, self.lexical_dropout)

    # ---- Mask of valid (start, end) candidates. ----
    # [num_sentences, max_sentence_length]
    text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
    # Both endpoints must be real tokens.
    # [num_sentences, max_sentence_length, max_sentence_length]
    both_in_sentence = tf.logical_and(
        tf.expand_dims(text_len_mask, [1]),
        tf.expand_dims(text_len_mask, [2]))
    # Row i of the sequence mask is True for columns j < i; negating it keeps
    # the pairs with j >= i.
    end_not_before_start = tf.logical_not(
        tf.sequence_mask(tf.range(max_sentence_length), max_sentence_length))
    end_not_before_start = tf.tile(
        tf.expand_dims(end_not_before_start, 0), [num_sentences, 1, 1])
    candidate_scores_mask = tf.logical_and(
        both_in_sentence, end_not_before_start)
    # [num_sentences * max_sentence_length * max_sentence_length]
    flat_candidate_mask = tf.reshape(candidate_scores_mask, [-1])

    # ---- Contextual encoder and span scorer. ----
    # [num_sentences, max_sentence_length, emb]
    context_outputs = self.lstm_contextualize(
        context_emb, text_len, self.lstm_dropout)

    with tf.variable_scope("candidate_starts_ffnn"):
        # [num_sentences, max_sentence_length, emb]
        starts_emb = util.projection(
            context_outputs, self.config["ffnn_size"])
    with tf.variable_scope("candidate_ends_ffnn"):
        # [num_sentences, max_sentence_length, emb]
        ends_emb = util.projection(
            context_outputs, self.config["ffnn_size"])

    # [num_sentences, max_sentence_length, max_sentence_length, types + 1]
    all_pair_scores = util.bilinear_classifier(
        starts_emb, ends_emb, self.dropout,
        output_size=self.num_types + 1)
    flat_pair_scores = tf.reshape(all_pair_scores, [-1, self.num_types + 1])
    candidate_ner_scores = tf.boolean_mask(
        flat_pair_scores, flat_candidate_mask)

    per_candidate_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=gold_labels, logits=candidate_ner_scores)
    loss = tf.reduce_sum(per_candidate_loss)
    return candidate_ner_scores, loss
def do_on_tensor(a):
    """Keep only the rows of `a` that belong to the entries in `grad_idx`.

    `a` is viewed as `[n_words, n_word_feat, n_feat]`, the leading axis is
    gathered with `grad_idx`, and the result is flattened back to two
    dimensions. `grad_idx`, `n_words` and `n_word_feat` come from the
    enclosing scope.

    Args:
      a: a tensor whose second static dimension is the feature size.

    Returns:
      A tensor of shape `[tf.size(grad_idx) * n_word_feat, n_feat]`.
    """
    feature_dim = a.shape.as_list()[1]
    active_count = tf.size(grad_idx)
    per_word = tf.reshape(a, [n_words, n_word_feat, feature_dim])
    selected = tf.gather(per_word, grad_idx)
    return tf.reshape(selected, [active_count * n_word_feat, feature_dim])
def body_rank_update(self, log_weights, log_likelihood, log_likelihood_tilde,
                     jump_chains, jump_chain_tensor, core,
                     leafnode_num_record, left_branches, right_branches,
                     v_minus, potentials, r):
    """Run one rank-event iteration and return the updated loop state.

    Define tensors for log_weights, log_likelihood, jump_chain_tensor and core
    (state data for distribution over characters for ancestral taxa) by
    iterating over rank events.

    The incoming `v_minus` and `potentials` are not read; both are recomputed
    here and returned. The argument order matches the return order, so this
    looks like a loop body for `tf.while_loop` -- confirm against the caller.

    Returns:
      A 12-tuple in the same order as the parameters, with `log_weights`,
      `log_likelihood`, `left_branches` and `right_branches` each extended by
      one entry along axis 0 and `r` incremented by one.
    """
    # Resample. Both branches receive exactly the same arguments; the first
    # rank event (r == 0) takes the `cond_false_resample` path.
    resample_args = (log_likelihood_tilde, core, leafnode_num_record,
                     log_weights, log_likelihood, jump_chains,
                     jump_chain_tensor, r)
    (log_likelihood_tilde, core, leafnode_num_record, jump_chains,
     jump_chain_tensor) = tf.cond(
         r > 0,
         lambda: self.cond_true_resample(*resample_args),
         lambda: self.cond_false_resample(*resample_args))

    # Twist the proposal
    potentials, map_to_indices, l_br, r_br = self.compute_potentials(
        r, core, leafnode_num_record)

    # Extend partial states
    (coalesced_indices, remaining_indices, q_log_proposal, l_br, r_br,
     jump_chain_tensor) = self.extend_partial_state(
         jump_chain_tensor, potentials, map_to_indices, l_br, r_br, r)

    # Branch lengths
    left_branches = tf.concat([left_branches, [l_br]], axis=0)
    right_branches = tf.concat([right_branches, [r_br]], axis=0)

    # Update partial set data
    n_current = self.N - r
    remaining_core = gather_across_core(
        core, remaining_indices, n_current, n_current - 2,
        self.A)  # Kx(N-r-2)xSxA
    coalesced_by_side = tf.transpose(coalesced_indices)
    l_coalesced_indices = tf.reshape(
        tf.gather(coalesced_by_side, 0), (self.K, 1))
    r_coalesced_indices = tf.reshape(
        tf.gather(tf.transpose(coalesced_indices), 1), (self.K, 1))
    l_data_KxSxA = tf.squeeze(
        gather_across_core(core, l_coalesced_indices, n_current, 1, self.A))
    r_data_KxSxA = tf.squeeze(
        gather_across_core(core, r_coalesced_indices, n_current, 1, self.A))
    merged_KxSxA = self.broadcast_conditional_likelihood_K(
        l_data_KxSxA, r_data_KxSxA, l_br, r_br)
    merged_Kx1xSxA = tf.expand_dims(merged_KxSxA, axis=1)
    core = tf.concat([remaining_core, merged_Kx1xSxA], axis=1)  # Kx(N-r-1)xSxA

    # The merged entry's leaf count is the sum of the two coalesced entries.
    remaining_leafnode_num_record = gather_across_2d(
        leafnode_num_record, remaining_indices, n_current, n_current - 2)
    coalesced_leafnode_num = gather_across_2d(
        leafnode_num_record, coalesced_indices, n_current, 2)
    new_leafnode_num = tf.expand_dims(
        tf.reduce_sum(coalesced_leafnode_num, axis=1), axis=1)
    leafnode_num_record = tf.concat(
        [remaining_leafnode_num_record, new_leafnode_num], axis=1)

    # Compute weights
    log_likelihood_r = self.compute_forest_posterior(
        core, leafnode_num_record, r)

    left_branches_param_r = tf.gather(self.left_branches_param, r)
    right_branches_param_r = tf.gather(self.right_branches_param, r)
    # Rows 1..r+1 of the branch records.
    left_branches_select = tf.gather(
        left_branches, tf.range(1, r + 2))  # (r+1)xK
    right_branches_select = tf.gather(
        right_branches, tf.range(1, r + 2))  # (r+1)xK
    left_branches_logprior = tf.reduce_sum(
        -left_branches_param_r * left_branches_select
        + tf.log(left_branches_param_r),
        axis=0)
    right_branches_logprior = tf.reduce_sum(
        -right_branches_param_r * right_branches_select
        + tf.log(right_branches_param_r),
        axis=0)
    log_likelihood_r = (
        log_likelihood_r + left_branches_logprior + right_branches_logprior)

    v_minus = self.overcounting_correct(leafnode_num_record)

    l_branch = tf.gather(left_branches, r + 1)
    r_branch = tf.gather(right_branches, r + 1)
    # log(rate) - rate * branch for the newest left and right branches.
    newest_branch_terms = (
        tf.log(left_branches_param_r) - left_branches_param_r * l_branch
        + tf.log(right_branches_param_r) - right_branches_param_r * r_branch)
    log_weights_r = (
        log_likelihood_r - log_likelihood_tilde - newest_branch_terms
        + tf.log(tf.cast(v_minus, tf.float64)) - q_log_proposal)

    log_weights = tf.concat([log_weights, [log_weights_r]], axis=0)
    # pi(t) = pi(Y|t, b, theta) * pi(t, b|theta) / pi(Y)
    log_likelihood = tf.concat([log_likelihood, [log_likelihood_r]], axis=0)

    r = r + 1

    return (log_weights, log_likelihood, log_likelihood_tilde, jump_chains,
            jump_chain_tensor, core, leafnode_num_record, left_branches,
            right_branches, v_minus, potentials, r)
def get_2D_slice(mat, indices):
    """Gather from a flattened matrix using per-row column offsets.

    The matrix is flattened and position `row * n_cols + indices` is read for
    `row` in `range(n_rows)`. When `indices` holds one column index per row
    this yields `mat[row, indices[row]]` for every row.

    Args:
      mat: the tensor to read from; its first two dimensions are taken as
        `(n_rows, n_cols)`.
      indices: integer tensor of column indices, added element-wise to the
        row offsets.

    Returns:
      The gathered values.
    """
    dims = tf.shape(mat)
    n_rows = dims[0]
    n_cols = dims[1]
    row_ids = tf.range(n_rows)
    flat = tf.reshape(mat, [-1])
    flat_positions = row_ids * n_cols + indices
    return tf.gather(flat, flat_positions)
def main(unused_argv=None):
    """Fine-tune the InstanceNorm variables of the 'transformer' network.

    Builds the input pipelines, the stylization model and its losses, restores
    the 'vgg_16' variables and the non-InstanceNorm 'transformer' variables
    from checkpoints, then trains only the variables whose name contains
    'InstanceNorm'.

    Raises:
      ValueError: if the number of style coefficients differs from
        `FLAGS.num_styles`.
    """
    with tf.Graph().as_default():
        # Force all input processing onto CPU in order to reserve the GPU for
        # the forward inference and back-propagation.
        cpu_device = '/job:worker/cpu:0' if FLAGS.ps_tasks else '/cpu:0'
        input_device = tf.train.replica_device_setter(
            FLAGS.ps_tasks, worker_device=cpu_device)
        with tf.device(input_device):
            inputs, _ = image_utils.imagenet_inputs(
                FLAGS.batch_size, FLAGS.image_size)
            # Load style images and select one at random (for each graph
            # execution, a new random selection occurs).
            style_inputs = image_utils.style_image_inputs(
                os.path.expanduser(FLAGS.style_dataset_file),
                batch_size=FLAGS.batch_size,
                image_size=FLAGS.image_size,
                square_crop=True,
                shuffle=True)
            _, style_labels, style_gram_matrices = style_inputs

        with tf.device(tf.train.replica_device_setter(FLAGS.ps_tasks)):
            # Process style and weight flags.
            num_styles = FLAGS.num_styles
            if FLAGS.style_coefficients is not None:
                style_coefficients = ast.literal_eval(
                    FLAGS.style_coefficients)
            else:
                style_coefficients = [1.0 for _ in range(num_styles)]
            if len(style_coefficients) != num_styles:
                raise ValueError(
                    'number of style coefficients differs from number of '
                    'styles')
            content_weights = ast.literal_eval(FLAGS.content_weights)
            style_weights = ast.literal_eval(FLAGS.style_weights)

            # Rescale style weights dynamically based on the current style
            # image.
            style_coefficient = tf.gather(
                tf.constant(style_coefficients), style_labels)
            style_weights = {
                layer: style_coefficient * weight
                for layer, weight in style_weights.items()
            }

            # Define the model.
            normalizer_params = {
                'labels': style_labels,
                'num_categories': num_styles,
                'center': True,
                'scale': True,
            }
            stylized_inputs = model.transform(
                inputs,
                alpha=FLAGS.alpha,
                normalizer_params=normalizer_params)

            # Compute losses and export each component as a scalar summary.
            total_loss, loss_dict = learning.total_loss(
                inputs, stylized_inputs, style_gram_matrices,
                content_weights, style_weights)
            for loss_name, loss_value in loss_dict.items():
                tf.summary.scalar(loss_name, loss_value)

            # Split the transformer variables: InstanceNorm ones are trained,
            # the others are restored from the checkpoint.
            transformer_vars = slim.get_variables('transformer')
            instance_norm_vars = [
                v for v in transformer_vars if 'InstanceNorm' in v.name]
            other_vars = [
                v for v in transformer_vars if 'InstanceNorm' not in v.name]

            # Function to restore VGG16 parameters.
            init_fn_vgg = slim.assign_from_checkpoint_fn(
                vgg.checkpoint_file(), slim.get_variables('vgg_16'))

            # Function to restore N-styles parameters.
            init_fn_n_styles = slim.assign_from_checkpoint_fn(
                os.path.expanduser(FLAGS.checkpoint), other_vars)

            def init_fn(session):
                """Restore both checkpoints into `session`."""
                init_fn_vgg(session)
                init_fn_n_styles(session)

            # Set up training.
            optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
            train_op = slim.learning.create_train_op(
                total_loss,
                optimizer,
                clip_gradient_norm=FLAGS.clip_gradient_norm,
                variables_to_train=instance_norm_vars,
                summarize_gradients=False)

            # Run training.
            slim.learning.train(
                train_op=train_op,
                logdir=os.path.expanduser(FLAGS.train_dir),
                master=FLAGS.master,
                is_chief=FLAGS.task == 0,
                number_of_steps=FLAGS.train_steps,
                init_fn=init_fn,
                save_summaries_secs=FLAGS.save_summaries_secs,
                save_interval_secs=FLAGS.save_interval_secs)
def _generate_detections_per_image(boxes,
                                   scores,
                                   max_total_size=100,
                                   nms_iou_threshold=0.3,
                                   score_threshold=0.05,
                                   pre_nms_num_boxes=5000):
    """Generate the final detections per image given the model outputs.

    Each class is processed independently: its top-scoring boxes are selected,
    padded non-max suppression is applied, and padded slots get a score of -1.
    The per-class results are then concatenated and the overall
    `max_total_size` highest-scoring entries are returned.

    Args:
      boxes: a tensor with shape [N, num_classes, 4] or [N, 1, 4], which
        stacks box predictions on all feature levels. The N is the number of
        total anchors on all levels.
      scores: a tensor with shape [N, num_classes], which stacks class
        probability on all feature levels. The N is the number of total
        anchors on all levels. The num_classes is the number of classes
        predicted by the model. Note that the class_outputs here is the raw
        score.
      max_total_size: a scalar representing maximum number of boxes retained
        over all classes.
      nms_iou_threshold: a float representing the threshold for deciding
        whether boxes overlap too much with respect to IOU.
      score_threshold: a float representing the threshold for deciding when to
        remove boxes based on score.
      pre_nms_num_boxes: an int number of top candidate detections per class
        before NMS.

    Returns:
      nmsed_boxes: Tensor of shape [max_total_size, 4] holding the top
        detected boxes (gathered from `boxes`, so in the same coordinate
        layout).
      nmsed_scores: Tensor of shape [max_total_size] with the scores sorted in
        descending order; padded (invalid) entries are -1.
      nmsed_classes: `int` Tensor of shape [max_total_size] representing
        classes for detected boxes.
      valid_detections: scalar `int32` Tensor counting the entries whose score
        is greater than -1; only the top `valid_detections` boxes are valid.
    """
    num_classes_for_box = boxes.get_shape().as_list()[1]
    num_classes = scores.get_shape().as_list()[1]

    per_class_boxes = []
    per_class_scores = []
    per_class_ids = []
    for class_id in range(num_classes):
        # With class-agnostic boxes ([N, 1, 4]) every class reads slot 0.
        class_boxes = boxes[:, min(num_classes_for_box - 1, class_id)]
        class_scores = scores[:, class_id]

        # Obtains pre_nms_num_boxes before running NMS.
        num_candidates = tf.minimum(
            tf.shape(class_scores)[-1], pre_nms_num_boxes)
        class_scores, candidate_idx = tf.nn.top_k(
            class_scores, k=num_candidates)
        class_boxes = tf.gather(class_boxes, candidate_idx)

        kept_idx, num_kept = tf.image.non_max_suppression_padded(
            tf.cast(class_boxes, tf.float32),
            tf.cast(class_scores, tf.float32),
            max_total_size,
            iou_threshold=nms_iou_threshold,
            score_threshold=score_threshold,
            pad_to_max_output_size=True,
            name='nms_detections_' + str(class_id))
        kept_boxes = tf.gather(class_boxes, kept_idx)
        kept_scores = tf.gather(class_scores, kept_idx)

        # Sets scores of invalid boxes to -1.
        slot_is_valid = tf.less(tf.range(max_total_size), [num_kept])
        kept_scores = tf.where(
            slot_is_valid, kept_scores, -tf.ones_like(kept_scores))

        per_class_boxes.append(kept_boxes)
        per_class_scores.append(kept_scores)
        per_class_ids.append(tf.fill([max_total_size], class_id))

    # Concats results from all classes and sort them.
    merged_boxes = tf.concat(per_class_boxes, axis=0)
    merged_scores = tf.concat(per_class_scores, axis=0)
    merged_classes = tf.concat(per_class_ids, axis=0)
    nmsed_scores, order = tf.nn.top_k(
        merged_scores, k=max_total_size, sorted=True)
    nmsed_boxes = tf.gather(merged_boxes, order)
    nmsed_classes = tf.gather(merged_classes, order)

    has_detection = tf.greater(nmsed_scores, -1)
    valid_detections = tf.reduce_sum(tf.cast(has_detection, tf.int32))
    return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
def _parse_train_data(self, data):
    """Parse data for ShapeMask training.

    Filters and augments the ground truth, resizes the image, labels the
    anchors, and samples a fixed number of (jittered) boxes with binarized
    mask targets for the mask branch.

    Args:
      data: a dict providing 'image', 'groundtruth_classes',
        'groundtruth_boxes', 'groundtruth_instance_masks' and
        'groundtruth_is_crowd'.

    Returns:
      image: the normalized, resized and cropped image (cast to bfloat16 when
        `self._use_bfloat16` is set).
      labels: a dict with the anchor targets ('cls_targets', 'box_targets',
        'anchor_boxes', 'num_positives', 'image_info') and the mask-branch
        targets ('mask_boxes', 'mask_outer_boxes', 'mask_targets',
        'fine_mask_targets', 'mask_classes', 'mask_is_valid').
    """
    classes = data['groundtruth_classes']
    boxes = data['groundtruth_boxes']
    masks = data['groundtruth_instance_masks']
    is_crowds = data['groundtruth_is_crowd']

    # Skips annotations with `is_crowd` = True.
    if self._skip_crowd_during_training and self._is_training:
        num_groundtruths = tf.shape(classes)[0]
        with tf.control_dependencies([num_groundtruths, is_crowds]):
            # An empty `is_crowds` keeps every annotation.
            indices = tf.cond(
                tf.greater(tf.size(is_crowds), 0),
                lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
        classes = tf.gather(classes, indices)
        boxes = tf.gather(boxes, indices)
        masks = tf.gather(masks, indices)

    # Gets original image and its size.
    image = data['image']
    image_shape = tf.shape(image)[0:2]

    # If not using category, makes all categories with id = 0.
    if not self._use_category:
        classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)

    # Normalizes image with mean and std pixel values.
    image = input_utils.normalize_image(image)

    # Flips image randomly during training.
    if self._aug_rand_hflip:
        image, boxes, masks = input_utils.random_horizontal_flip(
            image, boxes, masks)

    # Converts boxes from normalized coordinates to pixel coordinates.
    boxes = box_utils.denormalize_boxes(boxes, image_shape)

    # Resizes and crops image.
    image, image_info = input_utils.resize_and_crop_image(
        image,
        self._output_size,
        self._output_size,
        aug_scale_min=self._aug_scale_min,
        aug_scale_max=self._aug_scale_max)
    image_scale = image_info[2, :]
    offset = image_info[3, :]

    # Resizes and crops boxes.
    boxes = input_utils.resize_and_crop_boxes(
        boxes, image_scale, self._output_size, offset)

    # Filters out ground truth boxes that are all zeros.
    non_empty = box_utils.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, non_empty)
    classes = tf.gather(classes, non_empty)
    masks = tf.gather(masks, non_empty)

    # Assigns anchors.
    input_anchor = anchor.Anchor(
        self._min_level, self._max_level, self._num_scales,
        self._aspect_ratios, self._anchor_size, self._output_size)
    anchor_labeler = anchor.AnchorLabeler(
        input_anchor, self._match_threshold, self._unmatched_threshold)
    cls_targets, box_targets, num_positives = anchor_labeler.label_anchors(
        boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32))

    # Sample groundtruth masks/boxes/classes for mask branch.
    num_sampled = self._num_sampled_masks
    num_masks = tf.shape(masks)[0]
    mask_shape = tf.shape(masks)[1:3]

    # Pad sampled boxes/masks/classes to a constant batch size.
    padded_boxes = input_utils.pad_to_fixed_size(boxes, num_sampled)
    padded_classes = input_utils.pad_to_fixed_size(classes, num_sampled)
    padded_masks = input_utils.pad_to_fixed_size(masks, num_sampled)

    # Randomly sample groundtruth masks for mask branch training. For the
    # image without groundtruth masks, it will sample the dummy padded
    # tensors. The modulo wraps indices back onto real instances when there
    # are fewer instances than samples.
    rand_indices = tf.random.shuffle(
        tf.range(tf.maximum(num_masks, num_sampled)))
    rand_indices = tf.mod(rand_indices, tf.maximum(num_masks, 1))
    rand_indices = rand_indices[0:num_sampled]
    rand_indices = tf.reshape(rand_indices, [num_sampled])

    sampled_boxes = tf.gather(padded_boxes, rand_indices)
    sampled_classes = tf.gather(padded_classes, rand_indices)
    sampled_masks = tf.gather(padded_masks, rand_indices)

    # Jitter the sampled boxes to mimic the noisy detections.
    sampled_boxes = box_utils.jitter_boxes(
        sampled_boxes, noise_scale=self._box_jitter_scale)
    sampled_boxes = box_utils.clip_boxes(sampled_boxes, self._output_size)

    # Compute mask targets in feature crop. A feature crop fully contains a
    # sampled box.
    mask_outer_boxes = box_utils.compute_outer_boxes(
        sampled_boxes, tf.shape(image)[0:2], scale=self._outer_box_scale)
    mask_outer_boxes = box_utils.clip_boxes(
        mask_outer_boxes, self._output_size)

    # Compensate the offset of mask_outer_boxes to map it back to original
    # image scale.
    box_offset = tf.tile(tf.expand_dims(offset, axis=0), [1, 2])
    mask_outer_boxes_ori = mask_outer_boxes + box_offset
    box_scale = tf.tile(tf.expand_dims(image_scale, axis=0), [1, 2])
    mask_outer_boxes_ori = mask_outer_boxes_ori / box_scale
    norm_mask_outer_boxes_ori = box_utils.normalize_boxes(
        mask_outer_boxes_ori, mask_shape)

    # Set sampled_masks shape to [batch_size, height, width, 1].
    sampled_masks = tf.cast(
        tf.expand_dims(sampled_masks, axis=-1), tf.float32)

    def crop_and_binarize(side):
        """Crop every sampled mask to [side, side] and threshold at 0.5."""
        crops = tf.image.crop_and_resize(
            sampled_masks,
            norm_mask_outer_boxes_ori,
            box_ind=tf.range(num_sampled),
            crop_size=[side, side],
            method='bilinear',
            extrapolation_value=0,
            name='train_mask_targets')
        crops = tf.where(
            tf.greater_equal(crops, 0.5),
            tf.ones_like(crops),
            tf.zeros_like(crops))
        return tf.squeeze(crops, axis=-1)

    mask_targets = crop_and_binarize(self._mask_crop_size)
    if self._up_sample_factor > 1:
        fine_mask_targets = crop_and_binarize(
            self._mask_crop_size * self._up_sample_factor)
    else:
        fine_mask_targets = mask_targets

    # If bfloat16 is used, casts input image to tf.bfloat16.
    if self._use_bfloat16:
        image = tf.cast(image, dtype=tf.bfloat16)

    # 1 when the image has at least one mask, else 0.
    valid_image = tf.cast(tf.not_equal(num_masks, 0), tf.int32)
    if self._mask_train_class == 'all':
        mask_is_valid = valid_image * tf.ones_like(sampled_classes, tf.int32)
    else:
        # Get the intersection of sampled classes with training splits.
        mask_valid_classes = tf.cast(
            tf.expand_dims(
                class_utils.coco_split_class_ids(self._mask_train_class), 1),
            sampled_classes.dtype)
        in_split = tf.reduce_any(
            tf.equal(tf.expand_dims(sampled_classes, 0), mask_valid_classes),
            0)
        mask_is_valid = valid_image * tf.cast(in_split, tf.int32)

    # Packs labels for model_fn outputs.
    labels = {
        'cls_targets': cls_targets,
        'box_targets': box_targets,
        'anchor_boxes': input_anchor.multilevel_boxes,
        'num_positives': num_positives,
        'image_info': image_info,
        # For ShapeMask.
        'mask_boxes': sampled_boxes,
        'mask_outer_boxes': mask_outer_boxes,
        'mask_targets': mask_targets,
        'fine_mask_targets': fine_mask_targets,
        'mask_classes': sampled_classes,
        'mask_is_valid': mask_is_valid,
    }
    return image, labels
def _enas_layer(self, layer_id, prev_layers, arc, out_filters):
    """Build one ENAS layer out of `self.num_cells` two-input cells.

    Each cell reads two earlier outputs (chosen by `arc`), applies an op to
    each through `self._enas_cell`, and adds the results. Outputs that were
    never consumed by any cell are concatenated along the channel axis and
    fused by a 1x1 convolution followed by batch norm.

    Args:
      layer_id: current layer (not read in this method).
      prev_layers: cache of previous layers, for skip connections. Must hold
        exactly two tensors.
      arc: the architecture encoding; cell `c` uses entries `4*c .. 4*c+3`
        as (x input id, x op id, y input id, y op id).
      out_filters: number of output filters.

    Returns:
      The layer output, reshaped to the shape of the first calibrated input.

    Raises:
      ValueError: if `self.data_format` is neither "NHWC" nor "NCHW".
    """
    assert len(prev_layers) == 2, "need exactly 2 inputs"
    layers = [prev_layers[0], prev_layers[1]]
    layers = self._maybe_calibrate_size(layers, out_filters, is_training=True)

    used = []
    for cell_id in range(self.num_cells):
        # NOTE: `prev_layers` is rebound to the stacked outputs so far.
        prev_layers = tf.stack(layers, axis=0)
        with tf.variable_scope("cell_{0}".format(cell_id)):
            branch_outputs = []
            for scope_name, slot in (("x", 0), ("y", 2)):
                with tf.variable_scope(scope_name):
                    input_id = arc[4 * cell_id + slot]
                    op_id = arc[4 * cell_id + slot + 1]
                    branch = prev_layers[input_id, :, :, :, :]
                    branch = self._enas_cell(
                        branch, cell_id, input_id, op_id, out_filters)
                    used.append(
                        tf.one_hot(input_id, depth=self.num_cells + 2,
                                   dtype=tf.int32))
                branch_outputs.append(branch)
            out = branch_outputs[0] + branch_outputs[1]
            layers.append(out)

    # Indices of the outputs that no cell consumed.
    usage_count = tf.add_n(used)
    indices = tf.where(tf.equal(usage_count, 0))
    indices = tf.to_int32(indices)
    indices = tf.reshape(indices, [-1])
    num_outs = tf.size(indices)
    out = tf.stack(layers, axis=0)
    out = tf.gather(out, indices, axis=0)

    # Merge the unused outputs into the channel axis.
    inp = prev_layers[0]
    if self.data_format == "NHWC":
        batch, height, width, _ = [tf.shape(inp)[axis] for axis in range(4)]
        out = tf.transpose(out, [1, 2, 3, 0, 4])
        out = tf.reshape(out, [batch, height, width, num_outs * out_filters])
    elif self.data_format == "NCHW":
        batch, _, height, width = [tf.shape(inp)[axis] for axis in range(4)]
        out = tf.transpose(out, [1, 0, 2, 3, 4])
        out = tf.reshape(out, [batch, num_outs * out_filters, height, width])
    else:
        raise ValueError("Unknown data_format '{0}'".format(self.data_format))

    with tf.variable_scope("final_conv"):
        # One row of 1x1-conv weights per possible output; only the rows of
        # the unused outputs are kept.
        w = create_weight("w", [self.num_cells + 2, out_filters * out_filters])
        w = tf.gather(w, indices, axis=0)
        w = tf.reshape(w, [1, 1, num_outs * out_filters, out_filters])
        out = tf.nn.relu(out)
        out = tf.nn.conv2d(
            out, w, strides=[1, 1, 1, 1], padding="SAME",
            data_format=self.data_format)
        out = batch_norm(out, is_training=True, data_format=self.data_format)

    out = tf.reshape(out, tf.shape(prev_layers[0]))
    return out
def _parse_predict_data(self, data):
    """Parse data for ShapeMask prediction.

    Normalizes and resizes the image (no scale augmentation) and builds the
    anchor boxes. When `self._mode` is `ModeKeys.PREDICT_WITH_GT` the padded
    groundtruths and the anchor targets are added to the labels as well.

    Args:
      data: a dict providing 'image', 'groundtruth_classes',
        'groundtruth_boxes' and 'groundtruth_instance_masks'; in
        PREDICT_WITH_GT mode also 'source_id', 'groundtruth_area' and
        'groundtruth_is_crowd'.

    Returns:
      A dict with keys 'images' and 'labels'.
    """
    classes = data['groundtruth_classes']
    boxes = data['groundtruth_boxes']
    masks = data['groundtruth_instance_masks']

    # Gets original image and its size.
    image = data['image']
    image_shape = tf.shape(image)[0:2]

    # If not using category, makes all categories with id = 0.
    if not self._use_category:
        classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)

    # Normalizes image with mean and std pixel values.
    image = input_utils.normalize_image(image)

    # Converts boxes from normalized coordinates to pixel coordinates.
    boxes = box_utils.denormalize_boxes(boxes, image_shape)

    # Resizes and crops image.
    image, image_info = input_utils.resize_and_crop_image(
        image,
        self._output_size,
        self._output_size,
        aug_scale_min=1.0,
        aug_scale_max=1.0)
    image_scale = image_info[2, :]
    offset = image_info[3, :]

    # Resizes and crops boxes and masks.
    boxes = input_utils.resize_and_crop_boxes(
        boxes, image_scale, self._output_size, offset)
    # NOTE: the resized masks are not consumed below; the 'masks' groundtruth
    # entry (tf.squeeze(masks, axis=-1)) is currently disabled.
    masks = input_utils.resize_and_crop_masks(
        tf.expand_dims(masks, axis=-1), image_scale, self._output_size,
        offset)

    # Filters out ground truth boxes that are all zeros.
    non_empty = box_utils.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, non_empty)
    classes = tf.gather(classes, non_empty)

    # Assigns anchors.
    input_anchor = anchor.Anchor(
        self._min_level, self._max_level, self._num_scales,
        self._aspect_ratios, self._anchor_size, self._output_size)
    anchor_labeler = anchor.AnchorLabeler(
        input_anchor, self._match_threshold, self._unmatched_threshold)

    # If bfloat16 is used, casts input image to tf.bfloat16.
    if self._use_bfloat16:
        image = tf.cast(image, dtype=tf.bfloat16)

    labels = {
        'anchor_boxes': input_anchor.multilevel_boxes,
        'image_info': image_info,
    }

    if self._mode == ModeKeys.PREDICT_WITH_GT:
        # Groundtruths are taken from the raw `data` entries; boxes are
        # converted from normalized to pixel coordinates of the original
        # image.
        groundtruths = {
            'source_id': dataloader_utils.process_source_id(
                data['source_id']),
            'num_detections': tf.shape(data['groundtruth_classes']),
            'boxes': box_utils.denormalize_boxes(
                data['groundtruth_boxes'], image_shape),
            'classes': data['groundtruth_classes'],
            'areas': data['groundtruth_area'],
            'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
        }
        groundtruths = dataloader_utils.pad_groundtruths_to_fixed_size(
            groundtruths, self._max_num_instances)

        # Computes training labels.
        cls_targets, box_targets, num_positives = (
            anchor_labeler.label_anchors(
                boxes,
                tf.cast(tf.expand_dims(classes, axis=1), tf.float32)))

        # Packs labels for model_fn outputs.
        labels['cls_targets'] = cls_targets
        labels['box_targets'] = box_targets
        labels['num_positives'] = num_positives
        labels['groundtruths'] = groundtruths

    return {
        'images': image,
        'labels': labels,
    }
def _encoder_preprocessor(
    self, position_sequence, n_node, global_context, particle_types):
  """Builds the encoder's input graph from raw particle state.

  Node features are the flattened normalized velocity history, the clipped
  distances to the domain boundaries and, when more than one particle type
  exists, a particle-type embedding. Edge features are the radius-normalized
  relative displacement between sender and receiver plus its norm.

  Args:
    position_sequence: particle positions; index -1 along axis 1 is taken as
      the most recent position (presumably [particles, time, dims] -- confirm
      against the caller).
    n_node: per-graph node counts, forwarded to the connectivity computation
      and to the returned graph.
    global_context: optional global features; normalized when not None.
    particle_types: indices into `self._particle_type_embedding`; only used
      when `self._num_particle_types > 1`.

  Returns:
    A `gn.graphs.GraphsTuple` holding the concatenated node features, the
    concatenated edge features, the (normalized) globals and the connectivity.
  """
  # Current positions, and velocities as finite differences of the sequence.
  latest_position = position_sequence[:, -1]
  velocities = time_diff(position_sequence)

  # Graph connectivity within the connectivity radius.
  senders, receivers, n_edge = (
      connectivity_utils.compute_connectivity_for_batch_pyfunc(
          latest_position, n_node, self._connectivity_radius))

  # Normalized velocity sequence, merging the spatial and time axes.
  velocity_stats = self._normalization_stats["velocity"]
  normalized_velocities = (
      velocities - velocity_stats.mean) / velocity_stats.std
  flat_velocities = snt.MergeDims(start=1, size=2)(normalized_velocities)

  # Normalized clipped distances to the lower and upper boundaries.
  # `boundaries` has shape [num_dimensions, 2]; the second axis holds the
  # lower/upper bound of each dimension.
  boundaries = tf.constant(self._boundaries, dtype=tf.float32)
  lower_bounds = tf.expand_dims(boundaries[:, 0], 0)
  distance_to_lower = latest_position - lower_bounds
  upper_bounds = tf.expand_dims(boundaries[:, 1], 0)
  distance_to_upper = upper_bounds - latest_position
  boundary_distances = tf.concat(
      [distance_to_lower, distance_to_upper], axis=1)
  clipped_boundary_distances = tf.clip_by_value(
      boundary_distances / self._connectivity_radius, -1.0, 1.0)

  node_features = [flat_velocities, clipped_boundary_distances]

  # Particle type embedding, only meaningful with more than one type.
  if self._num_particle_types > 1:
    node_features.append(
        tf.nn.embedding_lookup(self._particle_type_embedding, particle_types))

  # Relative displacements and distances, normalized to the radius.
  sender_positions = tf.gather(latest_position, senders)
  receiver_positions = tf.gather(latest_position, receivers)
  relative_displacements = (
      sender_positions - receiver_positions) / self._connectivity_radius
  relative_distances = tf.norm(
      relative_displacements, axis=-1, keepdims=True)
  edge_features = [relative_displacements, relative_distances]

  # Normalize the global context.
  if global_context is not None:
    context_stats = self._normalization_stats["context"]
    centered_context = global_context - context_stats.mean
    # The context is all zero in some datasets, so floor the std with an
    # epsilon for numerical stability.
    safe_std = tf.math.maximum(context_stats.std, STD_EPSILON)
    global_context = centered_context / safe_std

  return gn.graphs.GraphsTuple(
      nodes=tf.concat(node_features, axis=-1),
      edges=tf.concat(edge_features, axis=-1),
      # NOTE: expected to be appended to the nodes by self._graph_net (not
      # visible from this block).
      globals=global_context,
      n_node=n_node,
      n_edge=n_edge,
      senders=senders,
      receivers=receivers,
  )
def single_level_feature_crop(features, level_boxes, detection_prior_levels,
                              min_mask_level, mask_crop_size):
  """Crops a fixed-size window per instance from its assigned FPN level.

  The features are flattened to [-1, num_downsample_channels] and each output
  pixel is fetched with one flat index:
    batch * batch_stride + level * level_stride + y * row_stride + x.
  The row stride is `max_feature_size`, so the feature maps are treated as
  square. No clipping is applied; windows are assumed to lie inside the map.

  Args:
    features: a float tensor of shape [batch_size, num_levels,
      max_feature_size, max_feature_size, num_downsample_channels].
    level_boxes: a float Tensor of the level boxes to crop from, of shape
      [batch_size, num_instances, 4]. Only entries 0 (y start) and 1
      (x start) are used, after casting to int32.
    detection_prior_levels: an int Tensor of instance assigned level of shape
      [batch_size, num_instances].
    min_mask_level: minimum FPN level to crop mask feature from; subtracted
      from `detection_prior_levels` to get a 0-based level index.
    mask_crop_size: an int of mask crop size.

  Returns:
    crop_features: a float Tensor of shape [batch_size * num_instances,
      mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
      instance feature crop.
  """
  (batch_size, num_levels, max_feature_size,
   _, num_downsample_channels) = features.get_shape().as_list()
  _, num_of_instances, _ = level_boxes.get_shape().as_list()
  level_boxes = tf.cast(level_boxes, tf.int32)
  assert num_of_instances == detection_prior_levels.get_shape().as_list()[1]

  x_start = level_boxes[:, :, 1]
  y_start = level_boxes[:, :, 0]

  # Expand each start index into the full run of `mask_crop_size` indices.
  crop_steps = range(mask_crop_size)
  x_indices = tf.stack([x_start + step for step in crop_steps], axis=2)
  y_indices = tf.stack([y_start + step for step in crop_steps], axis=2)

  levels = detection_prior_levels - min_mask_level

  # Strides of the flattened [batch, level, y, x] index space.
  row_stride = max_feature_size
  level_stride = max_feature_size * row_stride
  batch_stride = num_levels * level_stride

  # TODO(weicheng) change this to gather_nd for better readability.
  # Each term is tiled to [batch, instance, crop_y, crop_x] before summing.
  batch_offsets = tf.tile(
      tf.reshape(tf.range(batch_size) * batch_stride, [batch_size, 1, 1, 1]),
      [1, num_of_instances, mask_crop_size, mask_crop_size])
  level_offsets = tf.tile(
      tf.reshape(levels * level_stride, [batch_size, num_of_instances, 1, 1]),
      [1, 1, mask_crop_size, mask_crop_size])
  row_offsets = tf.tile(
      tf.reshape(y_indices * row_stride,
                 [batch_size, num_of_instances, mask_crop_size, 1]),
      [1, 1, 1, mask_crop_size])
  column_offsets = tf.tile(
      tf.reshape(x_indices,
                 [batch_size, num_of_instances, 1, mask_crop_size]),
      [1, 1, mask_crop_size, 1])
  flat_indices = tf.reshape(
      batch_offsets + level_offsets + row_offsets + column_offsets, [-1])

  # Gather from the channel-last flattened features and restore crop shape.
  flat_features = tf.reshape(features, [-1, num_downsample_channels])
  gathered = tf.gather(flat_features, flat_indices)
  crop_features = tf.reshape(
      gathered,
      [batch_size * num_of_instances, mask_crop_size, mask_crop_size,
       num_downsample_channels])
  return crop_features