def generate_encoding_template(batch_size, *args):
    """Build the blank target tensor that the SSD encoder later fills in.

    Args:
        batch_size: Number of samples the template is tiled for.
        *args: Positional extras, read by index:
            ``args[0]`` - iterable of anchor-box tensors, one per predictor
            layer (assumed ``(feature_h, feature_w, n_boxes, 4)`` - TODO
            confirm against the caller);
            ``args[1]`` - number of classes;
            ``args[2]`` - variances, broadcast-added onto a zero tensor shaped
            like the anchor boxes.

    Returns:
        Tensor of shape ``(batch_size, total_boxes, n_classes + 12)``: zeroed
        class slots, two copies of the anchor boxes, then the variances.
    """
    boxes_list, n_classes, variances = args[0], args[1], args[2]

    boxes_batch = []
    for boxes in boxes_list:
        boxes = tf.expand_dims(boxes, 0)
        boxes = tf.tile(boxes, (batch_size, 1, 1, 1, 1))
        # Reshape -> (Batch, Feature_Height * Feature_Width * n_boxes, 4)
        boxes = tf.reshape(boxes, (batch_size, -1, 4))
        boxes_batch.append(boxes)

    # tf.concat is the TensorFlow op; `tf.concatenate` does not exist.
    boxes_tensor = tf.concat(boxes_batch, axis=1)
    # Match the boxes' dtype so the final concat cannot fail on mixed dtypes.
    classes_tensor = tf.zeros(
        (batch_size, boxes_tensor.shape[1], n_classes),
        dtype=boxes_tensor.dtype)
    variances_tensor = tf.zeros_like(boxes_tensor) + variances

    return tf.concat(
        (classes_tensor, boxes_tensor, boxes_tensor, variances_tensor),
        axis=2)
def pool(state, action, next_state, reward, pool_size, state_pool=None,
         action_pool=None, next_state_pool=None, reward_pool=None):
    """Append one transition to the replay pools, evicting the oldest entry.

    Args:
        state, action, next_state, reward: The transition to store; each gets
            a new leading axis before being appended.
        pool_size: Maximum number of transitions kept per pool.
        state_pool, action_pool, next_state_pool, reward_pool: Existing pools.
            When ``state_pool`` is None all four pools are started afresh from
            this transition.

    Returns:
        Tuple ``(state_pool, action_pool, next_state_pool, reward_pool)``.
    """
    new_rows = [
        tf.expand_dims(item, axis=0)
        for item in (state, action, next_state, reward)
    ]

    # `is None`: comparing a tensor with `==` is element-wise, not a None test.
    if state_pool is None:
        pools = new_rows
    else:
        existing = (state_pool, action_pool, next_state_pool, reward_pool)
        pools = [
            tf.concat([old, new], axis=0)
            for old, new in zip(existing, new_rows)
        ]

    # Static leading dimension; len() is undefined for symbolic tensors.
    if pools[0].shape[0] > pool_size:
        pools = [entries[1:] for entries in pools]

    state_pool, action_pool, next_state_pool, reward_pool = pools
    return state_pool, action_pool, next_state_pool, reward_pool
def rpn_graph(self, rpn_feature_maps, num_anchors_per_location, weight_decay=0.0005):
    """Build the region-proposal heads on every pyramid level.

    Args:
        rpn_feature_maps: Iterable of feature-map tensors, each (N, H, W, C),
            one per pyramid stage.
        num_anchors_per_location: Anchors per spatial location, either a
            single int shared by all stages or a sequence indexed by stage.
        weight_decay: L2 regularisation strength for the conv weights.

    Returns:
        Tuple ``(bboxes_delta, logits, probs)`` concatenated over all stages
        along axis 1, with shapes (N, all_num_anchors, 4),
        (N, all_num_anchors, 2) and (N, all_num_anchors, 2).
    """
    rpn_probs = []
    rpn_bboxes_delta = []
    rpn_logits = []

    # The L2 term is a regulariser; it was previously passed as the initializer.
    with slim.arg_scope([slim.conv2d],
                        padding='SAME',
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        activation_fn=None):
        for stage_i, feature_map in enumerate(rpn_feature_maps):
            if isinstance(num_anchors_per_location, int):
                num_anchors = num_anchors_per_location
            else:
                num_anchors = num_anchors_per_location[stage_i]

            # Scope names start from 2 ('rpn2', 'rpn3', ...).
            with tf.variable_scope('rpn' + str(stage_i + 2)):
                shared = slim.conv2d(feature_map, 512, kernel_size=3,
                                     stride=1, scope='shared')

                # BG/FG classification
                x = slim.conv2d(shared, 2 * num_anchors, kernel_size=1,
                                stride=1, scope='rpn_class_logit')
                # Keep the batch axis so stages can be joined along axis 1.
                rpn_logit = tf.reshape(x, [tf.shape(x)[0], -1, 2])
                rpn_logits.append(rpn_logit)
                rpn_prob = slim.softmax(rpn_logit, scope='rpn_class_probs')
                rpn_probs.append(rpn_prob)

                # box delta
                x = slim.conv2d(shared, 4 * num_anchors, kernel_size=1,
                                stride=1, scope='rpn_box_pred')
                rpn_bbox_delta = tf.reshape(x, [tf.shape(x)[0], -1, 4])
                rpn_bboxes_delta.append(rpn_bbox_delta)

    all_rpn_bboxes_delta = tf.concat(rpn_bboxes_delta, axis=1,
                                     name='rpn_bboxes_delta')
    all_rpn_probs = tf.concat(rpn_probs, axis=1, name='rpn_probs')
    all_rpn_logits = tf.concat(rpn_logits, axis=1, name='rpn_logits')
    return all_rpn_bboxes_delta, all_rpn_logits, all_rpn_probs
def f(pre_logits, **c):
    """Predict per-class logit mean/log-variance and draw noisy logit samples.

    Reads ``class_count``, ``resize``, ``sample_count`` and ``layers`` from
    the enclosing scope.

    Args:
        pre_logits: Feature tensor fed to the 1x1 logits convolution.
        **c: Context values; ``c['image_shape']`` is used as the resize target
            when ``resize`` is truthy.

    Returns:
        Tuple ``(logits_samples, extras)`` where ``logits_samples`` has one
        extra leading sample axis (nNHWC) and ``extras`` maps the names
        ``logits_mean``, ``logits_logvar``, ``logits_var``, ``logits_std``
        and ``logits_samples`` to the corresponding tensors.
    """
    logits = layers.conv(pre_logits, 1, class_count * 2, bias=True,
                         name='logits/conv')
    if resize:
        logits = tf.image.resize_bilinear(logits, c['image_shape'])

    logits_mean = logits[..., :class_count]  # NHWC
    logits_logvar = logits[..., class_count:]  # NHWC
    logits_var = tf.exp(logits_logvar)
    logits_std = tf.sqrt(logits_var)
    # TODO: try log(1+exp(logits[..., class_count:])) for logits_std

    # tf.concat needs rank-1 int32 pieces; reshape accepts a scalar or [n].
    sample_dim = tf.reshape(tf.cast(sample_count, tf.int32), [1])
    samples_shape = tf.concat([sample_dim, tf.shape(logits_std)], axis=0)
    noise = tf.random.normal(samples_shape)  # nNHWC
    logits_samples = logits_mean + noise * logits_std  # nNHWC

    return logits_samples, {
        'logits_mean': logits_mean,
        'logits_logvar': logits_logvar,
        'logits_var': logits_var,
        'logits_std': logits_std,
        'logits_samples': logits_samples
    }
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    Args:
        feats: Raw output tensor of one YOLO detection layer; reshaped here to
            (batch, grid_h, grid_w, num_anchors, num_classes + 5).
        anchors: Sequence of ``(width, height)`` anchor sizes for this layer.
        num_classes: Number of object classes.
        input_shape: ``(height, width)`` of the network input.
        calc_loss: When truthy, return the intermediate values the loss needs.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` if ``calc_loss`` is truthy, otherwise
        ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    dtype = feats.dtype

    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = tf.reshape(
        tf.cast(tf.constant(anchors), dtype), [1, 1, 1, num_anchors, 2])

    grid_shape = tf.shape(feats)[1:3]  # height, width
    grid_y = tf.tile(
        tf.reshape(tf.range(0, grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = tf.tile(
        tf.reshape(tf.range(0, grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    # Join x and y on the last axis so grid[..., 0] is x and grid[..., 1] is y.
    grid = tf.concat([grid_x, grid_y], axis=-1)
    grid = tf.cast(grid, dtype)

    feats = tf.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (tf.sigmoid(feats[..., :2]) + grid) / tf.cast(
        grid_shape[::-1], dtype)
    box_wh = tf.exp(feats[..., 2:4]) * anchors_tensor / tf.cast(
        input_shape[::-1], dtype)
    box_confidence = tf.sigmoid(feats[..., 4:5])
    box_class_probs = tf.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def rotate(self, batch_data):
    """Rotate every image in ``batch_data`` by its own random angle.

    Angles are drawn uniformly from [-pi/2, pi/2). The rotation op is stored
    on ``self.rotated_images`` and evaluated with ``self.sess``, feeding
    ``self.original_images`` and ``self.angles``.

    Args:
        batch_data: Array whose first axis indexes the images.

    Returns:
        Tuple ``(rotated_batch, angles)``: the evaluated rotated images and a
        ``(batch, 1)`` float array holding the angle used for each image.

    Raises:
        ValueError: If ``batch_data`` contains no images.
    """
    mini_batch = batch_data.shape[0]
    if mini_batch == 0:
        raise ValueError("batch_data must contain at least one image")

    # Build the rotation op once per call instead of once per chunk.
    image_height = tf.cast(tf.shape(self.images)[1], tf.float32)
    image_width = tf.cast(tf.shape(self.images)[2], tf.float32)
    self.rotated_images = tf.contrib.image.transform(
        self.images,
        tf.contrib.image.angles_to_projective_transforms(
            self.angles, image_height, image_width))

    rotated_chunks = []
    angle_chunks = []
    # mini_batch currently equals the whole batch, so this runs once; results
    # are collected as NumPy arrays so a smaller chunk size would also work.
    for i in range(int(batch_data.shape[0] / mini_batch)):
        random_angles = np.random.uniform(low=-np.pi / 2,
                                          high=np.pi / 2,
                                          size=(mini_batch, 1))
        new_batch = self.sess.run(
            self.rotated_images,
            feed_dict={
                self.original_images:
                batch_data[i * mini_batch:(i + 1) * mini_batch],
                self.angles: random_angles.flatten()
            })
        rotated_chunks.append(new_batch)
        angle_chunks.append(random_angles)

    if len(rotated_chunks) == 1:
        return rotated_chunks[0], angle_chunks[0]
    return (np.concatenate(rotated_chunks, axis=0),
            np.concatenate(angle_chunks, axis=0))
def call(self, x, mask=None):
    """Crop each region of interest from the image and resize it to a square.

    Args:
        x: Pair ``[img, rois]``. ``rois[0, i]`` is read as ``(x, y, w, h)``
            for region ``i``; only the first batch entry of ``rois`` is used.
        mask: Unused.

    Returns:
        Tensor of shape
        ``(1, num_rois, pool_size, pool_size, nb_channels)``.
    """
    assert(len(x) == 2)
    img = x[0]
    rois = x[1]

    outputs = []
    for roi_idx in range(self.num_rois):
        left = tf.cast(rois[0, roi_idx, 0], 'int32')
        top = tf.cast(rois[0, roi_idx, 1], 'int32')
        width = tf.cast(rois[0, roi_idx, 2], 'int32')
        height = tf.cast(rois[0, roi_idx, 3], 'int32')

        crop = img[:, top:top + height, left:left + width, :]
        rs = tf.image.resize_images(crop, (self.pool_size, self.pool_size))
        outputs.append(rs)

    final_output = tf.concat(outputs, axis=0)
    # The former permutation (0, 1, 2, 3, 4) was the identity, so the reshape
    # result is returned directly.
    final_output = tf.reshape(
        final_output,
        (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))
    return final_output
def bias_initializer(shape, *args, **kwargs):
    """Initialise a 4 * units bias whose second quarter is all ones.

    The requested ``shape`` is ignored; the result is assembled from three
    pieces of length ``units``, ``units`` and ``2 * units``. The first and
    last come from ``self.bias_initializer`` and the middle one is ones.
    """
    return tf.concat([
        self.bias_initializer((self.units, ), *args, **kwargs),
        initializers.Ones()((self.units, ), *args, **kwargs),
        self.bias_initializer((self.units * 2, ), *args, **kwargs),
    ], axis=0)
def extract_features_mfcc(input_path, class_name, train_test, class_names, bands, frames):
    """Turn every audio file of one class/split into log-mel feature windows.

    Files are read from ``input_path + class_name + "/" + train_test``. Each
    clip is cut with ``windows(clip, 512 * (frames - 1))`` and only windows of
    exactly that length are kept.

    Args:
        input_path: Directory prefix; it is concatenated as-is, so it must end
            with a path separator.
        class_name: Sub-directory (and label) of the class to process.
        train_test: Split sub-directory under the class directory.
        class_names: Passed to ``encode_class`` together with ``class_name``.
        bands: Number of mel bands.
        frames: Number of frames per window.

    Returns:
        Tuple ``(features, labels)``. ``features`` has shape
        ``(n_windows, bands, frames, 2)``: channel 0 is the log-mel
        spectrogram, channel 1 its delta. ``labels`` holds one encoded class
        per window.
    """
    window_size = 512 * (frames - 1)
    # librosa.logamplitude was removed in librosa 0.6; power_to_db is the
    # same computation under its current name.
    to_db = getattr(librosa, 'power_to_db', None) or librosa.logamplitude

    class_dir = input_path + class_name + "/" + train_test
    log_spectograms = []
    labels = []
    class_files = os.listdir(class_dir)
    n_files = len(class_files)

    for i, aud_filename in enumerate(class_files):
        audio_path = class_dir + "/" + aud_filename
        print("Preprocessing: " + class_name + "_" + train_test + ": " +
              str(i) + " of " + str(n_files) + " :" + class_name + "/" +
              train_test + "/" + aud_filename)
        audio_clip, sr = librosa.load(audio_path)
        for (start, end) in windows(audio_clip, window_size):
            audio_signal = audio_clip[start:end]
            if len(audio_signal) != int(window_size):
                continue  # drop the short window at the end of the clip
            # Keyword `y=` works on every librosa version; newer releases
            # reject the positional form.
            mel_spec = librosa.feature.melspectrogram(
                y=audio_signal, sr=sr, n_mels=bands)
            log_spec = to_db(mel_spec)
            # NOTE(review): the spectrogram is transposed before flattening
            # but reshaped below as (bands, frames) - confirm this axis order
            # is intended.
            log_spec = log_spec.T.flatten()[:, np.newaxis].T
            log_spectograms.append(log_spec)
            labels.append(encode_class(class_name, class_names))

    log_specgrams = np.asarray(log_spectograms).reshape(
        len(log_spectograms), bands, frames, 1)
    features = np.concatenate(
        (log_specgrams, np.zeros(np.shape(log_specgrams))), axis=3)
    for i in range(len(features)):
        features[i, :, :, 1] = librosa.feature.delta(features[i, :, :, 0])

    return np.array(features), np.array(labels)
def context_module(self, x, channels, name):
    """Build the two-branch context block (see Figure 4, SSH Context Module).

    A shared 3x3 convolution feeds an upper branch with one further 3x3
    convolution and a lower branch with two. Every convolution outputs
    ``channels // 2`` maps with ReLU and 'same' padding.

    Args:
        x: Input feature tensor.
        channels: Channel budget; each convolution outputs ``channels // 2``.
        name: Variable scope for the block.

    Returns:
        The two branch outputs concatenated on the last axis.
    """
    with tf.variable_scope(name):
        with argscope([tf.layers.conv2d], kernel_size=3,
                      activation=tf.nn.relu, padding='same'):
            c1 = tf.layers.conv2d(x, channels // 2, name='conv1')
            # upper path
            c2 = tf.layers.conv2d(c1, channels // 2, name='conv2')
            # lower path
            c3 = tf.layers.conv2d(c1, channels // 2, name='conv3a')
            c3 = tf.layers.conv2d(c3, channels // 2, name='conv3b')
            return tf.concat([c2, c3], axis=-1)
def __call__(self, x, support):
    """Apply graph convolution over every support matrix up to ``self.order``.

    For each matrix ``a`` in ``support`` the input is propagated through
    ``self.nconv`` repeatedly (orders 1 .. ``self.order``). The input and all
    propagated tensors are concatenated on axis 3, passed through
    ``self.mlp`` and then ``tf.nn.dropout``.

    Args:
        x: Input tensor.
        support: Iterable of matrices handed to ``self.nconv``.

    Returns:
        The mixed and dropped-out tensor.
    """
    out = [x]
    for a in support:
        x1 = self.nconv(x, a)
        out.append(x1)
        for _ in range(2, self.order + 1):
            x1 = self.nconv(x1, a)
            out.append(x1)

    h = tf.concat(out, axis=3)
    h = self.mlp(h)
    h = tf.nn.dropout(h, self.dropout)
    return h
def main(np_location, data_name):
    """Assemble training and validation datasets from paired ``.npy`` files.

    Files in ``np_location`` named ``Train?Data<suffix>`` or
    ``Valid?Data<suffix>`` (``?`` being any single separator character) are
    paired with ``Train_Labels<suffix>`` / ``Valid_Labels<suffix>`` in the
    same directory. Each pair becomes a ``tf.data.Dataset`` of
    ``(array, labels)`` slices, and datasets of the same split are chained.

    Args:
        np_location: Directory holding the NumPy files.
        data_name: Unused.

    Returns:
        Tuple ``(train_dataset, valid_dataset)``; an entry is None when no
        file of that split was found.
    """
    datasets = {'Train': None, 'Valid': None}

    # Sorted so the concatenation order does not depend on the file system.
    for np_fname in sorted(os.listdir(np_location)):
        np_fname = os.path.basename(np_fname)
        split = np_fname[0:5]
        if np_fname[6:10] != 'Data' or split not in datasets:
            continue

        label_name = split + '_Labels' + np_fname[10:]
        np_array = np.load(os.path.join(np_location, np_fname))
        np_labels = np.load(os.path.join(np_location, label_name))
        iter_dataset = tf.data.Dataset.from_tensor_slices(
            (np_array, np_labels))

        # Keep training and validation data apart; datasets are chained with
        # Dataset.concatenate, not a tensor concat.
        if datasets[split] is None:
            datasets[split] = iter_dataset
        else:
            datasets[split] = datasets[split].concatenate(iter_dataset)

    return datasets['Train'], datasets['Valid']
def tf_roll(a, shift, axis=None):
    """Roll the elements of ``a`` cyclically, mirroring ``numpy.roll``.

    Args:
        a: Input tensor (or array-like convertible to one).
        shift: Number of places to shift; reduced modulo the rolled length.
        axis: Axis to roll along. With None the tensor is flattened, rolled
            and restored to its original shape.

    Returns:
        A tensor with the same shape as ``a``.

    Raises:
        ValueError: If ``axis`` is out of range, or the rolled length is not
            statically known.
    """
    a = tf.convert_to_tensor(a)
    if axis is None:
        source = tf.reshape(a, [-1])
        gather_axis = 0
        n = a.get_shape().num_elements()
    else:
        source = a
        gather_axis = axis
        try:
            n = tf.compat.dimension_value(a.get_shape()[axis])
        except IndexError:
            raise ValueError('axis must be >= 0 and < %d' % len(a.get_shape()))

    if n is None:
        raise ValueError(
            'tf_roll needs a statically known size along the rolled axis')
    if n == 0:
        return a

    shift %= n
    # Positions of the last `shift` elements first, then the remaining ones.
    indexes = tf.concat((tf.range(n - shift, n), tf.range(n - shift)), axis=0)
    res = tf.gather(source, indexes, axis=gather_axis)
    if axis is None:
        res = tf.reshape(res, tf.shape(a))
    return res
def encode_ssd(gt_labels, *args):
    """Encode ground-truth boxes into SSD training targets.

    The template from ``generate_encoding_template`` is filled in with NumPy,
    because tensors do not support item assignment. Every anchor starts as
    background. Greedily matched anchors receive the one-hot class and box of
    their ground truth. Unmatched anchors whose best IoU reaches
    ``neg_iou_limit`` are made neutral. Box coordinates are finally expressed
    relative to the anchor and divided by the variances.

    Reads ``background_id`` and ``neg_iou_limit`` from the enclosing scope.
    Requires eager execution, since the template is copied into an array.

    Args:
        gt_labels: One array per sample with rows
            ``(class_id, xmin, ymin, xmax, ymax)``.
        *args: Forwarded to ``generate_encoding_template``; ``args[1]`` is the
            number of classes.

    Returns:
        Tensor of shape ``(batch_size, n_boxes, n_classes + 12)``.
    """
    import numpy as np  # local import: the math below needs in-place updates

    n_classes = args[1]
    class_id = 0
    xmin, ymin, xmax, ymax = 1, 2, 3, 4
    box_columns = [xmin, ymin, xmax, ymax]

    batch_size = len(gt_labels)
    # `*args` must be unpacked; passing the tuple itself shifts every index.
    y_encoded = np.array(generate_encoding_template(batch_size, *args))
    y_encoded[:, :, background_id] = 1
    class_vectors = np.eye(n_classes)

    for i in range(batch_size):
        labels = np.asarray(gt_labels[i])  # labels of sample i, not sample 1
        if labels.size == 0:
            continue  # nothing to match in this sample

        classes_one_hot = class_vectors[labels[:, class_id].astype(int)]
        labels_one_hot = np.concatenate(
            [classes_one_hot, labels[:, box_columns]], axis=-1)

        similarities = np.array(
            iou(labels[:, box_columns], y_encoded[i, :, -12:-8]))
        bipartite_matches = match_bipartite_greedy(
            weight_matrix=similarities)
        y_encoded[i, bipartite_matches, :-8] = labels_one_hot
        # Matched anchors must not be considered again below.
        similarities[:, bipartite_matches] = 0

        max_background_similarities = np.amax(similarities, axis=0)
        neutral_boxes = np.nonzero(
            max_background_similarities >= neg_iou_limit)[0]
        y_encoded[i, neutral_boxes, background_id] = 0

    y_encoded[:, :, -12:-8] -= y_encoded[:, :, -8:-4]
    # (xmin(gt) - xmin(anchor)) / w(anchor), (xmax(gt) - xmax(anchor)) / w(anchor)
    y_encoded[:, :, [-12, -10]] /= np.expand_dims(
        y_encoded[:, :, -6] - y_encoded[:, :, -8], axis=-1)
    # (ymin(gt) - ymin(anchor)) / h(anchor), (ymax(gt) - ymax(anchor)) / h(anchor)
    y_encoded[:, :, [-11, -9]] /= np.expand_dims(
        y_encoded[:, :, -5] - y_encoded[:, :, -7], axis=-1)
    # (gt - anchor) / size(anchor) / variance for all four coordinates, where
    # 'size' refers to w and h respectively
    y_encoded[:, :, -12:-8] /= y_encoded[:, :, -4:]

    return tf.convert_to_tensor(y_encoded)
def __init__(self, target_height, target_width, target_control_points, **kwargs):
    """Precompute the thin-plate-spline matrices for a fixed target grid.

    Args:
        target_height: Height of the target grid in pixels.
        target_width: Width of the target grid in pixels.
        target_control_points: ``(N, 2)`` control points whose coordinates
            should have the extent from -1 to 1.
        **kwargs: Forwarded to the parent layer constructor.

    Raises:
        ValueError: If ``target_control_points`` is not of shape ``(N, 2)``.

    Sets ``num_points``, ``L_inverse``, ``HW`` and
    ``P_target_coordinate2Control`` on the instance.
    """
    super(TPSGRidGen_Layer, self).__init__(**kwargs)

    target_control_points = tf.cast(target_control_points, tf.float32)
    # Check the static shape; a Python assert on a tensor comparison is not
    # usable while building a graph.
    static_shape = target_control_points.shape
    if static_shape.ndims != 2 or static_shape.as_list()[1] != 2:
        raise ValueError(
            'target_control_points must have shape (N, 2), got %s' %
            static_shape)

    N = tf.shape(target_control_points)[0]
    self.num_points = N

    U_target_control = U_matrix(target_control_points, target_control_points)
    ones_vec = tf.ones([N, 1], tf.float32)
    P = tf.concat([ones_vec, target_control_points], axis=1)  # (N, 3)
    L_upperRows = tf.concat([U_target_control, P], axis=1)
    L_lowerRows = tf.concat([tf.transpose(P), tf.zeros([3, 3])], axis=1)
    L = tf.concat([L_upperRows, L_lowerRows], axis=0)
    self.L_inverse = tf.linalg.inv(L)

    # create target coordinate matrix
    HW = target_height * target_width
    self.HW = HW
    # Float ranges: the grid is scaled by float factors right below.
    y = tf.range(target_height, dtype=tf.float32)
    x = tf.range(target_width, dtype=tf.float32)
    X, Y = tf.meshgrid(x, y)
    # Flatten to one point per row, then scale x, y to (-1, 1).
    Y = tf.reshape(Y, [HW, 1]) * 2.0 / (target_height - 1) - 1.0
    X = tf.reshape(X, [HW, 1]) * 2.0 / (target_width - 1) - 1.0
    target_coordinate = tf.concat([X, Y], axis=1)  # (HW, 2)

    U_target_coordinate2Control = U_matrix(target_coordinate,
                                           target_control_points)
    self.P_target_coordinate2Control = tf.concat([
        U_target_coordinate2Control,
        tf.ones([HW, 1], dtype=tf.float32),
        target_coordinate
    ], axis=1)
def __call__(self, i, condition=None):
    """Run the backbone and project its features to the output head.

    Args:
        i: Input passed to ``self.network``.
        condition: Optional tensor concatenated to the flattened features on
            the last axis.

    Returns:
        ``(mean, log_var)`` when ``self.output_distribution`` is
        ``'gaussian'``; a single tensor for ``'mean'`` and ``'none'``.

    Raises:
        Exception: For any other ``self.output_distribution`` value.
    """
    output_dims = self.config.get("output dims", 3)
    # Looked up but not applied to the output below; the lookup is kept so an
    # unknown activation name is still reported by get_activation.
    output_act_fn = get_activation(
        self.config.get('output_activation', 'none'))

    x, end_points = self.network(i)
    x = tcl.flatten(x)
    if condition is not None:
        x = tf.concat([x, condition], axis=-1)

    with tf.variable_scope(self.name):
        # Create the variables on the first call and reuse them afterwards.
        if self.reuse:
            tf.get_variable_scope().reuse_variables()
        else:
            assert tf.get_variable_scope().reuse is False
            self.reuse = True

        if self.output_distribution == 'gaussian':
            mean = self.fc('fc_out_mean', x, output_dims, **self.out_fc_args)
            log_var = self.fc('fc_out_log_var', x, output_dims,
                              **self.out_fc_args)
            return mean, log_var
        elif self.output_distribution == 'mean':
            mean = self.fc('fc_out_mean', x, output_dims, **self.out_fc_args)
            return mean
        elif self.output_distribution == 'none':
            out = self.fc('fc_out_mean', x, output_dims, **self.out_fc_args)
            return out
        else:
            raise Exception("None output distribution named " +
                            self.output_distribution)
def detection_module(self, x, channels, name):
    """Build one detection head (see Figure 3, SSH Detection Module).

    The context-module output and a plain 3x3 convolution of ``x`` are
    concatenated on the channel axis. Two 1x1 convolutions on that result
    give the classification logits and the box regression.

    Args:
        x: Input feature tensor.
        channels: Output channels of the 3x3 convolution and channel budget of
            the context module.
        name: Suffix for the context module's variable scope.

    Returns:
        Tuple ``(logits, reg)`` with 2 and 8 output channels respectively.
    """
    yc = self.context_module(x, channels, 'context_%s' % name)
    with argscope([tf.layers.conv2d], padding='same'):
        y = tf.layers.conv2d(x, channels, kernel_size=3,
                             activation=tf.nn.relu, name='conv1')
        y = tf.concat([yc, y], axis=-1)
        # The heads read the fused features `y`; previously they read `x`,
        # leaving the context branch unused. Each head has its own layer name.
        logits = tf.layers.conv2d(y, 2, kernel_size=1,
                                  activation=tf.identity, name='conv2')
        reg = tf.layers.conv2d(y, 8, kernel_size=1,
                               activation=tf.identity, name='conv3')
        return logits, reg
def setup(self): passage = tf.placeholder( tf.int32, [None, passage_max_length], name='passage') # shape (batch_size, passage_max_length) question = tf.placeholder( tf.int32, [None, question_max_length], name='question') # shape (batch_size, question_max_length) desired_output = tf.placeholder( tf.float32, [None, passage_max_length], name='desired_output') # shape (batch_size, passage_max_length) embedding = tf.constant(embedding_matrix, name='embedding', dtype=tf.float32) ####################### # Preprocessing layer # ####################### passage_embedded = tf.nn.embedding_lookup( embedding, passage) # shape (batch_size, passage_max_length, embedding_size) question_embedded = tf.nn.embedding_lookup( embedding, question ) # shape (batch_size, question_max_length, embedding_size) dropout = tf.placeholder(tf.float32) with tf.variable_scope('passage_lstm'): passage_cell = tf.nn.rnn_cell.LSTMCell(hidden_size) passage_cell = tf.nn.rnn_cell.DropoutWrapper( passage_cell, output_keep_prob=dropout) passage_cell = tf.nn.rnn_cell.MultiRNNCell([passage_cell] * 2) H_p, _ = tf.nn.dynamic_rnn( passage_cell, passage_embedded, dtype=tf.float32 ) # shape (batch_size, passage_max_length, hidden_size) with tf.variable_scope('question_lstm'): question_cell = tf.nn.rnn_cell.LSTMCell(hidden_size) question_cell = tf.nn.rnn_cell.DropoutWrapper( question_cell, output_keep_prob=dropout) question_cell = tf.nn.rnn_cell.MultiRNNCell([question_cell] * 2) H_q, _ = tf.nn.dynamic_rnn( question_cell, question_embedded, dtype=tf.float32 ) # shape (batch_size, question_max_length, hidden_size) #################### # Match-LSTM layer # #################### # Weights and bias to compute `G` W_q = self.weight_variable(shape=[hidden_size, hidden_size]) W_p = self.weight_variable(shape=[hidden_size, hidden_size]) W_r = self.weight_variable(shape=[hidden_size, hidden_size]) b_p = self.bias_variable(shape=[hidden_size]) # Weight and bias to compute `a` w = self.weight_variable(shape=[hidden_size]) 
b_alpha = self.bias_variable(shape=[]) # In the paper, this is `b` # Only calculate `WH_q` once WH_q = tf.matmul(W_q, H_q) # Results for forward and backward LSTMs H_r_forward = [] H_r_backward = [] with tf.variable_scope('forward_match_lstm'): forward_cell = tf.nn.rnn_cell.DropoutWrapper( tf.nn.rnn_cell.LSTMCell(hidden_size), output_keep_prob=dropout) forward_state = forward_cell.zero_state(batch_size, dtype=tf.float32) h = forward_state.h for i in range(len(H_p)): G_forward = tf.tanh(WH_q + tf.tile( (tf.matmul(W_p, H_p[i]) + tf.matmul(W_r, h) + b_p), [question_max_length, 1])) alpha_forward = tf.nn.softmax( w * G_forward + tf.tile(b_alpha, [question_max_length, 1])) z_forward = tf.concatenate(H_p[i], H_q * alpha_forward[i]) h, forward_state = forward_cell(z_forward, forward_state) H_r_forward.append(h) with tf.variable_scope('backward_match_lstm'): backward_cell = tf.nn.rnn_cell.DropoutWrapper( tf.nn.rnn_cell.LSTMCell(hidden_size), output_keep_prob=dropout) backward_state = backward_cell.zero_state(batch_size, dtype=tf.float32) h = backward_state.h for i in reversed(range(len(H_p))): G_backward = tf.tanh(WH_q + tf.tile( (tf.matmul(W_p, H_p[i]) + tf.matmul(W_r, h) + b_p), [question_max_length, 1])) alpha_backward = tf.nn.softmax( w * G_backward + tf.tile(b_alpha, [question_max_length, 1])) z_backward = tf.concatenate(H_p[i], H_q * alpha_backward[i]) h, backward_state = backward_cell(z_backward, backward_state) H_r_backward.append(h) # After finding forward and backward `H_r[i]` for all `i`, concatenate `H_r_forward` and `H_r_backward` H_r = tf.concatenate(H_r_forward, H_r_backward) # TODO: Assert that the shape of `H_r` is (2 * hidden_size, passage_max_length) ######################## # Answer-Pointer layer # ######################## # TODO: Switch this over to boundary model or add zero vector padding at end of H_r # ^ Might not be necessary ?? 
# Weights and bias to compute `F` V = self.weight_variable(shape=[hidden_size, 2 * hidden_size]) W_a = self.weight_variable(shape=[hidden_size, hidden_size]) b_a = self.bias_variable(shape=[hidden_size ]) # In the paper, this is `c` # Weight and bias to compute `beta` v = self.weight_variable(shape=[hidden_size]) b_beta = self.bias_variable(shape=[]) # Only calculate `VH` once VH = tf.matmul(V, H_r) # shape (hidden_size, passage_max_length) H_a = [] with tf.variable_scope('answer_pointer_lstm'): pointer_cell = tf.nn.rnn_cell.DropoutWrapper( tf.nn.rnn_cell.LSTMCell(hidden_size), output_keep_prob=dropout) pointer_state = pointer_cell.zero_state(batch_size, dtype=tf.float32) h = pointer_state.h for k in range(len(H_p)): F = tf.tanh(VH + tf.tile((tf.matmul(W_a, H_a[k]) + b_a), [passage_max_length, 1])) beta = tf.nn.softmax(v * F + tf.tile(b_beta, [passage_max_length, 1])) h, pointer_state = pointer_cell(tf.matmul(H_r, beta), pointer_state) H_a.append(h) # TODO: Replace the loss function below with the loss function from the paper loss = tf.reduce_mean( tf.reduce_sum(tf.pow(desired_output - output, 2), reduction_indices=[1])) train_step = tf.train.AdamOptimizer(0.001).minimize(loss) self.passage = passage self.question = question self.output = output self.desired_output = desired_output self.train_step = train_step self.loss = loss self.dropout = dropout
def train(with_gan=False, load_x=True, with_y=True, match_mask=False, in_frames=4):
    """Train ring_net for a number of steps.

    Builds the unrolled prediction graph and, when ``with_gan`` is set, the
    GAN losses and optimisers. It then runs ``FLAGS.max_step`` training
    steps, writing summaries every 1000 steps and a checkpoint plus sample
    plots every 4000 steps.

    Args:
        with_gan: Add discriminator/generator losses; forced to False when
            ``match_mask`` is set.
        load_x: Load batches with ``load_batch`` rather than generating them.
        with_y: Look up training files with ``find_pairs`` instead of
            ``find_files`` and pass ``with_y`` on to ``load_batch``.
        match_mask: Keep only channel 1 of the sampled images before plotting.
        in_frames: Number of input frames fed to ``network_template``.

    NOTE(review): the body still contains both halves of a merge conflict
    (see the commented ``<<<<<<<`` / ``=======`` / ``>>>>>>>`` markers) and
    uses several names never assigned in this function; see inline notes.
    """
    with tf.Graph().as_default():
        x_all = tf.placeholder(tf.float32, [None, FLAGS.seq_length, 512, 1])
        if match_mask:
            with_gan = False
        # possible dropout inside
        keep_prob = tf.placeholder("float")
        #x_dropout = tf.nn.dropout(x, keep_prob)
        x_in = x_all[:, :in_frames, :, :]

        # conv network
        hidden = None
        x_unwrap = []
        for i in range(FLAGS.seq_length - 1):
            if i < FLAGS.seq_start:
                # Warm-up: feed ground-truth frames and keep the true next frame.
                x_1, hidden = network_template(x_all[:, i:i + in_frames, :, :],
                                               hidden)
                x_unwrap.append(x_all[:, i + 1, :, :])
            else:
                #conditional generation
                # NOTE(review): `tf.concatenate` is not a TensorFlow call
                # (`tf.concat(values, axis)` is) and `append` is not defined
                # in this function.
                x_1, hidden = network_template(
                    tf.concatenate(x_unwrap[-in_frames:],
                                   append(x_1),
                                   axis=1), hidden)
                x_unwrap.append(x_1)

        # pack them all together
        x_unwrap = tf.stack(x_unwrap)
        x_unwrap = tf.transpose(x_unwrap, [1, 0, 2, 3])

        # this part will be used for generating video
        x_unwrap_g = []
        hidden_g = None
        for i in range(30):
            if i < FLAGS.seq_start:
                x_1_g, hidden_g = network_template(
                    x_all[:, i:i + in_frames, :, :], hidden_g)
                # NOTE(review): `x_dropout` is only defined in the
                # commented-out line near the top of the function.
                x_unwrap_g.append(x_dropout[:, i + 1, :, :])
            else:
                #conditional generation
                # NOTE(review): same `tf.concatenate` / `append` concern as above.
                x_1_g, hidden_g = network_template(
                    tf.concatenate(x_unwrap_g[-in_frames:],
                                   append(x_1_g),
                                   axis=1), hidden_g)
                x_unwrap_g.append(x_1_g)

        # pack them generated ones
        x_unwrap_g = tf.stack(x_unwrap_g)
        x_unwrap_g = tf.transpose(x_unwrap_g, [1, 0, 2, 3])

        # NOTE(review): `x` is not assigned in this function; the placeholder
        # is named `x_all`.
        img = x[:, FLAGS.seq_start + 1:, :, :]
        img_ = x_unwrap[:, FLAGS.seq_start:, :, :]

        # calc total loss (compare x_t to x_t+1)
        loss_l2 = tf.nn.l2_loss(img - img_)
        #loss_l2 = rms_loss(img - img_) * 50
        tf.summary.scalar('loss_l2', loss_l2)

        if with_gan:
            img = x_all[:, FLAGS.seq_start:, :, :]
            # NOTE(review): `y_1` is not assigned anywhere in this function.
            img_ = y_1
            #import IPython; IPython.embed()
            D, D_logits, D3 = discriminator(img, reuse=False)
            #import IPython; IPython.embed()
            D_, D_logits_, D3_ = discriminator(
                y_1, reuse=True, fc_shape=D3.get_shape().as_list())

            # Discriminator: real frames labelled 1, generated frames 0.
            d_loss_real = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=D_logits,
                    labels=tf.ones_like(D)))
            d_loss_fake = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=D_logits_, labels=tf.zeros_like(D_)))
            d_loss = d_loss_real + d_loss_fake
            # Generator: generated frames scored against the label 1.
            g_loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=D_logits_, labels=tf.ones_like(D_)))
            # L2 distance between the third discriminator outputs for the
            # real and the generated input.
            D3_loss = tf.nn.l2_loss(D3 - D3_)

            # Variables whose name contains 'd_' go to the discriminator
            # optimiser, all others to the generator optimiser.
            t_vars = tf.trainable_variables()
            d_vars = [var for var in t_vars if 'd_' in var.name]
            g_vars = [var for var in t_vars if 'd_' not in var.name]

            tf.summary.scalar('loss_g', g_loss)
            tf.summary.scalar('loss_d', d_loss)
            tf.summary.scalar('loss_feature', D3_loss)

            # NOTE(review): `past_loss_l2` and `future_loss_l2` are not
            # assigned in this function; only `loss_l2` is computed above.
            loss = 0.05 * (past_loss_l2 +
                           future_loss_l2) + g_loss + D3_loss * 1.e-4
            tf.summary.scalar('past_loss_l2', past_loss_l2)
            tf.summary.scalar('future_loss_l2', future_loss_l2)

            d_optim = tf.train.AdamOptimizer(FLAGS.lr).minimize(
                d_loss, var_list=d_vars)
            g_optim = tf.train.AdamOptimizer(FLAGS.lr).minimize(
                loss, var_list=g_vars)
            #import IPython; IPython.embed()
            # `d_optim` is listed twice in the group.
            train_op = tf.group(d_optim, d_optim, g_optim)
        else:
            loss = past_loss_l2 + future_loss_l2
            tf.summary.scalar('loss', loss)

            # training
            optimizer = tf.train.AdamOptimizer(FLAGS.lr)
            gvs = optimizer.compute_gradients(loss)
            # gradient clipping
            capped_gvs = [(tf.clip_by_value(grad, -3., 3.), var)
                          for grad, var in gvs]
            train_op = optimizer.apply_gradients(capped_gvs)

        # List of all Variables
        variables = tf.global_variables()

        # Build a saver
        saver = tf.train.Saver(tf.global_variables())

        # Summary op
        summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        sess = tf.Session()

        # Initialise all variables; a checkpoint restore below overrides them
        # when resuming.
        sess.run(init)
        if FLAGS.resume:
            latest = tf.train.latest_checkpoint(FLAGS.train_dir)
            if not latest:
                print("No checkpoint to continue from in", FLAGS.train_dir)
                sys.exit(1)
            print("resume", latest)
            saver.restore(sess, latest)
        else:
            print("init network from scratch")

        # Summary writer
        graph_def = sess.graph.as_graph_def(add_shapes=True)
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                               graph_def=graph_def)

        if not with_y:
            files = find_files(FLAGS.train_data_index)
        else:
            files = find_pairs(FLAGS.train_data_index)

        sample_dir = FLAGS.train_dir + '/samples/'
        if not os.path.exists(sample_dir):
            os.makedirs(sample_dir)

        for step in range(FLAGS.max_step):
            #<<<<<<< HEAD
            #dat = generate_bouncing_ball_sample(FLAGS.batch_size, FLAGS.seq_length, 32, FLAGS.num_balls)
            if load_x:
                dat = load_batch(FLAGS.batch_size, files, step)
            else:
                tgen = tf.range(start=0.,
                                limit=FLAGS.seq_length,
                                dtype=tf.float32)[tf.newaxis, tf.newaxis, ...,
                                                  tf.newaxis]
                fgen = tf.range(start=0., limit=512.,
                                dtype=tf.float32)[tf.newaxis, tf.newaxis,
                                                  tf.newaxis, ...]
                dat = sess.run(generate_x_batch(FLAGS.batch_size, tgen, fgen))
            # NOTE(review): `fdict` is never read, and `x` is not assigned in
            # this function.
            fdict = {x: dat, keep_prob: FLAGS.keep_prob}
            #import IPython; IPython.embed()
            #=======
            # This load overwrites the `dat` produced just above.
            dat = load_batch(FLAGS.batch_size,
                             files,
                             step,
                             with_y=with_y,
                             normalize=FLAGS.norm_input)
            dat = random_flip(dat)
            #>>>>>>> gan-l
            t = time.time()
            # NOTE(review): g_loss, d_loss, g_optim and d_optim are only
            # created in the `with_gan` branch above, but are used here
            # regardless of `with_gan`.
            errG, errD = sess.run([g_loss, d_loss],
                                  feed_dict={
                                      x_all: dat,
                                      keep_prob: FLAGS.keep_prob
                                  })
            if errG > 0.6 and errD > 0.6:
                # Both losses above the threshold: run the joint training op.
                _, loss_r = sess.run([train_op, loss],
                                     feed_dict={
                                         x_all: dat,
                                         keep_prob: FLAGS.keep_prob
                                     })
            else:
                # Otherwise give whichever side is above the threshold up to
                # three extra updates of its own.
                i = 0
                while errG > 0.6:
                    _ = sess.run(g_optim,
                                 feed_dict={
                                     x_all: dat,
                                     keep_prob: FLAGS.keep_prob
                                 })
                    i += 1
                    if i > 2:
                        break
                    else:
                        errG = sess.run(g_loss,
                                        feed_dict={
                                            x_all: dat,
                                            keep_prob: FLAGS.keep_prob
                                        })
                print('G', i, errG)
                i = 0
                while errD > 0.6:
                    # only update discriminator if loss are within given bounds
                    _ = sess.run(d_optim,
                                 feed_dict={
                                     x_all: dat,
                                     keep_prob: FLAGS.keep_prob
                                 })
                    i += 1
                    if i > 2:
                        break
                    else:
                        errD = sess.run(d_loss,
                                        feed_dict={
                                            x_all: dat,
                                            keep_prob: FLAGS.keep_prob
                                        })
                print('D', i, errD)
                loss_r = sess.run(loss,
                                  feed_dict={
                                      x_all: dat,
                                      keep_prob: FLAGS.keep_prob
                                  })
            #_, loss_r = sess.run([train_op, loss],feed_dict={x:dat, keep_prob:FLAGS.keep_prob})
            elapsed = time.time() - t

            if step % 1000 == 0 and step != 0:
                summary_str = sess.run(summary_op,
                                       feed_dict={
                                           x_all: dat,
                                           keep_prob: FLAGS.keep_prob
                                       })
                summary_writer.add_summary(summary_str, step)
                print("time per batch is " + str(elapsed))
                print(step)
                print(loss_r)

            assert not np.isnan(loss_r), 'Model diverged with loss = NaN'

            if step % 4000 == 0:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                print("saved to " + FLAGS.train_dir)

                print("now saving sample!")
                im_x, im_y = sess.run([x_1, y_1],
                                      feed_dict={
                                          x_all: dat,
                                          keep_prob: FLAGS.keep_prob
                                      })
                if match_mask:
                    im_x = im_x[..., 1]
                    im_y = im_y[..., 1]
                # Ground-truth and predicted frames, before and after seq_start.
                _plot_samples(dat[:, :FLAGS.seq_start, :, :].squeeze(),
                              sample_dir + 'step_{}_past_t.png'.format(step))
                _plot_samples(im_x.squeeze(),
                              sample_dir + 'step_{}_past.png'.format(step))
                _plot_samples(dat[:, FLAGS.seq_start:, :, :].squeeze(),
                              sample_dir + 'step_{}_future_t.png'.format(step))
                _plot_samples(im_y.squeeze(),
                              sample_dir + 'step_{}_future.png'.format(step))
# Matrix product with the first operand transposed. The keyword is the
# lowercase `transpose_a`; `transpose_A` raises a TypeError.
tf.linalg.matmul(t4, tf.transpose(t5), transpose_a=True)
tf.math.reduce_mean(t5)
tf.math.reduce_std(t5)
tf.math.reduce_sum(t5)
tf.linalg.norm(t5, 2)

# How to split, stack, and concatenate?
# (stack inserts a new axis; concat joins along an existing axis.)
tf.split(t3, num_or_size_splits=3, axis=0)
tf.split(t5, num_or_size_splits=[1, 2], axis=1)
tf.stack([t4, tf.transpose(t5)], axis=1)
tf.concat([t3, t4], axis=0)

# TF DATASETS API
# When can the Keras API .fit() be used, and when not?
# How to construct a tf Dataset from existing tensors, a list or an array?
# What preprocessing does Dataset offer: how to get different rows of a dataset? How to create batches?
# How to create a dataset from t_x (features) and t_y (labels)?
# How to apply a transformation to each element of a dataset? (for instance function(x, y) -> (x_normalized, y))
# When to shuffle? When to batch? How to go through different epochs? --> how to iterate 3 times over a dataset that is shuffled in batches of 2?
# How to create a dataset from files on my local storage disk?
# Take the images folder to create a dataset and display images
# Show built-in datasets. Import MNIST and show its characteristics
# Create train and test datasets and visualise 10 images
middle_center = x # middle center middle_right = x[:, :, :p] # middle right bottom_left = x[:, :p, -p:] # bottom left bottom_center = x[:, :p, :] # bottom center bottom_right = x[:, :p, :p] # bottom right top = tf.concat([top_left, top_center, top_right], axis=2) middle = tf.concat([middle_left, middle_center, middle_right], axis=2) bottom = tf.concat([bottom_left, bottom_center, bottom_right], axis=2) padded_x = tf.concat([top, middle, bottom], axis=1) return padded_x import tensorflow as tf import numpy as np y = tf.placeholder(name='y', dtype=tf.float32, shape=[None,3,3]) z_rand = tf.placeholder(name='z_r', dtype=tf.float32, shape = [None, 3,3]) z = tf.concatenate([z_rand,y], axis=1) sess = tf.InteractiveSession() b = np.random.normal(size = [5,3,3]) # # # print('a:') # print(b) # # print('padded a:') c = np.array([[1],[2],[1],[2],[3]]) d = np.repeat(c,9).reshape(5,3,3) print( sess.run(z, feed_dict = {z_rand:b, y:d}) ) print(z) # sess.close() # import tensorflow as tf # import numpy as np # import matplotlib.pyplot as plt # import matplotlib.gridspec as gridspec
def __call__(self, x, condition=None):
    """Run ``self.network`` on ``x``, optionally joined with a condition.

    Args:
        x: Input tensor.
        condition: Optional tensor concatenated to ``x`` on the last axis
            before the network is applied.

    Returns:
        The first value returned by ``self.network``; the second (end points)
        is discarded.
    """
    if condition is not None:
        x = tf.concat([x, condition], axis=-1)
    x, _ = self.network(x)
    return x
def attention(self, pre_q, pre_v, pre_k, out_seq_len: int, d_model: int, training=None):
    """
    Calculates the output of the attention once the affine transformations
    of the inputs are done. Here's the shapes of the arguments:

    :param pre_q: (batch_size, q_seq_len, num_heads, d_model // num_heads)
    :param pre_v: (batch_size, v_seq_len, num_heads, d_model // num_heads)
    :param pre_k: (batch_size, k_seq_len, num_heads, d_model // num_heads)
    :param out_seq_len: the length of the output sequence
    :param d_model: dimensionality of the model (by the paper)
    :param training: Passed by Keras. Should not be defined manually.
      Optional scalar tensor indicating if we're in training
      or inference phase.
    :return: tensor of shape (batch_size, out_seq_len, d_model)
    :raises NotImplementedError: when memory compression is combined with
      masking
    """
    head_dim = d_model // self.num_heads

    # shaping Q and V into (batch_size, num_heads, seq_len, d_model//heads)
    q = tf.transpose(pre_q, [0, 2, 1, 3])
    v = tf.transpose(pre_v, [0, 2, 1, 3])

    if self.compression_window_size is None:
        k_transposed = tf.transpose(pre_k, [0, 2, 3, 1])
    else:
        # Memory-compressed attention described in paper
        # "Generating Wikipedia by Summarizing Long Sequences"
        # (https://arxiv.org/pdf/1801.10198.pdf)
        # It compresses keys and values using 1D-convolution which reduces
        # the size of Q * K_transposed from roughly seq_len^2
        # to convoluted_seq_len^2. If we use strided convolution with
        # window size = 3 and stride = 3, memory requirements of such
        # memory-compressed attention will be 9 times smaller than
        # that of the original version.
        if self.use_masking:
            raise NotImplementedError(
                "Masked memory-compressed attention has not "
                "been implemented yet")
        k = tf.transpose(pre_k, [0, 2, 1, 3])
        k, v = [
            tf.reshape(
                # Step 4: Return the result to its original dimensions
                # (batch_size, num_heads, seq_len, d_model//heads)
                K.bias_add(
                    # Step 3: ... and add bias
                    K.conv1d(
                        # Step 2: we "compress" K and V using strided conv
                        tf.reshape(
                            # Step 1: we reshape K and V to
                            # (batch * num_heads, seq_len, d_model//heads)
                            item,
                            (-1, tf.shape(item)[-2], head_dim)),
                        kernel,
                        strides=self.compression_window_size,
                        padding='valid',
                        data_format='channels_last'),
                    bias,
                    data_format='channels_last'),
                # new shape
                tf.concat([tf.shape(item)[:2], [-1, head_dim]], axis=0))
            for item, kernel, bias in (
                (k, self.k_conv_kernel, self.k_conv_bias),
                (v, self.v_conv_kernel, self.v_conv_bias))
        ]
        k_transposed = tf.transpose(k, [0, 1, 3, 2])

    # K is now shaped (batch_size, num_heads, d_model//heads, seq_len)
    # for further matrix multiplication
    sqrt_d = tf.math.sqrt(tf.cast(head_dim, dtype=tf.float32))
    q_shape = tf.shape(q)
    k_t_shape = tf.shape(k_transposed)
    v_shape = tf.shape(v)

    # before performing batch_dot all tensors are being converted to 3D
    # shape (batch_size * num_heads, rows, cols) to make sure batch_dot
    # performs identically on all backends
    new_q_shape = tf.concat([[-1], q_shape[-2:]], axis=0)
    new_k_shape = tf.concat([[-1], k_t_shape[-2:]], axis=0)
    new_v_shape = tf.concat([[-1], v_shape[-2:]], axis=0)
    factor1 = tf.reshape(q, new_q_shape)
    factor2 = tf.reshape(k_transposed, new_k_shape)
    factor3 = tf.reshape(v, new_v_shape)

    batch_dot_raw = K.batch_dot(factor1, factor2)
    attention_weights = self.apply_dropout_if_needed(
        tf.nn.softmax(self.mask_attention_if_needed(batch_dot_raw / sqrt_d)),
        training=training)
    attention_heads = tf.reshape(
        K.batch_dot(attention_weights, factor3),
        (-1, self.num_heads, q_shape[-2], v_shape[-1]))

    # Bring the heads next to their features and merge them into d_model.
    attention_heads_merged = tf.reshape(
        tf.transpose(attention_heads, [0, 2, 1, 3]), (-1, d_model))
    attention_out = tf.reshape(
        tf.tensordot(attention_heads_merged, self.output_weights, axes=1),
        (-1, out_seq_len, d_model))
    return attention_out
def choose_action(self, state, goal, epsilon=None):
    """Ask the controller for an action given the state joined with the goal.

    `state` and `goal` are concatenated along axis 0 and the result is
    handed to `self.controller.choose_action`, forwarding `epsilon` as a
    keyword argument. Whatever the controller returns is returned unchanged.
    """
    controller_input = np.concatenate((state, goal), axis=0)
    return self.controller.choose_action(controller_input, epsilon=epsilon)
def augment(pointcloud_inp, pointcloud_indices_0_inp, heatmapBatches,
            augmentation, numPoints=50000, numInputChannels=7):
    """Augment a batch of point clouds and their heatmaps.

    The per-sample entries of `pointcloud_inp` and `pointcloud_indices_0_inp`
    are overwritten in place with the augmented versions, so both must
    support item assignment (NumPy arrays). All buffers built here are NumPy
    arrays as well, because the function relies on item assignment and
    integer-array indexing, which TensorFlow tensors do not support.

    Args:
        pointcloud_inp: batch of point clouds, indexed by sample on axis 0.
        pointcloud_indices_0_inp: per-sample point indices, same batch order.
        heatmapBatches: list of heatmap batches; each is indexed by sample.
        augmentation: container of flag characters selecting the steps, in
            this order: 'w' (augmentWarping), 's' (augmentScaling),
            'f' (augmentFlipping), 'd' (augmentDropping with
            changeIndices=True), 'p' (augmentDropping with
            changeIndices=False).
        numPoints: if smaller than the number of points per cloud, a random
            subset of this many points is kept.
        numInputChannels: when equal to 4, only channels [:3] and [6:] of
            the final point cloud are kept.

    Returns:
        Tuple `(pointcloud_inp, pointcloud_indices_inp, newHeatmapBatches)`
        where `pointcloud_inp` has a trailing channel of ones appended,
        `pointcloud_indices_inp` holds the output of `getCoarseIndicesMaps`
        per sample, and `newHeatmapBatches` is a list of arrays, one per
        input heatmap batch.
    """
    # NOTE(review): the output buffers use the module-level FETCH_BATCH_SIZE
    # and NUM_POINTS, so the inputs are assumed to have exactly that batch
    # size and point count - confirm against the caller.
    pointcloud_indices_inp = np.zeros(
        (FETCH_BATCH_SIZE, 6, NUM_POINTS), dtype='int32')
    newHeatmapBatches = [[] for _ in heatmapBatches]

    for imageIndex in range(pointcloud_inp.shape[0]):
        newHeatmaps = [heatmapBatch[imageIndex]
                       for heatmapBatch in heatmapBatches]

        if 'w' in augmentation:
            pointcloud_indices_0_inp[imageIndex], newHeatmaps = augmentWarping(
                pointcloud_indices_0_inp[imageIndex], newHeatmaps,
                gridStride=32, randomScale=4)
        if 's' in augmentation:
            (pointcloud_inp[imageIndex],
             pointcloud_indices_0_inp[imageIndex],
             newHeatmaps) = augmentScaling(
                pointcloud_inp[imageIndex],
                pointcloud_indices_0_inp[imageIndex], newHeatmaps)
        if 'f' in augmentation:
            (pointcloud_inp[imageIndex],
             pointcloud_indices_0_inp[imageIndex],
             newHeatmaps) = augmentFlipping(
                pointcloud_inp[imageIndex],
                pointcloud_indices_0_inp[imageIndex], newHeatmaps)
        if 'd' in augmentation:
            (pointcloud_inp[imageIndex],
             pointcloud_indices_0_inp[imageIndex]) = augmentDropping(
                pointcloud_inp[imageIndex],
                pointcloud_indices_0_inp[imageIndex], changeIndices=True)
        if 'p' in augmentation:
            (pointcloud_inp[imageIndex],
             pointcloud_indices_0_inp[imageIndex]) = augmentDropping(
                pointcloud_inp[imageIndex],
                pointcloud_indices_0_inp[imageIndex], changeIndices=False)

        pointcloud_indices_inp[imageIndex] = getCoarseIndicesMaps(
            pointcloud_indices_0_inp[imageIndex], WIDTH, HEIGHT, 0)

        for heatmapIndex, newHeatmap in enumerate(newHeatmaps):
            newHeatmapBatches[heatmapIndex].append(newHeatmap)

    newHeatmapBatches = [np.array(newHeatmapBatch)
                         for newHeatmapBatch in newHeatmapBatches]

    # Append a constant channel of ones to every point.
    pointcloud_inp = np.concatenate(
        [pointcloud_inp, np.ones((FETCH_BATCH_SIZE, NUM_POINTS, 1))], axis=2)

    if numPoints < pointcloud_inp.shape[1]:
        # Keep a random subset of points; the same subset is applied to the
        # point cloud and to the index maps so they stay aligned.
        sampledInds = np.arange(pointcloud_inp.shape[1])
        np.random.shuffle(sampledInds)  # in place
        sampledInds = sampledInds[:numPoints]
        pointcloud_inp = pointcloud_inp[:, sampledInds]
        pointcloud_indices_inp = pointcloud_indices_inp[:, :, sampledInds]

    if numInputChannels == 4:
        pointcloud_inp = np.concatenate(
            [pointcloud_inp[:, :, :3], pointcloud_inp[:, :, 6:]], axis=2)

    return pointcloud_inp, pointcloud_indices_inp, newHeatmapBatches
def dense_layer(x, layer_configs):
    """Build a densely connected stack of Conv2D layers.

    The first two "Conv2D" entries of `layer_configs` are applied directly
    to `x`; every later entry is applied to the concatenation (axis 3) of
    all layer outputs built so far. Entries whose "layer_type" is not
    "Conv2D" are skipped.

    Args:
        x: input tensor fed to the first two layers.
        layer_configs: sequence of dicts with keys "layer_type", "filters",
            "kernel_size", "strides", "padding" and "activation".

    Returns:
        The output of the last layer that was built.

    Raises:
        IndexError: if no Conv2D layer was built (e.g. empty config list).
    """
    layers = []
    for index, config in enumerate(layer_configs):
        if config["layer_type"] != "Conv2D":
            continue
        # Only layers past the second one see the accumulated feature maps.
        inputs = x if index < 2 else concatenate(layers, axis=3)
        layers.append(
            Conv2D(config["filters"],
                   config["kernel_size"],
                   strides=config["strides"],
                   padding=config["padding"],
                   activation=config["activation"])(inputs))
    return layers[-1]
def yolo4_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    args: list of tensor; the first ``len(anchors) // 3`` entries are the
        yolo_outputs (the output of yolo_body or tiny_yolo_body), the
        remaining entries are y_true (the output of preprocess_true_boxes)
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object
        confidence loss
    print_loss: bool, if True wrap the running loss in a tf.Print op that
        logs the individual loss terms

    Returns
    -------
    loss: scalar tensor, the sum over all layers of the xy, wh, confidence
        and class losses, each divided by the batch size

    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    # NOTE(review): the two-layer mask [1, 2, 3] shares index 3 with the
    # first mask and never uses anchor 0. It is kept as-is because the code
    # that builds y_true presumably uses the same mask - confirm both sides
    # before changing it.
    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]

    true_dtype = y_true[0].dtype
    input_shape = tf.cast(tf.shape(yolo_outputs[0])[1:3] * 32, true_dtype)
    grid_shapes = [
        tf.cast(tf.shape(yolo_outputs[l])[1:3], true_dtype)
        for l in range(num_layers)
    ]
    loss = 0
    m = tf.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = tf.cast(m, yolo_outputs[0].dtype)

    def bce_from_logits(labels, logits):
        # Sigmoid cross-entropy on raw logits; helps avoid exp overflow.
        return tf.nn.sigmoid_cross_entropy_with_logits(
            labels=labels, logits=logits)

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        object_mask_bool = tf.cast(object_mask, tf.bool)
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes,
            input_shape, calc_loss=True)
        pred_box = tf.concat([pred_xy, pred_wh], axis=-1)

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = tf.math.log(
            y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        # avoid log(0)=-inf: zero the target wherever there is no object.
        # The mask is broadcast explicitly to raw_true_wh's shape so that
        # tf.where gets a condition of matching shape.
        wh_mask = tf.logical_and(
            object_mask_bool, tf.ones_like(raw_true_wh, dtype=tf.bool))
        raw_true_wh = tf.where(
            wh_mask, raw_true_wh, tf.zeros_like(raw_true_wh))
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(true_dtype, size=1, dynamic_size=True)

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = tf.reduce_max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, tf.cast(best_iou < ignore_thresh, true_box.dtype))
            return b + 1, ignore_mask

        _, ignore_mask = tf.while_loop(
            lambda b, *_: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = tf.expand_dims(ignore_mask, -1)

        xy_loss = object_mask * box_loss_scale * bce_from_logits(
            raw_true_xy, raw_pred[..., 0:2])
        wh_loss = object_mask * box_loss_scale * 0.5 * tf.square(
            raw_true_wh - raw_pred[..., 2:4])
        # Cells without an object only contribute to the confidence loss
        # when ignore_mask is 1 for them.
        confidence_bce = bce_from_logits(object_mask, raw_pred[..., 4:5])
        confidence_loss = (
            object_mask * confidence_bce
            + (1 - object_mask) * confidence_bce * ignore_mask)
        class_loss = object_mask * bce_from_logits(
            true_class_probs, raw_pred[..., 5:])

        xy_loss = tf.reduce_sum(xy_loss) / mf
        wh_loss = tf.reduce_sum(wh_loss) / mf
        confidence_loss = tf.reduce_sum(confidence_loss) / mf
        class_loss = tf.reduce_sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                tf.reduce_sum(ignore_mask)
            ], message='loss: ')
    return loss
def filter_detections(boxes,
                      classification,
                      other=None,
                      class_specific_filter=True,
                      nms=True,
                      score_threshold=0.05,
                      max_detections=300,
                      nms_threshold=0.5):
    """ Filter detections using the boxes and classification values.

    Args
        boxes                 : Tensor of shape (num_boxes, 4) containing the boxes in (x1, y1, x2, y2) format.
        classification        : Tensor of shape (num_boxes, num_classes) containing the classification scores.
        other                 : List of tensors of shape (num_boxes, ...) to filter along with the boxes and classification scores. Defaults to no extra tensors.
        class_specific_filter : Whether to perform filtering per class, or take the best scoring class and filter those.
        nms                   : Flag to enable/disable non maximum suppression.
        score_threshold       : Threshold used to prefilter the boxes with.
        max_detections        : Maximum number of detections to keep.
        nms_threshold         : Threshold for the IoU value to determine when a box should be suppressed.

    Returns
        A list of [boxes, scores, labels, other[0], other[1], ...].
        boxes is shaped (max_detections, 4) and contains the (x1, y1, x2, y2) of the non-suppressed boxes.
        scores is shaped (max_detections,) and contains the scores of the predicted class.
        labels is shaped (max_detections,) and contains the predicted label.
        other[i] is shaped (max_detections, ...) and contains the filtered other[i] data.
        In case there are fewer than max_detections detections, the tensors are padded with -1's.
    """
    other = [] if other is None else other

    def _filter_detections(scores, labels):
        # threshold based on score
        indices = tf.where(tf.greater(scores, score_threshold))

        if nms:
            filtered_boxes = tf.gather_nd(boxes, indices)
            filtered_scores = tf.gather(scores, indices)[:, 0]

            # perform NMS
            nms_indices = tf.image.non_max_suppression(
                filtered_boxes,
                filtered_scores,
                max_output_size=max_detections,
                iou_threshold=nms_threshold)

            # filter indices based on NMS
            indices = tf.gather(indices, nms_indices)

        # pair every surviving box index with its label: rows of (box, label)
        labels = tf.gather_nd(labels, indices)
        indices = tf.stack([indices[:, 0], labels], axis=1)

        return indices

    if class_specific_filter:
        all_indices = []
        # perform per class filtering
        for c in range(int(classification.shape[1])):
            scores = classification[:, c]
            labels = c * tf.ones((tf.shape(scores)[0], ), dtype='int64')
            all_indices.append(_filter_detections(scores, labels))

        # concatenate indices to single tensor
        indices = tf.concat(all_indices, axis=0)
    else:
        scores = tf.reduce_max(classification, axis=1)
        labels = tf.argmax(classification, axis=1)
        indices = _filter_detections(scores, labels)

    # select top k
    scores = tf.gather_nd(classification, indices)
    labels = indices[:, 1]
    scores, top_indices = tf.nn.top_k(
        scores, k=tf.minimum(max_detections, tf.shape(scores)[0]))

    # filter input using the final set of indices
    indices = tf.gather(indices[:, 0], top_indices)
    boxes = tf.gather(boxes, indices)
    labels = tf.gather(labels, top_indices)
    other_ = [tf.gather(o, indices) for o in other]

    # pad the outputs with -1 up to max_detections rows
    pad_size = tf.maximum(0, max_detections - tf.shape(scores)[0])
    boxes = tf.pad(boxes, [[0, pad_size], [0, 0]], constant_values=-1)
    scores = tf.pad(scores, [[0, pad_size]], constant_values=-1)
    labels = tf.pad(labels, [[0, pad_size]], constant_values=-1)
    labels = tf.cast(labels, 'int32')
    other_ = [
        tf.pad(o,
               [[0, pad_size]] + [[0, 0] for _ in range(1, len(o.shape))],
               constant_values=-1) for o in other_
    ]

    # set shapes, since we know what they are
    boxes.set_shape([max_detections, 4])
    scores.set_shape([max_detections])
    labels.set_shape([max_detections])
    # trailing dimensions come from the static shape of the original inputs
    for o, s in zip(other_, [o.shape.as_list() for o in other]):
        o.set_shape([max_detections] + s[1:])

    return [boxes, scores, labels] + other_
def build_model(self):
    """Build the graph: placeholders, encoder/decoder, losses, optimizer, saver.

    Creates the `x_real` / `y_real` / `z_test` placeholders, the encoder and
    decoder networks from `self.config`, the KL and reconstruction losses
    (each scaled by its optional '... loss prod' config factor), the
    optimizer with the global-step update, and a `tf.train.Saver` over the
    encoder and decoder variables plus the global step.
    """
    self.x_real = tf.placeholder(
        tf.float32,
        shape=[None, np.prod(self.input_shape)],
        name='x_input')
    self.y_real = tf.placeholder(
        tf.float32, shape=[None, self.nb_classes], name='y_input')

    self.config['encoder params']['name'] = 'EncoderX'
    self.config['encoder params']["output dims"] = self.z_dim
    # NOTE(review): the encoder is stored as `self.encoder`, but the rest of
    # this method uses `self.x_encoder` and `self.y_encoder`, which are not
    # assigned here - confirm they are created elsewhere.
    self.encoder = get_encoder(self.config['x encoder'],
                               self.config['encoder params'],
                               self.is_training)

    self.config['decoder params']['name'] = 'Decoder'
    # NOTE(review): `self.encoder_input_shape` is not assigned in this
    # method - confirm it is set before build_model() runs.
    self.config['decoder params']["output dims"] = self.encoder_input_shape
    self.decoder = get_decoder(self.config['decoder'],
                               self.config['decoder params'],
                               self.is_training)

    # build encoder: both placeholders are rank 2, so the input and its
    # label are joined along the feature axis
    self.z_mean, self.z_log_var = self.x_encoder(
        tf.concat([self.x_real, self.y_real], axis=1))
    self.z_mean_y = self.y_encoder(self.y_real)

    # sample z from z_mean and z_log_var
    self.z_sample = self.draw_sample(self.z_mean, self.z_log_var)

    # build decoder
    self.x_decode = self.decoder(self.z_sample)

    # build test decoder
    self.z_test = tf.placeholder(
        tf.float32, shape=[None, self.z_dim], name='z_test')
    self.x_test = self.decoder(self.z_test, reuse=True)

    # loss function
    self.kl_loss = (get_loss(
        'kl', self.config['kl loss'], {
            'z_mean': (self.z_mean - self.z_mean_y),
            'z_log_var': self.z_log_var
        }) * self.config.get('kl loss prod', 1.0))
    self.xent_loss = (get_loss(
        'reconstruction', self.config['reconstruction loss'], {
            'x': self.x_real,
            'y': self.x_decode
        }) * self.config.get('reconstruction loss prod', 1.0))
    self.loss = self.kl_loss + self.xent_loss

    # optimizer configure
    self.global_step, self.global_step_update = get_global_step()
    trainable_vars = (self.decoder.vars + self.x_encoder.vars
                      + self.y_encoder.vars)
    optimizer_params = {}
    if 'lr' in self.config:
        self.learning_rate = get_learning_rate(self.config['lr_scheme'],
                                               float(self.config['lr']),
                                               self.global_step,
                                               self.config['lr_params'])
        optimizer_params['learning_rate'] = self.learning_rate
    self.optimizer = get_optimizer(self.config['optimizer'],
                                   optimizer_params, self.loss,
                                   trainable_vars)
    self.train_update = tf.group([self.optimizer, self.global_step_update])

    # model saver: every variable goes into one var_list. Passing the
    # decoder variables as a second positional argument would bind them to
    # Saver's `reshape` flag and leave them out of the checkpoint.
    self.saver = tf.train.Saver(self.x_encoder.vars + self.y_encoder.vars
                                + self.decoder.vars + [self.global_step])
def __call__(self, i, condition=None):
    """Run the backbone network and the fully connected head on `i`.

    The output of `self.network(i)` is flattened, optionally concatenated
    with `condition` along the last axis, passed through the hidden fully
    connected layers listed in config 'nb_fc_nodes', and finally through
    the output layer(s) selected by `self.output_distribution`.

    Args:
        i: input passed to `self.network`.
        condition: optional tensor appended to the flattened features.

    Returns:
        `(mean, log_var)` when `output_distribution` is 'gaussian';
        a single tensor when it is 'mean' or 'none'.

    Raises:
        Exception: if `output_distribution` is none of the above.
    """
    act_fn = get_activation(self.config.get('activation', 'relu'))
    norm_fn, norm_params = get_normalization(
        self.config.get('batch_norm', 'batch_norm'),
        self.config.get('batch_norm_params', self.normalizer_params))
    winit_fn = get_weightsinit(
        self.config.get('weightsinit', 'normal 0.00 0.02'))
    nb_fc_nodes = self.config.get('nb_fc_nodes', [1024, 1024])
    output_dims = self.config.get("output dims", 3)
    output_act_fn = get_activation(
        self.config.get('output_activation', 'none'))

    x, _ = self.network(i)
    x = tcl.flatten(x)
    if condition is not None:
        x = tf.concat([x, condition], axis=-1)

    with tf.variable_scope(self.name):
        # The first call creates the variables; later calls reuse them.
        if self.reuse:
            tf.get_variable_scope().reuse_variables()
        else:
            assert tf.get_variable_scope().reuse is False
            self.reuse = True

        for ind, nb_nodes in enumerate(nb_fc_nodes):
            x = tcl.fully_connected(x, nb_nodes,
                                    activation_fn=act_fn,
                                    normalizer_fn=norm_fn,
                                    normalizer_params=norm_params,
                                    weights_initializer=winit_fn,
                                    scope='fc%d' % ind)

        def output_layer(scope):
            # All output heads share the same configuration; only the
            # variable scope differs.
            return tcl.fully_connected(x, output_dims,
                                       activation_fn=output_act_fn,
                                       weights_initializer=winit_fn,
                                       scope=scope)

        if self.output_distribution == 'gaussian':
            mean = output_layer('fc_out_mean')
            log_var = output_layer('fc_out_log_var')
            return mean, log_var
        elif self.output_distribution in ('mean', 'none'):
            return output_layer('fc_out_mean')
        else:
            raise Exception("None output distribution named "
                            + self.output_distribution)