def random_fix_crop_with_multi_scale(ts, scale_ratios, new_size):
    """Fixed-position, multi-scale random crop (as in Xiong's Caffe).

    One entry of ``scale_ratios`` is drawn uniformly and used as a square
    crop size ``[s, s]``; independently one of five anchor positions
    (four corners and the middle) is drawn. Cropping and resizing are
    delegated to ``crop_and_resize``.

    Args:
        ts: rank-3 tensor (asserted below); its last dimension is kept.
        scale_ratios: non-empty sequence of candidate crop side lengths.
        new_size: list ``[height, width]`` of the output; it is
            concatenated with a list, so it must be a list itself.

    Returns:
        Tensor reshaped to ``new_size + [last dimension of ts]``.
    """
    static_shape = ts.get_shape()
    assert len(static_shape) == 3, 'only use for image'
    raw_shape = ts.get_shape().as_list()
    anchors = ['left_top', 'left_bottom', 'mid', 'right_top', 'right_bottom']
    pp('random_fix_crop_with_multi_scale: scale_ratios %s, new_size %s' %
       (str(scale_ratios), str(new_size)))

    def _square_size_fn(scale):
        # Factory binds `scale` now, so every branch keeps its own value.
        return lambda: tf.constant([scale, scale])

    with tf.name_scope('random_fix_crop_with_multi_scale'):
        # Draw the crop scale.
        scale_idx = tf.random_uniform([], maxval=len(scale_ratios),
                                      dtype=tf.int32)
        scale_branches = []
        for idx, scale in enumerate(scale_ratios):
            scale_branches.append(
                (tf.equal(scale_idx, idx), _square_size_fn(scale)))
        # tf.case needs a default; the first branch doubles as it.
        crop_size_ts = tf.case(scale_branches, default=scale_branches[0][1])

        def _crop_fn(anchor):
            return lambda: crop_and_resize(ts, crop_size_ts, anchor, new_size)

        # Draw the crop position.
        crop_idx = tf.random_uniform([], maxval=len(anchors), dtype=tf.int32)
        crop_branches = []
        for idx, anchor in enumerate(anchors):
            crop_branches.append((tf.equal(crop_idx, idx), _crop_fn(anchor)))
        ts = tf.case(crop_branches, default=crop_branches[0][1])
    out_shape = new_size + raw_shape[-1:]
    return tf.reshape(ts, out_shape)
def loss(self, logits, labels, scope=None):
    """Build the total loss: mean cross entropy plus regularization terms.

    The batch-mean sparse softmax cross entropy is registered in the
    'losses' collection. The result is the sum of every loss returned by
    ``tf.losses.get_losses(scope=scope)`` and of all regularization losses.

    Args:
        logits: Logits from inference().
        labels: Labels from distorted_inputs or inputs().
            1-D tensor of shape [batch_size]
        scope: optional scope used to filter the collected losses.

    Returns:
        Loss tensor of type float.
    """
    int_labels = tf.cast(labels, tf.int64)
    # The op applies softmax internally, so raw logits are passed in.
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=int_labels, name='cross_entropy_per_example')
    batch_mean = tf.reduce_mean(per_example, name='cross_entropy')
    tf.add_to_collection('losses', batch_mean)

    # Data losses (filtered by scope) followed by the weight-decay terms.
    data_terms = tf.losses.get_losses(scope=scope)
    reg_terms = tf.losses.get_regularization_losses()
    total_losses = data_terms + reg_terms
    pp('total losses lst: <%s>...(%d)' %
       (total_losses[0].name, len(total_losses)))
    return tf.add_n(total_losses, name='total_loss')
def build_graph(self):
    """Build the multi-GPU training graph and set ``self.train_op``.

    The input batch is split evenly across ``self.gpus``; each tower runs
    ``self.model.infer`` / ``self.model.loss`` with shared variables and
    computes its own gradients. Tower losses and gradients are averaged,
    batched ``self.iter_size`` at a time, averaged again and applied once.

    Sets ``self.summaries``, ``self.loss`` and ``self.train_op``.
    """
    # Build a Graph that computes the logits predictions.
    inputs = self.reader.read()
    # split into num_gpus groups
    inputs_lst = tf.split(inputs['X'], self.num_gpus, 0)
    labels_lst = tf.split(inputs['Y'], self.num_gpus, 0)
    # get optimizer
    opt = self.model.get_opt()
    # Calculate the gradients for each model tower.
    tower_losses = []
    tower_grads = []
    with tf.variable_scope(tf.get_variable_scope()) as scope:
        for i, gpu_idx in enumerate(self.gpus):
            tower_name = 'tower_%d' % i
            # NOTE: the inner `as scope` rebinds the name bound by the outer
            # variable_scope; from here on `scope` is the tower name scope.
            with tf.device('/gpu:%d' % gpu_idx), tf.name_scope(
                    tower_name) as scope:
                pp(scope=tower_name)
                # inference model.
                logits = self.model.infer(inputs_lst[i])
                # Calculate loss (cross_entropy and weights).
                loss = self.model.loss(logits, labels_lst[i], scope)
                # Reuse variables for the next tower.
                tf.get_variable_scope().reuse_variables()
                if i == 0:
                    # add loss summary (one gpu of one of iter_size)
                    tf.summary.scalar(loss.op.name, loss)
                # Retain the summaries from the final tower.
                # NOTE(review): the loss scalar is only registered for tower
                # 0 while this collection is filtered by the current tower's
                # scope -- confirm the loss summary survives with > 1 GPU.
                self.summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                   scope)
                # Calculate the gradients for the batch of data
                grads = opt.compute_gradients(loss)
                # Keep track of the loss and grads across all towers.
                tower_losses.append(loss)
                tower_grads.append(grads)
    # Calculate mean of losses and grads across all towers.
    loss = tf.reduce_mean(tower_losses)
    grad_var_lst = average_gradients(tower_grads)
    # iter_size batch
    # The averaged loss and gradients are batched iter_size at a time and
    # reduced over axis 0, so one update covers iter_size evaluations.
    # NOTE(review): tf.train.batch is queue based -- presumably the caller
    # starts the queue runners; confirm.
    grads = [grad_var[0] for grad_var in grad_var_lst]
    loss_grads = tf.train.batch([loss] + grads, batch_size=self.iter_size)
    self.loss = tf.reduce_mean(loss_grads[0], axis=0)
    # Re-pair each averaged gradient with its variable (same order as
    # grad_var_lst, since loss_grads[1:] mirrors `grads`).
    ave_grads = [(tf.reduce_mean(grad, axis=0), grad_var_lst[i][1])
                 for i, grad in enumerate(loss_grads[1:])]
    # update moving_average (e.g. moving_mean of batch_norm)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # Apply gradients.
    apply_gradient_op = opt.apply_gradients(ave_grads,
                                            global_step=self.global_step)
    # Add a summary to track the learning rate.
    self.summaries.append(
        tf.summary.scalar('learning_rate', self.model.learning_rate))
    # train_op is a no-op that forces both the collected update ops and the
    # gradient application to run.
    with tf.control_dependencies(update_ops + [apply_gradient_op]):
        self.train_op = tf.no_op(name='train')
def pool2d(ts, kernel_size, strides, name):
    """Apply 2-D max pooling and log the layer configuration.

    Args:
        ts: input tensor passed to ``tf.layers.max_pooling2d``.
        kernel_size: pooling window; an int or a pair of ints.
        strides: pooling strides; an int or a pair of ints.
        name: layer name.

    Returns:
        The pooled tensor.
    """
    ts = tf.layers.max_pooling2d(ts, kernel_size, strides=strides, name=name)
    # %s instead of %d: the layer also accepts (h, w) pairs, which %d cannot
    # format. For plain ints the logged text is unchanged.
    pp(
        '> layer %s: kernel_size: %s, strides %s\n' %
        (name, kernel_size, strides), ts)
    return ts
def launch_graph(self):
    """Run the test split once and print video-level accuracy.

    Per-clip scores fetched from ``self.out_lst['scaled_logits']`` are
    summed per video name, averaged over the clips seen for that video,
    and the argmax is compared with the video's label.
    """
    # calculate iterations (all examples in test split)
    self.num_examples = self.reader.num_examples()
    print('num of examples: %d' % self.num_examples)
    batch_size = self.input_batch_size
    if self.num_examples % batch_size != 0:
        print("Warning: num_examples can't be divided by "
              "input_batch_size with no remainder")
    # float() keeps the ceiling correct where `/` is integer division
    # (Python 2 without `from __future__ import division`).
    num_iter = int(math.ceil(float(self.num_examples) / batch_size))
    print('actually covered num_examples: %d' % (num_iter * batch_size))
    print('Total steps: %d' % num_iter)

    # tensors to fetch
    pp('logits -> softmax -> sum')
    fetches = [
        self.out_lst['scaled_logits'], self.out_lst['name'],
        self.out_lst['label']
    ]

    re_dict = {}  # video name -> summed scores
    video2class = {}  # video name -> label
    re_count = {}  # video name -> number of clips accumulated
    bar = progressbar.ProgressBar()
    for _ in bar(range(num_iter)):
        if self.coord.should_stop():
            break
        logits_npy, v_name, v_label = self.sess.run(fetches)
        for name, label, scores in zip(v_name, v_label, logits_npy):
            video2class[name] = label
            if name in re_dict:
                re_dict[name] += scores
                re_count[name] += 1
            else:
                # Copy the row so the accumulator does not keep the whole
                # batch array alive.
                re_dict[name] = np.array(scores)
                re_count[name] = 1

    # check results
    right_count = 0
    # Presumably each sample contributes 10 views -- TODO confirm.
    expected_count = self.reader.num_per_video * 10
    for v_n, v_l in re_dict.items():
        if re_count[v_n] != expected_count:
            pp('waring re_count[%s] == %d' % (v_n, re_count[v_n]))
        scaled_logits = v_l / re_count[v_n]
        if np.argmax(scaled_logits) == video2class[v_n]:
            right_count += 1
    all_count = len(re_dict)
    # Nothing may have been processed (e.g. the coordinator stopped at once).
    accuracy = float(right_count) / all_count if all_count else 0.0
    print('test accuracy: %.4f\nall counts: %d\nall_examples: %d' %
          (accuracy, all_count, self.num_examples))
def end(self):
    """Close the reader; in 'profile' mode also write the Chrome trace.

    The trace is generated from ``self.run_metadata.step_stats`` and
    written to ``self.profile_log``.
    """
    self.reader.close()
    if self.run_mode != 'profile':
        return
    # Build the timeline and serialise it as Chrome trace JSON.
    step_timeline = timeline.Timeline(self.run_metadata.step_stats)
    trace_json = step_timeline.generate_chrome_trace_format()
    pp('profile_log write to %s...' % self.profile_log)
    with open(self.profile_log, 'w') as log_file:
        log_file.write(trace_json)
def infer(self, inputs):
    """Run the 'resnet_v1_101' network on ``inputs`` and return its output.

    Also records the static batch size of ``inputs`` in
    ``self.input_batch_size``.
    """
    self.input_batch_size = inputs.get_shape().as_list()[0]
    pp('> res101 inputs', inputs)
    # Block counts per stage for the 101-layer variant.
    blocks_per_stage = [3, 4, 23, 3]
    outputs = resnet(inputs,
                     ly_lst=blocks_per_stage,
                     num_class=self.num_class,
                     name='resnet_v1_101')
    pp('< res101 outputs', outputs)
    return outputs
def dense_drop(ts, units, dropout_prob, name):
    """Fully connected ReLU layer followed by dropout.

    Dropout is active only when ``VARS['mode']`` equals 'train'.

    Args:
        ts: input tensor.
        units: number of output units.
        dropout_prob: value passed as the dropout ``rate``.
        name: name of the dense layer.

    Returns:
        The tensor after dense + dropout.
    """
    hidden = tf.layers.dense(ts,
                             units,
                             activation=tf.nn.relu,
                             kernel_regularizer=w_loss,
                             name=name)
    is_training = VARS['mode'] == 'train'
    dropped = tf.layers.dropout(hidden,
                                rate=dropout_prob,
                                training=is_training)
    pp('> layer %s: units %d, dropout %.1f' % (name, units, dropout_prob),
       dropped)
    return dropped
def get_data(self):
    """Create the input placeholders and the preprocessing pipeline.

    ``self.raw_inputs`` receives the placeholders: 'X' (uint8, shaped
    ``self.raw_size`` in train/eval and ``self.example_size`` otherwise),
    'Y' (int32 scalar) and, when ``VARS['mode']`` is 'test', 'name'
    (string scalar, the frame path).

    Preprocessing: cast to float, subtract the mean, then in train mode a
    multi-scale fixed-position crop plus random horizontal flip, and in
    eval mode a central crop. Other augmentations from ``kits``
    (random_size_and_crop, color_jitter, random_crop) are not used here.

    Returns:
        dict with 'X' (preprocessed tensor) and 'Y'; plus 'name' whenever
        ``VARS['mode']`` is neither 'train' nor 'eval'.
    """
    train_or_eval = ['train', 'eval']

    # X: [depth, height, width, channel]
    x_shape = (self.raw_size
               if self.mode in train_or_eval else self.example_size)
    x_input = tf.placeholder(tf.uint8, x_shape)
    # Y: scalar
    y_input = tf.placeholder(tf.int32, [])
    self.raw_inputs = {'X': x_input, 'Y': y_input}
    # name(frame path): scalar
    if VARS['mode'] == 'test':
        self.raw_inputs['name'] = tf.placeholder(tf.string, [])

    pp(scope='preprocess')
    # uint8 -> float32, then subtract mean: [..., height, width, channel]
    ts = kits.subtract_mean(tf.to_float(self.raw_inputs['X']))

    # random or central crop: [height, width]
    crop_size = self.example_size[-3:-1]
    if self.mode == 'train':
        ts = kits.random_fix_crop_with_multi_scale(ts, [256, 224, 192, 168],
                                                   crop_size)
        ts = kits.random_flip_left_right(ts, 0.5)
    elif self.mode == 'eval':
        ts = kits.crop(ts, crop_size, 'mid')
        print('eval: central crop')

    outputs = {'X': ts, 'Y': self.raw_inputs['Y']}
    if VARS['mode'] not in train_or_eval:
        outputs['name'] = self.raw_inputs['name']
    return outputs
def random_crop(ts, new_size):
    """Randomly crop ``ts`` along its height and width dimensions only.

    Args:
        ts (tensor): [..., height, width, in_channels]
        new_size (list): [height, width]

    Returns:
        tensor: ``ts`` cropped to ``new_size``; leading dimensions and the
            channel dimension keep their static sizes.
    """
    static_shape = ts.get_shape().as_list()
    leading_dims = tf.constant(static_shape[:-3], dtype=tf.int32)
    channel_dim = static_shape[-1:]
    # Target shape: untouched leading dims + new [h, w] + channels.
    crop_shape = tf.concat([leading_dims, new_size, channel_dim], axis=0)
    pp('random crop %s' % str(new_size))
    with tf.name_scope('random_crop'):
        cropped = tf.random_crop(ts, crop_shape)
    return cropped
def color_jitter(ts):
    """Inception-style colour jitter on the colour channels.

    Applies, in this order: random brightness, random saturation and
    random contrast (random hue is disabled), then clips to [0, 1].
    Reference:
    https://github.com/tensorflow/models/blob/master/inception/inception/image_processing.py#L183-L186

    Presumably expects pixel values already scaled to [0, 1], given the
    final clip -- confirm against the caller.
    """
    # (op, keyword arguments), applied strictly in this order.
    jitter_steps = (
        (tf.image.random_brightness, {'max_delta': 32. / 255.}),
        (tf.image.random_saturation, {'lower': 0.5, 'upper': 1.5}),
        (tf.image.random_contrast, {'lower': 0.5, 'upper': 1.5}),
    )
    for jitter_op, kwargs in jitter_steps:
        ts = jitter_op(ts, **kwargs)
    # The random_* ops do not necessarily clamp.
    ts = tf.clip_by_value(ts, 0.0, 1.0)
    pp('color jitter (Inception-style)')
    return ts
def random_size_and_crop(ts, new_size):
    """Random-area, random-aspect crop followed by a resize.

    The crop covers 8%-100% of the input area with an aspect ratio drawn
    from [3/4, 4/3] (Inception-style). Cropping and resizing are delegated
    to ``crop_and_resize`` in 'random' mode.

    Args:
        ts: tensor shaped [..., height, width, channels] with static
            height and width.
        new_size: list [height, width] of the output.

    Returns:
        Tensor with static shape leading dims + new_size + [channels].
    """
    in_shape = ts.get_shape().as_list()
    in_height, in_width = in_shape[-3:-1]
    full_area = tf.constant(in_height * in_width, dtype=tf.float32)

    area_fraction = tf.random_uniform([], 0.08, 1.0)
    target_area = area_fraction * full_area
    ratio = tf.random_uniform([], 3. / 4, 4. / 3)

    # height = sqrt(area / ratio), width = sqrt(area * ratio)
    height_f = tf.round(tf.sqrt(target_area / ratio))
    width_f = tf.round(tf.sqrt(target_area * ratio))

    # Keep the crop within the input extent.
    height_i = tf.clip_by_value(tf.to_int32(height_f), 0, in_height)
    width_i = tf.clip_by_value(tf.to_int32(width_f), 0, in_width)

    out_ts = crop_and_resize(ts, [height_i, width_i], 'random', new_size)
    static_out_shape = in_shape[:-3] + new_size + in_shape[-1:]
    out_ts.set_shape(static_out_shape)
    pp('random_size_and_crop')
    return out_ts
def model_init(self):
    """Initialise model weights from a pretrained file when requested.

    Runs only if ``VARS['if_restart'] is True`` and
    ``self.init_weights_path`` is set. The loader is chosen from the file
    extension: '.hdf5', '.npy', otherwise a TensorFlow checkpoint. Global
    variables absent from the restore map are logged (shortened to their
    last three name components).
    """
    if VARS['if_restart'] is not True or self.init_weights_path is None:
        pp('no init from pretrained model')
        return

    # init model weights from pretrained model
    vars_map = self.get_vars_to_restore()
    except_vars = set()
    for var in tf.global_variables():
        if var in vars_map.values():
            continue
        short_name = '/'.join(var.name.split('/')[-3:])
        except_vars.add('...' + short_name)
    pp('> init except vars\n', except_vars, scope='fine_tune')

    weights_path = self.init_weights_path
    if weights_path.endswith('.hdf5'):
        loader = self.model_init_from_hdf5
    elif weights_path.endswith('.npy'):
        loader = self.model_init_from_npy
    else:
        loader = self.model_init_from_ckpt
    loader(vars_map)
def conv2d_bn_relu(ts,
                   num_kernel,
                   kernel_size,
                   strides,
                   activation=tf.nn.relu,
                   name=None):
    """2-D convolution ('same' padding, no bias), batch norm, activation.

    Args:
        ts: input tensor.
        num_kernel: number of convolution filters.
        kernel_size: int or pair of ints.
        strides: int or pair of ints.
        activation: callable applied after batch norm, or None to skip it.
        name: layer name; also used as the batch-norm scope.

    Returns:
        The activated tensor (or the batch-normed tensor when
        ``activation`` is None).
    """
    ts = tf.layers.conv2d(ts,
                          num_kernel,
                          kernel_size,
                          strides=strides,
                          padding='same',
                          use_bias=False,
                          kernel_regularizer=w_loss,
                          name=name)
    ts = bn(ts, scope=name)
    if activation is None:
        act_name = 'none'
    else:
        # `__name__` exists on Python 2 and 3 (`func_name` is 2 only); fall
        # back to repr() for callables without a name.
        act_name = getattr(activation, '__name__', repr(activation))
    # %s for kernel_size/strides so that (h, w) pairs can be logged too.
    pp(
        '> layer %s: num_kernel %d, kernel_size %s, strides %s, activation: %s with bn\n'
        % (name, num_kernel, kernel_size, strides, act_name), ts)
    return activation(ts) if activation is not None else ts
def model_init_from_ckpt(self, vars_map):
    """Restore variables from the checkpoint at ``self.init_weights_path``.

    Args:
        vars_map: mapping from saved name to variable, e.g.
            {'conv2d/weights': var(weights)}
    """
    restorer = tf.train.Saver(vars_map)
    ckpt_path = self.init_weights_path
    # Restore variables from disk.
    pp('restore vars from ckpt: %s...' % ckpt_path)
    restorer.restore(self.sess, ckpt_path)
    restored_names = list(vars_map)
    pp('init > ', restored_names)
    pp('done')
def model_init_from_hdf5(self, vars_map):
    """Assign variables from the HDF5 file at ``self.init_weights_path``.

    Args:
        vars_map: mapping from dataset name in the file to variable, e.g.
            {'conv2d/weights': var(weights)}
    """
    pp('restore vars from hdf5 file: %s...' % self.init_weights_path)
    with h5py.File(self.init_weights_path, 'r') as f:
        # items() works on Python 2 and 3; iteritems() is Python 2 only.
        for var_name, var in vars_map.items():
            npy_var = np.array(f[var_name])
            # assign hdf5_weights to tf_vars
            self.sess.run(var.assign(npy_var))
            pp('init > %s' % var_name)
    pp('done')
def model_init_from_npy(self, vars_map):
    """Assign variables from the .npy file at ``self.init_weights_path``.

    The file must hold a pickled nested dict ``{layer_name: {param_name:
    array}}``. Each key of ``vars_map`` is split at its last '/' into
    layer name and parameter name.

    Args:
        vars_map: mapping from '<layer>/<param>' to variable, e.g.
            {'conv2d/weights': var(weights)}
    """
    pp('restore vars from npy file: %s...' % self.init_weights_path)
    # A dict stored in .npy is a pickled object array; NumPy >= 1.16.3
    # refuses to load it unless allow_pickle=True is passed.
    # SECURITY: unpickling can run arbitrary code -- only load weight files
    # from trusted sources.
    w_npy = np.load(self.init_weights_path, allow_pickle=True).item()
    # items() works on Python 2 and 3; iteritems() is Python 2 only.
    for var_name, var in vars_map.items():
        # 'a/b/c' -> ('a/b', 'c'); a name without '/' -> ('', name).
        layer_name, _, param_name = var_name.rpartition('/')
        self.sess.run(var.assign(w_npy[layer_name][param_name]))
        pp('init > %s' % var_name)
    pp('done')
def infer(self, inputs):
    """Build the C3D network described by ``self.graph``; return logits.

    ``inputs`` is [b, h, w, s * c] with c == 3; it is rearranged to
    [b, s, h, w, c] before the 3-D layers. Each entry of ``self.graph`` is
    a dict whose 'name' starts with 'conv', 'pool' or 'fc'. A final linear
    layer 'fc8' with 101 units produces the logits.

    Also records the static batch size in ``self.input_batch_size``.

    Raises:
        ValueError: if a layer name has an unknown prefix.
    """
    # inputs: [b, h, w, s * c]
    inputs_shape = inputs.get_shape().as_list()
    pp('inputs shape: ', inputs_shape)
    self.input_batch_size = inputs_shape[0]
    frm_step = inputs_shape[-1] // 3
    # transform -> [b, s, h, w, c]
    ts = tf.reshape(inputs, inputs_shape[:3] + [frm_step, 3])
    ts = tf.transpose(ts, [0, 3, 1, 2, 4])
    pp('transform inputs shape to: ', ts.get_shape().as_list())

    def w_loss(ts, weight_decay=0.0005):
        # L2 weight decay used as kernel regularizer.
        return tf.nn.l2_loss(ts) * weight_decay

    def reshape_rank2(ts):
        # Flatten everything but the batch dimension: [batch, -1].
        ts_shape = ts.get_shape().as_list()
        return tf.reshape(ts, [ts_shape[0], -1])

    with tf.variable_scope('c3d'):
        for ly in self.graph:
            if ly['name'].startswith('conv'):
                ts = tf.layers.conv3d(
                    ts,
                    ly['num_kernel'], [3, 3, 3],
                    padding='same',
                    activation=tf.nn.relu,
                    kernel_initializer=tf.truncated_normal_initializer(
                        stddev=0.01),
                    kernel_regularizer=w_loss,
                    name=ly['name'])
                # [3, 3, 3] is the kernel size; strides are left at the
                # layer default, so the log says kernel_size, not stride.
                pp('layer %s: num_kernel %d, kernel_size [3, 3, 3],'
                   ' gaussian 0.01, l2_loss, with relu' %
                   (ly['name'], ly['num_kernel']))
                pp(ts)
            elif ly['name'].startswith('pool'):
                ts = tf.layers.max_pooling3d(ts,
                                             ly['kernel_size'],
                                             ly['stride'],
                                             padding='same',
                                             name=ly['name'])
                pp('layer %s: kernel_size %s, stride %s' %
                   (ly['name'], str(ly['kernel_size']), str(ly['stride'])))
                pp(ts)
            elif ly['name'].startswith('fc'):
                # reshape to rank 2
                if len(ts.get_shape().as_list()) > 2:
                    ts = reshape_rank2(ts)
                ts = tf.layers.dense(
                    ts,
                    ly['units'],
                    activation=tf.nn.relu,
                    kernel_initializer=tf.truncated_normal_initializer(
                        stddev=0.005),
                    bias_initializer=tf.ones_initializer(),
                    kernel_regularizer=w_loss,
                    name=ly['name'])
                ts = tf.layers.dropout(ts,
                                       rate=self.dropout_prob,
                                       training=self.is_training)
                pp('layer %s: units %d, gaussian 0.005, l2_loss,'
                   ' with relu and droupout %.1f(%r)' %
                   (ly['name'], ly['units'], self.dropout_prob,
                    self.is_training))
                pp(ts)
            else:
                raise ValueError('no such layer %s' % ly['name'])
        # linear fc
        # NOTE(review): the class count is hard-coded to 101 -- confirm
        # whether it should come from a num_class attribute instead.
        ts = tf.layers.dense(
            ts,
            101,
            activation=None,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
            kernel_regularizer=w_loss,
            name='fc8')
        pp('layer %s: units 101, gaussian 0.01 without relu and droupout' %
           ('fc8'))
        pp(ts)
    return ts
def infer(self, inputs):
    """Build a fully convolutional VGG-16 and return class logits.

    Five conv stages (each followed by 2x2 max pooling) are built under
    the 'vgg_16' variable scope, then 'fc6' (7x7 'valid' conv), 'fc7' and
    the linear 'fc8' (1x1 convs), with dropout after fc6 and fc7. The fc8
    output is squeezed over axes 1 and 2.

    Also records the static batch size in ``self.input_batch_size``.

    Returns:
        Logits tensor with ``self.num_class`` channels.
    """
    self.input_batch_size = inputs.get_shape().as_list()[0]

    def w_loss(ts, weight_decay=0.0005):
        # L2 weight decay used as kernel regularizer.
        return tf.nn.l2_loss(ts) * weight_decay

    def conv2d(ts, num_kernel, kernel_size=3, padding='same', name=None):
        return tf.layers.conv2d(ts,
                                num_kernel,
                                kernel_size,
                                activation=tf.nn.relu,
                                padding=padding,
                                kernel_regularizer=w_loss,
                                name=name)

    def pool2d(ts, name):
        return tf.layers.max_pooling2d(ts, [2, 2], strides=2, name=name)

    def dropout(ts):
        return tf.layers.dropout(ts,
                                 rate=self.dropout_prob,
                                 training=self.is_training)

    # (number of filters, number of conv layers) for stages conv1..conv5.
    stages = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    ts = inputs
    with tf.variable_scope('vgg_16'):
        for stage_idx, (num_kernel, num_convs) in enumerate(stages, 1):
            with tf.variable_scope('conv%d' % stage_idx):
                for conv_idx in range(1, num_convs + 1):
                    ts = conv2d(ts,
                                num_kernel,
                                name='conv%d_%d' % (stage_idx, conv_idx))
            ts = pool2d(ts, name='pool%d' % stage_idx)

        # use conv2d instead of dense
        ts = conv2d(ts, 4096, kernel_size=7, padding='valid', name='fc6')
        ts = dropout(ts)
        ts = conv2d(ts, 4096, kernel_size=1, name='fc7')
        ts = dropout(ts)
        # linear fc
        ts = tf.layers.conv2d(ts,
                              self.num_class,
                              1,
                              activation=None,
                              kernel_initializer=tf.zeros_initializer(),
                              kernel_regularizer=w_loss,
                              name='fc8')
        # The kernel is zero-initialised, so the log says so.
        pp('layer %s: units %d, zeros init without relu and dropout' %
           ('fc8', self.num_class))
        ts = tf.squeeze(ts, [1, 2], name='fc8/squeezed')
    pp(' output shape:', ts.get_shape().as_list())
    return ts
def get_vars_to_restore(self):
    """Return the default restore map: every trainable variable by name.

    Returns:
        dict mapping ``var.name`` to the variable itself.
    """
    pp('use vanilla vars_to_restore')
    vars_to_restore = {}
    for var in tf.trainable_variables():
        vars_to_restore[var.name] = var
    return vars_to_restore