Example #1
def random_fix_crop_with_multi_scale(ts, scale_ratios, new_size):
    # Fixed corner cropping and "Multi-scale" cropping augmentation
    # (In Xiong's Caffe)
    assert len(ts.get_shape()) == 3, 'expects a rank-3 image tensor'
    raw_shape = ts.get_shape().as_list()
    crop_pos_lst = [
        'left_top', 'left_bottom', 'mid', 'right_top', 'right_bottom'
    ]

    pp('random_fix_crop_with_multi_scale: scale_ratios %s, new_size %s' %
       (str(scale_ratios), str(new_size)))

    with tf.name_scope('random_fix_crop_with_multi_scale'):
        # random select from scale_ratios
        scale_idx = tf.random_uniform([],
                                      maxval=len(scale_ratios),
                                      dtype=tf.int32)

        scale_fns = [(tf.equal(scale_idx, i),
                      lambda scale=scale: tf.constant([scale] * 2))
                     for i, scale in enumerate(scale_ratios)]
        crop_size_ts = tf.case(scale_fns, default=scale_fns[0][1])
        # randomly crop one position
        crop_idx = tf.random_uniform([],
                                     maxval=len(crop_pos_lst),
                                     dtype=tf.int32)
        crop_fns = [
            (tf.equal(crop_idx, i),
             lambda pos=pos: crop_and_resize(ts, crop_size_ts, pos, new_size))
            for i, pos in enumerate(crop_pos_lst)
        ]
        ts = tf.case(crop_fns, default=crop_fns[0][1])

        return tf.reshape(ts, new_size + raw_shape[-1:])
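
The default-argument lambdas above are the standard way to freeze the loop
variable for each tf.case branch; a minimal, self-contained sketch of the
idiom (names here are illustrative, not from this codebase):

import tensorflow as tf

scales = [256, 224, 192, 168]
idx = tf.random_uniform([], maxval=len(scales), dtype=tf.int32)
# `lambda s=s:` binds the current loop value; a plain `lambda:` would make
# every branch return the last scale in the list.
branches = [(tf.equal(idx, i), lambda s=s: tf.constant([s, s]))
            for i, s in enumerate(scales)]
crop_size = tf.case(branches, default=branches[0][1])  # int32 tensor, shape [2]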
Example #2
    def loss(self, logits, labels, scope=None):
        """Add L2Loss to all the trainable variables.

        Add summary for "Loss" and "Loss/avg".
        Args:
          logits: Logits from inference().
          labels: Labels from distorted_inputs or inputs(). 1-D tensor
                  of shape [batch_size]

        Returns:
          Loss tensor of type float.
        """
        # Calculate the average cross entropy loss across the batch.
        labels = tf.cast(labels, tf.int64)
        # the following op applies softmax internally
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels, name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy,
                                            name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)

        # The total loss is defined as the cross entropy loss,
        #   plus all of the weight
        # decay terms (L2 loss).
        total_losses = tf.losses.get_losses(
            scope=scope) + tf.losses.get_regularization_losses()
        pp('total losses lst: <%s>...(%d)' %
           (total_losses[0].name, len(total_losses)))
        return tf.add_n(total_losses, name='total_loss')
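
For reference, tf.losses.get_losses reads the tf.GraphKeys.LOSSES collection,
which is the same 'losses' string used by the add_to_collection call above,
and get_regularization_losses picks up the kernel_regularizer terms that
tf.layers registers; a quick sanity check of that assumption:

import tensorflow as tf

assert tf.GraphKeys.LOSSES == 'losses'
assert tf.GraphKeys.REGULARIZATION_LOSSES == 'regularization_losses'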
Example #3
    def build_graph(self):
        # Build a Graph that computes the logits predictions.
        inputs = self.reader.read()
        # split into num_gpus groups
        inputs_lst = tf.split(inputs['X'], self.num_gpus, 0)
        labels_lst = tf.split(inputs['Y'], self.num_gpus, 0)
        # get optimizer
        opt = self.model.get_opt()

        # Calculate the gradients for each model tower.
        tower_losses = []
        tower_grads = []
        with tf.variable_scope(tf.get_variable_scope()) as scope:
            for i, gpu_idx in enumerate(self.gpus):
                tower_name = 'tower_%d' % i
                with tf.device('/gpu:%d' % gpu_idx), tf.name_scope(tower_name) as scope:
                    pp(scope=tower_name)
                    # inference model.
                    logits = self.model.infer(inputs_lst[i])
                    # Calculate loss (cross_entropy and weights).
                    loss = self.model.loss(logits, labels_lst[i], scope)
                    # Reuse variables for the next tower.
                    tf.get_variable_scope().reuse_variables()
                    if i == 0:
                        # add loss summary (one gpu of one of iter_size)
                        tf.summary.scalar(loss.op.name, loss)
                        # Retain the summaries from the final tower.
                        self.summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                           scope)
                    # Calculate the gradients for the batch of data
                    grads = opt.compute_gradients(loss)
                    # Keep track of the loss and grads across all towers.
                    tower_losses.append(loss)
                    tower_grads.append(grads)

        # Calculate mean of losses and grads across all towers.
        loss = tf.reduce_mean(tower_losses)
        grad_var_lst = average_gradients(tower_grads)

        # iter_size batch
        grads = [grad_var[0] for grad_var in grad_var_lst]
        loss_grads = tf.train.batch([loss] + grads,
                                    batch_size=self.iter_size)
        self.loss = tf.reduce_mean(loss_grads[0], axis=0)
        ave_grads = [(tf.reduce_mean(grad, axis=0), grad_var_lst[i][1])
                     for i, grad in enumerate(loss_grads[1:])]

        # update moving_average (e.g. moving_mean of batch_norm)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        # Apply gradients.
        apply_gradient_op = opt.apply_gradients(ave_grads,
                                                global_step=self.global_step)

        # Add a summary to track the learning rate.
        self.summaries.append(tf.summary.scalar('learning_rate',
                                                self.model.learning_rate))

        with tf.control_dependencies(update_ops + [apply_gradient_op]):
            self.train_op = tf.no_op(name='train')
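
average_gradients is not shown in these examples; a sketch in the style of the
classic TensorFlow multi-GPU CIFAR-10 tutorial, which this tower loop closely
follows (an assumption about the actual helper, not its verbatim definition):

def average_gradients(tower_grads):
    # tower_grads: [[(grad, var), ...] per tower] -> [(avg_grad, var), ...]
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        # variables are shared across towers, so any tower's var will do
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads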
Example #4
def pool2d(ts, kernel_size, strides, name):
    ts = tf.layers.max_pooling2d(ts, kernel_size, strides=strides, name=name)
    pp(
        '> layer %s: kernel_size: %d, strides %d\n' %
        (name, kernel_size, strides), ts)

    return ts
Example #5
    def launch_graph(self):
        # calculate iterations (all examples in test split)
        self.num_examples = self.reader.num_examples()
        # self.num_examples = 100000
        print('num of examples: %d' % self.num_examples)

        if self.num_examples % self.input_batch_size != 0:
            print("Warning: num_examples can't be divided by"
                  "input_batch_size with no remainder")

        num_iter = int(
            math.ceil(self.num_examples / float(self.input_batch_size)))
        print('actually covered num_examples: %d' %
              (num_iter * self.input_batch_size))
        print('Total steps: %d' % num_iter)

        # tensor to get
        # logits = self.out_lst['logits']
        pp('logits -> softmax -> sum')
        logits = self.out_lst['scaled_logits']
        video_name = self.out_lst['name']
        video_label = self.out_lst['label']

        re_dict = {}
        video2class = {}
        re_count = {}

        bar = progressbar.ProgressBar()
        for step in bar(range(num_iter)):
            if self.coord.should_stop():
                break
            logits_npy, v_name, v_label = self.sess.run(
                [logits, video_name, video_label])
            for i in range(self.input_batch_size):
                video2class[v_name[i]] = v_label[i]
                if v_name[i] in re_dict:
                    re_dict[v_name[i]] += logits_npy[i]
                    re_count[v_name[i]] += 1
                else:
                    re_dict[v_name[i]] = logits_npy[i]
                    re_count[v_name[i]] = 1

        # def softmax(x):
        #     """Compute softmax values for each sets of scores in x."""
        #     return np.exp(x) / np.sum(np.exp(x), axis=0)

        # check results
        right_count = 0
        all_count = 0
        for v_n, v_l in re_dict.items():
            all_count += 1
            if re_count[v_n] != self.reader.num_per_video * 10:
                pp('warning: re_count[%s] == %d' % (v_n, re_count[v_n]))
            scaled_logits = v_l / re_count[v_n]
            if np.argmax(scaled_logits) == video2class[v_n]:
                right_count += 1

        print('test accuracy: %.4f\nall counts: %d\nall_examples: %d' %
              (float(right_count) / all_count, all_count, self.num_examples))
Example #6
    def end(self):
        self.reader.close()

        if self.run_mode == 'profile':
            # Create the Timeline object and write it to a JSON trace file
            tl = timeline.Timeline(self.run_metadata.step_stats)
            ctf = tl.generate_chrome_trace_format()
            pp('profile_log write to %s...' % self.profile_log)
            with open(self.profile_log, 'w') as f:
                f.write(ctf)
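
timeline here comes from tensorflow.python.client, and self.run_metadata is
assumed to have been filled in during earlier session runs; a sketch of the
usual TF1 setup for collecting it:

from tensorflow.python.client import timeline

run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
self.run_metadata = tf.RunMetadata()
self.sess.run(self.train_op, options=run_options,
              run_metadata=self.run_metadata)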
Example #7
    def infer(self, inputs):
        input_shape = inputs.get_shape().as_list()
        self.input_batch_size = input_shape[0]
        pp('> res101 inputs', inputs)

        ts = resnet(inputs,
                    ly_lst=[3, 4, 23, 3],
                    num_class=self.num_class,
                    name='resnet_v1_101')
        pp('< res101 outputs', ts)

        return ts
Example #8
def dense_drop(ts, units, dropout_prob, name):
    ts = tf.layers.dense(ts,
                         units,
                         activation=tf.nn.relu,
                         kernel_regularizer=w_loss,
                         name=name)
    ts = tf.layers.dropout(ts,
                           rate=dropout_prob,
                           training=(VARS['mode'] == 'train'))

    pp('> layer %s: units %d, dropout %.1f' % (name, units, dropout_prob), ts)
    return ts
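
w_loss and VARS are defined elsewhere in the codebase; for context, w_loss
matches the local definition in Examples #18/#19, and VARS is assumed to be a
global config dict:

def w_loss(ts, weight_decay=0.0005):
    return tf.nn.l2_loss(ts) * weight_decay

VARS = {'mode': 'train'}  # one of 'train' / 'eval' / 'test' (assumed)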
Example #9
    def get_data(self):
        self.raw_inputs = {
            # X: [depth, height, width, channel]
            'X': (tf.placeholder(tf.uint8, self.raw_size)
                  if self.mode in ['train', 'eval'] else
                  tf.placeholder(tf.uint8, self.example_size)),
            # Y: scalar
            'Y': tf.placeholder(tf.int32, []),
        }
        # name(frame path): scalar
        if VARS['mode'] == 'test':
            self.raw_inputs['name'] = tf.placeholder(tf.string, [])

        pp(scope='preprocess')
        raw_X = self.raw_inputs['X']
        # clip: uint8 -> float32
        ts = tf.to_float(raw_X)

        # subtract mean: [..., height, width, channel]
        ts = kits.subtract_mean(ts)

        # # pixels scaled to [0, 1] for convenience of color_jitter
        # ts = tf.image.convert_image_dtype(ts, dtype=tf.float32)

        # random or central crop: [height, width]
        crop_size = self.example_size[-3:-1]
        if self.mode == 'train':
            # ts = kits.random_size_and_crop(ts, self.example_size[:2])
            # ts = kits.color_jitter(ts)
            # ts = kits.random_crop(ts, self.example_size[:2])
            ts = kits.random_fix_crop_with_multi_scale(ts,
                                                       [256, 224, 192, 168],
                                                       crop_size)
            ts = kits.random_flip_left_right(ts, 0.5)

            # # [0, 1] -> [-1, 1]
            # ts = tf.subtract(ts, 0.5)
            # ts = tf.multiply(ts, 2.0)
            # pp('normalize [0, 1] -> [-1, 1]')
        elif self.mode == 'eval':
            ts = kits.crop(ts, crop_size, 'mid')
            print('eval: central crop')

        if VARS['mode'] in ['train', 'eval']:
            return {'X': ts, 'Y': self.raw_inputs['Y']}
        else:
            return {
                'X': ts,
                'Y': self.raw_inputs['Y'],
                'name': self.raw_inputs['name']
            }
Example #10
def random_crop(ts, new_size):
    """Only crop along [height, width]

    Args:
        ts (tensor): [..., height, width, in_channels]
        new_size (list): [height, width]

    Returns:
        tensor: cropped ts with reshaped [height, width]
    """
    shape = ts.get_shape().as_list()
    ts_shape = tf.concat(
        [tf.constant(shape[:-3], dtype=tf.int32), new_size, shape[-1:]],
        axis=0)

    pp('random crop %s' % str(new_size))
    with tf.name_scope('random_crop'):
        return tf.random_crop(ts, ts_shape)
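
Usage sketch (shapes are illustrative): because the leading dimensions keep
their full extent in ts_shape, tf.random_crop only crops height and width:

clip = tf.placeholder(tf.uint8, [16, 240, 320, 3])  # [frames, h, w, c]
cropped = random_crop(clip, [224, 224])             # -> [16, 224, 224, 3]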
Example #11
def color_jitter(ts):
    """works for color channels
    - random brightness
    - random contrast
    - random saturation
    - random hue
    Follow Inception-style:
    https://github.com/tensorflow/models/blob/master/inception/inception/image_processing.py#L183-L186
    """
    ts = tf.image.random_brightness(ts, max_delta=32. / 255.)
    ts = tf.image.random_saturation(ts, lower=0.5, upper=1.5)
    # ts = tf.image.random_hue(ts, max_delta=0.2)
    ts = tf.image.random_contrast(ts, lower=0.5, upper=1.5)

    # The random_* ops do not necessarily clamp.
    ts = tf.clip_by_value(ts, 0.0, 1.0)

    pp('color jitter (Inception-style)')
    return ts
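
The final clip_by_value implies pixels are expected in [0, 1], so convert
first, as the commented-out line in Example #9 suggests (raw_uint8_img is a
placeholder name):

raw_uint8_img = tf.placeholder(tf.uint8, [240, 320, 3])
img = tf.image.convert_image_dtype(raw_uint8_img, dtype=tf.float32)  # [0, 1]
img = color_jitter(img)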
Example #12
def random_size_and_crop(ts, new_size):
    """random scale and random crop
    Random crop with size 8%-100% and aspect ratio 3/4 - 4/3 (Inception-style)
    """
    raw_shape = ts.get_shape().as_list()
    raw_height, raw_width = raw_shape[-3:-1]
    raw_area = tf.constant(raw_height * raw_width, dtype=tf.float32)
    new_area = tf.random_uniform([], 0.08, 1.0) * raw_area
    aspect_ratio = tf.random_uniform([], 3. / 4, 4. / 3)
    crop_height = tf.round(tf.sqrt(new_area / aspect_ratio))
    crop_width = tf.round(tf.sqrt(new_area * aspect_ratio))
    # clip on crop_width, crop_height
    crop_height = tf.clip_by_value(tf.to_int32(crop_height), 0, raw_height)
    crop_width = tf.clip_by_value(tf.to_int32(crop_width), 0, raw_width)

    out_ts = crop_and_resize(ts, [crop_height, crop_width], 'random', new_size)
    out_ts.set_shape(raw_shape[:-3] + new_size + raw_shape[-1:])

    pp('random_size_and_crop')
    return out_ts
Example #13
    def model_init(self):
        # init model weights from pretrained model
        if VARS['if_restart'] is True and self.init_weights_path is not None:

            vars_map = self.get_vars_to_restore()
            except_vars = set([
                '...' + '/'.join(var.name.split('/')[-3:])
                for var in tf.global_variables()
                if var not in vars_map.values()
            ])
            pp('> init except vars\n', except_vars, scope='fine_tune')

            if self.init_weights_path.endswith('.hdf5'):
                self.model_init_from_hdf5(vars_map)
            elif self.init_weights_path.endswith('.npy'):
                self.model_init_from_npy(vars_map)
            else:
                self.model_init_from_ckpt(vars_map)
        else:
            pp('no init from pretrained model')
Example #14
def conv2d_bn_relu(ts,
                   num_kernel,
                   kernel_size,
                   strides,
                   activation=tf.nn.relu,
                   name=None):
    ts = tf.layers.conv2d(ts,
                          num_kernel,
                          kernel_size,
                          strides=strides,
                          padding='same',
                          use_bias=False,
                          kernel_regularizer=w_loss,
                          name=name)
    ts = bn(ts, scope=name)

    pp(
        '> layer %s: num_kernel %d, kernel_size %d, strides %d, activation: %s with bn\n'
        % (name, num_kernel, kernel_size, strides,
           activation.__name__ if activation else 'none'), ts)
    return (activation(ts) if activation is not None else ts)
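
bn is not defined in these examples; a minimal sketch of what it presumably
wraps (an assumption, the real helper may differ, e.g. in how it names the
scope or tracks the training flag):

def bn(ts, scope=None):
    # batch norm; moving statistics are updated only in training mode
    return tf.layers.batch_normalization(
        ts,
        training=(VARS['mode'] == 'train'),
        name=None if scope is None else scope + '_bn')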
Example #15
    def model_init_from_ckpt(self, vars_map):
        """
        vars_map: {'conv2d/weights': var(weights)}
        """
        restorer = tf.train.Saver(vars_map)
        # Restore variables from disk.
        pp('restore vars from ckpt: %s...' % self.init_weights_path)
        restorer.restore(self.sess, self.init_weights_path)
        pp('init > ', list(vars_map))
        pp('done')
Example #16
    def model_init_from_hdf5(self, vars_map):
        """
        vars_map: {'conv2d/weights': var(weights)}
        """
        pp('restore vars from hdf5 file: %s...' % self.init_weights_path)
        with h5py.File(self.init_weights_path, 'r') as f:
            for var_name, var in vars_map.items():
                npy_var = np.array(f[var_name])
                # assign hdf5 weights to the tf variable
                self.sess.run(var.assign(npy_var))
                pp('init > %s' % var_name)
        pp('done')
Example #17
    def model_init_from_npy(self, vars_map):
        """
        vars_map: {'conv2d/weights': var(weights)}
        """
        pp('restore vars from npy file: %s...' % self.init_weights_path)

        w_npy = np.load(self.init_weights_path).item()
        for var_name, var in vars_map.items():
            scope_name = '/'.join(var_name.split('/')[:-1])
            param_name = var_name.split('/')[-1]
            self.sess.run(var.assign(w_npy[scope_name][param_name]))
            pp('init > %s' % var_name)
        pp('done')
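
The .npy file is expected to hold a pickled dict of dicts, with the layer
scope as the outer key and the parameter name as the inner key (layout
inferred from the key split above; shapes below are illustrative):

w_npy = {
    'conv1_1': {'weights': np.zeros([3, 3, 3, 64], np.float32),
                'biases': np.zeros([64], np.float32)},
}
np.save('init_weights.npy', w_npy)  # read back via np.load(path).item()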
Example #18
    def infer(self, inputs):
        # inputs: [b, h, w, s * c]
        inputs_shape = inputs.get_shape().as_list()
        pp('inputs shape: ', inputs_shape)
        self.input_batch_size = inputs_shape[0]
        frm_step = inputs_shape[-1] // 3

        # transform -> [b, s, h, w, c]
        ts = tf.reshape(inputs, inputs_shape[:3] + [frm_step, 3])
        ts = tf.transpose(ts, [0, 3, 1, 2, 4])
        pp('transform inputs shape to: ', ts.get_shape().as_list())

        # test: transform
        # tf.summary.image()

        def w_loss(ts, weight_decay=0.0005):
            return tf.nn.l2_loss(ts) * weight_decay

        def reshape_rank2(ts):
            # shape [-1, last dimension]
            ts_shape = ts.get_shape().as_list()
            return tf.reshape(ts, [ts_shape[0], -1])

        with tf.variable_scope('c3d'):
            for ly in self.graph:
                if ly['name'].startswith('conv'):
                    ts = tf.layers.conv3d(
                        ts,
                        ly['num_kernel'], [3, 3, 3],
                        padding='same',
                        activation=tf.nn.relu,
                        kernel_initializer=tf.truncated_normal_initializer(
                            stddev=0.01),
                        kernel_regularizer=w_loss,
                        name=ly['name'])
                    pp('layer %s: num_kernel %d, kernel [3, 3, 3],'
                       ' gaussian 0.01, l2_loss, with relu' %
                       (ly['name'], ly['num_kernel']))
                    pp(ts)
                elif ly['name'].startswith('pool'):
                    ts = tf.layers.max_pooling3d(ts,
                                                 ly['kernel_size'],
                                                 ly['stride'],
                                                 padding='same',
                                                 name=ly['name'])
                    pp('layer %s: kernel_size %s, stride %s' %
                       (ly['name'], str(ly['kernel_size']), str(ly['stride'])))
                    pp(ts)
                elif ly['name'].startswith('fc'):
                    # flatten to rank 2 before the first dense layer
                    if len(ts.get_shape().as_list()) > 2:
                        ts = reshape_rank2(ts)

                    ts = tf.layers.dense(
                        ts,
                        ly['units'],
                        activation=tf.nn.relu,
                        kernel_initializer=tf.truncated_normal_initializer(
                            stddev=0.005),
                        bias_initializer=tf.ones_initializer(),
                        kernel_regularizer=w_loss,
                        name=ly['name'])
                    ts = tf.layers.dropout(ts,
                                           rate=self.dropout_prob,
                                           training=self.is_training)
                    pp('layer %s: units %d, gaussian 0.005, l2_loss,'
                       ' with relu and dropout %.1f(%r)' %
                       (ly['name'], ly['units'], self.dropout_prob,
                        self.is_training))
                    pp(ts)
                else:
                    raise ValueError('no such layer %s' % ly['name'])

            # linear fc
            ts = tf.layers.dense(
                ts,
                101,
                activation=None,
                kernel_initializer=tf.truncated_normal_initializer(
                    stddev=0.01),
                kernel_regularizer=w_loss,
                name='fc8')
            pp('layer fc8: units 101, gaussian 0.01, without relu or dropout')
            pp(ts)

        return ts
Example #19
    def infer(self, inputs):
        input_shape = inputs.get_shape().as_list()
        self.input_batch_size = input_shape[0]

        def w_loss(ts, weight_decay=0.0005):
            return tf.nn.l2_loss(ts) * weight_decay

        def reshape_rank2(ts):
            # shape [-1, last dimension]
            ts_shape = ts.get_shape().as_list()
            return tf.reshape(ts, [ts_shape[0], -1])

        def conv2d(ts, num_kernel, kernel_size=3, padding='same', name=None):
            return tf.layers.conv2d(ts,
                                    num_kernel,
                                    kernel_size,
                                    activation=tf.nn.relu,
                                    padding=padding,
                                    kernel_regularizer=w_loss,
                                    name=name)

        def pool2d(ts, name):
            return tf.layers.max_pooling2d(ts, [2, 2], strides=2, name=name)

        def dropout(ts):
            return tf.layers.dropout(ts,
                                     rate=self.dropout_prob,
                                     training=self.is_training)

        def dense(ts, units, name):
            ts = tf.layers.dense(ts,
                                 units,
                                 activation=tf.nn.relu,
                                 kernel_regularizer=w_loss,
                                 name=name)
            ts = tf.layers.dropout(ts,
                                   rate=self.dropout_prob,
                                   training=self.is_training)
            return ts

        ts = inputs
        with tf.variable_scope('vgg_16'):
            with tf.variable_scope('conv1'):
                ts = conv2d(ts, 64, name='conv1_1')
                ts = conv2d(ts, 64, name='conv1_2')
                ts = pool2d(ts, name='pool1')
            with tf.variable_scope('conv2'):
                ts = conv2d(ts, 128, name='conv2_1')
                ts = conv2d(ts, 128, name='conv2_2')
                ts = pool2d(ts, name='pool2')
            with tf.variable_scope('conv3'):
                ts = conv2d(ts, 256, name='conv3_1')
                ts = conv2d(ts, 256, name='conv3_2')
                ts = conv2d(ts, 256, name='conv3_3')
                ts = pool2d(ts, name='pool3')
            with tf.variable_scope('conv4'):
                ts = conv2d(ts, 512, name='conv4_1')
                ts = conv2d(ts, 512, name='conv4_2')
                ts = conv2d(ts, 512, name='conv4_3')
                ts = pool2d(ts, name='pool4')
            with tf.variable_scope('conv5'):
                ts = conv2d(ts, 512, name='conv5_1')
                ts = conv2d(ts, 512, name='conv5_2')
                ts = conv2d(ts, 512, name='conv5_3')
                ts = pool2d(ts, name='pool5')
            # use conv2d instead of dense
            ts = conv2d(ts, 4096, kernel_size=7, padding='valid', name='fc6')
            ts = dropout(ts)

            ts = conv2d(ts, 4096, kernel_size=1, name='fc7')
            ts = dropout(ts)
            # linear fc
            ts = tf.layers.conv2d(ts,
                                  self.num_class,
                                  1,
                                  activation=None,
                                  kernel_initializer=tf.zeros_initializer(),
                                  kernel_regularizer=w_loss,
                                  name='fc8')
            pp('layer %s: units %d, zero-init, without relu or dropout' %
               ('fc8', self.num_class))
            ts = tf.squeeze(ts, [1, 2], name='fc8/squeezed')
            pp(' output shape:', ts.get_shape().as_list())

        return ts
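
The "conv2d instead of dense" choice keeps fc6-fc8 shape-compatible with the
original VGG-16 weights while avoiding an explicit flatten. A shape
walk-through for a 224x224 input (batch size b, assuming the five 2x2/2
poolings above):

# pool5:                [b, 7, 7, 512]
# fc6 (7x7 valid conv): [b, 1, 1, 4096]
# fc7 (1x1 conv):       [b, 1, 1, 4096]
# fc8 (1x1 conv):       [b, 1, 1, num_class]
# squeeze([1, 2]):      [b, num_class]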
Example #20
    def get_vars_to_restore(self):
        pp('use vanilla vars_to_restore')
        # key by var.op.name ('conv2d/weights'), not var.name
        # ('conv2d/weights:0'), to match the vars_map key format the init
        # helpers above expect
        vars_to_restore = {
            var.op.name: var for var in tf.trainable_variables()
        }
        return vars_to_restore