Exemplo n.º 1
0
    def _build_graph(self, inputs):
        """Build the multi-layer LSTM language model and define ``self.cost``.

        Args:
            inputs: pair ``(input, nextinput)`` of token-id tensors. The first
                is embedded and fed to the RNN; the second is flattened and
                used as the sparse cross-entropy labels. Presumably both are
                B x seqlen -- TODO confirm against the input definition.

        Side effects:
            Creates tensors named ``c<k>`` / ``h<k>`` (feedable initial state
            of layer ``k``), ``last_state``, ``prob`` and ``cost``; sets
            ``self.cost`` and registers the parameter / moving summaries.
        """
        # Named `tokens` rather than `input` so the builtin is not shadowed.
        tokens, next_tokens = inputs

        cell = rnn.MultiRNNCell([rnn.LSTMBlockCell(num_units=param.rnn_size)
                                 for _ in range(param.num_rnn_layer)])

        def get_v(n):
            # A non-trainable, constant-initialized variable supplies the
            # default value; wrapping it in a placeholder lets callers feed
            # a state with a different batch size under the name `n`.
            default = tf.get_variable(n + '_unused', [param.batch_size, param.rnn_size],
                                      trainable=False,
                                      initializer=tf.constant_initializer())
            return tf.placeholder_with_default(default, shape=[None, param.rnn_size], name=n)

        # One LSTMStateTuple per layer so the state always matches the depth of
        # `cell`. For two layers this yields c0/h0/c1/h1 in the same creation
        # order as the previous hard-coded pair.
        initial = tuple(
            rnn.LSTMStateTuple(get_v('c%d' % layer), get_v('h%d' % layer))
            for layer in range(param.num_rnn_layer))

        embeddingW = tf.get_variable('embedding', [param.vocab_size, param.rnn_size])
        input_feature = tf.nn.embedding_lookup(embeddingW, tokens)  # B x seqlen x rnnsize

        input_list = tf.unstack(input_feature, axis=1)  # seqlen x (Bxrnnsize)

        outputs, last_state = rnn.static_rnn(cell, input_list, initial, scope='rnnlm')
        last_state = tf.identity(last_state, 'last_state')

        # `outputs` is seqlen x (Bxrnnsize). Concatenating along axis 1 and
        # reshaping gives batch-major rows, matching the flattened labels below.
        output = tf.reshape(tf.concat(outputs, 1), [-1, param.rnn_size])  # (Bxseqlen) x rnnsize
        logits = FullyConnected('fc', output, param.vocab_size, nl=tf.identity)
        # Temperature-scaled distribution, exposed by name only.
        tf.nn.softmax(logits / param.softmax_temprature, name='prob')

        xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=tf.reshape(next_tokens, [-1]))
        self.cost = tf.reduce_mean(xent_loss, name='cost')
        summary.add_param_summary(('.*/W', ['histogram']))   # monitor histogram of all W
        summary.add_moving_summary(self.cost)
Exemplo n.º 2
0
    def _build_graph(self, inputs):
        """Construct the convolutional classifier and store its loss in ``self.cost``.

        Args:
            inputs: pair ``(image, label)`` of input tensors. A channel axis is
                appended to ``image`` here; ``label`` is used as sparse class
                labels.

        Side effects:
            Creates the named tensors ``prob``, ``cross_entropy_loss``,
            ``correct``, ``accuracy``, ``train_error``, ``regularize_loss`` and
            ``total_cost``; registers moving and parameter summaries.
        """
        image, label = inputs

        # TensorFlow convolutions expect NHWC: append a single channel axis,
        # then shift the pixel values so they are centered at zero.
        image = tf.expand_dims(image, 3)
        image = image * 2 - 1

        # `argscope` supplies these defaults to every Conv2D created inside it:
        # 3x3 kernels, 32 output channels, ReLU non-linearity.
        with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu, out_channel=32):
            net = LinearWrap(image)
            net = net.Conv2D('conv0')
            net = net.MaxPooling('pool0', 2)
            net = net.Conv2D('conv1')
            net = net.Conv2D('conv2')
            net = net.MaxPooling('pool1', 2)
            net = net.Conv2D('conv3')
            net = net.FullyConnected('fc0', 512, nl=tf.nn.relu)
            net = net.Dropout('dropout', 0.5)
            net = net.FullyConnected('fc1', out_dim=10, nl=tf.identity)
            logits = net()   # unwrap the underlying tensor

        # Bx10 class probabilities, exposed by name only.
        tf.nn.softmax(logits, name='prob')

        # Per-sample loss (length-B vector), then its mean over the batch.
        per_sample_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label)
        cost = tf.reduce_mean(per_sample_loss, name='cross_entropy_loss')

        hits = tf.nn.in_top_k(logits, label, 1)
        correct = tf.cast(hits, tf.float32, name='correct')
        accuracy = tf.reduce_mean(correct, name='accuracy')

        # Moving-average monitoring of the training error and accuracy. The
        # values are written to tensorboard and stat.json, and printed after
        # each epoch.
        train_error = tf.reduce_mean(1 - correct, name='train_error')
        summary.add_moving_summary(train_error, accuracy)

        # Weight decay on every W (weight matrix) of the fc layers, selected
        # with a regex over parameter names.
        fc_l2 = regularize_cost('fc.*/W', tf.nn.l2_loss)
        wd_cost = tf.multiply(1e-5, fc_l2, name='regularize_loss')
        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, self.cost)

        # Histogram and RMS of every weight (conv and fc) in tensorboard.
        summary.add_param_summary(('.*/W', ['histogram', 'rms']))
Exemplo n.º 3
0
    def build_graph(self, image, label):
        """Construct the classifier from ``image`` / ``label`` and return the cost.

        Args:
            image: input image tensor; a channel axis is appended here.
            label: sparse class labels used by the loss and the accuracy.

        Returns:
            The ``total_cost`` tensor: mean cross-entropy plus the weight-decay
            term on the fc kernels.
        """
        is_training = get_current_tower_context().is_training

        # TensorFlow convolutions expect NHWC: append a single channel axis,
        # then shift the pixel values so they are centered at zero.
        image = tf.expand_dims(image, 3)
        image = image * 2 - 1

        # `argscope` sets default options for the layers created inside it;
        # here every conv2d gets 'same' padding and a ReLU activation.
        with argscope([tf.layers.conv2d], padding='same', activation=tf.nn.relu):
            net = tf.layers.conv2d(image, 32, 3, name='conv0')
            net = tf.layers.max_pooling2d(net, 2, 2, padding='valid')
            for conv_name in ('conv1', 'conv2'):
                net = tf.layers.conv2d(net, 32, 3, name=conv_name)
            net = tf.layers.max_pooling2d(net, 2, 2, padding='valid')
            net = tf.layers.conv2d(net, 32, 3, name='conv3')
            net = tf.layers.flatten(net)
            net = tf.layers.dense(net, 512, activation=tf.nn.relu, name='fc0')
            net = tf.layers.dropout(net, rate=0.5, training=is_training)
        logits = tf.layers.dense(net, 10, activation=tf.identity, name='fc1')

        # Per-sample loss (length-B vector), then its mean over the batch.
        per_sample_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label)
        cost = tf.reduce_mean(per_sample_loss, name='cross_entropy_loss')

        hits = tf.nn.in_top_k(logits, label, 1)
        correct = tf.cast(hits, tf.float32, name='correct')
        accuracy = tf.reduce_mean(correct, name='accuracy')

        # Moving-average monitoring of training error and accuracy. The values
        # are automatically written to tensorboard and stat.json, and printed
        # after each epoch.
        train_error = tf.reduce_mean(1 - correct, name='train_error')
        summary.add_moving_summary(train_error, accuracy)

        # Weight decay on the kernel of every fc layer, selected by regex.
        # The cost could equally be defined without a regex.
        fc_l2 = regularize_cost('fc.*/kernel', tf.nn.l2_loss)
        wd_cost = tf.multiply(1e-5, fc_l2, name='regularize_loss')
        total_cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, total_cost)

        # Histogram and RMS of every kernel (conv and fc) in tensorboard.
        summary.add_param_summary(('.*/kernel', ['histogram', 'rms']))

        # The value returned is the total cost to be optimized.
        return total_cost
Exemplo n.º 4
0
    def _build_graph(self, inputs):
        """Construct the tf.layers classifier and store its loss in ``self.cost``.

        Args:
            inputs: pair ``(image, label)`` of input tensors. A channel axis is
                appended to ``image`` here; ``label`` is used as sparse class
                labels.

        Side effects:
            Creates the named tensors ``prob``, ``cross_entropy_loss``,
            ``correct``, ``accuracy``, ``train_error``, ``regularize_loss`` and
            ``total_cost``; registers moving and parameter summaries.
        """
        image, label = inputs
        is_training = get_current_tower_context().is_training

        # Options shared by all four convolution layers.
        conv_opts = dict(padding='same', activation=tf.nn.relu)

        # TensorFlow convolutions expect NHWC: append a single channel axis,
        # then shift the pixel values so they are centered at zero.
        image = tf.expand_dims(image, 3)
        image = image * 2 - 1

        net = tf.layers.conv2d(image, 32, 3, name='conv0', **conv_opts)
        net = tf.layers.max_pooling2d(net, 2, 2, padding='valid')
        net = tf.layers.conv2d(net, 32, 3, name='conv1', **conv_opts)
        net = tf.layers.conv2d(net, 32, 3, name='conv2', **conv_opts)
        net = tf.layers.max_pooling2d(net, 2, 2, padding='valid')
        net = tf.layers.conv2d(net, 32, 3, name='conv3', **conv_opts)
        net = tf.layers.flatten(net)
        net = tf.layers.dense(net, 512, activation=tf.nn.relu, name='fc0')
        net = tf.layers.dropout(net, rate=0.5, training=is_training)
        logits = tf.layers.dense(net, 10, activation=tf.identity, name='fc1')

        # Bx10 class probabilities, exposed by name only.
        tf.nn.softmax(logits, name='prob')

        # Per-sample loss (length-B vector), then its mean over the batch.
        per_sample_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label)
        cost = tf.reduce_mean(per_sample_loss, name='cross_entropy_loss')

        hits = tf.nn.in_top_k(logits, label, 1)
        correct = tf.cast(hits, tf.float32, name='correct')
        accuracy = tf.reduce_mean(correct, name='accuracy')

        # Moving-average monitoring of the training error and accuracy. The
        # values are written to tensorboard and stat.json, and printed after
        # each epoch.
        train_error = tf.reduce_mean(1 - correct, name='train_error')
        summary.add_moving_summary(train_error, accuracy)

        # Weight decay on the kernel of every fc layer, selected by regex.
        fc_l2 = regularize_cost('fc.*/kernel', tf.nn.l2_loss)
        wd_cost = tf.multiply(1e-5, fc_l2, name='regularize_loss')
        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, self.cost)

        # Histogram and RMS of every kernel (conv and fc) in tensorboard.
        summary.add_param_summary(('.*/kernel', ['histogram', 'rms']))
Exemplo n.º 5
0
    def build_graph(self, comb_state, action, reward, isOver):
        """Build the Q-network graph and, when training, return the Huber TD loss.

        Args:
            comb_state: stacked frames; reshaped here to
                ``[-1] + self._shape2d + [self.history + 1, self.channel]`` so
                that it holds the current and the next state.
            action: action indices, one-hot encoded against ``self.num_actions``.
            reward: rewards; clipped to [-1, 1].
            isOver: episode-termination flags; cast to float to mask the
                bootstrap term.

        Returns:
            The loss tensor when the tower is training, otherwise ``None``
            (only ``self.predict_value`` is built in that case).
        """
        comb_state = tf.cast(comb_state, tf.float32)
        stacked_shape = [-1] + list(self._shape2d) + [self.history + 1, self.channel]
        comb_state = tf.reshape(comb_state, stacked_shape)

        # The first `history` entries along axis 3 form the current state.
        window = [-1, -1, -1, self.history, -1]
        state = tf.slice(comb_state, [0, 0, 0, 0, 0], window)
        state = tf.reshape(state, self._shape4d_for_prediction, name='state')
        self.predict_value = self.get_DQN_prediction(state)

        # Inference towers only need the prediction.
        if not get_current_tower_context().is_training:
            return

        reward = tf.clip_by_value(reward, -1, 1)
        # The window shifted by one entry along axis 3 is the next state.
        next_state = tf.slice(comb_state, [0, 0, 0, 1, 0], window, name='next_state')
        next_state = tf.reshape(next_state, self._shape4d_for_prediction)
        action_onehot = tf.one_hot(action, self.num_actions, 1.0, 0.0)

        # Q-value of the action that was actually taken.
        pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1)  # N,
        per_sample_max = tf.reduce_max(self.predict_value, 1)
        max_pred_reward = tf.reduce_mean(per_sample_max, name='predict_reward')
        summary.add_moving_summary(max_pred_reward)

        # Target network: same architecture under the 'target' scope, with its
        # variables frozen.
        with tf.variable_scope('target'), varreplace.freeze_variables(skip_collection=True):
            targetQ_predict_value = self.get_DQN_prediction(next_state)    # NxA

        if self.method == 'Double':
            # Double-DQN: pick the action with the online network and evaluate
            # it with the target network.
            next_predict_value = self.get_DQN_prediction(next_state)
            self.greedy_choice = tf.argmax(next_predict_value, 1)   # N,
            predict_onehot = tf.one_hot(self.greedy_choice, self.num_actions, 1.0, 0.0)
            best_v = tf.reduce_sum(targetQ_predict_value * predict_onehot, 1)
        else:
            # DQN: take the maximum target-network value.
            best_v = tf.reduce_max(targetQ_predict_value, 1)    # N,

        # Bootstrap only when the episode has not ended; no gradient flows
        # through the bootstrap value.
        not_over = 1.0 - tf.cast(isOver, tf.float32)
        bootstrap = not_over * self.gamma * tf.stop_gradient(best_v)
        target = reward + bootstrap

        cost = tf.losses.huber_loss(
            target, pred_action_value, reduction=tf.losses.Reduction.MEAN)
        # Monitor all W of the conv and fc layers.
        summary.add_param_summary(('conv.*/W', ['histogram', 'rms']),
                                  ('fc.*/W', ['histogram', 'rms']))
        summary.add_moving_summary(cost)
        return cost
    def _build_graph(self, inputs):
        """Build the NP / HV (and optional TP) prediction graph and its losses.

        Args:
            inputs: pair ``(images, truemap_coded)``. ``images`` is transposed
                with ``[0, 3, 1, 2]`` before entering the encoder.
                ``truemap_coded`` is indexed on its last axis: channel 1 is the
                type map when type classification is enabled, channel 0 is the
                nuclei map otherwise (and the focus region of the 'msge' loss),
                and the last two channels are the HV maps.

        Side effects:
            Creates the named tensors ``truemap-np``, ``truemap-hv``,
            ``predmap-prob-np``, ``predmap-prob-hv``, ``predmap-hv`` and
            ``predmap-coded`` (plus ``truemap-type`` with type classification).
            When the tower is training it also sets ``self.cost`` and
            registers scalar, parameter and image summaries.

        Raises:
            AssertionError: if ``self.loss_term`` contains an unsupported term.
        """
        images, truemap_coded = inputs
        orig_imgs = images

        # Resolve the flag once so every branch below agrees, including on
        # models that never define `type_classification`.
        type_classification = bool(getattr(self, 'type_classification', False))

        if type_classification:
            true_type = truemap_coded[...,1]
            true_type = tf.cast(true_type, tf.int32)
            true_type = tf.identity(true_type, name='truemap-type')
            one_type  = tf.one_hot(true_type, self.nr_types, axis=-1)
            true_type = tf.expand_dims(true_type, axis=-1)

            true_np = tf.cast(true_type > 0, tf.int32) # ? sanity this
            true_np = tf.identity(true_np, name='truemap-np')
            # Squeeze only the trailing channel axis; a bare squeeze would also
            # drop the batch axis when the batch size is 1.
            one_np  = tf.one_hot(tf.squeeze(true_np, axis=-1), 2, axis=-1)
        else:
            true_np = truemap_coded[...,0]
            true_np = tf.cast(true_np, tf.int32)
            true_np = tf.identity(true_np, name='truemap-np')
            one_np  = tf.one_hot(true_np, 2, axis=-1)
            true_np = tf.expand_dims(true_np, axis=-1)

        true_hv = truemap_coded[...,-2:]
        true_hv = tf.identity(true_hv, name='truemap-hv')

        ####
        with argscope(Conv2D, activation=tf.identity, use_bias=False, # K.he initializer
                      W_init=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')), \
                argscope([Conv2D, BatchNorm], data_format=self.data_format):

            i = tf.transpose(images, [0, 3, 1, 2])
            i = i if not self.input_norm else i / 255.0

            ####
            d = encoder(i, self.freeze)
            d[0] = crop_op(d[0], (184, 184))
            d[1] = crop_op(d[1], (72, 72))

            #### one decoder branch per output head
            np_feat = decoder('np', d)
            npx = BNReLU('preact_out_np', np_feat[-1])

            hv_feat = decoder('hv', d)
            hv = BNReLU('preact_out_hv', hv_feat[-1])

            if type_classification:
                tp_feat = decoder('tp', d)
                tp = BNReLU('preact_out_tp', tp_feat[-1])

                # Nuclei Type Pixels (TP)
                logi_class = Conv2D('conv_out_tp', tp, self.nr_types, 1, use_bias=True, activation=tf.identity)
                logi_class = tf.transpose(logi_class, [0, 2, 3, 1])
                soft_class = tf.nn.softmax(logi_class, axis=-1)

            #### Nuclei Pixels (NP)
            logi_np = Conv2D('conv_out_np', npx, 2, 1, use_bias=True, activation=tf.identity)
            logi_np = tf.transpose(logi_np, [0, 2, 3, 1])
            soft_np = tf.nn.softmax(logi_np, axis=-1)
            prob_np = tf.identity(soft_np[...,1], name='predmap-prob-np')
            prob_np = tf.expand_dims(prob_np, axis=-1)

            #### Horizontal-Vertical (HV)
            logi_hv = Conv2D('conv_out_hv', hv, 2, 1, use_bias=True, activation=tf.identity)
            logi_hv = tf.transpose(logi_hv, [0, 2, 3, 1])
            prob_hv = tf.identity(logi_hv, name='predmap-prob-hv')
            pred_hv = tf.identity(logi_hv, name='predmap-hv')

            # * channel ordering: type-map, segmentation map
            # encoded so that inference can extract all output at once
            if type_classification:
                predmap_coded = tf.concat([soft_class, prob_np, pred_hv], axis=-1, name='predmap-coded')
            else:
                predmap_coded = tf.concat([prob_np, pred_hv], axis=-1, name='predmap-coded')

        ####
        def get_gradient_hv(l, h_ch, v_ch):
            """
            Calculate the horizontal partial derivative of the horizontal
            channel and the vertical partial derivative of the vertical channel.

            The derivative is approximated by convolving each channel with a
            5x5 Sobel-like kernel. The boundary is zero-padded ('SAME').

            Args:
                l (tensor): tensor of shape NHWC where C should be 2 (1 channel
                            for horizontal and 1 channel for vertical)
                h_ch(int) : index within C axis of `l` that corresponds to horizontal channel
                v_ch(int) : index within C axis of `l` that corresponds to vertical channel

            Returns:
                tensor with the two derivative maps concatenated on the last
                axis, horizontal first.
            """
            def get_sobel_kernel(size):
                assert size % 2 == 1, 'Must be odd, get size=%d' % size

                h_range = np.arange(-size//2+1, size//2+1, dtype=np.float32)
                v_range = np.arange(-size//2+1, size//2+1, dtype=np.float32)
                h, v = np.meshgrid(h_range, v_range)
                # The epsilon keeps the centre element (h = v = 0) finite.
                kernel_h = h / (h * h + v * v + 1.0e-15)
                kernel_v = v / (h * h + v * v + 1.0e-15)
                return kernel_h, kernel_v

            mh, mv = get_sobel_kernel(5)
            mh = tf.constant(mh, dtype=tf.float32)
            mv = tf.constant(mv, dtype=tf.float32)

            mh = tf.reshape(mh, [5, 5, 1, 1])
            mv = tf.reshape(mv, [5, 5, 1, 1])

            # central difference to get gradient, ignore the boundary problem
            h = tf.expand_dims(l[...,h_ch], axis=-1)
            v = tf.expand_dims(l[...,v_ch], axis=-1)
            dh = tf.nn.conv2d(h, mh, strides=[1, 1, 1, 1], padding='SAME')
            dv = tf.nn.conv2d(v, mv, strides=[1, 1, 1, 1], padding='SAME')
            output = tf.concat([dh, dv], axis=-1)
            return output

        def loss_mse(true, pred, name=None):
            """Mean squared error between `pred` and `true` (regression loss)."""
            loss = pred - true
            loss = tf.reduce_mean(loss * loss, name=name)
            return loss

        def loss_msge(true, pred, focus, name=None):
            """Mean squared gradient error, averaged over the `focus` region only."""
            focus = tf.stack([focus, focus], axis=-1)
            pred_grad = get_gradient_hv(pred, 1, 0)
            true_grad = get_gradient_hv(true, 1, 0)
            loss = pred_grad - true_grad
            loss = focus * (loss * loss)
            # artificial reduce_mean with focus region
            loss = tf.reduce_sum(loss) / (tf.reduce_sum(focus) + 1.0e-8)
            loss = tf.identity(loss, name=name)
            return loss

        ####
        if get_current_tower_context().is_training:
            #---- LOSS ----#
            loss = 0
            for term, weight in self.loss_term.items():
                if term == 'mse':
                    term_loss = loss_mse(true_hv, pred_hv, name='loss-mse')
                elif term == 'msge':
                    focus = truemap_coded[...,0]
                    term_loss = loss_msge(true_hv, pred_hv, focus, name='loss-msge')
                elif term == 'bce':
                    term_loss = categorical_crossentropy(soft_np, one_np)
                    term_loss = tf.reduce_mean(term_loss, name='loss-bce')
                elif term == 'dice':
                    # Compare the current term; testing membership in the
                    # whole dict would route unknown terms here as well.
                    term_loss = dice_loss(soft_np[...,0], one_np[...,0]) \
                              + dice_loss(soft_np[...,1], one_np[...,1])
                    term_loss = tf.identity(term_loss, name='loss-dice')
                else:
                    # Raised explicitly so the check survives `python -O`.
                    raise AssertionError('Not support loss term: %s' % term)
                add_moving_summary(term_loss)
                loss += term_loss * weight

            if type_classification:
                term_loss = categorical_crossentropy(soft_class, one_type)
                term_loss = tf.reduce_mean(term_loss, name='loss-xentropy-class')
                add_moving_summary(term_loss)
                loss = loss + term_loss

                # Dice loss summed over every type channel.
                term_loss = 0
                for type_id in range(self.nr_types):
                    term_loss += dice_loss(soft_class[...,type_id],
                                           one_type[...,type_id])
                term_loss = tf.identity(term_loss, name='loss-dice-class')
                add_moving_summary(term_loss)
                loss = loss + term_loss

            ### combine the loss into single cost function
            self.cost = tf.identity(loss, name='overall-loss')
            add_moving_summary(self.cost)
            ####

            add_param_summary(('.*/W', ['histogram']))   # monitor W

            ### image summaries for visual inspection
            orig_imgs = tf.cast(orig_imgs  , tf.uint8)
            tf.summary.image('input', orig_imgs, max_outputs=1)

            orig_imgs = crop_op(orig_imgs, (190, 190), "NHWC")

            pred_np = colorize(prob_np[...,0], cmap='jet')
            true_np = colorize(true_np[...,0], cmap='jet')

            pred_h = colorize(prob_hv[...,0], vmin=-1, vmax=1, cmap='jet')
            pred_v = colorize(prob_hv[...,1], vmin=-1, vmax=1, cmap='jet')
            true_h = colorize(true_hv[...,0], vmin=-1, vmax=1, cmap='jet')
            true_v = colorize(true_hv[...,1], vmin=-1, vmax=1, cmap='jet')

            if not type_classification:
                viz = tf.concat([orig_imgs,
                                pred_h, pred_v, pred_np,
                                true_h, true_v, true_np], 2)
            else:
                pred_type = tf.transpose(soft_class, (0, 1, 3, 2))
                pred_type = tf.reshape(pred_type, [-1, 80, 80 * self.nr_types])
                # NOTE(review): normalizes by `nr_classes` while the rest of
                # this method uses `nr_types` -- confirm which is intended.
                true_type = tf.cast(true_type[...,0] / self.nr_classes, tf.float32)
                true_type = colorize(true_type, vmin=0, vmax=1, cmap='jet')
                pred_type = colorize(pred_type, vmin=0, vmax=1, cmap='jet')

                viz = tf.concat([orig_imgs,
                                pred_h, pred_v, pred_np, pred_type,
                                true_h, true_v, true_np, true_type,], 2)

            # Stack the first and last sample of the batch into one image.
            viz = tf.concat([viz[0], viz[-1]], axis=0)
            viz = tf.expand_dims(viz, axis=0)
            tf.summary.image('output', viz, max_outputs=1)

        return
Exemplo n.º 7
0
    def build_graph(self, _, x, bboxes_xyz, bboxes_lwh, semantic_labels,
                    heading_labels, heading_residuals, size_labels,
                    size_residuals):
        l0_xyz = x
        l0_points = x

        # Set Abstraction layers
        l1_xyz, l1_points, l1_indices = pointnet_sa_module(l0_xyz,
                                                           l0_points,
                                                           npoint=2048,
                                                           radius=0.2,
                                                           nsample=64,
                                                           mlp=[64, 64, 128],
                                                           mlp2=None,
                                                           group_all=False,
                                                           scope='sa1')
        l2_xyz, l2_points, l2_indices = pointnet_sa_module(l1_xyz,
                                                           l1_points,
                                                           npoint=1024,
                                                           radius=0.4,
                                                           nsample=64,
                                                           mlp=[128, 128, 256],
                                                           mlp2=None,
                                                           group_all=False,
                                                           scope='sa2')
        l3_xyz, l3_points, l3_indices = pointnet_sa_module(l2_xyz,
                                                           l2_points,
                                                           npoint=512,
                                                           radius=0.8,
                                                           nsample=64,
                                                           mlp=[128, 128, 256],
                                                           mlp2=None,
                                                           group_all=False,
                                                           scope='sa3')
        l4_xyz, l4_points, l4_indices = pointnet_sa_module(l3_xyz,
                                                           l3_points,
                                                           npoint=256,
                                                           radius=1.2,
                                                           nsample=64,
                                                           mlp=[128, 128, 256],
                                                           mlp2=None,
                                                           group_all=False,
                                                           scope='sa4')
        # Feature Propagation layers
        l3_points = pointnet_fp_module(l3_xyz,
                                       l4_xyz,
                                       l3_points,
                                       l4_points, [256, 256],
                                       scope='fp1')
        seeds_points = pointnet_fp_module(l2_xyz,
                                          l3_xyz,
                                          l2_points,
                                          l3_points, [256, 256],
                                          scope='fp2')
        seeds_xyz = l2_xyz

        # Voting Module layers
        offset = tf.reshape(tf.concat([seeds_xyz, seeds_points], 2),
                            [-1, 256 + 3])
        units = [256, 256, 256 + 3]
        for i in range(len(units)):
            offset = FullyConnected(
                'voting%d' % i,
                offset,
                units[i],
                activation=BNReLU if i < len(units) - 1 else None)
        offset = tf.reshape(offset, [-1, 1024, 256 + 3])

        # B * N * 3
        votes = tf.concat([seeds_xyz, seeds_points], 2) + offset
        votes_xyz = votes[:, :, :3]
        dist2center = tf.abs(
            tf.expand_dims(seeds_xyz, 2) - tf.expand_dims(bboxes_xyz, 1))
        surface_ind = tf.less(dist2center,
                              tf.expand_dims(bboxes_lwh, 1) /
                              2.)  # B * N * BB * 3, bool
        surface_ind = tf.equal(tf.count_nonzero(surface_ind, -1),
                               3)  # B * N * BB
        surface_ind = tf.greater_equal(tf.count_nonzero(
            surface_ind, -1), 1)  # B * N, should be in at least one bbox

        dist2center_norm = tf.norm(dist2center, axis=-1)  # B * N * BB
        votes_assignment = tf.argmin(dist2center_norm,
                                     -1,
                                     output_type=tf.int32)  # B * N, int
        bboxes_xyz_votes_gt = tf.gather_nd(
            bboxes_xyz,
            tf.stack([
                tf.tile(
                    tf.expand_dims(tf.range(tf.shape(votes_assignment)[0]),
                                   -1), [1, tf.shape(votes_assignment)[1]]),
                votes_assignment
            ], 2))  # B * N * 3
        vote_reg_loss = tf.reduce_mean(
            tf.norm(votes_xyz - bboxes_xyz_votes_gt, ord=1, axis=-1) *
            tf.cast(surface_ind, tf.float32),
            name='vote_reg_loss')
        votes_points = votes[:, :, 3:]

        # Proposal Module layers
        # Farthest point sampling on seeds
        proposals_xyz, proposals_output, _ = pointnet_sa_module(
            votes_xyz,
            votes_points,
            npoint=config.PROPOSAL_NUM,
            radius=0.3,
            nsample=64,
            mlp=[128, 128, 128],
            mlp2=[128, 128, 5 + 2 * config.NH + 4 * config.NS + config.NC],
            group_all=False,
            scope='proposal',
            sample_xyz=seeds_xyz)

        obj_cls_score = tf.identity(proposals_output[..., :2], 'obj_scores')

        nms_iou = tf.get_variable('nms_iou',
                                  shape=[],
                                  initializer=tf.constant_initializer(0.25),
                                  trainable=False)
        if not get_current_tower_context().is_training:

            def get_3d_bbox(box_size, heading_angle, center):
                batch_size = tf.shape(heading_angle)[0]
                c = tf.cos(heading_angle)
                s = tf.sin(heading_angle)
                zeros = tf.zeros_like(c)
                ones = tf.ones_like(c)
                rotation = tf.reshape(
                    tf.stack([c, zeros, s, zeros, ones, zeros, -s, zeros, c],
                             -1), tf.stack([batch_size, -1, 3, 3]))
                l, w, h = box_size[..., 0], box_size[..., 1], box_size[
                    ..., 2]  # lwh(xzy) order!!!
                corners = tf.reshape(
                    tf.stack([
                        l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2,
                        -l / 2, h / 2, h / 2, h / 2, h / 2, -h / 2, -h / 2,
                        -h / 2, -h / 2, w / 2, -w / 2, -w / 2, w / 2, w / 2,
                        -w / 2, -w / 2, w / 2
                    ], -1), tf.stack([batch_size, -1, 3, 8]))
                return tf.einsum('ijkl,ijlm->ijmk',
                                 rotation, corners) + tf.expand_dims(
                                     center, 2)  # B * N * 8 * 3

            class_mean_size_tf = tf.constant(class_mean_size)
            size_cls_pred = tf.argmax(
                proposals_output[..., 5 + 2 * config.NH:5 + 2 * config.NH +
                                 config.NS],
                axis=-1)
            size_cls_pred_onehot = tf.one_hot(size_cls_pred,
                                              depth=config.NS,
                                              axis=-1)  # B * N * NS
            size_residual_pred = tf.reduce_sum(
                tf.expand_dims(size_cls_pred_onehot, -1) * tf.reshape(
                    proposals_output[..., 5 + 2 * config.NH + config.NS:5 +
                                     2 * config.NH + 4 * config.NS],
                    (-1, config.PROPOSAL_NUM, config.NS, 3)),
                axis=2)
            size_pred = tf.gather_nd(
                class_mean_size_tf,
                tf.expand_dims(size_cls_pred, -1)) * tf.maximum(
                    1 + size_residual_pred, 1e-6)  # B * N * 3: size
            # with tf.control_dependencies([tf.print(size_pred[0, 0, 2])]):
            center_pred = proposals_xyz + proposals_output[...,
                                                           2:5]  # B * N * 3
            heading_cls_pred = tf.argmax(proposals_output[...,
                                                          5:5 + config.NH],
                                         axis=-1)
            heading_cls_pred_onehot = tf.one_hot(heading_cls_pred,
                                                 depth=config.NH,
                                                 axis=-1)
            heading_residual_pred = tf.reduce_sum(
                heading_cls_pred_onehot *
                proposals_output[..., 5 + config.NH:5 + 2 * config.NH],
                axis=2)
            heading_pred = tf.floormod(
                (tf.cast(heading_cls_pred, tf.float32) * 2 +
                 heading_residual_pred) * np.pi / config.NH, 2 * np.pi)

            # with tf.control_dependencies([tf.print(size_residual_pred[0, :10, :]), tf.print(size_pred[0, :10, :])]):
            bboxes = get_3d_bbox(
                size_pred, heading_pred,
                center_pred)  # B * N * 8 * 3,  lhw(xyz) order!!!

            # bbox_corners = tf.concat([bboxes[:, :, 6, :], bboxes[:, :, 0, :]], axis=-1)  # B * N * 6,  lhw(xyz) order!!!
            # with tf.control_dependencies([tf.print(bboxes[0, 0])]):
            nms_idx = NMS3D(bboxes,
                            tf.reduce_max(proposals_output[..., -config.NC:],
                                          axis=-1), proposals_output[..., :2],
                            nms_iou)  # Nnms * 2

            bboxes_pred = tf.gather_nd(bboxes, nms_idx,
                                       name='bboxes_pred')  # Nnms * 8 * 3
            class_scores_pred = tf.gather_nd(
                proposals_output[..., -config.NC:],
                nms_idx,
                name='class_scores_pred')  # Nnms * C
            batch_idx = tf.identity(
                nms_idx[:, 0], name='batch_idx'
            )  # Nnms, this is used to identify between batches

            return

        # calculate positive and negative proposal idxes
        bboxes_xyz_gt = bboxes_xyz  # B * BB * 3
        bboxes_labels_gt = semantic_labels  # B * BB
        bboxes_heading_labels_gt = heading_labels
        bboxes_heading_residuals_gt = heading_residuals
        bboxes_size_labels_gt = size_labels
        bboxes_size_residuals_gt = size_residuals
        dist_mat = tf.norm(tf.expand_dims(proposals_xyz, 2) -
                           tf.expand_dims(bboxes_xyz_gt, 1),
                           axis=-1)  # B * PR * BB
        bboxes_assignment = tf.argmin(dist_mat, axis=-1)  # B * PR
        min_dist = tf.reduce_min(dist_mat, axis=-1)

        positive_idxes = tf.where(min_dist < config.POSITIVE_THRES)  # Np * 2
        # with tf.control_dependencies([tf.print(tf.shape(positive_idxes))]):
        negative_idxes = tf.where(min_dist > config.NEGATIVE_THRES)  # Nn * 2
        positive_gt_idxes = tf.stack([
            positive_idxes[:, 0],
            tf.gather_nd(bboxes_assignment, positive_idxes)
        ],
                                     axis=1)

        # objectiveness loss
        pos_obj_cls_score = tf.gather_nd(obj_cls_score, positive_idxes)
        pos_obj_cls_gt = tf.ones([tf.shape(positive_idxes)[0]], dtype=tf.int32)
        neg_obj_cls_score = tf.gather_nd(obj_cls_score, negative_idxes)
        neg_obj_cls_gt = tf.zeros([tf.shape(negative_idxes)[0]],
                                  dtype=tf.int32)
        obj_cls_loss = tf.identity(
            tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=pos_obj_cls_score, labels=pos_obj_cls_gt)) +
            tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=neg_obj_cls_score, labels=neg_obj_cls_gt)),
            name='obj_cls_loss')
        obj_correct = tf.concat([
            tf.cast(tf.nn.in_top_k(pos_obj_cls_score, pos_obj_cls_gt, 1),
                    tf.float32),
            tf.cast(tf.nn.in_top_k(neg_obj_cls_score, neg_obj_cls_gt, 1),
                    tf.float32)
        ],
                                axis=0,
                                name='obj_correct')
        obj_accuracy = tf.reduce_mean(obj_correct, name='obj_accuracy')

        # center regression losses
        center_gt = tf.gather_nd(bboxes_xyz_gt, positive_gt_idxes)
        delta_predicted = tf.gather_nd(proposals_output[..., 2:5],
                                       positive_idxes)
        delta_gt = center_gt - tf.gather_nd(proposals_xyz, positive_idxes)
        center_loss = tf.reduce_mean(
            tf.reduce_sum(tf.losses.huber_loss(
                labels=delta_gt,
                predictions=delta_predicted,
                reduction=tf.losses.Reduction.NONE),
                          axis=-1))

        # Appendix A1: chamfer loss, assignment at least one bbox to each gt bbox
        bboxes_assignment_dual = tf.argmin(dist_mat, axis=1)  # B * BB
        batch_idx = tf.tile(
            tf.expand_dims(tf.range(
                tf.shape(bboxes_assignment_dual, out_type=tf.int64)[0]),
                           axis=-1),
            [1, tf.shape(bboxes_assignment_dual)[1]])  # B * BB
        delta_gt_dual = bboxes_xyz_gt - tf.gather_nd(
            proposals_xyz,
            tf.stack([batch_idx, bboxes_assignment_dual],
                     axis=-1))  # B * BB * 3
        delta_predicted_dual = tf.gather_nd(
            proposals_output[..., 2:5],
            tf.stack([batch_idx, bboxes_assignment_dual],
                     axis=-1))  # B * BB * 3
        center_loss_dual = tf.reduce_mean(
            tf.reduce_sum(tf.losses.huber_loss(
                labels=delta_gt_dual,
                predictions=delta_predicted_dual,
                reduction=tf.losses.Reduction.NONE),
                          axis=-1))

        # add up
        center_loss += center_loss_dual

        # Heading loss
        heading_cls_gt = tf.gather_nd(bboxes_heading_labels_gt,
                                      positive_gt_idxes)
        heading_cls_score = tf.gather_nd(
            proposals_output[..., 5:5 + config.NH], positive_idxes)
        heading_cls_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=heading_cls_score, labels=heading_cls_gt))

        heading_cls_gt_onehot = tf.one_hot(heading_cls_gt,
                                           depth=config.NH,
                                           on_value=1,
                                           off_value=0,
                                           axis=-1)  # Np * NH
        heading_residual_gt = tf.gather_nd(bboxes_heading_residuals_gt,
                                           positive_gt_idxes)  # Np
        heading_residual_predicted = tf.gather_nd(
            proposals_output[..., 5 + config.NH:5 + 2 * config.NH],
            positive_idxes)  # Np * NH
        heading_residual_loss = tf.losses.huber_loss(
            labels=heading_residual_gt,
            predictions=tf.reduce_sum(heading_residual_predicted *
                                      tf.to_float(heading_cls_gt_onehot),
                                      axis=1),
            reduction=tf.losses.Reduction.MEAN)

        # Size loss
        size_cls_gt = tf.gather_nd(bboxes_size_labels_gt, positive_gt_idxes)
        size_cls_score = tf.gather_nd(
            proposals_output[...,
                             5 + 2 * config.NH:5 + 2 * config.NH + config.NS],
            positive_idxes)
        size_cls_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=size_cls_score, labels=size_cls_gt))

        size_cls_gt_onehot = tf.one_hot(size_cls_gt,
                                        depth=config.NS,
                                        on_value=1,
                                        off_value=0,
                                        axis=-1)  # Np * NS
        size_cls_gt_onehot = tf.tile(
            tf.expand_dims(tf.to_float(size_cls_gt_onehot), -1),
            [1, 1, 3])  # Np * NS * 3
        size_residual_gt = tf.gather_nd(bboxes_size_residuals_gt,
                                        positive_gt_idxes)  # Np * 3
        size_residual_predicted = tf.reshape(
            tf.gather_nd(
                proposals_output[..., 5 + 2 * config.NH + config.NS:5 +
                                 2 * config.NH + 4 * config.NS],
                positive_idxes), (-1, config.NS, 3))  # Np * NS * 3
        size_residual_loss = tf.reduce_mean(
            tf.reduce_sum(tf.losses.huber_loss(
                labels=size_residual_gt,
                predictions=tf.reduce_sum(size_residual_predicted *
                                          tf.to_float(size_cls_gt_onehot),
                                          axis=1),
                reduction=tf.losses.Reduction.NONE),
                          axis=-1))

        box_loss = center_loss + 0.1 * heading_cls_loss + heading_residual_loss + 0.1 * size_cls_loss + size_residual_loss

        # semantic loss
        sem_cls_score = tf.gather_nd(proposals_output[..., -config.NC:],
                                     positive_idxes)
        sem_cls_gt = tf.gather_nd(bboxes_labels_gt, positive_gt_idxes)  # Np
        sem_cls_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=sem_cls_score, labels=sem_cls_gt),
            name='sem_cls_loss')
        sem_correct = tf.cast(tf.nn.in_top_k(sem_cls_score, sem_cls_gt, 1),
                              tf.float32,
                              name='sem_correct')
        sem_accuracy = tf.reduce_mean(sem_correct, name='sem_accuracy')

        # This will monitor training error & accuracy (in a moving average fashion). The value will be automatically
        # 1. written to tensorboard
        # 2. written to stat.json
        # 3. printed after each epoch
        summary.add_moving_summary(obj_accuracy, sem_accuracy)

        # Use a regex to find parameters to apply weight decay.
        # Here we apply a weight decay on all W (weight matrix) of all fc layers
        # If you don't like regex, you can certainly define the cost in any other methods.
        # no weight decay
        # wd_cost = tf.multiply(1e-5,
        #                       regularize_cost('.*/W', tf.nn.l2_loss),
        #                       name='regularize_loss')
        total_cost = vote_reg_loss + 0.5 * obj_cls_loss + 1. * box_loss + 0.1 * sem_cls_loss
        total_cost = tf.identity(total_cost, name='total_cost')
        summary.add_moving_summary(total_cost)

        # monitor histogram of all weight (of conv and fc layers) in tensorboard
        summary.add_param_summary(('.*/W', ['histogram', 'rms']))
        # the function should return the total cost to be optimized
        return total_cost
Exemplo n.º 8
0
    def build_graph(self, image, label):
        """Assemble the DoReFa-quantized convolutional classifier and return its cost.

        Args:
            image: input batch; it is divided by 256.0 before entering the network.
            label: class labels, consumed by ``tf.nn.in_top_k`` and by the
                sparse softmax cross-entropy loss.

        Returns:
            The tensor named 'cost': the mean cross-entropy plus the
            regularization cost collected over variables matching 'fc.*/W'.
        """
        is_training = get_current_tower_context().is_training

        # fw / fa / fg are applied to weights, activations and (via .apply(fg)
        # after each inner convolution) the convolution outputs respectively.
        fw, fa, fg = get_dorefa(BITW, BITA, BITG)

        def binarize_weight(v):
            # Variable remapper: only inner-layer weight variables go through fw.
            var_name = v.op.name
            is_inner_weight = (var_name.endswith('W')
                               and 'conv0' not in var_name
                               and 'fc' not in var_name)
            if is_inner_weight:
                logger.info("Binarizing weight {}".format(var_name))
                return fw(v)
            # first layer, last layer and every non-'W' variable stay as they are
            return v

        def nonlin(x):
            # Plain ReLU when BITA is 32, otherwise clip into [0, 1].
            return tf.nn.relu(x) if BITA == 32 else tf.clip_by_value(x, 0.0, 1.0)

        def activate(x):
            return fa(nonlin(x))

        image = image / 256.0

        with remap_variables(binarize_weight), \
                argscope(BatchNorm, momentum=0.9, epsilon=1e-4), \
                argscope(Conv2D, use_bias=False):
            # The bare numbers below are carried over from the original code;
            # presumably the feature-map side length at that point -- TODO confirm.
            net = (
                LinearWrap(image)
                .Conv2D('conv0', 48, 5, padding='VALID', use_bias=True)
                .MaxPooling('pool0', 2, padding='SAME')
                .apply(activate)
                # 18
                .Conv2D('conv1', 64, 3, padding='SAME')
                .apply(fg)
                .BatchNorm('bn1')
                .apply(activate)
                .Conv2D('conv2', 64, 3, padding='SAME')
                .apply(fg)
                .BatchNorm('bn2')
                .MaxPooling('pool1', 2, padding='SAME')
                .apply(activate)
                # 9
                .Conv2D('conv3', 128, 3, padding='VALID')
                .apply(fg)
                .BatchNorm('bn3')
                .apply(activate)
                # 7
                .Conv2D('conv4', 128, 3, padding='SAME')
                .apply(fg)
                .BatchNorm('bn4')
                .apply(activate)
                .Conv2D('conv5', 128, 3, padding='VALID')
                .apply(fg)
                .BatchNorm('bn5')
                .apply(activate)
                # 5
                .Dropout(rate=0.5 if is_training else 0.0)
                .Conv2D('conv6', 512, 5, padding='VALID')
                .apply(fg)
                .BatchNorm('bn6')
                .apply(nonlin)
                .FullyConnected('fc1', 10)
            )
            logits = net()
        tf.nn.softmax(logits, name='output')

        # Per-sample indicator of a wrong top-1 prediction; its mean is the
        # training error reported through the moving summary.
        top1_hit = tf.nn.in_top_k(logits, label, 1)
        wrong = tf.cast(tf.logical_not(top1_hit), tf.float32,
                        name='wrong_tensor')
        add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        per_sample_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label)
        cost = tf.reduce_mean(per_sample_loss, name='cross_entropy_loss')
        # L2 regularization restricted to the weights of the fc layers
        wd_cost = regularize_cost('fc.*/W', l2_regularizer(1e-7))

        add_param_summary(('.*/W', ['histogram', 'rms']))
        total_cost = tf.add_n([cost, wd_cost], name='cost')
        add_moving_summary(cost, wd_cost, total_cost)
        return total_cost
Exemplo n.º 9
0
    def build_graph(self, image, label):
        """Build a 10-class residual network in channels-first layout and return its cost.

        The network is a 16-channel stem convolution followed by three groups
        of ``self.n`` residual blocks, a final BNReLU, global average pooling
        and a fully connected layer with 10 outputs.

        Args:
            image: input batch; scaled by 1/128 and transposed with
                permutation [0, 3, 1, 2] before the first convolution.
            label: class labels for the cross-entropy loss and error metric.

        Returns:
            The tensor named 'cost': mean cross-entropy plus the decayed L2
            weight cost.
        """
        image = image / 128.0
        assert tf.test.is_gpu_available()
        image = tf.transpose(image, [0, 3, 1, 2])

        def residual(name, l, increase_dim=False, first=False):
            # Channels live on axis 1 after the transpose above.
            in_channel = l.get_shape().as_list()[1]
            if increase_dim:
                out_channel, stride1 = in_channel * 2, 2
            else:
                out_channel, stride1 = in_channel, 1

            with tf.variable_scope(name):
                pre_act = l if first else BNReLU(l)
                c1 = Conv2D('conv1', pre_act, out_channel,
                            strides=stride1, activation=BNReLU)
                c2 = Conv2D('conv2', c1, out_channel)
                if increase_dim:
                    # Match the shortcut to the widened, strided branch:
                    # pool it, then zero-pad the channel axis on both sides.
                    half = in_channel // 2
                    l = AvgPooling('pool', l, 2)
                    l = tf.pad(l, [[0, 0], [half, half], [0, 0], [0, 0]])

                return c2 + l

        with argscope([Conv2D, AvgPooling, BatchNorm, GlobalAvgPooling], data_format='channels_first'), \
                argscope(Conv2D, use_bias=False, kernel_size=3,
                         kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
            l = Conv2D('conv0', image, 16, activation=BNReLU)

            # Groups res1, res2, res3: only the very first block skips its
            # leading BNReLU, and the first block of groups 2 and 3 doubles the
            # channel count (16 -> 32 -> 64) while using stride 2.
            for group in (1, 2, 3):
                l = residual('res{}.0'.format(group), l,
                             increase_dim=(group != 1), first=(group == 1))
                for k in range(1, self.n):
                    l = residual('res{}.{}'.format(group, k), l)

            l = BNReLU('bnlast', l)
            l = GlobalAvgPooling('gap', l)

        logits = FullyConnected('linear', l, 10)
        tf.nn.softmax(logits, name='output')

        per_sample_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label)
        cost = tf.reduce_mean(per_sample_loss, name='cross_entropy_loss')

        # Per-sample wrong-prediction indicator; its mean is the training error.
        top1_hit = tf.nn.in_top_k(logits, label, 1)
        wrong = tf.to_float(tf.logical_not(top1_hit), name='wrong_vector')
        add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        # L2 penalty over every variable matching '.*/W', with a coefficient
        # that follows a staircase exponential decay of the global step.
        wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(),
                                          decay_steps=480000,
                                          decay_rate=0.2,
                                          staircase=True)
        wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='wd_cost')
        add_moving_summary(cost, wd_cost)

        add_param_summary(('.*/W', ['histogram']))   # monitor W
        return tf.add_n([cost, wd_cost], name='cost')
Exemplo n.º 10
0
    def get_logits(self, image, label=None):
        """Build the configured network on ``image`` and return its logits.

        Supported configurations are 'alexnet', and 'resnet18' with dataset
        'cifar' or 'ImageNet'. Unless ``self.float_train`` is set, weight
        variables of the inner layers are passed through the weight
        quantizer and activations through the activation quantizer.

        As a side effect, this registers histogram/rms summaries for
        variables matching '.*/W' and adds a softmax op named 'output'.

        Args:
            image: input tensor fed to the first layer of the chosen network.
            label: unused; kept so existing callers keep working.

        Returns:
            The logits tensor of the selected network.

        Raises:
            ValueError: if the model/dataset combination is not supported.
        """
        if self.bit_w == 't':
            # Ternary mode: get_dorefa is asked for 32 bits everywhere and the
            # weight function is then replaced by `ternarize`.
            fw, fa, fg = get_dorefa(32, 32, 32)
            fw = ternarize
        else:
            fw, fa, fg = get_dorefa(self.bit_w, self.bit_a, self.bit_g)

        def quantization_rule():
            """Return (weight suffix, first-layer tag, last-layer tag), or None.

            NOTE(review): `model_name` / `dataset_name` are free names here
            while the dispatch at the end of this method reads
            `self.model_name` -- confirm both always agree.
            """
            if model_name == 'alexnet':
                return 'W', 'conv0', 'fct'
            if model_name == 'resnet18':
                if dataset_name == 'cifar':
                    return 'kernel', 'conv1_1', 'dense'
                if dataset_name == 'ImageNet':
                    return 'W', 'conv0', 'linear'
            return None

        # Variable remapper handed to remap_variables: applies fw to the
        # weights of every layer except the first and the last one.
        def new_get_variable(v):
            if self.float_train:
                return v
            rule = quantization_rule()
            if rule is None:
                # Unknown configuration: hand the variable back untouched
                # instead of implicitly returning None.
                return v
            suffix, first_tag, last_tag = rule
            name = v.op.name
            # don't quantize non-weight variables or the first and last layer
            if not name.endswith(suffix) or first_tag in name or last_tag in name:
                return v
            logger.info("Quantizing weight {}".format(name))
            return fw(v)

        def nonlin(x):
            # NOTE(review): this reads the free name BITA rather than
            # self.bit_a -- confirm that is intended.
            if BITA == 32:
                return tf.nn.relu(x)  # still use relu for 32-bit cases
            return tf.clip_by_value(x, 0.0, 1.0)

        def activate(x):
            # Identity in float-train mode, quantized non-linearity otherwise.
            if self.float_train:
                return x
            return fa(nonlin(x))

        def conv_bn(x, n_filters, kernel_size, tag, is_training, reuse,
                    kernel_initializer, strides=(1, 1)):
            """Bias-free SAME convolution 'conv<tag>' followed by batch norm 'bn<tag>'."""
            x = tf.layers.conv2d(x,
                                 n_filters, (kernel_size, kernel_size),
                                 use_bias=False,
                                 strides=strides,
                                 padding='SAME',
                                 kernel_initializer=kernel_initializer,
                                 name='conv' + tag,
                                 reuse=reuse)
            return tf.layers.batch_normalization(x,
                                                 training=is_training,
                                                 name='bn' + tag,
                                                 reuse=reuse)

        def identity_block2d(input_tensor,
                             kernel_size,
                             filters,
                             stage,
                             block,
                             is_training,
                             reuse,
                             kernel_initializer=tf.contrib.layers.
                             variance_scaling_initializer()):
            """Residual block whose shortcut is the unmodified input."""
            # Only the last two entries of `filters` are used.
            _, filters2, filters3 = filters
            prefix = '{}_{}_'.format(stage, block)

            x = conv_bn(input_tensor, filters2, kernel_size, prefix + '3x3',
                        is_training, reuse, kernel_initializer)
            x = activate(x)

            # The layer is named '1x1_increase' but uses `kernel_size`; the
            # name is kept so variable names do not change.
            x = conv_bn(x, filters3, kernel_size, prefix + '1x1_increase',
                        is_training, reuse, kernel_initializer)

            x = tf.add(input_tensor, x)
            # The output of block '4b' is deliberately left un-activated.
            if block != '4b':
                x = activate(x)
            return x

        def conv_block_2d(input_tensor,
                          kernel_size,
                          filters,
                          stage,
                          block,
                          is_training,
                          reuse,
                          strides=(2, 2),
                          kernel_initializer=tf.contrib.layers.
                          variance_scaling_initializer()):
            """Residual block with a strided convolutional shortcut."""
            # Only the last two entries of `filters` are used.
            _, filters2, filters3 = filters
            prefix = '{}_{}_'.format(stage, block)

            x = conv_bn(input_tensor, filters2, kernel_size, prefix + '3x3',
                        is_training, reuse, kernel_initializer,
                        strides=strides)
            x = tf.nn.relu(x)

            # As in identity_block2d, the '1x1' layer names are historical:
            # both convolutions below use `kernel_size`.
            x = conv_bn(x, filters3, kernel_size, prefix + '1x1_increase',
                        is_training, reuse, kernel_initializer)

            shortcut = conv_bn(input_tensor, filters3, kernel_size,
                               prefix + '1x1_shortcut',
                               is_training, reuse, kernel_initializer,
                               strides=strides)

            x = tf.add(shortcut, x)
            x = tf.nn.relu(x)
            return x

        def resnet18_cifar(input_tensor,
                           is_training=True,
                           pooling_and_fc=True,
                           reuse=False,
                           kernel_initializer=tf.contrib.layers.
                           variance_scaling_initializer()):
            """ResNet-18 style network built from tf.layers (`pooling_and_fc` is unused)."""
            common = dict(is_training=is_training,
                          reuse=reuse,
                          kernel_initializer=kernel_initializer)
            with remap_variables(new_get_variable):
                x = tf.layers.conv2d(input_tensor,
                                     64, (3, 3),
                                     strides=(1, 1),
                                     kernel_initializer=kernel_initializer,
                                     use_bias=False,
                                     padding='SAME',
                                     name='conv1_1/3x3_s1',
                                     reuse=reuse)
                x = tf.layers.batch_normalization(x,
                                                  training=is_training,
                                                  name='bn1_1/3x3_s1',
                                                  reuse=reuse)
                x = tf.nn.relu(x)

                x1 = identity_block2d(x, 3, [48, 64, 64],
                                      stage=2, block='1b', **common)
                x1 = identity_block2d(x1, 3, [48, 64, 64],
                                      stage=3, block='1c', **common)

                x2 = conv_block_2d(x1, 3, [96, 128, 128],
                                   stage=3, block='2a', strides=(2, 2),
                                   **common)
                x2 = activate(x2)
                x2 = identity_block2d(x2, 3, [96, 128, 128],
                                      stage=3, block='2b', **common)

                x3 = conv_block_2d(x2, 3, [128, 256, 256],
                                   stage=4, block='3a', strides=(2, 2),
                                   **common)
                x3 = activate(x3)
                x3 = identity_block2d(x3, 3, [128, 256, 256],
                                      stage=4, block='3b', **common)

                x4 = conv_block_2d(x3, 3, [256, 512, 512],
                                   stage=5, block='4a', strides=(2, 2),
                                   **common)
                x4 = activate(x4)
                x4 = identity_block2d(x4, 3, [256, 512, 512],
                                      stage=5, block='4b', **common)

                print('before gap: ', x4)
                # global average pooling over axes 1 and 2
                x4 = tf.reduce_mean(x4, [1, 2])
                print('after gap: ', x4)
                prob = tf.layers.dense(
                    x4,
                    self.class_num,
                    reuse=reuse,
                    kernel_initializer=tf.contrib.layers.xavier_initializer())
                print('prob', prob)

            return prob

        def resnet_group(name, l, block_func, features, count, stride):
            # `count` blocks in scope `name`; only the first one gets `stride`.
            with tf.variable_scope(name):
                for i in range(0, count):
                    with tf.variable_scope('block{}'.format(i)):
                        l = block_func(l, features, stride if i == 0 else 1)
            return l

        def resnet_shortcut(l, n_out, stride, activation=tf.identity):
            # The channel axis depends on the configured data format.
            n_in = l.get_shape().as_list()[1 if self.data_format in
                                           ['NCHW', 'channels_first'] else 3]
            if n_in != n_out:  # change dimension when channel is not the same
                return activate(
                    Conv2D('convshortcut',
                           l,
                           n_out,
                           1,
                           strides=stride,
                           activation=activation))
            else:
                return l

        def get_bn(zero_init=False):
            """
            Zero init gamma is good for resnet. See https://arxiv.org/abs/1706.02677.
            """
            if zero_init:
                return lambda x, name=None: BatchNorm(
                    'bn', x, gamma_initializer=tf.zeros_initializer())
            else:
                return lambda x, name=None: BatchNorm('bn', x)

        def resnet_basicblock(l, ch_out, stride):
            shortcut = l
            l = Conv2D('conv1',
                       l,
                       ch_out,
                       3,
                       strides=stride,
                       activation=BNReLU)
            l = activate(l)
            l = Conv2D('conv2',
                       l,
                       ch_out,
                       3,
                       activation=get_bn(zero_init=True))
            l = activate(l)
            out = l + resnet_shortcut(
                shortcut, ch_out, stride, activation=get_bn(zero_init=False))
            return tf.nn.relu(out)

        def resnet18_imagenet(image):
            with remap_variables(new_get_variable), \
                 argscope(Conv2D, use_bias=False,
                          kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
                # Note that this pads the image by [2, 3] instead of [3, 2].
                # Similar things happen in later stride=2 layers as well.
                l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
                l = MaxPooling('pool0',
                               l,
                               pool_size=3,
                               strides=2,
                               padding='SAME')
                l = resnet_group('group0', l, resnet_basicblock, 64, 2, 1)
                l = activate(l)
                l = resnet_group('group1', l, resnet_basicblock, 128, 2, 2)
                l = activate(l)
                l = resnet_group('group2', l, resnet_basicblock, 256, 2, 2)
                l = activate(l)
                l = resnet_group('group3', l, resnet_basicblock, 512, 2, 2)
                l = GlobalAvgPooling('gap', l)
                logits = FullyConnected(
                    'linear',
                    l,
                    1000,
                    kernel_initializer=tf.random_normal_initializer(
                        stddev=0.01))

            return logits

        def alexnet(image):
            with remap_variables(new_get_variable), \
                 argscope([Conv2D, BatchNorm, MaxPooling], data_format='channels_first'), \
                 argscope(BatchNorm, momentum=0.9, epsilon=1e-4), \
                 argscope(Conv2D, use_bias=False):
                net = (
                    LinearWrap(image)
                    .Conv2D('conv0', 96, 12, strides=4, padding='VALID',
                            use_bias=True)
                    .apply(fg)
                    .Conv2D('conv1', 256, 5, padding='SAME', split=2)
                    .apply(fg)
                    .BatchNorm('bn1')
                    .MaxPooling('pool1', 3, 2, padding='SAME')
                    .apply(activate)
                    .Conv2D('conv2', 384, 3)
                    .apply(fg)
                    .BatchNorm('bn2')
                    .MaxPooling('pool2', 3, 2, padding='SAME')
                    .apply(activate)
                    .Conv2D('conv3', 384, 3, split=2)
                    .apply(fg)
                    .BatchNorm('bn3')
                    .apply(activate)
                    .Conv2D('conv4', 256, 3, split=2)
                    .apply(fg)
                    .BatchNorm('bn4')
                    .MaxPooling('pool4', 3, 2, padding='VALID')
                    .apply(activate)
                    .FullyConnected('fc0', 4096)
                    .apply(fg)
                    .BatchNorm('bnfc0')
                    .apply(activate)
                    .FullyConnected('fc1', 4096, use_bias=False)
                    .apply(fg)
                    .BatchNorm('bnfc1')
                    .apply(nonlin)
                    .FullyConnected('fct', self.class_num, use_bias=True)
                )
                logits = net()

            return logits

        if self.model_name == 'alexnet':
            logits = alexnet(image)
        elif self.model_name == 'resnet18' and dataset_name == 'cifar':
            logits = resnet18_cifar(image, reuse=tf.AUTO_REUSE)
        elif self.model_name == 'resnet18' and dataset_name == 'ImageNet':
            logits = resnet18_imagenet(image)
        else:
            # Fail with a clear message instead of passing None to softmax.
            raise ValueError(
                "get_logits supports 'alexnet', or 'resnet18' with dataset "
                "'cifar' or 'ImageNet'; got model_name={!r}".format(
                    self.model_name))

        add_param_summary(('.*/W', ['histogram', 'rms']))
        tf.nn.softmax(logits, name='output')  # for prediction

        return logits
Exemplo n.º 11
0
    def _build_graph(self, inputs):
        """Build the ternary-weight classification graph and set ``self.cost``.

        ``inputs`` unpacks to ``(inp, label)``. ``self.net_fn`` is built under a
        variable remapper that ternarizes selected ``W`` variables, followed by
        a ``last_linear`` layer with ``self.n_spks`` outputs. ``self.cost`` is
        the mean sparse softmax cross-entropy plus an L2 penalty on every ``W``
        whose coefficient follows a staircase exponential decay.
        """
        inp, label = inputs
        is_training = get_current_tower_context().is_training

        tw = get_tw(self.tw_thres)

        def ternarize_weight(v):
            """Return ``tw(v)`` for variables to quantize, otherwise ``v`` unchanged."""
            name = v.op.name
            # Only variables whose op name ends with 'W' are candidates.
            keep_as_is = not name.endswith('W')
            if not keep_as_is and not self.quant_ends:
                # With ``quant_ends`` off, the first and last layers (and
                # 'linear0' of the two fcn nets) are left untouched.
                keep_as_is = (
                    'conv0' in name
                    or 'last_linear' in name
                    or ((self.net_fn == fcn1_net or self.net_fn == fcn2_net)
                        and 'linear0' in name)
                )
            if keep_as_is:
                logger.info("Not ternarizing {}".format(name))
                return v
            logger.info("Ternarizing weight {}".format(name))
            return tw(v)

        def nonlin(x, name="activate"):
            """Activation handed to ``self.net_fn``; only ``self.bita == 32`` is handled."""
            if self.bita == 32:
                return BNReLUWithTrackedMults(x)
            assert False

        complexity = self.network_complexity
        with remap_variables(ternarize_weight), \
                argscope([FullyConnectedWithTrackedMults], network_complexity=complexity), \
                argscope([Conv2DWithTrackedMults], network_complexity=complexity), \
                argscope([BNReLUWithTrackedMults], network_complexity=complexity), \
                argscope([BNWithTrackedMults], network_complexity=complexity), \
                argscope(BatchNorm, decay=0.9, epsilon=1e-4):
            features = self.net_fn(inp, nonlin, self.n_context)
            logits = FullyConnectedWithTrackedMults(
                'last_linear', features, out_dim=self.n_spks, nl=tf.identity)

        prob = tf.nn.softmax(logits, name='output')

        # Majority vote over the per-row argmax predictions, compared against
        # the first label; exported as the 'utt-wrong' tensor.
        row_guesses = flatten(tf.argmax(prob, axis=1))
        candidates, _, votes = tf.unique_with_counts(row_guesses)
        most_voted_idx = tf.argmax(votes)
        most_voted = tf.gather(candidates, most_voted_idx)
        vote_is_wrong = tf.not_equal(most_voted, tf.cast(label[0], tf.int64))
        tf.expand_dims(vote_is_wrong, axis=0, name='utt-wrong')

        xent = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        xent = tf.reduce_mean(xent, name='cross_entropy_loss')
        add_moving_summary(xent)

        wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
        add_moving_summary(tf.reduce_mean(wrong, name='train-error-top1'))

        with tf.name_scope('original-weight-summaries'):
            add_param_summary(('.*/W', ['rms', 'histogram']))
            add_param_summary(('.*/b', ['rms', 'histogram']))

        with tf.name_scope('activation-summaries'):
            def wants_summary(name):
                # Tensors named '*output' outside Inference/quantized scopes.
                if "Inference" in name or 'quantized' in name:
                    return False
                return name.endswith('output') or name.endswith('output:0')

            activations = get_tensors_from_graph(tf.get_default_graph(), wants_summary)
            logger.info("Adding activation tensors to summary: {}".format(activations))
            for activation in activations:
                add_tensor_summary(activation, ['rms', 'histogram'])

        # Weight-decay coefficient: starts at 0.0002, multiplied by 0.2 every
        # 480000 global steps (staircase=True).
        wd_coeff = tf.train.exponential_decay(0.0002, get_global_step_var(), 480000, 0.2, True)
        wd_cost = tf.multiply(wd_coeff, regularize_cost('.*/W', tf.nn.l2_loss), name='wd_cost')
        add_moving_summary(wd_cost)
        self.cost = tf.add_n([xent, wd_cost], name='cost')

        # Export the tracked complexity counters as named graph constants.
        tf.constant([self.network_complexity['mults']], name='TotalMults')
        tf.constant([self.network_complexity['weights']], name='TotalWeights')
        logger.info("Parameter count: {}".format(self.network_complexity))
Exemplo n.º 12
0
    def _build_graph(self, inputs):
        """Build the encoder/decoder segmentation graph; set ``self.cost`` in training.

        ``inputs`` unpacks to ``(images, truemap_coded)``. The last channel of
        ``truemap_coded`` is used as a per-pixel loss weight (``pen_map``);
        channel 1 (type classification) or channel 0 (otherwise) is the target.

        The optional ``type_classification`` attribute is resolved once with a
        default of ``False``, so instances that never define it take the plain
        ``self.nr_classes`` path everywhere instead of failing half-way through
        graph construction.

        The prediction is exported as the ``predmap-coded`` tensor. Loss and
        image summaries are only created when the tower is in training mode.
        Returns ``None``.
        """
        is_training = get_current_tower_context().is_training

        images, truemap_coded = inputs
        orig_imgs = images

        # Resolve the optional flag a single time so every use below agrees.
        type_classification = getattr(self, 'type_classification', False)
        nr_out = self.nr_types if type_classification else self.nr_classes

        pen_map = truemap_coded[..., -1]
        if type_classification:
            true = truemap_coded[..., 1]
        else:
            true = truemap_coded[..., 0]
        true = tf.cast(true, tf.int32)
        true = tf.identity(true, name='truemap')
        one = tf.one_hot(true, nr_out, axis=-1)
        true = tf.expand_dims(true, axis=-1)

        def encoder_blk(name, feat_in, num_feats, has_down=False):
            """Optional 2x2 max-pool followed by two unpadded 3x3 ReLU convs."""
            with tf.variable_scope(name):
                feat = feat_in if not has_down else MaxPooling(
                    'pool1', feat_in, 2, strides=2, padding='same')
                feat = Conv2D('conv_1',
                              feat,
                              num_feats,
                              3,
                              padding='valid',
                              strides=1,
                              activation=tf.nn.relu)
                feat = Conv2D('conv_2',
                              feat,
                              num_feats,
                              3,
                              padding='valid',
                              strides=1,
                              activation=tf.nn.relu)
                return feat

        def decoder_blk(name, feat_in, num_feats, shortcut):
            """2x transposed-conv upsample, concat with ``shortcut``, two 3x3 convs."""
            with tf.variable_scope(name):
                # Axis 1 is read as the channel axis here and in the concat
                # below, i.e. this block is written for channels-first tensors.
                in_ch = feat_in.get_shape().as_list()[1]
                feat = Conv2DTranspose('us',
                                       feat_in,
                                       in_ch,
                                       2,
                                       strides=(2, 2),
                                       padding='same',
                                       activation=tf.identity)
                feat = tf.concat([feat, shortcut], axis=1)
                feat = Conv2D('conv_1',
                              feat,
                              num_feats,
                              3,
                              padding='valid',
                              strides=1,
                              activation=tf.nn.relu)
                feat = Conv2D('conv_2',
                              feat,
                              num_feats,
                              3,
                              padding='valid',
                              strides=1,
                              activation=tf.nn.relu)
                return feat

        #### Xavier initializer
        with argscope([Conv2D, Conv2DTranspose], activation=tf.identity, use_bias=True,
                      kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d()), \
                argscope([Conv2D, MaxPooling, Conv2DTranspose], data_format=self.data_format):

            # Move the last axis to position 1 (NHWC -> NCHW).
            # NOTE(review): this assumes self.data_format is 'channels_first' -- confirm.
            i = tf.transpose(images, [0, 3, 1, 2])
            i = i if not self.input_norm else i / 255.0

            d1 = encoder_blk('d1', i, 64, has_down=False)
            d2 = encoder_blk('d2', d1, 128, has_down=True)
            d3 = encoder_blk('d3', d2, 256, has_down=True)
            d4 = encoder_blk('d4', d3, 512, has_down=True)
            d4 = tf.layers.dropout(d4, rate=0.5, seed=5, training=is_training)
            d5 = encoder_blk('d5', d4, 1024, has_down=True)
            d5 = tf.layers.dropout(d5, rate=0.5, seed=5, training=is_training)

            # Crop the skip connections before they are concatenated in the
            # decoder (the 'valid' convolutions shrink the decoder features).
            d1 = crop_op(d1, (176, 176))
            d2 = crop_op(d2, (80, 80))
            d3 = crop_op(d3, (32, 32))
            d4 = crop_op(d4, (8, 8))

            feat = decoder_blk('u4', d5, 512, d4)
            feat = decoder_blk('u3', feat, 256, d3)
            feat = decoder_blk('u2', feat, 128, d2)
            feat = decoder_blk('u1', feat, 64, d1)

            logi = Conv2D(
                'conv_out',
                feat,
                nr_out,
                1,
                use_bias=True,
                activation=tf.identity)
            # Back to channels-last so softmax runs over the class axis.
            logi = tf.transpose(logi, [0, 2, 3, 1])
            soft = tf.nn.softmax(logi, axis=-1)

            if type_classification:
                # Sum of all classes except index 0.
                prob_np = tf.reduce_sum(soft[..., 1:], axis=-1, keepdims=True)
                prob_np = tf.identity(prob_np, name='predmap-prob-np')
                predmap_coded = tf.concat([soft, prob_np], axis=-1)
            else:
                prob_np = tf.identity(soft[..., 1], name='predmap-prob')
                prob_np = tf.expand_dims(prob_np, axis=-1)
                predmap_coded = prob_np

            # * channel ordering: type-map, segmentation map
            # encoded so that inference can extract all output at once
            predmap_coded = tf.identity(predmap_coded, name='predmap-coded')

        ####
        if is_training:
            ######## LOSS
            ### classification loss, weighted per pixel by pen_map
            loss_bce = categorical_crossentropy(soft, one)
            loss_bce = tf.reduce_mean(loss_bce * pen_map, name='loss-bce')
            add_moving_summary(loss_bce)

            wd_loss = regularize_cost('.*/W',
                                      l2_regularizer(1.0e-5),
                                      name='l2_wd_loss')
            add_moving_summary(wd_loss)
            self.cost = loss_bce + wd_loss

            add_param_summary(('.*/W', ['histogram']))  # monitor W

            #### image summaries: input, prediction, target and loss weights
            orig_imgs = tf.cast(orig_imgs, tf.uint8)
            orig_imgs = crop_op(orig_imgs, (184, 184), "channels_last")
            tf.summary.image('input', orig_imgs, max_outputs=1)

            pred = colorize(prob_np[..., 0], cmap='jet')
            true = colorize(true[..., 0], cmap='jet')
            pen_map = colorize(pen_map, cmap='jet')

            viz = tf.concat([orig_imgs, pred, true, pen_map], 2)

            tf.summary.image('output', viz, max_outputs=1)

        return
Exemplo n.º 13
0
Arquivo: vae.py Projeto: qq456cvb/AAE
    def build_graph(self, image, label):
        """Build a fully-connected VAE with a 2-D latent code and return its cost.

        ``image`` is flattened, encoded by two 1000-unit ReLU layers into a
        2-D mean (tensor ``mu``) and log-variance, sampled with the
        reparameterisation trick (tensor ``z``), and decoded by two 1000-unit
        ReLU layers into ``IMAGE_SIZE * IMAGE_SIZE`` sigmoid outputs
        (tensor ``rec``).

        ``label`` is accepted but not used.

        Returns:
            ``rec_loss + kl_loss``, where ``rec_loss`` is the batch mean of the
            per-sample summed squared error and ``kl_loss`` is the batch mean
            of ``-sum(1 + logvar - mu^2 - exp(logvar))``. Note that no 0.5
            factor is applied to the KL term.
        """
        image = tf.layers.flatten(image)

        with tf.variable_scope('encoder'):
            hidden = FullyConnected('fc1', image, 1000, activation=tf.nn.relu)
            hidden = FullyConnected('fc2', hidden, 1000, activation=tf.nn.relu)
            mu_out = FullyConnected('fc_mu', hidden, 2, activation=None)
            mu = tf.identity(mu_out, 'mu')
            logvar = FullyConnected('fc_var', hidden, 2, activation=None)

        # Reparameterisation: z = eps * sigma + mu with eps ~ N(0, I).
        batch = tf.shape(hidden)[0]
        eps = tf.random_normal((batch, 2))
        sigma = tf.exp(0.5 * logvar)
        z = tf.identity(eps * sigma + mu, name='z')

        with tf.variable_scope('decoder'):
            hidden = FullyConnected('fc1', z, 1000, activation=tf.nn.relu)
            hidden = FullyConnected('fc2', hidden, 1000, activation=tf.nn.relu)
            rec_out = FullyConnected('fc_rec',
                                     hidden,
                                     IMAGE_SIZE * IMAGE_SIZE,
                                     activation=tf.nn.sigmoid)
            rec = tf.identity(rec_out, 'rec')

        kl_per_sample = -tf.reduce_sum(1 + logvar - mu * mu - tf.exp(logvar), -1)
        kl_loss = tf.reduce_mean(kl_per_sample, name='kl_loss')

        sq_err_per_sample = tf.reduce_sum(tf.square(rec - image), -1)
        rec_loss = tf.reduce_mean(sq_err_per_sample, name='rec_loss')

        total_cost = rec_loss + kl_loss

        # Track both loss terms as moving-average summaries.
        summary.add_moving_summary(rec_loss, kl_loss)

        # Histogram and RMS of every weight matrix.
        summary.add_param_summary(('.*/W', ['histogram', 'rms']))
        return total_cost
Exemplo n.º 14
0
    def build_graph(self, *inputs):
        """Build the DQN graph with an extra large-margin loss on human samples.

        ``inputs`` unpacks to ``(comb_state, action, reward, isOver, human)``.
        ``comb_state`` is sliced along its last axis into ``state`` (offset 0)
        and ``next_state`` (offset 1), each ``self.channel`` wide.

        Sets ``self.action`` and ``self.predict_value``; in training also
        ``self.action_onehot`` and, for 'Double' methods, ``self.greedy_choice``.

        Returns:
            ``None`` outside training; otherwise the Huber TD loss plus the
            mean margin loss, where the margin loss is zeroed for samples
            whose ``human`` flag is false.
        """
        comb_state, action, reward, isOver, human = inputs
        self.action = action
        comb_state = tf.cast(comb_state, tf.float32)
        state = tf.slice(comb_state, [0, 0, 0, 0, 0],
                         [-1, -1, -1, -1, self.channel],
                         name='state')

        # Online-network Q-values for the current state.
        self.predict_value = self.get_DQN_prediction(state)
        if not get_current_tower_context().is_training:
            return

        reward = tf.clip_by_value(reward, -1, 1)
        next_state = tf.slice(comb_state, [0, 0, 0, 0, 1],
                              [-1, -1, -1, -1, self.channel],
                              name='next_state')
        self.action_onehot = tf.one_hot(self.action, self.num_actions, 1.0,
                                        0.0)

        # Q(s, a) of the action actually taken: N,
        pred_action_value = tf.reduce_sum(
            self.predict_value * self.action_onehot, 1)
        max_pred_reward = tf.reduce_mean(tf.reduce_max(self.predict_value, 1),
                                         name='predict_reward')
        summary.add_moving_summary(max_pred_reward)

        with tf.variable_scope('target'):
            target_q = self.get_DQN_prediction(next_state)  # NxA

        if 'Double' in self.method:
            # Double-DQN or DuelingDouble: choose the action with the online
            # network and evaluate it with the target network.
            online_next_q = self.get_DQN_prediction(next_state)
            self.greedy_choice = tf.argmax(online_next_q, 1)  # N,
            greedy_onehot = tf.one_hot(self.greedy_choice, self.num_actions,
                                       1.0, 0.0)
            best_v = tf.reduce_sum(target_q * greedy_onehot, 1)
        else:
            # DQN or Dueling
            best_v = tf.reduce_max(target_q, 1)  # N,

        not_over = 1.0 - tf.cast(isOver, tf.float32)
        target = reward + not_over * self.gamma * tf.stop_gradient(best_v)
        cost = tf.losses.huber_loss(target,
                                    pred_action_value,
                                    reduction=tf.losses.Reduction.MEAN)

        ###############################################################################
        # HITL update: margin classification loss, meaningful only for the
        # human-generated samples.
        # Q(s, a_E): Q-value of the action recorded in the sample.
        taken_q = tf.multiply(self.predict_value,
                              self.action_onehot,
                              name='action_value_1')
        q_expert = tf.reduce_sum(taken_q, 1)

        # l(a_E, a): 0 for the recorded action, 0.8 for every other action.
        penalty = tf.one_hot(self.action, self.num_actions, 0.0, 0.8)
        # max_a [Q(s, a) + l(a_E, a)]
        penalised_q = tf.add(penalty, self.predict_value)
        best_penalised = tf.reduce_max(penalised_q, 1)

        margin_loss = tf.subtract(best_penalised, q_expert)

        # The margin was computed for every transition; zero it for those
        # not flagged as human.
        zeros = tf.zeros_like(margin_loss, dtype=tf.float32)
        margin_loss = tf.where(human, margin_loss, zeros)
        margin_loss = tf.reduce_mean(margin_loss)

        cost = tf.add(cost, margin_loss)

        ###############################################################################
        summary.add_param_summary(
            ('conv.*/W', ['histogram', 'rms']),
            ('fc.*/W', ['histogram', 'rms']))  # monitor all W
        summary.add_moving_summary(cost)
        logger.info("Cost: {}".format(cost))
        return cost
Exemplo n.º 15
0
    def build_graph(self, imgs, cams, gt_depth):
        """Build the multi-view depth-estimation graph and return its loss.

        Pipeline: preprocess -> feature extraction -> warping into a cost
        volume -> regularization selected by ``self.regularize_type``:

        * ``'3DCNN'``: cost-volume regularization followed by ``soft_argmin``,
          optional depth refinement (``self.is_refine``), and a regression loss
          ``loss_refine / 2 + loss_coarse * self.lambda_ / 2``.
        * anything else: ``gru_regularization`` with a classification loss.

        The tensors ``coarse_depth``, ``refine_depth``, ``less_one_accuracy``
        and ``less_three_accuracy`` (plus ``prob_map`` and ``loss`` in the
        3DCNN branch) are given fixed names via ``tf.identity`` / ``tf.add``.

        Returns:
            The scalar ``loss`` tensor of the selected branch.
        """
        # preprocess
        imgs, gt_depth, ref_img = self._preprocess(imgs, gt_depth)

        with argscope([tf.layers.conv3d, tf.layers.conv3d_transpose, mvsnet_gn,
                       Conv2D, Conv2DTranspose, MaxPooling, AvgPooling, BatchNorm],
                      data_format=self.data_format),\
             argscope(tf.layers.batch_normalization, axis=-1):
            # feature extraction
            # shape: b, view_num, h/4, w/4, c
            feature_maps = feature_extraction_net(imgs, self.branch_function)

            # get depth_start and depth_interval batch-wise
            depth_start, depth_interval, depth_end = get_depth_meta(
                cams, depth_num=self.depth_num)

            # warping layer
            # shape of cost_volume: b, depth_num, h/4, w/4, c
            cost_volume = warping_layer('warping', feature_maps, cams,
                                        depth_start, depth_interval,
                                        self.depth_num)
            # cost_volume = tf.get_variable('fake_cost_volume', (1, 32, 192, 128, 160))

            if self.regularize_type == '3DCNN':
                # cost volume regularization
                # regularized_cost_volume: b, d, h/4, w/4
                regularized_cost_volume = cost_volume_regularization(
                    cost_volume, self.bn_training, self.bn_trainable)
                # regularized_cost_volume = simple_cost_volume_regularization(cost_volume, self.bn_training, self.bn_trainable)
                # shape of coarse_depth: b, 1, h/4, w/4
                # shape of prob_map: b, h/4, w/4, 1
                # NOTE: batch_size has to be passed explicitly because it is
                # needed while the graph is being built.
                coarse_depth, prob_map = soft_argmin(
                    'soft_argmin', regularized_cost_volume, depth_start,
                    depth_end, self.depth_num, depth_interval, self.batch_size)

                # shape of refine_depth: b, 1, h/4, w/4
                if self.is_refine:
                    refine_depth = depth_refinement(coarse_depth, ref_img,
                                                    depth_start, depth_end)
                    loss_coarse, *_ = mvsnet_regression_loss(
                        gt_depth, coarse_depth, depth_interval, 'coarse_loss')
                    loss_refine, less_one_accuracy, less_three_accuracy = mvsnet_regression_loss(
                        gt_depth, refine_depth, depth_interval, 'refine_loss')
                else:
                    # Without refinement the coarse depth doubles as the
                    # refined depth, and loss_coarse aliases loss_refine.
                    refine_depth = coarse_depth
                    # loss_coarse, *_ = mvsnet_regression_loss(gt_depth, coarse_depth, depth_interval, 'coarse_loss')
                    loss_refine, less_one_accuracy, less_three_accuracy = mvsnet_regression_loss(
                        gt_depth, refine_depth, depth_interval, 'refine_loss')
                    loss_coarse = tf.identity(loss_refine, name='loss_coarse')

                # FIXME: this is odd, because the refinement part is never used
                coarse_depth = tf.identity(coarse_depth, 'coarse_depth')
                refine_depth = tf.identity(refine_depth, 'refine_depth')
                prob_map = tf.identity(prob_map, 'prob_map')
                loss = tf.add(loss_refine / 2,
                              loss_coarse * self.lambda_ / 2,
                              name='loss')
                less_one_accuracy = tf.identity(less_one_accuracy,
                                                name='less_one_accuracy')
                less_three_accuracy = tf.identity(less_three_accuracy,
                                                  name='less_three_accuracy')

            else:
                # Non-3DCNN path: GRU regularization with a classification
                # loss. No prob_map is produced here, and `mae` is unused.
                prob_volume = gru_regularization(cost_volume, self.bn_training,
                                                 self.bn_trainable)
                loss, mae, less_one_accuracy, less_three_accuracy, coarse_depth = \
                    mvsnet_classification_loss(
                        prob_volume, gt_depth, self.depth_num, depth_start, depth_interval)
                coarse_depth = tf.identity(coarse_depth, 'coarse_depth')
                refine_depth = tf.identity(coarse_depth, 'refine_depth')
                # prob_map = get_probability_map(prob_volume, coarse_depth, depth_start, depth_interval)

            with tf.variable_scope('summaries'):
                # Scalar moving summaries are created on the CPU.
                with tf.device('/cpu:0'):
                    if self.regularize_type == '3DCNN':
                        add_moving_summary(loss, loss_coarse, loss_refine,
                                           less_one_accuracy,
                                           less_three_accuracy)
                    else:
                        add_moving_summary(loss, less_one_accuracy,
                                           less_three_accuracy)

                # prob_map only exists in the 3DCNN branch.
                if self.regularize_type == '3DCNN':
                    add_image_summary(prob_map, name='prob_map')
                add_image_summary(coarse_depth, name='coarse_depth')
                add_image_summary(refine_depth, name='refine_depth')
                add_image_summary(ref_img, name='rgb')
                add_image_summary(gt_depth, name='gt_depth')

            if self.debug_param_summary:
                with tf.device('/gpu:0'):
                    add_param_summary(['.*/W', ['histogram', 'rms']],
                                      ['.*/gamma', ['histogram', 'mean']],
                                      ['.*/beta', ['histogram', 'mean']])
                    # all_vars = [var for var in tf.trainable_variables() if "gamma" in var.name or 'beta' in var.name]
                    # grad_vars = tf.gradients(loss, all_vars)
                    # for var, grad in zip(all_vars, grad_vars):
                    #     add_tensor_summary(grad, ['histogram', 'rms'], name=var.name + '-grad')
                    # all_vars = [var for var in tf.trainable_variables()]
                    # grad_vars = tf.gradients(loss, all_vars)
                    # for var, grad in zip(all_vars, grad_vars):
                    #     add_tensor_summary(grad, ['histogram'], name=var.name + '-grad')

        return loss
Exemplo n.º 16
0
    def _build_graph(self, inputs):
        """Build the two-class segmentation graph; set ``self.cost`` in training.

        ``inputs`` unpacks to ``(images, truemap_coded)``; channel 0 of
        ``truemap_coded`` is the integer target. The softmax prediction is
        exported as the ``predmap-prob`` and ``predmap-coded`` tensors.

        In training mode the weighted terms of ``self.loss_term`` (only
        ``"bce"`` is supported) plus an L2 weight penalty are combined into
        ``self.cost``. Returns ``None``.
        """
        is_training = get_current_tower_context().is_training

        images, truemap_coded = inputs
        orig_imgs = images

        target = tf.cast(truemap_coded[..., 0], tf.int32)
        target = tf.identity(target, name="truemap")
        one_hot = tf.one_hot(target, 2, axis=-1)
        target = tf.expand_dims(target, axis=-1)

        ####
        he_init = tf.variance_scaling_initializer(scale=2.0, mode="fan_out")
        with argscope(Conv2D, activation=tf.identity, use_bias=False,
                      W_init=he_init), \
                argscope([Conv2D], data_format=self.data_format):

            if self.input_norm:
                net_in = images / 255.0
            else:
                net_in = images

            ####
            feat = net(
                "net",
                net_in,
                self.basis_filter_list,
                self.rot_matrix_list,
                self.nr_orients,
                self.filter_type,
                is_training,
            )

            #### Prediction: 1x1 conv to two channels, softmax over the last axis
            o_logi = Conv2D("output", feat, 2, 1, use_bias=True, nl=tf.identity)
            soft = tf.nn.softmax(o_logi, axis=-1)

            prob = tf.identity(soft, name="predmap-prob")

            # encoded so that inference can extract all output at once
            predmap_coded = tf.concat(prob, axis=-1, name="predmap-coded")

        ####
        if get_current_tower_context().is_training:
            # ---- LOSS ----#
            loss = 0
            for term, weight in self.loss_term.items():
                if term == "bce":
                    per_pixel = categorical_crossentropy(soft, one_hot)
                    term_loss = tf.reduce_mean(per_pixel, name="loss-bce")
                else:
                    assert False, "Not support loss term: %s" % term
                add_moving_summary(term_loss)
                loss = loss + term_loss * weight

            ### combine the loss terms and weight decay into one cost
            wd_loss = regularize_cost(".*/W", l2_regularizer(1.0e-7), name="l2_wd_loss")
            add_moving_summary(wd_loss)
            self.cost = tf.identity(loss + wd_loss, name="overall-loss")
            add_moving_summary(self.cost)
            ####

            add_param_summary((".*/W", ["histogram"]))  # monitor W

            ### image summary of the raw input
            orig_imgs = tf.cast(orig_imgs, tf.uint8)
            tf.summary.image("input", orig_imgs, max_outputs=1)

        return
Exemplo n.º 17
0
    def _build_graph(self, inputs):
        """Build the three-channel segmentation graph; set ``self.cost`` in training.

        ``inputs`` unpacks to ``(images, truemap_coded)``; the first three
        channels of ``truemap_coded`` are used directly as the one-hot target.
        The first two softmax channels are exported as ``predmap-prob`` /
        ``predmap-coded``.

        In training mode every entry ``term -> weight`` of ``self.loss_term``
        contributes ``weight * term_loss`` to the cost. Supported terms are
        ``'bce'`` and ``'dice'``; any other term raises ``AssertionError``.
        An L2 weight penalty is added and the sum is stored in ``self.cost``.
        Returns ``None``.
        """
        is_training = get_current_tower_context().is_training

        images, truemap_coded = inputs
        orig_imgs = images

        true = truemap_coded[..., :3]
        true = tf.cast(true, tf.int32)
        true = tf.identity(true, name='truemap')
        one_hot = tf.cast(true, tf.float32)

        ####
        with argscope(Conv2D, activation=tf.identity, use_bias=False, # K.he initializer
                      W_init=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')), \
                argscope([Conv2D], data_format=self.data_format):

            i = images if not self.input_norm else images / 255.0

            ####
            d = encoder('encoder', i, self.basis_filter_list,
                        self.rot_matrix_list, self.nr_orients,
                        self.filter_type, is_training)

            ####
            feat = decoder('decoder', d, self.basis_filter_list,
                           self.rot_matrix_list, self.nr_orients,
                           self.filter_type, is_training)

            # NOTE(review): feat1 is created but never consumed -- 'output'
            # below reads `feat`, not `feat1`. It is left in place because its
            # variables are part of the graph (and of the '.*/W' weight-decay
            # term); confirm whether 'output' was meant to take feat1.
            feat1 = Conv2D('feat', feat, 96, 1, use_bias=True, nl=BNReLU)
            o_logi = Conv2D('output',
                            feat,
                            3,
                            1,
                            use_bias=True,
                            nl=tf.identity)
            soft = tf.nn.softmax(o_logi, axis=-1)

            prob = tf.identity(soft[..., :2], name='predmap-prob')

            # encoded so that inference can extract all output at once
            predmap_coded = tf.concat(prob, axis=-1, name='predmap-coded')

        ####
        if get_current_tower_context().is_training:
            #---- LOSS ----#
            loss = 0
            for term, weight in self.loss_term.items():
                if term == 'bce':
                    term_loss = categorical_crossentropy(soft, one_hot)
                    term_loss = tf.reduce_mean(term_loss, name='loss-bce')
                elif term == 'dice':
                    # Dispatch on the current term. Testing membership in the
                    # whole dict would route every unknown term here as soon
                    # as a 'dice' entry exists.
                    term_loss = dice_loss(soft[...,0], one_hot[...,0]) \
                              + dice_loss(soft[...,1], one_hot[...,1])
                    term_loss = tf.identity(term_loss, name='loss-dice')
                else:
                    assert False, 'Not support loss term: %s' % term
                add_moving_summary(term_loss)
                # Apply the configured per-term weight.
                loss += term_loss * weight

            ### combine the loss into single cost function
            wd_loss = regularize_cost('.*/W',
                                      l2_regularizer(1.0e-7),
                                      name='l2_wd_loss')
            add_moving_summary(wd_loss)
            self.cost = tf.identity(loss + wd_loss, name='overall-loss')
            add_moving_summary(self.cost)
            ####

            add_param_summary(('.*/W', ['histogram']))  # monitor W

            ### image summaries: input plus predicted/true maps for channels 0 and 1
            orig_imgs = tf.cast(orig_imgs, tf.uint8)
            tf.summary.image('input', orig_imgs, max_outputs=1)

            pred_blb = colorize(prob[..., 0], cmap='jet')
            true_blb = colorize(true[..., 0], cmap='jet')

            pred_cnt = colorize(prob[..., 1], cmap='jet')
            true_cnt = colorize(true[..., 1], cmap='jet')

            viz = tf.concat(
                [orig_imgs, pred_blb, pred_cnt, true_blb, true_cnt], 2)

            # Stack the first and last sample of the batch into a single image.
            viz = tf.concat([viz[0], viz[-1]], axis=0)
            viz = tf.expand_dims(viz, axis=0)
            tf.summary.image('output', viz, max_outputs=1)

        return
Exemplo n.º 18
0
    def build_graph(
        self,
        cam1: tf.Tensor,
        cam2: tf.Tensor,
        mask: tf.Tensor,
        normal: tf.Tensor,
        depth: tf.Tensor,
        sgs: tf.Tensor,
        diffuse_gt: tf.Tensor,
        specular_gt: tf.Tensor,
        roughness_gt: tf.Tensor,
    ):
        """Build the estimation network, its re-rendering and the training cost.

        ``cam1``/``cam2``, the tiled mask, ``normal`` and ``depth`` are fed to
        ``self.network_architecture``, which yields diffuse, specular and
        roughness predictions. Those are handed to ``self.render`` (together
        with ``sgs``) and the result is compared against ``cam1``.

        Four terms built from ``l1_loss`` and ``masked_loss`` (re-rendering,
        diffuse, specular, roughness) are registered in
        ``tf.GraphKeys.LOSSES``. Whatever ``tf.losses.get_total_loss`` returns
        is stored in ``self.cost`` and returned.

        Only the first channel of ``mask`` is used; it is tiled three times
        along the last axis wherever a three-channel mask is needed.
        """

        def gamma_clip(img):
            # 1/2.2 power curve followed by a clamp to [0, 1] for display.
            return tf.clip_by_value(tf.pow(img, 1.0 / 2.2), 0.0, 1.0)

        def clipped_log(img, message):
            # log(1 + relu(img)) clamped to [0, 13]; fails loudly on NaN/Inf.
            encoded = tf.clip_by_value(
                tf.log(1.0 + tf.nn.relu(img)), 0.0, 13.0)
            return tf.check_numerics(encoded, message)

        def supervised_term(scope, target, estimate, weights):
            # One masked L1 term: summarised, registered as a loss and shown
            # side by side with its ground truth.
            with tf.variable_scope(scope):
                term = tf.reduce_mean(
                    masked_loss(l1_loss(target, estimate), weights),
                    name=scope + "_loss",
                )
                add_moving_summary(term)
                tf.losses.add_loss(term, tf.GraphKeys.LOSSES)
                two_side_by_side(target, estimate, scope, 10)

        with tf.variable_scope("prepare"):
            mask = mask[:, :, :, 0:1]
            # Tile multiples: leave every axis alone except the last (x3).
            channel_tiling = [1] * (len(mask.shape) - 1) + [3]
            mask3 = tf.tile(mask, channel_tiling)

            two_side_by_side(cam1, cam2, "input", 10)
            two_side_by_side(normal, tf.tile(depth, channel_tiling), "geom",
                             10)

            # Not consumed below; kept so the constructed graph is unchanged.
            batch_size = tf.shape(cam1)[0]

        diffuse, specular, roughness = self.network_architecture(
            cam1, cam2, mask3, normal, depth)

        rendered = self.render(
            diffuse, specular, roughness, normal, depth, sgs, mask3)

        # Visual comparison of the input and the re-rendered image after an
        # x / (1 + x) compression and a gamma curve.
        with tf.variable_scope("viz"):
            rendered_compressed = rendered / (1.0 + rendered)
            input_compressed = cam1 / (1.0 + cam1)
            two_side_by_side(
                gamma_clip(input_compressed),
                gamma_clip(rendered_compressed),
                "rendered",
                10,
            )

        with tf.variable_scope("loss"):
            with tf.variable_scope("rendering"):
                rendered_log = clipped_log(
                    rendered, "Rerendered log image contains NaN or Inf")
                input_log = clipped_log(
                    cam1, "The Loss log image contains NaN or Inf")

                rendering_term = tf.reduce_mean(
                    masked_loss(l1_loss(input_log, rendered_log), mask3),
                    name="rendering_loss",
                )
                add_moving_summary(rendering_term)
                tf.losses.add_loss(rendering_term, tf.GraphKeys.LOSSES)

            supervised_term("diffuse", diffuse_gt, diffuse, mask3)
            supervised_term("specular", specular_gt, specular, mask3)
            # Roughness is weighted with the single-channel mask.
            supervised_term("roughness", roughness_gt, roughness, mask)

        self.cost = tf.losses.get_total_loss(name="total_costs")
        add_moving_summary(self.cost)

        if self.training:
            # Histogram of every variable whose name matches ".*/W".
            add_param_summary((".*/W", ["histogram"]))

        return self.cost
Exemplo n.º 19
0
    def _build_graph(self, inputs):
        """Build a quantized conv/fc classifier and define ``self.cost``.

        ``inputs`` is an ``(image, label)`` pair; the image is divided by 255
        before entering the network. The network is five convolutions followed
        by three fully-connected layers ending in 1000 logits.

        ``get_dorefa(BITW, BITA, BITG)`` supplies three callables: the first is
        applied to weight variables through ``remap_variables``, the second to
        activations, and the third to conv/fc outputs before batch norm
        (presumably the gradient quantizer -- confirm in ``get_dorefa``).

        ``self.cost`` is the mean cross-entropy plus an L2 penalty on variables
        matching ``fc.*/W``.
        """
        image, label = inputs
        image = image / 255.0

        quant_w, quant_a, quant_g = get_dorefa(BITW, BITA, BITG)

        def quantize_hidden_weights(var):
            # Hook for remap_variables: only variables whose name ends in 'W'
            # and that do not belong to the first ('conv0') or last ('fct')
            # layer are passed through the weight quantizer.
            var_name = var.op.name
            is_weight = var_name.endswith('W')
            is_boundary_layer = 'conv0' in var_name or 'fct' in var_name
            if is_weight and not is_boundary_layer:
                logger.info("Binarizing weight {}".format(var.op.name))
                return quant_w(var)
            return var

        def nonlin(x):
            # still use relu for 32bit cases; otherwise clamp to [0, 1]
            return tf.nn.relu(x) if BITA == 32 else tf.clip_by_value(x, 0.0, 1.0)

        def activate(x):
            return quant_a(nonlin(x))

        with remap_variables(quantize_hidden_weights), \
                argscope(BatchNorm, decay=0.9, epsilon=1e-4), \
                argscope([Conv2D, FullyConnected], use_bias=False, nl=tf.identity):
            net = LinearWrap(image)

            # conv0: not followed by batch norm or the third quantizer
            net = net.Conv2D('conv0', 96, 12, stride=4, padding='VALID')
            net = net.apply(activate)

            net = net.Conv2D('conv1', 256, 5, padding='SAME', split=2)
            net = net.apply(quant_g).BatchNorm('bn1')
            net = net.MaxPooling('pool1', 3, 2, padding='SAME')
            net = net.apply(activate)

            net = net.Conv2D('conv2', 384, 3)
            net = net.apply(quant_g).BatchNorm('bn2')
            net = net.MaxPooling('pool2', 3, 2, padding='SAME')
            net = net.apply(activate)

            net = net.Conv2D('conv3', 384, 3, split=2)
            net = net.apply(quant_g).BatchNorm('bn3')
            net = net.apply(activate)

            net = net.Conv2D('conv4', 256, 3, split=2)
            net = net.apply(quant_g).BatchNorm('bn4')
            net = net.MaxPooling('pool4', 3, 2, padding='VALID')
            net = net.apply(activate)

            net = net.FullyConnected('fc0', 4096)
            net = net.apply(quant_g).BatchNorm('bnfc0')
            net = net.apply(activate)

            net = net.FullyConnected('fc1', 4096)
            net = net.apply(quant_g).BatchNorm('bnfc1')
            # the input of the last layer only gets the non-linearity,
            # not the activation quantizer
            net = net.apply(nonlin)

            logits = net.FullyConnected('fct', 1000, use_bias=True)()

        tf.nn.softmax(logits, name='output')

        xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label)
        cost = tf.reduce_mean(xent, name='cross_entropy_loss')

        # moving summaries of the top-1 and top-5 error
        wrong_top1 = prediction_incorrect(logits, label, 1, name='wrong-top1')
        add_moving_summary(tf.reduce_mean(wrong_top1, name='train-error-top1'))
        wrong_top5 = prediction_incorrect(logits, label, 5, name='wrong-top5')
        add_moving_summary(tf.reduce_mean(wrong_top5, name='train-error-top5'))

        # weight decay on every variable matching 'fc.*/W'
        wd_cost = regularize_cost(
            'fc.*/W', l2_regularizer(5e-6), name='regularize_cost')

        add_param_summary(('.*/W', ['histogram', 'rms']))
        self.cost = tf.add_n([cost, wd_cost], name='cost')
        add_moving_summary(cost, wd_cost, self.cost)
Exemplo n.º 20
0
    def build_graph(self, points, idx, label, *split_axis):
        """Build the classifier graph and return the total cost to optimize.

        ``points`` is gathered with ``idx`` (per batch element), averaged over
        the second-to-last axis and projected to 32 features. ``DEPTH`` levels
        follow: each level pairs up neighbouring entries along axis 1 and
        applies a kernel/bias selected per entry by ``split_axis[level]`` from
        a table with ``DIM`` alternatives. The output of the last level, with
        axis 1 squeezed away, is used as the logits for ``label``.

        Returns the cross-entropy loss plus 1e-3 times the L2 loss of all
        variables matching ``kernel.*``.
        """
        # Gather the points referenced by `idx`, prefixing every index with
        # the index of the batch element it belongs to.
        batch_range = tf.reshape(tf.range(tf.shape(points)[0]), (-1, 1, 1))
        tile_multiples = [1, tf.shape(idx)[1], tf.shape(idx)[2]]
        batch_idx = tf.expand_dims(tf.tile(batch_range, tile_multiples), -1)
        gather_index = tf.concat([batch_idx, tf.expand_dims(idx, -1)], -1)
        gathered = tf.gather_nd(points, gather_index)

        # Average over the second-to-last axis, then swap the last two axes.
        # (original author's shape note here: "B * N * 3" -- TODO confirm
        # against the 'NCHW' data_format used just below)
        pooled = tf.transpose(tf.reduce_mean(gathered, -2), (0, 2, 1))

        # Width-1 convolution to 32 features, then swap the axes back.
        pre_kernel = tf.get_variable('kernel_pre', [1, DIM, 32])
        projected = tf.nn.conv1d(pooled, pre_kernel, 1, 'SAME',
                                 data_format='NCHW')
        x = tf.transpose(projected, (0, 2, 1))
        x = tf.nn.bias_add(x, tf.get_variable('bias_pre', (32, )))

        # Feature width before/after every level.
        features = [
            32, 32, 64, 64, 128, 128, 256, 256, 512, 512, 128, N_CLASSES
        ]
        # One kernel and one bias per candidate split axis and per level.
        kernel_tables = [
            tf.get_variable('kernel%d' % level,
                            shape=(DIM, 2 * features[level],
                                   features[level + 1]))
            for level in range(DEPTH)
        ]
        bias_tables = [
            tf.get_variable('bias%d' % level,
                            shape=(DIM, features[level + 1]))
            for level in range(DEPTH)
        ]

        for level, (kernel_table, bias_table) in enumerate(
                zip(kernel_tables, bias_tables)):
            # Merge neighbouring entries: halve axis 1, double the features.
            merged_shape = [
                tf.shape(x)[0],
                tf.div(tf.shape(x)[1], 2), 2 * features[level]
            ]
            x = tf.expand_dims(tf.reshape(x, merged_shape),
                               2)  # B * N/2 * 1 * 2F
            # Pick the kernel/bias matching each entry's split axis.
            w = tf.gather_nd(kernel_table,
                             tf.expand_dims(split_axis[level],
                                            -1))  # B * N/2 * 2F * F_next
            b = tf.gather_nd(bias_table,
                             tf.expand_dims(split_axis[level],
                                            -1))  # B * N/2 * F_next
            x = tf.squeeze(tf.matmul(x, w), -2) + b
            # no non-linearity after the last level (its output is the logits)
            if level < DEPTH - 1:
                x = tf.nn.relu(x)

        logits = tf.squeeze(x, 1)

        # per-sample loss, then its mean over the batch
        per_sample_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label)
        cost = tf.reduce_mean(per_sample_loss, name='cross_entropy_loss')

        correct = tf.cast(tf.nn.in_top_k(logits, label, 1),
                          tf.float32,
                          name='correct')
        accuracy = tf.reduce_mean(correct, name='accuracy')

        # Track training error and accuracy as moving averages.
        train_error = tf.reduce_mean(1 - correct, name='train_error')
        summary.add_moving_summary(train_error, accuracy)

        # Weight decay on every variable whose name matches 'kernel.*'.
        wd_cost = tf.multiply(1e-3,
                              regularize_cost('kernel.*', tf.nn.l2_loss),
                              name='regularize_loss')
        total_cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, total_cost)

        # histogram and rms summaries for the same set of variables
        summary.add_param_summary(('kernel.*', ['histogram', 'rms']))

        return total_cost
Exemplo n.º 21
0
    def build_graph(self, pc, pc_feature):
        """Build the encoder/decoder graph and return the total cost.

        ``pc_feature`` is encoded by ``SmoothNet`` into a per-point embedding
        whose last channel becomes a sigmoid gate ``z`` and whose remaining
        channels become an L2-normalized feature. A global code is pooled from
        the gated features, decoded level by level into ``self.cfg.num_points``
        3-D points and compared with ``pc`` through ``chamfer``.

        The returned cost is the sum of the reconstruction loss, a symmetry
        loss on the embedding, weight decay and -- only when the current tower
        is training -- the GAN terms (``loss_d``, ``loss_g``, ``gp_loss``)
        computed on ``z``. In a non-training tower those three stay at 0.
        """
        # Mirror the cloud by negating its first coordinate, then find for
        # every point the index of the closest mirrored point.
        pc_symmetry = tf.stack([-pc[..., 0], pc[..., 1], pc[..., 2]], -1)  # -x
        # pairwise squared distances between points and mirrored points
        dist2sym = tf.reduce_sum((pc[:, :, None] - pc_symmetry[:, None])**2,
                                 -1)
        nearest_idx = tf.argmin(dist2sym, -1, output_type=tf.int32)

        # smoothnet encoder, only local features are used
        embedding = SmoothNet(pc_feature, self.cfg)
        with tf.variable_scope('encoder'):
            # last embedding channel -> gate in (0, 1); the rest -> unit-norm
            # feature vector
            z = tf.sigmoid(embedding[:, :, -1], name='z')
            output_x = tf.nn.l2_normalize(embedding[:, :, :-1],
                                          axis=-1,
                                          name='feature')

        # Defaults used when the tower is not training.
        gp_loss = 0.
        loss_d = 0.
        loss_g = 0.
        if get_current_tower_context().is_training:
            # "Real" samples for the discriminator are drawn from a Beta
            # distribution configured in self.cfg.beta.
            beta_dist = tf.distributions.Beta(
                concentration1=self.cfg.beta.concentration1,
                concentration0=self.cfg.beta.concentration0)

            with tf.variable_scope('GAN'):
                real_z = beta_dist.sample(tf.shape(z))
                # stop_gradient: the discriminator loss must not update the
                # encoder that produced z
                fake_val = self.discriminator(tf.stop_gradient(z))
                real_val = self.discriminator(real_z)
                loss_d = tf.reduce_mean(fake_val - real_val, name='loss_d')
                # NOTE(review): presumably freezes the discriminator variables
                # so that loss_g only drives z -- confirm against
                # varreplace.freeze_variables
                with varreplace.freeze_variables(stop_gradient=True):
                    loss_g = tf.reduce_mean(-self.discriminator(z),
                                            name='loss_g')

                # Random interpolation between z and real_z, one coefficient
                # per entry of fake_val's first axis.
                z_interp = z + tf.random_uniform(
                    (tf.shape(fake_val)[0], 1)) * (real_z - z)
                gradient_f = tf.gradients(self.discriminator(z_interp),
                                          [z_interp])[0]
                # Penalize only gradient norms larger than 1.
                gp_loss = tf.reduce_mean(tf.maximum(
                    tf.norm(gradient_f, axis=-1) - 1, 0)**2,
                                         name='gp_loss')
        # Global code: max over axis 1 of the gated positive and negative
        # parts of the feature, concatenated along the last axis.
        code = tf.concat([
            tf.reduce_max(tf.nn.relu(output_x) * z[..., None], 1),
            tf.reduce_max(tf.nn.relu(-output_x) * z[..., None], 1)
        ],
                         axis=-1,
                         name='code')
        code = FullyConnected('fc_global',
                              code,
                              self.cfg.topnet.code_nfts,
                              activation=None)

        # topnet decoder
        tarch = get_arch(self.cfg.topnet.nlevels, self.cfg.num_points)

        def create_level(level, input_channels, output_channels, inputs, bn):
            """Run one decoder level: an mlp_conv whose output is reshaped so
            that its last axis has ``output_channels`` entries."""
            with tf.variable_scope('level_%d' % level, reuse=tf.AUTO_REUSE):
                features = mlp_conv(inputs, [
                    input_channels,
                    int(input_channels / 2),
                    int(input_channels / 4),
                    int(input_channels / 8),
                    output_channels * int(tarch[level])
                ],
                                    get_current_tower_context().is_training,
                                    bn)
                features = tf.reshape(
                    features, [tf.shape(features)[0], -1, output_channels])
            return features

        # Every level sees the previous level's features concatenated with
        # the global code.
        Nin = self.cfg.topnet.nfeat + self.cfg.topnet.code_nfts
        Nout = self.cfg.topnet.nfeat
        bn = True
        N0 = int(tarch[0])
        nlevels = len(tarch)
        with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE):
            # Level 0 is produced directly from the code.
            level0 = mlp(code, [256, 64, self.cfg.topnet.nfeat * N0],
                         get_current_tower_context().is_training,
                         bn=True)
            level0 = tf.tanh(level0, name='tanh_0')
            level0 = tf.reshape(level0, [-1, N0, self.cfg.topnet.nfeat])
            outs = [
                level0,
            ]
            for i in range(1, nlevels):
                # The last level outputs 3 channels (coordinates) and is
                # built with bn=False.
                if i == nlevels - 1:
                    Nout = 3
                    bn = False
                inp = outs[-1]
                # Repeat the code along axis 1 and append it to the features.
                y = tf.expand_dims(code, 1)
                y = tf.tile(y, [1, tf.shape(inp)[1], 1])
                y = tf.concat([inp, y], 2)
                outs.append(
                    tf.tanh(create_level(i, Nin, Nout, y, bn),
                            name='tanh_%d' % (i)))

        reconstruction = tf.reshape(outs[-1], [-1, self.cfg.num_points, 3],
                                    name='recon_pc')
        loss_recon = chamfer(reconstruction, pc)

        loss_recon = tf.identity(self.cfg.recon_factor *
                                 tf.reduce_mean(loss_recon),
                                 name='recon_loss')

        # Symmetry loss: the embedding of each point should match the
        # embedding of the point nearest to its mirror image.
        batch_size = tf.shape(output_x)[0]
        batch_idx = tf.tile(
            tf.range(batch_size)[:, None], [1, tf.shape(nearest_idx)[1]])
        feature_sym = tf.gather_nd(embedding,
                                   tf.stack([batch_idx, nearest_idx], -1))

        loss_sym = tf.identity(
            self.cfg.symmetry_factor *
            tf.reduce_mean(tf.reduce_sum(tf.abs(feature_sym - embedding), -1)),
            'symmetry_loss')

        # Weight decay on variables whose name matches '.*(_W|kernel)'.
        wd_cost = tf.multiply(1e-4,
                              regularize_cost('.*(_W|kernel)', tf.nn.l2_loss),
                              name='regularize_loss')
        loss_gan = loss_d + loss_g + gp_loss
        total_cost = tf.add_n([loss_recon, wd_cost, loss_gan, loss_sym],
                              name='total_cost')
        # Only the reconstruction and symmetry losses get a moving summary.
        summary.add_moving_summary(loss_recon, loss_sym)
        summary.add_param_summary(['.*(_W|kernel)', ['histogram', 'rms']])
        return total_cost
Exemplo n.º 22
0
    def _build_graph(self, inputs):
        """Build a U-Net with 'valid' convolutions and define ``self.cost``.

        ``inputs`` is ``(images, truemap_coded)``. The last channel of
        ``truemap_coded`` is the regression target; when
        ``self.type_classification`` is set, channel 1 holds a type id that is
        one-hot encoded with 5 classes. The network output is exposed as the
        tensor named 'predmap-coded' (type softmax, if any, followed by the
        regression map).

        Losses, ``self.cost`` and image summaries are only built when the
        current tower is training. Always returns ``None``.
        """

        def conv_stack(feat, channel, nr_blks):
            # nr_blks consecutive 3x3 'valid' convolutions, each with BNReLU
            for blk in range(nr_blks):
                feat = Conv2D('conv_%d' % blk,
                              feat,
                              channel,
                              3,
                              padding='valid',
                              strides=1,
                              activation=BNReLU)
            return feat

        def down_conv_block(name, feat, channel, nr_blks, stride=1):
            # optional 2x max-pool followed by the conv stack
            with tf.variable_scope(name):
                if stride != 1:
                    assert stride == 2, 'U-Net supports stride 2 down-sample only'
                    feat = MaxPooling('max_pool', feat, 2, strides=2)
                feat = conv_stack(feat, channel, nr_blks)
            return feat

        def up_conv_block(name, feat, skip, channel, nr_blks, stride=2):
            # optional 2x transposed conv + concat with the cropped encoder
            # feature, followed by the conv stack
            with tf.variable_scope(name):
                if stride != 1:
                    # channel count is read from axis 1 (NCHW)
                    up_channel = feat.get_shape().as_list()[1]
                    assert stride == 2, 'U-Net supports stride 2 up-sample only'
                    feat = Conv2DTranspose('deconv', feat, up_channel, 2, strides=2)
                    feat = tf.concat([feat, skip], axis=1)
                feat = conv_stack(feat, channel, nr_blks)
            return feat

        is_training = get_current_tower_context().is_training

        images, truemap_coded = inputs
        orig_imgs = images

        if self.type_classification:
            type_ids = tf.cast(truemap_coded[..., 1], tf.int32)
            type_ids = tf.identity(type_ids, name='truemap-type')
            one_type = tf.one_hot(type_ids, 5, axis=-1)
            true_type = tf.expand_dims(type_ids, axis=-1)

        true_dst = tf.identity(
            tf.expand_dims(truemap_coded[..., -1], axis=-1),
            name='truemap-dst')

        # Xavier-initialized kernels, biases starting at 0.1
        with argscope(Conv2D, activation=tf.identity, use_bias=True,
                      kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                      bias_initializer=tf.constant_initializer(0.1)), \
                argscope([Conv2D, Conv2DTranspose, MaxPooling, BatchNorm], data_format=self.data_format):

            # scale to [0, 1] and move channels to axis 1
            net_in = tf.transpose(images / 255.0, [0, 3, 1, 2])

            with tf.variable_scope('encoder'):
                enc = [down_conv_block('e0', net_in, 32, nr_blks=2, stride=1)]
                for scope, width in (('e1', 64), ('e2', 128),
                                     ('e3', 256), ('e4', 512)):
                    enc.append(
                        down_conv_block(scope, enc[-1], width,
                                        nr_blks=2, stride=2))

                # crop e0..e3 to the sizes the decoder concatenates with
                crop_sizes = ((176, 176), (80, 80), (32, 32), (8, 8))
                skips = [crop_op(feat, size)
                         for feat, size in zip(enc, crop_sizes)]

            with tf.variable_scope('decoder'):
                dec = enc[-1]
                for scope, skip, width in (('d3', skips[3], 256),
                                           ('d2', skips[2], 128),
                                           ('d1', skips[1], 64),
                                           ('d0', skips[0], 32)):
                    dec = up_conv_block(scope, dec, skip, width,
                                        nr_blks=2, stride=2)

            # regression head, moved back to channels-last
            logi_dst = Conv2D('conv_out_dst', dec, 1, 1, activation=tf.identity)
            logi_dst = tf.transpose(logi_dst, [0, 2, 3, 1])
            pred_dst = tf.identity(logi_dst, name='predmap-dst')

            if self.type_classification:
                logi_type = Conv2D('conv_out_type', dec, 5, 1,
                                   activation=tf.identity)
                logi_type = tf.transpose(logi_type, [0, 2, 3, 1])
                soft_type = tf.nn.softmax(logi_type, axis=-1)
                # channel ordering: type-map first, then the regression map
                predmap_coded = tf.concat([soft_type, pred_dst], axis=-1)
            else:
                predmap_coded = pred_dst

            # encoded so that inference can extract all output at once
            predmap_coded = tf.identity(predmap_coded, name='predmap-coded')

        if not is_training:
            return

        # ---- losses
        loss = 0

        # regression: mean squared error
        residual = pred_dst - true_dst
        loss_mse = tf.reduce_mean(residual * residual, name='loss_mse')
        loss += loss_mse

        if self.type_classification:
            loss_type = tf.reduce_mean(
                categorical_crossentropy(soft_type, one_type),
                name='loss-xentropy-class')
            add_moving_summary(loss_type)
            loss += loss_type

        wd_loss = regularize_cost('.*/W',
                                  l2_regularizer(5.0e-6),
                                  name='l2_regularize_loss')
        loss += wd_loss

        self.cost = tf.identity(loss, name='cost')
        add_moving_summary(self.cost)

        add_param_summary(('.*/W', ['histogram']))  # monitor W

        # ---- image summaries
        orig_imgs = tf.cast(orig_imgs, tf.uint8)
        tf.summary.image('input', orig_imgs, max_outputs=1)

        # crop the input so it can be concatenated with the colorized maps
        orig_imgs = crop_op(orig_imgs, (184, 184), "NHWC")

        pred_dst = colorize(pred_dst[..., 0], cmap='jet')
        true_dst = colorize(true_dst[..., 0], cmap='jet')

        # input | ground truth | prediction, concatenated along axis 2
        viz = tf.concat([orig_imgs, true_dst, pred_dst], 2)
        tf.summary.image('output', viz, max_outputs=1)

        return
Exemplo n.º 23
0
    def build_graph(self, image, label):
        """Build a 10-class classifier with paired conv/"weak" branches.

        Every stage runs two convolutions on the same input: a 'convN' branch
        and a 'weakN' branch, each followed by batch norm. The stage output is
        the channel-wise concatenation of the activated 'conv' branch and the
        'weak' branch multiplied by ``scale``, a cosine-decay schedule over
        ``4721*50`` global steps starting at 1.0.

        Returns the cross-entropy loss plus an L2 penalty on variables
        matching ``fc.*/W``.
        """
        is_training = get_current_tower_context().is_training

        quant_w, quant_a, _ = get_dorefa(BITW, BITA, BITG)

        def binarize_weight(var):
            # Hook for remap_variables: quantize only variables whose name
            # ends in 'W' and contains none of 'conv0', 'weak', 'fc'.
            var_name = var.op.name
            is_weight = var_name.endswith('W')
            is_exempt = ('conv0' in var_name or 'weak' in var_name
                         or 'fc' in var_name)
            if is_weight and not is_exempt:
                logger.info("Binarizing weight {}".format(var.op.name))
                return quant_w(var)
            return var

        def cabs(x):
            # |x| clipped from above at 1
            return tf.minimum(1.0, tf.abs(x), name='cabs')

        def activate(x):
            return quant_a(cabs(x))

        def merge(x, y):
            # concatenate along axis 3
            return tf.concat([x, y], axis=3)

        image = image / 256.0
        k = 3           # kernel size of the weak branches (stages 1-5)
        zp = 0.25       # width multiplier of the conv branches
        zp2 = zp / 1    # width multiplier of the weak branches

        scale = tf.train.cosine_decay(learning_rate=1.0,
                                      global_step=get_global_step_var(),
                                      decay_steps=4721*50,
                                      alpha=0.0)
        tf.summary.scalar('scale', scale)

        # batch-norm outputs of every stage, kept for the summaries below
        endconv = []
        endweak = []

        def twin_branches(feat, width, strong_names, weak_names,
                          strong_kernel, weak_kernel, padding):
            # conv + BN for the main branch, then conv + BN for the weak one;
            # both BN outputs are recorded before being returned
            strong_conv, strong_bn = strong_names
            weak_conv, weak_bn = weak_names

            strong = Conv2D(strong_conv, feat, np.round(width*zp),
                            strong_kernel, padding=padding)
            strong = BatchNorm(strong_bn, strong)
            endconv.append(strong)

            weak = Conv2D(weak_conv, feat, np.round(width*zp2),
                          weak_kernel, padding=padding)
            weak = BatchNorm(weak_bn, weak)
            endweak.append(weak)
            return strong, weak

        with remap_variables(binarize_weight), \
                argscope(BatchNorm, momentum=0.9, epsilon=1e-4), \
                argscope(Conv2D, use_bias=False):
            net = Conv2D('conv0', image, np.round(48*zp), 5,
                         padding='VALID', use_bias=True)
            net = MaxPooling('pool0', net, 2, padding='SAME')
            net = activate(net)

            # stage 1
            strong, weak = twin_branches(
                net, 64, ('conv1', 'bn1'), ('weak1', 'bn12'), 3, k, 'SAME')
            net = merge(activate(strong), scale*weak)

            # stage 2 (both branches are max-pooled)
            strong, weak = twin_branches(
                net, 64, ('conv2', 'bn2'), ('weak2', 'bn22'), 3, k, 'SAME')
            strong = MaxPooling('pool1', strong, 2, padding='SAME')
            weak = MaxPooling('pool12', weak, 2, padding='SAME')
            # NOTE(review): in the original the merged tensor of this stage is
            # immediately overwritten by activate(strong), so the weak branch
            # does not feed stage 3. Kept as-is (the merge ops are still
            # created); confirm whether this is intentional.
            merge(activate(strong), scale*weak)
            net = activate(strong)

            # stage 3
            strong, weak = twin_branches(
                net, 128, ('conv3', 'bn3'), ('weak3', 'bn32'), 3, k, 'VALID')
            net = merge(activate(strong), scale*weak)

            # stage 4
            strong, weak = twin_branches(
                net, 128, ('conv4', 'bn4'), ('weak4', 'bn42'), 3, k, 'SAME')
            net = merge(activate(strong), scale*weak)

            # stage 5
            strong, weak = twin_branches(
                net, 128, ('conv5', 'bn5'), ('weak5', 'bn52'), 3, k, 'VALID')
            net = merge(activate(strong), scale*weak)

            # keep probability 0.5 while training, 1.0 otherwise
            net = tf.nn.dropout(net, 0.5 if is_training else 1.0)

            # stage 6: 5x5 kernels on both branches; the main branch is only
            # passed through cabs here, not through the activation quantizer
            strong, weak = twin_branches(
                net, 512, ('conv6', 'bn6'), ('weak6', 'bn62'), 5, 5, 'VALID')
            net = merge(cabs(strong), scale*weak)

            logits = FullyConnected('fc1', net, 10)

        tf.nn.softmax(logits, name='output')

        # 1.0 for every sample whose label is not the top-1 prediction
        wrong = tf.cast(tf.logical_not(tf.nn.in_top_k(logits, label, 1)),
                        tf.float32, name='wrong_tensor')
        add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label)
        cost = tf.reduce_mean(xent, name='cross_entropy_loss')

        # weight decay on every variable matching 'fc.*/W'
        wd_cost = regularize_cost('fc.*/W', l2_regularizer(1e-7))

        add_param_summary(('.*/W', ['histogram', 'rms']))
        total_cost = tf.add_n([cost, wd_cost], name='cost')
        add_moving_summary(cost, wd_cost, total_cost)

        # mean absolute batch-norm output of both branches, per stage
        for stage, (strong_out, weak_out) in enumerate(
                zip(endconv, endweak), start=1):
            add_moving_summary(
                tf.reduce_mean(tf.abs(strong_out),
                               name='mean_conv_'+str(stage)))
            add_moving_summary(
                tf.reduce_mean(tf.abs(weak_out),
                               name='mean_weak_'+str(stage)))

        return total_cost
Exemplo n.º 24
0
    def _build_graph(self, inputs):

        ####
        is_training = get_current_tower_context().is_training

        images, truemap_coded = inputs

        orig_imgs = images
        pen_map = truemap_coded[..., -1]
        if self.type_classification:
            true = truemap_coded[..., 1]
        else:
            true = truemap_coded[..., 0]
        true = tf.cast(true, tf.int32)
        true = tf.identity(true, name='truemap')
        one = tf.one_hot(
            true,
            self.nr_types if self.type_classification else self.nr_classes,
            axis=-1)
        true = tf.expand_dims(true, axis=-1)

        def down_branch(name, main_in, aux_in, ch):
            with tf.variable_scope(name):
                a = Conv2D('conv1',
                           main_in,
                           ch,
                           3,
                           padding='valid',
                           use_bias=False,
                           activation=BNReLU)
                a = Conv2D('conv2',
                           a,
                           ch,
                           3,
                           padding='valid',
                           use_bias=True,
                           activation=tf.nn.relu)
                a = MaxPooling('pool', a, 2, strides=2, padding='same')

                b = Conv2D('conv3',
                           aux_in,
                           ch,
                           3,
                           padding='valid',
                           use_bias=False,
                           activation=BNReLU)
                b = Conv2D('conv4',
                           b,
                           ch,
                           3,
                           padding='valid',
                           use_bias=True,
                           activation=tf.nn.relu)

                c = tf.concat([a, b], axis=1)
            return c

        def up_branch(name, main_in, aux_in, ch):
            with tf.variable_scope(name):
                a = Conv2DTranspose('up1',
                                    main_in,
                                    ch,
                                    2,
                                    strides=(2, 2),
                                    padding='same',
                                    use_bias=True,
                                    activation=tf.identity)
                a = Conv2D('conv1',
                           a,
                           ch,
                           3,
                           padding='valid',
                           use_bias=True,
                           activation=tf.nn.relu)
                a = Conv2D('conv2',
                           a,
                           ch,
                           3,
                           padding='valid',
                           use_bias=True,
                           activation=tf.nn.relu)

                # stride 1 is no different from normal 5x5 conv, 'valid' to gain extrapolated border pixels
                b1 = Conv2DTranspose('up2',
                                     a,
                                     ch,
                                     5,
                                     strides=(1, 1),
                                     padding='valid',
                                     use_bias=True,
                                     activation=tf.identity)
                b2 = Conv2DTranspose('up3',
                                     aux_in,
                                     ch,
                                     5,
                                     strides=(1, 1),
                                     padding='valid',
                                     use_bias=True,
                                     activation=tf.identity)
                b = tf.concat([b1, b2], axis=1)
                b = Conv2D('conv3',
                           b,
                           ch,
                           1,
                           padding='same',
                           use_bias=True,
                           activation=tf.nn.relu)
            return b

        def aux_branch(name, main_in, up_kernel, up_strides):
            ch = main_in.get_shape().as_list()[1]  # NCHW
            with tf.variable_scope(name):  # preserve the depth
                a = Conv2DTranspose('up',
                                    main_in,
                                    ch,
                                    up_kernel,
                                    strides=up_strides,
                                    padding='same',
                                    use_bias=True,
                                    activation=tf.identity)
                a = Conv2D('conv',
                           a,
                           self.nr_types
                           if self.type_classification else self.nr_classes,
                           3,
                           padding='valid',
                           activation=tf.nn.relu)
                a = tf.layers.dropout(a,
                                      rate=0.5,
                                      seed=5,
                                      training=is_training)
            return a

        #### Xavier initializer
        with argscope(Conv2D, activation=tf.identity,
                    kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(uniform=True),
                    bias_initializer=tf.constant_initializer(0.1)), \
             argscope(Conv2DTranspose, activation=tf.identity,
                    kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(uniform=True),
                    bias_initializer=tf.constant_initializer(0.1)), \
                argscope([Conv2D, Conv2DTranspose, MaxPooling, BatchNorm], data_format=self.data_format):

            i = tf.transpose(images / 255.0, [0, 3, 1, 2])  # our way
            resize_func = lambda x, y: resize_op(
                x, size=y, interp='bicubic', data_format='channels_first')

            ####
            b1 = down_branch('b1', i, resize_func(i, (128, 128)), 64)
            b2 = down_branch('b2', b1, resize_func(i, (64, 64)), 128)
            b3 = down_branch('b3', b2, resize_func(i, (32, 32)), 256)
            b4 = down_branch('b4', b3, resize_func(i, (16, 16)), 512)

            with tf.variable_scope('b5'):
                b5 = Conv2D('conv1',
                            b4,
                            2048,
                            3,
                            padding='valid',
                            use_bias=True,
                            activation=tf.nn.relu)
                b5 = Conv2D('conv2',
                            b5,
                            2048,
                            3,
                            padding='valid',
                            use_bias=True,
                            activation=tf.nn.relu)
            b6 = up_branch('b6', b5, b4, 1024)
            b7 = up_branch('b7', b6, b3, 512)
            b8 = up_branch('b8', b7, b2, 256)
            b9 = up_branch('b9', b8, b1, 128)

            aux_out1 = aux_branch('aux_out1', b9, 2, (2, 2))
            aux_out2 = aux_branch('aux_out2', b8, 4, (4, 4))
            aux_out3 = aux_branch('aux_out3', b7, 8, (8, 8))
            out = tf.concat([aux_out1, aux_out2, aux_out3], axis=1)
            out_list = [out, aux_out1, aux_out2, aux_out3]

            soft_list = []
            prob_list = []
            for idx, sub_out in enumerate(out_list):
                logi = Conv2D('conv_out%d' % idx,
                              sub_out,
                              self.nr_types
                              if self.type_classification else self.nr_classes,
                              3,
                              padding='valid',
                              use_bias=True,
                              activation=tf.identity)
                logi = tf.transpose(logi, [0, 2, 3, 1])
                soft = tf.nn.softmax(logi, axis=-1)

                if self.type_classification:
                    prob_np = tf.reduce_sum(soft[..., 1:],
                                            axis=-1,
                                            keepdims=True)
                    prob_np = tf.identity(prob_np, name='predmap-prob-np')
                else:
                    prob_np = tf.identity(soft[..., 1], name='predmap-prob')
                    prob_np = tf.expand_dims(prob_np, axis=-1)

                soft_list.append(soft)
                prob_list.append(prob_np)

            # return the aggregated output
            # encoded so that inference can extract all output at once
            if self.type_classification:
                predmap_coded = tf.concat([soft_list[0], prob_list[0]],
                                          axis=-1,
                                          name='predmap-coded')
            else:
                predmap_coded = tf.identity(prob_list[0], name='predmap-coded')

        ####
        if is_training:
            ######## LOSS
            # get the variable to received fed weight from external scheduler
            with tf.variable_scope("", reuse=True):
                aux_loss_dw = tf.get_variable('aux_loss_dw')

            loss_list = []  # index 0 is main output
            global_step = tf.train.get_or_create_global_step()
            global_step = tf.cast(global_step, tf.float32)
            for idx, sub_soft in enumerate(soft_list):
                loss_bce = categorical_crossentropy(sub_soft, one)
                loss_bce = tf.reduce_mean(loss_bce * pen_map)
                loss_bce = loss_bce if idx == 0 else loss_bce * aux_loss_dw
                loss_bce = tf.identity(loss_bce, name='loss-bce-%d' % idx)
                loss_list.append(loss_bce)
                add_moving_summary(loss_bce)

            wd_loss = regularize_cost('.*/W',
                                      l2_regularizer(1.0e-5),
                                      name='l2_wd_loss')
            add_moving_summary(wd_loss)

            cost = tf.add_n(loss_list) + wd_loss
            self.cost = tf.identity(cost, name='overall_cost')
            add_moving_summary(self.cost)
            ####

            add_param_summary(('.*/W', ['histogram']))  # monitor W

            #### logging visual sthg
            orig_imgs = tf.cast(orig_imgs, tf.uint8)
            tf.summary.image('input', orig_imgs, max_outputs=1)

            colored_list = [true
                            ] + prob_list + [tf.expand_dims(pen_map, axis=-1)]
            colored_list = [
                colorize(feat[..., 0], cmap='jet') for feat in colored_list
            ]

            viz = tf.concat([orig_imgs] + colored_list, 2)
            tf.summary.image('output', viz, max_outputs=1)

        return
Exemplo n.º 25
0
    def _build_graph(self, inputs):
        """Build the classifier graph and store the objective in `self.cost`.

        `inputs` is unpacked into `(image, label)`.  The network is four 3x3
        convolutions (32 channels each) interleaved with two max-poolings,
        followed by a 512-unit dense layer, dropout and a 10-way dense layer.
        """
        image, label = inputs

        # Convolutions take NHWC input, so append a single channel axis.
        image = tf.expand_dims(image, 3)
        # Center the pixel values at zero.
        image = image * 2 - 1

        def conv3x3(x, name):
            # All convolutions in this model share the same hyper-parameters.
            return tf.layers.conv2d(x,
                                    32,
                                    3,
                                    padding='same',
                                    activation=tf.nn.relu,
                                    name=name)

        # NOTE(review): every layer below is built with tf.layers, so this
        # Conv2D argscope does not look like it influences them -- confirm
        # before removing it.
        with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu, out_channel=32):
            net = conv3x3(image, 'conv0')
            net = tf.layers.max_pooling2d(net, 2, 2, padding='valid')
            net = conv3x3(net, 'conv1')
            net = conv3x3(net, 'conv2')
            net = tf.layers.max_pooling2d(net, 2, 2, padding='valid')
            net = conv3x3(net, 'conv3')
            net = tf.layers.flatten(net)
            net = tf.layers.dense(net, 512, activation=tf.nn.relu, name='fc0')
            # Dropout is only active while building a training tower.
            training = get_current_tower_context().is_training
            net = tf.layers.dropout(net, rate=0.5, training=training)
            logits = tf.layers.dense(net, 10, activation=tf.identity, name='fc1')

        # Named op exposing the class probabilities (B x 10).
        tf.nn.softmax(logits, name='prob')

        # Per-sample cross-entropy (length-B vector), then its batch average.
        per_sample_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label)
        cost = tf.reduce_mean(per_sample_xent, name='cross_entropy_loss')

        # 1.0 where the top-1 prediction matches the label, else 0.0.
        top1_hit = tf.nn.in_top_k(logits, label, 1)
        correct = tf.cast(top1_hit, tf.float32, name='correct')
        accuracy = tf.reduce_mean(correct, name='accuracy')

        # Track the training error as a moving average; per the original
        # notes this is written to tensorboard and stat.json and printed
        # after each epoch.
        train_error = tf.reduce_mean(1 - correct, name='train_error')
        summary.add_moving_summary(train_error, accuracy)

        # Weight decay, selected by regex: kernels of the fc layers only.
        fc_l2 = regularize_cost('fc.*/kernel', tf.nn.l2_loss)
        wd_cost = tf.multiply(1e-5, fc_l2, name='regularize_loss')
        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, self.cost)

        # Histogram / rms summaries for every kernel (conv and fc layers).
        summary.add_param_summary(('.*/kernel', ['histogram', 'rms']))
Exemplo n.º 26
0
    def build_graph(self, image, edgemap):
        """Build the multi-branch edge-detection graph.

        Five side branches are taken from a stack of 3x3 convolutions; each
        is reduced to one channel and upsampled back by its factor.  A final
        1x1 convolution fuses the five branch maps.  Every branch and the
        fused map get a sigmoid output op (`output1`..`output6`) and a
        class-balanced sigmoid cross-entropy term.

        Args:
            image: input image tensor; a constant [104, 116, 122] is
                subtracted along the last axis.
            edgemap: ground-truth edge map; a trailing axis is added here.

        Returns:
            The total cost tensor (named 'cost') when building a training
            tower.  Otherwise nothing is returned explicitly (None).
        """
        image = image - tf.constant([104, 116, 122], dtype='float32')
        edgemap = tf.expand_dims(edgemap, 3, name='edgemap4d')

        def branch(name, l, up):
            """Reduce `l` to one channel and upsample it by a factor of `up`.

            `up` is halved after every 2x upsampling step until it reaches 1,
            so it is expected to be a power of two.
            """
            with tf.variable_scope(name):
                l = Conv2D('convfc',
                           l,
                           1,
                           kernel_size=1,
                           activation=tf.identity,
                           use_bias=True,
                           kernel_initializer=tf.constant_initializer())
                while up != 1:
                    l = BilinearUpSample('upsample{}'.format(up), l, 2)
                    # Floor division keeps `up` an int on Python 3; true
                    # division would turn it into a float and produce scope
                    # names such as 'upsample4.0'.
                    up = up // 2
                return l

        with argscope(Conv2D, kernel_size=3, activation=tf.nn.relu):
            l = Conv2D('conv1_1', image, 64)
            l = Conv2D('conv1_2', l, 64)
            b1 = branch('branch1', l, 1)
            l = MaxPooling('pool1', l, 2)

            l = Conv2D('conv2_1', l, 128)
            l = Conv2D('conv2_2', l, 128)
            b2 = branch('branch2', l, 2)
            l = MaxPooling('pool2', l, 2)

            l = Conv2D('conv3_1', l, 256)
            l = Conv2D('conv3_2', l, 256)
            l = Conv2D('conv3_3', l, 256)
            b3 = branch('branch3', l, 4)
            l = MaxPooling('pool3', l, 2)

            l = Conv2D('conv4_1', l, 512)
            l = Conv2D('conv4_2', l, 512)
            l = Conv2D('conv4_3', l, 512)
            b4 = branch('branch4', l, 8)
            l = MaxPooling('pool4', l, 2)

            l = Conv2D('conv5_1', l, 512)
            l = Conv2D('conv5_2', l, 512)
            l = Conv2D('conv5_3', l, 512)
            b5 = branch('branch5', l, 16)

        # Fuse the five side outputs with a 1x1 convolution (no bias, all
        # weights initialised to 0.2).
        final_map = Conv2D('convfcweight',
                           tf.concat([b1, b2, b3, b4, b5], 3),
                           1,
                           kernel_size=1,
                           kernel_initializer=tf.constant_initializer(0.2),
                           use_bias=False,
                           activation=tf.identity)
        costs = []
        for idx, b in enumerate([b1, b2, b3, b4, b5, final_map]):
            output = tf.nn.sigmoid(b, name='output{}'.format(idx + 1))
            xentropy = class_balanced_sigmoid_cross_entropy(
                b, edgemap, name='xentropy{}'.format(idx + 1))
            costs.append(xentropy)

        # `output` still refers to the last loop iteration here, i.e. the
        # sigmoid of the fused map.  0.5 is "some magic threshold".
        pred = tf.cast(tf.greater(output, 0.5), tf.int32, name='prediction')
        wrong = tf.cast(tf.not_equal(pred, edgemap), tf.float32)
        wrong = tf.reduce_mean(wrong, name='train_error')

        if get_current_tower_context().is_training:
            # Weight-decay coefficient decays with the global step
            # (staircase, factor 0.7 every 80000 steps, starting at 2e-4).
            wd_w = tf.train.exponential_decay(2e-4, get_global_step_var(),
                                              80000, 0.7, True)
            wd_cost = tf.multiply(wd_w,
                                  regularize_cost('.*/W', tf.nn.l2_loss),
                                  name='wd_cost')
            costs.append(wd_cost)

            add_param_summary(('.*/W', ['histogram']))  # monitor W
            total_cost = tf.add_n(costs, name='cost')
            add_moving_summary(costs + [wrong, total_cost])
            return total_cost
Exemplo n.º 27
0
    def _build_graph(self, inputs):
        """Build the quantized classifier graph and set `self.cost`.

        `inputs` is unpacked into `(image, label)`.  Weights, activations and
        gradients are passed through the functions returned by
        `get_dorefa(BITW, BITA, BITG)`; the first ('conv0') and last ('fc')
        layers keep their original weights.
        """
        image, label = inputs
        is_training = get_current_tower_context().is_training

        fw, fa, fg = get_dorefa(BITW, BITA, BITG)

        def quantize_weight(v):
            # Used with remap_variables: only variables whose name ends in
            # 'W' are touched, and the first/last layers are left alone.
            name = v.op.name
            eligible = (name.endswith('W')
                        and 'conv0' not in name
                        and 'fc' not in name)
            if eligible:
                logger.info("Binarizing weight {}".format(v.op.name))
                return fw(v)
            return v

        def clipped_abs(x):
            # |x| capped at 1.0.
            return tf.minimum(1.0, tf.abs(x), name='cabs')

        def quantized_act(x):
            return fa(clipped_abs(x))

        image = image / 256.0

        # Dropout keeps everything outside of training.
        keep_prob = 0.5 if is_training else 1.0

        with remap_variables(quantize_weight), \
                argscope(BatchNorm, decay=0.9, epsilon=1e-4), \
                argscope(Conv2D, use_bias=False, nl=tf.identity):
            logits = (LinearWrap(image)
                      .Conv2D('conv0', 48, 5, padding='VALID', use_bias=True)
                      .MaxPooling('pool0', 2, padding='SAME')
                      .apply(quantized_act)
                      # 18
                      .Conv2D('conv1', 64, 3, padding='SAME')
                      .apply(fg)
                      .BatchNorm('bn1')
                      .apply(quantized_act)

                      .Conv2D('conv2', 64, 3, padding='SAME')
                      .apply(fg)
                      .BatchNorm('bn2')
                      .MaxPooling('pool1', 2, padding='SAME')
                      .apply(quantized_act)
                      # 9
                      .Conv2D('conv3', 128, 3, padding='VALID')
                      .apply(fg)
                      .BatchNorm('bn3')
                      .apply(quantized_act)
                      # 7
                      .Conv2D('conv4', 128, 3, padding='SAME')
                      .apply(fg)
                      .BatchNorm('bn4')
                      .apply(quantized_act)

                      .Conv2D('conv5', 128, 3, padding='VALID')
                      .apply(fg)
                      .BatchNorm('bn5')
                      .apply(quantized_act)
                      # 5
                      .tf.nn.dropout(keep_prob)
                      .Conv2D('conv6', 512, 5, padding='VALID')
                      .apply(fg)
                      .BatchNorm('bn6')
                      .apply(clipped_abs)
                      .FullyConnected('fc1', 10, nl=tf.identity)())
        tf.nn.softmax(logits, name='output')

        # Per-sample failure indicator, averaged and tracked as training error.
        wrong = prediction_incorrect(logits, label)
        train_error = tf.reduce_mean(wrong, name='train_error')
        add_moving_summary(train_error)

        xent = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=label)
        cost = tf.reduce_mean(xent, name='cross_entropy_loss')
        # Weight decay on the W of the fc layers only.
        wd_cost = regularize_cost('fc.*/W', l2_regularizer(1e-7))

        add_param_summary(('.*/W', ['histogram', 'rms']))
        self.cost = tf.add_n([cost, wd_cost], name='cost')
        add_moving_summary(cost, wd_cost, self.cost)
    def _build_graph(self, inputs):
        """Build a ResNet-101 based per-pixel classifier and its training cost.

        `inputs` is unpacked into `(image, label)`.  Named ops 'label',
        'prob', 'label4d', 'prediction' and 'cross_entropy_loss' are always
        created.  `self.cost` (cross-entropy plus decayed L2 weight cost) is
        only assigned when building a training tower.
        """
        def resnet101(image):
            """Return the backbone output for `image` (mode/depth are fixed)."""
            mode = 'resnet'
            depth = 101
            basicblock = preresnet_basicblock if mode == 'preact' else resnet_basicblock
            bottleneck = {
                'resnet': resnet_bottleneck_deeplab,
                'preact': preresnet_bottleneck,
                'se': se_resnet_bottleneck
            }[mode]
            # Blocks per group and block type, keyed by network depth.
            num_blocks, block_func = {
                18: ([2, 2, 2, 2], basicblock),
                34: ([3, 4, 6, 3], basicblock),
                50: ([3, 4, 6, 3], bottleneck),
                101: ([3, 4, 23, 3], bottleneck),
                152: ([3, 8, 36, 3], bottleneck)
            }[depth]

            def get_logits(image):
                with argscope(
                    [Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm],
                        data_format="NHWC"):
                    return resnet_backbone(
                        image,
                        num_blocks,
                        preresnet_group if mode == 'preact' else resnet_group,
                        block_func,
                        CLASS_NUM,
                        ASPP=False)

            return get_logits(image)

        image, label = inputs
        # Subtract a fixed per-channel constant from the input.
        image = image - tf.constant([104, 116, 122], dtype='float32')
        label = tf.identity(label, name="label")

        predict = resnet101(image)

        costs = []
        prob = tf.nn.softmax(predict, name='prob')

        label4d = tf.expand_dims(label, 3, name='label4d')

        cost = softmax_cross_entropy_with_ignore_label(logits=predict,
                                                       label=label4d,
                                                       class_num=CLASS_NUM)
        # Created only for its name; the tensor is not used in this method.
        tf.argmax(prob, axis=-1, name="prediction")
        cost = tf.reduce_mean(
            cost, name='cross_entropy_loss')  # the average cross-entropy loss
        costs.append(cost)

        if get_current_tower_context().is_training:
            # Weight-decay coefficient decays with the global step
            # (staircase, factor 0.7 every 80000 steps, starting at 2e-4).
            wd_w = tf.train.exponential_decay(2e-4, get_global_step_var(),
                                              80000, 0.7, True)
            wd_cost = tf.multiply(wd_w,
                                  regularize_cost('.*/W', tf.nn.l2_loss),
                                  name='wd_cost')
            costs.append(wd_cost)

            add_param_summary(('.*/W', ['histogram']))  # monitor W
            self.cost = tf.add_n(costs, name='cost')
Exemplo n.º 29
0
    def build_graph(self, x, center_label, heading_class_label, heading_residual_label, size_class_label,
                    size_residual_label,sem_cls_label, box_label_mask, vote_label, vote_label_mask,
                    scan_idx, max_gt_bboxes):
        """Build the detection graph and return the total loss tensor.

        The first three channels of `x` are used as point coordinates and any
        remaining channels as per-point features.  The graph consists of four
        set-abstraction layers, two feature-propagation layers, a voting
        module, a proposal module and the loss terms.  `scan_idx` and
        `max_gt_bboxes` are accepted but not used in this method.

        Returns:
            The total loss tensor, named 'total_loss'.
        """
        l0_xyz = x[:, :, :3]
        if x.shape[-1] <= 3:
            l0_points = None
        else:
            l0_points = x[:, :, 3:]

        end_points = {}

        # Set Abstraction layers: (scope, npoint, radius, nsample, mlp).
        sa_specs = (
            ('sa1', 2048, 0.2, 64, [64, 64, 128]),
            ('sa2', 1024, 0.4, 32, [128, 128, 256]),
            ('sa3', 512, 0.8, 16, [128, 128, 256]),
            ('sa4', 256, 1.2, 16, [128, 128, 256]),
        )
        # Index i holds the outputs of level i; level 0 is the raw input.
        xyz_levels = [l0_xyz]
        feat_levels = [l0_points]
        index_levels = [None]
        for sa_scope, sa_npoint, sa_radius, sa_nsample, sa_mlp in sa_specs:
            level_xyz, level_feats, level_inds = pointnet_sa_module(
                xyz_levels[-1], feat_levels[-1],
                npoint=sa_npoint, radius=sa_radius, nsample=sa_nsample,
                mlp=sa_mlp, mlp2=None, group_all=False, scope=sa_scope,
                use_xyz=True, normalize_xyz=True)
            xyz_levels.append(level_xyz)
            feat_levels.append(level_feats)
            index_levels.append(level_inds)

        # Feature Propagation layers: level 4 -> 3, then level 3 -> 2.
        l3_points = pointnet_fp_module(xyz_levels[3], xyz_levels[4],
                                       feat_levels[3], feat_levels[4],
                                       [256, 256], scope='fp1')
        seed_points = pointnet_fp_module(xyz_levels[2], xyz_levels[3],
                                         feat_levels[2], l3_points,
                                         [256, 256], scope='fp2')
        seed_xyz = xyz_levels[2]
        # Level-1 sampling indices, truncated to the number of seed points.
        fp2_inds = index_levels[1][:, 0:tf.shape(seed_xyz)[1]]

        # Voting Module layers
        vote_xyz, vote_features = self.hough_voting_mlp(seed_xyz, seed_points)

        # Proposal Module layers (operates on the votes).
        proposal_channels = 5 + 2 * config.NH + 4 * config.NS + config.NC
        proposals_xyz, proposals_output, _ = pointnet_sa_module(
            vote_xyz, vote_features,
            npoint=config.PROPOSAL_NUM,
            radius=0.3, nsample=64, mlp=[128, 128, 128],
            mlp2=[128, 128, proposal_channels],
            group_all=False, scope='proposal',
            use_xyz=True, normalize_xyz=True)
        end_points['proposals_xyz'] = proposals_xyz

        end_points = self.parse_outputs_to_tensor(proposals_output, end_points)

        self.calc_inference_v1(end_points)

        vote_loss = self.vote_reg_loss(seed_xyz, vote_xyz, fp2_inds, vote_label, vote_label_mask)

        objectness = self.compute_objectness_loss(proposals_xyz, center_label, end_points)
        objectness_loss, objectness_label, _objectness_mask, object_assignment = objectness

        loss_points = self.compute_box_loss_and_sem_loss(
            end_points, center_label,
            heading_class_label, heading_residual_label,
            size_class_label, size_residual_label, sem_cls_label,
            object_assignment, box_label_mask, objectness_label)

        # Box loss: class terms are weighted by 0.1, the others by 1.
        box_loss_sum = (loss_points['center_loss'] + 0.1 * loss_points['heading_cls_loss'] +
                        loss_points['heading_residual_loss'] + 0.1 * loss_points['size_cls_loss'] +
                        loss_points['size_residual_loss'])
        box_loss = tf.identity(box_loss_sum, name='box_loss')

        # NOTE: an L2 weight-decay term on '.*/W' (scaled by 1e-5) was
        # previously sketched here but is disabled; it is not part of the loss.
        total_cost = vote_loss + 0.5 * objectness_loss + 1. * box_loss + 0.1 * loss_points['sem_cls_loss']
        total_cost = tf.identity(total_cost, 'total_loss')

        summary.add_moving_summary(total_cost,
                                   vote_loss,
                                   objectness_loss, box_loss,
                                   loss_points['center_loss'],
                                   loss_points['center_loss_left'], loss_points['center_loss_right'],
                                   loss_points['heading_cls_loss'], loss_points['heading_residual_loss'],
                                   loss_points['size_cls_loss'], loss_points['size_residual_loss'],
                                   loss_points['sem_cls_loss'],
                                   decay=0)
        # Histogram / rms summaries for all weights matching '.*/W'.
        summary.add_param_summary(('.*/W', ['histogram', 'rms']))
        # The total cost is what gets optimized.
        return total_cost
Exemplo n.º 30
0
    def _build_graph(self, inputs):
        """Build the two-headed segmentation graph; set `self.cost` in training.

        `inputs` is unpacked into `(images, truemap_coded)`.  From
        `truemap_coded`, channel 0 is used as the 'np' target, channels 1:4
        as the 'mk' target and the last channel as a per-pixel penalty map.
        The named op 'predmap-coded' concatenates the 'np' probability with
        the first two 'mk' probabilities so inference can fetch everything
        at once.

        When building a training tower, the losses listed in
        `self.loss_term` ('bce' and/or 'dice') are summed with an L2 weight
        cost into `self.cost`, and image summaries are emitted.  The method
        returns None.
        """

        is_training = get_current_tower_context().is_training

        images, truemap_coded = inputs
        orig_imgs = images

        pen_map = truemap_coded[..., -1]

        true_np = truemap_coded[..., 0]
        true_np = tf.cast(true_np, tf.int32)
        true_np = tf.identity(true_np, name="truemap-np")
        one_np = tf.one_hot(true_np, 2, axis=-1)
        true_np = tf.expand_dims(true_np, axis=-1)

        true_mk = truemap_coded[..., 1:4]
        true_mk = tf.cast(true_mk, tf.int32)
        true_mk = tf.identity(true_mk, name="truemap-mk")
        one_mk = tf.cast(true_mk, tf.float32)

        ####
        with argscope(
                Conv2D,
                activation=tf.identity,
                use_bias=False,  # K.he initializer
                W_init=tf.variance_scaling_initializer(scale=2.0,
                                                       mode="fan_out"),
        ), argscope([Conv2D], data_format=self.data_format):

            i = images if not self.input_norm else images / 255.0

            ####
            d = encoder(
                "encoder",
                i,
                self.basis_filter_list,
                self.rot_matrix_list,
                self.nr_orients,
                self.filter_type,
                is_training,
            )
            ####
            feat = decoder(
                "decoder",
                d,
                self.basis_filter_list,
                self.rot_matrix_list,
                self.nr_orients,
                self.filter_type,
                is_training,
            )

            # 'np' head: 2-way softmax; channel 1 is exposed as the probability.
            feat_np = Conv2D("feat_np", feat, 96, 1, use_bias=True, nl=BNReLU)
            o_logi_np = Conv2D("output_np",
                               feat_np,
                               2,
                               1,
                               use_bias=True,
                               nl=tf.identity)
            soft_np = tf.nn.softmax(o_logi_np, axis=-1)
            prob_np = tf.identity(soft_np[..., 1], name="predmap-prob")
            prob_np = tf.expand_dims(prob_np, -1)

            # 'mk' head: 3-way softmax; the first two channels are exposed.
            feat_mk = Conv2D("feat_mk", feat, 96, 1, use_bias=True, nl=BNReLU)
            o_logi_mk = Conv2D("output_mk",
                               feat_mk,
                               3,
                               1,
                               use_bias=True,
                               nl=tf.identity)
            soft_mk = tf.nn.softmax(o_logi_mk, axis=-1)
            # NOTE(review): this reuses the op name "predmap-prob" already
            # taken by the 'np' head above -- confirm that is intended.
            prob_mk = tf.identity(soft_mk[..., :2], name="predmap-prob")

            # encoded so that inference can extract all output at once
            predmap_coded = tf.concat([prob_np, prob_mk],
                                      axis=-1,
                                      name="predmap-coded")

        ####
        if is_training:
            # ---- LOSS ----#
            loss = 0
            # NOTE(review): `weight` is read from self.loss_term but never
            # applied to the term below -- confirm whether the terms are
            # meant to be weighted.
            for term, weight in self.loss_term.items():
                if term == "bce":
                    term_loss_np = categorical_crossentropy(soft_np, one_np)
                    term_loss_np = tf.reduce_mean(term_loss_np,
                                                  name="loss-bce-np")

                    # Only the 'mk' cross-entropy is scaled by the penalty map.
                    term_loss_mk = categorical_crossentropy(soft_mk, one_mk)
                    term_loss_mk = tf.reduce_mean(term_loss_mk * pen_map,
                                                  name="loss-bce-mk")
                elif term == "dice":
                    # Dispatch on the current term (not on dict membership)
                    # so unsupported terms reach the assertion below.
                    # branch 1
                    term_loss_np = dice_loss(
                        soft_np[..., 0], one_np[..., 0]) + dice_loss(
                            soft_np[..., 1], one_np[..., 1])
                    term_loss_np = tf.identity(term_loss_np,
                                               name="loss-dice-np")

                    term_loss_mk = dice_loss(
                        soft_mk[..., 0], one_mk[..., 0]) + dice_loss(
                            soft_mk[..., 1], one_mk[..., 1])
                    term_loss_mk = tf.identity(term_loss_mk,
                                               name="loss-dice-mk")
                else:
                    assert False, "Not support loss term: %s" % term
                add_moving_summary(term_loss_np)
                add_moving_summary(term_loss_mk)
                loss += term_loss_np + term_loss_mk

            ### combine the loss into single cost function
            wd_loss = regularize_cost(".*/W",
                                      l2_regularizer(1.0e-7),
                                      name="l2_wd_loss")
            add_moving_summary(wd_loss)
            self.cost = tf.identity(loss + wd_loss, name="overall-loss")
            add_moving_summary(self.cost)
            ####

            add_param_summary((".*/W", ["histogram"]))  # monitor W

            ### logging visual sthg
            orig_imgs = tf.cast(orig_imgs, tf.uint8)
            tf.summary.image("input", orig_imgs, max_outputs=1)

            pred_np = colorize(prob_np[..., 0], cmap="jet")
            true_np = colorize(true_np[..., 0], cmap="jet")

            pred_mk_blb = colorize(prob_mk[..., 0], cmap="jet")
            true_mk_blb = colorize(true_mk[..., 0], cmap="jet")
            pred_mk_cnt = colorize(prob_mk[..., 1], cmap="jet")
            true_mk_cnt = colorize(true_mk[..., 1], cmap="jet")

            # Concatenate input, predictions and targets along axis 2.
            viz = tf.concat(
                [
                    orig_imgs,
                    pred_np,
                    pred_mk_blb,
                    pred_mk_cnt,
                    true_np,
                    true_mk_blb,
                    true_mk_cnt,
                ],
                2,
            )

            # Stack the first and last batch items into a single image.
            viz = tf.concat([viz[0], viz[-1]], axis=0)
            viz = tf.expand_dims(viz, axis=0)
            tf.summary.image("output", viz, max_outputs=1)

        return
Exemplo n.º 31
0
    def get_logits(self, image):
        """Build the quantized network on `image` and return its logits.

        Weights, activations and gradients go through the functions from
        `get_dorefa`; when `BITW == 't'` the weight function is replaced by
        `ternarize`.  Also creates a softmax op named 'output' and adds
        parameter summaries for '.*/W'.
        """
        if BITW == 't':
            # Full-precision activation/gradient functions, ternary weights.
            _, fa, fg = get_dorefa(32, 32, 32)
            fw = ternarize
        else:
            fw, fa, fg = get_dorefa(BITW, BITA, BITG)

        def quantize_variable(v):
            # Used with remap_variables: only variables whose name ends in
            # 'W' are quantized, skipping the first ('conv0') and last
            # ('fct') layers.
            name = v.op.name
            eligible = (name.endswith('W')
                        and 'conv0' not in name
                        and 'fct' not in name)
            if not eligible:
                return v
            logger.info("Quantizing weight {}".format(v.op.name))
            return fw(v)

        def bounded(x):
            # 32-bit activations still use relu; otherwise clip to [0, 1].
            return tf.nn.relu(x) if BITA == 32 else tf.clip_by_value(x, 0.0, 1.0)

        def quantized_act(x):
            return fa(bounded(x))

        with remap_variables(quantize_variable), \
                argscope([Conv2D, BatchNorm, MaxPooling], data_format='channels_first'), \
                argscope(BatchNorm, momentum=0.9, epsilon=1e-4), \
                argscope(Conv2D, use_bias=False):
            net = LinearWrap(image)

            # First layer: keeps its bias.
            net = (net
                   .Conv2D('conv0', 96, 12, strides=4, padding='VALID', use_bias=True)
                   .apply(quantized_act))

            net = (net
                   .Conv2D('conv1', 256, 5, padding='SAME', split=2)
                   .apply(fg)
                   .BatchNorm('bn1')
                   .MaxPooling('pool1', 3, 2, padding='SAME')
                   .apply(quantized_act))

            net = (net
                   .Conv2D('conv2', 384, 3)
                   .apply(fg)
                   .BatchNorm('bn2')
                   .MaxPooling('pool2', 3, 2, padding='SAME')
                   .apply(quantized_act))

            net = (net
                   .Conv2D('conv3', 384, 3, split=2)
                   .apply(fg)
                   .BatchNorm('bn3')
                   .apply(quantized_act))

            net = (net
                   .Conv2D('conv4', 256, 3, split=2)
                   .apply(fg)
                   .BatchNorm('bn4')
                   .MaxPooling('pool4', 3, 2, padding='VALID')
                   .apply(quantized_act))

            net = (net
                   .FullyConnected('fc0', 4096)
                   .apply(fg)
                   .BatchNorm('bnfc0')
                   .apply(quantized_act))

            # The activation before the final layer is bounded but not
            # passed through `fa`.
            net = (net
                   .FullyConnected('fc1', 4096, use_bias=False)
                   .apply(fg)
                   .BatchNorm('bnfc1')
                   .apply(bounded))

            logits = net.FullyConnected('fct', 1000, use_bias=True)()
        add_param_summary(('.*/W', ['histogram', 'rms']))
        tf.nn.softmax(logits, name='output')  # for prediction
        return logits
Exemplo n.º 32
0
    def build_graph(self, _, x):
        """Build the voting / proposal graph and return the training cost.

        The graph consists of four set-abstraction layers, two feature
        propagation layers, a fully-connected voting module and a proposal
        set-abstraction layer.  In a training tower three losses are combined
        into ``total_cost``: a vote regression loss, an objectness
        classification loss and a box-fitting loss.

        Args:
            _: unused input.
            x: point cloud tensor, used both as the coordinates and as the
                per-point features of the first set-abstraction layer.  The
                training path treats it as B * P * 3 (taken from the shape
                comments below -- TODO confirm against the input signature).

        Returns:
            The ``total_cost`` tensor when building a training tower, ``None``
            when building an inference tower (which only creates the named
            tensors ``bboxes_pred``, ``class_scores_pred`` and ``batch_idx``).
        """
        l0_xyz = x
        l0_points = x

        # Set Abstraction layers
        l1_xyz, l1_points, l1_indices = pointnet_sa_module(l0_xyz,
                                                           l0_points,
                                                           npoint=2048,
                                                           radius=0.2,
                                                           nsample=64,
                                                           mlp=[64, 64, 128],
                                                           mlp2=None,
                                                           group_all=False,
                                                           scope='sa1')
        l2_xyz, l2_points, l2_indices = pointnet_sa_module(l1_xyz,
                                                           l1_points,
                                                           npoint=1024,
                                                           radius=0.4,
                                                           nsample=64,
                                                           mlp=[128, 128, 256],
                                                           mlp2=None,
                                                           group_all=False,
                                                           scope='sa2')
        l3_xyz, l3_points, l3_indices = pointnet_sa_module(l2_xyz,
                                                           l2_points,
                                                           npoint=512,
                                                           radius=0.8,
                                                           nsample=64,
                                                           mlp=[128, 128, 256],
                                                           mlp2=None,
                                                           group_all=False,
                                                           scope='sa3')
        l4_xyz, l4_points, l4_indices = pointnet_sa_module(l3_xyz,
                                                           l3_points,
                                                           npoint=256,
                                                           radius=1.2,
                                                           nsample=64,
                                                           mlp=[128, 128, 256],
                                                           mlp2=None,
                                                           group_all=False,
                                                           scope='sa4')
        # Feature Propagation layers
        l3_points = pointnet_fp_module(l3_xyz,
                                       l4_xyz,
                                       l3_points,
                                       l4_points, [256, 256],
                                       scope='fp1')
        seeds_points = pointnet_fp_module(l2_xyz,
                                          l3_xyz,
                                          l2_points,
                                          l3_points, [256, 256],
                                          scope='fp2')
        seeds_xyz = l2_xyz

        # Voting Module layers
        offset = tf.reshape(tf.concat([seeds_xyz, seeds_points], 2),
                            [-1, 256 + 3])
        units = [256, 256, 256 + 3]
        last_layer = len(units) - 1
        for i, n_out in enumerate(units):
            # BNReLU on the hidden layers, no activation on the output layer
            offset = FullyConnected(
                'voting%d' % i,
                offset,
                n_out,
                activation=BNReLU if i < last_layer else None)
        # 1024 is the number of seeds produced by 'sa2' (npoint=1024)
        offset = tf.reshape(offset, [-1, 1024, 256 + 3])

        # B * N * 3
        votes = tf.concat([seeds_xyz, seeds_points], 2) + offset
        votes_xyz = votes[:, :, :3]
        '''
        dist2center = tf.abs(tf.expand_dims(seeds_xyz, 2) - tf.expand_dims(bboxes_xyz, 1))
        surface_ind = tf.less(dist2center, tf.expand_dims(bboxes_lwh, 1) / 2.)  # B * N * BB * 3, bool
        surface_ind = tf.equal(tf.count_nonzero(surface_ind, -1), 3)  # B * N * BB
        surface_ind = tf.greater_equal(tf.count_nonzero(surface_ind, -1), 1)  # B * N, should be in at least one bbox
        '''
        '''
        dist2center_norm = tf.norm(dist2center, axis=-1)  # B * N * BB
        votes_assignment = tf.argmin(dist2center_norm, -1, output_type=tf.int32)  # B * N, int
        bboxes_xyz_votes_gt = tf.gather_nd(bboxes_xyz, tf.stack([
            tf.tile(tf.expand_dims(tf.range(tf.shape(votes_assignment)[0]), -1), [1, tf.shape(votes_assignment)[1]]),
            votes_assignment], 2))  # B * N * 3
        vote_reg_loss = tf.reduce_mean(tf.norm(votes_xyz - bboxes_xyz_votes_gt, ord=1, axis=-1) * tf.cast(surface_ind, tf.float32), name='vote_reg_loss')
        '''
        votes_points = votes[:, :, 3:]

        # Proposal Module layers
        # Farthest point sampling on seeds
        proposals_xyz, proposals_output, _ = pointnet_sa_module(
            votes_xyz,
            votes_points,
            npoint=config.PROPOSAL_NUM,
            radius=0.3,
            nsample=64,
            mlp=[128, 128, 128],
            # mlp2=[128, 128, 5+2 * config.NH+4 * config.NS+config.NC],
            mlp2=[128, 128, config.PARA_MUN],
            group_all=False,
            scope='proposal',
            sample_xyz=seeds_xyz)
        '''
        nms_iou = tf.get_variable('nms_iou', shape=[], initializer=tf.constant_initializer(0.25), trainable=False)
        '''
        if not get_current_tower_context().is_training:
            # NOTE(review): this branch slices proposals_output with
            # config.NH / config.NS / config.NC, while the training path below
            # assumes an 11-column layout -- the two look inconsistent; confirm
            # which layout config.PARA_MUN actually produces.

            def get_3d_bbox(box_size, heading_angle, center):
                # Build the 8 corners of each box: rotate the axis-aligned
                # corners around the second axis by heading_angle and shift
                # them by the box center.
                batch_size = tf.shape(heading_angle)[0]
                c = tf.cos(heading_angle)
                s = tf.sin(heading_angle)
                zeros = tf.zeros_like(c)
                ones = tf.ones_like(c)
                rotation = tf.reshape(
                    tf.stack([c, zeros, s, zeros, ones, zeros, -s, zeros, c],
                             -1), tf.stack([batch_size, -1, 3, 3]))
                l, w, h = box_size[..., 0], box_size[..., 1], box_size[
                    ..., 2]  # lwh(xzy) order!!!
                corners = tf.reshape(
                    tf.stack([
                        l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2,
                        -l / 2, h / 2, h / 2, h / 2, h / 2, -h / 2, -h / 2,
                        -h / 2, -h / 2, w / 2, -w / 2, -w / 2, w / 2, w / 2,
                        -w / 2, -w / 2, w / 2
                    ], -1), tf.stack([batch_size, -1, 3, 8]))
                return tf.einsum('ijkl,ijlm->ijmk',
                                 rotation, corners) + tf.expand_dims(
                                     center, 2)  # B * N * 8 * 3

            class_mean_size_tf = tf.constant(class_mean_size)
            size_cls_pred = tf.argmax(
                proposals_output[..., 5 + 2 * config.NH:5 + 2 * config.NH +
                                 config.NS],
                axis=-1)
            size_cls_pred_onehot = tf.one_hot(size_cls_pred,
                                              depth=config.NS,
                                              axis=-1)  # B * N * NS
            size_residual_pred = tf.reduce_sum(
                tf.expand_dims(size_cls_pred_onehot, -1) * tf.reshape(
                    proposals_output[..., 5 + 2 * config.NH + config.NS:5 +
                                     2 * config.NH + 4 * config.NS],
                    (-1, config.PROPOSAL_NUM, config.NS, 3)),
                axis=2)
            size_pred = tf.gather_nd(
                class_mean_size_tf,
                tf.expand_dims(size_cls_pred, -1)) * tf.maximum(
                    1 + size_residual_pred, 1e-6)  # B * N * 3: size
            # with tf.control_dependencies([tf.print(size_pred[0, 0, 2])]):
            center_pred = proposals_xyz + proposals_output[...,
                                                           2:5]  # B * N * 3
            heading_cls_pred = tf.argmax(proposals_output[...,
                                                          5:5 + config.NH],
                                         axis=-1)
            heading_cls_pred_onehot = tf.one_hot(heading_cls_pred,
                                                 depth=config.NH,
                                                 axis=-1)
            heading_residual_pred = tf.reduce_sum(
                heading_cls_pred_onehot *
                proposals_output[..., 5 + config.NH:5 + 2 * config.NH],
                axis=2)
            heading_pred = tf.floormod(
                (tf.cast(heading_cls_pred, tf.float32) * 2 +
                 heading_residual_pred) * np.pi / config.NH, 2 * np.pi)

            # with tf.control_dependencies([tf.print(size_residual_pred[0, :10, :]), tf.print(size_pred[0, :10, :])]):
            bboxes = get_3d_bbox(
                size_pred, heading_pred,
                center_pred)  # B * N * 8 * 3,  lhw(xyz) order!!!

            # bbox_corners = tf.concat([bboxes[:, :, 6, :], bboxes[:, :, 0, :]], axis=-1)  # B * N * 6,  lhw(xyz) order!!!
            # with tf.control_dependencies([tf.print(bboxes[0, 0])]):
            # NOTE(review): `nms_iou` is not assigned anywhere in this method
            # (its only definition sits inside the disabled string block
            # above); unless it is a module-level name this call raises
            # NameError -- confirm before relying on the inference tower.
            nms_idx = NMS3D(bboxes,
                            tf.reduce_max(proposals_output[..., -config.NC:],
                                          axis=-1), proposals_output[..., :2],
                            nms_iou)  # Nnms * 2

            bboxes_pred = tf.gather_nd(bboxes, nms_idx,
                                       name='bboxes_pred')  # Nnms * 8 * 3
            class_scores_pred = tf.gather_nd(
                proposals_output[..., -config.NC:],
                nms_idx,
                name='class_scores_pred')  # Nnms * C
            batch_idx = tf.identity(
                nms_idx[:, 0], name='batch_idx'
            )  # Nnms, this is used to identify between batches

            return

        # calculate positive and negative proposal idxes
        # (the supervised, ground-truth based losses below are disabled; no
        # ground-truth box tensor is read by this method)
        '''
        bboxes_labels_gt = semantic_labels  # B * BB
        bboxes_heading_labels_gt = heading_labels
        bboxes_heading_residuals_gt = heading_residuals
        bboxes_size_labels_gt = size_labels
        bboxes_size_residuals_gt = size_residuals
        dist_mat = tf.norm(tf.expand_dims(proposals_xyz, 2) - tf.expand_dims(bboxes_xyz_gt, 1), axis=-1)  # B * PR * BB
        bboxes_assignment = tf.argmin(dist_mat, axis=-1)  # B * PR
        min_dist = tf.reduce_min(dist_mat, axis=-1)
        '''
        '''
        positive_idxes = tf.where(min_dist < config.POSITIVE_THRES)  # Np * 2
        # with tf.control_dependencies([tf.print(tf.shape(positive_idxes))]):
        negative_idxes = tf.where(min_dist > config.NEGATIVE_THRES)  # Nn * 2
        positive_gt_idxes = tf.stack([positive_idxes[:, 0], tf.gather_nd(bboxes_assignment, positive_idxes)], axis=1)

        # objectiveness loss
        pos_obj_cls_score = tf.gather_nd(obj_cls_score, positive_idxes)
        pos_obj_cls_gt = tf.ones([tf.shape(positive_idxes)[0]], dtype=tf.int32)
        neg_obj_cls_score = tf.gather_nd(obj_cls_score, negative_idxes)
        neg_obj_cls_gt = tf.zeros([tf.shape(negative_idxes)[0]], dtype=tf.int32)
        obj_cls_loss = tf.identity(tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pos_obj_cls_score, labels=pos_obj_cls_gt))
                                   + tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=neg_obj_cls_score, labels=neg_obj_cls_gt)), name='obj_cls_loss')
        obj_correct = tf.concat([tf.cast(tf.nn.in_top_k(pos_obj_cls_score, pos_obj_cls_gt, 1), tf.float32),
                                 tf.cast(tf.nn.in_top_k(neg_obj_cls_score, neg_obj_cls_gt, 1), tf.float32)], axis=0, name='obj_correct')
        obj_accuracy = tf.reduce_mean(obj_correct, name='obj_accuracy')
        '''
        '''
        # center regression losses
        center_gt = tf.gather_nd(bboxes_xyz_gt, positive_gt_idxes)
        delta_predicted = tf.gather_nd(proposals_output[..., 2:5], positive_idxes)
        delta_gt = center_gt - tf.gather_nd(proposals_xyz, positive_idxes)
        center_loss = tf.reduce_mean(tf.reduce_sum(tf.losses.huber_loss(labels=delta_gt, predictions=delta_predicted, reduction=tf.losses.Reduction.NONE), axis=-1))
        '''
        '''
        # Appendix A1: chamfer loss, assignment at least one bbox to each gt bbox
        bboxes_assignment_dual = tf.argmin(dist_mat, axis=1)  # B * BB
        batch_idx = tf.tile(tf.expand_dims(tf.range(tf.shape(bboxes_assignment_dual, out_type=tf.int64)[0]), axis=-1), [1, tf.shape(bboxes_assignment_dual)[1]])  # B * BB
        delta_gt_dual = bboxes_xyz_gt - tf.gather_nd(proposals_xyz, tf.stack([batch_idx, bboxes_assignment_dual], axis=-1))  # B * BB * 3
        delta_predicted_dual = tf.gather_nd(proposals_output[..., 2:5], tf.stack([batch_idx, bboxes_assignment_dual], axis=-1))  # B * BB * 3
        center_loss_dual = tf.reduce_mean(tf.reduce_sum(tf.losses.huber_loss(labels=delta_gt_dual, predictions=delta_predicted_dual, reduction=tf.losses.Reduction.NONE), axis=-1))

        # add up
        center_loss += center_loss_dual
        '''
        '''
        # Heading loss
        heading_cls_gt = tf.gather_nd(bboxes_heading_labels_gt, positive_gt_idxes)
        heading_cls_score = tf.gather_nd(proposals_output[..., 5:5+config.NH], positive_idxes)
        heading_cls_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=heading_cls_score, labels=heading_cls_gt))

        heading_cls_gt_onehot = tf.one_hot(heading_cls_gt,  depth=config.NH, on_value=1, off_value=0, axis=-1)  # Np * NH
        heading_residual_gt = tf.gather_nd(bboxes_heading_residuals_gt, positive_gt_idxes)  # Np
        heading_residual_predicted = tf.gather_nd(proposals_output[..., 5 + config.NH:5+2 * config.NH], positive_idxes)  # Np * NH
        heading_residual_loss = tf.losses.huber_loss(labels=heading_residual_gt,
                                                     predictions=tf.reduce_sum(heading_residual_predicted * tf.to_float(heading_cls_gt_onehot), axis=1), reduction=tf.losses.Reduction.MEAN)

        # Size loss
        size_cls_gt = tf.gather_nd(bboxes_size_labels_gt, positive_gt_idxes)
        size_cls_score = tf.gather_nd(proposals_output[..., 5+2 * config.NH:5+2 * config.NH + config.NS], positive_idxes)
        size_cls_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=size_cls_score, labels=size_cls_gt))

        size_cls_gt_onehot = tf.one_hot(size_cls_gt, depth=config.NS, on_value=1, off_value=0, axis=-1)  # Np * NS
        size_cls_gt_onehot = tf.tile(tf.expand_dims(tf.to_float(size_cls_gt_onehot), -1), [1, 1, 3])  # Np * NS * 3
        size_residual_gt = tf.gather_nd(bboxes_size_residuals_gt, positive_gt_idxes)  # Np * 3
        size_residual_predicted = tf.reshape(tf.gather_nd(proposals_output[..., 5+2 * config.NH + config.NS:5+2 * config.NH + 4 * config.NS], positive_idxes), (-1, config.NS, 3))  # Np * NS * 3
        size_residual_loss = tf.reduce_mean(tf.reduce_sum(tf.losses.huber_loss(labels=size_residual_gt,
                                                                               predictions=tf.reduce_sum(size_residual_predicted * tf.to_float(size_cls_gt_onehot), axis=1), reduction=tf.losses.Reduction.NONE), axis=-1))

        box_loss = center_loss + 0.1 * heading_cls_loss + heading_residual_loss + 0.1 * size_cls_loss + size_residual_loss

        # semantic loss
        sem_cls_score = tf.gather_nd(proposals_output[..., -config.NC:], positive_idxes)
        sem_cls_gt = tf.gather_nd(bboxes_labels_gt, positive_gt_idxes)  # Np
        sem_cls_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=sem_cls_score, labels=sem_cls_gt),
            name='sem_cls_loss')
        sem_correct = tf.cast(tf.nn.in_top_k(sem_cls_score, sem_cls_gt, 1), tf.float32, name='sem_correct')
        sem_accuracy = tf.reduce_mean(sem_correct, name='sem_accuracy')
        '''
        '''
        # This will monitor training error & accuracy (in a moving average fashion). The value will be automatically
        # 1. written to tensosrboard
        # 2. written to stat.json
        # 3. printed after each epoch
        summary.add_moving_summary(obj_accuracy, sem_accuracy)
        '''

        # Use a regex to find parameters to apply weight decay.
        # Here we apply a weight decay on all W (weight matrix) of all fc layers
        # If you don't like regex, you can certainly define the cost in any other methods.
        # no weight decay
        # wd_cost = tf.multiply(1e-5,
        #                       regularize_cost('.*/W', tf.nn.l2_loss),
        #                       name='regularize_loss')
        ''''
        # bboxes_xyz(the gt of bounding box center): B * BB * 3 (BB is the num of bounding box)
        # votes_xys: B * N * 3 (N is the number of votes)
        # when compare bboxes_xyz and votes_xyz, expand dims to B * N * BB * 3
        # after expand_dims, become B * 1 * BB * 3, B * N * 1 * 3, Tensorflow will use broadcast
        # proposals_xyz: B * PR * 3 (PR is the num of proposal)
        '''
        # vote_reg_loss
        # refer to line 61 in model.py when writing these codes
        # TODO: Here, we use the nearest center as the GT, need to implement the version that using the closest box's
        #  center as GT
        vote2proposal_center = tf.abs(
            tf.expand_dims(votes_xyz, 2) -
            tf.expand_dims(proposals_xyz, 1))  # B * N * PR * 3
        vote2proposal_center_norm = tf.norm(vote2proposal_center,
                                            axis=-1)  # B * N * PR
        votes_assignment = tf.argmin(vote2proposal_center_norm,
                                     -1,
                                     output_type=tf.int32)  # B * N, int
        votes_gt = tf.gather_nd(
            proposals_xyz,
            tf.stack([
                tf.tile(input=tf.expand_dims(
                    tf.range(tf.shape(votes_assignment)[0]), -1),
                        multiples=[1, tf.shape(votes_assignment)[1]]),
                votes_assignment
            ], 2)
        )  # gather a B * N * 3 tensor from B * PR * 3 according to a B * N(votes_assignment)
        # the indices will be B * N * 2, indices[b, n] = [b, votes_assignment[b, n]]
        # the nearest proposal center is treated as a fixed target
        votes_gt_no_gradient = tf.stop_gradient(votes_gt)
        vote_reg_loss = tf.reduce_mean(tf.norm(votes_xyz -
                                               votes_gt_no_gradient,
                                               ord=1,
                                               axis=-1),
                                       name='vote_reg_loss')

        # obj_cls_loss & box_loss
        # First decide which box it is fit with for every point
        '''
        we assume that the proposals_output is B * PR * 11(2 objectness, 3 xyz, 3 lwh, 3 angles)
        data_idx is B * P * 3 (P is the number of total points)
        we want to get pts_assignment of B * P, pts_fit_loss of B * P
        '''
        # the rotation angle of each points relative to the proposal boxes
        alphas_star = -proposals_output[:, :, 8]  # B * PR
        betas_star = -proposals_output[:, :, 9]  # B * PR
        gammas_star = -proposals_output[:, :, 10]  # B * PR
        # referring to https://en.wikipedia.org/wiki/Rotation_matrix#In_three_dimensions
        # rotation matrix
        # TODO: When do visualization, the meaning of the angles should be consistent
        pr = alphas_star.shape[1]
        p = x.shape[1]
        # Constant entries are derived from the angle tensor itself instead of
        # its static shape, so an unknown batch dimension does not break
        # graph construction.
        ones = tf.ones_like(alphas_star)
        zeros = tf.zeros_like(alphas_star)
        # rotation about the first axis, row-major 3 x 3 entries
        r_alphas = tf.stack([
            ones, zeros, zeros,
            zeros, tf.cos(alphas_star), -tf.sin(alphas_star),
            zeros, tf.sin(alphas_star), tf.cos(alphas_star)
        ], axis=2)
        # rotation about the second axis
        r_betas = tf.stack([
            tf.cos(betas_star), zeros, tf.sin(betas_star),
            zeros, ones, zeros,
            -tf.sin(betas_star), zeros, tf.cos(betas_star)
        ], axis=2)
        # rotation about the third axis
        r_gammas = tf.stack([
            tf.cos(gammas_star), -tf.sin(gammas_star), zeros,
            tf.sin(gammas_star), tf.cos(gammas_star), zeros,
            zeros, zeros, ones
        ], axis=2)
        r_alphas = tf.reshape(r_alphas, shape=[-1, pr, 3, 3])
        r_betas = tf.reshape(r_betas, shape=[-1, pr, 3, 3])
        r_gammas = tf.reshape(r_gammas, shape=[-1, pr, 3, 3])
        r_matrix = tf.linalg.matmul(r_alphas,
                                    tf.linalg.matmul(
                                        r_betas, r_gammas))  # B * PR * 3 * 3
        r_matrix_expand = tf.expand_dims(r_matrix,
                                         axis=1)  # B * 1 * PR * 3 * 3
        r_matrix_tile = tf.tile(r_matrix_expand,
                                multiples=[1, p, 1, 1,
                                           1])  # B * P * PR * 3 * 3
        x_expand = tf.expand_dims(tf.expand_dims(x, axis=2),
                                  axis=-1)  # B * P * 1 * 3 * 1 from B * P * 3
        # here, we need column vector to do the multiplication,
        x_tile = tf.tile(x_expand, multiples=[1, 1, pr, 1,
                                              1])  # B * P * PR * 3 * 1
        # Only the trailing column-vector axis is squeezed; an unrestricted
        # squeeze would also drop a batch or proposal axis of size 1.
        rotated_data_idx = tf.squeeze(tf.linalg.matmul(
            r_matrix_tile, x_tile), axis=-1)  # B * P * PR * 3
        pts_to_box_assignment, pts_to_box_distance = pts2box(
            rotated_data_idx, proposals_output[:, :, 2:8])
        # both are B * P & B * P
        # obj_cls_loss
        # abandon the point at the origin: a point sits at the origin when
        # none of its coordinates is non-zero
        origin_index = tf.equal(tf.count_nonzero(x, axis=-1),
                                0)  # B * P, True for points at the origin
        not_origin = 1 - tf.cast(origin_index,
                                 dtype=tf.float32)  # B * P, 1. for kept points
        is_not_origin = tf.tile(tf.expand_dims(not_origin, axis=-1),
                                multiples=[1, 1, pr])  # B * P * PR
        proposal_fit_count = tf.count_nonzero(tf.math.multiply(
            tf.one_hot(pts_to_box_assignment, depth=pr), is_not_origin),
                                              axis=1)  # B * PR
        obj_gt = tf.math.greater(
            proposal_fit_count,
            config.POSITIVE_THRES_NUM)  # B * PR, bool, True for positive
        # The sparse cross-entropy expects integer class ids of rank
        # (logits rank - 1), not one-hot vectors.
        obj_cls_gt = tf.cast(obj_gt, tf.int32)  # B * PR, 0 or 1
        obj_cls_score = tf.identity(proposals_output[..., :2],
                                    'obj_scores')  # B * PR * 2
        # averaged over all proposals, like the disabled supervised version
        obj_cls_loss = tf.identity(tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=obj_cls_score, labels=obj_cls_gt)),
                                   name='obj_cls_loss')

        # box_loss
        pos_pts_to_box_distance = pos_pts2box(rotated_data_idx,
                                              proposals_output[:, :,
                                                               2:8], obj_gt)
        # points at the origin do not contribute to the box loss
        # (assumes pos_pts2box returns a float B * P tensor -- TODO confirm)
        box_loss = tf.math.reduce_sum(
            tf.math.multiply(pos_pts_to_box_distance, not_origin))
        # total_cost = vote_reg_loss + 0.5 * obj_cls_loss + 1. * box_loss + 0.1 * sem_cls_loss
        total_cost = vote_reg_loss + 0.5 * obj_cls_loss + 1. * box_loss
        total_cost = tf.identity(total_cost, name='total_cost')
        summary.add_moving_summary(total_cost)

        # monitor histogram of all weight (of conv and fc layers) in tensorboard
        summary.add_param_summary(('.*/W', ['histogram', 'rms']))
        # the function should return the total cost to be optimized
        return total_cost
Exemplo n.º 33
0
    def _build_graph(self, inputs):
        """Build the DoReFa-quantized digit classifier and define ``self.cost``.

        ``inputs`` unpacks to ``(image, label)``.  Besides ``self.cost`` the
        graph exposes a softmax tensor named ``output`` and registers moving
        summaries for the training error and the individual cost terms.
        """
        image, label = inputs
        training = get_current_tower_context().is_training

        # quantizers for weights, activations and gradients
        fw, fa, fg = get_dorefa(BITW, BITA, BITG)

        def binarize_weight(v):
            # Hook for remap_variables: only variables whose name ends in 'W'
            # and mentions neither 'conv0' nor 'fc' are passed through fw;
            # everything else is returned untouched.
            var_name = v.op.name
            if (var_name.endswith('W')
                    and 'conv0' not in var_name
                    and 'fc' not in var_name):
                logger.info("Binarizing weight {}".format(var_name))
                return fw(v)
            return v

        def cabs(x):
            # absolute value clipped from above at 1
            return tf.minimum(1.0, tf.abs(x), name='cabs')

        def activate(x):
            # clipped absolute value followed by activation quantization
            return fa(cabs(x))

        image = image / 256.0
        keep_prob = 0.5 if training else 1.0

        with remap_variables(binarize_weight), \
                argscope(BatchNorm, decay=0.9, epsilon=1e-4), \
                argscope(Conv2D, use_bias=False, nl=tf.identity):
            net = LinearWrap(image)

            # first layer keeps its bias
            net = (net.Conv2D('conv0', 48, 5, padding='VALID', use_bias=True)
                   .MaxPooling('pool0', 2, padding='SAME')
                   .apply(activate))
            # author's size note: 18
            net = (net.Conv2D('conv1', 64, 3, padding='SAME')
                   .apply(fg)
                   .BatchNorm('bn1')
                   .apply(activate))
            net = (net.Conv2D('conv2', 64, 3, padding='SAME')
                   .apply(fg)
                   .BatchNorm('bn2')
                   .MaxPooling('pool1', 2, padding='SAME')
                   .apply(activate))
            # author's size note: 9
            net = (net.Conv2D('conv3', 128, 3, padding='VALID')
                   .apply(fg)
                   .BatchNorm('bn3')
                   .apply(activate))
            # author's size note: 7
            net = (net.Conv2D('conv4', 128, 3, padding='SAME')
                   .apply(fg)
                   .BatchNorm('bn4')
                   .apply(activate))
            net = (net.Conv2D('conv5', 128, 3, padding='VALID')
                   .apply(fg)
                   .BatchNorm('bn5')
                   .apply(activate))
            # author's size note: 5
            net = net.tf.nn.dropout(keep_prob)
            net = (net.Conv2D('conv6', 512, 5, padding='VALID')
                   .apply(fg)
                   .BatchNorm('bn6')
                   .apply(cabs))
            logits = net.FullyConnected('fc1', 10, nl=tf.identity)()
        tf.nn.softmax(logits, name='output')

        # per-sample failure indicator, averaged into the monitored train error
        wrong = prediction_incorrect(logits, label)
        add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label)
        cost = tf.reduce_mean(xent, name='cross_entropy_loss')
        # weight decay restricted to variables matching 'fc.*/W'
        wd_cost = regularize_cost('fc.*/W', l2_regularizer(1e-7))

        add_param_summary(('.*/W', ['histogram', 'rms']))
        self.cost = tf.add_n([cost, wd_cost], name='cost')
        add_moving_summary(cost, wd_cost, self.cost)
    def _build_graph(self, inputs):
        """Build the two-branch ('np' / 'dst') prediction graph.

        ``inputs`` unpacks to ``(images, truemap_coded)``.  Channel 0 of
        ``truemap_coded`` is used as an integer label map for the 'np' branch
        and the remaining channels as the regression target of the 'dst'
        branch.  The named tensor ``predmap-coded`` concatenates the
        class-1 probability with the distance prediction.  ``self.cost`` and
        the image summaries are only created in a training tower.
        """
        images, truemap_coded = inputs
        raw_images = images

        # ground truth of the 'np' branch: label ids, one-hot and N*H*W*1 forms
        np_gt = tf.cast(truemap_coded[..., 0], tf.int32)
        np_gt = tf.identity(np_gt, name='truemap-np')
        np_gt_onehot = tf.one_hot(np_gt, 2, axis=-1)
        np_gt = tf.expand_dims(np_gt, axis=-1)

        # ground truth of the 'dst' branch
        dist_gt = tf.identity(truemap_coded[..., 1:], name='truemap-dist')

        ####
        with argscope(Conv2D, activation=tf.identity, use_bias=False,  # K.he initializer
                      W_init=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')), \
                argscope([Conv2D, BatchNorm], data_format=self.data_format):

            # NHWC -> NCHW, optionally rescaled by 1/255
            net_in = tf.transpose(images, [0, 3, 1, 2])
            if self.input_norm:
                net_in = net_in / 255.0

            #### shared encoder; the first two feature maps are cropped
            enc = encoder(net_in, self.freeze)
            enc[0] = crop_op(enc[0], (184, 184))
            enc[1] = crop_op(enc[1], (72, 72))

            #### one decoder per branch, each followed by BN + ReLU
            np_branch = decoder('np', enc)
            np_preact = BNReLU('preact_out_np', np_branch[-1])

            dist_branch = decoder('dst', enc)
            dist_preact = BNReLU('preact_out_dist', dist_branch[-1])

            #### 'np' head: 2-way softmax, back in NHWC
            np_logits = Conv2D('conv_out_np', np_preact, 2, 1,
                               use_bias=True, activation=tf.identity)
            np_logits = tf.transpose(np_logits, [0, 2, 3, 1])
            np_softmax = tf.nn.softmax(np_logits, axis=-1)
            np_prob = tf.identity(np_softmax[..., 1], name='predmap-prob-np')
            np_prob = tf.expand_dims(np_prob, axis=-1)
            np_pred = tf.argmax(np_softmax, axis=-1, name='predmap-np')
            np_pred = tf.expand_dims(tf.cast(np_pred, tf.float32), axis=-1)

            #### 'dst' head: single-channel linear output, back in NHWC
            dist_logits = Conv2D('conv_out_dist', dist_preact, 1, 1,
                                 use_bias=True, activation=tf.identity)
            dist_logits = tf.transpose(dist_logits, [0, 2, 3, 1])
            dist_prob = tf.identity(dist_logits, name='predmap-prob-dist')
            dist_pred = tf.identity(dist_logits, name='predmap-dist')

            # encoded so that inference can extract all output at once
            tf.concat([np_prob, dist_pred], axis=-1, name='predmap-coded')
        ####

        # nothing else to build outside of training
        if not get_current_tower_context().is_training:
            return

        ######## LOSS
        ### Distance regression loss: mean squared error
        dist_err = dist_pred - dist_gt
        dist_err_sq = dist_err * dist_err
        loss_mse = tf.reduce_mean(dist_err_sq, name='loss-mse')
        add_moving_summary(loss_mse)

        ### Nuclei Blob classification loss
        loss_bce = categorical_crossentropy(np_softmax, np_gt_onehot)
        loss_bce = tf.reduce_mean(loss_bce, name='loss-bce')
        add_moving_summary(loss_bce)

        ### combine the loss into single cost function
        self.cost = tf.identity(loss_mse + loss_bce, name='overall-loss')
        add_moving_summary(self.cost)
        ####

        add_param_summary(('.*/W', ['histogram']))   # monitor W

        #### image summaries: raw input, then input / truth / prediction strip
        raw_images = tf.cast(raw_images, tf.uint8)
        tf.summary.image('input', raw_images, max_outputs=1)

        raw_images = crop_op(raw_images, (190, 190), "NHWC")

        np_pred_viz = colorize(np_prob[..., 0], cmap='jet')
        np_gt_viz = colorize(np_gt[..., 0], cmap='jet')

        dist_pred_viz = colorize(dist_prob[..., 0], cmap='jet')
        dist_gt_viz = colorize(dist_gt[..., 0], cmap='jet')

        # tiles are concatenated along axis 2
        viz = tf.concat([raw_images,
                         np_gt_viz, np_pred_viz,
                         dist_gt_viz, dist_pred_viz], 2)

        tf.summary.image('output', viz, max_outputs=1)

        return
Exemplo n.º 35
0
    def _build_graph(self, inputs):
        """Build the four-sensor LSTM classifier and define ``self.cost``.

        ``inputs`` unpacks to four sensor tensors followed by the label.  Each
        sensor goes through its own ``LSTM_Network``; the four outputs are
        joined by ``Connect`` and classified by two fully-connected layers.
        ``self.cost`` is the mean cross-entropy plus a weight-decay term.
        """
        input1, input2, input3, input4, label = inputs

        n_hidden = 64
        n_classes = 5
        n_steps = 25
        output_num = 4

        # (scope name, sensor input, sixth positional argument of LSTM_Network
        # -- its meaning is not visible from here)
        sensor_specs = [
            ('sensor1', input1, 1),
            ('sensor2', input2, 17),
            ('sensor3', input3, 17),
            ('sensor4', input4, 17),
        ]
        sensor_outputs = []
        for scope_name, sensor_input, sensor_arg in sensor_specs:
            with tf.name_scope(scope_name):
                sensor_outputs.append(
                    LSTM_Network(scope_name, sensor_input, n_steps, n_hidden,
                                 output_num, sensor_arg))

        logits = (Connect('cloud', sensor_outputs)
                  .FullyConnected('fc1', 256, activation=tf.nn.relu)
                  .FullyConnected('fc2', n_classes, activation=tf.identity)())

        # softmax over the fc2 logits, exposed under the name 'prob'
        tf.nn.softmax(logits, name='prob')

        # per-sample cross-entropy, then its batch average
        per_sample_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label)
        cost = tf.reduce_mean(per_sample_loss, name='cross_entropy_loss')

        # 1.0 where the label is the top-1 prediction, 0.0 otherwise
        is_hit = tf.nn.in_top_k(logits, label, 1)
        correct = tf.cast(is_hit, tf.float32, name='correct')
        accuracy = tf.reduce_mean(correct, name='accuracy')

        # training error and accuracy are tracked as moving-average summaries
        train_error = tf.reduce_mean(1 - correct, name='train_error')
        summary.add_moving_summary(train_error, accuracy)

        # weight decay on every variable matching the regex 'fc.*/W'
        l2_term = regularize_cost('fc.*/W', tf.nn.l2_loss)
        wd_cost = tf.multiply(1e-5, l2_term, name='regularize_loss')

        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, self.cost)

        # histogram and rms summaries for every variable matching '.*/W'
        summary.add_param_summary(('.*/W', ['histogram', 'rms']))
    def build_graph(
        self,
        cam1: tf.Tensor,
        cam2: tf.Tensor,
        mask: tf.Tensor,
        normal_gt: tf.Tensor,
        depth_gt: tf.Tensor,
    ):
        """Build the two-view normal/depth network and return its total loss.

        Args:
            cam1: first input view, passed to ``self.network_architecture``.
            cam2: second input view, passed to ``self.network_architecture``.
            mask: mask applied to the per-pixel losses; it is tiled 3x along
                its last axis before being applied to normal-sized tensors.
            normal_gt: ground-truth normals, compared after ``* 2 - 1``.
            depth_gt: ground-truth depth.

        Returns:
            ``self.cost``: the value of ``tf.losses.get_total_loss`` after the
            normal, depth and (optionally) consistency losses were registered.
        """
        with tf.variable_scope("prepare"):
            # Tile multiples: 1 on every axis, 3 on the last one.
            repeat = [1] * len(mask.shape)
            repeat[-1] = 3

            tbutil.two_side_by_side(cam1, cam2, "input", 5)
            mask3 = tf.tile(mask, repeat)

        normal, depth = self.network_architecture(cam1, cam2, mask)

        with tf.variable_scope("loss"):
            with tf.variable_scope("normal"):
                # Both sides are remapped with x * 2 - 1 before the L1 error.
                normal_error = l1_loss(normal_gt * 2 - 1, normal * 2 - 1)
                normal_loss = tf.reduce_mean(
                    masked_loss(normal_error, mask3), name="normal_loss")
                add_moving_summary(normal_loss)
                tf.losses.add_loss(normal_loss, tf.GraphKeys.LOSSES)
                tbutil.two_side_by_side(normal_gt, normal, "normal", 5)

            with tf.variable_scope("depth"):
                depth_error = l1_loss(depth_gt, depth)
                depth_loss = tf.reduce_mean(
                    masked_loss(depth_error, mask), name="depth_loss")
                add_moving_summary(depth_loss)
                tf.losses.add_loss(depth_loss, tf.GraphKeys.LOSSES)
                tbutil.two_side_by_side(depth_gt, depth, "depth", 5)

            if self.enable_consistency:
                with tf.variable_scope("consistency"):
                    # Rescale the uncompressed depth by the values obtained
                    # for compressed depths 1 (near) and 0 (far).
                    near = uncompressDepth(1)
                    far = uncompressDepth(0)
                    linear_depth = uncompressDepth(depth)
                    height = tf.div_no_nan(linear_depth - near, far - near)

                    # b,h,w,1,[dy,dx] - 1 because height has 1 channel
                    edges = tf.image.sobel_edges(height)
                    grad_x = edges[:, :, :, :, 1]  # b,h,w,1
                    # This is a depth map rather than a height map (bright
                    # values are farther away), hence the inverted gradient.
                    grad_y = -edges[:, :, :, :, 0]
                    texel_size = 1 / self.imgSize
                    grad_z = tf.ones_like(grad_x) * texel_size * 2

                    # Normal derived from the depth gradients, shifted with
                    # * 0.5 + 0.5 before comparison with the predicted normal.
                    derived_normal = normalize(
                        tf.concat([grad_x, grad_y, grad_z], -1))
                    derived_normal = derived_normal * 0.5 + 0.5
                    consistency = masked_loss(
                        l2_loss(derived_normal, normal), mask3)

                    # self.consistency_loss is used as the weight of this term.
                    mean_consistency = tf.reduce_mean(
                        consistency, name="consistency_loss")
                    consistency_loss = (
                        mean_consistency * self.consistency_loss)
                    add_moving_summary(consistency_loss)
                    tf.losses.add_loss(consistency_loss, tf.GraphKeys.LOSSES)

                    tbutil.four_side_by_side(
                        tf.tile(depth, repeat), derived_normal, normal,
                        consistency, "consistency", 5)

        self.cost = tf.losses.get_total_loss(name="total_costs")

        add_moving_summary(self.cost)
        add_param_summary((".*/W", ["histogram"]))  # monitor W

        return self.cost
Exemplo n.º 37
0
    def build_graph(self, image, label):
        """Build the encoder/decoder graph and return the cost to optimize.

        The image is flattened, encoded to a 2-d latent (``mu``/``logvar``),
        sampled with the reparameterisation ``eps * exp(0.5 * logvar) + mu``
        and decoded back to ``IMAGE_SIZE * IMAGE_SIZE`` values.

        Args:
            image: input batch; flattened with ``tf.layers.flatten``.
            label: not used by this graph.

        Returns:
            ``rec_loss + gp_loss``. The KL term is only reported as a summary,
            and ``self.f_loss`` / ``self.g_loss`` are stored on the instance
            but are not part of the returned cost.
        """
        latent_dim = 2
        hidden_units = 1000

        image = tf.layers.flatten(image)

        with tf.variable_scope('encoder'):
            x = FullyConnected('fc1', image, hidden_units, activation=tf.nn.relu)
            x = FullyConnected('fc2', x, hidden_units, activation=tf.nn.relu)
            mu = tf.identity(
                FullyConnected('fc_mu', x, latent_dim, activation=None), 'mu')
            logvar = FullyConnected('fc_var', x, latent_dim, activation=None)

        # Sample z from the encoder distribution.
        eps = tf.random_normal((tf.shape(x)[0], latent_dim))
        z = tf.identity(eps * tf.exp(0.5 * logvar) + mu, name='z')

        # Samples from a standard normal with the same batch size.
        z_real = tf.random_normal((tf.shape(x)[0], latent_dim))

        # NOTE(review): self.f is defined elsewhere; it looks like a critic
        # scoring latent codes -- confirm.
        score_gap = self.f(z_real) - self.f(z)
        self.f_loss = tf.reduce_mean(-tf.reduce_sum(score_gap, -1),
                                     name='f_loss')
        self.g_loss = tf.reduce_mean(tf.reduce_sum(-self.f(z), -1),
                                     name='g_loss')

        # Random point on the segment between z and z_real; gradients are
        # blocked from flowing back into z.
        z_detached = tf.stop_gradient(z)
        mix = tf.random_uniform((tf.shape(x)[0], 1))
        z_interp = z_detached + mix * (z_real - tf.stop_gradient(z))
        gradient_f = tf.gradients(self.f(z_interp), [z_interp])[0]
        # One-sided penalty: only gradient norms above 1 contribute.
        grad_norm = tf.norm(gradient_f, axis=-1)
        gp_loss = tf.reduce_mean(tf.maximum(grad_norm - 1, 0)**2,
                                 name='gp_loss')

        trainables = tf.GraphKeys.TRAINABLE_VARIABLES
        self.f_vars = tf.get_collection(trainables, 'f')
        self.g_vars = tf.get_collection(trainables, 'encoder')

        with tf.variable_scope('decoder'):
            x = FullyConnected('fc1', z, hidden_units, activation=tf.nn.relu)
            x = FullyConnected('fc2', x, hidden_units, activation=tf.nn.relu)
            rec = tf.identity(
                FullyConnected('fc_rec', x, IMAGE_SIZE * IMAGE_SIZE,
                               activation=tf.nn.sigmoid),
                'rec')

        kl_per_sample = -tf.reduce_sum(
            1 + logvar - mu * mu - tf.exp(logvar), -1)
        kl_loss = tf.reduce_mean(kl_per_sample, name='kl_loss')

        squared_error = tf.reduce_sum(tf.square(rec - image), -1)
        rec_loss = tf.reduce_mean(squared_error, name='rec_loss')

        # kl_loss is monitored below but deliberately left out of the cost.
        total_cost = rec_loss + gp_loss

        summary.add_moving_summary(rec_loss, kl_loss, self.f_loss, self.g_loss,
                                   gp_loss)

        # monitor histogram and rms of every variable matching '.*/W'
        summary.add_param_summary(('.*/W', ['histogram', 'rms']))
        return total_cost
Exemplo n.º 38
0
    def _build_graph(self, inputs):
        """Build the multi-branch edge-detection network and its training cost.

        Five side branches are taken from a stack of 3x3 convolutions (one
        after each convolution group), upsampled back by their ``up`` factor,
        and fused by a 1x1 convolution. Every branch and the fused map get a
        sigmoid output named ``output1`` .. ``output6`` and a class-balanced
        cross-entropy named ``xentropy1`` .. ``xentropy6``.

        Args:
            inputs: sequence unpacked as ``(image, edgemap)``.

        Side effects:
            Creates the ``prediction`` and ``train_error`` tensors. Only when
            the current tower is training: adds weight decay, registers
            summaries and sets ``self.cost``.
        """
        image, edgemap = inputs
        # NOTE(review): presumably per-channel mean subtraction -- confirm the
        # channel order these constants assume.
        image = image - tf.constant([104, 116, 122], dtype='float32')
        edgemap = tf.expand_dims(edgemap, 3, name='edgemap4d')

        def branch(name, l, up):
            """Reduce `l` to one channel and upsample it by a factor of `up`.

            `up` is expected to be a power of two; each loop iteration applies
            one 2x bilinear upsampling.
            """
            with tf.variable_scope(name):
                l = Conv2D('convfc', l, 1, kernel_shape=1, nl=tf.identity,
                           use_bias=True,
                           W_init=tf.constant_initializer(),
                           b_init=tf.constant_initializer())
                while up != 1:
                    l = BilinearUpSample('upsample{}'.format(up), l, 2)
                    # Floor division keeps `up` an int on Python 3. With `/`
                    # it became a float, so layers were named 'upsample2.0'
                    # and a non-power-of-two factor would never reach 1.
                    up = up // 2
                return l

        with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu):
            l = Conv2D('conv1_1', image, 64)
            l = Conv2D('conv1_2', l, 64)
            b1 = branch('branch1', l, 1)
            l = MaxPooling('pool1', l, 2)

            l = Conv2D('conv2_1', l, 128)
            l = Conv2D('conv2_2', l, 128)
            b2 = branch('branch2', l, 2)
            l = MaxPooling('pool2', l, 2)

            l = Conv2D('conv3_1', l, 256)
            l = Conv2D('conv3_2', l, 256)
            l = Conv2D('conv3_3', l, 256)
            b3 = branch('branch3', l, 4)
            l = MaxPooling('pool3', l, 2)

            l = Conv2D('conv4_1', l, 512)
            l = Conv2D('conv4_2', l, 512)
            l = Conv2D('conv4_3', l, 512)
            b4 = branch('branch4', l, 8)
            l = MaxPooling('pool4', l, 2)

            l = Conv2D('conv5_1', l, 512)
            l = Conv2D('conv5_2', l, 512)
            l = Conv2D('conv5_3', l, 512)
            b5 = branch('branch5', l, 16)

        # Fuse the five side outputs with a 1x1 convolution whose weights all
        # start at 0.2.
        final_map = Conv2D('convfcweight',
                           tf.concat([b1, b2, b3, b4, b5], 3), 1, 1,
                           W_init=tf.constant_initializer(0.2),
                           use_bias=False, nl=tf.identity)
        costs = []
        for idx, b in enumerate([b1, b2, b3, b4, b5, final_map]):
            output = tf.nn.sigmoid(b, name='output{}'.format(idx + 1))
            xentropy = class_balanced_sigmoid_cross_entropy(
                b, edgemap,
                name='xentropy{}'.format(idx + 1))
            costs.append(xentropy)

        # `output` still holds the value from the last loop iteration, i.e.
        # the sigmoid of the fused map ('output6').
        # some magic threshold
        pred = tf.cast(tf.greater(output, 0.5), tf.int32, name='prediction')
        wrong = tf.cast(tf.not_equal(pred, edgemap), tf.float32)
        wrong = tf.reduce_mean(wrong, name='train_error')

        if get_current_tower_context().is_training:
            # Weight-decay coefficient decays stepwise with the global step.
            wd_w = tf.train.exponential_decay(2e-4, get_global_step_var(),
                                              80000, 0.7, True)
            wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='wd_cost')
            costs.append(wd_cost)

            add_param_summary(('.*/W', ['histogram']))   # monitor W
            self.cost = tf.add_n(costs, name='cost')
            add_moving_summary(costs + [wrong, self.cost])
Exemplo n.º 39
0
    def build_graph(self, image, label):
        """Build a three-stage residual classifier and return its total cost.

        The image is scaled by 1/128, moved to channels-first layout and fed
        through ``conv0`` followed by three stages of ``self.n`` residual
        blocks each; stages 2 and 3 start with a block that doubles the
        channel count and uses stride 2.

        Args:
            image: input batch; axis 3 is moved to axis 1 before the network.
            label: integer class labels for the 10-way classifier.

        Returns:
            Tensor named ``cost``: cross-entropy plus decayed L2 weight cost.
        """
        image = image / 128.0
        assert tf.test.is_gpu_available()
        image = tf.transpose(image, [0, 3, 1, 2])

        def residual(name, l, increase_dim=False, first=False):
            """One residual block; `first` skips the leading BNReLU."""
            in_channel = l.get_shape().as_list()[1]
            out_channel = in_channel * 2 if increase_dim else in_channel
            stride1 = 2 if increase_dim else 1

            with tf.variable_scope(name):
                pre_act = l if first else BNReLU(l)
                conv_a = Conv2D('conv1', pre_act, out_channel, strides=stride1, activation=BNReLU)
                conv_b = Conv2D('conv2', conv_a, out_channel)

                shortcut = l
                if increase_dim:
                    # Match the main path: halve the spatial size, then
                    # zero-pad the channel axis up to out_channel.
                    shortcut = AvgPooling('pool', shortcut, 2)
                    half = in_channel // 2
                    shortcut = tf.pad(shortcut, [[0, 0], [half, half], [0, 0], [0, 0]])

                return conv_b + shortcut

        # Keyword arguments for the first block of each stage.
        stage_heads = (
            (1, {'first': True}),           # 32,c=16
            (2, {'increase_dim': True}),    # 16,c=32
            (3, {'increase_dim': True}),    # 8,c=64
        )

        with argscope([Conv2D, AvgPooling, BatchNorm, GlobalAvgPooling], data_format='channels_first'), \
                argscope(Conv2D, use_bias=False, kernel_size=3,
                         kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
            l = Conv2D('conv0', image, 16, activation=BNReLU)
            for stage, head_kwargs in stage_heads:
                l = residual('res{}.0'.format(stage), l, **head_kwargs)
                for k in range(1, self.n):
                    l = residual('res{}.{}'.format(stage, k), l)
            l = BNReLU('bnlast', l)
            l = GlobalAvgPooling('gap', l)

        logits = FullyConnected('linear', l, 10)
        tf.nn.softmax(logits, name='output')

        per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(per_sample, name='cross_entropy_loss')

        # 1.0 for every sample whose top-1 prediction misses the label
        is_top1 = tf.nn.in_top_k(logits, label, 1)
        wrong = tf.to_float(tf.logical_not(is_top1), name='wrong_vector')
        # monitor training error
        train_error = tf.reduce_mean(wrong, name='train_error')
        add_moving_summary(train_error)

        # weight decay on every variable matching '.*/W', with a coefficient
        # that decays stepwise with the global step
        wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(),
                                          480000, 0.2, True)
        l2_of_weights = regularize_cost('.*/W', tf.nn.l2_loss)
        wd_cost = tf.multiply(wd_w, l2_of_weights, name='wd_cost')
        add_moving_summary(cost, wd_cost)

        add_param_summary(('.*/W', ['histogram']))   # monitor W
        return tf.add_n([cost, wd_cost], name='cost')