Example #1
def evaluate(model, loader, n_class, device, dtype, iter_idx, writer):
    hist = np.zeros((n_class, n_class))

    for batch_idx, (data, target) in enumerate(loader):
        data = data.to(device=device, dtype=dtype)

        with torch.no_grad():
            output = model(data)
            _, h, w = target.shape
            output = torch.nn.functional.interpolate(output,
                                                     size=(h, w),
                                                     mode='bilinear',
                                                     align_corners=True)

        output, target = output.data.cpu().numpy(), target.data.cpu().numpy()
        output = np.argmax(output, axis=1)
        hist += fast_hist(target.flatten(), output.flatten(), n_class)

        if batch_idx == 0:
            writer.add_image(
                'val/input',
                vutils.make_grid(data,
                                 normalize=True,
                                 scale_each=True,
                                 padding=0), iter_idx)
            writer.add_image('val/output', decode_labels(output[0]), iter_idx)
            writer.add_image('val/gt', decode_labels(target[0]), iter_idx)

    # per-class IoU = diag / (row sum + col sum - diag), then average over classes
    iou = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
    return np.sum(iou) / len(iou)
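The fast_hist helper used here is not shown on this page. A common implementation, following the FCN reference code, accumulates an n_class x n_class confusion matrix with np.bincount; rows index ground truth and columns index predictions, matching the call fast_hist(target.flatten(), output.flatten(), n_class) above:

import numpy as np

def fast_hist(label_true, label_pred, n_class):
    # Ignore labels outside [0, n_class), e.g. void/255 pixels.
    mask = (label_true >= 0) & (label_true < n_class)
    return np.bincount(
        n_class * label_true[mask].astype(int) + label_pred[mask],
        minlength=n_class ** 2).reshape(n_class, n_class)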
Example #2
def save_train_result(args, step, images, labels, preds):
    fig, axes = plt.subplots(args.save_num_images, 2, figsize=(16, 12))
    for i in range(args.save_num_images):
        cv2.imwrite(args.save_dir + str(step) + "_%d.png" % i,
                    (images[i] + IMG_MEAN)[:, :, ::-1].astype(np.uint8))

        axes.flat[i * 2 + 0].set_title('mask')
        axes.flat[i * 2 + 0].imshow(decode_labels(labels[i, :, :, 0]))

        axes.flat[i * 2 + 1].set_title('pred')
        axes.flat[i * 2 + 1].imshow(decode_labels(preds[i, :, :, 0]))

    plt.savefig(args.save_dir + str(step) + ".png")
    plt.close(fig)
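Examples 2 through 6 assume an IMG_MEAN constant in BGR channel order (cv2 loads images as BGR). A plausible definition, using the same DeepLab per-channel mean that Example 4 subtracts explicitly:

import numpy as np

# BGR mean; the values match the 104.008 / 116.669 / 122.675 constants in Example 4.
IMG_MEAN = np.array((104.008, 116.669, 122.675), dtype=np.float32)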
Example #3
def save_val_result(args, step, images, labels, preds, i):
    for j in range(BATCH_SIZE):
        fig, axes = plt.subplots(1, 2, figsize=(16, 12))
        if j < 1:
            cv2.imwrite(args.save_dir + str(step) + '_' + str(i * BATCH_SIZE + j) + "test_img.png",
                        (images[j] + IMG_MEAN)[:, :, ::-1].astype(np.uint8))

        axes.flat[0].set_title('mask')
        axes.flat[0].imshow(decode_labels(labels[j, :, :, 0]))

        axes.flat[1].set_title('pred')
        axes.flat[1].imshow(decode_labels(preds[j, :, :, 0]))

        plt.savefig(args.save_dir + str(step) + '_' + str(i * BATCH_SIZE + j) + "test.png")
        plt.close(fig)
Example #4
def main():
    args = docopt(docstr, version='v0.1')
    print(args)

    gpu0 = int(args['--gpu0'])

    model = deeplab_resnet.Res_Deeplab(21, True, 4, 1e-2)
    model.load_state_dict(torch.load(args['--snapshots']))
    model.eval().cuda(gpu0)

    im_path = args['--img_path']

    img = cv2.imread(im_path).astype(float)
    img_original = img.copy() / 255.0
    img[:, :, 0] = img[:, :, 0] - 104.008
    img[:, :, 1] = img[:, :, 1] - 116.669
    img[:, :, 2] = img[:, :, 2] - 122.675

    with torch.no_grad():
        output = model(*[torch.from_numpy(i[np.newaxis, :].transpose(0, 3, 1, 2)).float().cuda(gpu0) for i in  [img, img_original]])
    output = output.cpu().data[0].numpy().transpose(1, 2, 0)
    output = np.argmax(output, axis=2)

    vis_output = decode_labels(output)

    output_directory = os.path.dirname(im_path)
    output_name = os.path.splitext(os.path.basename(im_path))[0]
    save_path = os.path.join(output_directory, '{}_labels.png'.format(output_name))
    imsave(save_path, vis_output)
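Every example on this page relies on a decode_labels helper that maps integer class ids to colors; its exact signature varies by repository (some variants take num_images or num_classes and operate on batches or TF tensors). A minimal single-mask sketch, assuming the standard 21-class PASCAL VOC palette:

import numpy as np

VOC_PALETTE = np.array(
    [(0, 0, 0), (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128),
     (128, 0, 128), (0, 128, 128), (128, 128, 128), (64, 0, 0), (192, 0, 0),
     (64, 128, 0), (192, 128, 0), (64, 0, 128), (192, 0, 128), (64, 128, 128),
     (192, 128, 128), (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0),
     (0, 64, 128)], dtype=np.uint8)

def decode_labels(mask):
    # Fancy indexing maps an (H, W) array of ids in [0, 21) to an (H, W, 3) RGB image.
    return VOC_PALETTE[mask]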
Example #5
def main():
    args = docopt(docstr, version='v0.1')
    print(args)

    gpu0 = int(args['--gpu0'])
    im_path = args['--testIMpath']
    gt_path = args['--testGTpath']

    model = deeplab_resnet.Res_Deeplab(int(args['--NoLabels']), args['--dgf'],
                                       4, 1e-2)
    model.eval().cuda(gpu0)

    img_list = open('data/list/val.txt').readlines()
    saved_state_dict = torch.load(args['--snapshots'])
    model.load_state_dict(saved_state_dict)

    save_path = os.path.join('data', args['--exp'])
    if not os.path.isdir(save_path):
        os.makedirs(save_path)

    max_label = int(args['--NoLabels']) - 1  # labels 0, 1, ..., 20 for VOC
    hist = np.zeros((max_label + 1, max_label + 1))
    for idx, i in enumerate(img_list):
        print('{}/{} ...'.format(idx + 1, len(img_list)))

        img = cv2.imread(os.path.join(im_path, i[:-1] + '.jpg')).astype(float)
        img_original = img.copy() / 255.0
        img[:, :, 0] = img[:, :, 0] - 104.008
        img[:, :, 1] = img[:, :, 1] - 116.669
        img[:, :, 2] = img[:, :, 2] - 122.675

        if args['--dgf']:
            inputs = [img, img_original]
        else:
            inputs = [np.zeros((513, 513, 3))]
            inputs[0][:img.shape[0], :img.shape[1], :] = img

        with torch.no_grad():
            output = model(*[
                torch.from_numpy(i[np.newaxis, :].transpose(
                    0, 3, 1, 2)).float().cuda(gpu0) for i in inputs
            ])
        if not args['--dgf']:
            interp = nn.Upsample(size=(513, 513),
                                 mode='bilinear',
                                 align_corners=True)
            output = interp(output)
            output = output[:, :, :img.shape[0], :img.shape[1]]

        output = output.cpu().data[0].numpy().transpose(1, 2, 0)
        output = np.argmax(output, axis=2)

        vis_output = decode_labels(output)
        imsave(os.path.join(save_path, i[:-1] + '.png'), vis_output)

        gt = cv2.imread(os.path.join(gt_path, i[:-1] + '.png'), 0)
        hist += fast_hist(gt.flatten(), output.flatten(), max_label + 1)

    iou = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
    print("Mean IoU = ", np.sum(iou) / len(iou))
Example #6
def main():
    args = get_arguments()
    # Read Image
    img = tf.image.decode_jpeg(tf.read_file(args.img_path), channels=3)
    # Convert RGB to BGR
    red, green, blue = tf.split(axis=2, num_or_size_splits=3, value=img)
    img = tf.cast(tf.concat(axis=2, values=[blue, green, red]),
                  dtype=tf.float32)
    # Subtract the per-channel mean
    img -= IMG_MEAN

    # Create Network
    net = DeepLabResNetModel(tf.expand_dims(img, axis=0), ModeKeys.TRAIN,
                             args.num_classes, args.atrous_blocks)

    # Predictions
    raw_output = net.output
    raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(img)[0:2])
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    pred = tf.expand_dims(raw_output_up, axis=3)

    # Init
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        restorer = tf.train.Saver()
        load_model(restorer, sess, args.model_weights)
        preds = sess.run(pred)
        msk = decode_labels(preds, num_classes=args.num_classes)
        im = Image.fromarray(msk[0])
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)
        im.save(args.save_dir + 'mask.png')

        print('Image saved')
Example #7
def plot_images(imdata,
                lbl_preds,
                lbl_trues,
                epoch,
                split='test',
                crop_size=None):
    img = unNormalize(imdata.data).cpu().numpy()[0] * 255
    img = img.astype(np.uint8)  # (c, h, w)
    pred = []
    gt = []
    for lbl_pred, lbl_true in zip(lbl_preds, lbl_trues):
        predtmp = decode_labels(lbl_pred, num_images=1)[0]  # (h, w, c)
        gttmp = decode_labels(lbl_true, num_images=1)[0]
        pred.append(np.moveaxis(predtmp, 2, 0))  # (c, h, w)
        gt.append(np.moveaxis(gttmp, 2, 0))

    if crop_size is not None:  # center crop
        _, h, w = img.shape
        tx, ty, bx, by = 0, 0, w, h
        ch, cw = crop_size
        if ch > h:
            ty, by = 0, 0
        else:
            ty, by = (h - ch) // 2, h - (h - ch) // 2
        if cw > w:
            tx, bx = 0, 0
        else:
            tx, bx = (w - cw) // 2, w - (w - cw) // 2
        img = img[:, ty:by, tx:bx]
        for i in range(len(pred)):
            pred[i] = pred[i][:, ty:by, tx:bx]
            gt[i] = gt[i][:, ty:by, tx:bx]

    grid_imgs = [img]
    for g, p in zip(gt, pred):
        grid_imgs.extend([g, p])
    plotter.plot_images(grid_imgs,
                        split,
                        epoch,
                        nrow=7,
                        exp_name=args.name + '_' + args.dataset)
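The unNormalize helper above is assumed to invert a torchvision-style Normalize so the image lands back in [0, 1] before scaling to 255. A sketch under the assumption of ImageNet statistics (the actual mean/std depend on the training transform):

import torch

IMAGENET_MEAN = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
IMAGENET_STD = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)

def unNormalize(batch):
    # Undo per-channel normalization on an (N, C, H, W) tensor.
    return batch * IMAGENET_STD.to(batch.device) + IMAGENET_MEAN.to(batch.device)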
Example #8
def main(test_data_dir, save_img=False):
    if save_img and not os.path.exists(IMG_DIR):
        os.makedirs(IMG_DIR)
        print('INFO: IMG_DIR does not exist, creating it.')

    mha_list = read_mha_files(test_file)  # test_file: presumably a module-level constant
    length = len(mha_list) * 155  # 155 axial slices per BraTS volume

    image_batch = net_inputs_test(batch_size, test_data_dir)
    image_batch = tf.cast(image_batch, tf.float32)

    output = Model(image_batch, dcr_type, dilated_rates=dilated_rates)
    out_mask = tf.expand_dims(
        tf.cast(tf.argmax(output, axis=3), tf.uint8), -1)
    out_mask = tf.image.resize_images(out_mask, [240, 240])

    with tf.Session() as sess:
        mha = []

        ckpt = tf.train.get_checkpoint_state(CKPT_PATH)
        if not ckpt:
            raise RuntimeError('No Checkpoint Found !')
        else:
            saver = tf.train.Saver()
            ckpt_path = ckpt.model_checkpoint_path
            saver.restore(sess, ckpt_path)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        for step in range(1, length + 1):
            stdout.write('\rProcessing {} / {} ...'.format(step, length))

            _out_mask = sess.run(out_mask)
            _out_mask = np.round(_out_mask).astype(np.uint8)
            re_img = _out_mask.reshape([240, 240, 1])
            mha.append(re_img)

            if save_img:
                img = utils.decode_labels(
                    np.round(_out_mask).astype(np.uint8)).reshape(
                        [240, 240, 3])
                scipy.misc.imsave('{}/{}.jpg'.format(IMG_DIR, step), img)

            if step % 155 == 0:  # step starts at 1; one volume per 155 slices
                file_id = mha_list.pop().split(',')[0].strip()[1:-1].split(
                    '.')[-2]
                mha_name = ''.join(['VSD.Seg_HG_001.', str(file_id), '.mha'])
                # Note: mha_name is built but never used; the write that would
                # consume the accumulated `mha` slices appears to be omitted.
                mha = []

        coord.request_stop()
        coord.join(threads)
        print()
Example #9
    def predict(self):
        self.predict_setup()

        self.sess.run(tf.global_variables_initializer())
        self.sess.run(tf.local_variables_initializer())

        # load checkpoint
        #checkpointfile = self.conf.modeldir+ '/model.ckpt-' + str(self.conf.valid_step)
        #checkpointfile = 'deeplab_resnet_init.ckpt'
        checkpointfile = tf.train.latest_checkpoint("./model_multigpu_bs10/")
        #checkpointfile = './model_crf_test0'+ '/model.ckpt-' + '0'
        self.load(self.loader, checkpointfile)

        # Start queue threads.
        threads = tf.train.start_queue_runners(coord=self.coord,
                                               sess=self.sess)

        # img_name_list
        image_list, _ = read_labeled_image_list('', self.conf.test_data_list)

        # Predict!
        for step in range(self.conf.test_num_steps):
            preds = self.sess.run(self.pred)
            resized_dec = self.sess.run(self.resized_decoder100)
            img_name = image_list[step].split('/')[2].split('.')[0]
            # Save raw predictions, i.e. each pixel is an integer between [0,20].
            im = Image.fromarray(preds[0, :, :, 0], mode='L')
            filename = '/%s_mask.png' % (img_name)
            im.save(self.conf.out_dir + '/prediction' + filename)
            #resized = Image.fromarray(resized_dec[0], mode='RGB')
            #fn = '/%s_resized_dec.png' % (img_name)
            #resized.save(self.conf.out_dir + '/resized_decoding' + fn)

            # Save predictions for visualization.
            # See utils/label_utils.py for color setting
            # Need to be modified based on datasets.
            if self.conf.visual:
                msk = decode_labels(preds, num_classes=self.conf.num_classes)
                im = Image.fromarray(msk[0], mode='RGB')
                filename = '/%s_mask_visual.png' % (img_name)
                im.save(self.conf.out_dir + '/visual_prediction' + filename)

            if step % 100 == 0:
                print('step {:d}'.format(step))

        print('The output files have been saved to {}'.format(
            self.conf.out_dir))

        # finish
        self.coord.request_stop()
        self.coord.join(threads)
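Examples 9 through 14 share a read_labeled_image_list helper. A sketch following the convention of the tensorflow-deeplab-resnet codebase, where each line of the list file holds an image path and a mask path separated by a space (the real helper may differ in details):

def read_labeled_image_list(data_dir, data_list):
    images, masks = [], []
    with open(data_list, 'r') as f:
        for line in f:
            try:
                image, mask = line.strip().split(' ')
            except ValueError:  # list files without masks
                image = mask = line.strip()
            images.append(data_dir + image)
            masks.append(data_dir + mask)
    return images, masks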
Example #10
    def predict(self):
        normal_color = "\033[0;37;40m"
        self.predict_setup()

        self.sess.run(tf.global_variables_initializer())
        self.sess.run(tf.local_variables_initializer())

        # load checkpoint
        checkpointfile = self.conf.modeldir + '/model.ckpt-' + str(
            self.conf.test_step)
        self.load(self.loader, checkpointfile)

        # Start queue threads.
        threads = tf.train.start_queue_runners(coord=self.coord,
                                               sess=self.sess)

        # img_name_list
        image_list, _ = read_labeled_image_list('', self.conf.test_data_list)

        # Predict!
        for step in range(self.conf.test_num_steps):
            # raw_output_ is presumably defined in predict_setup(); raw_preds
            # is fetched here but not used below.
            preds, raw_preds = self.sess.run([self.pred, raw_output_])
            img_name = image_list[step].split('/')[2].split('.')[0]

            # Save raw predictions, i.e. each pixel is an integer between [0,20].
            prior1 = self.conf.prior
            im = Image.fromarray(preds[0, :, :, 0], mode='L')
            filename = '/%s_mask.png' % (img_name)
            im.save(self.conf.out_dir + '/prediction' + '/' + str(prior1) +
                    filename)

            # Save predictions for visualization.
            # See utils/label_utils.py for color setting
            # Need to be modified based on datasets.
            if self.conf.visual:
                msk = decode_labels(preds, num_classes=self.conf.num_classes)
                im = Image.fromarray(msk[0], mode='RGB')
                filename = '/%s_mask_visual.png' % (img_name)
                im.save(self.conf.out_dir + '/visual_prediction' + '/' +
                        str(prior1) + filename)

            if step % 100 == 0:
                print('step {:d}'.format(step))

        print(
            'The output files have been saved to {}'.format(self.conf.out_dir)
            + normal_color)

        # finish
        self.coord.request_stop()
        self.coord.join(threads)
Example #11
def image_summary(image,
                  truth,
                  prediction,
                  image_mean,
                  image_std=None,
                  num_classes=2,
                  max_output=10):
    """

    :param image: 4-D array(N, H, W, 3)
    :param truth: 4-D array(N, H, W, 1)
    :param prediction: 4-D array(N, H, W, 1)
    :param image_mean: [B,G,R]
    :param image_std: [B,G,R]
    :param num_classes: scalar
    :param max_output: scalar, must be less than N
    :return: 4-D array(max_output, H, 3*W, 3)
    """
    images = inv_preprocess(image, max_output, image_mean, image_std)
    labels = decode_labels(truth, max_output, num_classes)
    predictions = decode_labels(prediction, max_output, num_classes)

    return np.concatenate([images, labels, predictions], axis=2)
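One way to use image_summary is to build the [input | truth | prediction] strip from NumPy batches and log it with a TF1-style summary writer. In the sketch below, image_batch, truth_batch, pred_batch, log_dir, sess, and step are all placeholders:

panels = image_summary(image_batch, truth_batch, pred_batch,
                       image_mean=[104.008, 116.669, 122.675],
                       num_classes=21, max_output=4)
summary_op = tf.summary.image('val/panels', tf.convert_to_tensor(panels),
                              max_outputs=4)
writer = tf.summary.FileWriter(log_dir)
writer.add_summary(sess.run(summary_op), global_step=step)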
Example #12
    def predict(self):
        self.predict_setup()

        self.sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])

        # checkpoint file
        checkpoint_file = tf.train.latest_checkpoint(self.conf.modeldir)
        if (not os.path.exists("{}.meta".format(checkpoint_file))) and (self.conf.pretrain_file is not None):
            self.load(self.loader, self.conf.pretrain_file)
        elif os.path.exists("{}.meta".format(checkpoint_file)):
            self.load(self.loader, checkpoint_file)

        # Start queue threads.
        threads = tf.train.start_queue_runners(coord=self.coord, sess=self.sess)

        # img_name_list
        image_list, _ = read_labeled_image_list('', self.conf.test_data_list)

        # Predict!
        for step in range(self.conf.test_num_steps):
            preds = self.sess.run(self.pred)

            img_name = image_list[step].split('/')[2].split('.')[0]
            # Save raw predictions, i.e. each pixel is an integer between [0,20].
            im = Image.fromarray(preds[0, :, :, 0], mode='L')
            filename = '/%s_mask.png' % (img_name)
            im.save(self.conf.out_dir + '/prediction' + filename)

            # Save predictions for visualization.
            if self.conf.visual:
                msk = decode_labels(preds, num_classes=self.conf.num_classes)
                im = Image.fromarray(msk[0], mode='RGB')
                im.save(self.conf.out_dir + '/visual_prediction' + '/{}_mask_visual.png'.format(img_name))
                # original image
                origin_image = "{}{}".format(self.conf.data_dir, image_list[step])
                im = Image.open(origin_image)
                filename, ext = os.path.splitext(filename)
                im.save(self.conf.out_dir + '/visual_prediction' + filename + "_original" + ext)

            if step % 100 == 0:
                print('step {:d}'.format(step))

        print('The output files have been saved to {}'.format(self.conf.out_dir))

        # finish
        self.coord.request_stop()
        self.coord.join(threads)
Example #13
    def predict(self):
        self.predict_setup()

        self.sess.run([
            tf.local_variables_initializer(),
            tf.global_variables_initializer()
        ])

        # load checkpoint
        self.load(
            self.loader,
            self.conf.modeldir + '/model.ckpt-' + str(self.conf.valid_step))

        # Start queue threads.
        threads = tf.train.start_queue_runners(coord=self.coord,
                                               sess=self.sess)

        # img_name_list
        image_list, _ = read_labeled_image_list('', self.conf.test_data_list)

        # Predict!
        for step in range(self.conf.test_num_steps):
            preds = self.sess.run(self.pred)

            img_name = image_list[step].split('/')[2].split('.')[0]
            # Save raw predictions, i.e. each pixel is an integer between [0,20].
            im = Image.fromarray(preds[0, :, :, 0], mode='L')
            im.save(self.conf.out_dir +
                    '/prediction/{}_mask.png'.format(img_name))

            if self.conf.visual:
                msk = decode_labels(preds, num_classes=self.conf.num_classes)
                im = Image.fromarray(msk[0], mode='RGB')
                im.save(
                    self.conf.out_dir +
                    '/visual_prediction/{}_mask_visual.png'.format(img_name))

            if step % 100 == 0:
                print('step {:d}'.format(step))

        print('The output files have been saved to {}'.format(
            self.conf.out_dir))

        # finish
        self.coord.request_stop()
        self.coord.join(threads)
Example #14
	def predict(self):
		self.predict_setup()

		self.sess.run(tf.global_variables_initializer())
		self.sess.run(tf.local_variables_initializer())

		# load checkpoint
		checkpointfile = self.conf.modeldir+ '/model.ckpt-' + str(self.conf.valid_step)
		self.load(self.loader, checkpointfile)

		# Start queue threads.
		threads = tf.train.start_queue_runners(coord=self.coord, sess=self.sess)

		# img_name_list
		image_list, _ = read_labeled_image_list('', self.conf.test_data_list)

		# Predict!
		for step in range(self.conf.test_num_steps):
			preds = self.sess.run(self.pred)

			img_name = image_list[step].split('/')[2].split('.')[0]
			# Save raw predictions, i.e. each pixel is an integer between [0,20].
			im = Image.fromarray(preds[0,:,:,0], mode='L')
			filename = '/%s_mask.png' % (img_name)
			im.save(self.conf.out_dir + '/prediction' + filename)

			# Save predictions for visualization.
			# See utils/label_utils.py for color setting
			# Need to be modified based on datasets.
			if self.conf.visual:
				msk = decode_labels(preds, num_classes=self.conf.num_classes)
				im = Image.fromarray(msk[0], mode='RGB')
				filename = '/%s_mask_visual.png' % (img_name)
				im.save(self.conf.out_dir + '/visual_prediction' + filename)

			if step % 100 == 0:
				print('step {:d}'.format(step))

		print('The output files have been saved to {}'.format(self.conf.out_dir))

		# finish
		self.coord.request_stop()
		self.coord.join(threads)
Example #15
    def build_computational_graph(self):
        """Build the ICNet with ResNet50 backbone. and 4 level PSP module."""
        def bottleneck_module(inputs,
                              lvl,
                              pad,
                              is_training,
                              filters,
                              strides,
                              data_format='channels_last',
                              bottleneck_factor=4):
            """
            Implement the bottleneck module proposed in ResNet.
            1x1 conv -> 3x3 conv -> 1x1 conv
            """

            # 1x1 reduce component
            x = tf.layers.conv2d(inputs,
                                 filters=filters // bottleneck_factor,
                                 kernel_size=1,
                                 strides=strides,
                                 data_format=data_format,
                                 use_bias=False,
                                 name="conv{}_1x1_reduce".format(lvl))
            x = tf.layers.batch_normalization(
                x,
                momentum=0.95,
                epsilon=1e-5,
                training=is_training,
                name="conv{}_1x1_reduce_bn".format(lvl))
            x = tf.nn.relu(x)

            # 3x3 component
            x = zero_padding(x, pad)
            x = tf.layers.conv2d(x,
                                 filters=filters // bottleneck_factor,
                                 kernel_size=3,
                                 strides=1,
                                 dilation_rate=pad,
                                 data_format=data_format,
                                 use_bias=False,
                                 name="conv{}_3x3".format(lvl))
            x = tf.layers.batch_normalization(x,
                                              momentum=0.95,
                                              epsilon=1e-5,
                                              training=is_training,
                                              name="conv{}_3x3_bn".format(lvl))
            x = tf.nn.relu(x)

            # 1x1 increase component
            x = tf.layers.conv2d(x,
                                 filters=filters,
                                 kernel_size=1,
                                 strides=1,
                                 data_format=data_format,
                                 use_bias=False,
                                 name="conv{}_1x1_increase".format(lvl))
            x = tf.layers.batch_normalization(
                x,
                momentum=0.95,
                epsilon=1e-5,
                training=is_training,
                name="conv{}_1x1_increase_bn".format(lvl))

            # 1x1 project (if needed)
            if data_format == "channels_last":
                _, h, w, d = inputs.get_shape().as_list()
                _, hh, ww, dd = x.get_shape().as_list()
            else:
                _, d, h, w = inputs.get_shape().as_list()
                _, dd, hh, ww = x.get_shape().as_list()

            if h != hh or d != dd:
                conv_proj = tf.layers.conv2d(
                    inputs,
                    filters,
                    kernel_size=1,
                    strides=strides,
                    use_bias=False,
                    name="conv{}_1x1_proj".format(lvl))
                conv_proj_bn = tf.layers.batch_normalization(
                    conv_proj,
                    momentum=0.95,
                    epsilon=1e-5,
                    training=is_training,
                    name="conv{}_1x1_proj_bn".format(lvl))
                out = x + conv_proj_bn
            else:
                out = x + inputs

            return tf.nn.relu(out)

        def build_dilated_residual_network(input_layer):
            """Construct a 34-layer variant dilated residual network."""
            is_training = self.placeholders["is_training"]

            conv1_1 = tf.layers.conv2d(input_layer,
                                       filters=32,
                                       kernel_size=3,
                                       strides=2,
                                       padding="same",
                                       use_bias=False,
                                       name="conv1_1_3x3_s2")
            conv1_1_bn = tf.layers.batch_normalization(
                conv1_1,
                momentum=0.95,
                epsilon=1e-5,
                training=is_training,
                name="conv1_1_3x3_s2_bn")
            conv1_1_relu = tf.nn.relu(conv1_1_bn)

            conv1_2 = tf.layers.conv2d(conv1_1_relu,
                                       filters=32,
                                       kernel_size=3,
                                       strides=1,
                                       padding="same",
                                       use_bias=False,
                                       name="conv1_2_3x3")
            conv1_2_bn = tf.layers.batch_normalization(conv1_2,
                                                       momentum=0.95,
                                                       epsilon=1e-5,
                                                       training=is_training,
                                                       name="conv1_2_3x3_bn")
            conv1_2_relu = tf.nn.relu(conv1_2_bn)

            conv1_3 = tf.layers.conv2d(conv1_2_relu,
                                       filters=64,
                                       kernel_size=3,
                                       strides=1,
                                       padding="same",
                                       use_bias=False,
                                       name="conv1_3_3x3")
            conv1_3_bn = tf.layers.batch_normalization(conv1_3,
                                                       momentum=0.95,
                                                       epsilon=1e-5,
                                                       training=is_training,
                                                       name="conv1_3_3x3_bn")
            conv1_3_relu = tf.nn.relu(conv1_3_bn)

            padding0 = zero_padding(conv1_3_relu, paddings=1)
            pool1 = tf.layers.max_pooling2d(padding0,
                                            pool_size=3,
                                            strides=2,
                                            padding='valid',
                                            name="pool1")

            conv2_1_block = bottleneck_module(pool1,
                                              lvl="2_1",
                                              pad=1,
                                              is_training=is_training,
                                              filters=128,
                                              strides=1)
            conv2_2_block = bottleneck_module(conv2_1_block,
                                              lvl="2_2",
                                              pad=1,
                                              is_training=is_training,
                                              filters=128,
                                              strides=1)
            conv2_3_block = bottleneck_module(conv2_2_block,
                                              lvl="2_3",
                                              pad=1,
                                              is_training=is_training,
                                              filters=128,
                                              strides=1)

            conv3_1_block = bottleneck_module(conv2_3_block,
                                              lvl="3_1",
                                              pad=1,
                                              is_training=is_training,
                                              filters=256,
                                              strides=2)

            # We share weights between the low and medium resolution branches;
            # conv3_1_sub4 hooks into the end of the medium resolution branch.
            conv3_1_sub4 = tf.image.resize_bilinear(
                conv3_1_block,
                tf.shape(conv3_1_block)[1:-1] // 2,
                align_corners=True,
                name="conv3_1_sub4")

            conv3_2_block = bottleneck_module(conv3_1_sub4,
                                              lvl="3_2",
                                              pad=1,
                                              is_training=is_training,
                                              filters=256,
                                              strides=1)
            conv3_3_block = bottleneck_module(conv3_2_block,
                                              lvl="3_3",
                                              pad=1,
                                              is_training=is_training,
                                              filters=256,
                                              strides=1)
            conv3_4_block = bottleneck_module(conv3_3_block,
                                              lvl="3_4",
                                              pad=1,
                                              is_training=is_training,
                                              filters=256,
                                              strides=1)

            # Pad is used as dilation rate internally in bottleneck module
            conv4_1_block = bottleneck_module(conv3_4_block,
                                              lvl="4_1",
                                              pad=2,
                                              is_training=is_training,
                                              filters=512,
                                              strides=1)
            conv4_2_block = bottleneck_module(conv4_1_block,
                                              lvl="4_2",
                                              pad=2,
                                              is_training=is_training,
                                              filters=512,
                                              strides=1)
            conv4_3_block = bottleneck_module(conv4_2_block,
                                              lvl="4_3",
                                              pad=2,
                                              is_training=is_training,
                                              filters=512,
                                              strides=1)
            conv4_4_block = bottleneck_module(conv4_3_block,
                                              lvl="4_4",
                                              pad=2,
                                              is_training=is_training,
                                              filters=512,
                                              strides=1)
            conv4_5_block = bottleneck_module(conv4_4_block,
                                              lvl="4_5",
                                              pad=2,
                                              is_training=is_training,
                                              filters=512,
                                              strides=1)
            conv4_6_block = bottleneck_module(conv4_5_block,
                                              lvl="4_6",
                                              pad=2,
                                              is_training=is_training,
                                              filters=512,
                                              strides=1)

            conv5_1_block = bottleneck_module(conv4_6_block,
                                              lvl="5_1",
                                              pad=4,
                                              is_training=is_training,
                                              filters=1024,
                                              strides=1)
            conv5_2_block = bottleneck_module(conv5_1_block,
                                              lvl="5_2",
                                              pad=4,
                                              is_training=is_training,
                                              filters=1024,
                                              strides=1)
            conv5_3_block = bottleneck_module(conv5_2_block,
                                              lvl="5_3",
                                              pad=4,
                                              is_training=is_training,
                                              filters=1024,
                                              strides=1)

            return conv3_1_block, conv5_3_block

        input_shape = tf.shape(self.placeholders["image"])
        processed_image = self.placeholders["image"]
        is_training = self.placeholders["is_training"]

        with tf.variable_scope("ICNet"):

            # Assume NHWC
            data_sub2 = tf.image.resize_bilinear(processed_image,
                                                 (input_shape[1:-1] // 2),
                                                 align_corners=True,
                                                 name="data_sub2")

            conv3_1, drn = build_dilated_residual_network(data_sub2)

            # According to paper: "1/4 sized image is fed into PSPNet with
            # downsampling rate 8, resulting in a 1/32-resolution feature map".
            # However, according to author's cityscape prototxt, we feed a
            # 1/2 sized image into PSPNet with downsampling rate 16. Either way
            # results in 1/32 resolution feature map; compute those dimensions
            h, w = self.input_shape[0] // 32, self.input_shape[1] // 32
            pool_sizes = strides_list = [(h, w), (h // 2, w // 2),
                                         (h // 3, w // 3), (h // 4, w // 4)]
            # These indices match the names of the pretrained weights
            level_indices = [1, 2, 3, 6]
            psp = pyramid_pooling_module(drn,
                                         filters=256,
                                         pool_sizes=pool_sizes,
                                         strides_list=strides_list,
                                         level_indices=level_indices,
                                         is_training=is_training,
                                         name_prefix="conv5_3",
                                         convolve=False)

            conv5_4 = tf.layers.conv2d(psp,
                                       filters=256,
                                       kernel_size=1,
                                       strides=1,
                                       padding="same",
                                       use_bias=False,
                                       name="conv5_4_k1")
            conv5_4_bn = tf.layers.batch_normalization(conv5_4,
                                                       momentum=0.95,
                                                       epsilon=1e-5,
                                                       training=is_training,
                                                       name="conv5_4_k1_bn")
            conv5_4_bn = tf.nn.relu(conv5_4_bn)

            # Build light high resolution CNN on top of input
            conv1_sub1 = tf.layers.conv2d(processed_image,
                                          kernel_size=3,
                                          filters=32,
                                          strides=2,
                                          padding="same",
                                          use_bias=False,
                                          name="conv1_sub1")
            conv1_sub1_bn = tf.layers.batch_normalization(conv1_sub1,
                                                          momentum=0.95,
                                                          epsilon=1e-5,
                                                          training=is_training,
                                                          name="conv1_sub1_bn")
            conv1_sub1_relu = tf.nn.relu(conv1_sub1_bn)

            conv2_sub1 = tf.layers.conv2d(conv1_sub1_relu,
                                          kernel_size=3,
                                          filters=32,
                                          strides=2,
                                          padding="same",
                                          use_bias=False,
                                          name="conv2_sub1")
            conv2_sub1_bn = tf.layers.batch_normalization(conv2_sub1,
                                                          momentum=0.95,
                                                          epsilon=1e-5,
                                                          training=is_training,
                                                          name="conv2_sub1_bn")
            conv2_sub1_relu = tf.nn.relu(conv2_sub1_bn)

            conv3_sub1 = tf.layers.conv2d(conv2_sub1_relu,
                                          kernel_size=3,
                                          filters=64,
                                          strides=2,
                                          padding="same",
                                          use_bias=False,
                                          name="conv3_sub1")
            conv3_sub1_bn = tf.layers.batch_normalization(conv3_sub1,
                                                          momentum=0.95,
                                                          epsilon=1e-5,
                                                          training=is_training,
                                                          name="conv3_sub1_bn")
            conv3_sub1_relu = tf.nn.relu(conv3_sub1_bn)

            # Do cascade feature fusion for sub24
            sub24_cff_names = {
                "f1_conv": "conv_sub4",
                "f1_bn": "conv_sub4_bn",
                "f2_conv": "conv3_1_sub2_proj",
                "f2_bn": "conv3_1_sub2_proj_bn",
                "out": "sub24_sum"
            }
            conv5_4_interp, sub24_sum_relu = cascade_feature_fusion_module(
                f1=conv5_4_bn,
                f2=conv3_1,
                c3=128,
                is_training=is_training,
                names=sub24_cff_names)

            # Do cascade feature fusion for sub12
            sub12_cff_names = {
                "f1_conv": "conv_sub2",
                "f1_bn": "conv_sub2_bn",
                "f2_conv": "conv3_sub1_proj",
                "f2_bn": "conv3_sub1_proj_bn",
                "out": "sub12_sum"
            }
            sub24_sum_interp, sub12_sum_relu = cascade_feature_fusion_module(
                f1=sub24_sum_relu,
                f2=conv3_sub1_relu,
                c3=128,
                is_training=is_training,
                names=sub12_cff_names)

            # Get the sub outputs to use in cascade label guidance
            low_res_logits = tf.layers.conv2d(conv5_4_interp,
                                              kernel_size=1,
                                              filters=self.num_classes,
                                              strides=1,
                                              name="sub4_out")
            med_res_logits = tf.layers.conv2d(sub24_sum_interp,
                                              kernel_size=1,
                                              filters=self.num_classes,
                                              strides=1,
                                              name="sub24_out")

            # interpolate to output feature map size (1/4 input) and project to
            # final number of classes to get logits
            output_shape = (self.input_shape[0] // 4, self.input_shape[1] // 4)
            sub12_sum_interp = tf.image.resize_bilinear(
                sub12_sum_relu, output_shape, name="sub12_sum_interp")
            conv6_cls = tf.layers.conv2d(sub12_sum_interp,
                                         kernel_size=1,
                                         filters=self.num_classes,
                                         strides=1,
                                         name="conv6_cls")

            high_res_logits = conv6_cls

            # Upscale the logits and decode prediction to get final result.
            logits_up = tf.image.resize_bilinear(high_res_logits,
                                                 size=self.input_shape,
                                                 align_corners=True)
            logits_up_cropped = tf.image.crop_to_bounding_box(
                logits_up, 0, 0, self.input_shape[0], self.input_shape[1])

            # Create output node for evaluation
            raw_predict = tf.argmax(logits_up, axis=3)
            predict = tf.expand_dims(raw_predict, axis=3, name="predict")

            # Create output node for inference
            output_classes = tf.argmax(logits_up_cropped, axis=3)
            output = decode_labels(output_classes, self.input_shape,
                                   self.num_classes)

            return low_res_logits, med_res_logits, high_res_logits, predict, output
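The zero_padding helper called throughout this example is not shown. A sketch consistent with how it is used above (symmetric spatial padding of an NHWC tensor; the repository version may also handle NCHW):

import tensorflow as tf

def zero_padding(inputs, paddings):
    # Pad height and width by `paddings` on each side; leave batch/channels alone.
    return tf.pad(inputs,
                  [[0, 0], [paddings, paddings], [paddings, paddings], [0, 0]])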
Example #16
    def _train_epoch(self, epoch):
        self.model.train()
        epoch_start = time.time()
        batch_start = time.time()
        train_loss = 0.
        running_metric_melons = runningScore(3)
        lr = self.optimizer.param_groups[0]['lr']

        for i, batch in enumerate(self.train_loader):
            if i >= self.train_loader_len:
                break
            self.global_step += 1
            lr = self.optimizer.param_groups[0]['lr']
            # print(self.optimizer, self.config['local_rank'])  # debug

            # Move batch tensors to the GPU
            for key, value in batch.items():
                if value is not None:
                    if isinstance(value, torch.Tensor):
                        batch[key] = value.to(self.device)

            cur_batch_size = batch['img'].size()[0]
            # print('image name :',batch['img_name'])
            self.optimizer.zero_grad()
            preds = self.model(batch['img'])
            loss_dict = self.criterion(preds, batch)
            # backward
            if isinstance(preds, tuple):
                preds = preds[0]
            # print('preds:', preds.shape)

            # Enable autograd anomaly detection for the backward pass
            # print(loss_dict['loss'])
            # exit()
            reduce_loss = self.all_reduce_tensor(loss_dict['loss'])
            with torch.autograd.detect_anomaly():
                # loss.backward()
                loss_dict['loss'].backward()
            self.optimizer.step()
            if self.config['lr_scheduler']['type'] == 'WarmupPolyLR':
                self.scheduler.step()
            # acc iou
            target = batch['label']
            h, w = target.size(1), target.size(2)
            scale_pred = F.interpolate(input=preds,
                                       size=(h, w),
                                       mode='bilinear',
                                       align_corners=True)
            label_preds = torch.argmax(scale_pred, dim=1)
            running_metric_melons.update(target.data.cpu().numpy(),
                                         label_preds.data.cpu().numpy())
            score_, _ = running_metric_melons.get_scores()

            # Log loss and accuracy
            loss_str = 'loss: {:.4f}, '.format(reduce_loss.item())
            for idx, (key, value) in enumerate(loss_dict.items()):
                loss_dict[key] = value.item()
                if key == 'loss':
                    continue
                loss_str += '{}: {:.4f}'.format(key, loss_dict[key])
                if idx < len(loss_dict) - 1:
                    loss_str += ', '

            train_loss += loss_dict['loss']
            # print(train_loss / self.train_loader_len,
            #       self.config['local_rank'])  # debug
            acc = score_['Mean Acc']
            iou_Mean_map = score_['Mean IoU']
            if self.global_step % self.log_iter == 0:
                batch_time = time.time() - batch_start
                self.logger_info(
                    '[{}/{}], [{}/{}], global_step: {}, speed: {:.1f} samples/sec, acc: {:.4f}, iou_Mean_map: {:.4f}, {}, lr:{:.6}, time:{:.2f}'
                    .format(epoch, self.epochs, i + 1, self.train_loader_len,
                            self.global_step,
                            self.log_iter * cur_batch_size / batch_time, acc,
                            iou_Mean_map, loss_str, lr, batch_time))
                batch_start = time.time()
            # print('loss_str', loss_str)

            if self.tensorboard_enable and self.config['local_rank'] == 0:
                # write tensorboard
                for key, value in loss_dict.items():
                    self.writer.add_scalar('TRAIN/LOSS/{}'.format(key), value,
                                           self.global_step)
                # acc, iou, and lr do not depend on the loss key, so write
                # them once per step rather than once per loss term
                self.writer.add_scalar('TRAIN/ACC_IOU/acc', acc,
                                       self.global_step)
                self.writer.add_scalar('TRAIN/ACC_IOU/iou_Mean_map',
                                       iou_Mean_map, self.global_step)
                self.writer.add_scalar('TRAIN/lr', lr, self.global_step)
                if self.global_step % self.show_images_iter == 0:
                    # show images on tensorboard
                    self.inverse_normalize(batch['img'])
                    preds_colors = decode_predictions(preds, cur_batch_size, 3)
                    self.writer.add_images('TRAIN/imgs',
                                           batch['img'][0].unsqueeze(0),
                                           self.global_step)
                    target = batch['label']
                    # (8, 256, 320, 3)

                    targets_colors = decode_labels(target, cur_batch_size, 3)
                    self.writer.add_image('TRAIN/labels',
                                          targets_colors[0],
                                          self.global_step,
                                          dataformats='HWC')
                    self.writer.add_image('TRAIN/preds',
                                          preds_colors[0],
                                          self.global_step,
                                          dataformats='HWC')
        return {
            'train_loss': train_loss / self.train_loader_len,
            'lr': lr,
            'time': time.time() - epoch_start,
            'epoch': epoch,
            'MeanIoU': iou_Mean_map
        }
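decode_predictions is not defined on this page. Given how it is called above (logits in, indexable (H, W, 3) color maps out), a plausible sketch is an argmax over the channel dimension followed by the same decode_labels used for the targets:

import torch

def decode_predictions(preds, num_images, num_classes):
    # (N, C, H, W) logits -> (N, H, W) class ids -> color maps via decode_labels.
    label_maps = torch.argmax(preds, dim=1).detach().cpu().numpy()
    return decode_labels(label_maps, num_images, num_classes)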
Example #17
def test(test_loader, net, criterion, epoch, showall=False):
    cnn0_loss, cnn0_accs, cnn0_mIoUs, cnn0_acc_clss, cnn0_fscore = \
        [AverageMeter() for _ in range(5)]
    cnn1_loss, cnn1_accs, cnn1_mIoUs, cnn1_acc_clss, cnn1_fscore = \
        [AverageMeter() for _ in range(5)]
    cnn2_loss, cnn2_accs, cnn2_mIoUs, cnn2_acc_clss, cnn2_fscore = \
        [AverageMeter() for _ in range(5)]

    # switch to evaluation mode
    net.eval()
    start_time = time.time()
    for batch_idx, (datas, targets) in enumerate(test_loader):
        if args.cuda:
            datas = datas.cuda()

        # compute output; Variable(volatile=True) is deprecated in PyTorch,
        # torch.no_grad() is the modern equivalent at evaluation time
        with torch.no_grad():
            scores = net(datas)

        multi_targets = combine_label(targets, COMB_DICTs)
        multi_targets_tensor = torch.from_numpy(multi_targets).long()
        if args.cuda:
            multi_targets_tensor = multi_targets_tensor.cuda()

        testlosses = []
        for i, score in enumerate(scores):
            targets_i = multi_targets_tensor[i, :, :, :]
            testlosses.append(criterion(score, targets_i))
        testloss = sum(testlosses)

        # measure accuracy and record loss
        preds = []
        for score in scores:
            p = score.data.max(1)[1]
            preds.append(p)
        for i, lbl_pred in enumerate(preds):
            lbl_pred = lbl_pred.cpu().numpy()[:, :, :]  # (n_batch, h, w)
            lbl_true = multi_targets[i, :, :, :]
            acc, acc_cls, mIoU, fscore = label_accuracy_score(
                lbl_true, lbl_pred, n_class=NUM_CLASSES[i])
            locals()['cnn%d_loss' % (i)].update(testlosses[i].item(),
                                                datas.size(0))
            locals()['cnn%d_accs' % (i)].update(acc, datas.size(0))
            locals()['cnn%d_acc_clss' % (i)].update(acc_cls, datas.size(0))
            locals()['cnn%d_mIoUs' % (i)].update(mIoU, datas.size(0))
            locals()['cnn%d_fscore' % (i)].update(fscore, datas.size(0))

        if showall:
            trues = decode_labels(targets, num_images=len(targets))
            for i, t in enumerate(trues):
                Image.fromarray(t).save(
                    'runs/{}_{}/results/{}_{}_gt.png'.format(
                        args.name, args.dataset, batch_idx, i))
                Image.fromarray(
                    (unNormalize(datas.data).transpose(1, 2).transpose(
                        2, 3).cpu().numpy()[i] * 255).astype(np.uint8)).save(
                            'runs/{}_{}/results/{}_{}_img.png'.format(
                                args.name, args.dataset, batch_idx, i))
            lbl_pred = preds[2]
            pred = decode_labels(lbl_pred, num_images=len(lbl_pred))
            for i, p in enumerate(pred):
                Image.fromarray(p).save(
                    'runs/{}_{}/results/{}_{}_pred.png'.format(
                        args.name, args.dataset, batch_idx, i))

        if showall and args.visdom:
            plot_images(datas, [p.cpu().numpy() for p in preds],
                        multi_targets,
                        epoch,
                        split='test',
                        crop_size=tuple(map(int, args.input_size.split(','))))

    duration = time.time() - start_time
    print(
        '\nTest set: Loss: {:.4f}, Acc: {:.2f}%, mIoU: {:.4f}, Acc_cls: {:.2f}%, f-score: {:.2f}% ({:.3f} sec)\n'
        .format(cnn2_loss.avg, 100. * cnn2_accs.avg, cnn2_mIoUs.avg,
                100 * cnn2_acc_clss.avg, 100 * cnn2_fscore.avg, duration))
    if args.visdom:
        for i in range(3):
            plotter.plot('cnn%d_acc' % (i),
                         'test',
                         epoch,
                         locals()['cnn%d_accs' % (i)].avg,
                         exp_name=args.name + '_' + args.dataset)
            plotter.plot('cnn%d_loss' % (i),
                         'test',
                         epoch,
                         locals()['cnn%d_loss' % (i)].avg,
                         exp_name=args.name + '_' + args.dataset)
            plotter.plot('cnn%d_mIoU' % (i),
                         'test',
                         epoch,
                         locals()['cnn%d_mIoUs' % (i)].avg,
                         exp_name=args.name + '_' + args.dataset)
            plotter.plot('cnn%d_acc_cls' % (i),
                         'test',
                         epoch,
                         locals()['cnn%d_acc_clss' % (i)].avg,
                         exp_name=args.name + '_' + args.dataset)
            plotter.plot('cnn%d_fscore' % (i),
                         'test',
                         epoch,
                         locals()['cnn%d_fscore' % (i)].avg,
                         exp_name=args.name + '_' + args.dataset)

        # plot images in a grid
        if epoch == 1 or epoch % 10 == 0:
            plot_images(datas, [p.cpu().numpy() for p in preds],
                        multi_targets,
                        epoch,
                        split='test',
                        crop_size=tuple(map(int, args.input_size.split(','))))

    return cnn2_accs.avg, cnn2_mIoUs.avg
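The AverageMeter instances above follow the standard helper from the PyTorch ImageNet example; a common implementation:

class AverageMeter(object):
    """Computes and stores the current value and the running average."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count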