Example #1
def main(argv):
    # Variable parameters
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    if FLAGS.vf_h == 64 and FLAGS.vf_w == 64:
        data_folder = '../quadsplit/data/' + FLAGS.dataset + '/' + FLAGS.setname + '_batch'
    else:
        data_folder = './data/' + FLAGS.dataset + '/' + FLAGS.setname + '_batch'
    data_prefix = FLAGS.dataset + '_' + FLAGS.setname
    reader = data_reader.DataReader(data_folder, data_prefix)
    snapshot_file = os.path.join(
        FLAGS.sfolder,
        FLAGS.dataset + '_' + FLAGS.modelname + '_iter_%d.tfmodel')

    if FLAGS.dataset in ['unc', 'unc+', 'Gref']:
        visual_feat_dir = '../data/coco/visual_feat/'
    elif FLAGS.dataset == 'referit':
        visual_feat_dir = '../data/referit/visual_feat/'
    else:
        raise ValueError('Unknown dataset %s' % FLAGS.dataset)
    if FLAGS.vf_h == 64 and FLAGS.vf_w == 64:
        visual_feat_dir = visual_feat_dir[:-1] + '_512x512/'

    if FLAGS.mode == 'train':
        if not os.path.isdir(FLAGS.sfolder):
            os.makedirs(FLAGS.sfolder)
        train(reader, snapshot_file, visual_feat_dir)
    elif FLAGS.mode == 'test':
        test(reader, snapshot_file, visual_feat_dir)
    else:
        raise ValueError('Invalid mode: %s' % FLAGS.mode)
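
Example #1 reads its configuration from a TF1-style FLAGS object. A plausible set of flag definitions, inferred from the fields used above (flag names come from the call sites; the defaults and help strings are assumptions):

import tensorflow as tf

flags = tf.app.flags
flags.DEFINE_string('gpu', '0', 'value for CUDA_VISIBLE_DEVICES')
flags.DEFINE_string('dataset', 'referit', 'one of unc, unc+, Gref, referit')
flags.DEFINE_string('setname', 'train', 'split name, e.g. train or val')
flags.DEFINE_string('modelname', 'model', 'identifier used in snapshot file names')
flags.DEFINE_string('sfolder', './tfmodel', 'folder for snapshot files')
flags.DEFINE_string('mode', 'train', 'train or test')
flags.DEFINE_integer('vf_h', 40, 'visual feature map height')
flags.DEFINE_integer('vf_w', 40, 'visual feature map width')
FLAGS = flags.FLAGS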
Example #2
    def train(self):
        init_op = tf.global_variables_initializer()
        self.sess.run(init_op)
        saver = tf.train.Saver()
        data_reader = dr.DataReader(self.data_dir,
                                    163446,
                                    self.batch_size,
                                    0.8,
                                    reproducible=True)
        tf.summary.image('image', self.image_in, 10)
        summary_op = tf.summary.merge_all()
        writer_train = tf.summary.FileWriter(self.log_dir + '/train',
                                             self.sess.graph)
        writer_test = tf.summary.FileWriter(self.log_dir + '/test',
                                            self.sess.graph)
        step = 1
        while data_reader.epoch < self.max_epoch:
            if step % 100 == 0:
                images, label = data_reader.next_batch(phase_train=False)
                reshaped_image = np.reshape(images,
                                            [self.batch_size, 250, 250, 3])
                feed_dict = {
                    self.image_in: reshaped_image,
                    self.label_in: label
                }
                start_time = time.time()
                err, acc, summary_str = self.sess.run(
                    [self.loss, self.accuracy, summary_op],
                    feed_dict=feed_dict)
                duration = time.time() - start_time
                print(
                    'Batch:%d\tEpoch:%d\tTime:%.3f\tLoss:%2.4f\tAcc:%2.4f\t@[TEST]'
                    % (data_reader.current_test_batch_index, data_reader.epoch,
                       duration, err, acc))
                writer_test.add_summary(summary_str, step)
            else:
                images, label = data_reader.next_batch(phase_train=True)
                reshaped_image = np.reshape(images,
                                            [self.batch_size, 250, 250, 3])
                feed_dict = {
                    self.image_in: reshaped_image,
                    self.label_in: label
                }
                start_time = time.time()
                err, acc, summary_str, _ = self.sess.run(
                    [self.loss, self.accuracy, summary_op, self.opt],
                    feed_dict=feed_dict)
                duration = time.time() - start_time
                print('Batch:%d\tEpoch:%d\tTime:%.3f\tLoss:%2.4f\tAcc:%2.4f\t' %
                      (data_reader.current_train_batch_index,
                       data_reader.epoch, duration, err, acc))
                writer_train.add_summary(summary_str, step)
            if step % 3268 == 0:
                if not os.path.exists(self.model_dir):
                    os.makedirs(self.model_dir)
                saver.save(self.sess, self.model_dir, step)

            step += 1
Example #3
    def testDataReader(self):
        batch_size = 5
        dataReader = data_reader.DataReader('/home/bingzhang/Documents/Dataset/CACD/data', 163446, batch_size, 0.8,
                                            True)

        # print dataReader.train_indices_set
        # print dataReader.test_indices_set
        for i in range(2):
            x, y = dataReader.next_batch(phase_train=True)
        x = np.reshape(x, [batch_size, 250, 250, 3])
        sio.savemat('testDataReader.mat', {'im': x, 'label': y})
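
Examples #2 and #3 construct the reader with an explicit dataset size, batch size and train/test split ratio. A minimal sketch of the interface those call sites assume (constructor semantics and attribute names are inferred from the usage above, not taken from the real class):

import numpy as np

class DataReader(object):
    # Sketch of the reader used in Examples #2-3: shuffle indices once,
    # split them into train/test sets, then serve fixed-size batches
    # while tracking the current batch indices and epoch.
    def __init__(self, data_dir, num_examples, batch_size, train_ratio,
                 reproducible=False):
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.epoch = 0
        self.current_train_batch_index = 0
        self.current_test_batch_index = 0
        rng = np.random.RandomState(0) if reproducible else np.random
        indices = rng.permutation(num_examples)
        split = int(num_examples * train_ratio)
        self.train_indices_set = indices[:split]
        self.test_indices_set = indices[split:]

    def next_batch(self, phase_train=True):
        # Return (images, labels) for the next train or test batch;
        # the actual file loading lives in the real implementation.
        raise NotImplementedError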
Example #4
grads_and_vars = solver.compute_gradients(total_loss, var_list=train_var_list)
# Apply learning rate multiplication to gradients
grads_and_vars = [
    ((g if var_lr_mult[v] == 1 else tf.multiply(var_lr_mult[v], g)), v)
    for g, v in grads_and_vars
]
# Apply gradients
train_step = solver.apply_gradients(grads_and_vars, global_step=global_step)

################################################################################
# Initialize parameters and load data
################################################################################
snapshot_loader = tf.train.Saver(tf.trainable_variables())

# Load data
reader = data_reader.DataReader(data_folder, data_prefix)

snapshot_saver = tf.train.Saver()
sess = tf.Session()

# Run Initialization operations
sess.run(tf.global_variables_initializer())
snapshot_loader.restore(sess, pretrained_model)

################################################################################
# Optimization loop
################################################################################

cls_loss_avg = 0
avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg = 0, 0, 0
decay = 0.99
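
The var_lr_mult lookup above scales selected gradients by a per-variable learning-rate multiplier before they are applied. A minimal sketch of how such a mapping could be built, assuming the common convention of giving bias terms a larger multiplier (the selection rule here is illustrative, not the original one):

import tensorflow as tf

# Hypothetical construction of var_lr_mult: bias terms get twice the base
# learning rate, every other variable gets the base rate (multiplier 1).
train_var_list = tf.trainable_variables()
var_lr_mult = {v: (2.0 if v.op.name.endswith('biases') else 1.0)
               for v in train_var_list}

Example #5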
def train(max_iter, snapshot, dataset, setname, mu, lr, bs, tfmodel_folder,
          conv5, model_name, stop_iter, pre_emb=False):
    iters_per_log = 100
    data_folder = './' + dataset + '/' + setname + '_batch/'
    data_prefix = dataset + '_' + setname
    snapshot_file = os.path.join(tfmodel_folder, dataset + '_iter_%d.tfmodel')
    if not os.path.isdir(tfmodel_folder):
        os.makedirs(tfmodel_folder)

    cls_loss_avg = 0
    avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg = 0, 0, 0
    decay = 0.99
    vocab_size = 8803 if dataset == 'referit' else 12112
    emb_name = 'referit' if dataset == 'referit' else 'Gref'

    if pre_emb:
        print("Use pretrained Embeddings.")
        model = get_segmentation_model(model_name, mode='train',
                                       vocab_size=vocab_size, start_lr=lr,
                                       batch_size=bs, conv5=conv5, emb_name=emb_name)
    else:
        model = get_segmentation_model(model_name, mode='train',
                                       vocab_size=vocab_size, start_lr=lr,
                                       batch_size=bs, conv5=conv5)

    weights = './data/weights/deeplab_resnet_init.ckpt'
    print("Loading pretrained weights from {}".format(weights))
    load_var = {var.op.name: var for var in tf.global_variables()
                if var.name.startswith('res') or var.name.startswith('bn') or var.name.startswith('conv1')}

    snapshot_loader = tf.train.Saver(load_var)
    snapshot_saver = tf.train.Saver(max_to_keep=4)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    snapshot_loader.restore(sess, weights)

    im_h, im_w, num_steps = model.H, model.W, model.num_steps
    text_batch = np.zeros((bs, num_steps), dtype=np.float32)
    image_batch = np.zeros((bs, im_h, im_w, 3), dtype=np.float32)
    mask_batch = np.zeros((bs, im_h, im_w, 1), dtype=np.float32)
    valid_idx_batch = np.zeros((bs, 1), dtype=np.int32)

    reader = data_reader.DataReader(data_folder, data_prefix)

    # for time calculate
    last_time = time.time()
    time_avg = MovingAverage()
    for n_iter in range(max_iter):

        for n_batch in range(bs):
            batch = reader.read_batch(is_log=(n_batch == 0 and n_iter % iters_per_log == 0))
            text = batch['text_batch']
            im = batch['im_batch'].astype(np.float32)
            mask = np.expand_dims(batch['mask_batch'].astype(np.float32), axis=2)

            im = im[:, :, ::-1]
            im -= mu

            text_batch[n_batch, ...] = text
            image_batch[n_batch, ...] = im
            mask_batch[n_batch, ...] = mask

            for idx in range(text.shape[0]):
                if text[idx] != 0:
                    valid_idx_batch[n_batch, :] = idx
                    break

        _, cls_loss_val, lr_val, scores_val, label_val = sess.run(
            [model.train_step, model.cls_loss, model.learning_rate,
             model.pred, model.target],
            feed_dict={
                model.words: text_batch,        # np.expand_dims(text, axis=0)
                model.im: image_batch,          # np.expand_dims(im, axis=0)
                model.target_fine: mask_batch,  # np.expand_dims(mask, axis=0)
                model.valid_idx: valid_idx_batch
            })
        cls_loss_avg = decay * cls_loss_avg + (1 - decay) * cls_loss_val

        # Accuracy
        accuracy_all, accuracy_pos, accuracy_neg = compute_accuracy(scores_val, label_val)
        avg_accuracy_all = decay * avg_accuracy_all + (1 - decay) * accuracy_all
        avg_accuracy_pos = decay * avg_accuracy_pos + (1 - decay) * accuracy_pos
        avg_accuracy_neg = decay * avg_accuracy_neg + (1 - decay) * accuracy_neg

        # timing
        cur_time = time.time()
        elapsed = cur_time - last_time
        last_time = cur_time

        if n_iter % iters_per_log == 0:
            print('iter = %d, loss (cur) = %f, loss (avg) = %f, lr = %f'
                  % (n_iter, cls_loss_val, cls_loss_avg, lr_val))
            print('iter = %d, accuracy (cur) = %f (all), %f (pos), %f (neg)'
                  % (n_iter, accuracy_all, accuracy_pos, accuracy_neg))
            print('iter = %d, accuracy (avg) = %f (all), %f (pos), %f (neg)'
                  % (n_iter, avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg))
            time_avg.add(elapsed)
            print('iter = %d, cur time = %.5f, avg time = %.5f, model_name: %s' % (n_iter, elapsed, time_avg.get_avg(), model_name))

        # Save snapshot
        if (n_iter + 1) % snapshot == 0 or (n_iter + 1) >= max_iter:
            snapshot_saver.save(sess, snapshot_file % (n_iter + 1))
            print('snapshot saved to ' + snapshot_file % (n_iter + 1))
        if (n_iter + 1) >= stop_iter:
            print('stop training at iter ' + str(stop_iter))
            break

    print('Optimization done.')
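
Example #5 times its iterations with a MovingAverage helper. A minimal sketch consistent with the add()/get_avg() calls above (the real helper may instead keep a bounded window):

class MovingAverage(object):
    # Running mean over all values added so far (assumed behavior).
    def __init__(self):
        self.total = 0.0
        self.count = 0

    def add(self, value):
        self.total += value
        self.count += 1

    def get_avg(self):
        return self.total / self.count if self.count else 0.0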
Example #6
    def train(self):
        # Build the model and get train_op
        self.build_model()
        train_op = self.train_op(self.total_loss, self.get_train_var_list())

        reader = data_reader.DataReader(self.data_folder, self.data_prefix)

        cls_loss_avg = 0
        avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg = 0, 0, 0
        decay = 0.99

        # Accuracy placeholders
        acc_all = tf.placeholder(tf.float32, shape=())
        acc_pos = tf.placeholder(tf.float32, shape=())
        acc_neg = tf.placeholder(tf.float32, shape=())
        acc_all_avg = tf.placeholder(tf.float32, shape=())
        acc_pos_avg = tf.placeholder(tf.float32, shape=())
        acc_neg_avg = tf.placeholder(tf.float32, shape=())

        # Add summaries for TensorBoard
        tf.summary.scalar('loss', self.cls_loss, ['train'])
        tf.summary.scalar('learning_rate', self.learning_rate, ['train'])
        tf.summary.scalar('accuracy_all', acc_all, ['acc'])
        tf.summary.scalar('accuracy_positive', acc_pos, ['acc'])
        tf.summary.scalar('accuracy_negative', acc_neg, ['acc'])
        tf.summary.scalar('accuracy_all_average', acc_all_avg, ['acc'])
        tf.summary.scalar('accuracy_positive_average', acc_pos_avg, ['acc'])
        tf.summary.scalar('accuracy_negative_average', acc_neg_avg, ['acc'])
        train_summary = tf.summary.merge_all(key='train')
        acc_summary = tf.summary.merge_all(key='acc')

        # tf.train.Saver is used to save and load intermediate models.
        self.saver = tf.train.Saver(max_to_keep=50,
                                    keep_checkpoint_every_n_hours=1)

        sess = tf.Session()
        self.sess = sess

        # Init train_writer
        train_writer = tf.summary.FileWriter(self.log_folder, sess.graph)

        # Run initialization operations
        sess.run(tf.global_variables_initializer())
        self.initialize(sess)

        self.log_info()

        for n_iter in range(1, self.max_iter + 1):
            batch = reader.read_batch()
            text_seq_val = batch['text_seq_batch']
            imcrop_val = batch['imcrop_batch'].astype(
                np.float32) - self.channel_mean
            label_val = batch['label_fine_batch'].astype(np.float32)

            start_time = time.time()

            # Forward and Backward pass
            scores_val, cls_loss_val, _, lr_val, train_sum = sess.run(
                [
                    self.scores, self.cls_loss, train_op, self.learning_rate,
                    train_summary
                ],
                feed_dict={
                    self.text_seq_batch: text_seq_val,
                    self.imcrop_batch: imcrop_val,
                    self.label_batch: label_val
                })

            duration = time.time() - start_time

            cls_loss_avg = decay * cls_loss_avg + (1 - decay) * cls_loss_val

            # Accuracy
            accuracy_all, accuracy_pos, accuracy_neg = compute_accuracy(
                scores_val, label_val)
            avg_accuracy_all = decay * avg_accuracy_all + (
                1 - decay) * accuracy_all
            avg_accuracy_pos = decay * avg_accuracy_pos + (
                1 - decay) * accuracy_pos
            avg_accuracy_neg = decay * avg_accuracy_neg + (
                1 - decay) * accuracy_neg

            # log accuracy per iter
            num_examples_per_step = self.batch_size
            examples_per_sec = num_examples_per_step / duration
            sec_per_batch = float(duration)

            format_str = '%s: iter %d, %.1f examples/sec; %.3f sec/batch'
            print(format_str %
                  (datetime.now(), n_iter, examples_per_sec, sec_per_batch))

            print(
                '\titer = %d, cls_loss (cur) = %f, cls_loss (avg) = %f, lr = %f'
                % (n_iter, cls_loss_val, cls_loss_avg, lr_val))
            print(
                '\titer = %d, accuracy (cur) = %f (all), %f (pos), %f (neg)' %
                (n_iter, accuracy_all, accuracy_pos, accuracy_neg))
            print(
                '\titer = %d, accuracy (avg) = %f (all), %f (pos), %f (neg)' %
                (n_iter, avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg))

            if n_iter % self.log_step == 0:
                # Fill in the accuracy placeholders
                acc_sum = sess.run(acc_summary,
                                   feed_dict={
                                       acc_all: accuracy_all,
                                       acc_pos: accuracy_pos,
                                       acc_neg: accuracy_neg,
                                       acc_all_avg: avg_accuracy_all,
                                       acc_pos_avg: avg_accuracy_pos,
                                       acc_neg_avg: avg_accuracy_neg
                                   })

                train_writer.add_summary(train_sum, n_iter)
                train_writer.add_summary(acc_sum, n_iter)

            if n_iter % self.checkpoint_step == 0 or n_iter >= self.max_iter:
                checkpoint_path = os.path.join(self.log_folder, 'checkpoints')
                self.save(checkpoint_path, n_iter)
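
Most of the training loops here log values from compute_accuracy(scores, labels). One plausible implementation, assuming scores are pre-sigmoid logits and labels are binary masks (the threshold and return order are inferred from the call sites, not from the original helper):

import numpy as np

def compute_accuracy(scores, labels):
    # Pixel accuracy over all, positive, and negative pixels (assumed
    # semantics; a logit >= 0 corresponds to a sigmoid score >= 0.5).
    is_pos = labels != 0
    is_neg = np.logical_not(is_pos)
    is_correct = (scores >= 0) == is_pos
    accuracy_all = is_correct.sum() / float(labels.size)
    accuracy_pos = is_correct[is_pos].sum() / float(max(is_pos.sum(), 1))
    accuracy_neg = is_correct[is_neg].sum() / float(max(is_neg.sum(), 1))
    return accuracy_all, accuracy_pos, accuracy_neg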
Example #7
def train(modelname, max_iter, snapshot, dataset, weights, setname, mu, lr, bs,
          tfmodel_folder, conv5, re_iter):

    iters_per_log = 50000
    data_folder = './' + dataset + '/' + setname + '_batch/'
    data_prefix = dataset + '_' + setname

    tfmodel_folder = './' + dataset + '/tfmodel/CMSA/'
    snapshot_file = os.path.join(
        tfmodel_folder,
        dataset + '_' + weights + '_' + modelname + '_iter_%d.tfmodel')

    if not os.path.isdir(tfmodel_folder):
        os.makedirs(tfmodel_folder)

    cls_loss_avg = 0
    avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg = 0, 0, 0
    decay = 0.99
    vocab_size = 8803 if dataset == 'referit' else 12112

    model = CMSA_model(mode='train',
                       vocab_size=vocab_size,
                       weights=weights,
                       start_lr=lr,
                       batch_size=bs,
                       conv5=conv5)

    if re_iter is None:
        pretrained_model = 'models/deeplab_resnet_init.ckpt'
        #pretrained_model = 'models/deeplab_resnet.ckpt'
        load_var = {
            var.op.name: var
            for var in tf.global_variables() if var.name.startswith('res')
            or var.name.startswith('bn') or var.name.startswith('conv1')
        }
        snapshot_loader = tf.train.Saver(load_var)
        snapshot_saver = tf.train.Saver(max_to_keep=1000)
        re_iter = 0
    else:
        print('resume from %d' % re_iter)
        pretrained_model = os.path.join(
            tfmodel_folder, dataset + '_' + weights + '_' + modelname +
            '_iter_' + str(re_iter) + '.tfmodel')
        snapshot_loader = tf.train.Saver()
        snapshot_saver = tf.train.Saver(max_to_keep=1000)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    snapshot_loader.restore(sess, pretrained_model)

    im_h, im_w, num_steps = model.H, model.W, model.num_steps
    text_batch = np.zeros((bs, num_steps), dtype=np.float32)
    image_batch = np.zeros((bs, im_h, im_w, 3), dtype=np.float32)
    mask_batch = np.zeros((bs, im_h, im_w, 1), dtype=np.float32)

    reader = data_reader.DataReader(data_folder, data_prefix)

    for n_iter in range(re_iter, max_iter):
        for n_batch in range(bs):
            batch = reader.read_batch(
                is_log=(n_batch == 0 and n_iter % iters_per_log == 0))
            text = batch['text_batch']
            im = batch['im_batch'].astype(np.float32)
            mask = np.expand_dims(batch['mask_batch'].astype(np.float32),
                                  axis=2)

            im = im[:, :, ::-1]
            im -= mu

            text_batch[n_batch, ...] = text
            image_batch[n_batch, ...] = im
            mask_batch[n_batch, ...] = mask

        _, cls_loss_val, lr_val, scores_val, label_val = sess.run(
            [
                model.train_step, model.cls_loss, model.learning_rate,
                model.pred, model.target
            ],
            feed_dict={
                model.words: text_batch,
                model.im: image_batch,
                model.target_fine: mask_batch
            })

        cls_loss_avg = decay * cls_loss_avg + (1 - decay) * cls_loss_val

        # Accuracy
        accuracy_all, accuracy_pos, accuracy_neg = compute_accuracy(
            scores_val, label_val)
        avg_accuracy_all = decay * avg_accuracy_all + (1 - decay) * accuracy_all
        avg_accuracy_pos = decay * avg_accuracy_pos + (1 - decay) * accuracy_pos
        avg_accuracy_neg = decay * avg_accuracy_neg + (1 - decay) * accuracy_neg

        if n_iter % iters_per_log == 0:
            print('iter = %d, loss (cur) = %f, loss (avg) = %f, lr = %f' %
                  (n_iter, cls_loss_val, cls_loss_avg, lr_val))
            #print('iter = %d, accuracy (cur) = %f (all), %f (pos), %f (neg)'
            #        % (n_iter, accuracy_all, accuracy_pos, accuracy_neg))
            print(
                'iter = %d, accuracy (avg) = %f (all), %f (pos), %f (neg)' %
                (n_iter, avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg))

        # Save snapshot
        if (n_iter + 1) % snapshot == 0 or (n_iter + 1) >= max_iter:
            snapshot_saver.save(sess, snapshot_file % (n_iter + 1))
            print('snapshot saved to ' + snapshot_file % (n_iter + 1))

    print('Optimization done.')
Example #8
def test(modelname, iter, dataset, weights, setname, dcrf, mu, tfmodel_folder):
    data_folder = './' + dataset + '/' + setname + '_batch/'
    data_prefix = dataset + '_' + setname

    tfmodel_folder = './' + dataset + '/tfmodel/CMSA'

    pretrained_model = os.path.join(
        tfmodel_folder, dataset + '_' + modelname + '_release' + '.tfmodel')

    score_thresh = 1e-9
    eval_seg_iou_list = [.5, .6, .7, .8, .9]
    cum_I, cum_U = 0, 0
    mean_IoU, mean_dcrf_IoU = 0, 0
    seg_correct = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
    if dcrf:
        cum_I_dcrf, cum_U_dcrf = 0, 0
        seg_correct_dcrf = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
    seg_total = 0.
    H, W = 320, 320
    vocab_size = 8803 if dataset == 'referit' else 12112
    IU_result = list()

    model = CMSA_model(H=H,
                       W=W,
                       mode='eval',
                       vocab_size=vocab_size,
                       weights=weights)

    # Load pretrained model
    snapshot_restorer = tf.train.Saver()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    snapshot_restorer.restore(sess, pretrained_model)
    reader = data_reader.DataReader(data_folder, data_prefix, shuffle=False)

    NN = reader.num_batch
    print('test in', dataset, setname)
    for n_iter in range(reader.num_batch):

        if n_iter % (NN // 50) == 0:
            if (n_iter // (NN // 50)) % 5 == 0:
                sys.stdout.write(str(n_iter // (NN // 50) // 5))
            else:
                sys.stdout.write('.')
            sys.stdout.flush()

        batch = reader.read_batch(is_log=False)
        text = batch['text_batch']
        im = batch['im_batch']
        mask = batch['mask_batch'].astype(np.float32)

        proc_im = skimage.img_as_ubyte(im_processing.resize_and_pad(im, H, W))
        proc_im_ = proc_im.astype(np.float32)
        proc_im_ = proc_im_[:, :, ::-1]
        proc_im_ -= mu

        scores_val, up_val, sigm_val = sess.run(
            [model.pred, model.up, model.sigm],
            feed_dict={
                model.words: np.expand_dims(text, axis=0),
                model.im: np.expand_dims(proc_im_, axis=0)
            })

        up_val = np.squeeze(up_val)
        pred_raw = (up_val >= score_thresh).astype(np.float32)
        predicts = im_processing.resize_and_crop(pred_raw, mask.shape[0],
                                                 mask.shape[1])
        if dcrf:
            # Dense CRF post-processing
            sigm_val = np.squeeze(sigm_val)
            d = densecrf.DenseCRF2D(W, H, 2)
            U = np.expand_dims(-np.log(sigm_val), axis=0)
            U_ = np.expand_dims(-np.log(1 - sigm_val), axis=0)
            unary = np.concatenate((U_, U), axis=0)
            unary = unary.reshape((2, -1))
            d.setUnaryEnergy(unary)
            d.addPairwiseGaussian(sxy=3, compat=3)
            d.addPairwiseBilateral(sxy=20, srgb=3, rgbim=proc_im, compat=10)
            Q = d.inference(5)
            pred_raw_dcrf = np.argmax(Q, axis=0).reshape(
                (H, W)).astype(np.float32)
            predicts_dcrf = im_processing.resize_and_crop(
                pred_raw_dcrf, mask.shape[0], mask.shape[1])

        I, U = eval_tools.compute_mask_IU(predicts, mask)
        IU_result.append({'batch_no': n_iter, 'I': I, 'U': U})
        mean_IoU += float(I) / U
        cum_I += I
        cum_U += U
        msg = 'cumulative IoU = %f' % (cum_I / cum_U)
        for n_eval_iou in range(len(eval_seg_iou_list)):
            eval_seg_iou = eval_seg_iou_list[n_eval_iou]
            seg_correct[n_eval_iou] += (I / U >= eval_seg_iou)
        if dcrf:
            I_dcrf, U_dcrf = eval_tools.compute_mask_IU(predicts_dcrf, mask)
            mean_dcrf_IoU += float(I_dcrf) / U_dcrf
            cum_I_dcrf += I_dcrf
            cum_U_dcrf += U_dcrf
            msg += '\tcumulative IoU (dcrf) = %f' % (cum_I_dcrf / cum_U_dcrf)
            for n_eval_iou in range(len(eval_seg_iou_list)):
                eval_seg_iou = eval_seg_iou_list[n_eval_iou]
                seg_correct_dcrf[n_eval_iou] += (I_dcrf / U_dcrf >=
                                                 eval_seg_iou)
        # print(msg)
        seg_total += 1

    # Print results
    print('Segmentation evaluation (without DenseCRF):')
    result_str = ''
    for n_eval_iou in range(len(eval_seg_iou_list)):
        result_str += 'precision@%s = %f\n' % \
            (str(eval_seg_iou_list[n_eval_iou]), seg_correct[n_eval_iou]/seg_total)
    result_str += 'overall IoU = %f; mean IoU = %f\n' % (cum_I / cum_U,
                                                         mean_IoU / seg_total)
    print(result_str)
    if dcrf:
        print('Segmentation evaluation (with DenseCRF):')
        result_str = ''
        for n_eval_iou in range(len(eval_seg_iou_list)):
            result_str += 'precision@%s = %f\n' % \
                (str(eval_seg_iou_list[n_eval_iou]), seg_correct_dcrf[n_eval_iou]/seg_total)
        result_str += 'overall IoU = %f; mean IoU = %f\n' % (
            cum_I_dcrf / cum_U_dcrf, mean_dcrf_IoU / seg_total)
        print(result_str)
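
The evaluation loops in Examples #8-10 accumulate intersection and union counts from eval_tools.compute_mask_IU. A minimal sketch under the assumption that both inputs are binary masks of the same shape (assumed to match the eval_tools helper, which is not shown here):

import numpy as np

def compute_mask_IU(pred, target):
    # Intersection and union pixel counts for two binary masks.
    pred = pred != 0
    target = target != 0
    I = np.logical_and(pred, target).sum()
    U = np.logical_or(pred, target).sum()
    return I, U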
Example #9
def test(iter,
         dataset,
         visualize,
         setname,
         dcrf,
         mu,
         tfmodel_folder,
         pre_emb=False,
         use_tree=False,
         neg_num=0.1):
    data_folder = './' + dataset + '/' + setname + '_batch/'
    data_prefix = dataset + '_' + setname
    if visualize:
        save_dir = './' + dataset + '/visualization/' + str(iter) + '/'
        if not os.path.isdir(save_dir):
            os.makedirs(save_dir)
    weights = os.path.join(tfmodel_folder,
                           dataset + '_iter_' + str(iter) + '.tfmodel')

    score_thresh = 1e-9
    eval_seg_iou_list = [.5, .6, .7, .8, .9]
    cum_I, cum_U = 0, 0
    mean_IoU, mean_dcrf_IoU = 0, 0
    seg_correct = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
    if dcrf:
        cum_I_dcrf, cum_U_dcrf = 0, 0
        seg_correct_dcrf = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
    seg_total = 0.
    H, W = 320, 320
    vocab_size = 8226 if dataset == 'referit' else 21692
    emb_name = 'referit' if dataset == 'referit' else 'Gref'

    IU_result = list()

    if pre_emb:
        # use pretrained embedding
        print("Use pretrained Embeddings.")
        model = LSCM_model(num_steps=30,
                           H=H,
                           W=W,
                           mode='eval',
                           vocab_size=vocab_size,
                           emb_name=emb_name)
    else:
        model = LSCM_model(num_steps=30,
                           H=H,
                           W=W,
                           mode='eval',
                           vocab_size=vocab_size)

    # Load pretrained model
    snapshot_restorer = tf.train.Saver()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    snapshot_restorer.restore(sess, weights)
    reader = data_reader.DataReader(data_folder, data_prefix, shuffle=False)

    NN = reader.num_batch
    for n_iter in range(reader.num_batch):

        if n_iter % (NN // 50) == 0:
            if (n_iter // (NN // 50)) % 5 == 0:
                sys.stdout.write(str(n_iter // (NN // 50) // 5))
            else:
                sys.stdout.write('.')
            sys.stdout.flush()

        batch = reader.read_batch(is_log=False)
        text = batch['text_batch']
        im = batch['im_batch']
        mask = batch['mask_batch'].astype(np.float32)
        valid_idx = np.zeros([1], dtype=np.int32)
        graph = batch['graph_batch']
        height = batch['height_batch']
        for idx in range(text.shape[0]):
            if text[idx] != 0:
                valid_idx[0] = idx
                break

        if neg_num != 0.1:
            graph[graph < 0.5] = neg_num

        proc_im = skimage.img_as_ubyte(im_processing.resize_and_pad(im, H, W))
        proc_im_ = proc_im.astype(np.float32)
        proc_im_ = proc_im_[:, :, ::-1]
        proc_im_ -= mu

        if use_tree:
            scores_val, up_val, sigm_val = sess.run(
                [model.pred, model.up, model.sigm],
                feed_dict={
                    model.words: np.expand_dims(text, axis=0),
                    model.im: np.expand_dims(proc_im_, axis=0),
                    model.valid_idx: np.expand_dims(valid_idx, axis=0),
                    model.graph_adj: np.expand_dims(graph, axis=0),
                    model.tree_height: np.expand_dims(height, axis=0)
                })
        else:
            scores_val, up_val, sigm_val = sess.run(
                [model.pred, model.up, model.sigm],
                feed_dict={
                    model.words: np.expand_dims(text, axis=0),
                    model.im: np.expand_dims(proc_im_, axis=0),
                    model.valid_idx: np.expand_dims(valid_idx, axis=0)
                })

        # scores_val = np.squeeze(scores_val)
        # pred_raw = (scores_val >= score_thresh).astype(np.float32)
        up_val = np.squeeze(up_val)
        pred_raw = (up_val >= score_thresh).astype(np.float32)
        predicts = im_processing.resize_and_crop(pred_raw, mask.shape[0],
                                                 mask.shape[1])
        if dcrf:
            # Dense CRF post-processing
            sigm_val = np.squeeze(sigm_val)
            d = densecrf.DenseCRF2D(W, H, 2)
            U = np.expand_dims(-np.log(sigm_val), axis=0)
            U_ = np.expand_dims(-np.log(1 - sigm_val), axis=0)
            unary = np.concatenate((U_, U), axis=0)
            unary = unary.reshape((2, -1))
            d.setUnaryEnergy(unary)
            d.addPairwiseGaussian(sxy=3, compat=3)
            d.addPairwiseBilateral(sxy=20, srgb=3, rgbim=proc_im, compat=10)
            Q = d.inference(5)
            pred_raw_dcrf = np.argmax(Q, axis=0).reshape(
                (H, W)).astype(np.float32)
            predicts_dcrf = im_processing.resize_and_crop(
                pred_raw_dcrf, mask.shape[0], mask.shape[1])

        if visualize:
            sent = batch['sent_batch'][0]
            visualize_seg(im, mask, predicts, sent)
            if dcrf:
                visualize_seg(im, mask, predicts_dcrf, sent)

        I, U = eval_tools.compute_mask_IU(predicts, mask)
        IU_result.append({'batch_no': n_iter, 'I': I, 'U': U})
        mean_IoU += float(I) / U
        cum_I += I
        cum_U += U
        msg = 'cumulative IoU = %f' % (cum_I / cum_U)
        for n_eval_iou in range(len(eval_seg_iou_list)):
            eval_seg_iou = eval_seg_iou_list[n_eval_iou]
            seg_correct[n_eval_iou] += (I / U >= eval_seg_iou)
        if dcrf:
            I_dcrf, U_dcrf = eval_tools.compute_mask_IU(predicts_dcrf, mask)
            mean_dcrf_IoU += float(I_dcrf) / U_dcrf
            cum_I_dcrf += I_dcrf
            cum_U_dcrf += U_dcrf
            msg += '\tcumulative IoU (dcrf) = %f' % (cum_I_dcrf / cum_U_dcrf)
            for n_eval_iou in range(len(eval_seg_iou_list)):
                eval_seg_iou = eval_seg_iou_list[n_eval_iou]
                seg_correct_dcrf[n_eval_iou] += (I_dcrf / U_dcrf >=
                                                 eval_seg_iou)
        # print(msg)
        seg_total += 1

    # Print results
    print('Segmentation evaluation (without DenseCRF):')
    result_str = ''
    for n_eval_iou in range(len(eval_seg_iou_list)):
        result_str += 'precision@%s = %f\n' % \
                      (str(eval_seg_iou_list[n_eval_iou]), seg_correct[n_eval_iou] / seg_total)
    result_str += 'overall IoU = %f; mean IoU = %f\n' % (cum_I / cum_U,
                                                         mean_IoU / seg_total)
    print(result_str)
    if dcrf:
        print('Segmentation evaluation (with DenseCRF):')
        result_str = ''
        for n_eval_iou in range(len(eval_seg_iou_list)):
            result_str += 'precision@%s = %f\n' % \
                          (str(eval_seg_iou_list[n_eval_iou]), seg_correct_dcrf[n_eval_iou] / seg_total)
        result_str += 'overall IoU = %f; mean IoU = %f\n' % (
            cum_I_dcrf / cum_U_dcrf, mean_dcrf_IoU / seg_total)
        print(result_str)
Example #10
def test(modelname, iter, dataset, visualize, weights, setname, dcrf, mu):

    data_folder = './' + dataset + '/' + setname + '_batch/'
    data_prefix = dataset + '_' + setname
    if visualize:
        save_dir = './' + dataset + '/visualization/' + modelname + '_' + str(iter) + '/'
        if not os.path.isdir(save_dir):
            os.makedirs(save_dir)
    pretrained_model = './' + dataset + '/tfmodel_BRI/' + dataset + '_' + weights + '_' + modelname + '_iter_' + str(iter) + '.tfmodel'
    score_thresh = 1e-9
    eval_seg_iou_list = [.5, .6, .7, .8, .9]
    cum_I, cum_U = 0, 0
    seg_correct = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
    if dcrf:
        cum_I_dcrf, cum_U_dcrf = 0, 0
        seg_correct_dcrf = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
    seg_total = 0.
    H, W = 320, 320
    vocab_size = 8803 if dataset == 'referit' else 12112
    if modelname == 'BRI':
        model = BRI_model(H=H, W=W, mode='eval', vocab_size=vocab_size, weights=weights)
    else:
        raise ValueError('Unknown model name %s' % (modelname))

    # Load pretrained model
    snapshot_restorer = tf.train.Saver()
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    snapshot_restorer.restore(sess, pretrained_model)
    reader = data_reader.DataReader(data_folder, data_prefix, shuffle=False)

    for n_iter in range(reader.num_batch):
        batch = reader.read_batch()
        text = batch['text_batch']
        im = batch['im_batch']
        mask = batch['mask_batch'].astype(np.float32)
        proc_im = skimage.img_as_ubyte(im_processing.resize_and_pad(im, H, W))
        proc_im_ = proc_im.astype(np.float32)
        proc_im_ = proc_im_[:, :, ::-1]
        proc_im_ -= mu

        scores_val, up_val, sigm_val = sess.run([model.pred, model.up, model.sigm],
            feed_dict={
                model.words: np.expand_dims(text, axis=0),
                model.im: np.expand_dims(proc_im_, axis=0)
            })

        up_val = np.squeeze(up_val)
        pred_raw = (up_val >= score_thresh).astype(np.float32)
        predicts = im_processing.resize_and_crop(pred_raw, mask.shape[0], mask.shape[1])
        if dcrf:
            # Dense CRF post-processing
            sigm_val = np.squeeze(sigm_val)
            d = Dcrf.DenseCRF2D(W, H, 2)
            U = np.expand_dims(-np.log(sigm_val), axis=0)
            U_ = np.expand_dims(-np.log(1 - sigm_val), axis=0)
            unary = np.concatenate((U_, U), axis=0)
            unary = unary.reshape((2, -1))
            d.setUnaryEnergy(unary)
            d.addPairwiseGaussian(sxy=3, compat=3)
            d.addPairwiseBilateral(sxy=20, srgb=3, rgbim=proc_im, compat=10)
            Q = d.inference(5)
            pred_raw_dcrf = np.argmax(Q, axis=0).reshape((H, W)).astype(np.float32)
            predicts_dcrf = im_processing.resize_and_crop(pred_raw_dcrf, mask.shape[0], mask.shape[1])

        I, U = eval_tools.compute_mask_IU(predicts, mask)
        cum_I += I
        cum_U += U
        msg = 'cumulative IoU = %f' % (cum_I/cum_U)
        for n_eval_iou in range(len(eval_seg_iou_list)):
            eval_seg_iou = eval_seg_iou_list[n_eval_iou]
            seg_correct[n_eval_iou] += (I/U >= eval_seg_iou)
        if dcrf:
            I_dcrf, U_dcrf = eval_tools.compute_mask_IU(predicts_dcrf, mask)
            cum_I_dcrf += I_dcrf
            cum_U_dcrf += U_dcrf
            msg += '\tcumulative IoU (dcrf) = %f' % (cum_I_dcrf/cum_U_dcrf)
            for n_eval_iou in range(len(eval_seg_iou_list)):
                eval_seg_iou = eval_seg_iou_list[n_eval_iou]
                seg_correct_dcrf[n_eval_iou] += (I_dcrf/U_dcrf >= eval_seg_iou)
        print(msg)
        seg_total += 1

    # Print results
    print('Segmentation evaluation (without DenseCRF):')
    result_str = ''
    for n_eval_iou in range(len(eval_seg_iou_list)):
        result_str += 'precision@%s = %f\n' % \
            (str(eval_seg_iou_list[n_eval_iou]), seg_correct[n_eval_iou]/seg_total)
    result_str += 'overall IoU = %f\n' % (cum_I/cum_U)
    print(result_str)
    if dcrf:
        print('Segmentation evaluation (with DenseCRF):')
        result_str = ''
        for n_eval_iou in range(len(eval_seg_iou_list)):
            result_str += 'precision@%s = %f\n' % \
                (str(eval_seg_iou_list[n_eval_iou]), seg_correct_dcrf[n_eval_iou]/seg_total)
        result_str += 'overall IoU = %f\n' % (cum_I_dcrf/cum_U_dcrf)
        print(result_str)
Example #11
def train(modelname, max_iter, snapshot, dataset, weights, setname, mu):

    data_folder = './' + dataset + '/' + setname + '_batch/'
    data_prefix = dataset + '_' + setname
    tfmodel_folder = './' + dataset + '/tfmodel_BRI/'
    snapshot_file = tfmodel_folder + dataset + '_' + weights + '_' + modelname + '_iter_%d.tfmodel'

    if not os.path.isdir(tfmodel_folder):
        os.makedirs(tfmodel_folder)

    cls_loss_avg = 0
    avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg = 0, 0, 0
    decay = 0.99
    vocab_size = 8803 if dataset == 'referit' else 12112

    if modelname == 'BRI':
        model = BRI_model(mode='train', vocab_size=vocab_size, weights=weights)
    else:
        raise ValueError('Unknown model name %s' % (modelname))

    if weights == 'resnet':
        pretrained_model = './external/TF-resnet/model/ResNet101_init.tfmodel'
        load_var = {var.op.name: var for var in tf.global_variables()
                    if var.op.name.startswith('ResNet')}
    elif weights == 'deeplab':
        pretrained_model = './external/TF-deeplab/model/ResNet101_train.tfmodel'
        load_var = {var.op.name: var for var in tf.global_variables()
                    if var.op.name.startswith('DeepLab/group')}
    else:
        raise ValueError('Unknown weights %s' % weights)

    snapshot_loader = tf.train.Saver(load_var)
    snapshot_saver = tf.train.Saver(max_to_keep=1000)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    snapshot_loader.restore(sess, pretrained_model)
    reader = data_reader.DataReader(data_folder, data_prefix)

    for n_iter in range(max_iter):

        batch = reader.read_batch()
        text = batch['text_batch']
        im = batch['im_batch'].astype(np.float32)
        mask = np.expand_dims(batch['mask_batch'].astype(np.float32), axis=2)
        im = im[:, :, ::-1]
        im -= mu

        _, cls_loss_val, lr_val, scores_val, label_val = sess.run(
            [model.train_step, model.cls_loss, model.learning_rate,
             model.pred, model.target],
            feed_dict={
                model.words: np.expand_dims(text, axis=0),
                model.im: np.expand_dims(im, axis=0),
                model.target_fine: np.expand_dims(mask, axis=0)
            })

        cls_loss_avg = decay * cls_loss_avg + (1 - decay) * cls_loss_val
        if (n_iter + 1) % 100 == 0:
            print('iter = %d, loss (cur) = %f, loss (avg) = %f, lr = %f' % (n_iter, cls_loss_val, cls_loss_avg, lr_val))
        # Accuracy
        accuracy_all, accuracy_pos, accuracy_neg = compute_accuracy(scores_val, label_val)
        avg_accuracy_all = decay * avg_accuracy_all + (1 - decay) * accuracy_all
        avg_accuracy_pos = decay * avg_accuracy_pos + (1 - decay) * accuracy_pos
        avg_accuracy_neg = decay * avg_accuracy_neg + (1 - decay) * accuracy_neg
        if (n_iter + 1) % 100 == 0:
            print('iter = %d, accuracy (cur) = %f (all), %f (pos), %f (neg)'
                  % (n_iter, accuracy_all, accuracy_pos, accuracy_neg))
            print('iter = %d, accuracy (avg) = %f (all), %f (pos), %f (neg)'
                  % (n_iter, avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg))
        # Save snapshot
        if (n_iter + 1) >= 400000:
            if (n_iter + 1) % snapshot == 0 or (n_iter + 1) >= max_iter:
                snapshot_saver.save(sess, snapshot_file % (n_iter + 1))
                print('snapshot saved to ' + snapshot_file % (n_iter + 1))
        gc.collect()
    print('Optimization done.')
Example #12
def train(modelname, max_iter, snapshot, dataset, weights, setname, mu, lr, bs,
          tfmodel_folder, conv5):
    iters_per_log = 100
    data_folder = './' + dataset + '/' + setname + '_batch/'
    data_prefix = dataset + '_' + setname
    # tfmodel_folder = './' + dataset + '/tfmodel/'
    # tfmodel_folder = './%s/ckpt_lr%.5f_bs%d/' % (dataset, lr, bs)
    snapshot_file = os.path.join(
        tfmodel_folder,
        dataset + '_' + weights + '_' + modelname + '_iter_%d.tfmodel')
    if not os.path.isdir(tfmodel_folder):
        os.makedirs(tfmodel_folder)

    cls_loss_avg = 0
    avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg = 0, 0, 0
    decay = 0.99
    vocab_size = 8803 if dataset == 'referit' else 12112

    if modelname == 'LSTM':
        model = LSTM_model(mode='train',
                           vocab_size=vocab_size,
                           weights=weights,
                           start_lr=lr,
                           batch_size=bs,
                           conv5=conv5)
    elif modelname == 'RMI':
        model = RMI_model(mode='train', vocab_size=vocab_size, weights=weights)
    else:
        raise ValueError('Unknown model name %s' % (modelname))

    if weights == 'resnet':
        pretrained_model = './external/TF-resnet/model/ResNet101_init.tfmodel'
        load_var = {
            var.op.name: var
            for var in tf.global_variables()
            if var.op.name.startswith('ResNet')
        }
    elif weights == 'deeplab':
        pretrained_model = '/data/ryli/text_objseg/tensorflow-deeplab-resnet/models/deeplab_resnet_init.ckpt'
        load_var = {
            var.op.name: var
            for var in tf.global_variables() if var.name.startswith('res')
            or var.name.startswith('bn') or var.name.startswith('conv1')
        }
    else:
        raise ValueError('Unknown weights %s' % weights)

    snapshot_loader = tf.train.Saver(load_var)
    snapshot_saver = tf.train.Saver(max_to_keep=1000)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    snapshot_loader.restore(sess, pretrained_model)

    im_h, im_w, num_steps = model.H, model.W, model.num_steps
    text_batch = np.zeros((bs, num_steps), dtype=np.float32)
    image_batch = np.zeros((bs, im_h, im_w, 3), dtype=np.float32)
    mask_batch = np.zeros((bs, im_h, im_w, 1), dtype=np.float32)

    reader = data_reader.DataReader(data_folder, data_prefix)
    for n_iter in range(max_iter):

        for n_batch in range(bs):
            batch = reader.read_batch(
                is_log=(n_batch == 0 and n_iter % iters_per_log == 0))
            text = batch['text_batch']
            im = batch['im_batch'].astype(np.float32)
            mask = np.expand_dims(batch['mask_batch'].astype(np.float32),
                                  axis=2)

            im = im[:, :, ::-1]
            im -= mu

            text_batch[n_batch, ...] = text
            image_batch[n_batch, ...] = im
            mask_batch[n_batch, ...] = mask

        _, cls_loss_val, lr_val, scores_val, label_val = sess.run(
            [
                model.train_step, model.cls_loss, model.learning_rate,
                model.pred, model.target
            ],
            feed_dict={
                model.words: text_batch,        # np.expand_dims(text, axis=0)
                model.im: image_batch,          # np.expand_dims(im, axis=0)
                model.target_fine: mask_batch   # np.expand_dims(mask, axis=0)
            })
        cls_loss_avg = decay * cls_loss_avg + (1 - decay) * cls_loss_val

        # Accuracy
        accuracy_all, accuracy_pos, accuracy_neg = compute_accuracy(
            scores_val, label_val)
        avg_accuracy_all = decay * avg_accuracy_all + (1 - decay) * accuracy_all
        avg_accuracy_pos = decay * avg_accuracy_pos + (1 - decay) * accuracy_pos
        avg_accuracy_neg = decay * avg_accuracy_neg + (1 - decay) * accuracy_neg

        if n_iter % iters_per_log == 0:
            print('iter = %d, loss (cur) = %f, loss (avg) = %f, lr = %f' %
                  (n_iter, cls_loss_val, cls_loss_avg, lr_val))
            print('iter = %d, accuracy (cur) = %f (all), %f (pos), %f (neg)' %
                  (n_iter, accuracy_all, accuracy_pos, accuracy_neg))
            print(
                'iter = %d, accuracy (avg) = %f (all), %f (pos), %f (neg)' %
                (n_iter, avg_accuracy_all, avg_accuracy_pos, avg_accuracy_neg))

        # Save snapshot
        if (n_iter + 1) % snapshot == 0 or (n_iter + 1) >= max_iter:
            snapshot_saver.save(sess, snapshot_file % (n_iter + 1))
            print('snapshot saved to ' + snapshot_file % (n_iter + 1))

    print('Optimization done.')