Example #1
    def parse(self,args):
        self.cfg = self.parser.parse_args(args)

        if self.cfg.set == 'CIFAR10':
            self.cfg.num_cls = 10
            self.cfg.eval_tst = True
        elif self.cfg.set == 'CIFAR100':
            self.cfg.num_cls = 100
            self.cfg.eval_tst = True
        else:
            raise NotImplementedError('Invalid dataset {}'.format(self.cfg.set))

        self.cfg.exp_dir = osp.join(path_utils.get_checkpoint_dir() , self.cfg.name)

        os_utils.touch_dir(self.cfg.exp_dir)
        log_file = os.path.join(self.cfg.exp_dir, self.cfg.log_file)
        logging.config.dictConfig(log_utils.get_logging_dict(log_file))
        self.cfg.logger = logging.getLogger('train')

        return self.cfg
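# --- Added usage sketch (not part of the original snippet) ---
# A minimal, hypothetical invocation of the parse() method above. It assumes
# the enclosing BaseConfig-style class wires up an argparse.ArgumentParser as
# self.parser; the flag names below are assumptions for illustration.
def _example_parse_usage():
    cfg = BaseConfig().parse(['--set', 'CIFAR10',          # sets num_cls = 10
                              '--name', 'cifar10_baseline',
                              '--log_file', 'train.log'])
    cfg.logger.info('experiment dir: {} ({} classes)'.format(
        cfg.exp_dir, cfg.num_cls))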
def main(cfg):

    # cfg.num_classes = 1001
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu
    output_dir = cfg.output_dir

    os_utils.touch_dir(output_dir)

    args_file = os.path.join(cfg.output_dir, 'args.json')
    with open(args_file, 'w') as f:
        json.dump(vars(cfg), f, ensure_ascii=False, indent=2, sort_keys=True)

    log_file = os.path.join(cfg.output_dir, cfg.log_filename + '.txt')

    logger = log_utils.create_logger(log_file)

    img_name_ext = cfg.img_name
    img_name, _ = os.path.splitext(img_name_ext)
    datasets_dir = './input_imgs'
    test_img = imageio.imread('{}/{}'.format(datasets_dir, img_name_ext))
    test_img = cv2.resize(test_img, (const.frame_height, const.frame_height))
    with tf.Graph().as_default():

        images_ph = tf.compat.v1.placeholder(tf.float32,
                                             shape=(None, const.frame_height,
                                                    const.frame_height,
                                                    const.num_channels),
                                             name='input_img')
        lbls_ph = tf.compat.v1.placeholder(tf.int32,
                                           shape=(None, cfg.num_classes),
                                           name='class_lbls')
        logits_ph = tf.compat.v1.placeholder(tf.float32,
                                             shape=(None, cfg.num_classes),
                                             name='logits_lbls')
        per_class_logits_ph = tf.compat.v1.placeholder(tf.float32,
                                                       shape=(None,
                                                              cfg.num_classes),
                                                       name='per_class_logits')
        input_ph = nn_utils.adjust_color_space(images_ph, cfg.preprocess_func)
        network_class = locate(cfg.network_name)
        model = network_class(cfg, images_ph=input_ph, lbls_ph=lbls_ph)

        pre_atten_feat_map_tf = tf.compat.v1.get_default_graph().get_tensor_by_name(
            cfg.replicate_net_at)
        pre_atten_feat_map_tf_shape = pre_atten_feat_map_tf.shape
        sub_feat_map_ph = tf.compat.v1.placeholder(
            tf.float32,
            shape=[
                None, pre_atten_feat_map_tf_shape[1],
                pre_atten_feat_map_tf_shape[2], pre_atten_feat_map_tf_shape[3]
            ],
            name='feat_map_input')
        sub_network_class = locate(cfg.sub_network_name)
        sub_model = sub_network_class(cfg,
                                      images_ph=sub_feat_map_ph,
                                      lbls_ph=lbls_ph)
        sub_logits = sub_model.val_logits

        logits = model.val_logits

        sess = tf.compat.v1.InteractiveSession()

        atten_filter_position = cfg.atten_filter_position

        tf_atten_var = [
            v for v in tf.compat.v1.global_variables()
            if atten_filter_position.format('atten') in v.name
        ][-1]
        ## Didn't make a difference for tf_atten_var because tf_atten_var is created using get_variable, i.e., shared
        tf_gate_atten_var = [
            v for v in tf.compat.v1.global_variables()
            if atten_filter_position.format('gate') in v.name
        ][-1]
        # print(tf_gate_atten_var)
        # optimizer = tf.train.AdamOptimizer(0.01)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        logger.info('Learning rate {} {}'.format(cfg.learning_rate,
                                                 cfg.max_iters))
        learning_rate = tf_utils.poly_lr(global_step, cfg)
        optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate)

        class_specific = (cfg.caf_variant == 'cls_specific')
        if class_specific:
            logger.info(
                'Solving class specific optimization problem -- classification network'
            )

            mult_logits_2 = per_class_logits_ph * sub_logits
            loss_sub = tf.reduce_sum(mult_logits_2)
            grads = optimizer.compute_gradients(loss_sub,
                                                var_list=[tf_atten_var])
            train_op = optimizer.apply_gradients(grads,
                                                 global_step=global_step)
        else:
            raise NotImplementedError('cls_oblivious variant not implemented yet')

        # train_op = optimizer.minimize(loss, var_list=[tf_atten_var])
        tf.compat.v1.global_variables_initializer().run()
        ckpt_file = tf.train.latest_checkpoint(output_dir)
        logger.info('Model Path {}'.format(ckpt_file))
        saver = tf.compat.v1.train.Saver()  # saves variables learned during training
        load_model_msg = model.load_model(output_dir,
                                          ckpt_file,
                                          sess,
                                          saver,
                                          load_logits=True)
        logger.info(load_model_msg)

        class_predictions, ground_logits = sess.run(
            [model.val_class_prediction, logits],
            feed_dict={images_ph: np.expand_dims(test_img, 0)})

        class_predictions = class_predictions[0]
        # print('Class Prediction {}'.format(imagenet_lbls[class_predictions]))

        k = 1
        top_k = np.argsort(np.squeeze(ground_logits))[::-1][:k]
        # top_k = [235,282,94,1,225]
        logger.info('Top K={} {}'.format(k, [imagenet_lbls[i] for i in top_k]))

        filter_type = cfg.filter_type
        if filter_type == 'gauss':
            rand_initializer = np.random.normal(
                0, 1, (tf_atten_var.shape[0], tf_atten_var.shape[1]))
        else:
            rand_initializer = np.random.normal(
                0, 1, (tf_atten_var.shape[0], tf_atten_var.shape[1], 1))

        close_gate = tf.compat.v1.assign(tf_gate_atten_var, False)
        open_gate = tf.compat.v1.assign(tf_gate_atten_var, True)
        random_init = tf.compat.v1.assign(tf_atten_var, rand_initializer)
        lr_reset = tf.compat.v1.assign(global_step, 0)
        MAX_INT = np.iinfo(np.int16).max
        # output_dir = cfg.output_dir
        for top_i in top_k:
            # top_i  = 207  # To control which top_i to work on directly
            sess.run([open_gate, random_init, lr_reset])
            # sess.run(open_gate)

            iteration = 0
            prev_loss = MAX_INT
            event_gif_images = []
            per_class_maximization = np.ones((1, cfg.num_classes))
            per_class_maximization[0, top_i] = -1

            while iteration < cfg.max_iters:

                if iteration == 0:
                    sess.run([close_gate])
                    _pre_atten_feat_map_tf, _atten_var = sess.run(
                        [pre_atten_feat_map_tf, tf_atten_var],
                        feed_dict={
                            # sub_feat_map_ph: _pre_atten_feat_map_tf,
                            images_ph: np.expand_dims(test_img, 0),
                            per_class_logits_ph: per_class_maximization
                        })
                    sess.run([open_gate])
                _atten_var, _sub_logits, _loss, _ = sess.run(
                    [tf_atten_var, sub_logits, loss_sub, train_op],
                    feed_dict={
                        sub_feat_map_ph: _pre_atten_feat_map_tf,
                        # images_ph:np.expand_dims(img_crops[crop_idx,:,:,:],0),
                        per_class_logits_ph: per_class_maximization
                    })

                if iteration % 50 == 0:
                    logger.info('Iter {0:2d}: {1:.5f} Top {2:3d} {3}'.format(
                        iteration, _loss, top_i, imagenet_lbls[top_i]))
                    # print(np.round(np.reshape(_atten_var,(7,7)),2))
                    if cfg.save_gif:
                        frame_mask = normalize_filter(filter_type, _atten_var,
                                                      tf_atten_var.shape[0],
                                                      tf_atten_var.shape[1])
                        if class_specific:
                            #
                            # heatmap_utils.save_heatmap(frame_mask,save=output_dir + img_name +'_msk_cls_{}_{}.png'.format(top_i,filter_type))
                            plt = heatmap_utils.apply_heatmap(
                                test_img / 255.0,
                                frame_mask,
                                alpha=0.7,
                                save=output_dir + img_name +
                                '_cls_{}_{}.png'.format(top_i, filter_type),
                                axis='off',
                                cmap='bwr')
                        else:
                            plt = heatmap_utils.apply_heatmap(
                                test_img / 255.0,
                                frame_mask,
                                alpha=0.7,
                                save=output_dir + img_name +
                                '_{}.png'.format(filter_type),
                                axis='off',
                                cmap='bwr')

                        fig = plt.gcf()
                        # np.fromstring is deprecated; np.frombuffer is the replacement
                        data = np.frombuffer(fig.canvas.tostring_rgb(),
                                             dtype=np.uint8)
                        w, h = fig.canvas.get_width_height()
                        data_img = data.reshape((h, w, 3))
                        event_gif_images.append(data_img)
                        # imageio.imwrite(dump_dir + '{}_test.jpg'.format(iteration),data_img)
                        plt.close()

                    if np.abs(_loss - prev_loss) < 10e-5:
                        break

                    prev_loss = _loss

                iteration += 1

            frame_mask = normalize_filter(filter_type, _atten_var,
                                          tf_atten_var.shape[0],
                                          tf_atten_var.shape[1])
            if class_specific:
                # imageio.imwrite(output_dir + img_name + '_msk_cls_{}_{}.png'.format(top_i, filter_type), frame_mask)
                heatmap_utils.apply_heatmap(
                    test_img / 255.0,
                    frame_mask,
                    alpha=0.6,
                    save=output_dir + img_name +
                    '_cls_{}_{}.png'.format(top_i, filter_type),
                    axis='off',
                    cmap='bwr')
            else:
                heatmap_utils.apply_heatmap(test_img / 255.0,
                                            frame_mask,
                                            alpha=0.6,
                                            save=output_dir + img_name +
                                            '_{}.png'.format(filter_type),
                                            axis='off',
                                            cmap='bwr')
            if cfg.save_gif:
                if class_specific:
                    imageio.mimsave(
                        output_dir + img_name + '_cls_{}_{}.gif'.format(
                            top_i,
                            atten_filter_position[:-2].format('').replace(
                                '/', '')),
                        event_gif_images,
                        duration=1.0)
                else:
                    imageio.mimsave(
                        output_dir + img_name + '_cls_{}_{}.gif'.format(
                            filter_type,
                            atten_filter_position[:-2].format('').replace(
                                '/', '')),
                        event_gif_images,
                        duration=1.0)
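# --- Added illustration: a plausible stand-in for normalize_filter ---
# normalize_filter() is called above but not defined in this snippet. This
# min-max sketch is an assumption about its behavior: collapse the attention
# variable to an (h, w) map and scale it to [0, 1] for the heatmap overlay.
import numpy as np

def normalize_filter(filter_type, atten_var, h, w):
    # filter_type ('gauss' vs. spatial) is ignored in this simplified sketch.
    mask = np.reshape(np.asarray(atten_var), (int(h), int(w), -1)).mean(axis=-1)
    lo, hi = mask.min(), mask.max()
    if hi - lo < 1e-12:  # guard against a flat (constant) map
        return np.zeros_like(mask)
    return (mask - lo) / (hi - lo)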
Example #3
def main(argv):
    # Verify that parameters are set correctly.
    args = parser.parse_args(argv)

    if not os.path.exists(args.dataset):
        print('ERROR: dataset file {} does not exist.'.format(args.dataset))
        return

    # Possibly auto-generate the output filename.
    if args.filename is None:
        basename = os.path.basename(args.dataset)
        args.filename = os.path.splitext(basename)[0] + '_embeddings.h5'

    os_utils.touch_dir(os.path.join(args.experiment_root, args.foldername))

    log_file = os.path.join(args.experiment_root, args.foldername, "embed")
    logging.config.dictConfig(common.get_logging_dict(log_file))
    log = logging.getLogger('embed')

    args.filename = os.path.join(args.experiment_root, args.foldername,
                                 args.filename)
    var_filepath = os.path.join(args.experiment_root, args.foldername,
                                args.filename[:-3] + '_var.txt')
    # Load the args from the original experiment.
    args_file = os.path.join(args.experiment_root, 'args.json')

    if os.path.isfile(args_file):
        if not args.quiet:
            print('Loading args from {}.'.format(args_file))
        with open(args_file, 'r') as f:
            args_resumed = json.load(f)

        # Add arguments from training.
        for key, value in args_resumed.items():
            args.__dict__.setdefault(key, value)

        # A couple special-cases and sanity checks
        if (args_resumed['crop_augment']) == (args.crop_augment is None):
            print('WARNING: crop augmentation differs between training and '
                  'evaluation.')
        args.image_root = args.image_root or args_resumed['image_root']
    else:
        raise IOError(
            '`args.json` could not be found in: {}'.format(args_file))

    # Check a proper aggregator is provided if augmentation is used.
    if args.flip_augment or args.crop_augment == 'five':
        if args.aggregator is None:
            print(
                'ERROR: Test time augmentation is performed but no aggregator '
                'was specified.')
            exit(1)
    else:
        if args.aggregator is not None:
            print('ERROR: No test time augmentation that needs aggregating is '
                  'performed but an aggregator was specified.')
            exit(1)

    if not args.quiet:
        print('Evaluating using the following parameters:')
        for key, value in sorted(vars(args).items()):
            print('{}: {}'.format(key, value))

    # Load the data from the CSV file.
    _, data_fids = common.load_dataset(args.dataset, args.image_root)

    net_input_size = (args.net_input_height, args.net_input_width)
    pre_crop_size = (args.pre_crop_height, args.pre_crop_width)

    # Setup a tf Dataset containing all images.
    dataset = tf.data.Dataset.from_tensor_slices(data_fids)

    # Convert filenames to actual image tensors.
    dataset = dataset.map(lambda fid: common.fid_to_image(
        fid,
        tf.constant('dummy'),
        image_root=args.image_root,
        image_size=pre_crop_size if args.crop_augment else net_input_size),
                          num_parallel_calls=args.loading_threads)

    # Augment the data if specified by the arguments.
    # `modifiers` is a list of strings that keeps track of which augmentations
    # have been applied, so that a human can understand it later on.
    modifiers = ['original']
    if args.flip_augment:
        dataset = dataset.map(flip_augment)
        dataset = dataset.apply(tf.contrib.data.unbatch())
        modifiers = [o + m for m in ['', '_flip'] for o in modifiers]

    if args.crop_augment == 'center':
        dataset = dataset.map(lambda im, fid, pid:
                              (five_crops(im, net_input_size)[0], fid, pid))
        modifiers = [o + '_center' for o in modifiers]
    elif args.crop_augment == 'five':
        dataset = dataset.map(lambda im, fid, pid:
                              (tf.stack(five_crops(im, net_input_size)),
                               tf.stack([fid] * 5), tf.stack([pid] * 5)))
        dataset = dataset.apply(tf.contrib.data.unbatch())
        modifiers = [
            o + m for o in modifiers for m in [
                '_center', '_top_left', '_top_right', '_bottom_left',
                '_bottom_right'
            ]
        ]
    elif args.crop_augment == 'avgpool':
        modifiers = [o + '_avgpool' for o in modifiers]
    else:
        modifiers = [o + '_resize' for o in modifiers]

    # Group it back into PK batches.
    dataset = dataset.batch(args.batch_size)

    # Overlap producing and consuming.
    dataset = dataset.prefetch(1)

    #images, _, _ = dataset.make_one_shot_iterator().get_next()
    #init_iter = dataset.make_initializable_iterator()
    init_iter = tf.data.Iterator.from_structure(dataset.output_types,
                                                dataset.output_shapes)
    images, _, _ = init_iter.get_next()
    iter_init_op = init_iter.make_initializer(dataset)
    # Create the model and an embedding head.
    model = import_module('nets.' + args.model_name)
    head = import_module('heads.' + args.head_name)

    images_ph = tf.placeholder(dataset.output_types[0],
                               dataset.output_shapes[0])
    endpoints, body_prefix = model.endpoints(images_ph, is_training=False)

    with tf.name_scope('head'):
        endpoints = head.head(endpoints, args.embedding_dim, is_training=False)

    gpu_options = tf.GPUOptions(allow_growth=True)
    gpu_config = tf.ConfigProto(gpu_options=gpu_options)
    with h5py.File(args.filename,
                   'w') as f_out, tf.Session(config=gpu_config) as sess:
        # Initialize the network/load the checkpoint.
        if args.checkpoint is None:
            checkpoint = tf.train.latest_checkpoint(args.experiment_root)
        else:
            checkpoint = os.path.join(args.experiment_root, args.checkpoint)
        if not args.quiet:
            print('Restoring from checkpoint: {}'.format(checkpoint))
        tf.train.Saver().restore(sess, checkpoint)

        # Go ahead and embed the whole dataset, with all augmented versions too.
        emb_storage = np.zeros(
            (len(data_fids) * len(modifiers), args.embedding_dim), np.float32)

        ##sess.run(init_iter.initializer)
        sess.run(iter_init_op)

        for start_idx in count(step=args.batch_size):
            try:
                current_imgs = sess.run(images)
                batch_embedding = endpoints['emb']
                emb = sess.run(batch_embedding,
                               feed_dict={images_ph: current_imgs})
                emb_storage[start_idx:start_idx + len(emb)] += emb
                print('\rEmbedded batch {}-{}/{}'.format(
                    start_idx, start_idx + len(emb), len(emb_storage)),
                      flush=True,
                      end='')
            except tf.errors.OutOfRangeError:
                break  # This just indicates the end of the dataset.

        if not args.quiet:
            print("Done with embedding, aggregating augmentations...",
                  flush=True)

        if len(modifiers) > 1:
            # Pull out the augmentations into a separate first dimension.
            emb_storage = emb_storage.reshape(len(data_fids), len(modifiers),
                                              -1)
            emb_storage = emb_storage.transpose((1, 0, 2))  # (Aug,FID,128D)

            # Store the embedding of all individual variants too.
            emb_dataset = f_out.create_dataset('emb_aug', data=emb_storage)

            # Aggregate according to the specified parameter.
            emb_storage = AGGREGATORS[args.aggregator](emb_storage)

        # Store the final embeddings.
        emb_dataset = f_out.create_dataset('emb', data=emb_storage)

        # Store information about the produced augmentation and in case no crop
        # augmentation was used, if the images are resized or avg pooled.
        f_out.create_dataset('augmentation_types',
                             data=np.asarray(modifiers, dtype='|S'))
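# --- Added illustration: a shape-compatible AGGREGATORS dict ---
# AGGREGATORS is defined elsewhere in this codebase; the script above only
# requires that AGGREGATORS[args.aggregator] maps an (Aug, FID, D) array to
# (FID, D). The key set here is an assumption.
import numpy as np

AGGREGATORS = {
    'mean': lambda emb: np.mean(emb, axis=0),
    'max': lambda emb: np.max(emb, axis=0),
    'min': lambda emb: np.min(emb, axis=0),
}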
Example #4
def main(argv):
    cfg = BaseConfig().parse(argv)
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu
    save_model_dir = cfg.checkpoint_dir
    model_basename = os.path.basename(save_model_dir)
    touch_dir(save_model_dir)

    args_file = os.path.join(cfg.checkpoint_dir, 'args.json')
    with open(args_file, 'w') as f:
        json.dump(vars(cfg), f, ensure_ascii=False, indent=2, sort_keys=True)
    # os_utils.touch_dir(save_model_dir)

    log_file = os.path.join(cfg.checkpoint_dir, cfg.log_filename + '.txt')
    os_utils.touch_dir(cfg.checkpoint_dir)
    logger = log_utils.create_logger(log_file)

    img_generator_class = locate(cfg.db_tuple_loader)
    args = dict()
    args['db_path'] = cfg.db_path
    args['tuple_loader_queue_size'] = cfg.tuple_loader_queue_size
    args['preprocess_func'] = cfg.preprocess_func
    args['batch_size'] = cfg.batch_size
    args['shuffle'] = False
    args['csv_file'] = cfg.train_csv_file
    args['img_size'] = const.max_frame_size
    args['gen_hot_vector'] = True
    train_iter = img_generator_class(args)
    args['batch_size'] = cfg.batch_size
    args['csv_file'] = cfg.test_csv_file
    val_iter = img_generator_class(args)

    trn_images, trn_lbls = train_iter.imgs_and_lbls()
    val_imgs, val_lbls = val_iter.imgs_and_lbls()

    with tf.Graph().as_default():
        if cfg.train_mode in ('semi_hard', 'hard', 'cntr'):
            train_dataset = TripletTupleLoader(trn_images, trn_lbls,
                                               cfg).dataset
        elif cfg.train_mode == 'vanilla':
            train_dataset = QuickTupleLoader(trn_images,
                                             trn_lbls,
                                             cfg,
                                             is_training=True,
                                             shuffle=True,
                                             repeat=True).dataset
        else:
            raise NotImplementedError('{} is not a valid train mode'.format(
                cfg.train_mode))

        val_dataset = QuickTupleLoader(val_imgs,
                                       val_lbls,
                                       cfg,
                                       is_training=False,
                                       repeat=False).dataset
        handle = tf.placeholder(tf.string, shape=[])
        iterator = tf.data.Iterator.from_string_handle(
            handle, train_dataset.output_types, train_dataset.output_shapes)
        images_ph, lbls_ph = iterator.get_next()

        network_class = locate(cfg.network_name)
        model = network_class(cfg, images_ph=images_ph, lbls_ph=lbls_ph)

        # Which loss fn to impose. For example, softmax only is applied in vanilla mode,
        # while softmax + semi-hard triplet is applied in semi_hard mode.
        if cfg.train_mode == 'semi_hard':
            pre_logits = model.train_pre_logits
            _, w, h, channels = pre_logits.shape
            embed_dim = cfg.emb_dim
            embedding_net = ConvEmbed(emb_dim=embed_dim,
                                      n_input=channels,
                                      n_h=h,
                                      n_w=w)
            embedding = embedding_net.forward(pre_logits)
            embedding = tf.nn.l2_normalize(embedding, axis=-1, epsilon=1e-10)
            margin = cfg.margin
            gt_lbls = tf.argmax(model.gt_lbls, 1)
            metric_loss = triplet_semi.triplet_semihard_loss(
                gt_lbls, embedding, margin)
            logger.info('Triplet loss lambda {}, with margin {}'.format(
                cfg.triplet_loss_lambda, margin))
            total_loss = model.train_loss + cfg.triplet_loss_lambda * tf.reduce_mean(
                metric_loss)
        elif cfg.train_mode == 'hard':
            pre_logits = model.train_pre_logits
            _, w, h, channels = pre_logits.shape
            embed_dim = cfg.emb_dim
            embedding_net = ConvEmbed(emb_dim=embed_dim,
                                      n_input=channels,
                                      n_h=h,
                                      n_w=w)
            embedding = embedding_net.forward(pre_logits)
            embedding = tf.nn.l2_normalize(embedding, axis=-1, epsilon=1e-10)
            margin = cfg.margin

            logger.info('Triplet loss lambda {}, with margin {}'.format(
                cfg.triplet_loss_lambda, margin))
            gt_lbls = tf.argmax(model.gt_lbls, 1)
            metric_loss = triplet_hard.batch_hard(gt_lbls, embedding, margin)
            total_loss = model.train_loss + cfg.triplet_loss_lambda * tf.reduce_mean(
                metric_loss)
        elif cfg.train_mode == 'cntr':

            pre_logits = model.train_pre_logits
            _, w, h, channels = pre_logits.shape
            embed_dim = cfg.emb_dim
            embedding_net = ConvEmbed(emb_dim=embed_dim,
                                      n_input=channels,
                                      n_h=h,
                                      n_w=w)
            embedding = embedding_net.forward(pre_logits)
            embedding = tf.nn.l2_normalize(embedding, axis=-1, epsilon=1e-10)
            CENTER_LOSS_LAMBDA = 0.003
            CENTER_LOSS_ALPHA = 0.5
            num_fg_classes = cfg.num_classes
            gt_lbls = tf.argmax(model.gt_lbls, 1)
            center_loss_order, centroids, centers_update_op, appear_times, diff = center_loss.get_center_loss(
                embedding, gt_lbls, CENTER_LOSS_ALPHA, num_fg_classes)
            # sample_centroid = tf.reshape(tf.gather(centroids, gt_lbls), [-1, config.emb_dim])
            # center_loss_order = center_loss.center_loss(sample_centroid , embedding)
            logger.info('Center loss lambda {}'.format(CENTER_LOSS_LAMBDA))
            total_loss = model.train_loss + CENTER_LOSS_LAMBDA * tf.reduce_mean(
                center_loss_order)

        elif cfg.train_mode == 'vanilla':
            total_loss = model.train_loss

        logger.info('Train Mode {}'.format(cfg.train_mode))
        # variables_to_train = model.var_2_train();
        # logger.info('variables_to_train  ' + str(variables_to_train))

        trainable_vars = tf.trainable_variables()
        if cfg.caffe_iter_size > 1:  ## Accumulated gradient
            # Create one zero-initialized, non-trainable accumulator per
            # trainable variable.
            accum_vars = [
                tf.Variable(tf.zeros_like(tv.initialized_value()),
                            trainable=False) for tv in trainable_vars
            ]
            zero_ops = [tv.assign(tf.zeros_like(tv)) for tv in accum_vars]

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if cfg.train_mode == const.Train_Mode.CNTR:
            update_ops.append(centers_update_op)

        # print(update_ops)

        with tf.control_dependencies(update_ops):

            global_step = tf.Variable(0, name='global_step', trainable=False)
            learning_rate = tf_utils.poly_lr(global_step, cfg)
            optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)

            if cfg.caffe_iter_size > 1:  ## Accumulated gradient
                grads = optimizer.compute_gradients(total_loss, trainable_vars)
                # Add each gradient to its accumulator (accum_vars and grads
                # share the same ordering).
                accum_ops = [
                    accum_vars[i].assign_add(gv[0])
                    for i, gv in enumerate(grads)
                ]
                iter_size = cfg.caffe_iter_size
                # Define the training step (part with variable value update)
                train_op = optimizer.apply_gradients(
                    [(accum_vars[i] / iter_size, gv[1])
                     for i, gv in enumerate(grads)],
                    global_step=global_step)

            else:
                grads = optimizer.compute_gradients(total_loss)
                train_op = optimizer.apply_gradients(grads,
                                                     global_step=global_step)

        sess = tf.InteractiveSession()
        training_iterator = train_dataset.make_one_shot_iterator()
        validation_iterator = val_dataset.make_initializable_iterator()
        training_handle = sess.run(training_iterator.string_handle())
        validation_handle = sess.run(validation_iterator.string_handle())

        tb_path = save_model_dir
        logger.info(tb_path)
        start_iter = tb_utils.get_latest_iteration(tb_path)

        train_writer = tf.summary.FileWriter(tb_path, sess.graph)
        tf.global_variables_initializer().run()
        saver = tf.train.Saver()  # saves variables learned during training

        ckpt_file = tf.train.latest_checkpoint(save_model_dir)
        logger.info('Model Path {}'.format(ckpt_file))
        load_model_msg = model.load_model(save_model_dir,
                                          ckpt_file,
                                          sess,
                                          saver,
                                          load_logits=False)
        logger.info(load_model_msg)

        ckpt_file = os.path.join(save_model_dir, cfg.checkpoint_filename)

        val_loss = tf.summary.scalar('Val_Loss', model.val_loss)
        val_acc_op = tf.summary.scalar('Batch_Val_Acc', model.val_accuracy)
        model_acc_op = tf.summary.scalar('Split_Val_Accuracy',
                                         model.val_accumulated_accuracy)

        best_model_step = 0
        best_acc = 0
        logger.info('Start Training from {}, till {}'.format(
            start_iter, cfg.train_iters))
        # Start Training
        for step in range(start_iter + 1, cfg.train_iters + 1):

            start_time_train = time.time()

            # Update network weights while supporting caffe_iter_size
            for mini_batch in range(cfg.caffe_iter_size - 1):
                feed_dict = {handle: training_handle}
                model_loss_value, accuracy_value, _ = sess.run(
                    [model.train_loss, model.train_accuracy, accum_ops],
                    feed_dict)

            feed_dict = {handle: training_handle}
            model_loss_value, accuracy_value, _ = sess.run(
                [model.train_loss, model.train_accuracy, train_op], feed_dict)
            if cfg.caffe_iter_size > 1:  ## Accumulated Gradient
                sess.run(zero_ops)

            train_time = time.time() - start_time_train

            if (step == 1 or step % cfg.logging_threshold == 0):
                logger.info(
                    'i {0:04d} loss {1:4f} Acc {2:2f} Batch Time {3:3f}'.
                    format(step, model_loss_value, accuracy_value, train_time))

                if (step % cfg.test_interval == 0):
                    run_metadata = tf.RunMetadata()
                    tf.local_variables_initializer().run()
                    sess.run(validation_iterator.initializer)

                    _val_acc_op = 0
                    while True:
                        try:

                            # Eval network on validation/testing split
                            feed_dict = {handle: validation_handle}
                            val_loss_op, batch_accuracy, accuracy_op, _val_acc_op, _val_acc, c_cnf_mat, macro_acc = sess.run(
                                [
                                    val_loss, model.val_accuracy, model_acc_op,
                                    val_acc_op, model.val_accumulated_accuracy,
                                    model.val_confusion_mat,
                                    model.val_per_class_acc_acc
                                ], feed_dict)
                        except tf.errors.OutOfRangeError:
                            logger.info('Val Acc {0}, Macro Acc: {1}'.format(
                                _val_acc, macro_acc))
                            break

                    train_writer.add_run_metadata(run_metadata,
                                                  'step%03d' % step)
                    train_writer.add_summary(val_loss_op, step)
                    train_writer.add_summary(_val_acc_op, step)
                    train_writer.add_summary(accuracy_op, step)
                    train_writer.flush()

                    if (step % 100 == 0):
                        saver.save(sess, ckpt_file)
                        if best_acc < _val_acc:
                            saver.save(sess, ckpt_file + 'best')
                            best_acc = _val_acc
                            best_model_step = step

                        logger.info('Best Acc {0} at {1} == {2}'.format(
                            best_acc, best_model_step, model_basename))

        logger.info('Triplet loss lambda {}'.format(cfg.triplet_loss_lambda))
        logger.info('Mode {}'.format(cfg.train_mode))
        logger.info('Loop complete')
        sess.close()
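# --- Added illustration: the accumulated-gradient pattern in isolation ---
# A self-contained TF1-style sketch of the caffe_iter_size trick used above:
# sum gradients over several mini-batches, apply their mean once, then reset
# the accumulators. The toy model and data are hypothetical.
def _accumulated_gradient_sketch(iter_size=4):
    import tensorflow.compat.v1 as tf
    tf.disable_v2_behavior()

    x = tf.placeholder(tf.float32, shape=(None, 4))
    w = tf.Variable(tf.ones((4, 1)))
    loss = tf.reduce_mean(tf.square(tf.matmul(x, w)))
    optimizer = tf.train.MomentumOptimizer(0.01, momentum=0.9)

    tvars = tf.trainable_variables()
    accum_vars = [tf.Variable(tf.zeros_like(v), trainable=False) for v in tvars]
    zero_ops = [a.assign(tf.zeros_like(a)) for a in accum_vars]
    grads = optimizer.compute_gradients(loss, tvars)
    accum_ops = [accum_vars[i].assign_add(g) for i, (g, _) in enumerate(grads)]
    train_op = optimizer.apply_gradients(
        [(accum_vars[i] / iter_size, v) for i, (_, v) in enumerate(grads)])

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        batch = {x: [[1., 2., 3., 4.]]}
        for _ in range(iter_size):   # accumulate over iter_size mini-batches
            sess.run(accum_ops, batch)
        sess.run(train_op)           # one weight update with the mean gradient
        sess.run(zero_ops)           # reset accumulators for the next cycle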
Example #5
def main(argv):

    cfg = BaseConfig().parse(argv)
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu

    img_generator_class = locate(cfg.db_tuple_loader)
    args = dict()
    args['db_path'] = cfg.db_path
    args['tuple_loader_queue_size'] = cfg.tuple_loader_queue_size
    args['preprocess_func'] = cfg.preprocess_func
    args['batch_size'] = cfg.batch_size
    args['shuffle'] = False
    args['img_size'] = const.max_frame_size
    args['gen_hot_vector'] = True
    args['csv_file'] = cfg.train_csv_file
    train_iter = img_generator_class(args)

    args['csv_file'] = cfg.test_csv_file
    val_iter = img_generator_class(args)

    train_imgs, train_lbls = train_iter.imgs_and_lbls()
    val_imgs, val_lbls = val_iter.imgs_and_lbls()

    # Where to save the trained model
    save_model_dir = cfg.checkpoint_dir
    model_basename = os.path.basename(save_model_dir)
    touch_dir(save_model_dir)


    ## Log experiment
    args_file = os.path.join(cfg.checkpoint_dir, 'args.json')
    with open(args_file, 'w') as f:
        json.dump(vars(cfg), f, ensure_ascii=False, indent=2, sort_keys=True)
    # os_utils.touch_dir(save_model_dir)

    log_file = os.path.join(cfg.checkpoint_dir, cfg.log_filename + '.txt')
    os_utils.touch_dir(cfg.checkpoint_dir)

    logger = log_utils.create_logger(log_file)


    with tf.Graph().as_default():

        # Create train and val dataset following tensorflow Data API
        ## A dataset element has an image and lable
        train_dataset = TensorflowTupleLoader(train_imgs, train_lbls, cfg,
                                              is_training=True).dataset
        val_dataset = TensorflowTupleLoader(val_imgs, val_lbls, cfg,
                                            is_training=False,
                                            batch_size=cfg.batch_size,
                                            repeat=False).dataset

        handle = tf.placeholder(tf.string, shape=[])

        iterator = tf.data.Iterator.from_string_handle(
            handle, train_dataset.output_types, train_dataset.output_shapes)
        images_ph, lbls_ph = iterator.get_next()

        training_iterator = train_dataset.make_one_shot_iterator()
        validation_iterator = val_dataset.make_initializable_iterator()

        ## Load a pretrained network {resnet_v2 or densenet161} based on config.network_name configuration
        network_class = locate(cfg.network_name)
        model = network_class(cfg, is_training=True, images_ph=images_ph, lbls_ph=lbls_ph)


        trainable_vars = tf.trainable_variables()
        if cfg.caffe_iter_size > 1:  ## Accumulated gradient
            # Create one zero-initialized, non-trainable accumulator per
            # trainable variable.
            accum_vars = [tf.Variable(tf.zeros_like(tv.initialized_value()), trainable=False) for tv in trainable_vars]
            zero_ops = [tv.assign(tf.zeros_like(tv)) for tv in accum_vars]

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):

            global_step = tf.Variable(0, name='global_step', trainable=False)
            learning_rate = tf_utils.poly_lr(global_step,cfg)
            optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)

            if cfg.caffe_iter_size > 1:  ## Accumulated Gradient

                grads = optimizer.compute_gradients(model.train_loss, trainable_vars)
                # Add each gradient to its accumulator (accum_vars and grads
                # share the same ordering).
                accum_ops = [accum_vars[i].assign_add(gv[0]) for i, gv in enumerate(grads)]
                iter_size = cfg.caffe_iter_size
                # Define the training step (part with variable value update)
                train_op = optimizer.apply_gradients([(accum_vars[i] / iter_size, gv[1]) for i, gv in enumerate(grads)],
                                                     global_step=global_step)

            else: # If accumulated gradient disabled, do regular training

                grads = optimizer.compute_gradients(model.train_loss)
                train_op = optimizer.apply_gradients(grads, global_step=global_step)

        # logger.info('=========================================================')
        # for v in tf.trainable_variables():
        #     mprint('trainable_variables:  {0} \t {1}'.format(str(v.name),str(v.shape)))


        sess = tf.InteractiveSession()
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()

        training_handle = sess.run(training_iterator.string_handle())
        validation_handle = sess.run(validation_iterator.string_handle())


        # now = datetime.now()
        # if (config.tensorbaord_file == None):
        #     tb_path = config.tensorbaord_dir + now.strftime("%Y%m%d-%H%M%S")
        # else:
        #     tb_path = config.tensorbaord_dir + config.tensorbaord_file

        start_iter = 1 # No Resume in this code version

        # train_writer = tf.summary.FileWriter(tb_path, sess.graph)

        saver = tf.train.Saver()  # saves variables learned during training

        ckpt_file = os.path.join(save_model_dir, cfg.checkpoint_filename)
        print('Model Path ', ckpt_file)

        load_model_msg = model.load_model(save_model_dir, ckpt_file, sess, saver, is_finetuning=True)
        logger.info(load_model_msg)


        val_loss = tf.summary.scalar('Val_Loss', model.val_loss)
        val_acc_op = tf.summary.scalar('Batch_Val_Acc', model.val_accuracy)
        model_acc_op = tf.summary.scalar('Split_Val_Accuracy', model.val_accumulated_accuracy)

        logger.info('Start Training ***********')
        best_acc = 0
        best_model_step = 0
        for current_iter in range(start_iter, cfg.train_iters+1):
            start_time_train = time.time()
            feed_dict = {handle: training_handle}

            ## Here is where training and backpropagation start

            # In case accumulated gradient enabled, i.e. config.caffe_iter_size > 1
            for mini_batch in range(cfg.caffe_iter_size - 1):
                sess.run(accum_ops, feed_dict)


            model_loss_value, accuracy_value, _ = sess.run([model.train_loss, model.train_accuracy, train_op],
                                                           feed_dict)

            # In case accumulated gradient enabled, reset shadow variables
            if cfg.caffe_iter_size > 1:
                sess.run(zero_ops)

            ## Here is where training and backpropagation end

            train_time = time.time() - start_time_train


            if (current_iter % cfg.logging_threshold == 0 or current_iter == 1):
                logger.info(
                    'i {0:04d} loss {1:4f} Acc {2:2f} Batch Time {3:3f}'.format(current_iter, model_loss_value, accuracy_value,
                                                                                train_time))

                if (current_iter % cfg.test_interval == 0):
                    # run_metadata = tf.RunMetadata()

                    tf.local_variables_initializer().run()
                    sess.run(validation_iterator.initializer)

                    while True:
                        try:
                            feed_dict = {handle: validation_handle}
                            val_loss_op, batch_accuracy, accuracy_op, _val_acc_op, _val_acc, c_cnf_mat = sess.run(
                                [val_loss, model.val_accuracy, model_acc_op, val_acc_op, model.val_accumulated_accuracy,
                                 model.val_confusion_mat], feed_dict)
                        except tf.errors.OutOfRangeError:
                            logger.info('Val Acc {0}'.format(_val_acc))
                            break

                    # train_writer.add_run_metadata(run_metadata, 'step%03d' % current_iter)
                    # train_writer.add_summary(val_loss_op, current_iter)
                    # train_writer.add_summary(_val_acc_op, current_iter)
                    # train_writer.add_summary(accuracy_op, current_iter)
                    #
                    # train_writer.flush()


                    if (current_iter % cfg.logging_threshold == 0):
                        saver.save(sess, ckpt_file)
                        if best_acc < _val_acc:
                            saver.save(sess, ckpt_file + 'best')
                            best_acc = _val_acc
                            best_model_step = current_iter
                        ## Early-stopping style: keep the best checkpoint.
                        logger.info('Best Acc {0} at {1} == {2}'.format(best_acc, best_model_step, model_basename))

        saver.save(sess, ckpt_file)  ## Save final ckpt before closing
        sess.close()
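# --- Added illustration: a plausible poly_lr implementation ---
# tf_utils.poly_lr is not shown in these snippets. This sketch builds on
# tf.train.polynomial_decay and assumes cfg carries learning_rate and
# train_iters; the decay power and end-learning-rate floor are assumptions.
import tensorflow as tf

def poly_lr(global_step, cfg, power=0.9):
    return tf.train.polynomial_decay(
        learning_rate=cfg.learning_rate,
        global_step=global_step,
        decay_steps=cfg.train_iters,
        end_learning_rate=cfg.learning_rate * 1e-3,  # assumed floor
        power=power)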
Example #6
def main(argv):
    # Verify that parameters are set correctly.
    args = parser.parse_args(argv)

    if not os.path.exists(args.dataset):
        print('ERROR: dataset file {} does not exist.'.format(args.dataset))
        return

    # Possibly auto-generate the output filename.
    if args.filename is None:
        basename = os.path.basename(args.dataset)
        args.filename = os.path.splitext(basename)[0] + '_embeddings.h5'

    os_utils.touch_dir(os.path.join(args.experiment_root, args.foldername))

    log_file = os.path.join(args.experiment_root, args.foldername, "embed")
    logging.config.dictConfig(common.get_logging_dict(log_file))
    log = logging.getLogger('embed')

    args.filename = os.path.join(args.experiment_root, args.foldername,
                                 args.filename)
    var_filepath = os.path.join(args.experiment_root, args.foldername,
                                args.filename[:-3] + '_var.txt')
    # Load the args from the original experiment.
    args_file = os.path.join(args.experiment_root, 'args.json')

    if os.path.isfile(args_file):
        if not args.quiet:
            print('Loading args from {}.'.format(args_file))
        with open(args_file, 'r') as f:
            args_resumed = json.load(f)

        # Add arguments from training.
        for key, value in args_resumed.items():
            args.__dict__.setdefault(key, value)

        # A couple special-cases and sanity checks
        if (args_resumed['crop_augment']) == (args.crop_augment is None):
            print('WARNING: crop augmentation differs between training and '
                  'evaluation.')
        args.image_root = args.image_root or args_resumed['image_root']
    else:
        raise IOError(
            '`args.json` could not be found in: {}'.format(args_file))

    # Check a proper aggregator is provided if augmentation is used.
    if args.flip_augment or args.crop_augment == 'five':
        if args.aggregator is None:
            print(
                'ERROR: Test time augmentation is performed but no aggregator '
                'was specified.')
            exit(1)
    else:
        if args.aggregator is not None:
            print('ERROR: No test time augmentation that needs aggregating is '
                  'performed but an aggregator was specified.')
            exit(1)

    if not args.quiet:
        print('Evaluating using the following parameters:')
        for key, value in sorted(vars(args).items()):
            print('{}: {}'.format(key, value))

    # Load the data from the CSV file.
    _, data_fids = common.load_dataset(args.dataset, args.image_root)

    net_input_size = (args.net_input_height, args.net_input_width)
    pre_crop_size = (args.pre_crop_height, args.pre_crop_width)

    # Setup a tf Dataset containing all images.
    dataset = tf.data.Dataset.from_tensor_slices(data_fids)

    # Convert filenames to actual image tensors.
    dataset = dataset.map(lambda fid: common.fid_to_image(
        fid,
        tf.constant('dummy'),
        image_root=args.image_root,
        image_size=pre_crop_size if args.crop_augment else net_input_size),
                          num_parallel_calls=args.loading_threads)

    # Augment the data if specified by the arguments.
    # `modifiers` is a list of strings that keeps track of which augmentations
    # have been applied, so that a human can understand it later on.
    modifiers = ['original']
    if args.flip_augment:
        dataset = dataset.map(flip_augment)
        dataset = dataset.apply(tf.contrib.data.unbatch())
        modifiers = [o + m for m in ['', '_flip'] for o in modifiers]

    if args.crop_augment == 'center':
        dataset = dataset.map(lambda im, fid, pid:
                              (five_crops(im, net_input_size)[0], fid, pid))
        modifiers = [o + '_center' for o in modifiers]
    elif args.crop_augment == 'five':
        dataset = dataset.map(lambda im, fid, pid:
                              (tf.stack(five_crops(im, net_input_size)),
                               tf.stack([fid] * 5), tf.stack([pid] * 5)))
        dataset = dataset.apply(tf.contrib.data.unbatch())
        modifiers = [
            o + m for o in modifiers for m in [
                '_center', '_top_left', '_top_right', '_bottom_left',
                '_bottom_right'
            ]
        ]
    elif args.crop_augment == 'avgpool':
        modifiers = [o + '_avgpool' for o in modifiers]
    else:
        modifiers = [o + '_resize' for o in modifiers]

    emb_model = EmbeddingModel(args)

    # Group it back into PK batches.
    dataset = dataset.batch(args.batch_size)
    dataset = dataset.map(lambda im, fid, pid:
                          (emb_model.preprocess_input(im), fid, pid))
    # Overlap producing and consuming.
    dataset = dataset.prefetch(1)
    tf.keras.backend.set_learning_phase(0)

    with h5py.File(args.filename, 'w') as f_out:

        ckpt = tf.train.Checkpoint(step=tf.Variable(1), net=emb_model)
        manager = tf.train.CheckpointManager(ckpt,
                                             osp.join(args.experiment_root,
                                                      'tf_ckpts'),
                                             max_to_keep=1)
        ckpt.restore(manager.latest_checkpoint)
        if manager.latest_checkpoint:
            print("Restored from {}".format(manager.latest_checkpoint))
        else:
            print("Initializing from scratch.")

        emb_storage = np.zeros(
            (len(data_fids) * len(modifiers), args.embedding_dim), np.float32)

        # for batch_idx,batch in enumerate(dataset):
        dataset_iter = iter(dataset)
        for start_idx in count(step=args.batch_size):

            try:
                images, _, _ = next(dataset_iter)
                emb = emb_model(images)
                emb_storage[start_idx:start_idx + len(emb)] += emb
                print('\rEmbedded batch {}-{}/{}'.format(
                    start_idx, start_idx + len(emb), len(emb_storage)),
                      flush=True,
                      end='')
            except StopIteration:
                break  # This just indicates the end of the dataset.

        if not args.quiet:
            print("Done with embedding, aggregating augmentations...",
                  flush=True)

        if len(modifiers) > 1:
            # Pull out the augmentations into a separate first dimension.
            emb_storage = emb_storage.reshape(len(data_fids), len(modifiers),
                                              -1)
            emb_storage = emb_storage.transpose((1, 0, 2))  # (Aug,FID,128D)

            # Store the embedding of all individual variants too.
            emb_dataset = f_out.create_dataset('emb_aug', data=emb_storage)

            # Aggregate according to the specified parameter.
            emb_storage = AGGREGATORS[args.aggregator](emb_storage)

        # Store the final embeddings.
        emb_dataset = f_out.create_dataset('emb', data=emb_storage)

        # Store information about the produced augmentation and in case no crop
        # augmentation was used, if the images are resized or avg pooled.
        f_out.create_dataset('augmentation_types',
                             data=np.asarray(modifiers, dtype='|S'))
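# --- Added illustration: a flip_augment consistent with the unbatch calls ---
# flip_augment() is imported from the surrounding codebase. This sketch matches
# how it is used above: each element becomes a stacked (original, flipped) pair
# that the subsequent unbatching splits apart. The details are assumptions.
import tensorflow as tf

def flip_augment(image, fid, pid):
    images = tf.stack([image, tf.reverse(image, axis=[1])])  # original + horizontal flip
    return images, tf.stack([fid] * 2), tf.stack([pid] * 2)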
Example #7
def directories(cfg, state='train'):
    # Select the directories depending on the dataset

    if cfg.dataset == 'cub':
        dataset_dir = 'CUB_200_2011'
        dataset_file = 'cub_train'
        test_file = 'cub_test'
    elif cfg.dataset == 'bags':
        dataset_dir = 'BAGS'
        dataset_file = 'bags_train'
        test_file = 'bags_test'
    elif cfg.dataset == 'bags_40':
        dataset_dir = 'BAGS_40'
        dataset_file = 'bags_train'
        test_file = 'bags_test'
    elif cfg.dataset == 'bags_40_v2':
        dataset_dir = 'BAGS_40_v2'
        dataset_file = 'bags_train'
        test_file = 'bags_test'
    else:
        raise NotImplementedError('Dataset {} does not exist'.format(cfg.dataset))

    cfg.dirs = Namespace()
    cfg.dirs.csv_file = os.path.join(const.dataset_dir, dataset_dir, dataset_file + '.csv')
    cfg.dirs.images = os.path.join(const.dataset_dir, dataset_dir, 'images')
    cfg.dirs.trained_models = const.trained_models_dir

    # Build the name under which the emb_model information will be stored
    if cfg.model.fit.loss == "angular_loss":
        exp_name = [cfg.dataset, cfg.model.name, cfg.model.head, cfg.model.fit.optimizer,
                    cfg.model.fit.loss, 'alpha_{}'.format(cfg.model.fit.alpha)]
    else:
        exp_name = [cfg.dataset, cfg.model.name, cfg.model.head, cfg.model.fit.optimizer,
                    cfg.model.fit.loss, 'm_{}'.format(cfg.model.fit.margin)]
    cfg.model_name = '_'.join(exp_name)

    # Checkpoint directory
    cfg.dirs.checkpoint = os.path.join(const.experiment_root_dir, cfg.model_name, 'tf_ckpts')
    os_utils.touch_dir(cfg.dirs.checkpoint)


    # Directory for the trained models
    cfg.dirs.trained = os.path.join(const.trained_models_dir, cfg.model_name)
    os_utils.touch_dir(cfg.dirs.trained)

    # Define the directories where the TensorBoard data is stored
    current_time = datetime.now().strftime("%Y%m%d-%H%M%S")
    cfg.dirs.train_log = os.path.join(const.tensorboard_dir, cfg.model_name, 'train', current_time)
    os_utils.touch_dir(cfg.dirs.train_log)
    cfg.dirs.test_log = os.path.join(const.tensorboard_dir, cfg.model_name, 'test', current_time)
    os_utils.touch_dir(cfg.dirs.test_log)
    cfg.dirs.eval_log = os.path.join(const.tensorboard_dir, cfg.model_name, 'eval', current_time)
    os_utils.touch_dir(cfg.dirs.eval_log)

    # Define where the logs are stored
    cfg.dirs.logs = os.path.join(const.experiment_root_dir, "train")

    # When the model is going to be tested
    if state == 'test':
        cfg.dirs.embeddings = const.embeddings_dir
        cfg.dirs.embeddings_file = os.path.join(cfg.dirs.embeddings, cfg.model_name + ".h5")
        cfg.dirs.test_file = os.path.join(const.dataset_dir, dataset_dir, test_file + '.csv')
        # Create the directory
        os_utils.touch_dir(cfg.dirs.embeddings)
    elif state == 'emb':
        cfg.dirs.embeddings = const.embeddings_dir
        cfg.dirs.embeddings_file = os.path.join(cfg.dirs.embeddings, cfg.model_name + ".h5")
        cfg.dirs.emb_log = os.path.join(const.tensorboard_dir, cfg.model_name, 'emb', current_time)
        os_utils.touch_dir(cfg.dirs.emb_log)
        # Create the directory
        os_utils.touch_dir(cfg.dirs.embeddings)
    elif state == 'production':
        # File used to recover the image label names for each dataset
        if cfg.dataset == 'cub':
            labels_file = 'classes.txt'
        elif cfg.dataset in ('bags', 'bags_40', 'bags_40_v2'):
            labels_file = 'bags_labels.csv'

        cfg.dirs.labels_file = os.path.join(const.dataset_dir, dataset_dir, labels_file)
        cfg.dirs.embeddings = const.embeddings_dir
        cfg.dirs.embeddings_file = os.path.join(cfg.dirs.embeddings, cfg.model_name + ".h5")
        cfg.dirs.cbir_log = os.path.join(const.tensorboard_dir, cfg.model_name, 'cbir', current_time)
        os_utils.touch_dir(cfg.dirs.cbir_log)

    return cfg
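# --- Added usage sketch ---
# Hypothetical call; cfg must already expose the fields read above
# (cfg.dataset, cfg.model.name, cfg.model.head, cfg.model.fit.*).
def _example_directories_usage(cfg):
    cfg = directories(cfg, state='emb')
    print(cfg.model_name)             # e.g. 'cub_<model>_<head>_<opt>_<loss>_m_<margin>'
    print(cfg.dirs.embeddings_file)   # <embeddings_dir>/<model_name>.h5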