Example #1
def run_through_dataset(
    dataset, support: str = "-1->1", batch_size: int = 50, image_key: str = "image", model_name: str = ""
):
    """Compute re-ID embeddings for all images in the dataset.

    :param dataset: DatasetMixin which contains the images.
    :param support: Support of the images. One of '-1->1', '0->1' or '0->255'.
    :param batch_size: The images numpy array is split into batches of size
                     batch_size. A reasonable batch size depends on the available hardware.
    :param image_key: Dataset key containing the image to be embedded.
    :param model_name: Name of the re-ID model to instantiate.
    :return: Tuple of np.ndarray embeddings and np.ndarray labels.
    """

    dataset_length = len(dataset)
    if batch_size > dataset_length:
        print("Warning: batch size is larger than the dataset; setting batch size to the dataset length.")
        batch_size = dataset_length

    batches = make_batches(dataset, batch_size, shuffle=False)
    n_batches = len(batches)
    embeddings = []
    labels = []
    sess_config = tf.compat.v1.ConfigProto()
    sess_config.gpu_options.allow_growth = True
    session = tf.compat.v1.Session(config=sess_config)

    model = reIdModel(model_name=model_name)
    if os.path.basename(TRIP_CHECK) == "checkpoint-25000":
        initialize_model(model, TRIP_CHECK, session)

    for i, batch in enumerate(tqdm(batches, desc="reID")):
        if i >= n_batches:
            break
        images = retrieve(batch, image_key)
        labels_batch = retrieve(batch, "pose_pid")
        images = adjust_support(
            np.array(images), future_support="0->255", current_support=support, clip=True
        )
        images = images.astype(np.float32)[..., :3]

        batch_embeddings = get_embedding(model, session=session, image=images)["emb"]
        embeddings += [batch_embeddings.reshape(batch_size, -1)]
        labels += [labels_batch]
    batches.finalize()
    return np.array(embeddings), np.array(labels)
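
The pattern Example #1 shares with the examples below: build a batch iterator over a DatasetMixin with make_batches, pull arrays out of each batch dict with retrieve, and release the worker processes with finalize(). A minimal self-contained sketch of that lifecycle (ToyDataset and the exact import paths are illustrative assumptions, not taken from the examples):

import numpy as np
from edflow.data.dataset import DatasetMixin  # import path may differ across edflow versions
from edflow.iterators.batches import make_batches
from edflow.util import retrieve  # assumed location of retrieve


class ToyDataset(DatasetMixin):
    """Hypothetical stand-in dataset: 100 random 8x8 RGB images in [0, 1]."""

    def get_example(self, idx):
        rng = np.random.RandomState(idx)
        return {"image": rng.rand(8, 8, 3).astype(np.float32)}

    def __len__(self):
        return 100


dataset = ToyDataset()
batch_size = min(50, len(dataset))  # never request more examples than exist

batches = make_batches(dataset, batch_size, shuffle=False)
try:
    means = []
    for i, batch in enumerate(batches):
        if i >= len(batches):  # the iterator keeps cycling; stop after one pass
            break
        images = np.array(retrieve(batch, "image"))
        means.append(images.mean())  # stand-in for the per-batch model work
finally:
    batches.finalize()  # shut down the prefetching worker processes
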
Example #2
def train(config, root, checkpoint=None, retrain=False):
    """Run training. Loads model, iterator and dataset according to config."""
    from edflow.iterators.batches import make_batches

    log.set_log_target("train")
    logger = log.get_logger("train")
    logger.info("Starting Training.")

    implementations = get_implementations_from_config(
        config, ["model", "iterator", "dataset"])
    logger.info("Instantiating dataset.")
    dataset = implementations["dataset"](config=config)
    dataset.expand = True
    logger.info("Number of training samples: {}".format(len(dataset)))
    if "validation_dataset" in config:
        use_validation_dataset = True
        implementations["validation_dataset"] = get_obj_from_str(
            config["validation_dataset"])
        logger.info("Instantiating validation dataset.")
        validation_dataset = implementations["validation_dataset"](
            config=config)
        logger.info("Number of validation samples: {}".format(
            len(validation_dataset)))
    else:
        use_validation_dataset = False

    n_processes = config.get("n_data_processes", min(16, config["batch_size"]))
    n_prefetch = config.get("n_prefetch", 1)
    batches = make_batches(
        dataset,
        batch_size=config["batch_size"],
        shuffle=True,
        n_processes=n_processes,
        n_prefetch=n_prefetch,
        error_on_timeout=config.get("error_on_timeout", False),
    )
    if use_validation_dataset:
        validation_batches = make_batches(
            validation_dataset,
            batch_size=config["batch_size"],
            shuffle=True,
            n_processes=n_processes,
            n_prefetch=n_prefetch,
            error_on_timeout=config.get("error_on_timeout", False),
        )
    else:
        validation_batches = None
    try:
        if "num_steps" in config:
            # set number of epochs to perform at least num_steps steps
            steps_per_epoch = len(dataset) / config["batch_size"]
            num_epochs = config["num_steps"] / steps_per_epoch
            config["num_epochs"] = math.ceil(num_epochs)
        else:
            steps_per_epoch = len(dataset) / config["batch_size"]
            num_steps = config["num_epochs"] * steps_per_epoch
            config["num_steps"] = math.ceil(num_steps)

        logger.info("Instantiating model.")
        Model = implementations["model"](config)
        if not "hook_freq" in config:
            config["hook_freq"] = 1
        compat_kwargs = dict(hook_freq=config["hook_freq"],
                             num_epochs=config["num_epochs"])
        logger.info("Instantiating iterator.")
        Trainer = implementations["iterator"](config,
                                              root,
                                              Model,
                                              dataset=dataset,
                                              **compat_kwargs)

        logger.info("Initializing model.")
        if checkpoint is not None:
            Trainer.initialize(checkpoint_path=checkpoint)
        else:
            Trainer.initialize()

        if retrain:
            Trainer.reset_global_step()

        # save current config
        logger.info("Starting Training with config:\n{}".format(
            yaml.dump(config)))
        cpath = _save_config(config, prefix="train")
        logger.info("Saved config at {}".format(cpath))

        logger.info("Iterating.")
        Trainer.iterate(batches, validation_batches)
    finally:
        batches.finalize()
        if use_validation_dataset:
            validation_batches.finalize()
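
The num_steps/num_epochs conversion at the top of the try block is a plain ceiling computation. A quick worked check with made-up numbers (none of these values come from the examples):

import math

dataset_length, batch_size, num_steps = 1000, 64, 300  # hypothetical values
steps_per_epoch = dataset_length / batch_size           # 15.625
num_epochs = math.ceil(num_steps / steps_per_epoch)     # ceil(19.2) == 20
assert num_epochs * steps_per_epoch >= num_steps        # at least num_steps steps run
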
Example #3
def test(config, root, checkpoint=None, nogpu=False, bar_position=0):
    """Run tests. Loads model, iterator and dataset from config."""
    from edflow.iterators.batches import make_batches

    log.set_log_target("latest_eval")
    logger = log.get_logger("test")
    logger.info("Starting Evaluation.")

    if "test_batch_size" in config:
        config["batch_size"] = config["test_batch_size"]
    if "test_mode" not in config:
        config["test_mode"] = True

    implementations = get_implementations_from_config(
        config, ["model", "iterator", "dataset"])

    dataset = implementations["dataset"](config=config)
    dataset.expand = True
    logger.info("Number of testing samples: {}".format(len(dataset)))
    n_processes = config.get("n_data_processes", min(16, config["batch_size"]))
    n_prefetch = config.get("n_prefetch", 1)
    batches = make_batches(
        dataset,
        batch_size=config["batch_size"],
        shuffle=False,
        n_processes=n_processes,
        n_prefetch=n_prefetch,
        error_on_timeout=config.get("error_on_timeout", False),
    )

    try:
        logger.info("Initializing model.")
        Model = implementations["model"](config)

        config["hook_freq"] = 1
        config["num_epochs"] = 1
        config["nogpu"] = nogpu
        compat_kwargs = dict(
            hook_freq=config["hook_freq"],
            bar_position=bar_position,
            nogpu=config["nogpu"],
            num_epochs=config["num_epochs"],
        )
        Evaluator = implementations["iterator"](config,
                                                root,
                                                Model,
                                                dataset=dataset,
                                                **compat_kwargs)

        logger.info("Initializing model.")
        if checkpoint is not None:
            Evaluator.initialize(checkpoint_path=checkpoint)
        else:
            Evaluator.initialize()

        # save current config
        logger.info("Starting Evaluation with config:\n{}".format(
            yaml.dump(config)))
        prefix = "eval"
        if bar_position > 0:
            prefix = prefix + str(bar_position)
        cpath = _save_config(config, prefix=prefix)
        logger.info("Saved config at {}".format(cpath))

        logger.info("Iterating")
        while True:
            Evaluator.iterate(batches)
            if not config.get("eval_forever", False):
                break
    finally:
        batches.finalize()
Example #4
def get_activations_from_dset(dset,
                              imsupport,
                              sess,
                              batch_size=50,
                              imkey='image',
                              verbose=False):
    """Calculates the activations of the pool_3 layer for all images.

    Params:
    -- dset        : DatasetMixin which contains the images.
    -- imsupport   : Support of images. One of '-1->1', '0->1' or '0->255'
    -- sess        : current session
    -- batch_size  : the images numpy array is split into batches with batch size
                     batch_size. A reasonable batch size depends on the disposable hardware.
    -- imkey      : Key at which the images can be found in each example.
    -- verbose    : If set to True and parameter out_step is given, the number of calculated
                     batches is reported.
    Returns:
    -- A numpy array of dimension (num images, 2048) that contains the
       activations of the given tensor when feeding inception with the query tensor.
    """
    inception_layer = _get_inception_layer(sess)
    d0 = len(dset)
    if batch_size > d0:
        print("Warning: batch size is bigger than the dataset size; setting batch size to the dataset size.")
        batch_size = d0

    batches = make_batches(dset, batch_size, shuffle=False)
    n_batches = len(batches)
    n_used_imgs = n_batches * batch_size
    pred_arr = np.empty((n_used_imgs, 2048))

    if verbose:
        print("dataset size: {}, batches: {}, used images: {}".format(d0, n_batches, n_used_imgs))

    for i, batch in enumerate(tqdm(batches, desc='FID')):
        if i >= n_batches:
            break
        if verbose:
            print("\rPropagating batch %d/%d" % (i + 1, n_batches),
                  end="",
                  flush=True)
        start = i * batch_size
        end = start + batch_size
        images = retrieve(batch, imkey)
        images = adjust_support(np.array(images),
                                future_support='0->255',
                                current_support=imsupport,
                                clip=True)

        if len(images.shape) == 3:
            images = images[:, :, :, None]
            images = np.tile(images, [1, 1, 1, 3])
        elif images.shape[-1] == 1:
            images = np.tile(images, [1, 1, 1, 3])

        images = images.astype(np.float32)[..., :3]

        if len(pred_arr[start:end]) == 0:
            continue

        pred = sess.run(inception_layer,
                        {'FID_Inception_Net/ExpandDims:0': images})
        pred_arr[start:end] = pred.reshape(batch_size, -1)
        del batch  # clean up memory
    batches.finalize()
    if verbose:
        print(" done")
    return pred_arr
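
The channel handling in the middle of Example #4 (promoting grayscale batches to three channels and truncating anything past RGB) is easy to get wrong. to_rgb_batch below is a hypothetical helper that isolates just that step so it can be checked on its own:

import numpy as np


def to_rgb_batch(images):
    """Replicates Example #4's channel handling: (N,H,W) or (N,H,W,1) -> (N,H,W,3)."""
    if len(images.shape) == 3:       # (N, H, W): add a channel axis, then tile it
        images = images[:, :, :, None]
        images = np.tile(images, [1, 1, 1, 3])
    elif images.shape[-1] == 1:      # (N, H, W, 1): tile the existing channel
        images = np.tile(images, [1, 1, 1, 3])
    return images.astype(np.float32)[..., :3]  # keep at most the first three channels


assert to_rgb_batch(np.zeros((4, 8, 8))).shape == (4, 8, 8, 3)
assert to_rgb_batch(np.zeros((4, 8, 8, 1))).shape == (4, 8, 8, 3)
assert to_rgb_batch(np.zeros((4, 8, 8, 4))).shape == (4, 8, 8, 3)  # e.g. RGBA input
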
Example #5
File: main.py Project: jhaux/edflow
def _train(config, root, checkpoint=None, retrain=False):
    """Run training. Loads model, iterator and dataset according to config."""
    from edflow.iterators.batches import make_batches

    LogSingleton().set_default("train")
    logger = get_logger("train")
    logger.info("Starting Training.")

    implementations = get_implementations_from_config(
        config, ["model", "iterator", "dataset"]
    )

    # fork early to avoid taking all the crap into forked processes
    logger.info("Instantiating dataset.")
    dataset = implementations["dataset"](config=config)
    dataset.expand = True
    logger.info("Number of training samples: {}".format(len(dataset)))
    n_processes = config.get("n_data_processes", min(16, config["batch_size"]))
    n_prefetch = config.get("n_prefetch", 1)
    with make_batches(
        dataset,
        batch_size=config["batch_size"],
        shuffle=True,
        n_processes=n_processes,
        n_prefetch=n_prefetch,
        error_on_timeout=config.get("error_on_timeout", False),
    ) as batches:
        # get them going
        logger.info("Warm up batches.")
        next(batches)
        batches.reset()
        logger.info("Reset batches.")

        if "num_steps" in config:
            # set number of epochs to perform at least num_steps steps
            steps_per_epoch = len(dataset) / config["batch_size"]
            num_epochs = config["num_steps"] / steps_per_epoch
            config["num_epochs"] = math.ceil(num_epochs)
        else:
            steps_per_epoch = len(dataset) / config["batch_size"]
            num_steps = config["num_epochs"] * steps_per_epoch
            config["num_steps"] = math.ceil(num_steps)

        logger.info("Instantiating model.")
        Model = implementations["model"](config)
        if not "hook_freq" in config:
            config["hook_freq"] = 1
        compat_kwargs = dict(
            hook_freq=config["hook_freq"], num_epochs=config["num_epochs"]
        )
        logger.info("Instantiating iterator.")
        Trainer = implementations["iterator"](
            config, root, Model, dataset=dataset, **compat_kwargs
        )

        logger.info("Initializing model.")
        if checkpoint is not None:
            Trainer.initialize(checkpoint_path=checkpoint)
        else:
            Trainer.initialize()

        if retrain:
            Trainer.reset_global_step()

        # save current config
        logger.info("Starting Training with config:\n{}".format(yaml.dump(config)))
        cpath = _save_config(config, prefix="train")
        logger.info("Saved config at {}".format(cpath))

        logger.info("Iterating.")
        Trainer.iterate(batches)
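
Example #5 differs from Example #2 in two details: make_batches is used as a context manager, so finalization happens on exit rather than in a finally block, and the batches are warmed up once and reset before training. Reduced to just those two details (reusing the hypothetical ToyDataset from the sketch after Example #1):

with make_batches(ToyDataset(), batch_size=32, shuffle=True) as batches:
    next(batches)    # warm up: force the worker processes to start producing
    batches.reset()  # rewind so training still sees the full first epoch
    first_batch = next(batches)
# leaving the with-block finalizes the iterator, like finalize() in Example #2
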
Example #6
def train(config, root, checkpoint=None, retrain=False, debug=False):
    """Run training. Loads model, iterator and dataset according to config."""
    from edflow.iterators.batches import make_batches

    # disable integrations in debug mode
    if debug:
        if retrieve(config, "debug/disable_integrations", default=True):
            integrations = retrieve(config, "integrations", default=dict())
            for k in integrations:
                config["integrations"][k]["active"] = False
        max_steps = retrieve(config, "debug/max_steps", default=5 * 2)
        if max_steps > 0:
            config["num_steps"] = max_steps

    # backwards compatibility
    if not "datasets" in config:
        config["datasets"] = {"train": config["dataset"]}
        if "validation_dataset" in config:
            config["datasets"]["validation"] = config["validation_dataset"]

    log.set_log_target("train")
    logger = log.get_logger("train")
    logger.info("Starting Training.")

    model = get_obj_from_str(config["model"])
    iterator = get_obj_from_str(config["iterator"])
    datasets = dict(
        (split, get_obj_from_str(config["datasets"][split]))
        for split in config["datasets"]
    )

    logger.info("Instantiating datasets.")
    for split in datasets:
        datasets[split] = datasets[split](config=config)
        datasets[split].expand = True
        logger.info("{} dataset size: {}".format(split, len(datasets[split])))
        if debug:
            max_examples = retrieve(
                config, "debug/max_examples", default=5 * config["batch_size"]
            )
            if max_examples > 0:
                logger.info(
                    "Monkey patching {} dataset __len__ to {} examples".format(
                        split, max_examples
                    )
                )
                type(datasets[split]).__len__ = lambda self: max_examples

    n_processes = config.get("n_data_processes", min(16, config["batch_size"]))
    n_prefetch = config.get("n_prefetch", 1)
    logger.info("Building batches.")
    batches = dict()
    for split in datasets:
        batches[split] = make_batches(
            datasets[split],
            batch_size=config["batch_size"],
            shuffle=True,
            n_processes=n_processes,
            n_prefetch=n_prefetch,
            error_on_timeout=config.get("error_on_timeout", False),
        )
    main_split = "train"
    try:
        if "num_steps" in config:
            # set number of epochs to perform at least num_steps steps
            steps_per_epoch = len(datasets[main_split]) / config["batch_size"]
            num_epochs = config["num_steps"] / steps_per_epoch
            config["num_epochs"] = math.ceil(num_epochs)
        else:
            steps_per_epoch = len(datasets[main_split]) / config["batch_size"]
            num_steps = config["num_epochs"] * steps_per_epoch
            config["num_steps"] = math.ceil(num_steps)

        logger.info("Instantiating model.")
        model = model(config)
        if not "hook_freq" in config:
            config["hook_freq"] = 1
        compat_kwargs = dict(
            hook_freq=config["hook_freq"], num_epochs=config["num_epochs"]
        )
        logger.info("Instantiating iterator.")
        iterator = iterator(config, root, model, datasets=datasets, **compat_kwargs)

        logger.info("Initializing model.")
        if checkpoint is not None:
            iterator.initialize(checkpoint_path=checkpoint)
        else:
            iterator.initialize()

        if retrain:
            iterator.reset_global_step()

        # save current config
        logger.info("Starting Training with config:\n{}".format(yaml.dump(config)))
        cpath = _save_config(config, prefix="train")
        logger.info("Saved config at {}".format(cpath))

        logger.info("Iterating.")
        iterator.iterate(batches)
    finally:
        for split in batches:
            batches[split].finalize()
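
Examples #6 and #7 generalize the single batches object to one iterator per split, so the bookkeeping reduces to building and finalizing a dict. A minimal sketch under the same ToyDataset assumption (shuffling only the train split is a choice made here for illustration; Example #6 shuffles every split):

datasets = {"train": ToyDataset(), "validation": ToyDataset()}
batches = {
    split: make_batches(ds, batch_size=16, shuffle=(split == "train"))
    for split, ds in datasets.items()
}
try:
    train_batch = next(batches["train"])  # stand-in for iterator.iterate(batches)
finally:
    for split in batches:
        batches[split].finalize()
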
Example #7
def test(config, root, checkpoint=None, nogpu=False, bar_position=0, debug=False):
    """Run tests. Loads model, iterator and dataset from config."""
    from edflow.iterators.batches import make_batches

    # backwards compatibility
    if not "datasets" in config:
        config["datasets"] = {"train": config["dataset"]}
        if "validation_dataset" in config:
            config["datasets"]["validation"] = config["validation_dataset"]

    log.set_log_target("latest_eval")
    logger = log.get_logger("test")
    logger.info("Starting Evaluation.")

    if "test_batch_size" in config:
        config["batch_size"] = config["test_batch_size"]
    if "test_mode" not in config:
        config["test_mode"] = True

    model = get_obj_from_str(config["model"])
    iterator = get_obj_from_str(config["iterator"])
    datasets = dict(
        (split, get_obj_from_str(config["datasets"][split]))
        for split in config["datasets"]
    )

    logger.info("Instantiating datasets.")
    for split in datasets:
        datasets[split] = datasets[split](config=config)
        datasets[split].expand = True
        logger.info("{} dataset size: {}".format(split, len(datasets[split])))
        if debug:
            max_examples = retrieve(
                config, "debug/max_examples", default=5 * config["batch_size"]
            )
            if max_examples > 0:
                logger.info(
                    "Monkey patching {} dataset __len__ to {} examples".format(
                        split, max_examples
                    )
                )
                type(datasets[split]).__len__ = lambda self: max_examples

    n_processes = config.get("n_data_processes", min(16, config["batch_size"]))
    n_prefetch = config.get("n_prefetch", 1)
    logger.info("Building batches.")
    batches = dict()
    for split in datasets:
        batches[split] = make_batches(
            datasets[split],
            batch_size=config["batch_size"],
            shuffle=False,
            n_processes=n_processes,
            n_prefetch=n_prefetch,
            error_on_timeout=config.get("error_on_timeout", False),
        )
    try:
        logger.info("Initializing model.")
        model = model(config)

        config["hook_freq"] = 1
        config["num_epochs"] = 1
        config["nogpu"] = nogpu
        compat_kwargs = dict(
            hook_freq=config["hook_freq"],
            bar_position=bar_position,
            nogpu=config["nogpu"],
            num_epochs=config["num_epochs"],
        )
        iterator = iterator(config, root, model, datasets=datasets, **compat_kwargs)

        logger.info("Initializing model.")
        if checkpoint is not None:
            iterator.initialize(checkpoint_path=checkpoint)
        else:
            iterator.initialize()

        # save current config
        logger.info("Starting Evaluation with config:\n{}".format(yaml.dump(config)))
        prefix = "eval"
        if bar_position > 0:
            prefix = prefix + str(bar_position)
        cpath = _save_config(config, prefix=prefix)
        logger.info("Saved config at {}".format(cpath))

        logger.info("Iterating")
        while True:
            iterator.iterate(batches)
            if not config.get("eval_forever", False):
                break
    finally:
        for split in batches:
            batches[split].finalize()
Example #8
        # log_op collects the scalar losses defined earlier in the enclosing method
        def log_op():
            return {
                'scalars': {
                    'loss': loss,
                    'kl': kl_loss,
                    'det': log_det_loss
                }
            }

        return {'train_op': train_op, 'eval_op': eval_op, 'log_op': log_op}

    def save(self, path):
        pass


# In[19]:

P = init_project('logs', code_root=None, postfix='tflow')

Trainer = Iterator({'test_mode': False}, P.root, TM1, Moon, num_epochs=5)

# In[20]:

Trainer.iterate(make_batches(Moon, batch_size=64, shuffle=True))
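
Note that the cell above passes the batch iterator inline, so nothing ever finalizes it. With the try/finally pattern from the earlier examples, the same call would read:

batches = make_batches(Moon, batch_size=64, shuffle=True)
try:
    Trainer.iterate(batches)
finally:
    batches.finalize()
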

del P
