Exemplo n.º 1
0
def test_aerial_dataset_creation(aerial_image_size, aerial_nb_labels):
    """Check the state of a freshly created AerialDataset.

    A new dataset must keep the image size it was built with, report
    ``aerial_nb_labels`` labels and contain no image yet.
    """
    dataset = AerialDataset(aerial_image_size)
    # Nothing has been populated or loaded at this point.
    assert dataset.image_size == aerial_image_size
    assert dataset.get_nb_labels() == aerial_nb_labels
    assert dataset.get_nb_images() == 0
Exemplo n.º 2
0
def test_aerial_training_dataset_population(
    aerial_image_size,
    aerial_training_temp_dir,
    aerial_raw_sample,
    aerial_nb_images,
    nb_tiles_per_image,
    aerial_training_config,
    aerial_nb_labels,
    aerial_nb_output_training_images,
):
    """Populate an Aerial training dataset and check the generated output.

    The number of images is only bounded: at least 10% of, and at most three
    more than, ``aerial_nb_output_training_images``.
    NOTE(review): presumably the tile generation is not deterministic, hence
    the tolerance -- confirm against ``AerialDataset.populate``.
    """
    output_dir = str(aerial_training_temp_dir)
    config_file = str(aerial_training_config)

    dataset = AerialDataset(aerial_image_size)
    dataset.populate(
        output_dir,
        aerial_raw_sample,
        nb_images=aerial_nb_images,
        nb_tiles_per_image=nb_tiles_per_image,
    )
    dataset.save(config_file)

    assert dataset.get_nb_labels() == aerial_nb_labels

    lower_bound = 0.1 * aerial_nb_output_training_images
    upper_bound = aerial_nb_output_training_images + 3
    assert dataset.get_nb_images() >= lower_bound
    assert dataset.get_nb_images() <= upper_bound

    # Saving the dataset must have written the configuration file.
    assert os.path.isfile(config_file)

    # Each output sub-folder holds one file per image known to the dataset.
    for subfolder in ("images", "labels"):
        generated_files = os.listdir(os.path.join(output_dir, subfolder))
        assert len(generated_files) == dataset.get_nb_images()
Exemplo n.º 3
0
def test_aerial_dataset_loading(
    aerial_tile_size,
    aerial_config,
    aerial_nb_labels,
    aerial_nb_output_images,
):
    """Load an AerialDataset from a configuration file.

    After loading, the dataset must report ``aerial_nb_labels`` labels and
    ``aerial_nb_output_images`` images.
    """
    dataset = AerialDataset(aerial_tile_size)
    dataset.load(aerial_config)
    assert dataset.get_nb_labels() == aerial_nb_labels
    assert dataset.get_nb_images() == aerial_nb_output_images
Exemplo n.º 4
0
def test_aerial_dataset_population(
    aerial_tile_size,
    aerial_temp_dir,
    aerial_raw_sample,
    aerial_nb_images,
    aerial_config,
    aerial_nb_labels,
    aerial_nb_output_images,
):
    """Populate an AerialDataset and check the generated output.

    The dataset is populated from ``aerial_raw_sample`` into
    ``aerial_temp_dir`` and saved to ``aerial_config``; the label count, the
    image count, the configuration file and the content of the "images" and
    "labels" sub-folders are then verified.
    """
    output_dir = str(aerial_temp_dir)
    config_file = str(aerial_config)

    dataset = AerialDataset(aerial_tile_size)
    dataset.populate(output_dir, aerial_raw_sample, nb_images=aerial_nb_images)
    dataset.save(config_file)

    assert dataset.get_nb_labels() == aerial_nb_labels
    assert dataset.get_nb_images() == aerial_nb_output_images

    # Saving the dataset must have written the configuration file.
    assert os.path.isfile(config_file)

    # Each output sub-folder holds exactly the expected number of files.
    for subfolder in ("images", "labels"):
        generated_files = os.listdir(os.path.join(output_dir, subfolder))
        assert len(generated_files) == aerial_nb_output_images
Exemplo n.º 5
0
def test_aerial_testing_dataset_population(
    aerial_image_size,
    aerial_testing_temp_dir,
    aerial_raw_sample,
    aerial_nb_images,
    aerial_testing_config,
    aerial_nb_labels,
    aerial_nb_output_testing_images,
):
    """Populate an Aerial testing dataset (``labelling=False``).

    Only the "images" sub-folder is inspected afterwards, since the dataset
    is populated with labelling disabled.
    """
    output_dir = str(aerial_testing_temp_dir)
    config_file = str(aerial_testing_config)

    dataset = AerialDataset(aerial_image_size)
    dataset.populate(
        output_dir,
        aerial_raw_sample,
        nb_images=aerial_nb_images,
        labelling=False,
    )
    dataset.save(config_file)

    assert dataset.get_nb_labels() == aerial_nb_labels
    assert dataset.get_nb_images() == aerial_nb_output_testing_images

    # Saving the dataset must have written the configuration file.
    assert os.path.isfile(config_file)

    # One generated file per image known to the dataset.
    generated_images = os.listdir(os.path.join(output_dir, "images"))
    assert len(generated_images) == dataset.get_nb_images()
Exemplo n.º 6
0
def main(args):
    """Create the training, validation and testing datasets, then log a glossary.

    For each of the three subsets whose requested image count is positive, the
    dataset is loaded from its configuration file when that file exists;
    otherwise it is populated from the matching input sub-folder and saved.
    A label glossary built from the training dataset is logged at the end.

    Parameters
    ----------
    args : object
        Parsed command-line arguments. The attributes read here are
        ``datapath``, ``dataset``, ``image_size``, ``nb_training_image``,
        ``nb_validation_image`` and ``nb_testing_image``.

    Raises
    ------
    SystemExit
        With status 1 when ``args.dataset`` is not supported, and with
        status 0 once the processing is done.
    """
    # Data path and repository management
    input_folder = utils.prepare_input_folder(args.datapath, args.dataset)
    prepro_folder = utils.prepare_preprocessed_folder(args.datapath,
                                                      args.dataset,
                                                      args.image_size)

    # Dataset creation
    if args.dataset == "mapillary":
        config_path = os.path.join(input_folder, "config_aggregate.json")
        train_dataset = MapillaryDataset(args.image_size, config_path)
        validation_dataset = MapillaryDataset(args.image_size, config_path)
        test_dataset = MapillaryDataset(args.image_size, config_path)
    elif args.dataset == "shapes":
        train_dataset = ShapeDataset(args.image_size)
        validation_dataset = ShapeDataset(args.image_size)
        test_dataset = ShapeDataset(args.image_size)
        # The testing "labels" folder is created explicitly for this dataset.
        os.makedirs(os.path.join(prepro_folder["testing"], "labels"),
                    exist_ok=True)
    elif args.dataset == "aerial":
        train_dataset = AerialDataset(args.image_size)
        validation_dataset = AerialDataset(args.image_size)
        test_dataset = AerialDataset(args.image_size)
    elif args.dataset == "tanzania":
        train_dataset = TanzaniaDataset(args.image_size)
        validation_dataset = TanzaniaDataset(args.image_size)
        test_dataset = TanzaniaDataset(args.image_size)
    else:
        logger.error(
            "Unsupported dataset type. Please choose amongst %s",
            AVAILABLE_DATASETS,
        )
        sys.exit(1)

    # Dataset populating/loading
    # (depends on the existence of a specification file)
    if args.nb_training_image > 0:
        if os.path.isfile(prepro_folder["training_config"]):
            train_dataset.load(prepro_folder["training_config"],
                               args.nb_training_image)
        else:
            # The format string and its argument are passed separately so
            # that the logging module interpolates the path into the message
            # (a single tuple would be logged verbatim).
            logger.info(
                "No existing configuration file for this dataset. "
                "Create %s.",
                prepro_folder["training_config"],
            )
            input_image_dir = os.path.join(input_folder, "training")
            train_dataset.populate(
                prepro_folder["training"],
                input_image_dir,
                nb_images=args.nb_training_image,
                nb_processes=int(config.get("running", "processes")),
            )
            train_dataset.save(prepro_folder["training_config"])

    if args.nb_validation_image > 0:
        if os.path.isfile(prepro_folder["validation_config"]):
            validation_dataset.load(prepro_folder["validation_config"],
                                    args.nb_validation_image)
        else:
            logger.info(
                "No existing configuration file for this dataset. "
                "Create %s.",
                prepro_folder["validation_config"],
            )
            input_image_dir = os.path.join(input_folder, "validation")
            validation_dataset.populate(
                prepro_folder["validation"],
                input_image_dir,
                nb_images=args.nb_validation_image,
                nb_processes=int(config.get("running", "processes")),
            )
            validation_dataset.save(prepro_folder["validation_config"])

    if args.nb_testing_image > 0:
        if os.path.isfile(prepro_folder["testing_config"]):
            test_dataset.load(prepro_folder["testing_config"],
                              args.nb_testing_image)
        else:
            logger.info(
                "No existing configuration file for this dataset. "
                "Create %s.",
                prepro_folder["testing_config"],
            )
            input_image_dir = os.path.join(input_folder, "testing")
            # The testing subset is populated without labelling.
            test_dataset.populate(
                prepro_folder["testing"],
                input_image_dir,
                nb_images=args.nb_testing_image,
                labelling=False,
                nb_processes=int(config.get("running", "processes")),
            )
            test_dataset.save(prepro_folder["testing_config"])

    # Summary of the training labels together with their popularity
    glossary = pd.DataFrame(train_dataset.labels)
    glossary["popularity"] = train_dataset.get_label_popularity()
    logger.info("Data glossary:\n%s", glossary)
    sys.exit(0)
Exemplo n.º 7
0
def main(args):
    """Create the training, validation and testing datasets, then log a glossary.

    Each subset with a positive requested image count is loaded from its
    configuration file when that file exists; otherwise it is populated from
    the matching input sub-folder and saved. A label glossary built from the
    training dataset is logged at the end.

    Parameters
    ----------
    args : object
        Parsed command-line arguments. The attributes read here are
        ``datapath``, ``dataset``, ``image_size``, ``nb_training_image``,
        ``nb_validation_image``, ``nb_testing_image`` and
        ``nb_tiles_per_image``.

    Raises
    ------
    ValueError
        If the dataset type is unsupported, or if a dataset listed in
        ``GEOGRAPHIC_DATASETS`` is requested with training and/or validation
        images but without a number of tiles per image.
    """
    # Data path and repository management
    input_folder = utils.prepare_input_folder(args.datapath, args.dataset)
    prepro_folder = utils.prepare_preprocessed_folder(
        args.datapath, args.dataset, args.image_size
    )

    if args.dataset in GEOGRAPHIC_DATASETS:
        tiles_needed = (
            args.nb_training_image > 0 or args.nb_validation_image > 0
        )
        if tiles_needed and args.nb_tiles_per_image is None:
            raise ValueError(
                "The amount of tiles per image must be specified for "
                f"the {args.dataset} dataset, if training and/or validation images "
                "are required. See 'deepo datagen -h' for more details.")

    # Dataset creation: three independent instances of the same dataset type
    if args.dataset == "mapillary":
        config_path = os.path.join(input_folder, "config_aggregate.json")
        train_dataset, validation_dataset, test_dataset = (
            MapillaryDataset(args.image_size, config_path) for _ in range(3)
        )
    elif args.dataset == "shapes":
        train_dataset, validation_dataset, test_dataset = (
            ShapeDataset(args.image_size) for _ in range(3)
        )
        testing_label_dir = os.path.join(prepro_folder["testing"], "labels")
        os.makedirs(testing_label_dir, exist_ok=True)
    elif args.dataset == "aerial":
        train_dataset, validation_dataset, test_dataset = (
            AerialDataset(args.image_size) for _ in range(3)
        )
    elif args.dataset == "tanzania":
        train_dataset, validation_dataset, test_dataset = (
            TanzaniaDataset(args.image_size) for _ in range(3)
        )
    else:
        raise ValueError(
            f"Unsupported dataset type. Please choose amongst {AVAILABLE_DATASETS}"
        )

    def _load_or_populate(dataset, subset, nb_images, is_testing=False):
        """Load `dataset` from its configuration file, or populate and save it."""
        if not nb_images > 0:
            return
        config_file = prepro_folder[subset + "_config"]
        if os.path.isfile(config_file):
            dataset.load(config_file, nb_images)
            return
        logger.info(
            "No existing configuration file for this dataset. Create %s.",
            config_file,
        )
        input_image_dir = os.path.join(input_folder, subset)
        populate_options = {"nb_images": nb_images}
        if is_testing:
            # The testing subset is populated without labelling.
            populate_options["labelling"] = False
        populate_options["nb_processes"] = int(
            config.get("running", "processes")
        )
        if not is_testing:
            populate_options["nb_tiles_per_image"] = args.nb_tiles_per_image
        dataset.populate(
            prepro_folder[subset], input_image_dir, **populate_options
        )
        dataset.save(config_file)

    # Dataset populating/loading
    # (depends on the existence of a specification file)
    _load_or_populate(train_dataset, "training", args.nb_training_image)
    _load_or_populate(
        validation_dataset, "validation", args.nb_validation_image
    )
    _load_or_populate(
        test_dataset, "testing", args.nb_testing_image, is_testing=True
    )

    # Summary of the training labels together with their popularity
    label_glossary = pd.DataFrame(train_dataset.labels)
    label_glossary["popularity"] = train_dataset.get_label_popularity()
    logger.info("Data glossary:\n%s", label_glossary)
Exemplo n.º 8
0
    # Dataset creation
    if args.dataset == "mapillary":
        config_name = "config.json" if not args.aggregate_label else "config_aggregate.json"
        config_path = os.path.join(input_folder, config_name)
        train_dataset = MapillaryDataset(args.image_size, config_path)
        validation_dataset = MapillaryDataset(args.image_size, config_path)
        test_dataset = MapillaryDataset(args.image_size, config_path)
    elif args.dataset == "shapes":
        train_dataset = ShapeDataset(args.image_size)
        validation_dataset = ShapeDataset(args.image_size)
        test_dataset = ShapeDataset(args.image_size)
        os.makedirs(os.path.join(prepro_folder["testing"], "labels"),
                    exist_ok=True)
    elif args.dataset == "aerial":
        train_dataset = AerialDataset(args.image_size)
        validation_dataset = AerialDataset(args.image_size)
        test_dataset = AerialDataset(args.image_size)
    elif args.dataset == "tanzania":
        train_dataset = TanzaniaDataset(args.image_size)
        validation_dataset = TanzaniaDataset(args.image_size)
        test_dataset = TanzaniaDataset(args.image_size)
    else:
        logger.error("Unsupported dataset type. Please choose amongst %s",
                     AVAILABLE_DATASETS)
        sys.exit(1)

    # Dataset populating/loading (depends on the existence of a specification file)
    if args.nb_training_image > 0:
        if os.path.isfile(prepro_folder["training_config"]):
            train_dataset.load(prepro_folder["training_config"],