Пример #1
0
    def test_is_tif(self):
        """Check that tif and vrt files are identified as tif, with or without an extension"""
        fixtures = op.join('test', 'fixtures')
        tif_path = op.join(fixtures, 'drone.tif')
        vrt_path = op.join(fixtures, 'drone.vrt')

        # files that carry their regular .tif / .vrt extension
        self.assertTrue(is_tif(tif_path))
        self.assertTrue(is_tif(vrt_path))

        # the same files copied to an extension-less name in a scratch
        # directory (tif first, then vrt)
        for source in (tif_path, vrt_path):
            with tempfile.TemporaryDirectory() as scratch:
                bare_copy = op.join(scratch, 'drone')
                shutil.copy(source, bare_copy)
                self.assertTrue(is_tif(bare_copy))
Пример #2
0
def preview(dest_folder, number, classes, imagery, ml_type,
            imagery_offset=False, **kwargs):
    """Produce imagery examples for specified classes

    For each class, up to `number` tiles matching that class are fetched into
    `<dest_folder>/examples/<class name>`. For object-detection the bounding
    boxes are drawn onto the saved image; for segmentation the image is
    pasted through the label mask onto a blank 256x256 canvas.

    Parameters
    ------------
    dest_folder: str
        Folder to save labels and example tiles into
    number: int
        Maximum number of preview images to download per class
    classes: list
        A list of classes for machine learning training. Each class is defined as a dict
        with two required properties:
          - name: class name
          - filter: A Mapbox GL Filter.
        See the README for more details
    imagery: str
        Imagery template to download satellite images from.
        Ex: http://a.tiles.mapbox.com/v4/mapbox.satellite/{z}/{x}/{y}.jpg?access_token=ACCESS_TOKEN
    ml_type: str
        Defines the type of machine learning. One of "classification", "object-detection", or "segmentation"
    imagery_offset: list
        An optional list of integers representing the number of pixels to offset imagery. Ex. [15, -5] will
        move the images 15 pixels right and 5 pixels up relative to the requested tile bounds.
        Passed unchanged to the tile acquisition function; defaults to False.
    **kwargs: dict
        Other properties from CLI config passed as keywords to other utility functions
    """
    labels_file = op.join(dest_folder, 'labels.npz')

    # keep the labels archive open only for the duration of the preview so
    # the underlying file handle is always released
    with np.load(labels_file) as tiles:
        # create example tiles directory
        examples_dir = op.join(dest_folder, 'examples')
        makedirs(examples_dir, exist_ok=True)

        # find examples tiles for each class and download
        print('Writing example images to {}'.format(examples_dir))

        # get image acquisition function based on imagery string
        image_function = get_tile_tif if is_tif(imagery) else download_tile_tms

        for i, cl in enumerate(classes):
            class_name = cl.get('name')

            # create class directory
            class_dir = op.join(dest_folder, 'examples', class_name)
            makedirs(class_dir, exist_ok=True)

            # lazily select tiles whose label matches this class; class
            # indices passed to class_match are 1-based
            class_tiles = (t for t in tiles.files
                           if class_match(ml_type, tiles[t], i + 1))
            print('Downloading at most {} tiles for class {}'.format(
                number, class_name))
            for n, tile in enumerate(class_tiles):
                # n is 0-based, so stop once `number` tiles were handled
                if n >= number:
                    break

                tile_img = image_function(tile, imagery, class_dir,
                                          imagery_offset)

                if ml_type == 'object-detection':
                    # draw every labelled bounding box onto the image
                    with Image.open(tile_img) as img:
                        draw = ImageDraw.Draw(img)
                        for box in tiles[tile]:
                            draw.rectangle(
                                ((box[0], box[1]), (box[2], box[3])),
                                outline='red')
                        img.save(tile_img)
                elif ml_type == 'segmentation':
                    # show only the labelled pixels: paste the image through
                    # the label mask onto a blank canvas
                    final = Image.new('RGB', (256, 256))
                    mask = Image.fromarray(tiles[tile] * 255)
                    with Image.open(tile_img) as img:
                        final.paste(img, mask=mask)
                    final.save(tile_img)
Пример #3
0
def package_directory(dest_folder,
                      classes,
                      imagery,
                      ml_type,
                      seed=False,
                      split_names=('train', 'test'),
                      split_vals=(0.8, .2),
                      **kwargs):
    """Generate an .npz file containing arrays for training machine learning algorithms

    Reads `labels.npz` from `dest_folder`, pairs every label with its image
    in `<dest_folder>/tiles`, shuffles, splits the samples into the requested
    partitions and writes them to `<dest_folder>/data.npz` under the keys
    `x_<split name>` and `y_<split name>`.

    Parameters
    ------------
    dest_folder: str
        Folder to save labels, tiles, and final numpy arrays into
    classes: list
        A list of classes for machine learning training. Each class is defined
        as a dict with two required properties:
          - name: class name
          - filter: A Mapbox GL Filter.
        See the README for more details
    imagery: str
        Imagery template to download satellite images from.
        Ex: http://a.tiles.mapbox.com/v4/mapbox.satellite/{z}/{x}/{y}.jpg?access_token=ACCESS_TOKEN
    ml_type: str
        Defines the type of machine learning. One of "classification",
        "object-detection", or "segmentation"
    seed: int
        Random generator seed. Optional, use to make results reproducible.
        Any value other than False/None (including 0) is used as the seed.
    split_vals: tuple
        Percentage of data to put in each category listed in split_names. Must
        be floats and must sum to one. Default: (0.8, 0.2)
    split_names: tuple
        Default: ('train', 'test')
        List of names for each subset of the data.
    **kwargs: dict
        Other properties from CLI config passed as keywords to other utility
        functions.

    Raises
    ------------
    ValueError
        If `split_names` and `split_vals` differ in length, if `split_vals`
        does not sum to one, or if any partition would receive zero samples.
    """
    # if a seed is given, use it; compare against the "not given" sentinels
    # explicitly so that a seed of 0 is honoured
    if seed is not False and seed is not None:
        np.random.seed(seed)

    if len(split_names) != len(split_vals):
        raise ValueError('`split_names` and `split_vals` must be the same '
                         'length. Please update your config.')
    if not np.isclose(sum(split_vals), 1):
        raise ValueError(
            '`split_vals` must sum to one. Please update your config.')

    labels_file = op.join(dest_folder, 'labels.npz')

    x_vals = []
    y_vals = []

    # open labels file; the context manager releases the archive handle
    with np.load(labels_file) as labels:
        # sort before shuffling so a given seed always yields the same order
        tiles = np.array(sorted(labels.files))
        np.random.shuffle(tiles)

        # find maximum number of features in advance so numpy shapes match
        if ml_type == 'object-detection':
            max_features = max(
                (len(labels[tile]) for tile in labels.files), default=0)

        # open the images and load those plus the labels into the final arrays
        if is_tif(imagery):  # if a TIF is provided, use jpg as tile format
            image_format = '.jpg'
        else:
            image_format = get_image_format(imagery, kwargs)

        for tile in tiles:
            image_file = op.join(dest_folder, 'tiles',
                                 '{}{}'.format(tile, image_format))
            try:
                img = Image.open(image_file)
            except FileNotFoundError:
                # we often don't download images for each label (e.g. background tiles)
                continue
            except OSError:
                print('Couldn\'t open {}, skipping'.format(image_file))
                continue

            # the context manager closes the image even if decoding fails
            with img:
                np_image = np.array(img)

            x_vals.append(np_image)
            if ml_type == 'classification':
                y_vals.append(labels[tile])
            elif ml_type == 'object-detection':
                # zero pad object-detection arrays
                cl = labels[tile]
                y_vals.append(
                    np.concatenate(
                        (cl, np.zeros((max_features - len(cl), 5)))))
            elif ml_type == 'segmentation':
                # Add grayscale channel
                y_vals.append(labels[tile][..., np.newaxis])

    # Convert lists to numpy arrays
    x_vals = np.array(x_vals, dtype=np.uint8)
    y_vals = np.array(y_vals, dtype=np.uint8)

    # Cumulative (truncated) sample counts give the split indices
    n_samples = len(x_vals)
    split_inds = np.cumsum(
        [n_samples * val for val in split_vals]).astype(int)

    # Partition sizes exactly as `np.split` will produce them: the final
    # partition always runs to the end of the data
    boundaries = np.concatenate(([0], split_inds[:-1], [n_samples]))
    if np.any(np.diff(boundaries) <= 0):
        raise ValueError('Split must not generate zero samples per partition. '
                         'Change ratio of values in config file.')

    # Exclude last index as `np.split` handles splitting without that value
    split_arrs_x = np.split(x_vals, split_inds[:-1])
    split_arrs_y = np.split(y_vals, split_inds[:-1])

    save_dict = {}
    for si, split_name in enumerate(split_names):
        save_dict['x_{}'.format(split_name)] = split_arrs_x[si]
        save_dict['y_{}'.format(split_name)] = split_arrs_y[si]

    packaged_file = op.join(dest_folder, 'data.npz')
    np.savez(packaged_file, **save_dict)
    print('Saving packaged file to {}'.format(packaged_file))
Пример #4
0
def download_images(dest_folder,
                    classes,
                    imagery,
                    ml_type,
                    background_ratio,
                    imagery_offset=False,
                    **kwargs):
    """Download satellite images specified by a URL and a label.npz file

    Every tile in `<dest_folder>/labels.npz` whose label contains a class is
    fetched into `<dest_folder>/tiles`. For single-class classification
    problems a random sample of background tiles is fetched as well.

    Parameters
    ------------
    dest_folder: str
        Folder to save labels, tiles, and final numpy arrays into
    classes: list
        A list of classes for machine learning training. Each class is defined as a dict
        with two required properties:
          - name: class name
          - filter: A Mapbox GL Filter.
        See the README for more details
    imagery: str
        Imagery template to download satellite images from.
        Ex: http://a.tiles.mapbox.com/v4/mapbox.satellite/{z}/{x}/{y}.jpg?access_token=ACCESS_TOKEN
    ml_type: str
        Defines the type of machine learning. One of "classification", "object-detection", or "segmentation"
    background_ratio: float
        Determines the number of background images to download in single class problems. Ex. A value
        of 1 will download an equal number of background images to class images. The resulting
        count is truncated to a whole number of tiles.
    imagery_offset: list
        An optional list of integers representing the number of pixels to offset imagery. Ex. [15, -5] will
        move the images 15 pixels right and 5 pixels up relative to the requested tile bounds
    **kwargs: dict
        Other properties from CLI config passed as keywords to other utility functions
    """

    def class_test(value):
        """Determine if a label contains any class (i.e. is not background)"""
        if ml_type == 'object-detection':
            return len(value)
        elif ml_type == 'segmentation':
            return np.sum(value) > 0
        elif ml_type == 'classification':
            # a class tile is one whose first label entry is 0
            return value[0] == 0
        return None

    labels_file = op.join(dest_folder, 'labels.npz')

    # open labels file; the context manager releases the archive handle once
    # the tile names have been selected
    with np.load(labels_file) as labels:
        # create tiles directory
        tiles_dir = op.join(dest_folder, 'tiles')
        makedirs(tiles_dir, exist_ok=True)

        all_tiles = list(labels.files)

        # find tiles which have any matching class
        class_tiles = [tile for tile in all_tiles if class_test(labels[tile])]

    # for classification problems with a single class, we also get background
    # tiles up to len(class_tiles) * background_ratio
    background_tiles = []
    if ml_type == 'classification' and len(classes) == 1:
        # slice bounds must be integers; background_ratio is a float
        limit = max(0, int(len(class_tiles) * background_ratio))
        class_tile_set = set(class_tiles)  # O(1) membership checks
        background_tiles_full = [
            tile for tile in all_tiles if tile not in class_tile_set
        ]
        shuffle(background_tiles_full)
        background_tiles = background_tiles_full[:limit]

    # download tiles
    tiles_to_fetch = class_tiles + background_tiles
    print('Downloading {} tiles to {}'.format(len(tiles_to_fetch), tiles_dir))

    # get image acquisition function based on imagery string
    image_function = get_tile_tif if is_tif(imagery) else download_tile_tms

    for tile in tiles_to_fetch:
        image_function(tile, imagery, tiles_dir, imagery_offset)
Пример #5
0
def package_directory(dest_folder, classes, imagery, ml_type, seed=False, train_size=0.8, **kwargs):
    """Generate an .npz file containing arrays for training machine learning algorithms

    Reads `labels.npz` from `dest_folder`, pairs every label with its image
    in `<dest_folder>/tiles`, shuffles, and writes `x_train`, `y_train`,
    `x_test` and `y_test` arrays to `<dest_folder>/data.npz`.

    Parameters
    ------------
    dest_folder: str
        Folder to save labels, tiles, and final numpy arrays into
    classes: list
        A list of classes for machine learning training. Each class is defined as a dict
        with two required properties:
          - name: class name
          - filter: A Mapbox GL Filter.
        See the README for more details
    imagery: str
        Imagery template to download satellite images from.
        Ex: http://a.tiles.mapbox.com/v4/mapbox.satellite/{z}/{x}/{y}.jpg?access_token=ACCESS_TOKEN
    ml_type: str
        Defines the type of machine learning. One of "classification", "object-detection", or "segmentation"
    seed: int
        Random generator seed. Optional, use to make results reproducible.
        Any value other than False/None (including 0) is used as the seed.
    train_size: float
        Portion of the data to use in training, the remainder is used as test data (default 0.8)
    **kwargs: dict
        Other properties from CLI config passed as keywords to other utility functions
    """
    # if a seed is given, use it; compare against the "not given" sentinels
    # explicitly so that a seed of 0 is honoured
    if seed is not False and seed is not None:
        np.random.seed(seed)

    labels_file = op.join(dest_folder, 'labels.npz')

    x_vals = []
    y_vals = []

    # open labels file; the context manager releases the archive handle
    with np.load(labels_file) as labels:
        # sort before shuffling so a given seed always yields the same order
        tiles = np.array(sorted(labels.files))
        np.random.shuffle(tiles)

        # find maximum number of features in advance so numpy shapes match
        if ml_type == 'object-detection':
            max_features = max((len(labels[tile]) for tile in labels.files), default=0)

        # the tile extension is taken from the imagery URL path
        o = urlparse(imagery)
        _, image_format = op.splitext(o.path)
        if is_tif(imagery):  # if a TIF is provided, use jpg as tile format
            image_format = '.jpg'

        # open the images and load those plus the labels into the final arrays
        for tile in tiles:
            image_file = op.join(dest_folder, 'tiles', '{}{}'.format(tile, image_format))
            try:
                img = Image.open(image_file)
            except FileNotFoundError:
                # we often don't download images for each label (e.g. background tiles)
                continue
            except OSError:
                print('Couldn\'t open {}, skipping'.format(image_file))
                continue

            # the context manager closes the image even if decoding fails
            with img:
                np_image = np.array(img)

            x_vals.append(np_image)
            if ml_type == 'classification':
                y_vals.append(labels[tile])
            elif ml_type == 'object-detection':
                # zero pad object-detection arrays
                cl = labels[tile]
                y_vals.append(np.concatenate((cl, np.zeros((max_features - len(cl), 5)))))
            elif ml_type == 'segmentation':
                y_vals.append(labels[tile][..., np.newaxis])  # Add grayscale channel

    # split into train and test: the first `split_index` samples are training
    split_index = int(len(x_vals) * train_size)

    # convert lists to numpy arrays
    x_vals = np.array(x_vals, dtype=np.uint8)
    y_vals = np.array(y_vals, dtype=np.uint8)

    packaged_file = op.join(dest_folder, 'data.npz')
    print('Saving packaged file to {}'.format(packaged_file))
    np.savez(packaged_file,
             x_train=x_vals[:split_index, ...],
             y_train=y_vals[:split_index, ...],
             x_test=x_vals[split_index:, ...],
             y_test=y_vals[split_index:, ...])