예제 #1
0
def gen_label_samples(np_label, dist_samples, tile_size):
    """
    Yield square label tiles walked over a 2D label array.

    :param np_label: 2D numpy array of labels
    :param dist_samples: (int) stride, in pixels, between the origins of consecutive tiles
    :param tile_size: (int) side length, in pixels, of the tiles to yield
    :return: generator of (tile, (row, column)) where (row, column) is the tile's upper-left corner in np_label
    """
    height, width = np_label.shape
    for top in range(0, height, dist_samples):
        for left in range(0, width, dist_samples):
            tile = np_label[top:top + tile_size, left:left + tile_size]
            tile_h, tile_w = tile.shape[0], tile.shape[1]
            # Tiles hanging over the bottom/right edge come out short: pad them with -1.
            if min(tile_h, tile_w) < tile_size:
                tile = pad(tile,
                           pad_diff(tile_h, tile_w, tile_size, tile_size),
                           fill=-1)
            yield tile, (top, left)
예제 #2
0
def gen_img_samples(rst_pth, tile_size, dist_samples, *band_order):
    """
    Yield square image tiles read window by window from a raster file.

    :param rst_pth: path of the raster, opened with rasterio
    :param tile_size: (int) side length, in pixels, of the tiles to yield
    :param dist_samples: (int) stride, in pixels, between the origins of consecutive tiles
    :param band_order: optional; when given, its first element is passed to the raster's read() as the band
                       selection. Otherwise read() is called without a band selection.
    :return: generator of tiles, each passed through reshape_as_image
    """
    # Only the first extra positional argument is ever used as the band selection.
    read_args = (band_order[0],) if band_order else ()
    with rasterio.open(rst_pth) as raster:
        for top in range(0, raster.height, dist_samples):
            for left in range(0, raster.width, dist_samples):
                tile_window = Window.from_slices(
                    slice(top, top + tile_size),
                    slice(left, left + tile_size))
                tile = reshape_as_image(
                    raster.read(*read_args, window=tile_window))

                tile_h, tile_w = tile.shape[0], tile.shape[1]
                # Short tiles are padded up to tile_size with NaN.
                if tile_h < tile_size or tile_w < tile_size:
                    tile = pad(tile,
                               pad_diff(tile_h, tile_w, tile_size, tile_size),
                               fill=np.nan)

                yield tile
예제 #3
0
def samples_preparation(in_img_array,
                        label_array,
                        sample_size,
                        overlap,
                        samples_count,
                        num_classes,
                        samples_file,
                        val_percent,
                        val_sample_file,
                        dataset,
                        pixel_classes,
                        image_metadata=None,
                        dontcare=0,
                        min_annot_perc=None,
                        class_prop=None):
    """
    Extract and write samples from input image and reference image

    Both arrays are tiled with a stride of ``sample_size * (1 - overlap / 100)``. Tiles that come out smaller than
    ``sample_size`` are padded (image with NaN, label with ``dontcare``). A tile is handed to ``add_to_datasets``
    only when both ``minimum_annotated_percent`` and ``class_proportion`` accept it; otherwise it is counted as
    excluded.

    :param in_img_array: numpy array of the input image, indexed (row, column, band)
    :param label_array: numpy array of the annotation image, indexed (row, column, 1); the last axis is squeezed
    :param sample_size: (int) Size (in pixel) of the samples to create # TODO: could there be a different sample size for tst dataset? shows results closer to inference
    :param overlap: (int) Desired overlap between samples in %
    :param samples_count: (dict) Current number of samples created, with keys 'trn', 'val' and 'tst'
                          (updated in place and returned)
    :param num_classes: (number) Highest class value seen so far in reference data (updated and returned)
    :param samples_file: (hdf5 dataset) hdfs file where samples will be written
    :param val_percent: (int) percentage of validation samples
    :param val_sample_file: (hdf5 dataset) hdfs file where samples will be written (val)
    :param dataset: (str) Type of dataset where the samples will be written. Must be 'trn' or 'tst'
    :param pixel_classes: (dict) samples pixel statistics
    :param image_metadata: (dict) metadata associated to source raster
    :param dontcare: Value in gpkg features that will ignored during training. Used to pad label tiles.
    :param min_annot_perc: optional, minimum annotated percent required for sample to be created
    :param class_prop: optional, minimal proportion of pixels for each class required for sample to be created
    :return: updated samples count and number of classes.
    :raises ValueError: if dataset is neither 'trn' nor 'tst'
    :raises AssertionError: if no sample was added for the current raster
    """

    h, w, _ = in_img_array.shape
    if dataset == 'trn':
        idx_samples = samples_count['trn']
        # The raster metadata is also appended to the validation file when preparing 'trn' samples.
        append_to_dataset(val_sample_file["metadata"], repr(image_metadata))
    elif dataset == 'tst':
        idx_samples = samples_count['tst']
    else:
        raise ValueError(
            f"Dataset value must be trn or tst. Provided value is {dataset}")

    idx_samples_v = samples_count['val']

    # Adds raster metadata to the dataset. All samples created by tiling below will point to that metadata by index
    metadata_idx = append_to_dataset(samples_file["metadata"],
                                     repr(image_metadata))

    if overlap > 25:
        warnings.warn(
            "high overlap >25%, note that automatic train/val split creates very similar samples in both sets"
        )
    dist_samples = round(sample_size * (1 - (overlap / 100)))
    added_samples = 0
    excl_samples = 0
    columns = range(0, w, dist_samples)

    with tqdm(
            range(0, h, dist_samples),
            position=1,
            leave=True,
            desc=f'Writing samples. Dataset currently contains {idx_samples} '
            f'samples') as _tqdm:
        # Number of tiles that will be visited; constant for the whole raster.
        total_tiles = len(_tqdm) * len(columns)

        for row in _tqdm:
            for column in columns:
                data = in_img_array[row:row + sample_size,
                                    column:column + sample_size, :]
                target = np.squeeze(
                    label_array[row:row + sample_size,
                                column:column + sample_size, :],
                    axis=2)
                data_row = data.shape[0]
                data_col = data.shape[1]
                if data_row < sample_size or data_col < sample_size:
                    padding = pad_diff(
                        data_row, data_col, sample_size
                    )  # actual height, actual width, desired size
                    data = pad(
                        data, padding, fill=np.nan
                    )  # don't fill with 0 if possible. Creates false min value when scaling.

                target_row = target.shape[0]
                target_col = target.shape[1]
                if target_row < sample_size or target_col < sample_size:
                    padding = pad_diff(
                        target_row, target_col, sample_size
                    )  # actual height, actual width, desired size
                    target = pad(target, padding, fill=dontcare)

                # Count the pixels equal to 0 explicitly. Taking the first count returned by np.unique is only
                # correct when 0 is the smallest value in the tile, which fails with negative label values.
                background_count = np.count_nonzero(target == 0)
                target_background_percent = round(
                    background_count / target.size * 100, 1)

                sample_metadata = {'sample_indices': (row, column)}

                # add_to_datasets' return value is treated as "sample went to the validation set".
                val = False
                if minimum_annotated_percent(target_background_percent, min_annot_perc) and \
                        class_proportion(target, sample_size, class_prop):
                    val = add_to_datasets(dataset=dataset,
                                          samples_file=samples_file,
                                          val_percent=val_percent,
                                          val_sample_file=val_sample_file,
                                          data=data,
                                          target=target,
                                          sample_metadata=sample_metadata,
                                          metadata_idx=metadata_idx,
                                          dict_classes=pixel_classes)
                    if val:
                        idx_samples_v += 1
                    else:
                        idx_samples += 1
                    added_samples += 1
                else:
                    excl_samples += 1

                # Excluded tiles still contribute to the highest class value seen.
                target_class_num = np.max(target)
                if num_classes < target_class_num:
                    num_classes = target_class_num

                final_dataset = 'val' if val else dataset
                _tqdm.set_postfix(
                    Dataset=final_dataset,
                    Excld_samples=excl_samples,
                    Added_samples=f'{added_samples}/{total_tiles}',
                    Target_annot_perc=100 - target_background_percent)

    assert added_samples > 0, "No sample added for current raster. Problems may occur with use of metadata"
    if dataset == 'tst':
        samples_count['tst'] = idx_samples
    else:
        samples_count['trn'] = idx_samples
        samples_count['val'] = idx_samples_v
    # return the appended samples count and number of classes.
    return samples_count, num_classes
예제 #4
0
def segmentation(raster, clip_gpkg, model, sample_size, num_bands, device):
    """
    Run tiled inference over a raster and return the per-pixel predicted class.

    The raster is read with ``image_reader_as_array``, cut into non-overlapping ``sample_size`` tiles (edge tiles
    are zero-padded), and each tile is passed through ``model``. The argmax over dimension 1 of the model output
    is written into the prediction array.

    NOTE: the to-tensor transform is built from a ``params`` name that is not defined in this function; it must
    exist at module level.

    :param raster: input image, forwarded to image_reader_as_array as ``input_image``
    :param clip_gpkg: forwarded to image_reader_as_array as ``clip_gpkg``
    :param model: model to run; put in eval mode here
    :param sample_size: (int) side length, in pixels, of the tiles fed to the model
    :param num_bands: (int) number of leading bands of the image to keep; must not exceed the image's band count
    :param device: device the input tensors are moved to before the forward pass
    :return: (numpy array, uint8) predicted class per pixel, with the same height and width as the image
    :raises AssertionError: if num_bands is larger than the number of bands in the image
    """
    import torch  # function-scope import: only needed for the no_grad guard around the forward pass

    # switch to evaluate mode
    model.eval()
    img_array, input_image, dataset_nodata = image_reader_as_array(
        input_image=raster, clip_gpkg=clip_gpkg)
    metadata = add_metadata_from_raster_to_sample(img_array,
                                                  input_image,
                                                  meta_map=None,
                                                  raster_info=None)
    h, w, bands = img_array.shape
    assert num_bands <= bands, f"Num of specified bands is not compatible with image shape {img_array.shape}"
    if num_bands < bands:
        img_array = img_array[:, :, :num_bands]

    # Prediction buffer rounded up to a whole number of tiles; cropped back to (h, w) on return.
    h_ = sample_size * math.ceil(h / sample_size)
    w_ = sample_size * math.ceil(w / sample_size)
    pred_img = np.empty((h_, w_), dtype=np.uint8)

    # Neither of these depends on the tile, so build them once instead of once per tile.
    totensor_transform = augmentation.compose_transforms(
        params, dataset="tst", type='totensor')
    has_nir_branch = any("module.modelNIR" in key
                         for key in model.state_dict().keys())

    for row in tqdm(range(0, h, sample_size),
                    position=1,
                    leave=False,
                    desc='Inferring rows'):
        with tqdm(range(0, w, sample_size),
                  position=2,
                  leave=False,
                  desc='Inferring columns') as _tqdm:
            for column in _tqdm:
                sub_images = img_array[row:row + sample_size,
                                       column:column + sample_size, :]
                sub_images_row = sub_images.shape[0]
                sub_images_col = sub_images.shape[1]

                if sub_images_row < sample_size or sub_images_col < sample_size:
                    padding = pad_diff(actual_height=sub_images_row,
                                       actual_width=sub_images_col,
                                       desired_shape=sample_size)
                    sub_images = pad(
                        sub_images, padding, fill=0
                    )  # FIXME combine pad and pad_diff into one function

                sample = {'sat_img': sub_images, 'metadata': metadata}
                sample = totensor_transform(sample)
                # Add the batch dimension expected by the model.
                inputs = sample['sat_img'].unsqueeze_(0)
                inputs = inputs.to(device)

                if inputs.shape[1] == 4 and has_nir_branch:
                    ############################
                    # Test Implementation of the NIR
                    ############################
                    # Init NIR   TODO: make a proper way to read the NIR channel
                    #                  and put an option to be able to give the index of the NIR channel
                    # Extract the NIR channel -> [batch size, H, W] since it's only one channel
                    inputs_NIR = inputs[:, -1, ...]
                    # add a channel to get the good size -> [:, 1, :, :]
                    inputs_NIR.unsqueeze_(1)
                    # take out the NIR channel and take only the RGB for the inputs
                    inputs = inputs[:, :-1, ...]
                    inputs = [inputs, inputs_NIR]
                    ############################
                    # End of the test implementation module
                    ############################

                # Inference only: skip autograd bookkeeping for the forward pass.
                with torch.no_grad():
                    outputs = model(inputs)
                # torchvision models give output in 'out' key. May cause problems in future versions of torchvision.
                if isinstance(outputs,
                              OrderedDict) and 'out' in outputs.keys():
                    outputs = outputs['out']
                outputs = F.softmax(
                    outputs, dim=1).argmax(dim=1).squeeze(dim=0).cpu().numpy()

                pred_img[row:row + sample_size,
                         column:column + sample_size] = outputs

    return pred_img[:h, :w]
예제 #5
0
def _stratification_bias(target, stratd):
    """
    Compute the stratification bias of one label tile and refresh the running proportions in stratd.

    For each of the 'trn' and 'val' entries of ``stratd``, classes present in the tile are added to
    ``total_counts`` (with a count of 0) when missing, and ``total_props`` is recomputed from ``total_counts``
    and ``total_pixels``. The bias is ``strat_factor`` times the sign of (mean squared distance of the tile's
    class proportions to the 'val' proportions) minus (the same distance to the 'trn' proportions).

    :param target: numpy array of the label tile
    :param stratd: (dict) stratification state; mutated in place
    :return: (bias, tile_counts, tile_size) where tile_counts maps each class value in the tile to its pixel
             count and tile_size is the number of pixels in the tile
    """
    tile_size = target.size
    classes, counts = np.unique(target, return_counts=True)
    tile_counts = dict(zip(classes, counts))
    tile_props = {cls: cnt / tile_size for cls, cnt in tile_counts.items()}

    mean_sq_dist = {}
    for split in ('trn', 'val'):
        split_stats = stratd[split]
        total_counts = split_stats['total_counts']
        for cls in tile_props:
            total_counts.setdefault(cls, 0)

        total_pixels = split_stats['total_pixels']
        if total_pixels == 0:
            # Nothing assigned to this split yet: every proportion is zero.
            total_props = {cls: 0.0 for cls in total_counts}
        else:
            total_props = {
                cls: cnt / total_pixels
                for cls, cnt in total_counts.items()
            }
        split_stats['total_props'] = total_props

        deviations = np.array([
            np.abs(prop - total_props[cls])
            for cls, prop in tile_props.items()
        ])
        mean_sq_dist[split] = np.mean(deviations**2)

    dist = mean_sq_dist['val'] - mean_sq_dist['trn']
    return stratd['strat_factor'] * np.sign(dist), tile_counts, tile_size


def samples_preparation(in_img_array,
                        label_array,
                        sample_size,
                        overlap,
                        samples_count,
                        num_classes,
                        samples_file,
                        val_percent,
                        val_sample_file,
                        dataset,
                        pixel_classes,
                        dontcare,
                        image_metadata=None,
                        min_annot_perc=None,
                        class_prop=None,
                        stratd=None):
    """
    Extract and write samples from input image and reference image

    Both arrays are tiled with a stride of ``sample_size * (1 - overlap / 100)``. Tiles that come out smaller than
    ``sample_size`` are padded (image with NaN, label with ``dontcare``). A tile is handed to ``add_to_datasets``
    only when both ``minimum_annotated_percent`` and ``class_proportion`` accept it; otherwise it is counted as
    excluded.

    :param in_img_array: numpy array of the input image, indexed (row, column, band)
    :param label_array: numpy array of the annotation image, indexed (row, column, 1); the last axis is squeezed
    :param sample_size: (int) Size (in pixel) of the samples to create # TODO: could there be a different sample size for tst dataset? shows results closer to inference
    :param overlap: (int) Desired overlap between samples in %
    :param samples_count: (dict) Current number of samples created, with keys 'trn', 'val' and 'tst'
                          (updated in place and returned)
    :param num_classes: (number) Highest class value seen so far in reference data (updated and returned)
    :param samples_file: (hdf5 dataset) hdfs file where samples will be written
    :param val_percent: (int) percentage of validation samples
    :param val_sample_file: (hdf5 dataset) hdfs file where samples will be written (val)
    :param dataset: (str) Type of dataset where the samples will be written. Must be 'trn' or 'tst'
    :param pixel_classes: (dict) samples pixel statistics
    :param dontcare: Value in gpkg features that will ignored during training. Used to pad label tiles and
                     counted as background together with 0.
    :param image_metadata: (dict) metadata associated to source raster
    :param min_annot_perc: optional, minimum annotated percent required for sample to be created
    :param class_prop: optional, minimal proportion of pixels for each class required for sample to be created
    :param stratd: optional (dict) stratification state, only used when dataset is 'trn'. Read and updated here:
                   stratd['trn'] and stratd['val'] (each with 'total_counts', 'total_pixels', 'total_props'),
                   stratd['strat_factor'] and stratd['latest_assignment'] (name of the split whose totals
                   receive the tile; presumably set by add_to_datasets - confirm against that function)
    :return: updated samples count and number of classes.
    :raises ValueError: if dataset is neither 'trn' nor 'tst'
    """

    h, w, _ = in_img_array.shape
    if dataset == 'trn':
        idx_samples = samples_count['trn']
        # The raster metadata is also appended to the validation file when preparing 'trn' samples.
        append_to_dataset(val_sample_file["metadata"], repr(image_metadata))
    elif dataset == 'tst':
        idx_samples = samples_count['tst']
    else:
        raise ValueError(
            f"Dataset value must be trn or tst. Provided value is {dataset}")

    idx_samples_v = samples_count['val']

    # Adds raster metadata to the dataset. All samples created by tiling below will point to that metadata by index
    metadata_idx = append_to_dataset(samples_file["metadata"],
                                     repr(image_metadata))

    if overlap > 25:
        logging.warning(
            "\nhigh overlap >25%, note that automatic train/val split creates very similar samples in both sets"
        )
    dist_samples = round(sample_size * (1 - (overlap / 100)))
    added_samples = 0
    excl_samples = 0
    columns = range(0, w, dist_samples)
    use_stratification = (stratd is not None) and (dataset == 'trn')

    with tqdm(range(0, h, dist_samples), position=1, leave=True) as _tqdm:
        # Number of tiles that will be visited; constant for the whole raster.
        total_tiles = len(_tqdm) * len(columns)

        for row in _tqdm:
            for column in columns:
                data = in_img_array[row:row + sample_size,
                                    column:column + sample_size, :]
                target = np.squeeze(
                    label_array[row:row + sample_size,
                                column:column + sample_size, :],
                    axis=2)
                data_row = data.shape[0]
                data_col = data.shape[1]
                if data_row < sample_size or data_col < sample_size:
                    padding = pad_diff(
                        data_row, data_col, sample_size, sample_size
                    )  # actual height, actual width, desired height, desired width
                    # don't fill with 0 if possible. Creates false min value when scaling.
                    data = pad(data, padding, fill=np.nan)

                target_row = target.shape[0]
                target_col = target.shape[1]
                if target_row < sample_size or target_col < sample_size:
                    padding = pad_diff(
                        target_row, target_col, sample_size, sample_size
                    )  # actual height, actual width, desired height, desired width
                    target = pad(target, padding, fill=dontcare)

                # Background = pixels equal to 0 or to dontcare. A single combined mask keeps pixels from being
                # counted twice when dontcare is itself 0.
                backgr_ct = np.count_nonzero(
                    (target == 0) | (target == dontcare))
                target_background_percent = round(
                    backgr_ct / target.size * 100, 1)

                sample_metadata = {'sample_indices': (row, column)}

                # Stratification bias
                if use_stratification:
                    stratification_bias, tile_counts, tile_size = \
                        _stratification_bias(target, stratd)
                else:
                    stratification_bias = 0.0

                # add_to_datasets' return value is treated as "sample went to the validation set".
                is_val = False
                if minimum_annotated_percent(target_background_percent, min_annot_perc) and \
                        class_proportion(target, sample_size, class_prop):
                    is_val = add_to_datasets(
                        dataset=dataset,
                        samples_file=samples_file,
                        val_percent=val_percent,
                        val_sample_file=val_sample_file,
                        data=data,
                        target=target,
                        sample_metadata=sample_metadata,
                        metadata_idx=metadata_idx,
                        dict_classes=pixel_classes,
                        stratification_bias=stratification_bias,
                        stratification_dict=stratd)
                    if is_val:
                        idx_samples_v += 1
                    else:
                        idx_samples += 1
                    added_samples += 1

                    # Stratification update. The loop variables must not reuse the name of the validation
                    # flag, which is still needed for the log line below.
                    if use_stratification:
                        assigned = stratd[stratd['latest_assignment']]
                        for cls, cls_count in tile_counts.items():
                            assigned['total_counts'][cls] += cls_count
                        assigned['total_pixels'] += tile_size

                else:
                    excl_samples += 1

                # Excluded tiles still contribute to the highest class value seen.
                target_class_num = np.max(target)
                if num_classes < target_class_num:
                    num_classes = target_class_num

                final_dataset = 'val' if is_val else dataset
                logging.debug(
                    f'Dset={final_dataset}, '
                    f'Added samps={added_samples}/{total_tiles}, '
                    f'Excld samps={excl_samples}/{total_tiles}, '
                    f'Target annot perc={100 - target_background_percent:.1f}')

    if added_samples == 0:
        logging.warning(
            "No sample added for current raster. Problems may occur with use of metadata"
        )
    if dataset == 'tst':
        samples_count['tst'] = idx_samples
    else:
        samples_count['trn'] = idx_samples
        samples_count['val'] = idx_samples_v
    # return the appended samples count and number of classes.
    return samples_count, num_classes