def gen_label_samples(np_label, dist_samples, tile_size):
    """
    Yield fixed-size tiles cut from a 2D label array, walking it on a regular grid.
    :param np_label: 2D numpy array of labels (its shape is unpacked as height, width)
    :param dist_samples: (int) step, in pixels, between the top-left corners of consecutive tiles
    :param tile_size: (int) height and width, in pixels, requested for every tile
    :return: generator of (tile, (row, column)) tuples, (row, column) being the tile's top-left corner
    """
    height, width = np_label.shape
    for top in range(0, height, dist_samples):
        for left in range(0, width, dist_samples):
            tile = np_label[top:top + tile_size, left:left + tile_size]
            tile_h, tile_w = tile.shape[:2]
            # Slices that run past the bottom/right edge come back short; those tiles
            # are handed to pad() with fill=-1 (presumably a "no label" marker - confirm
            # against the code consuming these tiles).
            if not (tile_h >= tile_size and tile_w >= tile_size):
                tile = pad(tile,
                           pad_diff(tile_h, tile_w, tile_size, tile_size),
                           fill=-1)
            yield tile, (top, left)
def gen_img_samples(rst_pth, tile_size, dist_samples, *band_order):
    """
    Yield fixed-size image tiles read window by window from a raster file.
    :param rst_pth: path of the raster, opened with rasterio.open
    :param tile_size: (int) height and width, in pixels, requested for every tile
    :param dist_samples: (int) step, in pixels, between the top-left corners of consecutive tiles
    :param band_order: optional; when given, only its first positional value is used and is
                       forwarded to the dataset's read() as the band selection
    :return: generator of tile arrays, each passed through reshape_as_image
    """
    with rasterio.open(rst_pth) as raster:
        for top in range(0, raster.height, dist_samples):
            row_span = slice(top, top + tile_size)
            for left in range(0, raster.width, dist_samples):
                tile_window = Window.from_slices(row_span,
                                                 slice(left, left + tile_size))
                if band_order:
                    raw = raster.read(band_order[0], window=tile_window)
                else:
                    raw = raster.read(window=tile_window)
                tile = reshape_as_image(raw)

                # After reshape_as_image, axes 0 and 1 are treated as height and width.
                tile_h, tile_w = tile.shape[0], tile.shape[1]
                if tile_h < tile_size or tile_w < tile_size:
                    # Edge tiles come back short; fill the missing area with NaN.
                    tile = pad(tile,
                               pad_diff(tile_h, tile_w, tile_size, tile_size),
                               fill=np.nan)
                yield tile
def samples_preparation(in_img_array,
                        label_array,
                        sample_size,
                        overlap,
                        samples_count,
                        num_classes,
                        samples_file,
                        val_percent,
                        val_sample_file,
                        dataset,
                        pixel_classes,
                        image_metadata=None,
                        dontcare=0,
                        min_annot_perc=None,
                        class_prop=None):
    """
    Extract and write samples from input image and reference image
    :param in_img_array: numpy array of the input image, indexed as (height, width, bands)
    :param label_array: numpy array of the annotation image, indexed as (height, width, 1)
    :param sample_size: (int) Size (in pixel) of the samples to create
                        # TODO: could there be a different sample size for tst dataset? shows results closer to inference
    :param overlap: (int) Desired overlap between samples in %
    :param samples_count: (dict) Current number of samples created, keyed by 'trn', 'val' and 'tst'
                          (updated in place and returned)
    :param num_classes: (number) Highest class value seen so far in reference data (updated and returned)
    :param samples_file: (hdf5 dataset) hdfs file where samples will be written
    :param val_percent: (int) percentage of validation samples
    :param val_sample_file: (hdf5 dataset) hdfs file where samples will be written (val)
    :param dataset: (str) Type of dataset where the samples will be written. Must be 'trn' or 'tst'
    :param pixel_classes: (dict) samples pixel statistics
    :param image_metadata: (dict) metadata associated to source raster
    :param dontcare: Value in gpkg features that will ignored during training; also used to pad edge label tiles
    :param min_annot_perc: optional, minimum annotated percent required for sample to be created
    :param class_prop: optional, minimal proportion of pixels for each class required for sample to be created
    :return: updated samples count and number of classes.
    :raises ValueError: if dataset is neither 'trn' nor 'tst'
    :raises AssertionError: if no sample at all was added for this raster
    """

    # Unpacking three values also enforces that the image array is 3-dimensional.
    h, w, _ = in_img_array.shape

    if dataset == 'trn':
        idx_samples = samples_count['trn']
        # Training rasters may feed the validation file too, so it gets the metadata as well.
        append_to_dataset(val_sample_file["metadata"], repr(image_metadata))
    elif dataset == 'tst':
        idx_samples = samples_count['tst']
    else:
        raise ValueError(f"Dataset value must be trn or tst. Provided value is {dataset}")

    idx_samples_v = samples_count['val']

    # Adds raster metadata to the dataset. All samples created by tiling below will point to that metadata by index
    metadata_idx = append_to_dataset(samples_file["metadata"], repr(image_metadata))

    if overlap > 25:
        warnings.warn("high overlap >25%, note that automatic train/val split creates very similar samples in both sets")
    dist_samples = round(sample_size * (1 - (overlap / 100)))
    added_samples = 0
    excl_samples = 0

    with tqdm(range(0, h, dist_samples), position=1, leave=True,
              desc=f'Writing samples. Dataset currently contains {idx_samples} '
                   f'samples') as _tqdm:

        # Number of tiles the grid will visit; constant for the whole raster.
        total_tiles = len(_tqdm) * len(range(0, w, dist_samples))

        for row in _tqdm:
            for column in range(0, w, dist_samples):
                data = in_img_array[row:row + sample_size, column:column + sample_size, :]
                target = np.squeeze(label_array[row:row + sample_size, column:column + sample_size, :], axis=2)

                data_row = data.shape[0]
                data_col = data.shape[1]
                if data_row < sample_size or data_col < sample_size:
                    padding = pad_diff(data_row, data_col, sample_size)
                    # don't fill with 0 if possible. Creates false min value when scaling.
                    data = pad(data, padding, fill=np.nan)

                target_row = target.shape[0]
                target_col = target.shape[1]
                if target_row < sample_size or target_col < sample_size:
                    padding = pad_diff(target_row, target_col, sample_size)
                    target = pad(target, padding, fill=dontcare)

                # Count label 0 explicitly. Taking the first count returned by np.unique is only
                # correct when 0 is the smallest label, which fails as soon as the tile holds a
                # negative value (e.g. a -1 dontcare).
                background_count = np.count_nonzero(target == 0)
                target_background_percent = round(background_count / target.size * 100, 1)

                sample_metadata = {'sample_indices': (row, column)}

                val = False
                if minimum_annotated_percent(target_background_percent, min_annot_perc) and \
                        class_proportion(target, sample_size, class_prop):
                    val = add_to_datasets(dataset=dataset,
                                          samples_file=samples_file,
                                          val_percent=val_percent,
                                          val_sample_file=val_sample_file,
                                          data=data,
                                          target=target,
                                          sample_metadata=sample_metadata,
                                          metadata_idx=metadata_idx,
                                          dict_classes=pixel_classes)
                    if val:
                        idx_samples_v += 1
                    else:
                        idx_samples += 1
                    added_samples += 1
                else:
                    excl_samples += 1

                # Track the highest class value met so far, excluded tiles included.
                target_class_num = np.max(target)
                if num_classes < target_class_num:
                    num_classes = target_class_num

                final_dataset = 'val' if val else dataset
                _tqdm.set_postfix(Dataset=final_dataset,
                                  Excld_samples=excl_samples,
                                  Added_samples=f'{added_samples}/{total_tiles}',
                                  Target_annot_perc=100 - target_background_percent)

    assert added_samples > 0, "No sample added for current raster. Problems may occur with use of metadata"

    if dataset == 'tst':
        samples_count['tst'] = idx_samples
    else:
        samples_count['trn'] = idx_samples
        samples_count['val'] = idx_samples_v

    # return the appended samples count and number of classes.
    return samples_count, num_classes
def segmentation(raster, clip_gpkg, model, sample_size, num_bands, device):
    """
    Run tile-by-tile inference of a model over a raster and assemble the per-pixel class map.
    :param raster: input raster, forwarded to image_reader_as_array as input_image
    :param clip_gpkg: forwarded to image_reader_as_array as clip_gpkg
    :param model: torch module used for prediction; it is switched to eval mode here
    :param sample_size: (int) height and width, in pixels, of the square tiles fed to the model
    :param num_bands: (int) number of leading bands of the image to keep; must not exceed the image's band count
    :param device: torch device the input tensors are moved to
    :return: 2D numpy array (uint8) of predicted class indices, cropped to the image height and width
    :raises AssertionError: if num_bands is larger than the number of bands in the image
    """
    # Imported locally so this function does not depend on a file-level `import torch`.
    import torch

    # switch to evaluate mode
    model.eval()

    img_array, input_image, _ = image_reader_as_array(input_image=raster, clip_gpkg=clip_gpkg)
    metadata = add_metadata_from_raster_to_sample(img_array, input_image, meta_map=None, raster_info=None)

    h, w, bands = img_array.shape
    assert num_bands <= bands, f"Num of specified bands is not compatible with image shape {img_array.shape}"
    if num_bands < bands:
        img_array = img_array[:, :, :num_bands]

    # Round the canvas up to a whole number of tiles so padded edge predictions fit;
    # it is cropped back to (h, w) on return.
    h_ = sample_size * math.ceil(h / sample_size)
    w_ = sample_size * math.ceil(w / sample_size)
    pred_img = np.empty((h_, w_), dtype=np.uint8)

    # Loop-invariant work, done once instead of once per tile.
    # NOTE(review): `params` is not an argument of this function; it is expected to exist
    # at module level - confirm against the rest of the file.
    # Assumes compose_transforms returns a reusable callable - confirm if it is stateful.
    totensor_transform = augmentation.compose_transforms(params, dataset="tst", type='totensor')
    has_nir_branch = any("module.modelNIR" in s for s in model.state_dict().keys())

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        for row in tqdm(range(0, h, sample_size), position=1, leave=False, desc='Inferring rows'):
            with tqdm(range(0, w, sample_size), position=2, leave=False, desc='Inferring columns') as _tqdm:
                for column in _tqdm:
                    sub_images = img_array[row:row + sample_size, column:column + sample_size, :]
                    sub_images_row = sub_images.shape[0]
                    sub_images_col = sub_images.shape[1]

                    if sub_images_row < sample_size or sub_images_col < sample_size:
                        padding = pad_diff(actual_height=sub_images_row,
                                           actual_width=sub_images_col,
                                           desired_shape=sample_size)
                        # FIXME combine pad and pad_diff into one function
                        sub_images = pad(sub_images, padding, fill=0)

                    sample = {'sat_img': sub_images, 'metadata': metadata}
                    sample = totensor_transform(sample)

                    # Add the batch dimension expected by the model.
                    inputs = sample['sat_img'].unsqueeze_(0)
                    inputs = inputs.to(device)

                    if inputs.shape[1] == 4 and has_nir_branch:
                        ############################
                        # Test Implementation of the NIR
                        ############################
                        # Init NIR   TODO: make a proper way to read the NIR channel
                        #                  and put an option to be able to give the index of the NIR channel
                        # Extract the NIR channel -> [batch size, H, W] since it's only one channel
                        inputs_NIR = inputs[:, -1, ...]
                        # add a channel to get the good size -> [:, 1, :, :]
                        inputs_NIR.unsqueeze_(1)
                        # take out the NIR channel and take only the RGB for the inputs
                        inputs = inputs[:, :-1, ...]
                        # The model receives the RGB tensor and the NIR tensor as a two-item list.
                        inputs = [inputs, inputs_NIR]
                        ############################
                        # End of the test implementation module
                        ############################

                    outputs = model(inputs)

                    # torchvision models give output in 'out' key. May cause problems in future versions of torchvision.
                    if isinstance(outputs, OrderedDict) and 'out' in outputs.keys():
                        outputs = outputs['out']

                    # Most probable class per pixel, with the batch dimension dropped.
                    outputs = F.softmax(outputs, dim=1).argmax(dim=1).squeeze(dim=0).cpu().numpy()

                    pred_img[row:row + sample_size, column:column + sample_size] = outputs

    return pred_img[:h, :w]
def _stratification_bias(target, stratd):
    """
    Compute the train/val stratification bias for one label tile.

    Registers the tile's classes in the running 'trn' and 'val' counts of stratd (with a
    count of 0 when new) and refreshes each split's 'total_props' as a side effect.
    :param target: numpy array of the label tile
    :param stratd: (dict) stratification state, see samples_preparation
    :return: (bias, tile_counts, tile_size) where bias is strat_factor times the sign of
             (mean squared distance to val proportions - mean squared distance to trn proportions),
             tile_counts maps class value -> pixel count in the tile, tile_size is the tile's pixel count
    """
    tile_size = target.size
    classes, counts = np.unique(target, return_counts=True)
    tile_counts = dict(zip(classes, counts))
    tile_props = {cls: n / tile_size for cls, n in tile_counts.items()}

    for split in ('trn', 'val'):
        split_stats = stratd[split]
        for cls in tile_props:
            split_stats['total_counts'].setdefault(cls, 0)
        total_pixels = split_stats['total_pixels']
        if total_pixels == 0:
            # Nothing assigned to this split yet: every class proportion is zero.
            split_stats['total_props'] = {cls: 0.0 for cls in split_stats['total_counts']}
        else:
            split_stats['total_props'] = {cls: n / total_pixels
                                          for cls, n in split_stats['total_counts'].items()}

    def mean_squared_distance(split):
        split_props = stratd[split]['total_props']
        distances = [np.abs(prop - split_props[cls]) for cls, prop in tile_props.items()]
        return np.mean(np.array(distances) ** 2)

    dist = mean_squared_distance('val') - mean_squared_distance('trn')
    return stratd['strat_factor'] * np.sign(dist), tile_counts, tile_size


def samples_preparation(in_img_array,
                        label_array,
                        sample_size,
                        overlap,
                        samples_count,
                        num_classes,
                        samples_file,
                        val_percent,
                        val_sample_file,
                        dataset,
                        pixel_classes,
                        dontcare,
                        image_metadata=None,
                        min_annot_perc=None,
                        class_prop=None,
                        stratd=None):
    """
    Extract and write samples from input image and reference image
    :param in_img_array: numpy array of the input image, indexed as (height, width, bands)
    :param label_array: numpy array of the annotation image, indexed as (height, width, 1)
    :param sample_size: (int) Size (in pixel) of the samples to create
                        # TODO: could there be a different sample size for tst dataset? shows results closer to inference
    :param overlap: (int) Desired overlap between samples in %
    :param samples_count: (dict) Current number of samples created, keyed by 'trn', 'val' and 'tst'
                          (updated in place and returned)
    :param num_classes: (number) Highest class value seen so far in reference data (updated and returned)
    :param samples_file: (hdf5 dataset) hdfs file where samples will be written
    :param val_percent: (int) percentage of validation samples
    :param val_sample_file: (hdf5 dataset) hdfs file where samples will be written (val)
    :param dataset: (str) Type of dataset where the samples will be written. Must be 'trn' or 'tst'
    :param pixel_classes: (dict) samples pixel statistics
    :param dontcare: Value in gpkg features that will ignored during training; counted as background
                     and used to pad edge label tiles
    :param image_metadata: (dict) metadata associated to source raster
    :param min_annot_perc: optional, minimum annotated percent required for sample to be created
    :param class_prop: optional, minimal proportion of pixels for each class required for sample to be created
    :param stratd: optional (dict) stratification state, only used when dataset is 'trn'. Read/updated keys:
                   'trn' and 'val' (each a dict with 'total_counts', 'total_pixels', 'total_props'),
                   'strat_factor', and 'latest_assignment' (name of the split that received the last
                   sample; presumably set by add_to_datasets - confirm)
    :return: updated samples count and number of classes.
    :raises ValueError: if dataset is neither 'trn' nor 'tst'
    """

    # Unpacking three values also enforces that the image array is 3-dimensional.
    h, w, _ = in_img_array.shape

    if dataset == 'trn':
        idx_samples = samples_count['trn']
        # Training rasters may feed the validation file too, so it gets the metadata as well.
        append_to_dataset(val_sample_file["metadata"], repr(image_metadata))
    elif dataset == 'tst':
        idx_samples = samples_count['tst']
    else:
        raise ValueError(f"Dataset value must be trn or tst. Provided value is {dataset}")

    idx_samples_v = samples_count['val']

    # Adds raster metadata to the dataset. All samples created by tiling below will point to that metadata by index
    metadata_idx = append_to_dataset(samples_file["metadata"], repr(image_metadata))

    if overlap > 25:
        logging.warning(
            "\nhigh overlap >25%, note that automatic train/val split creates very similar samples in both sets")
    dist_samples = round(sample_size * (1 - (overlap / 100)))
    added_samples = 0
    excl_samples = 0

    use_stratification = (stratd is not None) and (dataset == 'trn')

    with tqdm(range(0, h, dist_samples), position=1, leave=True) as _tqdm:

        # Number of tiles the grid will visit; constant for the whole raster.
        total_tiles = len(_tqdm) * len(range(0, w, dist_samples))

        for row in _tqdm:
            for column in range(0, w, dist_samples):
                data = in_img_array[row:row + sample_size, column:column + sample_size, :]
                target = np.squeeze(label_array[row:row + sample_size, column:column + sample_size, :], axis=2)

                data_row = data.shape[0]
                data_col = data.shape[1]
                if data_row < sample_size or data_col < sample_size:
                    padding = pad_diff(data_row, data_col, sample_size, sample_size)
                    # don't fill with 0 if possible. Creates false min value when scaling.
                    data = pad(data, padding, fill=np.nan)

                target_row = target.shape[0]
                target_col = target.shape[1]
                if target_row < sample_size or target_col < sample_size:
                    padding = pad_diff(target_row, target_col, sample_size, sample_size)
                    target = pad(target, padding, fill=dontcare)

                # Background = label 0 or dontcare. A single mask keeps pixels from being
                # counted twice when dontcare is itself 0.
                backgr_ct = np.count_nonzero((target == 0) | (target == dontcare))
                target_background_percent = round(backgr_ct / target.size * 100, 1)

                sample_metadata = {'sample_indices': (row, column)}

                # Stratification bias
                if use_stratification:
                    stratification_bias, tile_counts, tile_size = _stratification_bias(target, stratd)
                else:
                    stratification_bias = 0.0

                val = False
                if minimum_annotated_percent(target_background_percent, min_annot_perc) and \
                        class_proportion(target, sample_size, class_prop):
                    val = add_to_datasets(dataset=dataset,
                                          samples_file=samples_file,
                                          val_percent=val_percent,
                                          val_sample_file=val_sample_file,
                                          data=data,
                                          target=target,
                                          sample_metadata=sample_metadata,
                                          metadata_idx=metadata_idx,
                                          dict_classes=pixel_classes,
                                          stratification_bias=stratification_bias,
                                          stratification_dict=stratd)
                    if val:
                        idx_samples_v += 1
                    else:
                        idx_samples += 1
                    added_samples += 1

                    # Stratification update: credit the tile's pixels to the split it landed in.
                    # The loop variable must not be called `val`, or it would overwrite the
                    # train/val flag used for the log message below.
                    if use_stratification:
                        assigned_split = stratd[stratd['latest_assignment']]
                        for key, pixel_count in tile_counts.items():
                            assigned_split['total_counts'][key] += pixel_count
                        assigned_split['total_pixels'] += tile_size
                else:
                    excl_samples += 1

                # Track the highest class value met so far, excluded tiles included.
                target_class_num = np.max(target)
                if num_classes < target_class_num:
                    num_classes = target_class_num

                final_dataset = 'val' if val else dataset
                logging.debug(f'Dset={final_dataset}, '
                              f'Added samps={added_samples}/{total_tiles}, '
                              f'Excld samps={excl_samples}/{total_tiles}, '
                              f'Target annot perc={100 - target_background_percent:.1f}')

    if added_samples == 0:
        logging.warning("No sample added for current raster. Problems may occur with use of metadata")

    if dataset == 'tst':
        samples_count['tst'] = idx_samples
    else:
        samples_count['trn'] = idx_samples
        samples_count['val'] = idx_samples_v

    # return the appended samples count and number of classes.
    return samples_count, num_classes