# Shared imports for the snippets in this section. Project-local helpers
# (image_reader_as_array, read_csv, read_parameters, create_new_raster_from_base,
# vector_to_raster, mask_image, validate_num_classes, assert_band_number,
# create_or_empty_folder, load_from_checkpoint, net, classifier, calc_overlap,
# minmax_scale, create_files_and_datasets, resize_datasets, samples_preparation)
# are assumed to be importable from the repo's own modules.
import os
import time
from collections import OrderedDict

import boto3
import h5py
import numpy as np
import torch
from tqdm import tqdm


def sem_seg_inference(bucket, model, image, overlay):
    """Run semantic segmentation inference on an image.
    Args:
        bucket: bucket in which data is stored if using AWS S3
        model: model to use for inference
        image: full path of the image to infer on
        overlay: overlap (in pixels) to apply between chunks
    Returns:
        a numpy array of the same size (h, w) as the input image, where each value
        is the predicted class.
    """
    # Chunk size. Should not be modified often. We want to process the largest chunk
    # possible at a time, but too large a chunk will exhaust GPU memory.
    chunk_size = 512

    # Switch to evaluate mode
    model.eval()

    if bucket:
        input_image = image_reader_as_array(f"Images/{os.path.basename(image)}")
    else:
        input_image = image_reader_as_array(image)

    if len(input_image.shape) == 3:
        h, w, nb = input_image.shape
        padded_array = np.pad(input_image, ((overlay, chunk_size), (overlay, chunk_size), (0, 0)),
                              mode='constant')
    elif len(input_image.shape) == 2:
        h, w = input_image.shape
        # Add a channel axis (channels-last) so 2-D images go through the same path as 3-D ones.
        padded_array = np.expand_dims(np.pad(input_image, ((overlay, chunk_size), (overlay, chunk_size)),
                                             mode='constant'), axis=2)
    else:
        raise ValueError(f"Error classifying image: shape with {len(input_image.shape)} "
                         f"dimensions is not recognized")

    output_np = np.empty([h + overlay + chunk_size, w + overlay + chunk_size, 1], dtype=np.uint8)

    with torch.no_grad():
        # Slide over the padded image; consecutive chunks overlap by 2 * overlay pixels,
        # and only the central part of each prediction is kept.
        for row in range(0, h, chunk_size - (2 * overlay)):
            for col in range(0, w, chunk_size - (2 * overlay)):
                chunk_input = padded_array[row:row + chunk_size, col:col + chunk_size, :]
                inputs = torch.from_numpy(np.float32(np.transpose(chunk_input, (2, 0, 1))))
                inputs.unsqueeze_(0)

                if torch.cuda.is_available():
                    inputs = inputs.cuda()

                # Forward pass
                outputs = model(inputs)

                _, pred = torch.max(outputs, dim=1)
                segmentation = torch.squeeze(pred)

                row_from = row + overlay
                row_to = row + chunk_size - overlay
                col_from = col + overlay
                col_to = col + chunk_size - overlay
                useful_sem_seg = segmentation[overlay:chunk_size - overlay,
                                              overlay:chunk_size - overlay]
                output_np[row_from:row_to, col_from:col_to, 0] = useful_sem_seg.cpu()

    # Crop the output array back to the size of the input image and return it.
    output_np = output_np[overlay:h + overlay, overlay:w + overlay]
    return output_np
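# A minimal usage sketch for sem_seg_inference() above. The checkpoint name, image path
# and the 64-pixel overlay value are hypothetical; load_from_checkpoint() and
# create_new_raster_from_base() are the repo helpers used the same way in main() below.
def run_single_image_inference(model, image_path="my_image.tif", overlay=64):
    model = load_from_checkpoint("saved_model.pth.tar", model)
    pred = sem_seg_inference(bucket=None, model=model, image=image_path, overlay=overlay)
    # Write the per-pixel class indices as a raster aligned with the source image.
    create_new_raster_from_base(image_path, image_path.replace(".tif", "_inference.tif"), pred)
    return pred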
def main(params): """ Identify the class to which each image belongs. :param params: (dict) Parameters found in the yaml config file. """ since = time.time() csv_file = params['inference']['img_csv_file'] bucket = None bucket_name = params['global']['bucket_name'] model, state_dict_path, model_name = net(params, inference=True) if torch.cuda.is_available(): model = model.cuda() if bucket_name: s3 = boto3.resource('s3') bucket = s3.Bucket(bucket_name) bucket.download_file(csv_file, 'img_csv_file.csv') list_img = read_csv('img_csv_file.csv', inference=True) else: list_img = read_csv(csv_file, inference=True) if params['global']['task'] == 'classification': classifier(params, list_img, model) elif params['global']['task'] == 'segmentation': if bucket: bucket.download_file(state_dict_path, "saved_model.pth.tar") model = load_from_checkpoint("saved_model.pth.tar", model) else: model = load_from_checkpoint(state_dict_path, model) chunk_size, nbr_pix_overlap = calc_overlap(params) num_classes = params['global']['num_classes'] for img in list_img: img_name = os.path.basename(img['tif']) if bucket: local_img = f"Images/{img_name}" bucket.download_file(img['tif'], local_img) inference_image = f"Classified_Images/{img_name.split('.')[0]}_inference.tif" else: local_img = img['tif'] inference_image = os.path.join( params['inference']['working_folder'], f"{img_name.split('.')[0]}_inference.tif") assert_band_number(local_img, params['global']['number_of_bands']) nd_array_tif = image_reader_as_array(local_img) sem_seg_results = sem_seg_inference(model, nd_array_tif, nbr_pix_overlap, chunk_size, num_classes) create_new_raster_from_base(local_img, inference_image, sem_seg_results) print(f"Semantic segmentation of image {img_name} completed") if bucket: bucket.upload_file( inference_image, os.path.join(params['inference']['working_folder'], f"{img_name.split('.')[0]}_inference.tif")) else: raise ValueError( f"The task should be either classification or segmentation. The provided value is {params['global']['task']}" ) time_elapsed = time.time() - since print('Inference completed in {:.0f}m {:.0f}s'.format( time_elapsed // 60, time_elapsed % 60))
def main(params): """ Training and validation datasets preparation. :param params: (dict) Parameters found in the yaml config file. """ gpkg_file = [] bucket_name = params['global']['bucket_name'] data_path = params['global']['data_path'] metadata_file = params['global']['metadata_file'] csv_file = params['sample']['prep_csv_file'] if metadata_file: image_metadata = read_parameters(metadata_file) else: image_metadata = None final_samples_folder = None if bucket_name: s3 = boto3.resource('s3') bucket = s3.Bucket(bucket_name) bucket.download_file(csv_file, 'samples_prep.csv') list_data_prep = read_csv('samples_prep.csv') if data_path: final_samples_folder = os.path.join(data_path, "samples") else: final_samples_folder = "samples" samples_folder = "samples" out_label_folder = "label" else: list_data_prep = read_csv(csv_file) samples_folder = os.path.join(data_path, "samples") out_label_folder = os.path.join(data_path, "label") create_or_empty_folder(samples_folder) create_or_empty_folder(out_label_folder) number_samples = {'trn': 0, 'val': 0, 'tst': 0} number_classes = 0 trn_hdf5, val_hdf5, tst_hdf5 = create_files_and_datasets( params, samples_folder) with tqdm(list_data_prep) as _tqdm: for info in _tqdm: if bucket_name: bucket.download_file(info['tif'], "Images/" + info['tif'].split('/')[-1]) info['tif'] = "Images/" + info['tif'].split('/')[-1] if info['gpkg'] not in gpkg_file: gpkg_file.append(info['gpkg']) bucket.download_file(info['gpkg'], info['gpkg'].split('/')[-1]) info['gpkg'] = info['gpkg'].split('/')[-1] assert_band_number(info['tif'], params['global']['number_of_bands']) _tqdm.set_postfix( OrderedDict(file=f'{info["tif"]}', sample_size=params['global']['samples_size'])) # Read the input raster image np_input_image = image_reader_as_array(info['tif']) # Validate the number of class in the vector file validate_num_classes(info['gpkg'], params['global']['num_classes'], info['attribute_name']) # Burn vector file in a raster file np_label_raster = vector_to_raster(info['gpkg'], info['tif'], info['attribute_name']) # Guidelines for pre-processing: http://cs231n.github.io/neural-networks-2/#datapre # Scale arrays to values [0,1]. Default: will scale. Useful if dealing with 8 bit *and* 16 bit images. scale = params['global']['scale_data'] if params['global'][ 'scale_data'] else True if scale: sc_min, sc_max = params['global']['scale_data'] np_input_image = minmax_scale( np_input_image, orig_range=(np.min(np_input_image), np.max(np_input_image)), scale_range=(sc_min, sc_max)) # Mask the zeros from input image into label raster. if params['sample']['mask_reference']: np_label_raster = mask_image(np_input_image, np_label_raster) if info['dataset'] == 'trn': out_file = trn_hdf5 elif info['dataset'] == 'val': out_file = val_hdf5 elif info['dataset'] == 'tst': out_file = tst_hdf5 else: raise ValueError( f"Dataset value must be trn or val or tst. 
Provided value is {info['dataset']}" ) np_label_raster = np.reshape( np_label_raster, (np_label_raster.shape[0], np_label_raster.shape[1], 1)) number_samples, number_classes = samples_preparation( np_input_image, np_label_raster, params['global']['samples_size'], params['sample']['samples_dist'], number_samples, number_classes, out_file, info['dataset'], params['sample']['min_annotated_percent'], image_metadata) _tqdm.set_postfix(OrderedDict(number_samples=number_samples)) out_file.flush() trn_hdf5.close() val_hdf5.close() tst_hdf5.close() print("Number of samples created: ", number_samples) if bucket_name and final_samples_folder: print('Transfering Samples to the bucket') bucket.upload_file(samples_folder + "/trn_samples.hdf5", final_samples_folder + '/trn_samples.hdf5') bucket.upload_file(samples_folder + "/val_samples.hdf5", final_samples_folder + '/val_samples.hdf5') bucket.upload_file(samples_folder + "/tst_samples.hdf5", final_samples_folder + '/tst_samples.hdf5') print("End of process")
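# For reference, a minimal sketch of what the minmax_scale() call above is assumed to
# do: linearly map values from orig_range into scale_range. The repo's implementation
# may differ in details (dtype handling, per-band ranges).
def minmax_scale_sketch(img, orig_range, scale_range=(0, 1)):
    o_min, o_max = orig_range
    s_min, s_max = scale_range
    # (img - o_min) / (o_max - o_min) rescales to [0, 1]; then stretch to [s_min, s_max].
    return (img.astype(np.float32) - o_min) / (o_max - o_min) * (s_max - s_min) + s_min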
def samples_preparation(sat_img, ref_img, sample_size, dist_samples, samples_count,
                        num_classes, samples_file, dataset, background_switch):
    """Extract and write samples from an input image and its reference image.
    Args:
        sat_img: path and name of the input image
        ref_img: path and name of the reference (label) image
        sample_size: size (in pixels) of the square samples to create
        dist_samples: distance (in pixels) between samples in both images
        samples_count: current number of samples created (updated and returned)
        num_classes: number of classes in the reference data (updated and returned)
        samples_file: HDF5 file where samples will be written
        dataset: type of dataset the samples are written to; either 'trn' or 'val'
        background_switch: if True, samples containing only background pixels are discarded
    """
    # Read the input and reference images as arrays
    in_img_array = image_reader_as_array(sat_img)
    label_array = image_reader_as_array(ref_img)

    h, w, num_bands = in_img_array.shape

    if dataset == 'trn':
        idx_samples = samples_count['trn']
    elif dataset == 'val':
        idx_samples = samples_count['val']

    # Pad by half a tile so samples near the image edges stay in bounds.
    half_tile = int(sample_size / 2)
    pad_in_img_array = np.pad(in_img_array,
                              ((half_tile, half_tile), (half_tile, half_tile), (0, 0)),
                              mode='constant')
    pad_label_array = np.pad(label_array,
                             ((half_tile, half_tile), (half_tile, half_tile), (0, 0)),
                             mode='constant')

    for row in range(0, h, dist_samples):
        for column in range(0, w, dist_samples):
            data = pad_in_img_array[row:row + sample_size, column:column + sample_size, :]
            target = np.squeeze(pad_label_array[row:row + sample_size,
                                                column:column + sample_size, :], axis=2)

            target_class_num = max(target.ravel())

            # Write the sample unless it contains only background and background_switch is on.
            if not background_switch or target_class_num != 0:
                resize_datasets(samples_file)
                samples_file["sat_img"][idx_samples, ...] = data
                samples_file["map_img"][idx_samples, ...] = target
                idx_samples += 1

            if num_classes < target_class_num:
                num_classes = target_class_num

    if dataset == 'trn':
        samples_count['trn'] = idx_samples
    elif dataset == 'val':
        samples_count['val'] = idx_samples

    # Return the updated samples count and number of classes.
    return samples_count, num_classes
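# samples_preparation() relies on a resize_datasets() helper. A minimal sketch of what
# it is assumed to do, given the resizable datasets created below with
# maxshape=(None, ...): grow each dataset by one along the sample axis so the write at
# idx_samples fits.
def resize_datasets_sketch(hdf5_file):
    for key in ("sat_img", "map_img"):
        ds = hdf5_file[key]
        ds.resize(ds.shape[0] + 1, axis=0)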
def main(bucket_name, data_path, samples_size, num_classes, number_of_bands, csv_file,
         samples_dist, remove_background, mask_input_image, mask_reference):
    gpkg_file = []
    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'samples_prep.csv')
        list_data_prep = read_csv('samples_prep.csv')
        if data_path:
            final_samples_folder = os.path.join(data_path, "samples")
            final_out_label_folder = os.path.join(data_path, "label")
        else:
            final_samples_folder = "samples"
            final_out_label_folder = "label"
        samples_folder = "samples"
        out_label_folder = "label"
    else:
        list_data_prep = read_csv(csv_file)
        samples_folder = os.path.join(data_path, "samples")
        out_label_folder = os.path.join(data_path, "label")

    create_or_empty_folder(samples_folder)
    create_or_empty_folder(out_label_folder)

    number_samples = {'trn': 0, 'val': 0}
    number_classes = 0

    # Create the training and validation HDF5 files with resizable datasets.
    trn_hdf5 = h5py.File(os.path.join(samples_folder, "trn_samples.hdf5"), "w")
    val_hdf5 = h5py.File(os.path.join(samples_folder, "val_samples.hdf5"), "w")

    trn_hdf5.create_dataset("sat_img", (0, samples_size, samples_size, number_of_bands), np.float32,
                            maxshape=(None, samples_size, samples_size, number_of_bands))
    trn_hdf5.create_dataset("map_img", (0, samples_size, samples_size), np.uint8,
                            maxshape=(None, samples_size, samples_size))
    val_hdf5.create_dataset("sat_img", (0, samples_size, samples_size, number_of_bands), np.float32,
                            maxshape=(None, samples_size, samples_size, number_of_bands))
    val_hdf5.create_dataset("map_img", (0, samples_size, samples_size), np.uint8,
                            maxshape=(None, samples_size, samples_size))

    for info in list_data_prep:
        img_name = os.path.basename(info['tif']).split('.')[0]
        tmp_label_name = os.path.join(out_label_folder, img_name + "_label_tmp.tif")
        label_name = os.path.join(out_label_folder, img_name + "_label.tif")

        if bucket_name:
            bucket.download_file(info['tif'], "Images/" + info['tif'].split('/')[-1])
            info['tif'] = "Images/" + info['tif'].split('/')[-1]
            if info['gpkg'] not in gpkg_file:
                gpkg_file.append(info['gpkg'])
                bucket.download_file(info['gpkg'], info['gpkg'].split('/')[-1])
            info['gpkg'] = info['gpkg'].split('/')[-1]

        assert_band_number(info['tif'], number_of_bands)

        value_field = info['attribute_name']
        validate_num_classes(info['gpkg'], num_classes, value_field)

        # Mask zeros from the input image into the label raster.
        if mask_reference:
            tmp_label_raster = create_new_raster_from_base(info['tif'], tmp_label_name, 1)
            vector_to_raster(info['gpkg'], info['attribute_name'], tmp_label_raster)
            tmp_label_raster = None

            masked_array = mask_image(image_reader_as_array(info['tif']),
                                      image_reader_as_array(tmp_label_name))
            create_new_raster_from_base(info['tif'], label_name, 1, masked_array)
            os.remove(tmp_label_name)
        else:
            label_raster = create_new_raster_from_base(info['tif'], label_name, 1)
            vector_to_raster(info['gpkg'], info['attribute_name'], label_raster)
            label_raster = None

        # Mask zeros from the label raster into the input image.
        if mask_input_image:
            masked_img = mask_image(image_reader_as_array(label_name),
                                    image_reader_as_array(info['tif']))
            create_new_raster_from_base(label_name, info['tif'], number_of_bands, masked_img)

        if info['dataset'] == 'trn':
            out_file = trn_hdf5
        elif info['dataset'] == 'val':
            out_file = val_hdf5
        else:
            raise ValueError(f"Dataset value must be trn or val. Provided value is {info['dataset']}")

        number_samples, number_classes = samples_preparation(info['tif'], label_name, samples_size,
                                                             samples_dist, number_samples,
                                                             number_classes, out_file,
                                                             info['dataset'], remove_background)
        print(info['tif'])
        print(number_samples)
        out_file.flush()

    trn_hdf5.close()
    val_hdf5.close()

    print("Number of samples created: ", number_samples)

    if bucket_name:
        print('Transferring samples to the bucket')
        bucket.upload_file(samples_folder + "/trn_samples.hdf5", final_samples_folder + '/trn_samples.hdf5')
        bucket.upload_file(samples_folder + "/val_samples.hdf5", final_samples_folder + '/val_samples.hdf5')

    print("End of process")
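# mask_image(src, dst) is used throughout these scripts to "mask zeros" from one raster
# into another. A minimal sketch of the assumed behaviour: wherever the source array is
# zero in every band, the destination array is zeroed as well. The repo's own
# implementation may differ in details.
def mask_image_sketch(array_to_mask_with, array_to_mask):
    # A pixel counts as background only if it is zero in every band.
    if array_to_mask_with.ndim == 3:
        background = np.all(array_to_mask_with == 0, axis=2)
    else:
        background = (array_to_mask_with == 0)
    masked = array_to_mask.copy()
    masked[background] = 0
    return masked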
def main(params): """ Training and validation datasets preparation. :param params: (dict) Parameters found in the yaml config file. """ gpkg_file = [] bucket_name = params['global']['bucket_name'] data_path = params['global']['data_path'] csv_file = params['sample']['prep_csv_file'] if bucket_name: s3 = boto3.resource('s3') bucket = s3.Bucket(bucket_name) bucket.download_file(csv_file, 'samples_prep.csv') list_data_prep = read_csv('samples_prep.csv') if data_path: final_samples_folder = os.path.join(data_path, "samples") else: final_samples_folder = "samples" samples_folder = "samples" out_label_folder = "label" else: list_data_prep = read_csv(csv_file) samples_folder = os.path.join(data_path, "samples") out_label_folder = os.path.join(data_path, "label") create_or_empty_folder(samples_folder) create_or_empty_folder(out_label_folder) number_samples = {'trn': 0, 'val': 0, 'tst': 0} number_classes = 0 trn_hdf5, val_hdf5, tst_hdf5 = create_files_and_datasets( params, samples_folder) for info in list_data_prep: if bucket_name: bucket.download_file(info['tif'], "Images/" + info['tif'].split('/')[-1]) info['tif'] = "Images/" + info['tif'].split('/')[-1] if info['gpkg'] not in gpkg_file: gpkg_file.append(info['gpkg']) bucket.download_file(info['gpkg'], info['gpkg'].split('/')[-1]) info['gpkg'] = info['gpkg'].split('/')[-1] assert_band_number(info['tif'], params['global']['number_of_bands']) # Read the input raster image np_input_image = image_reader_as_array(info['tif']) # Validate the number of class in the vector file validate_num_classes(info['gpkg'], params['global']['num_classes'], info['attribute_name']) # Burn vector file in a raster file np_label_raster = vector_to_raster(info['gpkg'], info['tif'], info['attribute_name']) # Mask the zeros from input image into label raster. if params['sample']['mask_reference']: np_label_raster = mask_image(np_input_image, np_label_raster) if info['dataset'] == 'trn': out_file = trn_hdf5 elif info['dataset'] == 'val': out_file = val_hdf5 elif info['dataset'] == 'tst': out_file = tst_hdf5 else: raise ValueError( f"Dataset value must be trn or val or tst. Provided value is {info['dataset']}" ) np_label_raster = np.reshape( np_label_raster, (np_label_raster.shape[0], np_label_raster.shape[1], 1)) number_samples, number_classes = samples_preparation( np_input_image, np_label_raster, params['global']['samples_size'], params['sample']['samples_dist'], number_samples, number_classes, out_file, info['dataset'], params['sample']['min_annotated_percent']) print(info['tif']) print(number_samples) out_file.flush() trn_hdf5.close() val_hdf5.close() tst_hdf5.close() print("Number of samples created: ", number_samples) if bucket_name: print('Transfering Samples to the bucket') bucket.upload_file(samples_folder + "/trn_samples.hdf5", final_samples_folder + '/trn_samples.hdf5') bucket.upload_file(samples_folder + "/val_samples.hdf5", final_samples_folder + '/val_samples.hdf5') bucket.upload_file(samples_folder + "/tst_samples.hdf5", final_samples_folder + '/tst_samples.hdf5') print("End of process")
def main(bucket_name, data_path, samples_size, num_classes, number_of_bands, csv_file,
         samples_dist, remove_background, mask_input_image, mask_reference):
    gpkg_file = []
    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'samples_prep.csv')
        list_data_prep = read_csv('samples_prep.csv')
        if data_path:
            final_samples_folder = os.path.join(data_path, "samples")
        else:
            final_samples_folder = "samples"
        samples_folder = "samples"
        out_label_folder = "label"
    else:
        list_data_prep = read_csv(csv_file)
        samples_folder = os.path.join(data_path, "samples")
        out_label_folder = os.path.join(data_path, "label")

    create_or_empty_folder(samples_folder)
    create_or_empty_folder(out_label_folder)

    number_samples = {'trn': 0, 'val': 0}
    number_classes = 0

    # Create the training and validation HDF5 files with resizable datasets.
    trn_hdf5 = h5py.File(os.path.join(samples_folder, "trn_samples.hdf5"), "w")
    val_hdf5 = h5py.File(os.path.join(samples_folder, "val_samples.hdf5"), "w")

    trn_hdf5.create_dataset("sat_img", (0, samples_size, samples_size, number_of_bands), np.float32,
                            maxshape=(None, samples_size, samples_size, number_of_bands))
    trn_hdf5.create_dataset("map_img", (0, samples_size, samples_size), np.uint8,
                            maxshape=(None, samples_size, samples_size))
    val_hdf5.create_dataset("sat_img", (0, samples_size, samples_size, number_of_bands), np.float32,
                            maxshape=(None, samples_size, samples_size, number_of_bands))
    val_hdf5.create_dataset("map_img", (0, samples_size, samples_size), np.uint8,
                            maxshape=(None, samples_size, samples_size))

    for info in list_data_prep:
        if bucket_name:
            bucket.download_file(info['tif'], "Images/" + info['tif'].split('/')[-1])
            info['tif'] = "Images/" + info['tif'].split('/')[-1]
            if info['gpkg'] not in gpkg_file:
                gpkg_file.append(info['gpkg'])
                bucket.download_file(info['gpkg'], info['gpkg'].split('/')[-1])
            info['gpkg'] = info['gpkg'].split('/')[-1]

        assert_band_number(info['tif'], number_of_bands)

        # Read the input raster image
        np_input_image = image_reader_as_array(info['tif'])

        # Validate the number of classes in the vector file
        validate_num_classes(info['gpkg'], num_classes, info['attribute_name'])

        # Burn the vector file into a raster file
        np_label_raster = vector_to_raster(info['gpkg'], info['tif'], info['attribute_name'])

        # Mask the zeros from the input image into the label raster.
        if mask_reference:
            np_label_raster = mask_image(np_input_image, np_label_raster)

        # Mask zeros from the label raster into the input image; otherwise use the original image.
        if mask_input_image:
            np_input_image = mask_image(np_label_raster, np_input_image)

        if info['dataset'] == 'trn':
            out_file = trn_hdf5
        elif info['dataset'] == 'val':
            out_file = val_hdf5
        else:
            raise ValueError(f"Dataset value must be trn or val. Provided value is {info['dataset']}")

        np_label_raster = np.reshape(np_label_raster,
                                     (np_label_raster.shape[0], np_label_raster.shape[1], 1))
        number_samples, number_classes = samples_preparation(np_input_image, np_label_raster,
                                                             samples_size, samples_dist,
                                                             number_samples, number_classes,
                                                             out_file, info['dataset'],
                                                             remove_background)
        print(info['tif'])
        print(number_samples)
        out_file.flush()

    trn_hdf5.close()
    val_hdf5.close()

    print("Number of samples created: ", number_samples)

    if bucket_name:
        print('Transferring samples to the bucket')
        bucket.upload_file(samples_folder + "/trn_samples.hdf5", final_samples_folder + '/trn_samples.hdf5')
        bucket.upload_file(samples_folder + "/val_samples.hdf5", final_samples_folder + '/val_samples.hdf5')

    print("End of process")
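# A hypothetical command-line entry point for the argument-style main() above. The flag
# names and defaults are illustrative only, not the repo's actual CLI.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Prepare training/validation samples.')
    parser.add_argument('--csv-file', required=True)
    parser.add_argument('--data-path', default='data')
    parser.add_argument('--bucket-name', default=None)
    parser.add_argument('--samples-size', type=int, default=256)
    parser.add_argument('--num-classes', type=int, default=4)
    parser.add_argument('--number-of-bands', type=int, default=3)
    parser.add_argument('--samples-dist', type=int, default=200)
    parser.add_argument('--remove-background', action='store_true')
    parser.add_argument('--mask-input-image', action='store_true')
    parser.add_argument('--mask-reference', action='store_true')
    args = parser.parse_args()
    main(args.bucket_name, args.data_path, args.samples_size, args.num_classes,
         args.number_of_bands, args.csv_file, args.samples_dist,
         args.remove_background, args.mask_input_image, args.mask_reference)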