def main(params):
    """
    Identify the class to which each image belongs.
    :param params: (dict) Parameters found in the yaml config file.
    """
    since = time.time()
    csv_file = params['inference']['img_csv_file']

    bucket = None
    bucket_name = params['global']['bucket_name']

    model, state_dict_path, model_name = net(params, inference=True)

    if torch.cuda.is_available():
        model = model.cuda()

    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'img_csv_file.csv')
        list_img = read_csv('img_csv_file.csv', inference=True)
    else:
        list_img = read_csv(csv_file, inference=True)

    if params['global']['task'] == 'classification':
        classifier(params, list_img, model)
    elif params['global']['task'] == 'segmentation':
        if bucket:
            bucket.download_file(state_dict_path, "saved_model.pth.tar")
            model = load_from_checkpoint("saved_model.pth.tar", model)
        else:
            model = load_from_checkpoint(state_dict_path, model)

        chunk_size, nbr_pix_overlap = calc_overlap(params)
        num_classes = params['global']['num_classes']
        for img in list_img:
            img_name = os.path.basename(img['tif'])
            if bucket:
                local_img = f"Images/{img_name}"
                bucket.download_file(img['tif'], local_img)
                inference_image = f"Classified_Images/{img_name.split('.')[0]}_inference.tif"
            else:
                local_img = img['tif']
                inference_image = os.path.join(params['inference']['working_folder'],
                                               f"{img_name.split('.')[0]}_inference.tif")

            assert_band_number(local_img, params['global']['number_of_bands'])

            nd_array_tif = image_reader_as_array(local_img)
            sem_seg_results = sem_seg_inference(model, nd_array_tif, nbr_pix_overlap, chunk_size, num_classes)
            create_new_raster_from_base(local_img, inference_image, sem_seg_results)
            print(f"Semantic segmentation of image {img_name} completed")
            if bucket:
                bucket.upload_file(inference_image,
                                   os.path.join(params['inference']['working_folder'],
                                                f"{img_name.split('.')[0]}_inference.tif"))
    else:
        raise ValueError(f"The task should be either classification or segmentation. "
                         f"The provided value is {params['global']['task']}")

    time_elapsed = time.time() - since
    print('Inference completed in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
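# Illustrative only: a minimal params dict that would satisfy the key lookups in
# the yaml-driven main() above. Every value is a hypothetical placeholder, and
# the model-related keys consumed by net(params, inference=True) are omitted.
example_params = {
    'global': {
        'task': 'segmentation',          # or 'classification'
        'bucket_name': None,             # set to an S3 bucket name to use boto3
        'num_classes': 4,
        'number_of_bands': 3,
    },
    'inference': {
        'img_csv_file': 'images_to_infer.csv',
        'working_folder': '/tmp/inference_out',
    },
}
# main(example_params)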
def main(bucket, work_folder, img_list, weights_file_name, model, number_of_bands, overlay, classify, num_classes):
    """Identify the class to which each image belongs.
    Args:
        bucket: bucket in which data is stored if using AWS S3
        work_folder: full file path of the folder containing images
        img_list: list containing images to classify
        weights_file_name: full file path of the file containing weights
        model: loaded model with which inference should be done
        number_of_bands: number of bands in the input rasters
        overlay: amount of overlay to apply
        classify: True if doing a classification task, False if doing semantic segmentation
        num_classes: number of classes used in the classification task
    """
    if torch.cuda.is_available():
        model = model.cuda()

    if bucket:
        bucket.download_file(weights_file_name, "saved_model.pth.tar")
        model = load_from_checkpoint("saved_model.pth.tar", model)
        if classify:
            classes_file = weights_file_name.split('/')[:-1]
            class_csv = ''
            for folder in classes_file:
                class_csv = os.path.join(class_csv, folder)
            bucket.download_file(os.path.join(class_csv, 'classes.csv'), 'classes.csv')
            with open('classes.csv', 'rt') as file:
                reader = csv.reader(file)
                classes = list(reader)
    else:
        model = load_from_checkpoint(weights_file_name, model)
        if classify:
            classes_file = weights_file_name.split('/')[:-1]
            class_path = ''
            for c in classes_file:
                class_path = class_path + c + '/'
            with open(class_path + 'classes.csv', 'rt') as f:
                reader = csv.reader(f)
                classes = list(reader)

    since = time.time()
    classified_results = np.empty((0, 2 + num_classes))

    for img in img_list:
        img_name = os.path.basename(img['tif'])
        if bucket:
            local_img = f"Images/{img_name}"
            bucket.download_file(img['tif'], local_img)
            inference_image = f"Classified_Images/{img_name.split('.')[0]}_inference.tif"
        else:
            local_img = img['tif']
            inference_image = os.path.join(work_folder, f"{img_name.split('.')[0]}_inference.tif")

        assert_band_number(local_img, number_of_bands)

        if classify:
            outputs, predicted = classifier(bucket, model, img['tif'])
            top5 = heapq.nlargest(5, outputs.cpu().numpy()[0])
            top5_loc = []
            for i in top5:
                top5_loc.append(np.where(outputs.cpu().numpy()[0] == i)[0][0])
            print(f"Image {img_name} classified as {classes[0][predicted]}")
            print('Top 5 classes:')
            for i in range(0, 5):
                print(f"\t{classes[0][top5_loc[i]]} : {top5[i]}")
            classified_results = np.append(
                classified_results,
                [np.append([img['tif'], classes[0][predicted]], outputs.cpu().numpy()[0])],
                axis=0)
            print()
        else:
            sem_seg_results = sem_seg_inference(bucket, model, img['tif'], overlay)
            create_new_raster_from_base(local_img, inference_image, sem_seg_results)
            print(f"Semantic segmentation of image {img_name} completed")

        if bucket:
            if not classify:
                bucket.upload_file(inference_image,
                                   os.path.join(work_folder, f"{img_name.split('.')[0]}_inference.tif"))

    if classify:
        csv_results = 'classification_results.csv'
        if bucket:
            np.savetxt(csv_results, classified_results, fmt='%s', delimiter=',')
            bucket.upload_file(csv_results, os.path.join(work_folder, csv_results))
        else:
            np.savetxt(os.path.join(work_folder, csv_results), classified_results, fmt='%s', delimiter=',')

    time_elapsed = time.time() - since
    print('Inference completed in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
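# Hypothetical local invocation of the argument-based main() above (no S3).
# `model` is assumed to be a torch.nn.Module built elsewhere in the module;
# every path and number below is a placeholder, not a value from the source.
main(bucket=None,
     work_folder='/data/out',
     img_list=[{'tif': '/data/images/tile_001.tif'}],
     weights_file_name='/data/weights/checkpoint.pth.tar',
     model=model,
     number_of_bands=3,
     overlay=10,
     classify=False,                     # False -> semantic segmentation branch
     num_classes=4)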
def main(params):
    """
    Training and validation datasets preparation.
    :param params: (dict) Parameters found in the yaml config file.
    """
    gpkg_file = []
    bucket_name = params['global']['bucket_name']
    data_path = params['global']['data_path']
    metadata_file = params['global']['metadata_file']
    csv_file = params['sample']['prep_csv_file']

    if metadata_file:
        image_metadata = read_parameters(metadata_file)
    else:
        image_metadata = None

    final_samples_folder = None
    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'samples_prep.csv')
        list_data_prep = read_csv('samples_prep.csv')
        if data_path:
            final_samples_folder = os.path.join(data_path, "samples")
        else:
            final_samples_folder = "samples"
        samples_folder = "samples"
        out_label_folder = "label"
    else:
        list_data_prep = read_csv(csv_file)
        samples_folder = os.path.join(data_path, "samples")
        out_label_folder = os.path.join(data_path, "label")

    create_or_empty_folder(samples_folder)
    create_or_empty_folder(out_label_folder)

    number_samples = {'trn': 0, 'val': 0, 'tst': 0}
    number_classes = 0

    trn_hdf5, val_hdf5, tst_hdf5 = create_files_and_datasets(params, samples_folder)

    with tqdm(list_data_prep) as _tqdm:
        for info in _tqdm:
            if bucket_name:
                bucket.download_file(info['tif'], "Images/" + info['tif'].split('/')[-1])
                info['tif'] = "Images/" + info['tif'].split('/')[-1]
                if info['gpkg'] not in gpkg_file:
                    gpkg_file.append(info['gpkg'])
                    bucket.download_file(info['gpkg'], info['gpkg'].split('/')[-1])
                info['gpkg'] = info['gpkg'].split('/')[-1]

            assert_band_number(info['tif'], params['global']['number_of_bands'])

            _tqdm.set_postfix(OrderedDict(file=f'{info["tif"]}',
                                          sample_size=params['global']['samples_size']))

            # Read the input raster image
            np_input_image = image_reader_as_array(info['tif'])

            # Validate the number of classes in the vector file
            validate_num_classes(info['gpkg'], params['global']['num_classes'], info['attribute_name'])

            # Burn vector file in a raster file
            np_label_raster = vector_to_raster(info['gpkg'], info['tif'], info['attribute_name'])

            # Guidelines for pre-processing: http://cs231n.github.io/neural-networks-2/#datapre
            # Scale arrays to values [0,1]. Default: will scale. Useful if dealing with 8 bit *and* 16 bit images.
            # Fall back to the [0, 1] range when scale_data is not set, so the unpacking below cannot fail.
            scale = params['global']['scale_data'] if params['global']['scale_data'] else [0, 1]
            if scale:
                sc_min, sc_max = scale
                np_input_image = minmax_scale(np_input_image,
                                              orig_range=(np.min(np_input_image), np.max(np_input_image)),
                                              scale_range=(sc_min, sc_max))

            # Mask the zeros from input image into label raster.
            if params['sample']['mask_reference']:
                np_label_raster = mask_image(np_input_image, np_label_raster)

            if info['dataset'] == 'trn':
                out_file = trn_hdf5
            elif info['dataset'] == 'val':
                out_file = val_hdf5
            elif info['dataset'] == 'tst':
                out_file = tst_hdf5
            else:
                raise ValueError(f"Dataset value must be trn or val or tst. Provided value is {info['dataset']}")

            np_label_raster = np.reshape(np_label_raster,
                                         (np_label_raster.shape[0], np_label_raster.shape[1], 1))
            number_samples, number_classes = samples_preparation(np_input_image,
                                                                 np_label_raster,
                                                                 params['global']['samples_size'],
                                                                 params['sample']['samples_dist'],
                                                                 number_samples,
                                                                 number_classes,
                                                                 out_file,
                                                                 info['dataset'],
                                                                 params['sample']['min_annotated_percent'],
                                                                 image_metadata)
            _tqdm.set_postfix(OrderedDict(number_samples=number_samples))
            out_file.flush()

    trn_hdf5.close()
    val_hdf5.close()
    tst_hdf5.close()

    print("Number of samples created: ", number_samples)

    if bucket_name and final_samples_folder:
        print('Transferring Samples to the bucket')
        bucket.upload_file(samples_folder + "/trn_samples.hdf5", final_samples_folder + '/trn_samples.hdf5')
        bucket.upload_file(samples_folder + "/val_samples.hdf5", final_samples_folder + '/val_samples.hdf5')
        bucket.upload_file(samples_folder + "/tst_samples.hdf5", final_samples_folder + '/tst_samples.hdf5')

    print("End of process")
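# For reference, a minimal sketch of what the minmax_scale helper used above is
# assumed to do (the real implementation lives elsewhere in the repo): linearly
# remap an array from its original value range to a target range. The name
# minmax_scale_sketch is illustrative, not the repo's function.
import numpy as np

def minmax_scale_sketch(img, orig_range, scale_range):
    """Linearly rescale `img` from orig_range (min, max) to scale_range (min, max)."""
    orig_min, orig_max = orig_range
    sc_min, sc_max = scale_range
    span = (orig_max - orig_min) or 1                    # avoid division by zero on constant images
    scaled = (img.astype(np.float32) - orig_min) / span  # -> [0, 1]
    return scaled * (sc_max - sc_min) + sc_min           # -> [sc_min, sc_max]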
def main(bucket_name, data_path, samples_size, num_classes, number_of_bands, csv_file,
         samples_dist, remove_background, mask_input_image, mask_reference):
    """Training and validation datasets preparation."""
    gpkg_file = []
    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'samples_prep.csv')
        list_data_prep = read_csv('samples_prep.csv')
        if data_path:
            final_samples_folder = os.path.join(data_path, "samples")
            final_out_label_folder = os.path.join(data_path, "label")
        else:
            final_samples_folder = "samples"
            final_out_label_folder = "label"
        samples_folder = "samples"
        out_label_folder = "label"
    else:
        list_data_prep = read_csv(csv_file)
        samples_folder = os.path.join(data_path, "samples")
        out_label_folder = os.path.join(data_path, "label")

    create_or_empty_folder(samples_folder)
    create_or_empty_folder(out_label_folder)

    number_samples = {'trn': 0, 'val': 0}
    number_classes = 0

    trn_hdf5 = h5py.File(os.path.join(samples_folder, "trn_samples.hdf5"), "w")
    val_hdf5 = h5py.File(os.path.join(samples_folder, "val_samples.hdf5"), "w")

    trn_hdf5.create_dataset("sat_img", (0, samples_size, samples_size, number_of_bands), np.float32,
                            maxshape=(None, samples_size, samples_size, number_of_bands))
    trn_hdf5.create_dataset("map_img", (0, samples_size, samples_size), np.uint8,
                            maxshape=(None, samples_size, samples_size))
    val_hdf5.create_dataset("sat_img", (0, samples_size, samples_size, number_of_bands), np.float32,
                            maxshape=(None, samples_size, samples_size, number_of_bands))
    val_hdf5.create_dataset("map_img", (0, samples_size, samples_size), np.uint8,
                            maxshape=(None, samples_size, samples_size))

    for info in list_data_prep:
        img_name = os.path.basename(info['tif']).split('.')[0]
        tmp_label_name = os.path.join(out_label_folder, img_name + "_label_tmp.tif")
        label_name = os.path.join(out_label_folder, img_name + "_label.tif")

        if bucket_name:
            bucket.download_file(info['tif'], "Images/" + info['tif'].split('/')[-1])
            info['tif'] = "Images/" + info['tif'].split('/')[-1]
            if info['gpkg'] not in gpkg_file:
                gpkg_file.append(info['gpkg'])
                bucket.download_file(info['gpkg'], info['gpkg'].split('/')[-1])
            info['gpkg'] = info['gpkg'].split('/')[-1]

        assert_band_number(info['tif'], number_of_bands)

        value_field = info['attribute_name']
        validate_num_classes(info['gpkg'], num_classes, value_field)

        # Mask zeros from input image into label raster.
        if mask_reference:
            tmp_label_raster = create_new_raster_from_base(info['tif'], tmp_label_name, 1)
            vector_to_raster(info['gpkg'], info['attribute_name'], tmp_label_raster)
            tmp_label_raster = None

            masked_array = mask_image(image_reader_as_array(info['tif']),
                                      image_reader_as_array(tmp_label_name))
            create_new_raster_from_base(info['tif'], label_name, 1, masked_array)
            os.remove(tmp_label_name)
        else:
            label_raster = create_new_raster_from_base(info['tif'], label_name, 1)
            vector_to_raster(info['gpkg'], info['attribute_name'], label_raster)
            label_raster = None

        # Mask zeros from label raster into input image.
        if mask_input_image:
            masked_img = mask_image(image_reader_as_array(label_name),
                                    image_reader_as_array(info['tif']))
            create_new_raster_from_base(label_name, info['tif'], number_of_bands, masked_img)

        if info['dataset'] == 'trn':
            out_file = trn_hdf5
        elif info['dataset'] == 'val':
            out_file = val_hdf5
        else:
            raise ValueError(f"Dataset value must be trn or val. Provided value is {info['dataset']}")

        number_samples, number_classes = samples_preparation(info['tif'],
                                                             label_name,
                                                             samples_size,
                                                             samples_dist,
                                                             number_samples,
                                                             number_classes,
                                                             out_file,
                                                             info['dataset'],
                                                             remove_background)
        print(info['tif'])
        print(number_samples)
        out_file.flush()

    trn_hdf5.close()
    val_hdf5.close()

    print("Number of samples created: ", number_samples)

    if bucket_name:
        print('Transferring Samples to the bucket')
        bucket.upload_file(samples_folder + "/trn_samples.hdf5", final_samples_folder + '/trn_samples.hdf5')
        bucket.upload_file(samples_folder + "/val_samples.hdf5", final_samples_folder + '/val_samples.hdf5')

    print("End of process")
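# A minimal sketch of the masking semantics assumed for mask_image above, based
# on the surrounding comments ("mask zeros from X into Y"): pixels that are zero
# in the first array become zero in the second. The name mask_image_sketch is
# illustrative, not the repo's helper.
import numpy as np

def mask_image_sketch(array_to_mask_from, array_to_mask):
    """Zero out positions of `array_to_mask` where `array_to_mask_from` is zero."""
    if array_to_mask_from.ndim == 3:
        mask = np.any(array_to_mask_from != 0, axis=-1)  # any band non-zero
    else:
        mask = array_to_mask_from != 0
    out = array_to_mask.copy()
    out[~mask] = 0                                       # 2D mask indexes the first two axes
    return out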
def main(params):
    """
    Training and validation datasets preparation.
    :param params: (dict) Parameters found in the yaml config file.
    """
    gpkg_file = []
    bucket_name = params['global']['bucket_name']
    data_path = params['global']['data_path']
    csv_file = params['sample']['prep_csv_file']

    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'samples_prep.csv')
        list_data_prep = read_csv('samples_prep.csv')
        if data_path:
            final_samples_folder = os.path.join(data_path, "samples")
        else:
            final_samples_folder = "samples"
        samples_folder = "samples"
        out_label_folder = "label"
    else:
        list_data_prep = read_csv(csv_file)
        samples_folder = os.path.join(data_path, "samples")
        out_label_folder = os.path.join(data_path, "label")

    create_or_empty_folder(samples_folder)
    create_or_empty_folder(out_label_folder)

    number_samples = {'trn': 0, 'val': 0, 'tst': 0}
    number_classes = 0

    trn_hdf5, val_hdf5, tst_hdf5 = create_files_and_datasets(params, samples_folder)

    for info in list_data_prep:
        if bucket_name:
            bucket.download_file(info['tif'], "Images/" + info['tif'].split('/')[-1])
            info['tif'] = "Images/" + info['tif'].split('/')[-1]
            if info['gpkg'] not in gpkg_file:
                gpkg_file.append(info['gpkg'])
                bucket.download_file(info['gpkg'], info['gpkg'].split('/')[-1])
            info['gpkg'] = info['gpkg'].split('/')[-1]

        assert_band_number(info['tif'], params['global']['number_of_bands'])

        # Read the input raster image
        np_input_image = image_reader_as_array(info['tif'])

        # Validate the number of classes in the vector file
        validate_num_classes(info['gpkg'], params['global']['num_classes'], info['attribute_name'])

        # Burn vector file in a raster file
        np_label_raster = vector_to_raster(info['gpkg'], info['tif'], info['attribute_name'])

        # Mask the zeros from input image into label raster.
        if params['sample']['mask_reference']:
            np_label_raster = mask_image(np_input_image, np_label_raster)

        if info['dataset'] == 'trn':
            out_file = trn_hdf5
        elif info['dataset'] == 'val':
            out_file = val_hdf5
        elif info['dataset'] == 'tst':
            out_file = tst_hdf5
        else:
            raise ValueError(f"Dataset value must be trn or val or tst. Provided value is {info['dataset']}")

        np_label_raster = np.reshape(np_label_raster,
                                     (np_label_raster.shape[0], np_label_raster.shape[1], 1))
        number_samples, number_classes = samples_preparation(np_input_image,
                                                             np_label_raster,
                                                             params['global']['samples_size'],
                                                             params['sample']['samples_dist'],
                                                             number_samples,
                                                             number_classes,
                                                             out_file,
                                                             info['dataset'],
                                                             params['sample']['min_annotated_percent'])
        print(info['tif'])
        print(number_samples)
        out_file.flush()

    trn_hdf5.close()
    val_hdf5.close()
    tst_hdf5.close()

    print("Number of samples created: ", number_samples)

    if bucket_name:
        print('Transferring Samples to the bucket')
        bucket.upload_file(samples_folder + "/trn_samples.hdf5", final_samples_folder + '/trn_samples.hdf5')
        bucket.upload_file(samples_folder + "/val_samples.hdf5", final_samples_folder + '/val_samples.hdf5')
        bucket.upload_file(samples_folder + "/tst_samples.hdf5", final_samples_folder + '/tst_samples.hdf5')

    print("End of process")
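# Illustrative only: read_csv() above is assumed to yield one dict per CSV row
# with the keys used in the loop ('tif', 'gpkg', 'attribute_name', 'dataset').
# A matching prep CSV might look like this (all paths are placeholders):
#
#   /data/images/tile_001.tif,/data/labels/region.gpkg,classes,trn
#   /data/images/tile_002.tif,/data/labels/region.gpkg,classes,val
#   /data/images/tile_003.tif,/data/labels/region.gpkg,classes,tst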
def main(bucket_name, data_path, samples_size, num_classes, number_of_bands, csv_file,
         samples_dist, remove_background, mask_input_image, mask_reference):
    """Training and validation datasets preparation."""
    gpkg_file = []
    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'samples_prep.csv')
        list_data_prep = read_csv('samples_prep.csv')
        if data_path:
            final_samples_folder = os.path.join(data_path, "samples")
        else:
            final_samples_folder = "samples"
        samples_folder = "samples"
        out_label_folder = "label"
    else:
        list_data_prep = read_csv(csv_file)
        samples_folder = os.path.join(data_path, "samples")
        out_label_folder = os.path.join(data_path, "label")

    create_or_empty_folder(samples_folder)
    create_or_empty_folder(out_label_folder)

    number_samples = {'trn': 0, 'val': 0}
    number_classes = 0

    trn_hdf5 = h5py.File(os.path.join(samples_folder, "trn_samples.hdf5"), "w")
    val_hdf5 = h5py.File(os.path.join(samples_folder, "val_samples.hdf5"), "w")

    trn_hdf5.create_dataset("sat_img", (0, samples_size, samples_size, number_of_bands), np.float32,
                            maxshape=(None, samples_size, samples_size, number_of_bands))
    trn_hdf5.create_dataset("map_img", (0, samples_size, samples_size), np.uint8,
                            maxshape=(None, samples_size, samples_size))
    val_hdf5.create_dataset("sat_img", (0, samples_size, samples_size, number_of_bands), np.float32,
                            maxshape=(None, samples_size, samples_size, number_of_bands))
    val_hdf5.create_dataset("map_img", (0, samples_size, samples_size), np.uint8,
                            maxshape=(None, samples_size, samples_size))

    for info in list_data_prep:
        if bucket_name:
            bucket.download_file(info['tif'], "Images/" + info['tif'].split('/')[-1])
            info['tif'] = "Images/" + info['tif'].split('/')[-1]
            if info['gpkg'] not in gpkg_file:
                gpkg_file.append(info['gpkg'])
                bucket.download_file(info['gpkg'], info['gpkg'].split('/')[-1])
            info['gpkg'] = info['gpkg'].split('/')[-1]

        assert_band_number(info['tif'], number_of_bands)

        # Read the input raster image
        np_input_image = image_reader_as_array(info['tif'])

        # Validate the number of classes in the vector file
        validate_num_classes(info['gpkg'], num_classes, info['attribute_name'])

        # Burn vector file in a raster file
        np_label_raster = vector_to_raster(info['gpkg'], info['tif'], info['attribute_name'])

        # Mask the zeros from input image into label raster.
        if mask_reference:
            np_label_raster = mask_image(np_input_image, np_label_raster)

        # Mask zeros from label raster into input image; otherwise use the original image.
        if mask_input_image:
            np_input_image = mask_image(np_label_raster, np_input_image)

        if info['dataset'] == 'trn':
            out_file = trn_hdf5
        elif info['dataset'] == 'val':
            out_file = val_hdf5
        else:
            raise ValueError(f"Dataset value must be trn or val. Provided value is {info['dataset']}")

        np_label_raster = np.reshape(np_label_raster,
                                     (np_label_raster.shape[0], np_label_raster.shape[1], 1))
        number_samples, number_classes = samples_preparation(np_input_image,
                                                             np_label_raster,
                                                             samples_size,
                                                             samples_dist,
                                                             number_samples,
                                                             number_classes,
                                                             out_file,
                                                             info['dataset'],
                                                             remove_background)
        print(info['tif'])
        print(number_samples)
        out_file.flush()

    trn_hdf5.close()
    val_hdf5.close()

    print("Number of samples created: ", number_samples)

    if bucket_name:
        print('Transferring Samples to the bucket')
        bucket.upload_file(samples_folder + "/trn_samples.hdf5", final_samples_folder + '/trn_samples.hdf5')
        bucket.upload_file(samples_folder + "/val_samples.hdf5", final_samples_folder + '/val_samples.hdf5')

    print("End of process")
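# For reference, a minimal sketch of how samples are assumed to be appended to
# the resizable HDF5 datasets created above (maxshape=None on the first axis
# allows growth): resize by one along axis 0, then write into the new slot.
# The function and file names are illustrative, not the repo's API.
import h5py
import numpy as np

def append_sample_sketch(dataset, sample):
    """Grow `dataset` by one along axis 0 and store `sample` in the new slot."""
    dataset.resize(dataset.shape[0] + 1, axis=0)
    dataset[-1, ...] = sample

# Usage sketch:
# with h5py.File("trn_samples.hdf5", "a") as f:
#     append_sample_sketch(f["sat_img"], np.zeros((256, 256, 3), np.float32))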