import json import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim from sklearn.model_selection import StratifiedKFold from models.model import EF_Net from models.controller import Controller from utils.utils import get_device, read_parameters, separate_train_val from metrics.metrics import Accuracy_Metric import albumentations as A from albumentations import pytorch if __name__ == "__main__": df = pd.read_csv("data/metadata.csv") configs = read_parameters() device = get_device() transform = A.Compose([ A.Resize(256, 256, p = 1), A.OneOf([ A.Blur(p = 1), A.RandomGamma(p = 1), A.RandomBrightness(p = 1), A.RandomContrast(p = 1), ]), A.OneOf([ A.VerticalFlip(p = 1), ]), A.CoarseDropout(p = 0.5), A.Normalize(p = 1), pytorch.ToTensorV2()
trn_hdf5.close() val_hdf5.close() tst_hdf5.close() print("Number of samples created: ", number_samples) if bucket_name: print('Transfering Samples to the bucket') bucket.upload_file(samples_folder + "/trn_samples.hdf5", final_samples_folder + '/trn_samples.hdf5') bucket.upload_file(samples_folder + "/val_samples.hdf5", final_samples_folder + '/val_samples.hdf5') bucket.upload_file(samples_folder + "/tst_samples.hdf5", final_samples_folder + '/tst_samples.hdf5') print("End of process") if __name__ == '__main__': parser = argparse.ArgumentParser(description='Sample preparation') parser.add_argument('ParamFile', metavar='DIR', help='Path to training parameters stored in yaml') args = parser.parse_args() params = read_parameters(args.ParamFile) start_time = time.time() debug = True if params['global']['debug_mode'] else False main(params) print("Elapsed time:{}".format(time.time() - start_time))
num_classes, device) create_new_raster_from_base(local_img, inference_image, sem_seg_results) tqdm.write(f"Semantic segmentation of image {img_name} completed") if bucket: bucket.upload_file( inference_image, os.path.join(params['inference']['working_folder'], f"{img_name.split('.')[0]}_inference.tif")) else: raise ValueError( f"The task should be either classification or segmentation. The provided value is {params['global']['task']}" ) time_elapsed = time.time() - since print('Inference completed in {:.0f}m {:.0f}s'.format( time_elapsed // 60, time_elapsed % 60)) if __name__ == '__main__': print('Start: ') parser = argparse.ArgumentParser( description='Inference on images using trained model') parser.add_argument('param_file', metavar='file', help='Path to training parameters stored in yaml') args = parser.parse_args() params = read_parameters(args.param_file) main(params)
def main(params):
    """
    Training and validation datasets preparation.

    Reads the list of images to prepare from the csv file named by
    ``params['sample']['prep_csv_file']``. For each entry, the vector file is
    burned into a label raster, the image is read as an array, and samples are
    written to the trn/val/tst hdf5 file selected by the entry's ``dataset``
    value. When ``params['global']['bucket_name']`` is set, the csv, images,
    vector files and metadata are first downloaded from that S3 bucket and the
    three hdf5 files are uploaded back to it at the end.

    :param params: (dict) Parameters found in the yaml config file.
    :raises ValueError: if an entry's ``dataset`` is not trn, val or tst.
    :raises AssertionError: if a raster file cannot be found, or if the band
        count (image bands plus metadata layers) differs from
        ``params['global']['number_of_bands']``.
    """
    # Only membership is ever tested on the cache, so a set is sufficient.
    bucket_file_cache = set()
    bucket = None
    bucket_name = params['global']['bucket_name']
    data_path = params['global']['data_path']
    # data_path may legitimately be unset in bucket mode (see the "samples"
    # fallback below); Path(None) would raise TypeError before reaching it.
    if data_path:
        Path(data_path).mkdir(exist_ok=True)
    csv_file = params['sample']['prep_csv_file']

    final_samples_folder = None
    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'samples_prep.csv')
        list_data_prep = read_csv('samples_prep.csv')
        if data_path:
            final_samples_folder = os.path.join(data_path, "samples")
        else:
            final_samples_folder = "samples"
        samples_folder = "samples"
        out_label_folder = "label"
    else:
        list_data_prep = read_csv(csv_file)
        samples_folder = os.path.join(
            data_path, "samples")  # FIXME check that data_path exists!
        out_label_folder = os.path.join(data_path, "label")

    create_or_empty_folder(samples_folder)
    create_or_empty_folder(out_label_folder)

    number_samples = {'trn': 0, 'val': 0, 'tst': 0}
    number_classes = 0

    trn_hdf5, val_hdf5, tst_hdf5 = create_files_and_datasets(
        params, samples_folder)
    hdf5_by_dataset = {'trn': trn_hdf5, 'val': val_hdf5, 'tst': tst_hdf5}

    # The hdf5 files must be closed even when an entry fails part-way through,
    # otherwise the handles leak and buffered samples may never reach disk.
    try:
        with tqdm(list_data_prep) as _tqdm:
            for info in _tqdm:
                if bucket_name:
                    local_tif = "Images/" + info['tif'].split('/')[-1]
                    bucket.download_file(info['tif'], local_tif)
                    info['tif'] = local_tif

                    # Vector and metadata files can be shared by several
                    # entries: download each bucket key only once.
                    local_gpkg = info['gpkg'].split('/')[-1]
                    if info['gpkg'] not in bucket_file_cache:
                        bucket_file_cache.add(info['gpkg'])
                        bucket.download_file(info['gpkg'], local_gpkg)
                    info['gpkg'] = local_gpkg

                    if info['meta']:
                        local_meta = info['meta'].split('/')[-1]
                        if info['meta'] not in bucket_file_cache:
                            bucket_file_cache.add(info['meta'])
                            bucket.download_file(info['meta'], local_meta)
                        info['meta'] = local_meta

                _tqdm.set_postfix(
                    OrderedDict(file=f'{info["tif"]}',
                                sample_size=params['global']['samples_size']))

                # Validate the number of class in the vector file
                validate_num_classes(info['gpkg'],
                                     params['global']['num_classes'],
                                     info['attribute_name'])

                assert os.path.isfile(
                    info['tif']), f"could not open raster file at {info['tif']}"

                with rasterio.open(info['tif'], 'r') as raster:
                    # Burn vector file in a raster file
                    np_label_raster = vector_to_raster(
                        vector_file=info['gpkg'],
                        input_image=raster,
                        attribute_name=info['attribute_name'],
                        fill=get_key_def('ignore_idx',
                                         get_key_def('training', params, {}),
                                         0))

                    # Read the input raster image
                    np_input_image = image_reader_as_array(
                        input_image=raster,
                        scale=get_key_def('scale_data', params['global'], None),
                        aux_vector_file=get_key_def('aux_vector_file',
                                                    params['global'], None),
                        aux_vector_attrib=get_key_def('aux_vector_attrib',
                                                      params['global'], None),
                        aux_vector_ids=get_key_def('aux_vector_ids',
                                                   params['global'], None),
                        aux_vector_dist_maps=get_key_def('aux_vector_dist_maps',
                                                         params['global'], True),
                        aux_vector_dist_log=get_key_def('aux_vector_dist_log',
                                                        params['global'], True),
                        aux_vector_scale=get_key_def('aux_vector_scale',
                                                     params['global'], None))

                # Mask the zeros from input image into label raster.
                if params['sample']['mask_reference']:
                    np_label_raster = mask_image(np_input_image,
                                                 np_label_raster)

                # Membership test rather than truthiness: the values are file
                # objects whose boolean value is not what is being asked here.
                if info['dataset'] not in hdf5_by_dataset:
                    raise ValueError(
                        f"Dataset value must be trn or val or tst. Provided value is {info['dataset']}"
                    )
                out_file = hdf5_by_dataset[info['dataset']]

                meta_map = get_key_def("meta_map", params["global"], {})
                metadata = None
                # isinstance(..., str) already excludes None.
                if isinstance(info['meta'], str) and os.path.isfile(
                        info['meta']):
                    metadata = read_parameters(info['meta'])

                input_band_count = np_input_image.shape[
                    2] + MetaSegmentationDataset.get_meta_layer_count(meta_map)
                assert input_band_count == params['global']['number_of_bands'], \
                    f"The number of bands in the input image ({input_band_count}) and the parameter" \
                    f"'number_of_bands' in the yaml file ({params['global']['number_of_bands']}) should be identical"

                # Give the label raster an explicit trailing axis of size 1.
                np_label_raster = np.reshape(
                    np_label_raster,
                    (np_label_raster.shape[0], np_label_raster.shape[1], 1))

                number_samples, number_classes = samples_preparation(
                    np_input_image, np_label_raster,
                    params['global']['samples_size'],
                    params['sample']['samples_dist'], number_samples,
                    number_classes, out_file, info['dataset'],
                    params['sample']['min_annotated_percent'], metadata)

                _tqdm.set_postfix(OrderedDict(number_samples=number_samples))
                out_file.flush()
    finally:
        for hdf5_file in (trn_hdf5, val_hdf5, tst_hdf5):
            hdf5_file.close()

    print("Number of samples created: ", number_samples)

    if bucket_name and final_samples_folder:
        print('Transfering Samples to the bucket')
        bucket.upload_file(samples_folder + "/trn_samples.hdf5",
                           final_samples_folder + '/trn_samples.hdf5')
        bucket.upload_file(samples_folder + "/val_samples.hdf5",
                           final_samples_folder + '/val_samples.hdf5')
        bucket.upload_file(samples_folder + "/tst_samples.hdf5",
                           final_samples_folder + '/tst_samples.hdf5')

    print("End of process")
def main(params):
    """
    Identify the class to which each image belongs.

    Builds the list of images either from a csv file or from the ``*.tif``
    files of a directory (``params['inference']['img_dir_or_csv_file']``),
    then runs the task named by ``params['global']['task']``:

    * ``classification``: delegates to ``classifier``.
    * ``segmentation``: loads the checkpoint, runs ``sem_seg_inference`` on
      each image and writes one ``<name>_inference.tif`` raster per image.

    When ``params['global']['bucket_name']`` is set, the csv, checkpoint,
    images and metadata are downloaded from that S3 bucket and each inference
    raster is uploaded back to it. Directory input is not supported in that
    mode.

    :param params: (dict) Parameters found in the yaml config file.
    :raises NotImplementedError: if a bucket is used with a non-csv input.
    :raises ValueError: if the task is neither classification nor segmentation.
    :raises AssertionError: if the image directory is missing or holds no
        .tif file, if a raster file cannot be found, if metadata mapping is
        requested but an image has no metadata file, or if the band count
        differs from ``params['global']['number_of_bands']``.
    """
    since = time.time()
    img_dir_or_csv = params['inference']['img_dir_or_csv_file']
    working_folder = Path(params['inference']['working_folder'])
    working_folder.mkdir(exist_ok=True)
    print(f'Inferences will be saved to: {working_folder}')

    bucket = None
    # Only membership is ever tested on the cache, so a set is sufficient.
    bucket_file_cache = set()
    bucket_name = params['global']['bucket_name']

    model, state_dict_path, model_name = net(params, inference=True)

    num_devices = params['global']['num_gpus'] or 0
    # list of GPU devices that are available and unused. If no GPUs, returns empty list
    lst_device_ids = get_device_ids(
        num_devices) if torch.cuda.is_available() else []
    if lst_device_ids:
        device = torch.device(f'cuda:{lst_device_ids[0]}')
        print(f"Using Cuda device {lst_device_ids[0]}")
    else:
        device = torch.device('cpu')
        warnings.warn(
            "No Cuda device available. This process will only run on CPU")

    model.to(device)

    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        if img_dir_or_csv.endswith('.csv'):
            bucket.download_file(img_dir_or_csv, 'img_csv_file.csv')
            list_img = read_csv('img_csv_file.csv', inference=True)
        else:
            raise NotImplementedError(
                'Specify a csv file containing images for inference. Directory input not implemented yet'
            )
    else:
        if img_dir_or_csv.endswith('.csv'):
            list_img = read_csv(img_dir_or_csv, inference=True)
        else:
            img_dir = Path(img_dir_or_csv)
            assert img_dir.exists(
            ), f'Could not find directory "{img_dir_or_csv}"'
            list_img = [{'tif': img_path}
                        for img_path in sorted(img_dir.glob('*.tif'))]
            # Must be a strict comparison: ">= 0" can never fail, so an empty
            # directory used to pass silently.
            assert len(
                list_img) > 0, f'No .tif files found in {img_dir_or_csv}'

    if params['global']['task'] == 'classification':
        classifier(params, list_img, model, device)

    elif params['global']['task'] == 'segmentation':
        if bucket:
            bucket.download_file(state_dict_path, "saved_model.pth.tar")
            model, _ = load_from_checkpoint("saved_model.pth.tar", model)
        else:
            model, _ = load_from_checkpoint(state_dict_path, model)

        chunk_size, nbr_pix_overlap = calc_overlap(params)
        num_classes = params['global']['num_classes']
        if num_classes == 1:
            # assume background is implicitly needed (makes no sense to predict with one class otherwise)
            # this will trigger some warnings elsewhere, but should succeed nonetheless
            num_classes = 2

        with tqdm(list_img, desc='image list', position=0) as _tqdm:
            for img in _tqdm:
                img_name = os.path.basename(img['tif'])
                inference_name = f"{img_name.split('.')[0]}_inference.tif"
                # Entries built from a directory listing carry no 'meta' key,
                # so always read it with .get().
                meta_path = img.get('meta')

                if bucket:
                    local_img = f"Images/{img_name}"
                    bucket.download_file(img['tif'], local_img)
                    inference_image = f"Classified_Images/{inference_name}"
                    if meta_path:
                        # Metadata files can be shared by several images:
                        # download each bucket key only once.
                        local_meta = meta_path.split('/')[-1]
                        if meta_path not in bucket_file_cache:
                            bucket_file_cache.add(meta_path)
                            bucket.download_file(meta_path, local_meta)
                        img['meta'] = local_meta
                        meta_path = local_meta
                else:
                    local_img = img['tif']
                    inference_image = os.path.join(
                        params['inference']['working_folder'], inference_name)

                assert os.path.isfile(
                    local_img), f"could not open raster file at {local_img}"

                # NOTE(review): unlike the sample-preparation call, no
                # aux_vector_dist_log is passed here, so the helper's default
                # applies - confirm it matches what was used for training.
                with rasterio.open(local_img, 'r') as raster:
                    np_input_image = image_reader_as_array(
                        input_image=raster,
                        scale=get_key_def('scale_data', params['global'], None),
                        aux_vector_file=get_key_def('aux_vector_file',
                                                    params['global'], None),
                        aux_vector_attrib=get_key_def('aux_vector_attrib',
                                                      params['global'], None),
                        aux_vector_ids=get_key_def('aux_vector_ids',
                                                   params['global'], None),
                        aux_vector_dist_maps=get_key_def(
                            'aux_vector_dist_maps', params['global'], True),
                        aux_vector_scale=get_key_def('aux_vector_scale',
                                                     params['global'], None))

                meta_map = get_key_def("meta_map", params["global"], {})
                metadata = None
                if meta_map:
                    # isinstance(..., str) already excludes None.
                    assert isinstance(meta_path, str) and os.path.isfile(meta_path), \
                        "global configuration requested metadata mapping onto loaded samples, but raster did not have available metadata"
                    metadata = read_parameters(meta_path)

                # NOTE(review): `debug` is not defined inside this function;
                # presumably a module-level flag - confirm it is set before
                # main() is called, otherwise this raises NameError.
                if debug:
                    _tqdm.set_postfix(
                        OrderedDict(image_name=img_name,
                                    image_shape=np_input_image.shape))

                input_band_count = np_input_image.shape[
                    2] + MetaSegmentationDataset.get_meta_layer_count(meta_map)
                assert input_band_count == params['global']['number_of_bands'], \
                    f"The number of bands in the input image ({input_band_count}) and the parameter" \
                    f"'number_of_bands' in the yaml file ({params['global']['number_of_bands']}) should be identical"

                sem_seg_results = sem_seg_inference(model, np_input_image,
                                                    nbr_pix_overlap,
                                                    chunk_size, num_classes,
                                                    device, meta_map, metadata)

                if debug and len(np.unique(sem_seg_results)) == 1:
                    print(
                        'Something is wrong. Inference contains only one value. Make sure data scale is coherent with training domain values.'
                    )

                create_new_raster_from_base(local_img, inference_image,
                                            sem_seg_results)
                tqdm.write(
                    f"Semantic segmentation of image {img_name} completed")
                if bucket:
                    bucket.upload_file(
                        inference_image,
                        os.path.join(params['inference']['working_folder'],
                                     inference_name))
    else:
        raise ValueError(
            f"The task should be either classification or segmentation. The provided value is {params['global']['task']}"
        )

    time_elapsed = time.time() - since
    print('Inference completed in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))