Exemplo n.º 1
0
import json
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import StratifiedKFold
from models.model import EF_Net
from models.controller import Controller
from utils.utils import get_device, read_parameters, separate_train_val
from metrics.metrics import Accuracy_Metric
import albumentations as A
from albumentations import pytorch

if __name__ == "__main__":
    df = pd.read_csv("data/metadata.csv")
    configs = read_parameters()
    device = get_device()
    transform = A.Compose([
                        A.Resize(256, 256, p = 1),
                        A.OneOf([
                                    A.Blur(p = 1),
                                    A.RandomGamma(p = 1),
                                    A.RandomBrightness(p = 1),
                                    A.RandomContrast(p = 1),
                                    ]),
                            A.OneOf([
                                    A.VerticalFlip(p = 1),
                            ]),
                        A.CoarseDropout(p = 0.5),
                        A.Normalize(p = 1),
                        pytorch.ToTensorV2()
    trn_hdf5.close()
    val_hdf5.close()
    tst_hdf5.close()

    print("Number of samples created: ", number_samples)

    if bucket_name:
        print('Transfering Samples to the bucket')
        bucket.upload_file(samples_folder + "/trn_samples.hdf5", final_samples_folder + '/trn_samples.hdf5')
        bucket.upload_file(samples_folder + "/val_samples.hdf5", final_samples_folder + '/val_samples.hdf5')
        bucket.upload_file(samples_folder + "/tst_samples.hdf5", final_samples_folder + '/tst_samples.hdf5')

    print("End of process")


if __name__ == '__main__':
    # Command-line entry point: read the yaml parameter file named on the
    # command line, run the sample preparation and report the elapsed time.
    parser = argparse.ArgumentParser(description='Sample preparation')
    parser.add_argument(
        'ParamFile',
        metavar='DIR',
        help='Path to training parameters stored in yaml',
    )
    args = parser.parse_args()
    params = read_parameters(args.ParamFile)

    start_time = time.time()

    # Module-level flag; any truthy 'debug_mode' value turns it on.
    debug = bool(params['global']['debug_mode'])

    main(params)

    elapsed = time.time() - start_time
    print(f"Elapsed time:{elapsed}")
Exemplo n.º 3
0
                                                num_classes, device)
            create_new_raster_from_base(local_img, inference_image,
                                        sem_seg_results)
            tqdm.write(f"Semantic segmentation of image {img_name} completed")
            if bucket:
                bucket.upload_file(
                    inference_image,
                    os.path.join(params['inference']['working_folder'],
                                 f"{img_name.split('.')[0]}_inference.tif"))
    else:
        raise ValueError(
            f"The task should be either classification or segmentation. The provided value is {params['global']['task']}"
        )

    time_elapsed = time.time() - since
    print('Inference completed in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))


if __name__ == '__main__':
    # Command-line entry point: load the yaml parameter file given as the
    # single positional argument and hand the parsed parameters to main().
    print('Start: ')
    parser = argparse.ArgumentParser(
        description='Inference on images using trained model',
    )
    parser.add_argument(
        'param_file',
        metavar='file',
        help='Path to training parameters stored in yaml',
    )
    args = parser.parse_args()
    params = read_parameters(args.param_file)

    main(params)
def main(params):
    """
    Training and validation datasets preparation.

    For every row of the preparation csv, the vector file is burned onto the
    raster, the raster is read as an array, and samples are written to the
    trn / val / tst HDF5 file selected by the row's 'dataset' value. When
    params['global']['bucket_name'] is set, the csv, rasters, vector files and
    metadata files are downloaded from that bucket first, and the three HDF5
    files are uploaded back once all rows have been processed.

    :param params: (dict) Parameters found in the yaml config file.
    :raises ValueError: if a row's 'dataset' value is not trn, val or tst.
    :raises AssertionError: if a raster file cannot be found, or if the band
        count does not match params['global']['number_of_bands'].
    """
    # Set rather than list: membership is tested once per row and file.
    bucket_file_cache = set()
    bucket_name = params['global']['bucket_name']
    data_path = params['global']['data_path']
    if data_path:
        # Only create the folder when a path is configured; the bucket branch
        # below explicitly supports an empty data_path.
        Path(data_path).mkdir(exist_ok=True)
    csv_file = params['sample']['prep_csv_file']

    final_samples_folder = None
    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        bucket.download_file(csv_file, 'samples_prep.csv')
        list_data_prep = read_csv('samples_prep.csv')
        if data_path:
            final_samples_folder = os.path.join(data_path, "samples")
        else:
            final_samples_folder = "samples"
        # Samples are built locally, then uploaded to final_samples_folder.
        samples_folder = "samples"
        out_label_folder = "label"

    else:
        list_data_prep = read_csv(csv_file)
        samples_folder = os.path.join(
            data_path, "samples")  #FIXME check that data_path exists!
        out_label_folder = os.path.join(data_path, "label")

    create_or_empty_folder(samples_folder)
    create_or_empty_folder(out_label_folder)

    number_samples = {'trn': 0, 'val': 0, 'tst': 0}
    number_classes = 0

    trn_hdf5, val_hdf5, tst_hdf5 = create_files_and_datasets(
        params, samples_folder)
    hdf5_by_dataset = {'trn': trn_hdf5, 'val': val_hdf5, 'tst': tst_hdf5}

    # try/finally so the HDF5 files are closed even if a row fails.
    try:
        with tqdm(list_data_prep) as _tqdm:
            for info in _tqdm:

                if bucket_name:
                    # Rasters are always downloaded; gpkg and meta files may
                    # be shared between rows, so they are fetched only once.
                    local_tif = "Images/" + info['tif'].split('/')[-1]
                    bucket.download_file(info['tif'], local_tif)
                    info['tif'] = local_tif
                    if info['gpkg'] not in bucket_file_cache:
                        bucket_file_cache.add(info['gpkg'])
                        bucket.download_file(info['gpkg'],
                                             info['gpkg'].split('/')[-1])
                    info['gpkg'] = info['gpkg'].split('/')[-1]
                    if info['meta']:
                        if info['meta'] not in bucket_file_cache:
                            bucket_file_cache.add(info['meta'])
                            bucket.download_file(info['meta'],
                                                 info['meta'].split('/')[-1])
                        info['meta'] = info['meta'].split('/')[-1]

                _tqdm.set_postfix(
                    OrderedDict(file=f'{info["tif"]}',
                                sample_size=params['global']['samples_size']))

                # Validate the number of class in the vector file
                validate_num_classes(info['gpkg'],
                                     params['global']['num_classes'],
                                     info['attribute_name'])

                assert os.path.isfile(
                    info['tif']), f"could not open raster file at {info['tif']}"
                with rasterio.open(info['tif'], 'r') as raster:

                    # Burn vector file in a raster file
                    np_label_raster = vector_to_raster(
                        vector_file=info['gpkg'],
                        input_image=raster,
                        attribute_name=info['attribute_name'],
                        fill=get_key_def('ignore_idx',
                                         get_key_def('training', params, {}),
                                         0))

                    # Read the input raster image
                    np_input_image = image_reader_as_array(
                        input_image=raster,
                        scale=get_key_def('scale_data', params['global'],
                                          None),
                        aux_vector_file=get_key_def('aux_vector_file',
                                                    params['global'], None),
                        aux_vector_attrib=get_key_def('aux_vector_attrib',
                                                      params['global'], None),
                        aux_vector_ids=get_key_def('aux_vector_ids',
                                                   params['global'], None),
                        aux_vector_dist_maps=get_key_def(
                            'aux_vector_dist_maps', params['global'], True),
                        aux_vector_dist_log=get_key_def(
                            'aux_vector_dist_log', params['global'], True),
                        aux_vector_scale=get_key_def('aux_vector_scale',
                                                     params['global'], None))

                # Mask the zeros from input image into label raster.
                if params['sample']['mask_reference']:
                    np_label_raster = mask_image(np_input_image,
                                                 np_label_raster)

                out_file = hdf5_by_dataset.get(info['dataset'])
                if out_file is None:
                    raise ValueError(
                        f"Dataset value must be trn or val or tst. Provided value is {info['dataset']}"
                    )

                meta_map = get_key_def("meta_map", params["global"], {})
                metadata = None
                if info['meta'] is not None and isinstance(
                        info['meta'], str) and os.path.isfile(info['meta']):
                    metadata = read_parameters(info['meta'])

                # Expected band count = image bands + layers from meta_map.
                input_band_count = np_input_image.shape[
                    2] + MetaSegmentationDataset.get_meta_layer_count(meta_map)
                assert input_band_count == params['global']['number_of_bands'], \
                    f"The number of bands in the input image ({input_band_count}) and the parameter" \
                    f"'number_of_bands' in the yaml file ({params['global']['number_of_bands']}) should be identical"

                # Add a trailing axis of length 1 to the 2D label raster.
                np_label_raster = np.reshape(
                    np_label_raster,
                    (np_label_raster.shape[0], np_label_raster.shape[1], 1))
                number_samples, number_classes = samples_preparation(
                    np_input_image, np_label_raster,
                    params['global']['samples_size'],
                    params['sample']['samples_dist'], number_samples,
                    number_classes, out_file, info['dataset'],
                    params['sample']['min_annotated_percent'], metadata)

                _tqdm.set_postfix(OrderedDict(number_samples=number_samples))
                out_file.flush()
    finally:
        for hdf5_file in (trn_hdf5, val_hdf5, tst_hdf5):
            hdf5_file.close()

    print("Number of samples created: ", number_samples)

    if bucket_name and final_samples_folder:
        print('Transfering Samples to the bucket')
        for hdf5_name in ('trn_samples.hdf5', 'val_samples.hdf5',
                          'tst_samples.hdf5'):
            bucket.upload_file(samples_folder + "/" + hdf5_name,
                               final_samples_folder + '/' + hdf5_name)

    print("End of process")
Exemplo n.º 5
0
def main(params):
    """
    Run inference (classification or semantic segmentation) on a list of images.

    The image list comes from a csv file or, without a bucket, from the
    '*.tif' files of a directory. For the segmentation task, each image is
    read, passed to sem_seg_inference, and written as
    '<image name>_inference.tif'; with a bucket configured, inputs are
    downloaded from it and results are uploaded back.

    :param params: (dict) Parameters found in the yaml config file.
    :raises NotImplementedError: if a bucket is used with a non-csv input.
    :raises ValueError: if params['global']['task'] is neither
        'classification' nor 'segmentation'.
    :raises AssertionError: if the image directory is missing or holds no
        .tif file, if a raster or required metadata file is missing, or if
        the band count does not match params['global']['number_of_bands'].
    """
    since = time.time()
    img_dir_or_csv = params['inference']['img_dir_or_csv_file']
    working_folder = Path(params['inference']['working_folder'])
    working_folder.mkdir(exist_ok=True)
    print(f'Inferences will be saved to: {working_folder}')

    bucket = None
    # Set rather than list: membership is tested once per image.
    bucket_file_cache = set()
    bucket_name = params['global']['bucket_name']

    model, state_dict_path, model_name = net(params, inference=True)

    num_devices = params['global']['num_gpus'] if params['global'][
        'num_gpus'] else 0
    # list of GPU devices that are available and unused. If no GPUs, returns empty list
    lst_device_ids = get_device_ids(
        num_devices) if torch.cuda.is_available() else []
    device = torch.device(f'cuda:{lst_device_ids[0]}' if torch.cuda.
                          is_available() and lst_device_ids else 'cpu')

    if lst_device_ids:
        print(f"Using Cuda device {lst_device_ids[0]}")
    else:
        warnings.warn(
            "No Cuda device available. This process will only run on CPU")

    model.to(device)

    if bucket_name:
        s3 = boto3.resource('s3')
        bucket = s3.Bucket(bucket_name)
        if img_dir_or_csv.endswith('.csv'):
            bucket.download_file(img_dir_or_csv, 'img_csv_file.csv')
            list_img = read_csv('img_csv_file.csv', inference=True)
        else:
            raise NotImplementedError(
                'Specify a csv file containing images for inference. Directory input not implemented yet'
            )
    else:
        if img_dir_or_csv.endswith('.csv'):
            list_img = read_csv(img_dir_or_csv, inference=True)
        else:
            img_dir = Path(img_dir_or_csv)
            assert img_dir.exists(
            ), f'Could not find directory "{img_dir_or_csv}"'
            # Directory entries only carry a 'tif' key (no 'meta').
            list_img = [{'tif': img_path}
                        for img_path in sorted(img_dir.glob('*.tif'))]
            # The previous check (len >= 0) could never fail.
            assert list_img, f'No .tif files found in {img_dir_or_csv}'

    if params['global']['task'] == 'classification':
        classifier(params, list_img, model, device)

    elif params['global']['task'] == 'segmentation':
        if bucket:
            bucket.download_file(state_dict_path, "saved_model.pth.tar")
            model, _ = load_from_checkpoint("saved_model.pth.tar", model)
        else:
            model, _ = load_from_checkpoint(state_dict_path, model)

        chunk_size, nbr_pix_overlap = calc_overlap(params)
        num_classes = params['global']['num_classes']
        if num_classes == 1:
            # assume background is implicitly needed (makes no sense to predict with one class otherwise)
            # this will trigger some warnings elsewhere, but should succeed nonetheless
            num_classes = 2
        with tqdm(list_img, desc='image list', position=0) as _tqdm:
            for img in _tqdm:
                img_name = os.path.basename(img['tif'])
                inference_name = f"{img_name.split('.')[0]}_inference.tif"
                if bucket:
                    local_img = f"Images/{img_name}"
                    bucket.download_file(img['tif'], local_img)
                    inference_image = f"Classified_Images/{inference_name}"
                    # .get(): tolerate rows that have no 'meta' entry.
                    remote_meta = img.get('meta')
                    if remote_meta:
                        if remote_meta not in bucket_file_cache:
                            bucket_file_cache.add(remote_meta)
                            bucket.download_file(remote_meta,
                                                 remote_meta.split('/')[-1])
                        img['meta'] = remote_meta.split('/')[-1]
                else:
                    local_img = img['tif']
                    inference_image = os.path.join(
                        params['inference']['working_folder'],
                        inference_name)

                assert os.path.isfile(
                    local_img), f"could not open raster file at {local_img}"
                with rasterio.open(local_img, 'r') as raster:

                    np_input_image = image_reader_as_array(
                        input_image=raster,
                        scale=get_key_def('scale_data', params['global'],
                                          None),
                        aux_vector_file=get_key_def('aux_vector_file',
                                                    params['global'], None),
                        aux_vector_attrib=get_key_def('aux_vector_attrib',
                                                      params['global'], None),
                        aux_vector_ids=get_key_def('aux_vector_ids',
                                                   params['global'], None),
                        aux_vector_dist_maps=get_key_def(
                            'aux_vector_dist_maps', params['global'], True),
                        aux_vector_scale=get_key_def('aux_vector_scale',
                                                     params['global'], None))

                meta_map = get_key_def("meta_map", params["global"], {})
                metadata = None
                if meta_map:
                    # .get(): directory-sourced entries have no 'meta' key and
                    # must fail with the assertion message, not a KeyError.
                    meta_path = img.get('meta')
                    assert meta_path is not None and isinstance(meta_path, str) and os.path.isfile(meta_path), \
                        "global configuration requested metadata mapping onto loaded samples, but raster did not have available metadata"
                    metadata = read_parameters(meta_path)

                # NOTE(review): 'debug' is not defined in this function; it is
                # presumably a module-level flag set by the caller -- confirm.
                if debug:
                    _tqdm.set_postfix(
                        OrderedDict(image_name=img_name,
                                    image_shape=np_input_image.shape))

                # Expected band count = image bands + layers from meta_map.
                input_band_count = np_input_image.shape[
                    2] + MetaSegmentationDataset.get_meta_layer_count(meta_map)
                assert input_band_count == params['global']['number_of_bands'], \
                    f"The number of bands in the input image ({input_band_count}) and the parameter" \
                    f"'number_of_bands' in the yaml file ({params['global']['number_of_bands']}) should be identical"

                sem_seg_results = sem_seg_inference(model, np_input_image,
                                                    nbr_pix_overlap,
                                                    chunk_size, num_classes,
                                                    device, meta_map, metadata)

                if debug and len(np.unique(sem_seg_results)) == 1:
                    print(
                        'Something is wrong. Inference contains only one value. Make sure data scale is coherent with training domain values.'
                    )

                create_new_raster_from_base(local_img, inference_image,
                                            sem_seg_results)
                tqdm.write(
                    f"Semantic segmentation of image {img_name} completed")
                if bucket:
                    bucket.upload_file(
                        inference_image,
                        os.path.join(params['inference']['working_folder'],
                                     inference_name))
    else:
        raise ValueError(
            f"The task should be either classification or segmentation. The provided value is {params['global']['task']}"
        )

    time_elapsed = time.time() - since
    print('Inference completed in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))