def prepare_input(input_parcel_filepath: str, classtype_to_prepare: str,
                  output_dir: str):
    """
    This function creates a file that is compliant with the assumptions used by the rest of the
    classification functionality.

    It should be a csv file with the following columns:
        - object_id: column with a unique identifier
        - classname: a string column with a readable name of the classes that will be classified to

    Args:
        input_parcel_filepath: path to the input parcel file (geo file or tabular file)
        classtype_to_prepare: the class type to prepare, e.g. 'CROPGROUP',
            'LANDCOVER_EARLY', 'POPULAR_CROP_GROUNDTRUTH', ...
        output_dir: directory the classes reference ("refe") file is copied to

    Raises:
        Exception: if an input file doesn't exist, if the configured id column is
            missing, or if classtype_to_prepare has an unknown value.
    """
    # Check if input parameters are OK
    if not os.path.exists(input_parcel_filepath):
        raise Exception(f"Input file doesn't exist: {input_parcel_filepath}")
    else:
        logger.info(f"Process input file {input_parcel_filepath}")

    # Read input file
    logger.info(f"Read parceldata from {input_parcel_filepath}")
    if geofile_util.is_geofile(input_parcel_filepath):
        parceldata_df = geofile_util.read_file(input_parcel_filepath)
    else:
        parceldata_df = pdh.read_file(input_parcel_filepath)
    # DataFrame.info() prints to a buffer and returns None, so capture its output
    # explicitly instead of logging the None return value.
    import io
    info_buffer = io.StringIO()
    parceldata_df.info(buf=info_buffer)
    logger.info(f"Read Parceldata ready, info():\n{info_buffer.getvalue()}")

    # Check if the id column is present...
    if conf.columns['id'] not in parceldata_df.columns:
        message = f"Column {conf.columns['id']} not found in input parcel file: {input_parcel_filepath}. Make sure the column is present or change the column name in global_constants.py"
        logger.critical(message)
        raise Exception(message)

    # Copy the refe file to the run dir, so we always keep knowing which refe was used
    input_classes_filepath = conf.preprocess[
        'classtype_to_prepare_refe_filepath']
    if not os.path.exists(input_classes_filepath):
        raise Exception(
            f"Input classes file doesn't exist: {input_classes_filepath}")
    shutil.copy(input_classes_filepath, output_dir)

    # Now start prepare.
    # Dispatch table: base class type -> prepare function. The *_GROUNDTRUTH
    # variants use the same functions, but run a single pass on the verified
    # groundtruth crop column instead of two passes (declared + main).
    prepare_functions = {
        'CROPGROUP': prepare_input_cropgroup,
        'CROPGROUP_EARLY': prepare_input_cropgroup_early,
        'LANDCOVER': prepare_input_landcover,
        'LANDCOVER_EARLY': prepare_input_landcover_early,
        'POPULAR_CROP': prepare_input_most_popular_crop,
    }

    if classtype_to_prepare in prepare_functions:
        prepare_func = prepare_functions[classtype_to_prepare]
        # First pass: derive the declared class from the declared crop code...
        parceldata_df = prepare_func(
            parceldata_df=parceldata_df,
            column_BEFL_cropcode=column_BEFL_crop_declared,
            column_output_class=conf.columns['class_declared'])
        # ...second pass: derive the main class from the main crop code.
        return prepare_func(
            parceldata_df=parceldata_df,
            column_BEFL_cropcode=column_BEFL_crop,
            column_output_class=conf.columns['class'])

    gt_suffix = '_GROUNDTRUTH'
    if classtype_to_prepare.endswith(gt_suffix):
        base_classtype = classtype_to_prepare[:-len(gt_suffix)]
        if base_classtype in prepare_functions:
            # Groundtruth variant: single pass on the verified crop code.
            return prepare_functions[base_classtype](
                parceldata_df=parceldata_df,
                column_BEFL_cropcode=column_BEFL_crop_gt_verified,
                column_output_class=conf.columns['class_groundtruth'])

    message = f"Unknown value for parameter classtype_to_prepare: {classtype_to_prepare}"
    logger.fatal(message)
    raise Exception(message)
# Exemplo n.º 2
def prepare_input(input_parcel_filepath: str,
                  output_imagedata_parcel_input_filepath: str,
                  output_parcel_nogeo_filepath: str = None,
                  force: bool = False):
    """
    This function creates a file that is preprocessed to be a good input file for
    timeseries extraction of sentinel images.

    Args
        input_parcel_filepath: input file
        output_imagedata_parcel_input_filepath: prepared output file
        output_parcel_nogeo_filepath: optional output file with a copy of the
            non-geo data. If None, no non-geo copy is written.
        force: force creation, even if output file(s) exist already

    Raises:
        Exception: if the input file doesn't exist, has no crs specified, or
            doesn't contain the configured id column.
    """
    ##### Check if parameters are OK and init some extra params #####
    if not os.path.exists(input_parcel_filepath):
        raise Exception(f"Input file doesn't exist: {input_parcel_filepath}")

    # Check if the input file has a projection specified
    if geofile_util.get_crs(input_parcel_filepath) is None:
        message = f"The parcel input file doesn't have a projection/crs specified, so STOP: {input_parcel_filepath}"
        logger.critical(message)
        raise Exception(message)

    # If force == False and all output files exist already, stop.
    if (force is False
            and os.path.exists(output_imagedata_parcel_input_filepath)
            and (output_parcel_nogeo_filepath is None
                 or os.path.exists(output_parcel_nogeo_filepath))):
        logger.warning(
            "prepare_input: force == False and output files exist, so stop: " +
            f"{output_imagedata_parcel_input_filepath}, " +
            f"{output_parcel_nogeo_filepath}")
        return

    logger.info(f"Process input file {input_parcel_filepath}")

    # Create temp dir to store temporary data for traceability
    output_dir, output_filename = os.path.split(
        output_imagedata_parcel_input_filepath)
    output_filename_noext = os.path.splitext(output_filename)[0]
    temp_output_dir = os.path.join(output_dir, 'temp')
    if not os.path.exists(temp_output_dir):
        os.mkdir(temp_output_dir)

    ##### Read the parcel data and write nogeo version #####
    parceldata_gdf = geofile_util.read_file(input_parcel_filepath)
    logger.info(f'Parceldata read, shape: {parceldata_gdf.shape}')

    # Check if the id column is present and set as index
    if conf.columns['id'] in parceldata_gdf.columns:
        parceldata_gdf.set_index(conf.columns['id'], inplace=True)
    else:
        message = f"STOP: Column {conf.columns['id']} not found in input parcel file: {input_parcel_filepath}. Make sure the column is present or change the column name in global_constants.py"
        logger.critical(message)
        raise Exception(message)

    # BUGFIX: only write the non-geo copy when a path was supplied, as
    # os.path.exists(None) raises a TypeError.
    if output_parcel_nogeo_filepath is not None and (
            force is True
            or not os.path.exists(output_parcel_nogeo_filepath)):
        logger.info(f"Save non-geo data to {output_parcel_nogeo_filepath}")
        parceldata_nogeo_df = parceldata_gdf.drop(['geometry'], axis=1)
        pdh.to_file(parceldata_nogeo_df, output_parcel_nogeo_filepath)

    ##### Do the necessary conversions and write buffered file #####

    # If force == False and the output file exists already, stop.
    if (force is False
            and os.path.exists(output_imagedata_parcel_input_filepath)):
        logger.warning(
            "prepare_input: force == False and output files exist, so stop: " +
            f"{output_imagedata_parcel_input_filepath}")
        return

    logger.info('Apply buffer on parcel')
    parceldata_buf_gdf = parceldata_gdf.copy()

    # resolution = number of segments per circle
    buffer_size = -conf.marker.getint('buffer')
    parceldata_buf_gdf[conf.columns['geom']] = (
        parceldata_buf_gdf[conf.columns['geom']].buffer(buffer_size,
                                                        resolution=5))

    # Export buffered geometries that result in empty geometries.
    # Take a copy of the .loc slice before dropping columns inplace, so we
    # don't mutate a view of parceldata_buf_gdf (SettingWithCopy).
    logger.info('Export parcels that are empty after buffer')
    parceldata_buf_empty_df = parceldata_buf_gdf.loc[
        parceldata_buf_gdf[conf.columns['geom']].is_empty].copy()
    if len(parceldata_buf_empty_df.index) > 0:
        parceldata_buf_empty_df.drop(conf.columns['geom'],
                                     axis=1,
                                     inplace=True)
        temp_empty_filepath = os.path.join(
            temp_output_dir, f"{output_filename_noext}_empty.sqlite")
        pdh.to_file(parceldata_buf_empty_df, temp_empty_filepath)

    # Export parcels that don't result in a (multi)polygon
    parceldata_buf_notempty_gdf = parceldata_buf_gdf.loc[
        ~parceldata_buf_gdf[conf.columns['geom']].is_empty]
    parceldata_buf_nopoly_gdf = parceldata_buf_notempty_gdf.loc[
        ~parceldata_buf_notempty_gdf[conf.columns['geom']].geom_type.
        isin(['Polygon', 'MultiPolygon'])].copy()
    if len(parceldata_buf_nopoly_gdf.index) > 0:
        logger.info('Export parcels that are no (multi)polygons after buffer')
        parceldata_buf_nopoly_gdf.drop(conf.columns['geom'],
                                       axis=1,
                                       inplace=True)
        temp_nopoly_filepath = os.path.join(
            temp_output_dir, f"{output_filename_noext}_nopoly.sqlite")
        geofile_util.to_file(parceldata_buf_nopoly_gdf, temp_nopoly_filepath)

    # Export parcels that are (multi)polygons after buffering
    parceldata_buf_poly_gdf = parceldata_buf_notempty_gdf.loc[
        parceldata_buf_notempty_gdf[conf.columns['geom']].geom_type.isin(
            ['Polygon', 'MultiPolygon'])].copy()
    # Only keep the id (index) and geometry columns in the output file:
    # collect the columns to drop first instead of dropping inplace while
    # iterating over the columns.
    columns_to_drop = [
        column for column in parceldata_buf_poly_gdf.columns
        if column not in [conf.columns['id'], conf.columns['geom']]
    ]
    parceldata_buf_poly_gdf.drop(columns_to_drop, axis=1, inplace=True)
    logger.info(
        f"Export parcels that are (multi)polygons after buffer to {output_imagedata_parcel_input_filepath}"
    )
    geofile_util.to_file(parceldata_buf_poly_gdf,
                         output_imagedata_parcel_input_filepath)
    logger.info(parceldata_buf_poly_gdf)
# Exemplo n.º 3
def calc_timeseries_data(input_parcel_filepath: Path, input_country_code: str,
                         start_date_str: str, end_date_str: str,
                         sensordata_to_get: List[str], base_filename: str,
                         dest_data_dir: Path):
    """ Calculate timeseries data for the input parcels

    Args
        input_parcel_filepath: path to the input parcel file
        input_country_code: country code the parcels belong to
        start_date_str: start date of the timeseries period
        end_date_str: end date of the timeseries period
        sensordata_to_get: an array with data you want to be calculated: check out the constants starting
                           with DATA_TO_GET... for the options.
        base_filename: base name used for the output timeseries files
        dest_data_dir: directory the timeseries data is downloaded to

    Raises:
        Exception: if sensordata_to_get is None, if the gee calculation keeps
            failing after many retries, or if the results stay unavailable on
            google drive after many retries.
    """
    ##### Check and init some stuff #####
    if sensordata_to_get is None:
        raise Exception("sensordata_to_get cannot be None")
    # dest_data_dir is a Path, so use pathlib consistently instead of os.mkdir
    if not dest_data_dir.exists():
        dest_data_dir.mkdir()

    # To have a good precision, the vector input must be uploaded to gee in WGS84!
    input_preprocessed_dir = conf.dirs.getpath('input_preprocessed_dir')
    input_parcel_4326_filepath = input_preprocessed_dir / f"{input_parcel_filepath.stem}_4326.shp"

    # If the WGS84 version doesn't exist yet, create it
    if not input_parcel_4326_filepath.exists():
        input_parcel_gdf = geofile.read_file(input_parcel_filepath)
        target_epsg = 4326
        logger.info(
            f"Reproject features from {input_parcel_gdf.crs} to epsg:{target_epsg}"
        )
        input_parcel_4326_gdf = input_parcel_gdf.to_crs(epsg=target_epsg)
        logger.info(
            f"Write reprojected features to {input_parcel_4326_filepath}")
        geofile.to_file(input_parcel_4326_gdf, input_parcel_4326_filepath)

    ##### Start calculation of the timeseries on gee #####

    logger.info("Start create_sentinel_timeseries_info")
    # On windows machines there seems to be an issue with gee. The following error is very common,
    # probably because there are too many sockets created in a short time... and the cleanup
    # procedure in windows can't follow:
    #     "OSError: [WinError 10048] Elk socketadres (protocol/netwerkadres/poort) kan normaal
    #      slechts één keer worden gebruikt"
    # So execute in a loop and retry every 10 seconds... this seems to be a working workaround.
    nb_retries = 0
    done_success = False
    while done_success is False and nb_retries < 10:
        try:
            calculate_sentinel_timeseries(
                input_parcel_filepath=input_parcel_filepath,
                input_country_code=input_country_code,
                start_date_str=start_date_str,
                end_date_str=end_date_str,
                sensordata_to_get=sensordata_to_get,
                base_filename=base_filename,
                dest_data_dir=dest_data_dir)
            done_success = True

        except OSError as ex:
            nb_retries += 1
            # ex.winerror only exists on windows, but the short-circuit on
            # os.name keeps this safe on other platforms.
            if os.name == 'nt' and ex.winerror == 10048:
                logger.warning(
                    f"Exception [WinError {ex.winerror}] while trying calculate_sentinel_timeseries, retry! (Full exception message {ex})"
                )
                time.sleep(10)
            else:
                raise

    # If it wasn't successful, log and stop.
    if done_success is False:
        message = "STOP: calculate_sentinel_timeseries couldn't be completed even after many retries..."
        logger.critical(message)
        raise Exception(message)

    # Download the data from GEE
    return_status = 'UNDEFINED'
    number_retries = 0
    while return_status == 'UNDEFINED' or return_status == 'RETRY_NEEDED':
        # Download the results
        try:
            logger.info('Now download needed timeseries files')
            return_status = download_sentinel_timeseries(
                dest_data_dir=dest_data_dir, base_filename=base_filename)

            # Retry every few minutes
            if return_status == 'RETRY_NEEDED':
                logger.info(
                    'Not all data was available yet on google drive... try again in a few minutes...'
                )

                # Retry only 70 times: with +-5 minutes between tries that is
                # roughly 6 hours in total
                if number_retries >= 70:
                    return_status = 'STOP'
                    message = "Retried a lot of times, but data still isn't available"
                    logger.error(message)
                    raise Exception(message)

                # Wait for 5 minutes before retrying again... but only sleep 10 seconds at
                # a time so it can be cancelled.
                nb_sleeps = 0
                while nb_sleeps < 30:
                    time.sleep(10)
                    nb_sleeps += 1

                number_retries += 1

        except Exception:
            # Narrowed from a bare except: so e.g. KeyboardInterrupt isn't
            # caught here; log with traceback and re-raise.
            logger.exception('ERROR downloading from google drive!')
            raise