def create_crops(merged_boxes, hyperspectral_pool=None, rgb_pool=None, sensor="hyperspectral", expand=0, hyperspectral_savedir="."):
    """Crop sensor data based on a dataframe of geopandas bounding boxes
    Args:
        merged_boxes: geopandas dataframe with bounding box geometry, plotID, taxonID, elevation and height columns
        hyperspectral_pool: glob string for looking up matching sensor tiles
        rgb_pool: lookup pool passed to find_sensor_path to locate the matching RGB tile
        sensor: which sensor to crop, either "rgb" or "hyperspectral"
        expand: units in meters to add to crops to give context around deepforest box
        hyperspectral_savedir: location to save convert .tif from .h5 files
    Returns:
        crops: list of cropped sensor data
        labels: taxonID of each row
        sites: first "_"-separated token of each row's plotID
        heights: height value of each row
        elevations: elevation of each row, cast to int
        box_index: "<plotID>_<dataframe index>" string for each row
    Raises:
        ValueError: if sensor is not supported, or a sensor tile cannot be found for a box
    """
    # Fail early: without this an unsupported sensor would leave sensor_path
    # unbound (or silently reuse the previous row's path) inside the loop.
    if sensor not in ("rgb", "hyperspectral"):
        raise ValueError("Unknown sensor '{}', expected 'rgb' or 'hyperspectral'".format(sensor))

    crops = []
    labels = []
    sites = []
    box_index = []
    elevations = []
    heights = []
    for index, row in merged_boxes.iterrows():
        #Crop and append
        box = row["geometry"]
        plot_name = row["plotID"]
        site = plot_name.split("_")[0]
        elevation = int(row["elevation"])
        height = row["height"]

        #get sensor data
        if sensor == "rgb":
            try:
                sensor_path = find_sensor_path(bounds=box.bounds, lookup_pool=rgb_pool)
            except Exception as e:
                raise ValueError("Cannot find RGB data path for box bounds {} for plot_name {}".format(box.bounds,plot_name)) from e
        else:
            # The RGB tile is also needed here because convert_h5 takes it as its second argument
            try:
                rgb_path = find_sensor_path(bounds=box.bounds, lookup_pool=rgb_pool)
            except Exception as e:
                raise ValueError("Cannot find RGB data path for box bounds {} for plot_name {}".format(box.bounds,plot_name)) from e

            try:
                hyperspectral_h5_path = find_sensor_path(bounds=box.bounds, lookup_pool=hyperspectral_pool)
            except Exception as e:
                raise ValueError("Cannot find hyperspectral data path for box bounds {} for plot_name {}".format(box.bounds,plot_name)) from e

            sensor_path = convert_h5(hyperspectral_h5_path, rgb_path, savedir=hyperspectral_savedir)

        crop = crop_image(sensor_path=sensor_path, box=box, expand=expand)

        crops.append(crop)
        sites.append(site)
        labels.append(row["taxonID"])
        elevations.append(elevation)
        heights.append(height)
        box_index.append("{}_{}".format(plot_name,index))

    return crops, labels, sites, heights, elevations, box_index
Exemplo n.º 2
0
def create_crops(merged_boxes,
                 hyperspectral_pool=None,
                 rgb_pool=None,
                 sensor="hyperspectral",
                 expand=0,
                 hyperspectral_savedir="."):
    """Crop sensor data based on a dataframe of geopandas bounding boxes
    Args:
        merged_boxes: geopandas dataframe with bounding box geometry, plotID, and species label
        hyperspectral_pool: glob string for looking up matching sensor tiles
        rgb_pool: lookup pool passed to find_sensor_path to locate the matching RGB tile
        sensor: which sensor to crop, either "rgb" or "hyperspectral"
        expand: units in meters to add to crops to give context around deepforest box
        hyperspectral_savedir: location to save convert .tif from .h5 files
    Returns:
        crops: list of cropped sensor data
        labels: species id labels
        box_index: "<plotID>_<dataframe index>" string for each row
    Raises:
        ValueError: if sensor is neither "rgb" nor "hyperspectral"
    """
    # Fail early: without this an unsupported sensor would leave sensor_path
    # unbound (or silently reuse the previous row's path) inside the loop.
    if sensor not in ("rgb", "hyperspectral"):
        raise ValueError(
            "Unknown sensor '{}', expected 'rgb' or 'hyperspectral'".format(
                sensor))

    crops = []
    labels = []
    box_index = []
    for index, row in merged_boxes.iterrows():
        #Crop and append
        box = row["geometry"]
        plot_name = row["plotID"]

        #get sensor data
        if sensor == "rgb":
            sensor_path = find_sensor_path(bounds=box.bounds,
                                           lookup_pool=rgb_pool,
                                           sensor="rgb")
        else:
            # The RGB tile is also needed here because convert_h5 takes it
            # as its second argument
            rgb_path = find_sensor_path(bounds=box.bounds,
                                        lookup_pool=rgb_pool,
                                        sensor="rgb")
            hyperspectral_h5_path = find_sensor_path(
                bounds=box.bounds,
                lookup_pool=hyperspectral_pool,
                sensor="hyperspectral")
            sensor_path = convert_h5(hyperspectral_h5_path,
                                     rgb_path,
                                     savedir=hyperspectral_savedir)

        crop = crop_image(sensor_path=sensor_path, box=box, expand=expand)

        crops.append(crop)
        labels.append(row["taxonID"])
        box_index.append("{}_{}".format(plot_name, index))

    return crops, labels, box_index
def process_plot(plot_data, rgb_pool, deepforest_model):
    """Predict tree boxes for one NEON plot and join them to the field records.
    Args:
        plot_data: geopandas dataframe in a utm projection
        rgb_pool: lookup pool passed to find_sensor_path to locate the RGB tile
        deepforest_model: deepforest model used for prediction
    Returns:
        merged_boxes: geodataframe of bounding box predictions with species labels
    Raises:
        ValueError: if predict_trees returns no boxes for the plot
    """
    # Locate the RGB tile covering the plot and run the detector on it
    tile_path = find_sensor_path(bounds=plot_data.total_bounds, lookup_pool=rgb_pool, sensor="rgb")
    predicted = predict_trees(deepforest_model=deepforest_model, rgb_path=tile_path, bounds=plot_data.total_bounds)

    if predicted.empty:
        raise ValueError("No trees predicted in plot: {}, skipping.".format(plot_data.plotID.unique()[0]))

    # Spatial join of predictions with field data; when nothing joins,
    # fall back to the boxes built by create_boxes from the field data
    joined = gpd.sjoin(predicted, plot_data)
    if joined.empty:
        joined = create_boxes(plot_data)

    # One box per individual: choose_box picks among the candidates in each group
    per_individual = [
        choose_box(candidates, plot_data)
        for _, candidates in joined.groupby("indvdID")
    ]

    combined = gpd.GeoDataFrame(pd.concat(per_individual), crs=joined.crs)
    return combined.drop(columns=["xmin","xmax","ymin","ymax"])
def postprocess_CHM(df, lookup_pool):
    """Fill missing field heights from a canopy height model (CHM) raster.

    The 'q99' zonal statistic (computed by non_zero_99_quantile) is extracted
    for every geometry in df, stored in a new "CHM_height" column, and used to
    fill null values of the "height" column.

    Args:
        df: geopandas dataframe with geometry, height and plotID columns
        lookup_pool: lookup pool passed to find_sensor_path to locate the CHM raster
    Returns:
        df: the same dataframe, with "CHM_height" added and null heights filled
    Raises:
        ValueError: if no CHM raster can be found for the bounds of df
    """
    #Extract zonal stats
    try:
        CHM_path = find_sensor_path(lookup_pool=lookup_pool,
                                    bounds=df.total_bounds)
    except Exception as e:
        raise ValueError(
            "Cannot find CHM path for {} from plot {} in lookup_pool: {}".
            format(df.total_bounds, df.plotID.unique(), e)) from e
    draped_boxes = rasterstats.zonal_stats(
        df.geometry.__geo_interface__,
        CHM_path,
        add_stats={'q99': non_zero_99_quantile})
    df["CHM_height"] = [x["q99"] for x in draped_boxes]

    #if height is null, assign it
    # Assign the column back instead of calling fillna(inplace=True) on
    # df.height: the chained in-place form operates on an intermediate Series
    # and does not update df when pandas copy-on-write is active.
    df["height"] = df["height"].fillna(df["CHM_height"])

    return df
Exemplo n.º 5
0
def process_plot(plot_data, rgb_pool, deepforest_model):
    """Predict tree boxes for one NEON plot and join them to the field records.
    Args:
        plot_data: geopandas dataframe in a utm projection
        rgb_pool: lookup pool passed to find_sensor_path to locate the RGB tile
        deepforest_model: deepforest model used for prediction
    Returns:
        merged_boxes: geodataframe of bounding box predictions with species labels
    """
    # Locate the RGB tile covering the plot, then run the detector on it
    tile_path = find_sensor_path(
        bounds=plot_data.total_bounds, lookup_pool=rgb_pool, sensor="rgb")
    predicted = predict_trees(
        deepforest_model=deepforest_model,
        rgb_path=tile_path,
        bounds=plot_data.total_bounds)

    # Spatial join with the field data, then discard the pixel-coordinate columns
    joined = gpd.sjoin(predicted, plot_data)
    return joined.drop(columns=["xmin", "xmax", "ymin", "ymax"])
Exemplo n.º 6
0
def extract_features(df, x, model_class, hyperspectral_pool, site_label_dict, domain_label_dict, HSI_size=20, k_neighbors=5):
    """Generate features
    Args:
    df: a geopandas dataframe
    x: individual id to use a target
    model_class: A deeptreeattention model class to extract layer features
    hyperspectral_pool: glob dir to search for sensor files
    HSI_size: size of HSI crop
    site_label_dict: dictionary of numeric site labels
    domain_label_dict: dictionary of numeric domain labels
    k_neighbors: number of neighbors to extract
    Returns:
    feature_array: a feature matrix of encoded bottleneck layer
    distances: second value returned by predict_neighbors (presumably the distance to each neighbor - confirm against predict_neighbors)
    """
    #Due to resampling, there will be multiple rows of the same point, all are identical.
    target  =  df[df.individual == x].head(1)
    target = target.reset_index(drop=True)
    sensor_path = find_sensor_path(bounds=target.total_bounds, lookup_pool=hyperspectral_pool)

    #Encode metadata
    site = target.siteID.values[0]
    numeric_site = site_label_dict[site]
    one_hot_sites = tf.one_hot(numeric_site, model_class.sites)

    domain = target.domainID.values[0]
    numeric_domain = domain_label_dict[domain]
    one_hot_domains = tf.one_hot(numeric_domain, model_class.domains)

    #ToDO bring h5 into here.
    #elevation = elevation_from_tile(sensor_path)/1000
    # Placeholder elevation until the real value is read from the tile (see ToDo above)
    elevation = 100/1000
    metadata = [elevation, one_hot_sites, one_hot_domains]

    # Every row except the target individual is a candidate neighbor
    neighbor_pool = df[~(df.individual == x)].reset_index(drop=True)

    # Open the raster in a context manager so the file handle is released
    # even if predict_neighbors raises.
    with rasterio.open(sensor_path) as raster:
        feature_array, distances = predict_neighbors(target, metadata=metadata, HSI_size=HSI_size, raster=raster, neighbor_pool=neighbor_pool, model=model_class.ensemble_model, k_neighbors=k_neighbors)

    return feature_array, distances
def process_plot(plot_data, rgb_pool, deepforest_model, debug=False):
    """For a given NEON plot, find the correct sensor data, predict trees and associate bounding boxes with field data
    Args:
        plot_data: geopandas dataframe in a utm projection
        rgb_pool: lookup pool passed to find_sensor_path to locate the RGB tile
        deepforest_model: deepforest model used for prediction
        debug: if True, write the raw predicted boxes to
            <ROOT>/data/interim/<plotID>_boxes_raw.shp
    Returns:
        merged_boxes: geodataframe of bounding box predictions with species labels
    Raises:
        ValueError: if the RGB tile cannot be found or no trees are predicted
    """
    #DeepForest prediction
    try:
        rgb_sensor_path = find_sensor_path(bounds=plot_data.total_bounds,
                                           lookup_pool=rgb_pool)
    except Exception as e:
        # Chain the original error so the underlying lookup failure stays visible
        raise ValueError("cannot find RGB sensor for {}".format(
            plot_data.plotID.unique())) from e

    boxes = predict_trees(deepforest_model=deepforest_model,
                          rgb_path=rgb_sensor_path,
                          bounds=plot_data.total_bounds)

    if boxes.empty:
        raise ValueError("No trees predicted in plot: {}, skipping.".format(
            plot_data.plotID.unique()[0]))

    if debug:
        interim_dir = os.path.abspath(ROOT)
        boxes.to_file("{}/data/interim/{}_boxes_raw.shp".format(
            interim_dir,
            plot_data.plotID.unique()[0]))

    #Merge results with field data, buffer on edge
    merged_boxes = gpd.sjoin(boxes, plot_data)

    ##If no remaining boxes just take a box around center
    missing_ids = plot_data[~plot_data.individual.isin(merged_boxes.individual
                                                       )]

    if not missing_ids.empty:
        created_boxes = create_boxes(missing_ids)
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # equivalent row-wise append and works on older versions too.
        merged_boxes = pd.concat([merged_boxes, created_boxes])

    #If there are multiple boxes per point, take the center box
    cleaned_boxes = [
        choose_box(group, plot_data)
        for _, group in merged_boxes.groupby("individual")
    ]

    merged_boxes = gpd.GeoDataFrame(pd.concat(cleaned_boxes),
                                    crs=merged_boxes.crs)
    merged_boxes = merged_boxes.drop(columns=["xmin", "xmax", "ymin", "ymax"])

    ##if there are multiple points per box, take the tallest point.
    cleaned_points = []
    for _, group in merged_boxes.groupby("box_id"):
        if group.shape[0] > 1:
            print("removing {} points for within a deepforest box".format(
                group.shape[0] - 1))
            # NOTE(review): rows tied for the max height are all kept, and a
            # group whose heights are all null yields no rows - confirm intended.
            cleaned_points.append(group[group.height == group.height.max()])
        else:
            cleaned_points.append(group)

    merged_boxes = gpd.GeoDataFrame(pd.concat(cleaned_points),
                                    crs=merged_boxes.crs)

    #assert plot_data.shape[0] == merged_boxes.shape[0]
    return merged_boxes
Exemplo n.º 8
0
def extract_features(df,
                     x,
                     model_class,
                     hyperspectral_pool,
                     site_label_dict,
                     domain_label_dict,
                     HSI_size=20,
                     k_neighbors=5):
    """Generate features
    Args:
    df: a geopandas dataframe
    x: individual id to use a target
    model_class: A deeptreeattention model class to extract layer features
    hyperspectral_pool: glob dir to search for sensor files
    HSI_size: size of HSI crop
    site_label_dict: dictionary of numeric site labels
    domain_label_dict: dictionary of numeric domain labels
    k_neighbors: number of neighbors to extract
    Returns:
    feature_array: a feature matrix of encoded bottleneck layer
    distances: second value returned by predict_neighbors, or an array of
        9999 repeated k_neighbors times when df is empty
    """
    #Due to resampling, there will be multiple rows of the same point, all are identical.
    #Always pick itself as neighbor 1
    target = df[df.individual == x].head(1)
    target = target.reset_index(drop=True)
    sensor_path = find_sensor_path(bounds=target.total_bounds,
                                   lookup_pool=hyperspectral_pool)

    #Encode metadata
    site = target.siteID.values[0]
    numeric_site = site_label_dict[site]
    one_hot_sites = tf.one_hot(numeric_site, model_class.sites)

    domain = target.domainID.values[0]
    numeric_domain = domain_label_dict[domain]
    one_hot_domains = tf.one_hot(numeric_domain, model_class.domains)

    #for tests, dummy elevation variable
    # Catch Exception rather than using a bare except, so KeyboardInterrupt
    # and SystemExit are not swallowed by the fallback.
    try:
        elevation = elevation_from_tile(sensor_path) / 1000
    except Exception:
        print("Dummy variable for elevation debug")
        elevation = 100 / 1000

    metadata = [elevation, one_hot_sites, one_hot_domains]

    # The target itself stays in the pool (see "neighbor 1" note above)
    neighbor_pool = df

    #If there are no neighbors, return 0's
    if neighbor_pool.empty:
        feature_array = np.zeros(
            (k_neighbors, model_class.ensemble_model.output.shape[1]))
        distances = np.repeat(9999, k_neighbors)
    else:
        # Open the raster in a context manager so the file handle is
        # released even if predict_neighbors raises.
        with rasterio.open(sensor_path) as raster:
            feature_array, distances = predict_neighbors(
                target,
                metadata=metadata,
                HSI_size=HSI_size,
                raster=raster,
                neighbor_pool=neighbor_pool,
                model=model_class.ensemble_model,
                k_neighbors=k_neighbors)

    return feature_array, distances