Example No. 1
def clip_shapefile_to_geometry(shapefile, clip_shapefile, out_filename,
                               outside_filename, out_directory):
    """ Mask a shapefile with another shapefile."""

    intersection = []
    outside = []
    with fopen(shapefile, "r") as src:
        meta = deepcopy(src.meta)
        with fopen(clip_shapefile) as clip_with:
            for feat in src:
                poly = shape(feat['geometry'])
                # keep the feature once if any clip polygon fully contains it
                for clip_feat in clip_with:
                    clip_poly = shape(clip_feat['geometry'])
                    if clip_poly.contains(poly):
                        intersection.append(feat)
                        break
                else:
                    outside.append(feat)

    with fopen(os.path.join(out_directory, out_filename), 'w', **meta) as dst:
        print("Saving {}".format(out_filename))
        for feat in intersection:
            dst.write(feat)

    with fopen(os.path.join(out_directory, outside_filename), 'w',
               **meta) as dst:
        print("Saving {}".format(outside_filename))
        for feat in outside:
            dst.write(feat)
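
A short usage sketch with hypothetical file names, assuming fopen is fiona.open: features fully contained by any polygon in the clip shapefile go to the first output, the rest to the second.

clip_shapefile_to_geometry('fields.shp', 'county_boundary.shp',
                           'fields_inside.shp', 'fields_outside.shp',
                           out_directory='clipped')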
Example No. 2
def buffer_shapefile(shp):
    buf = -0.00050
    # read all features first: opening the same path for writing while it is
    # still open for reading would truncate the source shapefile
    with fopen(shp, 'r') as polys:
        meta = polys.meta
        feats = list(polys)
    with fopen(shp, 'w', **meta) as dst:
        for feat in feats:
            feat['geometry'] = mapping(shape(feat['geometry']).buffer(buf))
            dst.write(feat)
Example No. 3
 def _populate_array_from_points(self):
     # TODO: replace with geopandas.shp_to_dataframe
     with fopen(self.shapefile_path, 'r') as src:
         for feat in src:
             coords = feat['geometry']['coordinates']
             val = feat['properties']['POINT_TYPE']
             self._add_entry(coords, val=val)
Example No. 4
def shapefile_area(shapefile):
    summ = 0
    with fopen(shapefile, "r") as src:
        for feat in src:
            poly = shape(feat['geometry'])
            summ += poly.area
    return summ
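
Note that shapely computes .area in the layer's native CRS units, so for a geographic (EPSG:4326) shapefile the sum above is in square degrees. A hedged sketch of one way to get square meters instead, assuming pyproj >= 2 is available; EPSG:5070 (CONUS Albers) is an assumed equal-area choice, not something the original code uses.

from fiona import open as fopen
from pyproj import Transformer
from shapely.geometry import shape
from shapely.ops import transform


def shapefile_area_sq_m(shapefile, equal_area_crs='EPSG:5070'):
    # reproject each polygon to an equal-area CRS before summing
    total = 0.0
    with fopen(shapefile, 'r') as src:
        project = Transformer.from_crs(src.crs, equal_area_crs,
                                       always_xy=True).transform
        for feat in src:
            total += transform(project, shape(feat['geometry'])).area
    return total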
Example No. 5
def raster_point_extract(raster, points, dtime):
    """ Get point values from a raster.
    :param raster: Thredds.Gridmet-derived array in Landsat image geometry.
    :param points: Shapefile of points.
    :param dtime: Datetime.datetime object.
    :return: Dict of coords, row/cols, and values of raster at that point.
    """
    point_data = {}
    with fopen(points, 'r') as src:
        for feature in src:
            name = feature['properties']['siteid']
            point_data[name] = {'coords': feature['geometry']['coordinates']}

        with rasopen(raster, 'r') as rsrc:
            rass_arr = rsrc.read()
            rass_arr = rass_arr.reshape(rass_arr.shape[1], rass_arr.shape[2])
            affine = rsrc.transform

        for key, val in point_data.items():
            x, y = val['coords']
            col, row = ~affine * (x, y)
            val = rass_arr[int(row), int(col)]
            point_data[key][dtime] = [val, None]

        return point_data
Example No. 6
def raster_point_extract(lst_raster, eef_raster, points):
    point_data = {}
    with fopen(points, 'r') as src:
        for feature in src:
            try:
                name = feature['properties']['Name']
            except KeyError:
                name = feature['properties']['FID']
            point_data[name] = {'coords': feature['geometry']['coordinates']}

        with rasopen(lst_raster, 'r') as src:
            lst_arr = src.read()
            lst_arr = lst_arr.reshape(lst_arr.shape[1], lst_arr.shape[2])
            lst_affine = src.transform

        with rasopen(eef_raster, 'r') as src:
            eef_lst = src.read()
            eef_lst = eef_lst.reshape(eef_lst.shape[1], eef_lst.shape[2])
            eef_affine = src.transform

        for key, val in point_data.items():
            x, y, z = val['coords']
            col, row = ~lst_affine * (x, y)
            val = lst_arr[int(row), int(col)]
            point_data[key]['lst_found'] = val
            point_data[key]['lst_row_col'] = int(row), int(col)

            col, row = ~eef_affine * (x, y)
            val = eef_lst[int(row), int(col)]
            point_data[key]['eef_lst'] = val
            point_data[key]['eef_row_col'] = int(row), int(col)

        return point_data
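
Both versions of raster_point_extract rely on the same pattern: inverting the raster's affine transform to turn map coordinates into array row/column indices. A minimal sketch of that lookup on its own, assuming rasopen is rasterio.open and the point falls inside the raster; the path and coordinates below are hypothetical.

from rasterio import open as rasopen


def sample_at(raster_path, x, y):
    """Return the raster value at map coordinates (x, y)."""
    with rasopen(raster_path, 'r') as src:
        arr = src.read(1)                    # first band as a 2-D array
        col, row = ~src.transform * (x, y)   # inverse affine: map coords -> fractional col/row
    return arr[int(row), int(col)]


value = sample_at('gridmet.tif', -111.85, 46.6)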
Example No. 7
def get_shapefile_lat_lon(shapefile):
    ''' Center of shapefile'''
    with fopen(shapefile, "r") as src:
        minx, miny, maxx, maxy = src.bounds
        latc = (maxy + miny) / 2
        lonc = (maxx + minx) / 2

    return latc, lonc
Example No. 8
 def tile_bbox(self):
     with fopen(WRS_2, 'r') as wrs:
         for feature in wrs:
             fp = feature['properties']
             if fp['PATH'] == self.geography.path and fp[
                     'ROW'] == self.geography.row:
                 bbox = feature['geometry']
                 return bbox
Example No. 9
def get_wrs2_features(path, row):
    with fopen(WRS2) as src:
        for feat in src:
            poly = shape(feat['geometry'])
            propr = feat['properties']
            if propr['PATH'] == path and propr['ROW'] == row:
                return [feat]
    return None
Example No. 10
 def test_write_shapefile(self):
     flux = FluxSite(self.data_perm)
     data = flux.data
     flux.write_locations_to_shp(data, self.shape_out)
     with fopen(self.shape_out, 'r') as shp:
         count = 0
         for feature in shp:
             count += 1
         self.assertEqual(count, 252)
Example No. 11
 def test_sample_points(self):
     montana = MontanaTest()
     p = PixelTrainingArray(root=self.directory, geography=montana, instances=10,
                            overwrite_array=True)
     p.extract_sample(save_points=True)
     with fopen(p.shapefile_path, 'r') as src:
         points = [x for x in src]
     self.assertGreater(len(points), 40)
     self.assertGreater(p.extracted_points.shape[0], 40)
Example No. 12
 def geojson_to_shp(self, out_folder, geojson_dict, srs):
     try:
         code = from_epsg(srs)
         b_json = json.dumps(geojson_dict['geojson']).encode('utf-8')
         geojson = fiona.ogrext.buffer_to_virtual_file(b_json)
         file_name = self.char_replace(geojson_dict["name"]) + ".shp"
         out_path = os.path.join(out_folder, file_name)
         with fopen(geojson) as source:
             with fopen(out_path,
                        "w",
                        driver="ESRI Shapefile",
                        crs=code,
                        schema=source.schema) as sink:
                 for rec in source:
                     sink.write(rec)
         print("Shapefile written to %s " % out_path)
     except Exception as e:
         print(e)
     return
Example No. 13
def count_project_fields(source, tables):
    ct = 0
    for t in tables:
        shp = os.path.join(source, '{}.shp'.format(t))
        with fopen(shp, 'r') as src:
            print(shp)
            sub_ct = len([x for x in src])
            ct += sub_ct
            print(sub_ct)

    print(ct)
Example No. 14
    def test_find_closest_station(self):
        """ Test find closest agrimet station to any point.
        :return: 
        """
        coords = []
        with fopen(self.point_file, 'r') as src:
            for feature in src:
                coords.append(feature['geometry']['coordinates'])

        for coord in coords:
            agrimet = Agrimet(lon=coord[0], lat=coord[1])
            self.assertTrue(agrimet.station in self.site_ids)
Example No. 15
def filter_shapefile_overlapping(shapefile, out_directory=None):
    """ Shapefiles may span multiple path/rows/years.
    For training, we want all of the data available.
    This function filters the polygons contained in
    the shapefile into separate files for each path/row/year
    contained in the shapefile. """
    path_row_map = defaultdict(list)
    wrs2 = fopen('../spatial_data/wrs2_descending_usa.shp', 'r')
    tree, path_rows, features = _construct_kdtree(wrs2)
    wrs2.close()

    cent_arr = array([0, 0])
    with fopen(shapefile, "r") as src:
        meta = deepcopy(src.meta)
        for feat in src:
            poly = shape(feat['geometry'])
            centroid = poly.centroid.coords[0]
            cent_arr[0] = centroid[0]
            cent_arr[1] = centroid[1]
            centroid = cent_arr.reshape(1, -1)
            dist, ind = tree.query(centroid, k=10)
            tiles = features[ind[0]]
            prs = get_pr_subset(poly, tiles)  # gets the matching path/rows

            for p in prs:
                path_row_map[p].append(feat)

    if out_directory is None:
        return path_row_map, meta

    outfile = os.path.basename(shapefile)
    outfile = os.path.splitext(outfile)[0]

    for path_row in path_row_map:
        out = outfile + "_" + path_row + ".shp"
        with fopen(os.path.join(out_directory, out), 'w', **meta) as dst:
            print("Saving {}".format(out))
            for feat in path_row_map[path_row]:
                dst.write(feat)
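
A short usage sketch with a hypothetical input shapefile: called without out_directory the function returns the path/row-to-features mapping plus the fiona metadata; with out_directory it writes one shapefile per path/row.

path_row_map, meta = filter_shapefile_overlapping('training_polygons.shp')
filter_shapefile_overlapping('training_polygons.shp',
                             out_directory='split_by_path_row')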
Example No. 16
 def test_find_sites(self):
     with fopen(self.test_locations, 'r') as src:
         for feat in src:
             lat, lon = feat['geometry']['coordinates'][1], feat[
                 'geometry']['coordinates'][0]
             expected_site = feat['properties']['siteid']
             #  don't test on neighboring sites
             if expected_site not in self.excepted_sites:
                 agrimet = Agrimet(lat=lat,
                                   lon=lon,
                                   start_date=self.start,
                                   end_date=self.end)
                 found_site = agrimet.station
                 self.assertEqual(expected_site, found_site)
Example No. 17
def multi_raster_point_extract(local_raster, geographic_raster, points,
                               image_profile):
    """ Get raster data from two rasters at shapefile points.
    :param local_raster: Thredds.Gridmet-derived array in Landsat image geometry.
    :param geographic_raster: Native netcdf gridmet raster.
    :param points: Shapefile points, in geographic crs. 
    :param image_profile: Landsat.LandsatX.profile, contains CRS and other raster info.
    :return: Dict of coords, row/cols, and values at those points.
    """
    point_data = {}
    with fopen(points, 'r') as src:
        for feature in src:
            try:
                name = feature['properties']['Name']
            except KeyError:
                name = feature['properties']['FID']
            geo_coords = feature['geometry']['coordinates']
            image_crs = image_profile['crs']
            in_proj = Proj(image_crs)
            utm = in_proj(geo_coords[0], geo_coords[1])
            point_data[name] = {'coords': {'geo': geo_coords, 'utm': utm}}

        with rasopen(local_raster, 'r') as srrc:
            local_arr = srrc.read()
            local_arr = local_arr.reshape(local_arr.shape[1],
                                          local_arr.shape[2])
            local_affine = srrc.transform

        with rasopen(geographic_raster, 'r') as ssrc:
            geo_raster = ssrc.read()
            geo_raster = geo_raster.reshape(geo_raster.shape[1],
                                            geo_raster.shape[2])
            geo_affine = ssrc.transform

        for key, val in point_data.items():
            i, j = val['coords']['utm']
            col, row = ~local_affine * (i, j)
            local_val = local_arr[int(row), int(col)]
            point_data[key]['local_val'] = local_val
            point_data[key]['local_row_col'] = int(row), int(col)

            x, y, z = val['coords']['geo']
            col, row = ~geo_affine * (x, y)
            geo_val = geo_raster[int(row), int(col)]
            point_data[key]['geo_val'] = geo_val
            point_data[key]['geo_row_col'] = int(row), int(col)

        return point_data
Example No. 18
    def save_sample_points(self):

        points_schema = {
            'properties': dict([('FID', 'int:10'), ('POINT_TYPE', 'int:10')]),
            'geometry': 'Point'
        }
        meta = self.tile_geometry.copy()
        meta['schema'] = points_schema

        with fopen(self.shapefile_path, 'w', **meta) as output:
            for index, row in self.extracted_points.iterrows():
                props = dict([('FID', row['FID']),
                              ('POINT_TYPE', row['POINT_TYPE'])])
                pt = Point(row['X'], row['Y'])
                output.write({'properties': props, 'geometry': mapping(pt)})
        return None
Example No. 19
    def test_write_agrimet_shapefile(self):

        agrimet = Agrimet(write_stations=True)
        station_data = agrimet.load_stations()
        epsg = '4326'
        outfile = self.out_shape
        agrimet.write_agrimet_sation_shp(station_data, epsg, outfile)
        with fopen(outfile, 'r') as shp:
            count = 0
            for _ in shp:
                count += 1
        self.assertEqual(186, count)
        file_list = os.listdir(self.points_dir)
        for f in file_list:
            if 'write_test' in f:
                os.remove(os.path.join(self.points_dir, f))
Example No. 20
    def __init__(self, vector=None, profile=None, latlon=True):
        BBox.__init__(self)

        if vector:
            with fopen(vector, 'r') as src:
                self.crs = src.crs
                self.epsg = int(src.crs['init'].split(":")[1])
                self.profile = src.profile
                self.meta = src.meta
                self.west, self.south, self.east, self.north = src.bounds

        if latlon and self.crs != {'init': 'epsg:4326'}:
            in_proj = Proj(init=self.profile['crs']['init'])
            self.west, self.north = in_proj(self.west, self.north, inverse=True)
            self.east, self.south = in_proj(self.east, self.south, inverse=True)

Example No. 21
    def _get_polygons(self, vector):
        with fopen(vector, 'r') as src:
            crs = src.crs
            if not crs:
                raise NoCoordinateReferenceError(
                    'Provided shapefile has no reference data.')
            if crs['init'] != 'epsg:4326':
                raise UnexpectedCoordinateReferenceSystemError(
                    'Provided shapefile should be in unprojected (geographic) '
                    'coordinate system, i.e., WGS84, EPSG 4326, {} is not'.
                    format(vector))
            clipped = src.filter(mask=self.tile_bbox)
            polys = []
            bad_geo_count = 0
            for feat in clipped:
                try:
                    geo = shape(feat['geometry'])
                    polys.append(geo)
                except AttributeError:
                    bad_geo_count += 1

        return polys
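
The src.filter(mask=...) call above restricts iteration to features intersecting a GeoJSON-like geometry. A minimal sketch of that filter in isolation, with a hypothetical shapefile and mask polygon, assuming fiona >= 1.8 (where the mask keyword was added).

from fiona import open as fopen

mask_geom = {'type': 'Polygon',
             'coordinates': [[(-112.0, 46.0), (-111.0, 46.0), (-111.0, 47.0),
                              (-112.0, 47.0), (-112.0, 46.0)]]}
with fopen('fields.shp', 'r') as src:
    hits = [feat for feat in src.filter(mask=mask_geom)]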
Example No. 22
def build_summary_table_monthly(source, shapes, tables, out_loc, project='oe'):
    master = DataFrame()
    lat, lon = None, None
    for table in source:
        print('Processing {}'.format(table))
        try:
            csv = read_csv(os.path.join(tables, '{}.csv'.format(table)))

            shp = os.path.join(shapes, '{}.shp'.format(table))
            with fopen(shp) as src:
                # .shp files should be in epsg: 102300
                for feat in src:
                    if src.crs != {'init': 'epsg:4326'}:
                        coords = feat['geometry']['coordinates'][0][0]
                        if len(coords) > 2:
                            coords = coords[0]

                        lat, lon = state_plane_MT_to_WGS(coords[1], coords[0])
                        break
                    else:
                        lat, lon = feat['geometry']['coordinates'][1], feat['geometry']['coordinates'][0]

            d = DataCollector(project=project, csv=csv, table=table, lat=lat, lon=lon, monthly=True)
            d.get_table_data_monthly()
            d.df.dropna(axis=0, how='all', inplace=True)
            master = concat([master, d.df])

        except FileNotFoundError:
            print('{} not found'.format(table))

    if project == 'oe':
        master['DIVERSIONS'] = DIVERSIONS
        master = master[master['DIVERSIONS'] > 0.]
        master['EFF'] = master['Crop_Cons_af'] / master['DIVERSIONS']

    master.to_csv(os.path.join(out_loc, 'HUC_8_Monthly.csv'), date_format='%Y-%m')
Example No. 23
def point_target_extract(points, nlcd_path,
                         target_shapefile=None, count_limit=None):
    point_data = {}
    with fopen(points, 'r') as src:
        for feature in src:
            name = feature['id']
            proj_coords = feature['geometry']['coordinates']
            point_data[name] = {'point': feature['geometry'],
                                'coords': proj_coords}
            # point_crs = src.profile['crs']['init']
    pt_ct = 0
    for pt_id, val in point_data.items():
        pt_ct += 1
        if count_limit is None or pt_ct < count_limit:
            pt = shape(val['point'])
            with fopen(target_shapefile, 'r') as target_src:
                has_attr = False
                for t_feature in target_src:
                    polygon = t_feature['geometry']
                    if pt.within(shape(polygon)):
                        print('pt id {}, props: {}'
                              .format(pt_id, t_feature['properties']))
                        props = t_feature['properties']
                        point_data[pt_id]['properties'] = {'IType': props['IType'],
                                                           'LType': props['LType']}

                        has_attr = True
                        break

                if not has_attr:
                    if nlcd_path:
                        with rasopen(nlcd_path, 'r') as rsrc:
                            rass_arr = rsrc.read()
                            rass_arr = rass_arr.reshape(rass_arr.shape[1], rass_arr.shape[2])
                            affine = rsrc.transform  # .affine was removed in rasterio 1.x

                            x, y = val['coords']
                            col, row = ~affine * (x, y)
                            raster_val = rass_arr[int(row), int(col)]
                            ltype_dct = {'IType': None,
                                         'LType': str(raster_val)}
                            point_data[pt_id]['properties'] = ltype_dct
                            print('id {} has no FLU, '
                                  'nlcd {}'.format(pt_id,
                                                   nlcd_value(ltype_dct['LType'])))
                    else:
                        ltype_dct = {'IType': None,
                                     'LType': None}
                        point_data[pt_id]['properties'] = ltype_dct

    idd = []
    ltype = []
    itype = []
    x = []
    y = []
    ct = 0
    for pt_id, val in point_data.items():
        ct += 1
        if count_limit is None or ct < count_limit:
            idd.append(pt_id)
            ltype.append(val['properties']['LType'])
            itype.append(val['properties']['IType'])
            x.append(val['coords'][0])
            y.append(val['coords'][1])
        else:
            break
    dct = dict(zip(['ID', 'LTYPE', 'ITYPE', 'X', 'Y'],
                   [idd, ltype, itype, x, y]))
    df = DataFrame(data=dct)

    return df
Example No. 24
def main(reference_data, raster_classif_file, yaml_file, pre_min_het,
         pre_max_het, area_threshold, db_str='main'):
    """FOI assessment is based on spatial analysis of a “thematic” raster
    produced in advance.

    The thematic raster can be the result of a any image/raster processing
    method yielding a class label for each pixel - crop classification, behavior
    analysis of land phenomenon, gridded data on soil, slope, humidity, etc.
    The starting point is the idea that inside a homogeneous parcel we
    should have only one type of pixel.
    For example, if the thematic raster is the result of a crop classification,
    inside a parcel we should have only one type of pixel, representing the
    respective crop.
    If the thematic raster is the result of a behaviour analysis, all the pixels
    inside a parcel should behave in the same way during a period of time.
    The FOI assessment is based on the analysis made on the presence and
    distribution of different types of pixels inside the FOI.

    Args:
        reference_data (str): Spatial data to be tested -
            parcels that will be checked for heterogeneity and cardinality
            The parcel polygons in .shp file format, or a
            database table name without the .shp extension.
        raster_classif_file (str): Thematic raster - classification raster, or
            raster from other source that will be used for testing
            heterogeneity and cardinality
        yaml_file: YAML file that holds the classes of the thematic raster file;
            can also be a simple list of values in the notebook giving the
            correspondence between pixel values and class names.
        pre_min_het: Minimum thresholds for heterogeneity checks.
        pre_max_het: Maximum thresholds for heterogeneity checks.
        area_threshold: Minimum area for clusters selection.

    Returns:
        bool: True if successful, False otherwise.

    """

    # database connection string
    if type(db_str) is str:
        db_connection = f"PG:{db.conn_str(db_str)}"
    elif type(db_str) is list:
        db_connection = "PG:host={} port={} dbname={} user={} password={}".format(
            *db_str)

    def db_conn():
        if type(db_str) is str:
            return db.conn(db_str)
        elif type(db_str) is list:
            return psycopg2.connect("host={} port={} dbname={} user={} password={}".format(*db_str))

    # ogr2ogr options
    geom_field_name = "GEOMETRY_NAME=wkb_geometry"
    overwrite_option = "-OVERWRITE"
    geom_type = "MULTIPOLYGON"
    output_format = "PostgreSQL"

    # Path for storing the processed data - final spatial data that will be
    #    exported after database processing
    processed_data = normpath(join('foi', 'processed_data'))
    os.makedirs(processed_data, exist_ok=True)

    # Path for storing the final output data
    output_data = normpath(join('foi', 'output_data'))
    os.makedirs(output_data, exist_ok=True)
    reference_data_name = os.path.splitext(os.path.basename(reference_data))[0]
    try:
        with open(f"{config.get_value(['paths','temp'])}tb_prefix", 'r') as f:
            reference_data_table = f.read()
    except Exception:
        reference_data_table = reference_data_name

    # Vector file resulting from the raster stats pixel count
    # pixelcount_output = f'{output_data}pixel_count_{reference_data_table}.shp'

    pixelcount_output = f'{processed_data}/{reference_data_name}_pixelcount.shp'
    # Vector file resulting from the raster to vector process (polygonize)
    polygonize_output = f'{processed_data}/{reference_data_name}_polygonize.shp'

    # Name of the table to be created in the database - import of the pixel
    #   count into the database
    pixelcount_table = f"{reference_data_name}_pixelcount"
    # Name of the table to be created in the database - import of the
    #   polygonize result into the database
    polygonize_table = f"{reference_data_name}_polygonize"

    # Name and path of the files resulted from the analysis
    heterogeneity_output = f'{output_data}/{reference_data_name}_foih_v1.shp'
    cardinality_output = f'{output_data}/{reference_data_name}_foic_v1.shp'
    cardinality_output_clusters = f'{output_data}/{reference_data_name}_foic_clusters_v1.shp'

    sql = "SELECT * FROM " + reference_data_table + ";"
    try:
        ps_connection = db_conn()

        ps_connection.autocommit = True

        cursor = ps_connection.cursor()

        gpd_data = gpd.read_postgis(
            sql=sql, con=ps_connection, geom_col='wkb_geometry')

    except (Exception, psycopg2.DatabaseError) as error:
        print("Error while connecting to PostgreSQL", error)

    finally:
        # closing database connection.
        if(ps_connection):
            cursor.close()
            ps_connection.close()
            # print("PostgreSQL connection is closed")

    temp_reference_data = f'foi/{reference_data_name}_temp.shp'

    gpd_data.to_file(temp_reference_data)

    shape = fiona.open(temp_reference_data)
    spatialRef = shape.crs["init"]
#     print("Vector EPSG: ", spatialRef)

    # Import reference data shapefile to database.
    # Overwrite option is needed, otherwise the import will append new
    # values to the ones existing in the table
    subprocess.call(["ogr2ogr", overwrite_option, "-nlt", geom_type, "-lco",
                     geom_field_name, "-a_srs", spatialRef, "-nln",
                     reference_data_table, "-f", "PostgreSQL", db_connection,
                     reference_data])

    # Reading the values from yaml file
    conf = load(open(yaml_file, 'r').read(), Loader=FullLoader)
    category_map = conf['category_map']
    rst_fields = list(category_map.values())

    # Counting the number of pixels for each parcel. The fields with names of
    #   the classes from yaml file will be added,
    # and updated with the number of pixels from each category
    with fopen(temp_reference_data, 'r') as input:
        spatialRef = input.crs["init"]
        schema = input.schema

        for i in rst_fields:
            schema['properties'][i] = 'int:5'

        rst_attribs = dict.fromkeys(rst_fields, 0)

        with fopen(pixelcount_output, 'w', 'ESRI Shapefile', schema) as output:
            for i, vct_feat in enumerate(input):
                vct_val_dict = dict(vct_feat['properties'])
                rst_val_dict = zonal_stats(
                    vct_feat, raster_classif_file,
                    categorical=True, copy_properties=True,
                    category_map=category_map, nodata=-999)[0]
                vct_val_dict.update(rst_attribs)

                for lu in rst_val_dict:
                    vct_val_dict[lu] = rst_val_dict.get(lu)

                for atrib in vct_val_dict:
                    vct_feat['properties'][atrib] = vct_val_dict.get(atrib)

                output.write(vct_feat)
    print("Finished pixel calculation!")

    # Import the resulting shapefile, with the number of pixels for each class,
    #   to the database. The overwrite option is needed, otherwise the
    #   import will append new values to the ones existing in the table
    subprocess.call(["ogr2ogr", overwrite_option, "-nlt", geom_type, "-a_srs",
                     spatialRef, "-nln",  pixelcount_table, "-f", "PostgreSQL",
                     db_connection,  pixelcount_output])

    # Number of classes from the thematic raster
    num_classes = len(category_map)
    # Minimum and maximum thresholds for heterogeneity checks. In this example,
    #   any parcel with a percentage of pixels for one class between 30 and 70
    #   percent of the total will be considered heterogeneous.
    # min_heterogeneity_threshold = 30
    # max_heterogeneity_threshold = 70
    min_heterogeneity_threshold = pre_min_het
    max_heterogeneity_threshold = pre_max_het

    # Calling the PostgreSQL function which checks the heterogeneity.
    # The function calculates the percentages and sets an attribute
    # "foi_h" to 1 when the percentage of pixels is between the thresholds
    try:
        ps_connection = db_conn()

        ps_connection.autocommit = True

        cursor = ps_connection.cursor()

        # call stored procedure
        cursor.callproc('public.check_heterogeneity', (
            pixelcount_table, num_classes, min_heterogeneity_threshold,
            max_heterogeneity_threshold))

        print("Running function to check heterogeneity")

    except (Exception, psycopg2.DatabaseError) as error:
        print("Error while connecting to PostgreSQL", error)

    finally:
        # closing database connection.
        if(ps_connection):
            cursor.close()
            ps_connection.close()
            print("PostgreSQL connection is closed")
    print("Heterogeneity assessment function finished")

    # Export processed data - heterogeneity, to shapefile
    subprocess.call(["ogr2ogr", "-f", "ESRI Shapefile",
                     heterogeneity_output, db_connection, pixelcount_table])
    print("Heterogeneity analysis output downloaded")

    # Polygonize the thematic raster. The process takes into account only
    #   one band (in this case - first band). Can be used with 8 connected
    #   pixels or with 4 connected pixels.
    connectedness = '-8'
    sourceRaster = gdal.Open(raster_classif_file)
    band = sourceRaster.GetRasterBand(1)
    srs = osr.SpatialReference(wkt=sourceRaster.GetProjection())
    dst_layername = polygonize_output
    drv = ogr.GetDriverByName("ESRI Shapefile")
    dst_ds = drv.CreateDataSource(dst_layername)
    dst_layer = dst_ds.CreateLayer(dst_layername, srs=srs)
    fd = ogr.FieldDefn("DN", ogr.OFTInteger)
    dst_layer.CreateField(fd)
    dst_field = dst_layer.GetLayerDefn().GetFieldIndex("DN")
    gdal.Polygonize(band, None, dst_layer, dst_field,
                    [connectedness], callback=None)
    dst_ds.Destroy()

    # Import polygonize result to database
    subprocess.call(["ogr2ogr", overwrite_option, "-nlt", geom_type, "-lco",
                     geom_field_name, "-nln",  polygonize_table, "-f",
                     output_format, db_connection,  polygonize_output])

    # Names of the tables to be created in the database during the processing
    processed_clusters = polygonize_table + "_clusters"
    processed_cardinality = polygonize_table + "_cardin"
    # Spatial data to be tested - parcels that will be checked for cardinality
    #   (I think we should use the same data as for heterogeneity)
    # reference_table = 'reference_data'
    # Minimum area for clusters selection - only clusters bigger than the
    #   threshold will be counted
    # area_threshold = 2000

    # Calling the PostgreSQL function which checks the cardinality. The function
    #   fixes the geometry of the spatial data resulting from polygonize, clips
    #   the polygonize result with the parcels that need to be checked,
    #   calculates the area of the clusters inside each parcel, and selects the
    #   clusters that are of more than one type, each of them bigger than the
    #   threshold, in each parcel.
    # The function creates two new tables: one with the clusters that match
    #   the conditions, the other with the data to be tested and a new column
    #   "foi_c" which is 1 if the parcel has more than two types of clusters
    #   with an area bigger than the threshold

    # TO DO: put the unique identifier as function param

    try:
        ps_connection = db_conn()

        ps_connection.autocommit = True

        cursor = ps_connection.cursor()

        # call stored procedure
        # cursor.callproc('public.check_cardinality', (
        #     polygonize_table, reference_data_table, area_threshold))
        cursor.execute(
            "CALL public.check_cardinality_procedure( %s, %s, %s, %s); ",
            (polygonize_table, reference_data_table, area_threshold, 10000))

        print("Running function to check cardinality")

    except (Exception, psycopg2.DatabaseError) as error:
        print("Error while connecting to PostgreSQL", error)

    finally:
        # closing database connection.
        if(ps_connection):
            cursor.close()
            ps_connection.close()
            print("PostgreSQL connection is closed")

    # Export processed data - clusters, to shapefile
    subprocess.call(["ogr2ogr", "-f", "ESRI Shapefile",
                     cardinality_output_clusters, db_connection,
                     processed_clusters])
    print("Cardinality assessment function finished")

    # Export processed data - data to be tested with "foi_c" flag, to shapefile
    subprocess.call(["ogr2ogr", "-f", "ESRI Shapefile",
                     cardinality_output, db_connection, processed_cardinality])
    print("Cardinality analysis output downloaded")

    filelist_temp = [f for f in os.listdir(
        'foi') if f.startswith(Path(temp_reference_data).stem)]
    for f in filelist_temp:
        os.remove(os.path.join('foi', f))
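
The core of the heterogeneity step above is the per-parcel categorical pixel count produced by rasterstats.zonal_stats; the stored procedure then flags parcels whose class percentages fall between the thresholds. A condensed sketch of just that counting step, with a hypothetical category_map; the file paths are placeholders.

from rasterstats import zonal_stats


def parcel_class_counts(parcels_shp, thematic_raster, category_map):
    # one dict per parcel, keyed by class name, e.g. {'wheat': 412, 'maize': 75}
    return zonal_stats(parcels_shp, thematic_raster, categorical=True,
                       category_map=category_map, nodata=-999)


counts = parcel_class_counts('parcels.shp', 'crop_classification.tif',
                             {1: 'wheat', 2: 'maize'})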
Example No. 25
### Choose csv output file ###
csv_filename = conf['csv_output_path']
file_opt['title'] = 'Please select the csv output file'
file_opt['filetypes'] = [('csv files', '.csv')]
file_opt['initialdir'] = expanduser(csv_filename)
csv_filename = asksaveasfilename(**file_opt)
if csv_filename == '':
    print('No csv file name selected')
    exit()

### Dump the paths to yaml file on disk ###
safe_dump(conf, open(yaml_file, 'w', encoding='utf-8'), allow_unicode=True, default_flow_style=False)


# Open shapefile using Fiona
with fopen(vector_filename, 'r') as vct:
      
    # Get field names of input vector layer
    vct_fields = []
    feat = vct[0]
    for fld in feat['properties']:
        vct_fields.append(fld)
    
    # Merge field name lists
    fieldnames = vct_fields + rst_fields

    with open(csv_filename, 'w', newline='') as csvfile:
        # Start writing csv and add attribute names for first row
        writer = DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
Example No. 26
def attribute_shapefile(shp, *results):
    df = None
    s = None

    out = shp.replace('.shp', '_pym.shp')

    agri_schema = {
        'geometry': 'Polygon',
        'properties': {
            'OBJECTID': 'int',
            'Supply': 'str',
            'Acres': 'float',
            'System': 'str',
            'Crop': 'str',
            'PIXELS': 'int'
        }
    }

    first = True

    for res in results[0]:
        year = int(res[-8:-4])
        c = read_csv(res)
        c['MONTH'][c['MONTH'] < 4] = nan
        c['MONTH'][c['MONTH'] > 10] = nan
        c.dropna(axis=0, how='any', inplace=True)

        c = c.groupby('OBJECTID').agg({
            'NDVI': 'mean',
            'ETRF': 'mean',
            'ETR_MM': 'sum',
            'ET_MM': 'sum',
            'PPT_MM': 'sum',
            'PIXELS': 'median'
        }).reset_index()

        renames = {
            'NDVI': 'NDVI_{}'.format(year),
            'ETRF': 'ETRF_{}'.format(year),
            'ETR_MM': 'ETR_{}'.format(year),
            'ET_MM': 'ET_{}'.format(year),
            'PPT_MM': 'PPT_{}'.format(year)
        }

        c.rename(columns=renames, inplace=True)

        if first:
            df = deepcopy(c)
            s = {int(r['OBJECTID']): int(r['PIXELS']) for i, r in c.iterrows()}
            first = False
        else:
            df.drop(columns=['PIXELS', 'OBJECTID'], axis=1, inplace=True)
            df = concat([df, c], join='outer', axis=1, sort=True)

        schema_dict = {
            'NDVI_{}'.format(year): 'float',
            'ETRF_{}'.format(year): 'float',
            'ETR_{}'.format(year): 'float',
            'ET_{}'.format(year): 'float',
            'PPT_{}'.format(year): 'float'
        }

        agri_schema['properties'].update(schema_dict)

    with fopen(shp, 'r') as src:
        src_crs = src.crs
        src_driver = src.driver

        with collection(out,
                        mode='w',
                        driver=src_driver,
                        schema=agri_schema,
                        crs=src_crs) as output:
            for rec in src:
                p = rec['properties']
                props = {
                    'OBJECTID': p['OBJECTID'],
                    'Supply': p['Supply_Sou'],
                    'Acres': p['Acres'],
                    'System': p['System_Typ'],
                    'Crop': p['Crop_Type']
                }

                props.update(
                    df[df['OBJECTID'] == p['OBJECTID']].to_dict('records')[0])
                props.update({'PIXELS': s[p['OBJECTID']]})
                props['OBJECTID'] = int(props['OBJECTID'])
                output.write({
                    'geometry': rec['geometry'],
                    'properties': props,
                    'id': p['OBJECTID']
                })
Example No. 27
def filter_shapefile_non_overlapping(base, base_shapefile, data_directory):
    """
    Shapefiles may deal with data over multiple path/rows.
    This is a method to get the minimum number of
    path/rows required to cover all features. 
    Data directory: where the split shapefiles will be saved.
    base: directory containing base_shapefile."""
    path_row = defaultdict(list)
    id_mapping = {}
    # TODO: un hardcode this directory.
    wrs2 = fopen('../spatial_data/wrs2_descending_usa.shp', 'r')
    tree, path_rows, features = _construct_kdtree(wrs2)
    wrs2.close()

    cent_arr = array([0, 0])
    with fopen(os.path.join(base, base_shapefile), "r") as src:
        meta = deepcopy(src.meta)
        for feat in src:
            idd = feat['id']
            id_mapping[idd] = feat
            poly = shape(feat['geometry'])
            centroid = poly.centroid.coords[0]
            cent_arr[0] = centroid[0]
            cent_arr[1] = centroid[1]
            centroid = cent_arr.reshape(1, -1)
            dist, ind = tree.query(centroid, k=10)
            tiles = features[ind[0]]
            prs = get_pr_subset(poly, tiles)
            for p in prs:
                path_row[p].append(idd)

    non_unique_ids = defaultdict(list)
    unique = defaultdict(list)
    for key in path_row:
        ls = path_row[key]  # all features in a given path/row
        placeholder = ls.copy()
        for key1 in path_row:
            if key != key1:
                ls1 = path_row[key1]
                # find unique keys in ls
                placeholder = set(placeholder) - set(ls1)  #all
                # features present in placeholder that are not
                # present in ls1; i.e. unique keys
        unique[key] = list(placeholder)
        if len(ls) != len(placeholder):
            nu = set(ls) - set(
                placeholder
            )  # all features present in ls that are not present in placeholder (non-unique)
            for idd in list(nu):
                non_unique_ids[idd].append(key)

    match_key = []
    for key in non_unique_ids:  # unique ids
        pr = None
        hi = 0
        for pathrow in non_unique_ids[key]:  # path/rows corresponding to non
            # unique features
            if len(unique[pathrow]) > hi:
                pr = pathrow
                hi = len(unique[pathrow])

        if pr is not None:
            unique[pr].append(key)
        else:
            choice = non_unique_ids[key]
            choice.sort()
            choice = choice[0]
            unique[choice].append(key)

    prefix = os.path.splitext(base_shapefile)[0]
    for key in unique:
        if key is None:
            continue
        out = prefix + "_" + key + ".shp"
        if len(unique[key]):
            with fopen(os.path.join(data_directory, out), 'w', **meta) as dst:
                print("Saving split shapefile to: {}".format(
                    os.path.join(data_directory, out)))
                for feat in unique[key]:
                    dst.write(id_mapping[feat])
Example No. 28
 def tile_geometry(self):
     with fopen(WRS_2, 'r') as wrs:
         wrs_meta = wrs.meta.copy()
     return wrs_meta
Example No. 29
def proc(vector_file, raster_file, yaml_file, pre_min_het, pre_max_het,
         area_threshold):
    path_data = f"{config.get_value(['paths', 'temp'])}foi/"
    # database connection string
    db_connection = f"PG:{database.conn_str(db=1)}"
    # ogr2ogr options
    geom_field_name = "GEOMETRY_NAME=geom"
    overwrite_option = "-OVERWRITE"
    geom_type = "MULTIPOLYGON"
    output_format = "PostgreSQL"

    # Path for storing the processed data - final spatial data that will be exported after database processing
    processed_data = f'{path_data}processed_data/'
    os.makedirs(processed_data, exist_ok=True)
    # Spatial data to be tested - parcels that will be checked for heterogeneity and cardinality
    reference_data = vector_file
    # Thematic raster - classification raster, or raster from other
    # source that will be used for testing heterogeneity and cardinality
    raster_classif_file = raster_file
    # YAML file that holds the classes from the thematic raster file - can also be a simple list of values in the notebook
    # giving the correspondence between pixel values and names for the classes
    # yaml_file = f'{path_data}pixelvalues_classes.yml'

    output_data = f'{path_data}output_data/'
    os.makedirs(output_data, exist_ok=True)
    reference_data_name = os.path.splitext(os.path.basename(reference_data))[0]
    try:
        with open(f"{config.get_value(['paths','temp'])}tb_prefix", 'r') as f:
            reference_data_table = f.read()
    except Exception:
        reference_data_table = reference_data_name

    # Vector file resulting from the raster stats pixel count
    #pixelcount_output = f'{output_data}pixel_count_{reference_data_table}.shp'

    pixelcount_output = f'{processed_data}' + reference_data_name + '_pixelcount.shp'
    # Vector file resulting from the raster to vector process (polygonize)
    polygonize_output = f'{processed_data}' + reference_data_name + '_polygonize.shp'

    # Name of the table to be created in the database - import of the pixel count into the database
    pixelcount_table = f"{reference_data_name}_pixelcount"
    # Name of the table to be created in the database - import of the polygonize result into the database
    polygonize_table = f"{reference_data_name}_polygonize"

    # Name and path of the files resulting from the analysis
    heterogeneity_output = f'{output_data}' + reference_data_name + '_foih_v1.shp'
    cardinality_output = f'{output_data}' + reference_data_name + '_foic_v1.shp'
    cardinality_output_clusters = f'{output_data}' + reference_data_name + '_foic_clusters_v1.shp'

    shape = fiona.open(reference_data)
    spatialRef = shape.crs["init"]
    #     print("Vector EPSG: ", spatialRef)

    # Import reference data shapefile to database. Overwrite option is needed, otherwise the
    # import will append new values to the ones existing in the table
    subprocess.call([
        "ogr2ogr", overwrite_option, "-nlt", geom_type, "-lco",
        geom_field_name, "-a_srs", spatialRef, "-nln", reference_data_table,
        "-f", "PostgreSQL", db_connection, reference_data
    ])

    # Reading the values from yaml file
    conf = load(open(yaml_file, 'r').read(), Loader=FullLoader)
    category_map = conf['category_map']
    rst_fields = list(category_map.values())

    # Counting the number of pixels for each parcel. The fields with names of the classes from yaml file will be added,
    # and updated with the number of pixels from each category
    with fopen(reference_data, 'r') as input:
        spatialRef = input.crs["init"]
        schema = input.schema

        for i in rst_fields:
            schema['properties'][i] = 'int:5'

        rst_attribs = dict.fromkeys(rst_fields, 0)

        with fopen(pixelcount_output, 'w', 'ESRI Shapefile', schema) as output:
            for i, vct_feat in enumerate(input):
                vct_val_dict = dict(vct_feat['properties'])
                rst_val_dict = zonal_stats(vct_feat,
                                           raster_classif_file,
                                           categorical=True,
                                           copy_properties=True,
                                           category_map=category_map,
                                           nodata=-999)[0]
                vct_val_dict.update(rst_attribs)

                for lu in rst_val_dict:
                    vct_val_dict[lu] = rst_val_dict.get(lu)

                for atrib in vct_val_dict:
                    vct_feat['properties'][atrib] = vct_val_dict.get(atrib)

                output.write(vct_feat)
    print("Finished pixel calculation!")

    # Import the resulting shapefile, with the number of pixels for each class, to the database. The overwrite option is needed, otherwise the
    # import will append new values to the ones existing in the table
    subprocess.call([
        "ogr2ogr", overwrite_option, "-nlt", geom_type, "-a_srs", spatialRef,
        "-nln", pixelcount_table, "-f", "PostgreSQL", db_connection,
        pixelcount_output
    ])

    # Number of classes from the thematic raster
    num_classes = len(category_map)
    # Minimum and maximum thresholds for heterogeneity checks. In this example, any parcel
    # with a percentage of pixels for one class between 30 and 70 percent of the total will be considered heterogeneous.
    # min_heterogeneity_threshold = 30
    # max_heterogeneity_threshold = 70
    min_heterogeneity_threshold = pre_min_het
    max_heterogeneity_threshold = pre_max_het

    # Calling the PostgreSQL function which checks the heterogeneity. The function calculates the percentages
    # and sets an attribute "foi_h" to 1 when the percentage of pixels is between the thresholds
    try:
        ps_connection = database.connection()

        ps_connection.autocommit = True

        cursor = ps_connection.cursor()

        # call stored procedure
        cursor.callproc(
            'public.check_heterogeneity',
            (pixelcount_table, num_classes, min_heterogeneity_threshold,
             max_heterogeneity_threshold))

        print("Running function to check heterogeneity")

    except (Exception, psycopg2.DatabaseError) as error:
        print("Error while connecting to PostgreSQL", error)

    finally:
        # closing database connection.
        if (ps_connection):
            cursor.close()
            ps_connection.close()
            print("PostgreSQL connection is closed")
    print("Heterogeneity assessment function finished")

    # Export processed data - heterogeneity, to shapefile
    subprocess.call([
        "ogr2ogr", "-f", "ESRI Shapefile", heterogeneity_output, db_connection,
        pixelcount_table
    ])
    print("Heterogeneity analysis output downloaded")

    # Polygonize the thematic raster. The process takes into account only
    # one band (in this case - first band). Can be used with 8 connected pixels or with 4 connected pixels.
    connectedness = '-8'
    sourceRaster = gdal.Open(raster_classif_file)
    band = sourceRaster.GetRasterBand(1)
    srs = osr.SpatialReference(wkt=sourceRaster.GetProjection())
    dst_layername = polygonize_output
    drv = ogr.GetDriverByName("ESRI Shapefile")
    dst_ds = drv.CreateDataSource(dst_layername)
    dst_layer = dst_ds.CreateLayer(dst_layername, srs=srs)
    fd = ogr.FieldDefn("DN", ogr.OFTInteger)
    dst_layer.CreateField(fd)
    dst_field = dst_layer.GetLayerDefn().GetFieldIndex("DN")
    gdal.Polygonize(band,
                    None,
                    dst_layer,
                    dst_field, [connectedness],
                    callback=None)
    dst_ds.Destroy()

    # Import polygonize result to database
    subprocess.call([
        "ogr2ogr", overwrite_option, "-nlt", geom_type, "-lco",
        geom_field_name, "-nln", polygonize_table, "-f", output_format,
        db_connection, polygonize_output
    ])

    # Names of the tables to be created in the database during the processing
    processed_clusters = polygonize_table + "_clusters"
    processed_cardinality = polygonize_table + "_cardin"
    # Spatial data to be tested - parcels that will be checked for cardinality (I think we should use the same
    # data as for heterogeneity)
    # reference_table = 'reference_data'
    # Minimum area for clusters selection - only clusters bigger than the threshold will be counted
    # area_threshold = 2000

    # Calling the PostgreSQL function which checks the cardinality. The function fixes the geometry
    # of the spatial data resulting from polygonize, clips the polygonize result with the parcels that need to be checked,
    # calculates the area of the clusters inside each parcel, and selects the clusters that are of more than one type, each of
    # them bigger than the threshold, in each parcel.
    # The function creates two new tables: one with the clusters that match the conditions,
    # the other with the data to be tested and a new column "foi_c" which is 1 if the parcel has more than two types
    # of clusters with an area bigger than the threshold

    # TO DO: put the unique identifier as function param

    try:
        ps_connection = database.connection()

        ps_connection.autocommit = True

        cursor = ps_connection.cursor()

        # call stored procedure
        #         cursor.callproc('public.check_cardinality',
        #                         (polygonize_table, reference_data_table, area_threshold))
        cursor.execute(
            "CALL public.check_cardinality_procedure( %s, %s, %s, %s); ",
            (polygonize_table, reference_data_table, area_threshold, 10000))

        print("Running function to check cardinality")

    except (Exception, psycopg2.DatabaseError) as error:
        print("Error while connecting to PostgreSQL", error)

    finally:
        # closing database connection.
        if (ps_connection):
            cursor.close()
            ps_connection.close()
            print("PostgreSQL connection is closed")

    # Export processed data - clusters, to shapefile
    subprocess.call([
        "ogr2ogr", "-f", "ESRI Shapefile", cardinality_output_clusters,
        db_connection, processed_clusters
    ])
    print("Cardinality assessment function finished")

    # Export processed data - data to be tested with "foi_c" flag, to shapefile
    subprocess.call([
        "ogr2ogr", "-f", "ESRI Shapefile", cardinality_output, db_connection,
        processed_cardinality
    ])
    print("Cardinality analysis output downloaded")