Example no. 1
0
def from_featureclass(filename, **kwargs):
    """
    Returns a GeoDataFrame (Spatially Enabled Pandas DataFrame) from a feature class.

    Three backends are tried in order of capability: ArcPy (any feature
    class), PyShp (shapefiles only), then Fiona (shapefiles or file
    geodatabase tables).  If none applies, ``None`` is returned.

    ===========================     ====================================================================
    **Argument**                    **Description**
    ---------------------------     --------------------------------------------------------------------
    filename                        Required string. Full path to the feature class
    ===========================     ====================================================================

    *Optional parameters when ArcPy library is available in the current environment*:
    ===========================     ====================================================================
    **Key**                         **Value**
    ---------------------------     --------------------------------------------------------------------
    sql_clause                      sql clause to parse data down. To learn more see
                                    [ArcPy Search Cursor](https://pro.arcgis.com/en/pro-app/arcpy/data-access/searchcursor-class.htm)
    ---------------------------     --------------------------------------------------------------------
    where_clause                    where statement. To learn more see [ArcPy SQL reference](https://pro.arcgis.com/en/pro-app/help/mapping/navigation/sql-reference-for-elements-used-in-query-expressions.htm)
    ---------------------------     --------------------------------------------------------------------
    fields                          list of strings specifying the field names.
    ===========================     ====================================================================

    :returns: pandas.core.frame.DataFrame

    """
    from arcgis.geometry import _types
    if HASARCPY:
        sql_clause = kwargs.pop('sql_clause', (None, None))
        where_clause = kwargs.pop('where_clause', None)
        fields = kwargs.pop('fields', None)
        sr = kwargs.pop('sr', None)
        try:
            desc = arcpy.da.Describe(filename)
            area_field = desc.pop('areaFieldName', None)
            length_field = desc.pop('lengthFieldName', None)
        except Exception:  # for older versions of arcpy without da.Describe
            # BUG FIX: read the auto-generated field names from the Describe
            # *object* before rebinding ``desc`` to a plain dict -- the old
            # code called getattr() on the dict, which always returned None.
            desc_obj = arcpy.Describe(filename)
            area_field = getattr(desc_obj, 'areaFieldName', None)
            length_field = getattr(desc_obj, 'lengthFieldName', None)
            desc = {
                'fields': desc_obj.fields,
                'shapeType': desc_obj.shapeType
            }
        if fields is None:
            # Default to every non-geometry attribute field, skipping the
            # read-only auto-calculated area/length columns.
            fields = [fld.name for fld in desc['fields']
                      if fld.type not in ['Geometry'] and
                      fld.name not in [area_field, length_field]]
        cursor_fields = fields + ['SHAPE@JSON']
        df_fields = fields + ['SHAPE']
        dfs = []
        with da.SearchCursor(filename,
                             field_names=cursor_fields,
                             where_clause=where_clause,
                             sql_clause=sql_clause,
                             spatial_reference=sr) as rows:
            # Batch rows into chunks of 25,000 to bound peak memory use on
            # very large feature classes.
            srows = []
            for row in rows:
                srows.append(row)
                if len(srows) == 25000:
                    dfs.append(pd.DataFrame(srows, columns=df_fields))
                    srows = []
            if len(srows):
                dfs.append(pd.DataFrame(srows, columns=df_fields))
            del srows
        # BUG FIX: the old ``elif len(dfs) == 1`` branch was unreachable
        # because ``len(dfs) > 0`` already matched; test the single-frame
        # case first so a single chunk skips the needless concat.
        if len(dfs) == 1:
            df = dfs[0].reset_index(drop=True)
        elif len(dfs) > 1:
            df = pd.concat(dfs).reset_index(drop=True)
        else:
            df = pd.DataFrame([], columns=df_fields)
        # Parse the SHAPE@JSON strings (null geometries are left untouched)
        # into the matching arcgis geometry type.
        q = df.SHAPE.notnull()
        gt = desc['shapeType'].lower()
        geoms = {
            "point": _types.Point,
            "polygon": _types.Polygon,
            "polyline": _types.Polyline,
            "multipoint": _types.MultiPoint,
            "envelope": _types.Envelope,
            "geometry": _types.Geometry
        }
        df.SHAPE = (
            df.SHAPE[q]
            .apply(pd.io.json.loads)
            .apply(geoms[gt])
        )
        df.spatial.set_geometry("SHAPE")
        return df
    elif not HASARCPY and \
         HASPYSHP and \
         filename.lower().find('.shp') > -1:
        # PyShp fallback: shapefiles only.
        records = []
        reader = shapefile.Reader(filename)
        # field[0] is the field name; skip pyshp's bookkeeping DeletionFlag.
        fields = [field[0] for field in reader.fields if field[0] != 'DeletionFlag']
        for r in reader.shapeRecords():
            atr = dict(zip(fields, r.record))
            # Convert GeoJSON from pyshp to the Esri JSON geometry format.
            g = _geojson_to_esrijson(r.shape.__geo_interface__)
            atr['SHAPE'] = _types.Geometry(g)
            records.append(atr)
            del atr, r, g
        sdf = pd.DataFrame(records)
        sdf.spatial.set_geometry('SHAPE')
        sdf.reset_index(inplace=True)
        return sdf
    elif not HASARCPY and \
         HASFIONA and \
         (filename.lower().find('.shp') > -1 or
          os.path.dirname(filename).lower().find('.gdb') > -1):
        # Fiona fallback: shapefiles or file-geodatabase layers.
        is_gdb = os.path.dirname(filename).lower().find('.gdb') > -1
        if is_gdb:
            # fiona.Env replaced fiona.drivers; prefer it to avoid the
            # deprecation warning on newer fiona releases.
            fiona_env = fiona.drivers
            if hasattr(fiona, 'Env'):
                fiona_env = fiona.Env

            with fiona_env():
                from arcgis.geometry import _types
                # For a gdb, the path is the .gdb directory and the layer
                # is the base name.
                fp = os.path.dirname(filename)
                fn = os.path.basename(filename)
                geoms = []
                atts = []
                with fiona.open(fp, layer=fn) as source:
                    cols = list(source.schema['properties'].keys())
                    # Derive the WKID from the CRS; fall back to WGS84
                    # (4326) when it cannot be parsed.
                    try:
                        wkid = source.crs['init'].split(':')[1]
                    except Exception:
                        wkid = 4326

                    sr = _types.SpatialReference({'wkid': int(wkid)})

                    for idx, row in source.items():
                        geoms.append(_types.Geometry(row['geometry']))
                        atts.append(list(row['properties'].values()))
                        del idx, row
                    df = pd.DataFrame(data=atts, columns=cols)
                    df.spatial.set_geometry(geoms)
                    df.spatial.sr = sr
                    return df
        else:
            with fiona.drivers():
                from arcgis.geometry import _types
                geoms = []
                atts = []
                with fiona.open(filename) as source:
                    cols = list(source.schema['properties'].keys())
                    for idx, row in source.items():
                        geoms.append(_types.Geometry(row['geometry']))
                        atts.append(list(row['properties'].values()))
                        del idx, row
                    df = pd.DataFrame(data=atts, columns=cols)
                    df.spatial.set_geometry(geoms)
                    return df
    return
Example no. 2
0
def from_featureclass(filename, **kwargs):
    """
    Returns a GeoDataFrame (Spatially Enabled Pandas DataFrame) from a feature class.

    Three backends are tried in order of capability: ArcPy (any feature
    class), PyShp (shapefiles only), then Fiona (shapefiles or file
    geodatabase tables).  If none applies, an ``Exception`` is raised.

    ===========================     ====================================================================
    **Argument**                    **Description**
    ---------------------------     --------------------------------------------------------------------
    filename                        Required string or pathlib.Path. Full path to the feature class
    ===========================     ====================================================================

    *Optional parameters when ArcPy library is available in the current environment*:
    ===========================     ====================================================================
    **Key**                         **Value**
    ---------------------------     --------------------------------------------------------------------
    sql_clause                      sql clause to parse data down. To learn more see
                                    [ArcPy Search Cursor](https://pro.arcgis.com/en/pro-app/arcpy/data-access/searchcursor-class.htm)
    ---------------------------     --------------------------------------------------------------------
    where_clause                    where statement. To learn more see [ArcPy SQL reference](https://pro.arcgis.com/en/pro-app/help/mapping/navigation/sql-reference-for-elements-used-in-query-expressions.htm)
    ---------------------------     --------------------------------------------------------------------
    fields                          list of strings specifying the field names.
    ---------------------------     --------------------------------------------------------------------
    spatial_filter                  A `Geometry` object that will filter the results.  This requires
                                    `arcpy` to work.
    ===========================     ====================================================================

    :returns: pandas.core.frame.DataFrame

    """
    from arcgis.geometry import _types

    filename = _ensure_path_string(filename)

    if HASARCPY:
        sql_clause = kwargs.pop('sql_clause', (None, None))
        where_clause = kwargs.pop('where_clause', None)
        fields = kwargs.pop('fields', None)
        sr = kwargs.pop('sr', None)
        spatial_filter = kwargs.pop('spatial_filter', None)
        try:
            desc = arcpy.da.Describe(filename)
            area_field = desc.pop('areaFieldName', None)
            length_field = desc.pop('lengthFieldName', None)
        except Exception:  # for older versions of arcpy without da.Describe
            # BUG FIX: read the auto-generated field names from the Describe
            # *object* before rebinding ``desc`` to a plain dict -- the old
            # code called getattr() on the dict, which always returned None.
            desc_obj = arcpy.Describe(filename)
            area_field = getattr(desc_obj, 'areaFieldName', None)
            length_field = getattr(desc_obj, 'lengthFieldName', None)
            desc = {'fields': desc_obj.fields, 'shapeType': desc_obj.shapeType}

        if spatial_filter:
            # Map Esri REST spatial relationship names onto the
            # overlap types understood by SelectLayerByLocation.
            _sf_lu = {
                "esriSpatialRelIntersects": "INTERSECT",
                "esriSpatialRelContains": "CONTAINS",
                "esriSpatialRelCrosses": "CROSSED_BY_THE_OUTLINE_OF",
                "esriSpatialRelEnvelopeIntersects": "INTERSECT",
                "esriSpatialRelIndexIntersects": "INTERSECT",
                "esriSpatialRelOverlaps": "INTERSECT",
                "esriSpatialRelTouches": "BOUNDARY_TOUCHES",
                "esriSpatialRelWithin": "WITHIN"
            }
            relto = _sf_lu[spatial_filter['spatialRel']]
            geom = spatial_filter['geometry']
            if hasattr(geom, 'polygon'):
                geom = geom.polygon
            geom = geom.as_arcpy
            # Build an in-memory layer with a random name so the selection
            # does not touch the source feature class.
            flname = "a" + uuid.uuid4().hex[:6]
            filename = arcpy.management.MakeFeatureLayer(
                filename, out_layer=flname, where_clause=where_clause)[0]
            # The selection is applied to the layer in place; the tool's
            # return value is not needed.
            arcpy.management.SelectLayerByLocation(filename,
                                                   overlap_type=relto,
                                                   select_features=geom)

        if fields is None:
            # Default to every non-geometry attribute field, skipping the
            # read-only auto-calculated area/length columns.
            fields = [fld.name for fld in desc['fields']
                      if fld.type not in ['Geometry'] and
                      fld.name not in [area_field, length_field]]
        cursor_fields = fields + ['SHAPE@JSON']
        df_fields = fields + ['SHAPE']
        dfs = []
        with da.SearchCursor(filename,
                             field_names=cursor_fields,
                             where_clause=where_clause,
                             sql_clause=sql_clause,
                             spatial_reference=sr) as rows:
            # Batch rows into chunks of 25,000 to bound peak memory use on
            # very large feature classes.
            srows = []
            for row in rows:
                srows.append(row)
                if len(srows) == 25000:
                    dfs.append(pd.DataFrame(srows, columns=df_fields))
                    srows = []
            if len(srows):
                dfs.append(pd.DataFrame(srows, columns=df_fields))
            del srows
        # BUG FIX: the old ``elif len(dfs) == 1`` branch was unreachable
        # because ``len(dfs) > 0`` already matched; test the single-frame
        # case first so a single chunk skips the needless concat.
        if len(dfs) == 1:
            df = dfs[0].reset_index(drop=True)
        elif len(dfs) > 1:
            df = pd.concat(dfs).reset_index(drop=True)
        else:
            df = pd.DataFrame([], columns=df_fields)
        # Parse the SHAPE@JSON strings (null geometries are left untouched)
        # into the matching arcgis geometry type.
        q = df.SHAPE.notnull()
        gt = desc['shapeType'].lower()
        geoms = {
            "point": _types.Point,
            "polygon": _types.Polygon,
            "polyline": _types.Polyline,
            "multipoint": _types.MultiPoint,
            "envelope": _types.Envelope,
            "geometry": _types.Geometry
        }
        df.SHAPE = (df.SHAPE[q].apply(pd.io.json.loads).apply(geoms[gt]))
        df.spatial.set_geometry("SHAPE")
        df.spatial._meta.source = filename
        return df
    elif not HASARCPY and \
         HASPYSHP and \
         filename.lower().find('.shp') > -1:
        # PyShp fallback: shapefiles only.
        records = []
        reader = shapefile.Reader(filename)
        # field[0] is the field name; skip pyshp's bookkeeping DeletionFlag.
        fields = [
            field[0] for field in reader.fields if field[0] != 'DeletionFlag'
        ]
        for r in reader.shapeRecords():
            atr = dict(zip(fields, r.record))
            # Convert GeoJSON from pyshp to the Esri JSON geometry format.
            g = _geojson_to_esrijson(r.shape.__geo_interface__)
            atr['SHAPE'] = _types.Geometry(g)
            records.append(atr)
            del atr, r, g
        sdf = pd.DataFrame(records)
        sdf.spatial.set_geometry('SHAPE')
        # Shapefiles have no OBJECTID; synthesize one from the row order.
        sdf['OBJECTID'] = range(sdf.shape[0])
        sdf.reset_index(inplace=True)
        sdf.spatial._meta.source = filename
        return sdf
    elif not HASARCPY and \
         HASFIONA and \
         (filename.lower().find('.shp') > -1 or
          os.path.dirname(filename).lower().find('.gdb') > -1):
        # Fiona fallback: shapefiles or file-geodatabase layers.
        is_gdb = os.path.dirname(filename).lower().find('.gdb') > -1
        if is_gdb:
            # fiona.Env replaced fiona.drivers; prefer it to avoid the
            # deprecation warning on newer fiona releases.
            fiona_env = fiona.drivers
            if hasattr(fiona, 'Env'):
                fiona_env = fiona.Env

            with fiona_env():
                from arcgis.geometry import _types
                # For a gdb, the path is the .gdb directory and the layer
                # is the base name.
                fp = os.path.dirname(filename)
                fn = os.path.basename(filename)
                geoms = []
                atts = []
                with fiona.open(fp, layer=fn) as source:
                    cols = list(source.schema['properties'].keys())
                    # Derive the WKID from the CRS; fall back to WGS84
                    # (4326) when it cannot be parsed.
                    try:
                        wkid = source.crs['init'].split(':')[1]
                    except Exception:
                        wkid = 4326

                    sr = _types.SpatialReference({'wkid': int(wkid)})

                    for idx, row in source.items():
                        geoms.append(_types.Geometry(row['geometry']))
                        atts.append(list(row['properties'].values()))
                        del idx, row
                    df = pd.DataFrame(data=atts, columns=cols)
                    df.spatial.set_geometry(geoms)
                    df.spatial.sr = sr
                    df.spatial._meta.source = filename
                    return df
        else:
            with fiona.drivers():
                from arcgis.geometry import _types
                geoms = []
                atts = []
                with fiona.open(filename) as source:
                    cols = list(source.schema['properties'].keys())
                    for idx, row in source.items():
                        geoms.append(_types.Geometry(row['geometry']))
                        atts.append(list(row['properties'].values()))
                        del idx, row
                    df = pd.DataFrame(data=atts, columns=cols)
                    df.spatial.set_geometry(geoms)
                    df.spatial._meta.source = filename
                    return df
    else:
        if os.path.dirname(filename).lower().find('.gdb') > -1:
            message = """
            Cannot Open Geodatabase without Arcpy or Fiona
            \nPlease switch to Arcpy for full support or install fiona by this command `conda install fiona`
            """.strip()
            print(message)
            raise Exception(
                'Failed to import Feature Class from Geodatabase specified')
        else:
            raise Exception(
                'Unsupported Data Format or Invalid Feature Class specified')
Example no. 3
0
def to_featureclass(df, out_name, out_location=None,
                    overwrite=True, out_sr=None,
                    skip_invalid=True):
    """
    converts a SpatialDataFrame to a feature class

    Parameters:
     :df: the SpatialDataFrame to export
     :out_name: name of the output feature class table
     :out_location: path to the workspace (folder, .gdb, or .sde); when
      omitted, a scratch location is chosen based on the output type
     :overwrite: True, the data will be erased then replaced, else the
      table will be appended to an existing table.
     :out_sr: if set, the data will try to reproject itself
     :skip_invalid: if True, the cursor object will not raise an error on
      insertion of invalid data, if False, the first occurence of invalid
      data will raise an exception.
    Returns:
     path to the feature class
    """
    fc = None
    if HASARCPY:
        import arcgis
        cols = []           # sanitized output column names
        dt_idx = []         # names (later: cursor indexes) of datetime columns
        invalid_rows = []   # df indexes of rows that failed to insert
        idx = 0
        max_length = None   # field-name length cap (10 for shapefiles)
        # --- Resolve/create the output workspace ---------------------------
        if out_location:
            if os.path.isdir(out_location) == False and \
               out_location.lower().endswith('.gdb'):
                # Workspace is a non-existent file gdb: create it.
                out_location = arcpy.CreateFileGDB_management(out_folder_path=os.path.dirname(out_location),
                                                             out_name=os.path.basename(out_location))[0]
            elif os.path.isdir(out_location) == False and \
                 out_name.lower().endswith('.shp'):
                # Shapefile output into a missing folder: create the folder.
                os.makedirs(out_location)
            elif os.path.isfile(out_location) == False and \
                 out_location.lower().endswith('.sde'):
                raise ValueError("The sde connection file does not exist")
        else:
            # No workspace given: temp dir for shapefiles, scratch gdb
            # otherwise.  NOTE(review): the final else is unreachable here
            # because this branch only runs when HASARCPY is truthy.
            if out_name.lower().endswith('.shp'):
                out_location = tempfile.gettempdir()
            elif HASARCPY:
                out_location = arcpy.env.scratchGDB
            else:
                out_location = tempfile.gettempdir()
                out_name = out_name + ".shp"
        fc = os.path.join(out_location, out_name)
        df = df.copy() # create a copy so we don't modify the source data.
        if out_name.lower().endswith('.shp'):
            # Shapefile (dBase) field names are limited to 10 characters.
            max_length = 10
        # --- Clean columns: fill nulls, note datetimes, sanitize names -----
        for col in df.columns:
            if col.lower() != 'shape':
                if df[col].dtype.type in NUMERIC_TYPES:
                    # Geodatabases reject NaN in numeric fields; use 0.
                    df[col] = df[col].fillna(0)
                elif df[col].dtype.type in DATETIME_TYPES:
                    dt_idx.append(col)
                else:
                    df.loc[df[col].isnull(), col] = ""
                idx += 1
                col = sanitize_field_name(s=col,
                                          length=max_length)
            cols.append(col)
            del col
        df.columns = cols

        # --- Create (or replace) the output feature class ------------------
        if arcpy.Exists(fc) and \
           overwrite:
            arcpy.Delete_management(fc)
        if arcpy.Exists(fc) ==  False:
            # Resolve a spatial reference: prefer df.sr, else fall back to
            # the first valid geometry's spatial_reference.
            sr = df.sr
            if df.sr is None:
                sr = df['SHAPE'].loc[df['SHAPE'].first_valid_index()].spatial_reference
                if isinstance(sr, dict) and \
                   'wkid' in sr:
                    sr = arcpy.SpatialReference(sr['wkid'])
                elif isinstance(sr, arcpy.SpatialReference):
                    sr = sr
                else:
                    sr = None
            elif df.sr:
                sr = _types.SpatialReference(df.sr).as_arcpy
            # NOTE(review): the remaining branches appear unreachable --
            # ``sr`` was set to ``df.sr`` above, so ``sr is None`` implies
            # ``df.sr is None`` (handled first), and any truthy df.sr was
            # handled by ``elif df.sr``.  Confirm before removing.
            elif sr is None:
                sr = df['SHAPE'].loc[df['SHAPE'].first_valid_index()].spatial_reference
                if isinstance(sr, dict) and \
                               'wkid' in sr:
                    sr = arcpy.SpatialReference(sr['wkid'])
                elif isinstance(sr, arcpy.SpatialReference):
                    sr = sr
                else:
                    sr = None
            elif isinstance(sr, dict):
                sr = _types.SpatialReference(sr).as_arcpy
            elif isinstance(sr, _types.SpatialReference):
                sr = df.sr.as_arcpy

            fc = arcpy.CreateFeatureclass_management(out_path=out_location,
                                                     out_name=out_name,
                                                     geometry_type=df.geometry_type.upper(),
                                                     spatial_reference=sr)[0]
        # --- Build the insert-field list, dropping system-managed fields ---
        desc = arcpy.Describe(fc)
        oidField = desc.oidFieldName
        col_insert = copy.copy(df.columns).tolist()
        if hasattr(desc, 'areaFieldName'):
            af = desc.areaFieldName.lower()
        else:
            af = None
        if hasattr(desc, 'lengthFieldName'):
            lf = desc.lengthFieldName.lower()
        else:
            lf = None
        # Drop OID and auto-calculated area/length columns -- they are
        # maintained by the geodatabase and cannot be written.
        col_insert = [f for f in col_insert if f.lower() not in ['oid', 'objectid', 'fid', desc.oidFieldName.lower(), af, lf]]
        df_cols = col_insert.copy()
        lower_col_names = [f.lower() for f in col_insert if f.lower() not in ['oid', 'objectid', 'fid']]
        idx_shp = None      # position of the SHAPE column in col_insert

        if oidField.lower() in lower_col_names:
            val = col_insert.pop(lower_col_names.index(oidField.lower()))
            del df[val]
            col_insert = copy.copy(df.columns).tolist()
            lower_col_names = [f.lower() for f in col_insert]
        if hasattr(desc, "areaFieldName") and \
           desc.areaFieldName.lower() in lower_col_names:
            val = col_insert.pop(lower_col_names.index(desc.areaFieldName.lower()))
            del df[val]
            col_insert = copy.copy(df.columns).tolist()
            lower_col_names = [f.lower() for f in col_insert]
        elif 'shape_area' in lower_col_names:
            val = col_insert.pop(lower_col_names.index('shape_area'))
            del df[val]
            col_insert = copy.copy(df.columns).tolist()
            lower_col_names = [f.lower() for f in col_insert]
        if hasattr(desc, "lengthFieldName") and \
           desc.lengthFieldName.lower() in lower_col_names:
            val = col_insert.pop(lower_col_names.index(desc.lengthFieldName.lower()))
            del df[val]
            col_insert = copy.copy(df.columns).tolist()
            lower_col_names = [f.lower() for f in col_insert]
        elif 'shape_length' in lower_col_names:
            val = col_insert.pop(lower_col_names.index('shape_length'))
            del df[val]
            col_insert = copy.copy(df.columns).tolist()
            lower_col_names = [f.lower() for f in col_insert]
        if "SHAPE" in df.columns:
            # Write geometries through the SHAPE@ cursor token.
            idx_shp = col_insert.index("SHAPE")
            col_insert[idx_shp] = "SHAPE@"
        # --- Add any missing attribute fields to the output table ----------
        existing_fields = [field.name.lower() for field in arcpy.ListFields(fc)]
        for col in col_insert:
            if col.lower() != 'shape@' and \
               col.lower() != 'shape' and \
               col.lower() not in existing_fields:
                try:
                    t = _infer_type(df, col)
                    if t == "TEXT" and out_name.lower().endswith('.shp') == False:
                        # Size text fields to the data, minimum 255 chars.
                        # NOTE(review): int(max) raises on an all-null
                        # column (NaN); the broad except below swallows it.
                        l = int(df[col].str.len().max()) or 0
                        if l < 255:
                            l = 255
                        arcpy.AddField_management(in_table=fc, field_name=col,
                                                  field_length=l,
                                                  field_type=_infer_type(df, col))
                    else:
                        arcpy.AddField_management(in_table=fc, field_name=col,
                                              field_type=t)
                except:
                    print('col %s' % col)
        # Convert the saved datetime column names into cursor positions.
        dt_idx = [col_insert.index(col) for col in dt_idx if col in col_insert]
        # --- Insert the rows ------------------------------------------------
        icur = da.InsertCursor(fc, col_insert)
        for index, row in df[df_cols].iterrows():
            if len(dt_idx) > 0:
                row = row.tolist()
                # pandas Timestamps must become python datetimes for arcpy.
                for i in dt_idx:
                    row[i] = row[i].to_pydatetime()
                    del i
                try:
                    # NOTE(review): ``if idx_shp:`` is falsy when the SHAPE
                    # column is at position 0 -- the else-branch below uses
                    # ``isinstance(idx_shp, int)``, which looks correct;
                    # confirm whether this branch should match it.
                    if idx_shp:
                        row[idx_shp] = row[idx_shp].as_arcpy
                    icur.insertRow(row)
                except:
                    invalid_rows.append(index)
                    if skip_invalid == False:
                        raise Exception("Invalid row detected at index: %s" % index)
            else:
                try:
                    row = row.tolist()
                    if isinstance(idx_shp, int):
                        row[idx_shp] = row[idx_shp].as_arcpy
                    icur.insertRow(row)
                except:
                    invalid_rows.append(index)
                    if skip_invalid == False:
                        raise Exception("Invalid row detected at index: %s" % index)

            del row
        del icur
        if len(invalid_rows) > 0:
            t = ",".join([str(r) for r in invalid_rows])
            _log.warning('The following rows could not be written to the table: %s' % t)
    elif HASARCPY == False and \
         HASPYSHP:
        # No ArcPy: fall back to a pure-pyshp shapefile writer.
        return _pyshp_to_shapefile(df=df,
                                   out_path=out_location,
                                   out_name=out_name)
    else:
        raise Exception("Cannot Export the data without ArcPy or PyShp modules. "+ \
                        "Please install them and try again.")
    return fc