示例#1
0
class ZonalStatsProcess(GaiaProcess):
    """
    Calculates statistical values from a raster dataset for each polygon
    in a vector dataset.
    """
    required_inputs = (
        ('raster', formats.RASTER),
        ('zones', formats.VECTOR),
    )
    required_args = ()
    default_output = formats.VECTOR

    def __init__(self, **kwargs):
        super(ZonalStatsProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result', uri=self.get_outpath())
        self.validate()

    def compute(self):
        self.output.create_output_dir(self.output.uri)
        features = gdal_zonalstats(
            self.inputs[1].read(format=formats.JSON,
                                epsg=self.inputs[0].get_epsg()),
            self.inputs[0].read())
        self.output.data = GeoDataFrame.from_features(features)
        self.output.write()
示例#2
0
    def compute(self):
        if not self.output:
            self.output = VectorFileIO(name='result', uri=self.get_outpath())
        first_df = self.inputs[0].read()
        col = self.var_col
        adjust_by_col = self.adjust_by_col

        # filter out null fields or else weight functions won't work
        keep = first_df[col].notnull()
        filtered_df = first_df[keep].reset_index()

        # get Local Moran's I
        f = np.array(filtered_df[col])
        w = wt.gpd_contiguity(filtered_df)
        if adjust_by_col:
            adjust_by = np.array(filtered_df[adjust_by_col])
            lm = pysal.esda.moran.Moran_Local_Rate(e=f,
                                                   b=adjust_by,
                                                   w=w,
                                                   permutations=9999)
        else:
            lm = pysal.Moran_Local(y=f, w=w, permutations=9999)

        sig = lm.p_sim < 0.05
        filtered_df['lm_sig'] = sig
        filtered_df['lm_p_sim'] = lm.p_sim
        filtered_df['lm_q'] = lm.q
        filtered_df['lm_Is'] = lm.Is

        self.output.data = filtered_df
        self.output.write()
        logger.debug(self.output)
示例#3
0
class LengthProcess(GaiaProcess):
    """
    Calculate the length of each feature in a dataset.
    If the dataset projection is not in metric units, it will
    be temporarily reprojected to EPSG:3857 to calculate the area.
    """
    required_inputs = (('first', formats.VECTOR),)
    required_args = ()
    default_output = formats.JSON

    def __init__(self, **kwargs):
        super(LengthProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())
        self.validate()

    def calc_pandas(self):
        featureio = self.inputs[0]
        original_projection = featureio.get_epsg()
        epsg = original_projection
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(original_projection))
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857
        else:
            original_projection = None
        feature_df = GeoDataFrame.copy(featureio.read(epsg=epsg))
        feature_df['length'] = feature_df.geometry.length
        if original_projection:
            feature_df[feature_df.geometry.name] = feature_df.geometry.to_crs(
                epsg=original_projection)
            feature_df.crs = fiona.crs.from_epsg(original_projection)
        return feature_df

    def calc_postgis(self):
        featureio = self.inputs[0]
        geom0, epsg = featureio.geom_column, featureio.epsg
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(epsg)
        geom_query = geom0
        geometry_type = featureio.geometry_type
        length_func = 'ST_Perimeter' if 'POLYGON' in geometry_type.upper() \
            else 'ST_Length'
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            geom_query = 'ST_Transform({}, {})'.format(
                geom_query, 3857)
        geom_query = ', {}({}) as length'.format(length_func, geom_query)
        query, params = featureio.get_query()
        query = query.replace('FROM', '{} FROM'.format(geom_query))
        logger.debug(query)
        return df_from_postgis(featureio.engine, query, params, geom0, epsg)

    def compute(self):
        if self.inputs[0].__class__.__name__ == 'PostgisIO':
            data = self.calc_postgis()
        else:
            data = self.calc_pandas()
        self.output.data = data
        self.output.write()
示例#4
0
class ClusterProcess(GaiaProcess):
    """
    Local Moran's I Calculation (Local Indicators of Spatial Association,
    or LISAs) to identifying clusters in data.
    https://pysal.readthedocs.org/en/latest/users/tutorials/autocorrelation.html#local-moran-s-i
    http://pysal.readthedocs.org/en/latest/library/esda/moran.html

    Returns original vector layer with associated Moran's I statistics,
    including:

    lm_Is: float, Moran's I
    lm_q: float, quadrat location
    lm_p_sims: float, p-value based on permutations (low p-value means observed
    Is differ from expected Is significantly)
    lm_sig: boolean, True if p_sims is below 0.05
    """
    required_inputs = (('input', formats.VECTOR),)
    required_args = ('var_col')
    optional_args = ('adjust_by_col')
    default_output = formats.JSON
    adjust_by_col = None

    def __init__(self, var_col, **kwargs):
        self.var_col = var_col
        super(ClusterProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())

    def compute(self):
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())
        first_df = self.inputs[0].read()
        col = self.var_col
        adjust_by_col = self.adjust_by_col

        # filter out null fields or else weight functions won't work
        keep = first_df[col].notnull()
        filtered_df = first_df[keep].reset_index()

        # get Local Moran's I
        f = np.array(filtered_df[col])
        w = wt.gpd_contiguity(filtered_df)
        if adjust_by_col:
            adjust_by = np.array(filtered_df[adjust_by_col])
            lm = pysal.esda.moran.Moran_Local_Rate(e=f, b=adjust_by, w=w,
                                                   permutations=9999)
        else:
            lm = pysal.Moran_Local(y=f, w=w, permutations=9999)

        sig = lm.p_sim < 0.05
        filtered_df['lm_sig'] = sig
        filtered_df['lm_p_sim'] = lm.p_sim
        filtered_df['lm_q'] = lm.q
        filtered_df['lm_Is'] = lm.Is

        self.output.data = filtered_df
        self.output.write()
        logger.debug(self.output)
示例#5
0
    def compute(self):
        if not self.output:
            self.output = VectorFileIO(name='result', uri=self.get_outpath())
        for input in self.inputs:
            if input.name == 'input':
                first_df = input.read()
        col = self.var_col
        adjust_by_col = self.adjust_by_col
        permutations = self.permutations
        if not permutations:
            permutations = 999

        # filter out null fields
        keep = first_df[col].notnull()
        filtered_df = first_df[keep]

        # get Global Moran's I
        f = np.array(filtered_df[col])
        w = wt.gpd_contiguity(filtered_df)
        if adjust_by_col:
            adjust_by = np.array(filtered_df[adjust_by_col])
            mi = pysal.esda.moran.Moran_Rate(e=f,
                                             b=adjust_by,
                                             w=w,
                                             permutations=permutations)
        else:
            mi = pysal.Moran(y=f, w=w, permutations=permutations)

        keep = ['I', 'EI', 'p_norm', 'EI_sim', 'p_sim', 'z_sim', 'p_z_sim']
        mi_dict = {k: getattr(mi, k) for k in keep}

        self.output.data = mi_dict
        self.output.write()
        logger.debug(self.output)
示例#6
0
class ClusterProcess(GaiaProcess):
    """
    Local Moran's I Calculation (Local Indicators of Spatial Association,
    or LISAs) to identifying clusters in data.
    https://pysal.readthedocs.org/en/latest/users/tutorials/autocorrelation.html#local-moran-s-i
    http://pysal.readthedocs.org/en/latest/library/esda/moran.html

    Returns original vector layer with associated Moran's I statistics,
    including:

    lm_Is: float, Moran's I
    lm_q: float, quadrat location
    lm_p_sims: float, p-value based on permutations (low p-value means observed
    Is differ from expected Is significantly)
    lm_sig: boolean, True if p_sims is below 0.05
    """
    required_inputs = (('input', formats.VECTOR), )
    required_args = ('var_col')
    optional_args = ('adjust_by_col')
    default_output = formats.JSON
    adjust_by_col = None

    def __init__(self, var_col, **kwargs):
        self.var_col = var_col
        super(ClusterProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result', uri=self.get_outpath())

    def compute(self):
        if not self.output:
            self.output = VectorFileIO(name='result', uri=self.get_outpath())
        first_df = self.inputs[0].read()
        col = self.var_col
        adjust_by_col = self.adjust_by_col

        # filter out null fields or else weight functions won't work
        keep = first_df[col].notnull()
        filtered_df = first_df[keep].reset_index()

        # get Local Moran's I
        f = np.array(filtered_df[col])
        w = wt.gpd_contiguity(filtered_df)
        if adjust_by_col:
            adjust_by = np.array(filtered_df[adjust_by_col])
            lm = pysal.esda.moran.Moran_Local_Rate(e=f,
                                                   b=adjust_by,
                                                   w=w,
                                                   permutations=9999)
        else:
            lm = pysal.Moran_Local(y=f, w=w, permutations=9999)

        sig = lm.p_sim < 0.05
        filtered_df['lm_sig'] = sig
        filtered_df['lm_p_sim'] = lm.p_sim
        filtered_df['lm_q'] = lm.q
        filtered_df['lm_Is'] = lm.Is

        self.output.data = filtered_df
        self.output.write()
        logger.debug(self.output)
示例#7
0
class LengthProcess(GaiaProcess):
    """
    Calculate the length of each feature in a dataset.
    If the dataset projection is not in metric units, it will
    be temporarily reprojected to EPSG:3857 to calculate the area.
    """
    required_inputs = (('first', formats.VECTOR), )
    required_args = ()
    default_output = formats.JSON

    def __init__(self, **kwargs):
        super(LengthProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result', uri=self.get_outpath())
        self.validate()

    def calc_pandas(self):
        featureio = self.inputs[0]
        original_projection = featureio.get_epsg()
        epsg = original_projection
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(original_projection))
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857
        else:
            original_projection = None
        feature_df = GeoDataFrame.copy(featureio.read(epsg=epsg))
        feature_df['length'] = feature_df.geometry.length
        if original_projection:
            feature_df[feature_df.geometry.name] = feature_df.geometry.to_crs(
                epsg=original_projection)
            feature_df.crs = fiona.crs.from_epsg(original_projection)
        return feature_df

    def calc_postgis(self):
        featureio = self.inputs[0]
        geom0, epsg = featureio.geom_column, featureio.epsg
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(epsg)
        geom_query = geom0
        geometry_type = featureio.geometry_type
        length_func = 'ST_Perimeter' if 'POLYGON' in geometry_type.upper() \
            else 'ST_Length'
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            geom_query = 'ST_Transform({}, {})'.format(geom_query, 3857)
        geom_query = ', {}({}) as length'.format(length_func, geom_query)
        query, params = featureio.get_query()
        query = query.replace('FROM', '{} FROM'.format(geom_query))
        logger.debug(query)
        return df_from_postgis(featureio.engine, query, params, geom0, epsg)

    def compute(self):
        if self.inputs[0].__class__.__name__ == 'PostgisIO':
            data = self.calc_postgis()
        else:
            data = self.calc_pandas()
        self.output.data = data
        self.output.write()
示例#8
0
class UnionProcess(GaiaProcess):
    """
    Combines two vector datasets into one.
    They should have the same columns.
    """

    required_inputs = (('first', formats.VECTOR), ('second', formats.VECTOR))
    required_args = ()
    default_output = formats.JSON

    def __init__(self, **kwargs):
        super(UnionProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())
        self.validate()

    def calc_pandas(self):
        first, second = self.inputs[0], self.inputs[1]
        first_df = first.read()
        second_df = second.read(epsg=first.get_epsg())
        if ''.join(first_df.columns) != ''.join(second_df.columns):
            raise GaiaException('Inputs must have the same columns')
        uniondf = GeoDataFrame(pd.concat([first_df, second_df]))
        return uniondf

    def calc_postgis(self):
        union_queries = []
        union_params = []
        first = self.inputs[0]
        second = self.inputs[1]
        geom0, epsg = first.geom_column, first.epsg
        geom1, epsg1 = second.geom_column, second.epsg
        if ''.join(first.columns) != ''.join(second.columns):
            raise GaiaException('Inputs must have the same columns')
        for pg_io in self.inputs:
            io_query, params = pg_io.get_query()
            union_queries.append(io_query.rstrip(';'))
            union_params.extend(params)

        if epsg1 != epsg:
            geom1_query = 'ST_Transform({},{})'.format(geom1, epsg)
            union_queries[1] = union_queries[1].replace(
                '"{}"'.format(geom1), geom1_query)
        query = '({query0}) UNION ({query1})'\
            .format(query0=union_queries[0], query1=union_queries[1])
        return df_from_postgis(first.engine,
                               query, union_params, geom0, epsg)

    def compute(self):
        input_classes = list(self.get_input_classes())
        use_postgis = (len(input_classes) == 1 and
                       input_classes[0] == 'PostgisIO')
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
        logger.debug(self.output)
示例#9
0
class UnionProcess(GaiaProcess):
    """
    Combines two vector datasets into one.
    They should have the same columns.
    """

    required_inputs = (('first', formats.VECTOR), ('second', formats.VECTOR))
    required_args = ()
    default_output = formats.JSON

    def __init__(self, **kwargs):
        super(UnionProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result', uri=self.get_outpath())
        self.validate()

    def calc_pandas(self):
        first, second = self.inputs[0], self.inputs[1]
        first_df = first.read()
        second_df = second.read(epsg=first.get_epsg())
        if ''.join(first_df.columns) != ''.join(second_df.columns):
            raise GaiaException('Inputs must have the same columns')
        uniondf = GeoDataFrame(pd.concat([first_df, second_df]))
        return uniondf

    def calc_postgis(self):
        union_queries = []
        union_params = []
        first = self.inputs[0]
        second = self.inputs[1]
        geom0, epsg = first.geom_column, first.epsg
        geom1, epsg1 = second.geom_column, second.epsg
        if ''.join(first.columns) != ''.join(second.columns):
            raise GaiaException('Inputs must have the same columns')
        for pg_io in self.inputs:
            io_query, params = pg_io.get_query()
            union_queries.append(io_query.rstrip(';'))
            union_params.extend(params)

        if epsg1 != epsg:
            geom1_query = 'ST_Transform({},{})'.format(geom1, epsg)
            union_queries[1] = union_queries[1].replace(
                '"{}"'.format(geom1), geom1_query)
        query = '({query0}) UNION ({query1})'\
            .format(query0=union_queries[0], query1=union_queries[1])
        return df_from_postgis(first.engine, query, union_params, geom0, epsg)

    def compute(self):
        input_classes = list(self.get_input_classes())
        use_postgis = (len(input_classes) == 1
                       and input_classes[0] == 'PostgisIO')
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
        logger.debug(self.output)
示例#10
0
class WithinProcess(GaiaProcess):
    """
    Similar to SubsetProcess but for vectors: calculates the features within
    a vector dataset that are within (or whose centroids are within) the
    polygons of a second vector dataset.
    """

    required_inputs = (('first', formats.VECTOR), ('second', formats.VECTOR))
    required_args = ()
    default_output = formats.JSON

    def __init__(self, **kwargs):
        super(WithinProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result', uri=self.get_outpath())
        self.validate()

    def calc_pandas(self):
        first, second = self.inputs[0], self.inputs[1]
        first_df = first.read()
        second_df = second.read(epsg=first.get_epsg())
        first_within = first_df[first_df.geometry.within(
            second_df.geometry.unary_union)]
        return first_within

    def calc_postgis(self):
        first = self.inputs[0]
        within_queries = []
        within_params = []
        geom0 = first.geom_column
        epsg = first.epsg
        geom1 = self.inputs[1].geom_column
        for pg_io in self.inputs:
            io_query, params = pg_io.get_query()
            within_queries.append(io_query.rstrip(';'))
            within_params.extend(params)
        joinstr = ' AND ' if 'WHERE' in within_queries[0].upper(
        ) else ' WHERE '
        query = '{query0} {join} ST_Within(ST_Transform({geom0},{epsg}), ' \
                '(SELECT ST_Union(ST_TRANSFORM({geom1},{epsg})) ' \
                'from ({query1}) as q2))'\
            .format(query0=within_queries[0], join=joinstr, geom0=geom0,
                    geom1=geom1, epsg=epsg, query1=within_queries[1])
        return df_from_postgis(first.engine, query, params, geom0, epsg)

    def compute(self):
        if len(self.inputs) != 2:
            raise GaiaException('WithinProcess requires 2 inputs')
        input_classes = list(self.get_input_classes())
        use_postgis = (len(input_classes) == 1
                       and input_classes[0] == 'PostgisIO')
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
示例#11
0
class WithinProcess(GaiaProcess):
    """
    Similar to SubsetProcess but for vectors: calculates the features within
    a vector dataset that are within (or whose centroids are within) the
    polygons of a second vector dataset.
    """

    required_inputs = (('first', formats.VECTOR), ('second', formats.VECTOR))
    required_args = ()
    default_output = formats.JSON

    def __init__(self, **kwargs):
        super(WithinProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())
        self.validate()

    def calc_pandas(self):
        first, second = self.inputs[0], self.inputs[1]
        first_df = first.read()
        second_df = second.read(epsg=first.get_epsg())
        first_within = first_df[first_df.geometry.within(
            second_df.geometry.unary_union)]
        return first_within

    def calc_postgis(self):
        first = self.inputs[0]
        within_queries = []
        within_params = []
        geom0 = first.geom_column
        epsg = first.epsg
        geom1 = self.inputs[1].geom_column
        for pg_io in self.inputs:
            io_query, params = pg_io.get_query()
            within_queries.append(io_query.rstrip(';'))
            within_params.extend(params)
        joinstr = ' AND ' if 'WHERE' in within_queries[0].upper() else ' WHERE '
        query = '{query0} {join} ST_Within(ST_Transform({geom0},{epsg}), ' \
                '(SELECT ST_Union(ST_TRANSFORM({geom1},{epsg})) ' \
                'from ({query1}) as q2))'\
            .format(query0=within_queries[0], join=joinstr, geom0=geom0,
                    geom1=geom1, epsg=epsg, query1=within_queries[1])
        return df_from_postgis(first.engine, query, params, geom0, epsg)

    def compute(self):
        if len(self.inputs) != 2:
            raise GaiaException('WithinProcess requires 2 inputs')
        input_classes = list(self.get_input_classes())
        use_postgis = (len(input_classes) == 1 and
                       input_classes[0] == 'PostgisIO')
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
示例#12
0
class IntersectsProcess(GaiaProcess):
    """
    Calculates the features within the first vector dataset that touch
    the features of the second vector dataset.
    """

    required_inputs = (('first', formats.VECTOR), ('second', formats.VECTOR))
    required_args = ()
    default_output = formats.JSON

    def __init__(self, **kwargs):
        super(IntersectsProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())
        self.validate()

    def calc_pandas(self):
        first, second = self.inputs[0], self.inputs[1]
        first_df = first.read()
        second_df = second.read(epsg=first.get_epsg())
        first_intersects = first_df[first_df.geometry.intersects(
            second_df.geometry.unary_union)]
        return first_intersects

    def calc_postgis(self):
        int_queries = []
        int_params = []
        first = self.inputs[0]
        geom0 = first.geom_column
        epsg = first.epsg
        geom1 = self.inputs[1].geom_column
        for pg_io in self.inputs:
            io_query, params = pg_io.get_query()
            int_queries.append(io_query.rstrip(';'))
            int_params.extend(params)
        joinstr = ' AND ' if 'WHERE' in int_queries[0].upper() else ' WHERE '
        query = '{query0} {join} (SELECT ST_Intersects(ST_Transform(' \
                '{table}.{geom0},{epsg}), ST_Union(ST_Transform(' \
                'q2.{geom1},{epsg}))) from ({query1}) as q2)'\
            .format(query0=int_queries[0], join=joinstr, geom0=geom0,
                    geom1=geom1, epsg=epsg, query1=int_queries[1],
                    table=first.table)
        return df_from_postgis(first.engine,
                               query, int_params, geom0, epsg)

    def compute(self):
        input_classes = list(self.get_input_classes())
        use_postgis = (len(input_classes) == 1 and
                       input_classes[0] == 'PostgisIO')
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
        logger.debug(self.output)
示例#13
0
class IntersectsProcess(GaiaProcess):
    """
    Calculates the features within the first vector dataset that touch
    the features of the second vector dataset.
    """

    required_inputs = (('first', formats.VECTOR), ('second', formats.VECTOR))
    required_args = ()
    default_output = formats.JSON

    def __init__(self, **kwargs):
        super(IntersectsProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result', uri=self.get_outpath())
        self.validate()

    def calc_pandas(self):
        first, second = self.inputs[0], self.inputs[1]
        first_df = first.read()
        second_df = second.read(epsg=first.get_epsg())
        first_intersects = first_df[first_df.geometry.intersects(
            second_df.geometry.unary_union)]
        return first_intersects

    def calc_postgis(self):
        int_queries = []
        int_params = []
        first = self.inputs[0]
        geom0 = first.geom_column
        epsg = first.epsg
        geom1 = self.inputs[1].geom_column
        for pg_io in self.inputs:
            io_query, params = pg_io.get_query()
            int_queries.append(io_query.rstrip(';'))
            int_params.extend(params)
        joinstr = ' AND ' if 'WHERE' in int_queries[0].upper() else ' WHERE '
        query = '{query0} {join} (SELECT ST_Intersects(ST_Transform(' \
                '{table}.{geom0},{epsg}), ST_Union(ST_Transform(' \
                'q2.{geom1},{epsg}))) from ({query1}) as q2)'\
            .format(query0=int_queries[0], join=joinstr, geom0=geom0,
                    geom1=geom1, epsg=epsg, query1=int_queries[1],
                    table=first.table)
        return df_from_postgis(first.engine, query, int_params, geom0, epsg)

    def compute(self):
        input_classes = list(self.get_input_classes())
        use_postgis = (len(input_classes) == 1
                       and input_classes[0] == 'PostgisIO')
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
        logger.debug(self.output)
示例#14
0
 def test_within(self):
     """
     Test WithinProcess for vector inputs
     """
     vector1_io = VectorFileIO(
         uri=os.path.join(testfile_path, 'iraq_hospitals.geojson'))
     vector2_io = VectorFileIO(
         uri=os.path.join(testfile_path, 'baghdad_districts.geojson'))
     process = geo.WithinProcess(inputs=[vector1_io, vector2_io])
     try:
         process.compute()
         self.assertEquals(len(process.output.data), 19)
     finally:
         if process:
             process.purge()
示例#15
0
    def compute(self):
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())
        first_df = self.inputs[0].read()
        col = self.var_col
        adjust_by_col = self.adjust_by_col

        # filter out null fields or else weight functions won't work
        keep = first_df[col].notnull()
        filtered_df = first_df[keep].reset_index()

        # get Local Moran's I
        f = np.array(filtered_df[col])
        w = wt.gpd_contiguity(filtered_df)
        if adjust_by_col:
            adjust_by = np.array(filtered_df[adjust_by_col])
            lm = pysal.esda.moran.Moran_Local_Rate(e=f, b=adjust_by, w=w,
                                                   permutations=9999)
        else:
            lm = pysal.Moran_Local(y=f, w=w, permutations=9999)

        sig = lm.p_sim < 0.05
        filtered_df['lm_sig'] = sig
        filtered_df['lm_p_sim'] = lm.p_sim
        filtered_df['lm_q'] = lm.q
        filtered_df['lm_Is'] = lm.Is

        self.output.data = filtered_df
        self.output.write()
        logger.debug(self.output)
示例#16
0
 def __init__(self, inputs=None, buffer_size=None, **kwargs):
     super(BufferProcess, self).__init__(inputs, **kwargs)
     self.buffer_size = buffer_size
     if not self.output:
         self.output = VectorFileIO(name='result',
                                    uri=self.get_outpath())
     self.validate()
示例#17
0
    def compute(self):
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())
        for input in self.inputs:
            if input.name == 'input':
                first_df = input.read()
        col = self.var_col
        adjust_by_col = self.adjust_by_col
        permutations = self.permutations
        if not permutations:
            permutations = 999

        # filter out null fields
        keep = first_df[col].notnull()
        filtered_df = first_df[keep]

        # get Global Moran's I
        f = np.array(filtered_df[col])
        w = wt.gpd_contiguity(filtered_df)
        if adjust_by_col:
            adjust_by = np.array(filtered_df[adjust_by_col])
            mi = pysal.esda.moran.Moran_Rate(e=f, b=adjust_by, w=w,
                                             permutations=permutations)
        else:
            mi = pysal.Moran(y=f, w=w, permutations=permutations)

        keep = ['I', 'EI', 'p_norm', 'EI_sim', 'p_sim', 'z_sim', 'p_z_sim']
        mi_dict = {k: getattr(mi, k) for k in keep}

        self.output.data = mi_dict
        self.output.write()
        logger.debug(self.output)
示例#18
0
 def __init__(self, distance=None, **kwargs):
     super(NearProcess, self).__init__(**kwargs)
     self.distance = distance
     if not self.output:
         self.output = VectorFileIO(name='result',
                                    uri=self.get_outpath())
     self.validate()
示例#19
0
class CentroidProcess(GaiaProcess):
    """
    Calculates the centroid point of a vector dataset.
    """

    required_inputs = (('first', formats.VECTOR), )
    required_args = ()
    default_output = formats.JSON

    def __init__(self, combined=False, **kwargs):
        super(CentroidProcess, self).__init__(**kwargs)
        self.combined = combined
        if not self.output:
            self.output = VectorFileIO(name='result', uri=self.get_outpath())

    def calc_pandas(self):
        df_in = self.inputs[0].read()
        df = GeoDataFrame(df_in.copy(), geometry=df_in.geometry.name)
        if self.combined:
            gs = GeoSeries(df.geometry.unary_union.centroid,
                           name=df_in.geometry.name)
            return GeoDataFrame(gs)
        else:
            df[df.geometry.name] = df.geometry.centroid
            return df

    def calc_postgis(self):
        pg_io = self.inputs[0]
        io_query, params = pg_io.get_query()
        geom0, epsg = pg_io.geom_column, pg_io.epsg
        if self.combined:
            query = 'SELECT ST_Centroid(ST_Union({geom})) as {geom}' \
                    ' from ({query}) as foo'.format(geom=geom0,
                                                    query=io_query.rstrip(';'))
        else:
            query = re.sub(
                '"{}"'.format(geom0),
                'ST_Centroid("{geom}") as {geom}'.format(geom=geom0), io_query,
                1)
        return df_from_postgis(pg_io.engine, query, params, geom0, epsg)

    def compute(self):
        use_postgis = self.inputs[0].__class__.__name__ == 'PostgisIO'
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
        logger.debug(self.output)
示例#20
0
class CentroidProcess(GaiaProcess):
    """
    Calculates the centroid point of a vector dataset.
    """

    required_inputs = (('first', formats.VECTOR),)
    required_args = ()
    default_output = formats.JSON

    def __init__(self, combined=False, **kwargs):
        super(CentroidProcess, self).__init__(**kwargs)
        self.combined = combined
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())

    def calc_pandas(self):
        df_in = self.inputs[0].read()
        df = GeoDataFrame(df_in.copy(), geometry=df_in.geometry.name)
        if self.combined:
            gs = GeoSeries(df.geometry.unary_union.centroid,
                           name=df_in.geometry.name)
            return GeoDataFrame(gs)
        else:
            df[df.geometry.name] = df.geometry.centroid
            return df

    def calc_postgis(self):
        pg_io = self.inputs[0]
        io_query, params = pg_io.get_query()
        geom0, epsg = pg_io.geom_column, pg_io.epsg
        if self.combined:
            query = 'SELECT ST_Centroid(ST_Union({geom})) as {geom}' \
                    ' from ({query}) as foo'.format(geom=geom0,
                                                    query=io_query.rstrip(';'))
        else:
            query = re.sub('"{}"'.format(geom0),
                           'ST_Centroid("{geom}") as {geom}'.format(
                               geom=geom0), io_query, 1)
        return df_from_postgis(pg_io.engine, query, params, geom0, epsg)

    def compute(self):
        use_postgis = self.inputs[0].__class__.__name__ == 'PostgisIO'
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
        logger.debug(self.output)
示例#21
0
 def compute(self):
     if not self.output:
         self.output = VectorFileIO(name='result', uri=self.get_outpath())
     for input in self.inputs:
         if input.name == 'input':
             first_df = input.read()
     weight_type = self.weight_type
     if weight_type == 'contiguity':
         w = wt.gpd_contiguity(first_df)
     elif weight_type == 'knnW':
         w = wt.gpd_knnW(first_df)
     elif weight_type == 'distanceBandW':
         w = wt.gpd_distanceBandW(first_df)
     elif weight_type == 'kernel':
         w = wt.gpd_kernel(first_df)
     # TODO: add params related to dif weight types
     else:
         print(u'weight type {0} not available'.format(weight_type))
     self.output.data = w
     self.output.write()
     logger.debug(self.output)
示例#22
0
 def test_intersect(self):
     """
     Test IntersectsProcess for vector inputs
     """
     vector1_io = VectorFileIO(
         uri=os.path.join(testfile_path, 'baghdad_districts.geojson'))
     vector2_io = VectorFileIO(
         uri=os.path.join(testfile_path, 'iraq_roads.geojson'))
     process = geo.IntersectsProcess(inputs=[vector1_io, vector2_io])
     try:
         process.compute()
         with open(
                 os.path.join(testfile_path,
                              'intersects_process_results.json')) as exp:
             expected_json = json.load(exp)
         actual_json = json.loads(process.output.read(format=formats.JSON))
         self.assertEquals(len(expected_json['features']),
                           len(actual_json['features']))
     finally:
         if process:
             process.purge()
示例#23
0
 def test_union(self):
     """
     Test UnionProcess for vector inputs
     """
     vector1_io = VectorFileIO(uri=os.path.join(
         testfile_path, 'baghdad_districts.geojson'),
                               filters=[('NNAME', 'contains', '^A')])
     vector2_io = VectorFileIO(uri=os.path.join(
         testfile_path, 'baghdad_districts.geojson'),
                               filters=[('NNAME', 'contains', '^B')])
     process = geo.UnionProcess(inputs=[vector1_io, vector2_io])
     try:
         process.compute()
         with open(os.path.join(testfile_path,
                                'union_process_results.json')) as exp:
             expected_json = json.load(exp)
         actual_json = json.loads(process.output.read(format=formats.JSON))
         self.assertEquals(len(expected_json['features']),
                           len(actual_json['features']))
     finally:
         if process:
             process.purge()
示例#24
0
class WeightProcess(GaiaProcess):
    """
    Calculate spatial weight.
    weight_type available includes: contiguity, knnW, distanceBandW, kernel
    """
    required_inputs = (('input', formats.VECTOR),)
    required_args = ('weight_type')
    default_output = formats.WEIGHT

    def __init__(self, weight_type, **kwargs):
        self.weight_type = weight_type
        super(WeightProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = WeightFileIO(name='result',
                                       uri=self.get_outpath())

    def compute(self):
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())
        for input in self.inputs:
            if input.name == 'input':
                first_df = input.read()
        weight_type = self.weight_type
        if weight_type == 'contiguity':
            w = wt.gpd_contiguity(first_df)
        elif weight_type == 'knnW':
            w = wt.gpd_knnW(first_df)
        elif weight_type == 'distanceBandW':
            w = wt.gpd_distanceBandW(first_df)
        elif weight_type == 'kernel':
            w = wt.gpd_kernel(first_df)
        # TODO: add params related to dif weight types
        else:
            print(u'weight type {0} not available'.format(weight_type))
        self.output.data = w
        self.output.write()
        logger.debug(self.output)
示例#25
0
class ZonalStatsProcess(GaiaProcess):
    """
    Calculates statistical values from a raster dataset for each polygon
    in a vector dataset.
    """
    required_inputs = (('raster', formats.RASTER), ('zones', formats.VECTOR),)
    required_args = ()
    default_output = formats.VECTOR

    def __init__(self, **kwargs):
        super(ZonalStatsProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())
        self.validate()

    def compute(self):
        self.output.create_output_dir(self.output.uri)
        features = gdal_zonalstats(
            self.inputs[1].read(format=formats.JSON,
                                epsg=self.inputs[0].get_epsg()),
            self.inputs[0].read())
        self.output.data = GeoDataFrame.from_features(features)
        self.output.write()
示例#26
0
class WeightProcess(GaiaProcess):
    """
    Calculate spatial weight.
    weight_type available includes: contiguity, knnW, distanceBandW, kernel
    """
    required_inputs = (('input', formats.VECTOR), )
    required_args = ('weight_type')
    default_output = formats.WEIGHT

    def __init__(self, weight_type, **kwargs):
        self.weight_type = weight_type
        super(WeightProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = WeightFileIO(name='result', uri=self.get_outpath())

    def compute(self):
        if not self.output:
            self.output = VectorFileIO(name='result', uri=self.get_outpath())
        for input in self.inputs:
            if input.name == 'input':
                first_df = input.read()
        weight_type = self.weight_type
        if weight_type == 'contiguity':
            w = wt.gpd_contiguity(first_df)
        elif weight_type == 'knnW':
            w = wt.gpd_knnW(first_df)
        elif weight_type == 'distanceBandW':
            w = wt.gpd_distanceBandW(first_df)
        elif weight_type == 'kernel':
            w = wt.gpd_kernel(first_df)
        # TODO: add params related to dif weight types
        else:
            print(u'weight type {0} not available'.format(weight_type))
        self.output.data = w
        self.output.write()
        logger.debug(self.output)
示例#27
0
 def test_weight(self):
     """
     Test WeightProcess for vector inputs
     """
     vector_io = VectorFileIO(name='input',
                              uri=os.path.join(testfile_path,
                                               'baghdad_hospitals.geojson'))
     process = geo.WeightProcess('knnW', inputs=[vector_io])
     try:
         process.compute()
         exp = pysal.open(
             os.path.join(testfile_path, 'weight_process_result.gal'), 'r')
         expected_w = exp.read()
         exp.close()
         actual = process.output.read(format=formats.WEIGHT)
         self.assertEquals(expected_w.n, actual.n)
     finally:
         if process:
             process.purge()
示例#28
0
 def test_cluster(self):
     """
     Test ClusterProcess for vector inputs
     """
     vector_io = VectorFileIO(name='input',
                              uri=os.path.join(testfile_path,
                                               'baghdad_hospitals.geojson'))
     process = geo.ClusterProcess('num_hospitals', inputs=[vector_io])
     try:
         process.compute()
         with open(
                 os.path.join(testfile_path,
                              'cluster_process_results.json')) as exp:
             expected_json = json.load(exp)
         actual_json = json.loads(process.output.read(format=formats.JSON))
         self.assertEquals(len(expected_json['features']),
                           len(actual_json['features']))
     finally:
         if process:
             process.purge()
示例#29
0
 def test_length(self):
     """
     Test LengthProcess for vector inputs
     """
     vector_roads = VectorFileIO(uri=os.path.join(testfile_path,
                                                  'iraq_roads.geojson'),
                                 filters=[('type', '=', 'motorway'),
                                          ('bridge', '=', 1)])
     process = geo.LengthProcess(inputs=[vector_roads])
     try:
         process.compute()
         with open(
                 os.path.join(testfile_path,
                              'length_process_results.json')) as exp:
             expected_json = json.load(exp)
         actual_json = json.loads(process.output.read(format=formats.JSON))
         self.assertEquals(len(expected_json['features']),
                           len(actual_json['features']))
     finally:
         if process:
             process.purge()
示例#30
0
 def test_autocorrelation(self):
     """
     Test AutocorrelationProcess for vector inputs
     """
     vector_io = VectorFileIO(name='input',
                              uri=os.path.join(testfile_path,
                                               'baghdad_hospitals.geojson'))
     process = geo.AutocorrelationProcess('num_hospitals',
                                          inputs=[vector_io])
     try:
         process.compute()
         with open(
                 os.path.join(
                     testfile_path,
                     'autocorrelation_process_results.json')) as exp:
             expected_json = json.load(exp)
         actual_json = process.output.read(format=formats.JSON)
         self.assertEquals(expected_json['I'], actual_json['I'])
     finally:
         if process:
             process.purge()
示例#31
0
 def compute(self):
     if not self.output:
         self.output = VectorFileIO(name='result',
                                    uri=self.get_outpath())
     for input in self.inputs:
         if input.name == 'input':
             first_df = input.read()
     weight_type = self.weight_type
     if weight_type == 'contiguity':
         w = wt.gpd_contiguity(first_df)
     elif weight_type == 'knnW':
         w = wt.gpd_knnW(first_df)
     elif weight_type == 'distanceBandW':
         w = wt.gpd_distanceBandW(first_df)
     elif weight_type == 'kernel':
         w = wt.gpd_kernel(first_df)
     # TODO: add params related to dif weight types
     else:
         print(u'weight type {0} not available'.format(weight_type))
     self.output.data = w
     self.output.write()
     logger.debug(self.output)
示例#32
0
    def test_subset_raster(self):
        """
        Test SubsetProcess for vector & raster inputs
        """
        zipfile = ZipFile(os.path.join(testfile_path, '2states.zip'), 'r')
        zipfile.extract('2states.geojson', testfile_path)

        vector_io = VectorFileIO(
            uri=os.path.join(testfile_path, '2states.geojson'))
        raster_io = RasterFileIO(
            uri=os.path.join(testfile_path, 'globalairtemp.tif'))
        process = geo.SubsetProcess(inputs=[raster_io, vector_io])
        try:
            process.compute()
            self.assertEquals(type(process.output.data).__name__, 'Dataset')
            self.assertTrue(os.path.exists(process.output.uri))
            self.assertIsNotNone(process.id)
            self.assertIn(process.id, process.output.uri)
        finally:
            testfile = os.path.join(testfile_path, '2states.geojson')
            if os.path.exists(testfile):
                os.remove(testfile)
            if process:
                process.purge()
示例#33
0
class BufferProcess(GaiaProcess):
    """
    Generates a buffer polygon around the geometries of the input data.
    The size of the buffer is determined by the 'buffer_size' args key
    and the unit of measure should be meters.  If inputs are not in a
    metric projection they will be reprojected to EPSG:3857.
    """
    required_inputs = (('input', formats.VECTOR), )
    required_args = ('buffer_size', )
    default_output = formats.JSON

    def __init__(self, inputs=None, buffer_size=None, **kwargs):
        super(BufferProcess, self).__init__(inputs, **kwargs)
        self.buffer_size = buffer_size
        if not self.output:
            self.output = VectorFileIO(name='result', uri=self.get_outpath())
        self.validate()

    def calc_pandas(self):
        featureio = self.inputs[0]
        original_projection = featureio.get_epsg()
        epsg = original_projection
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(original_projection))
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857
        else:
            original_projection = None
        feature_df = featureio.read(epsg=epsg)
        buffer = GeoSeries(feature_df.buffer(self.buffer_size).unary_union)
        buffer_df = GeoDataFrame(geometry=buffer)
        buffer_df.crs = feature_df.crs
        if original_projection:
            buffer_df[buffer_df.geometry.name] = buffer_df.to_crs(
                epsg=original_projection)
            buffer_df.crs = fiona.crs.from_epsg(original_projection)
        return buffer_df

    def calc_postgis(self):
        pg_io = self.inputs[0]
        original_projection = pg_io.epsg
        io_query, params = pg_io.get_query()
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(original_projection))

        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            geom_query = 'ST_Transform({}, {})'.format(pg_io.geom_column, 3857)
        else:
            original_projection = None
        buffer_query = 'ST_Union(ST_Buffer({}, %s))'.format(geom_query)
        if original_projection:
            buffer_query = 'ST_Transform({}, {})'.format(
                buffer_query, original_projection)

        query = 'SELECT {buffer} as {geocol} ' \
                'FROM ({query}) as foo'.format(buffer=buffer_query,
                                               geocol=pg_io.geom_column,
                                               query=io_query.rstrip(';'))
        params.insert(0, self.buffer_size)
        logger.debug(query)
        return df_from_postgis(pg_io.engine, query, params, pg_io.geom_column,
                               pg_io.epsg)

    def compute(self):
        if self.inputs[0].__class__.__name__ == 'PostgisIO':
            data = self.calc_postgis()
        else:
            data = self.calc_pandas()
        self.output.data = data
        self.output.write()
示例#34
0
 def __init__(self, inputs=None, buffer_size=None, **kwargs):
     super(BufferProcess, self).__init__(inputs, **kwargs)
     self.buffer_size = buffer_size
     if not self.output:
         self.output = VectorFileIO(name='result', uri=self.get_outpath())
     self.validate()
示例#35
0
class NearProcess(GaiaProcess):
    """
    Takes two inputs, the second assumed to contain a single feature,
    the first a vector dataset. Requires a distance argument, and the unit of
    measure should be meters.  If inputs are not in a
    metric projection they will be reprojected to EPSG:3857.
    Returns the features in the second input within a specified distance
    of the point in the first input.

    """
    required_inputs = (('first', formats.VECTOR), ('second', formats.VECTOR))
    required_args = ('distance',)
    default_output = formats.JSON

    def __init__(self, distance=None, **kwargs):
        super(NearProcess, self).__init__(**kwargs)
        self.distance = distance
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())
        self.validate()

    def calc_pandas(self):
        features = self.inputs[0]
        original_projection = self.inputs[0].get_epsg()
        epsg = original_projection
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(original_projection))
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857
        else:
            original_projection = None
        features_df = features.read(epsg=epsg)
        features_gs = features_df.geometry
        point_df = self.inputs[1].read(epsg=epsg)[:1]
        point_gs = point_df.geometry
        features_length = len(features_gs)
        min_dist = np.empty(features_length)
        for i, feature in enumerate(features_gs):
            min_dist[i] = np.min([feature.distance(point_gs[0])])

        nearby_df = GeoDataFrame.copy(features_df)
        nearby_df['distance'] = min_dist
        distance_max = self.distance
        nearby_df = nearby_df[(nearby_df['distance'] <= distance_max)]\
            .sort_values('distance')
        if original_projection:
            nearby_df[nearby_df.geometry.name] = \
                nearby_df.geometry.to_crs(epsg=original_projection)
        return nearby_df

    def calc_postgis(self):
        """
        Uses DWithin plus K-Nearest Neighbor (KNN) query
        """
        featureio = self.inputs[0]
        pointio = self.inputs[1]
        feature_geom, epsg = featureio.geom_column, featureio.epsg
        point_json = json.loads(pointio.read(
            format=formats.JSON))['features'][0]
        point_epsg = pointio.get_epsg()

        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(epsg))
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857

        io_query, params = featureio.get_query()

        point_geom = 'ST_Transform(ST_SetSRID(ST_GeomFromGeoJSON(\'' \
                     '{geojson}\'),{point_epsg}), {epsg})'.\
            format(geojson=json.dumps(point_json['geometry']),
                   point_epsg=point_epsg, epsg=epsg)

        dist1 = """, (SELECT ST_Distance(
                ST_Transform({table0}.{geom0},{epsg}),
                ST_Transform(point, {epsg}))
                FROM {point_geom} as point
                ORDER BY {table0}.{geom0} <#> point LIMIT 1) as distance FROM
                """.format(table0=featureio.table,
                           geom0=feature_geom,
                           point_geom=point_geom,
                           epsg=epsg)

        dist2 = """
                WHERE ST_DWithin({point_geom},
                ST_Transform({table0}.{geom0},{epsg}), {distance})
                """.format(table0=featureio.table,
                           geom0=feature_geom,
                           point_geom=point_geom,
                           epsg=epsg,
                           distance=self.distance)

        dist3 = ' ORDER BY distance ASC'
        query = re.sub('FROM', dist1, io_query).rstrip(';')
        if 'WHERE' in query:
            query = re.sub('WHERE', dist2 + ' AND ', query)
        else:
            query += dist2
        query += dist3
        logger.debug(query)
        return df_from_postgis(featureio.engine,
                               query, params, feature_geom, epsg)

    def compute(self):
        if self.inputs[0].__class__.__name__ == 'PostgisIO':
            data = self.calc_postgis()
        else:
            data = self.calc_pandas()
        self.output.data = data
        self.output.write()
示例#36
0
class BufferProcess(GaiaProcess):
    """
    Generates a buffer polygon around the geometries of the input data.
    The size of the buffer is determined by the 'buffer_size' args key
    and the unit of measure should be meters.  If inputs are not in a
    metric projection they will be reprojected to EPSG:3857.
    """
    required_inputs = (('input', formats.VECTOR),)
    required_args = ('buffer_size',)
    default_output = formats.JSON

    def __init__(self, inputs=None, buffer_size=None, **kwargs):
        super(BufferProcess, self).__init__(inputs, **kwargs)
        self.buffer_size = buffer_size
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())
        self.validate()

    def calc_pandas(self):
        featureio = self.inputs[0]
        original_projection = featureio.get_epsg()
        epsg = original_projection
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(original_projection))
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857
        else:
            original_projection = None
        feature_df = featureio.read(epsg=epsg)
        buffer = GeoSeries(feature_df.buffer(self.buffer_size).unary_union)
        buffer_df = GeoDataFrame(geometry=buffer)
        buffer_df.crs = feature_df.crs
        if original_projection:
            buffer_df[buffer_df.geometry.name] = buffer_df.to_crs(
                epsg=original_projection)
            buffer_df.crs = fiona.crs.from_epsg(original_projection)
        return buffer_df

    def calc_postgis(self):
        pg_io = self.inputs[0]
        original_projection = pg_io.epsg
        io_query, params = pg_io.get_query()
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(original_projection))

        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            geom_query = 'ST_Transform({}, {})'.format(
                pg_io.geom_column, 3857)
        else:
            original_projection = None
        buffer_query = 'ST_Union(ST_Buffer({}, %s))'.format(geom_query)
        if original_projection:
            buffer_query = 'ST_Transform({}, {})'.format(buffer_query,
                                                         original_projection)

        query = 'SELECT {buffer} as {geocol} ' \
                'FROM ({query}) as foo'.format(buffer=buffer_query,
                                               geocol=pg_io.geom_column,
                                               query=io_query.rstrip(';'))
        params.insert(0, self.buffer_size)
        logger.debug(query)
        return df_from_postgis(pg_io.engine, query, params,
                               pg_io.geom_column, pg_io.epsg)

    def compute(self):
        if self.inputs[0].__class__.__name__ == 'PostgisIO':
            data = self.calc_postgis()
        else:
            data = self.calc_pandas()
        self.output.data = data
        self.output.write()
示例#37
0
class DistanceProcess(GaiaProcess):
    """
    Calculates the minimum distance from each feature of the first dataset
    to the nearest feature of the second dataset.
    """
    required_inputs = (('first', formats.VECTOR), ('second', formats.VECTOR))
    required_args = ()
    default_output = formats.JSON

    def __init__(self, **kwargs):
        super(DistanceProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())
        self.validate()

    def calc_pandas(self):
        first = self.inputs[0]
        original_projection = first.get_epsg()
        epsg = original_projection
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(original_projection))
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857
        else:
            original_projection = None
        first_df = first.read(epsg=epsg)
        first_gs = first_df.geometry
        first_length = len(first_gs)
        second_df = self.inputs[1].read(epsg=epsg)
        second_gs = second_df.geometry
        min_dist = np.empty(first_length)
        for i, first_features in enumerate(first_gs):
            min_dist[i] = np.min([first_features.distance(second_features)
                                  for second_features in second_gs])

        distance_df = GeoDataFrame.copy(first_df)
        distance_df['distance'] = min_dist
        distance_df.sort_values('distance', inplace=True)
        if original_projection:
            distance_df[distance_df.geometry.name] = \
                distance_df.geometry.to_crs(epsg=original_projection)
        return distance_df

    def calc_postgis(self):
        """
        Uses K-Nearest Neighbor (KNN) query
        """
        diff_queries = []
        diff_params = []
        first = self.inputs[0]
        geom0, epsg = first.geom_column, first.epsg
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(epsg))
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857

        geom1 = self.inputs[1].geom_column
        for pg_io in self.inputs:
            io_query, params = pg_io.get_query()
            diff_queries.append(io_query.rstrip(';'))
            diff_params.insert(0, params)

        diff_params = [item for x in diff_params for item in x]
        dist1 = """, (SELECT ST_Distance(
                ST_Transform({table0}.{geom0},{epsg}),
                ST_Transform(query2.{geom1},{epsg}))
                as distance
                """.format(table0=self.inputs[0].table,
                           geom0=geom0,
                           geom1=geom1,
                           epsg=epsg)

        dist2 = """
                ORDER BY {table0}.{geom0} <#> query2.{geom1} LIMIT 1) FROM
                """.format(table0=self.inputs[0].table,
                           geom0=geom0,
                           geom1=geom1,
                           epsg=epsg)

        dist3 = ' ORDER BY distance ASC'
        query = re.sub('FROM', dist1 + ' FROM (' + diff_queries[1] +
                       ') as query2 ' + dist2, diff_queries[0]) + dist3
        return df_from_postgis(first.engine, query, diff_params, geom0, epsg)

    def compute(self):
        input_classes = list(self.get_input_classes())
        use_postgis = (len(input_classes) == 1 and
                       input_classes[0] == 'PostgisIO')
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
示例#38
0
 def __init__(self, combined=False, **kwargs):
     super(CentroidProcess, self).__init__(**kwargs)
     self.combined = combined
     if not self.output:
         self.output = VectorFileIO(name='result',
                                    uri=self.get_outpath())
示例#39
0
 def __init__(self, distance=None, **kwargs):
     super(NearProcess, self).__init__(**kwargs)
     self.distance = distance
     if not self.output:
         self.output = VectorFileIO(name='result', uri=self.get_outpath())
     self.validate()
示例#40
0
 def __init__(self, **kwargs):
     super(ZonalStatsProcess, self).__init__(**kwargs)
     if not self.output:
         self.output = VectorFileIO(name='result', uri=self.get_outpath())
     self.validate()
示例#41
0
class AutocorrelationProcess(GaiaProcess):
    """
    Calculate Moran's I global autocorrelation for the input data.
    Default number of permutations = 999
    Uses contiguity weight (queen) by default.
    https://pysal.readthedocs.org/en/latest/users/tutorials/autocorrelation.html#moran-s-i
    http://pysal.readthedocs.org/en/latest/library/esda/moran.html

    Returns the following Moran's I attributes as json:
    I: float, value of Moran's I
    EI: float, expected value of I under normality assumption
    p_norm: float, p-value of I under normality assumption
    EI_sim: float, average value of I from permutations
    p_sim: array, p-value based on permutations (one-tailed)
    z_sim: float, standardized I based on permutations
    p_z_sim: float, p-value based on standard normal approximation
    from permutations
    """
    required_inputs = (('input', formats.VECTOR), )
    required_args = ('var_col')
    optional_args = ('adjust_by_col', 'permutations')
    default_output = formats.JSON
    adjust_by_col = None
    permutations = None

    def __init__(self, var_col, **kwargs):
        self.var_col = var_col
        super(AutocorrelationProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = JsonFileIO(name='result', uri=self.get_outpath())

    def compute(self):
        if not self.output:
            self.output = VectorFileIO(name='result', uri=self.get_outpath())
        for input in self.inputs:
            if input.name == 'input':
                first_df = input.read()
        col = self.var_col
        adjust_by_col = self.adjust_by_col
        permutations = self.permutations
        if not permutations:
            permutations = 999

        # filter out null fields
        keep = first_df[col].notnull()
        filtered_df = first_df[keep]

        # get Global Moran's I
        f = np.array(filtered_df[col])
        w = wt.gpd_contiguity(filtered_df)
        if adjust_by_col:
            adjust_by = np.array(filtered_df[adjust_by_col])
            mi = pysal.esda.moran.Moran_Rate(e=f,
                                             b=adjust_by,
                                             w=w,
                                             permutations=permutations)
        else:
            mi = pysal.Moran(y=f, w=w, permutations=permutations)

        keep = ['I', 'EI', 'p_norm', 'EI_sim', 'p_sim', 'z_sim', 'p_z_sim']
        mi_dict = {k: getattr(mi, k) for k in keep}

        self.output.data = mi_dict
        self.output.write()
        logger.debug(self.output)
示例#42
0
 def __init__(self, var_col, **kwargs):
     self.var_col = var_col
     super(AutocorrelationProcess, self).__init__(**kwargs)
     if not self.output:
         self.output = JsonFileIO(name='result', uri=self.get_outpath())
示例#43
0
 def __init__(self, weight_type, **kwargs):
     self.weight_type = weight_type
     super(WeightProcess, self).__init__(**kwargs)
     if not self.output:
         self.output = WeightFileIO(name='result',
                                    uri=self.get_outpath())
示例#44
0
 def __init__(self, var_col, **kwargs):
     self.var_col = var_col
     super(AutocorrelationProcess, self).__init__(**kwargs)
     if not self.output:
         self.output = JsonFileIO(name='result',
                                  uri=self.get_outpath())
示例#45
0
class NearProcess(GaiaProcess):
    """
    Takes two inputs, the second assumed to contain a single feature,
    the first a vector dataset. Requires a distance argument, and the unit of
    measure should be meters.  If inputs are not in a
    metric projection they will be reprojected to EPSG:3857.
    Returns the features in the second input within a specified distance
    of the point in the first input.

    """
    required_inputs = (('first', formats.VECTOR), ('second', formats.VECTOR))
    required_args = ('distance', )
    default_output = formats.JSON

    def __init__(self, distance=None, **kwargs):
        super(NearProcess, self).__init__(**kwargs)
        self.distance = distance
        if not self.output:
            self.output = VectorFileIO(name='result', uri=self.get_outpath())
        self.validate()

    def calc_pandas(self):
        features = self.inputs[0]
        original_projection = self.inputs[0].get_epsg()
        epsg = original_projection
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(original_projection))
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857
        else:
            original_projection = None
        features_df = features.read(epsg=epsg)
        features_gs = features_df.geometry
        point_df = self.inputs[1].read(epsg=epsg)[:1]
        point_gs = point_df.geometry
        features_length = len(features_gs)
        min_dist = np.empty(features_length)
        for i, feature in enumerate(features_gs):
            min_dist[i] = np.min([feature.distance(point_gs[0])])

        nearby_df = GeoDataFrame.copy(features_df)
        nearby_df['distance'] = min_dist
        distance_max = self.distance
        nearby_df = nearby_df[(nearby_df['distance'] <= distance_max)]\
            .sort_values('distance')
        if original_projection:
            nearby_df[nearby_df.geometry.name] = \
                nearby_df.geometry.to_crs(epsg=original_projection)
        return nearby_df

    def calc_postgis(self):
        """
        Uses DWithin plus K-Nearest Neighbor (KNN) query
        """
        featureio = self.inputs[0]
        pointio = self.inputs[1]
        feature_geom, epsg = featureio.geom_column, featureio.epsg
        point_json = json.loads(
            pointio.read(format=formats.JSON))['features'][0]
        point_epsg = pointio.get_epsg()

        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(epsg))
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857

        io_query, params = featureio.get_query()

        point_geom = 'ST_Transform(ST_SetSRID(ST_GeomFromGeoJSON(\'' \
                     '{geojson}\'),{point_epsg}), {epsg})'.\
            format(geojson=json.dumps(point_json['geometry']),
                   point_epsg=point_epsg, epsg=epsg)

        dist1 = """, (SELECT ST_Distance(
                ST_Transform({table0}.{geom0},{epsg}),
                ST_Transform(point, {epsg}))
                FROM {point_geom} as point
                ORDER BY {table0}.{geom0} <#> point LIMIT 1) as distance FROM
                """.format(table0=featureio.table,
                           geom0=feature_geom,
                           point_geom=point_geom,
                           epsg=epsg)

        dist2 = """
                WHERE ST_DWithin({point_geom},
                ST_Transform({table0}.{geom0},{epsg}), {distance})
                """.format(table0=featureio.table,
                           geom0=feature_geom,
                           point_geom=point_geom,
                           epsg=epsg,
                           distance=self.distance)

        dist3 = ' ORDER BY distance ASC'
        query = re.sub('FROM', dist1, io_query).rstrip(';')
        if 'WHERE' in query:
            query = re.sub('WHERE', dist2 + ' AND ', query)
        else:
            query += dist2
        query += dist3
        logger.debug(query)
        return df_from_postgis(featureio.engine, query, params, feature_geom,
                               epsg)

    def compute(self):
        if self.inputs[0].__class__.__name__ == 'PostgisIO':
            data = self.calc_postgis()
        else:
            data = self.calc_pandas()
        self.output.data = data
        self.output.write()
示例#46
0
class EqualsProcess(GaiaProcess):
    """
    Calculates the features within the first vector dataset that touch
    the features of the second vector dataset.
    """

    required_inputs = (('first', formats.VECTOR), ('second', formats.VECTOR))
    required_args = ()
    default_output = formats.JSON

    def __init__(self, **kwargs):
        super(EqualsProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())
        self.validate()

    def calc_pandas(self):
        first, second = self.inputs[0], self.inputs[1]
        first_df = first.read()
        second_df = second.read(epsg=first.get_epsg())
        first_gs = first_df.geometry
        first_length = len(first_gs)
        second_gs = second_df.geometry
        matches = np.empty(first_length)
        for i, first_features in enumerate(first_gs):
            matched = [first_features.equals(second_features)
                       for second_features in second_gs]
            matches[i] = True if (True in matched) else False
        output_df = GeoDataFrame.copy(first_df)
        output_df['equals'] = matches
        output_df = output_df[
            (output_df['equals'] == 1)].drop('equals', 1)
        return output_df

    def calc_postgis(self):
        equals_queries = []
        equals_params = []
        first = self.inputs[0]
        geom0, epsg = first.geom_column, first.epsg
        geom1 = self.inputs[1].geom_column
        for pg_io in self.inputs:
            io_query, params = pg_io.get_query()
            equals_queries.append(io_query.rstrip(';'))
            equals_params.extend(params)
        joinstr = ' AND ' if 'WHERE' in equals_queries[0].upper() else ' WHERE '
        query = '{query0} {join} {geom0} IN (SELECT {geom1} ' \
                'FROM ({query1}) as second)'.format(query0=equals_queries[0],
                                                    query1=equals_queries[1],
                                                    join=joinstr,
                                                    geom0=geom0,
                                                    geom1=geom1)
        logger.debug(query)
        return df_from_postgis(first.engine, query, equals_params, geom0, epsg)

    def compute(self):
        input_classes = list(self.get_input_classes())
        use_postgis = (len(input_classes) == 1 and
                       input_classes[0] == 'PostgisIO')
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
        logger.debug(self.output)
示例#47
0
class DistanceProcess(GaiaProcess):
    """
    Calculates the minimum distance from each feature of the first dataset
    to the nearest feature of the second dataset.
    """
    required_inputs = (('first', formats.VECTOR), ('second', formats.VECTOR))
    required_args = ()
    default_output = formats.JSON

    def __init__(self, **kwargs):
        super(DistanceProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result', uri=self.get_outpath())
        self.validate()

    def calc_pandas(self):
        first = self.inputs[0]
        original_projection = first.get_epsg()
        epsg = original_projection
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(original_projection))
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857
        else:
            original_projection = None
        first_df = first.read(epsg=epsg)
        first_gs = first_df.geometry
        first_length = len(first_gs)
        second_df = self.inputs[1].read(epsg=epsg)
        second_gs = second_df.geometry
        min_dist = np.empty(first_length)
        for i, first_features in enumerate(first_gs):
            min_dist[i] = np.min([
                first_features.distance(second_features)
                for second_features in second_gs
            ])

        distance_df = GeoDataFrame.copy(first_df)
        distance_df['distance'] = min_dist
        distance_df.sort_values('distance', inplace=True)
        if original_projection:
            distance_df[distance_df.geometry.name] = \
                distance_df.geometry.to_crs(epsg=original_projection)
        return distance_df

    def calc_postgis(self):
        """
        Uses K-Nearest Neighbor (KNN) query
        """
        diff_queries = []
        diff_params = []
        first = self.inputs[0]
        geom0, epsg = first.geom_column, first.epsg
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(epsg))
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857

        geom1 = self.inputs[1].geom_column
        for pg_io in self.inputs:
            io_query, params = pg_io.get_query()
            diff_queries.append(io_query.rstrip(';'))
            diff_params.insert(0, params)

        diff_params = [item for x in diff_params for item in x]
        dist1 = """, (SELECT ST_Distance(
                ST_Transform({table0}.{geom0},{epsg}),
                ST_Transform(query2.{geom1},{epsg}))
                as distance
                """.format(table0=self.inputs[0].table,
                           geom0=geom0,
                           geom1=geom1,
                           epsg=epsg)

        dist2 = """
                ORDER BY {table0}.{geom0} <#> query2.{geom1} LIMIT 1) FROM
                """.format(table0=self.inputs[0].table,
                           geom0=geom0,
                           geom1=geom1,
                           epsg=epsg)

        dist3 = ' ORDER BY distance ASC'
        query = re.sub(
            'FROM', dist1 + ' FROM (' + diff_queries[1] + ') as query2 ' +
            dist2, diff_queries[0]) + dist3
        return df_from_postgis(first.engine, query, diff_params, geom0, epsg)

    def compute(self):
        input_classes = list(self.get_input_classes())
        use_postgis = (len(input_classes) == 1
                       and input_classes[0] == 'PostgisIO')
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
示例#48
0
 def __init__(self, **kwargs):
     super(ZonalStatsProcess, self).__init__(**kwargs)
     if not self.output:
         self.output = VectorFileIO(name='result',
                                    uri=self.get_outpath())
     self.validate()
示例#49
0
 def __init__(self, combined=False, **kwargs):
     super(CentroidProcess, self).__init__(**kwargs)
     self.combined = combined
     if not self.output:
         self.output = VectorFileIO(name='result', uri=self.get_outpath())
示例#50
0
 def __init__(self, var_col, **kwargs):
     self.var_col = var_col
     super(ClusterProcess, self).__init__(**kwargs)
     if not self.output:
         self.output = VectorFileIO(name='result',
                                    uri=self.get_outpath())
示例#51
0
class EqualsProcess(GaiaProcess):
    """
    Calculates the features within the first vector dataset that touch
    the features of the second vector dataset.
    """

    required_inputs = (('first', formats.VECTOR), ('second', formats.VECTOR))
    required_args = ()
    default_output = formats.JSON

    def __init__(self, **kwargs):
        super(EqualsProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result', uri=self.get_outpath())
        self.validate()

    def calc_pandas(self):
        first, second = self.inputs[0], self.inputs[1]
        first_df = first.read()
        second_df = second.read(epsg=first.get_epsg())
        first_gs = first_df.geometry
        first_length = len(first_gs)
        second_gs = second_df.geometry
        matches = np.empty(first_length)
        for i, first_features in enumerate(first_gs):
            matched = [
                first_features.equals(second_features)
                for second_features in second_gs
            ]
            matches[i] = True if (True in matched) else False
        output_df = GeoDataFrame.copy(first_df)
        output_df['equals'] = matches
        output_df = output_df[(output_df['equals'] == 1)].drop('equals', 1)
        return output_df

    def calc_postgis(self):
        equals_queries = []
        equals_params = []
        first = self.inputs[0]
        geom0, epsg = first.geom_column, first.epsg
        geom1 = self.inputs[1].geom_column
        for pg_io in self.inputs:
            io_query, params = pg_io.get_query()
            equals_queries.append(io_query.rstrip(';'))
            equals_params.extend(params)
        joinstr = ' AND ' if 'WHERE' in equals_queries[0].upper(
        ) else ' WHERE '
        query = '{query0} {join} {geom0} IN (SELECT {geom1} ' \
                'FROM ({query1}) as second)'.format(query0=equals_queries[0],
                                                    query1=equals_queries[1],
                                                    join=joinstr,
                                                    geom0=geom0,
                                                    geom1=geom1)
        logger.debug(query)
        return df_from_postgis(first.engine, query, equals_params, geom0, epsg)

    def compute(self):
        input_classes = list(self.get_input_classes())
        use_postgis = (len(input_classes) == 1
                       and input_classes[0] == 'PostgisIO')
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
        logger.debug(self.output)
示例#52
0
class AutocorrelationProcess(GaiaProcess):
    """
    Calculate Moran's I global autocorrelation for the input data.
    Default number of permutations = 999
    Uses contiguity weight (queen) by default.
    https://pysal.readthedocs.org/en/latest/users/tutorials/autocorrelation.html#moran-s-i
    http://pysal.readthedocs.org/en/latest/library/esda/moran.html

    Returns the following Moran's I attributes as json:
    I: float, value of Moran's I
    EI: float, expected value of I under normality assumption
    p_norm: float, p-value of I under normality assumption
    EI_sim: float, average value of I from permutations
    p_sim: array, p-value based on permutations (one-tailed)
    z_sim: float, standardized I based on permutations
    p_z_sim: float, p-value based on standard normal approximation
    from permutations
    """
    required_inputs = (('input', formats.VECTOR),)
    required_args = ('var_col')
    optional_args = ('adjust_by_col', 'permutations')
    default_output = formats.JSON
    adjust_by_col = None
    permutations = None

    def __init__(self, var_col, **kwargs):
        self.var_col = var_col
        super(AutocorrelationProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = JsonFileIO(name='result',
                                     uri=self.get_outpath())

    def compute(self):
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())
        for input in self.inputs:
            if input.name == 'input':
                first_df = input.read()
        col = self.var_col
        adjust_by_col = self.adjust_by_col
        permutations = self.permutations
        if not permutations:
            permutations = 999

        # filter out null fields
        keep = first_df[col].notnull()
        filtered_df = first_df[keep]

        # get Global Moran's I
        f = np.array(filtered_df[col])
        w = wt.gpd_contiguity(filtered_df)
        if adjust_by_col:
            adjust_by = np.array(filtered_df[adjust_by_col])
            mi = pysal.esda.moran.Moran_Rate(e=f, b=adjust_by, w=w,
                                             permutations=permutations)
        else:
            mi = pysal.Moran(y=f, w=w, permutations=permutations)

        keep = ['I', 'EI', 'p_norm', 'EI_sim', 'p_sim', 'z_sim', 'p_z_sim']
        mi_dict = {k: getattr(mi, k) for k in keep}

        self.output.data = mi_dict
        self.output.write()
        logger.debug(self.output)
示例#53
0
 def __init__(self, var_col, **kwargs):
     self.var_col = var_col
     super(ClusterProcess, self).__init__(**kwargs)
     if not self.output:
         self.output = VectorFileIO(name='result', uri=self.get_outpath())