Exemplo n.º 1
0
    def _parse_girder_url(url):
        """Split a ``girder://`` url into its resource type and id.

        Declared without ``self``/``cls`` (presumably a static method; the
        decorator is not visible here).

        :param url: url string, expected as ``girder://<type>/<id>``
        :return: None if url does not start with the girder scheme,
            otherwise tuple(resource_type, resource_id)
        :raises GaiaException: if url is None, if the path after the scheme
            does not have exactly two segments, or if the resource type is
            neither "item" nor "folder"
        """
        if url is None:
            raise GaiaException('Internal error - url is None')

        girder_scheme = 'girder://'
        if not url.startswith(girder_scheme):
            return None

        # Extract resource type (item or folder) and id
        path_list = url[len(girder_scheme):].split('/')
        if len(path_list) != 2:
            raise GaiaException('Invalid girder url; path must be length 2')

        resource_type, resource_id = path_list
        if resource_type not in ('item', 'folder'):
            # Adjacent literals instead of a backslash continuation inside a
            # triple-quoted string, which leaked the source indentation into
            # the message text.
            msg = ('Invalid girder url; path must start with either '
                   '"item/" or "folder/"')
            raise GaiaException(msg)

        return resource_type, resource_id
Exemplo n.º 2
0
    def can_read(source, *args, **kwargs):
        """Report whether this reader can handle ``source``.

        Declared without ``self``/``cls`` (presumably a static method; the
        decorator is not visible here). Extra positional and keyword
        arguments are accepted and ignored.

        :param source: either a url string or a tuple
            (GirderInterface, path)
        :return: True for a girder url or a usable (GirderInterface, path)
            tuple; False for any other string, for anything that is not a
            2-tuple, or when the first tuple element is not a
            GirderInterface
        :raises GaiaException: if the tuple's second element is not a
            string, or if the GirderInterface is not initialized; errors
            raised by ``_parse_girder_url`` for a girder url propagate
        """
        # For now, support either url (string) or tuple (GirderInterface,path)
        if isinstance(source, str):
            if not source.startswith('girder://'):
                return False

            # Todo Confirm that resource exists on girder?
            return GirderReader._parse_girder_url(source) is not None

        # Must be exactly a 2-tuple. The previous "and" let wrong-length
        # tuples through to the unpacking below and called len() on
        # objects that may not have a length.
        if not isinstance(source, tuple) or len(source) != 2:
            return False

        gint, path = source
        if not isinstance(gint, GirderInterface):
            return False

        if not isinstance(path, str):
            raise GaiaException('Second tuple element is not a string')

        if not gint.is_initialized():
            # Adjacent literals keep source indentation out of the message.
            msg = ('Cannot read girder object; '
                   'must first call gaia.connect()')
            raise GaiaException(msg)

        return True
Exemplo n.º 3
0
    def read(self, **kwargs):
        """Build a GirderDataObject that refers to the resource on Girder.

        Nothing is loaded or moved here; the data stays on Girder.
        Todo: kwargs should probably be a union of raster and vector types,
        that get passed on to the returned object.

        The resource is identified from ``self.url`` when that is set,
        otherwise by looking up the path in ``self.girder_source``.

        :return: GirderDataObject for the resolved resource type and id
        :raises GaiaException: if the url is not a girder url, if the path
            lookup returns None, or if neither url nor girder_source is set
        """
        if self.url:
            parsed = self.__class__._parse_girder_url(self.url)
            if parsed is None:
                raise GaiaException('Internal error - not a girder url')
            kind, identifier = parsed
            return GirderDataObject(self, kind, identifier)

        if self.girder_source:
            interface, remote_path = self.girder_source
            found = interface.lookup_resource(remote_path)
            if found is None:
                raise GaiaException(
                    'File not found on Girder at specified path ({})'.format(
                        remote_path))
            return GirderDataObject(self, found['_modelType'], found['_id'])

        raise GaiaException(
            'Internal error - should never reach end of GirderReader.read()')
Exemplo n.º 4
0
def validate_base(inputs,
                  args,
                  required_inputs=[],
                  required_args=[],
                  optional_args=[]):
    """
    Ensure that all required inputs and arguments are present.

    The list defaults are only read, never mutated.

    :param inputs: process inputs; each exposes ``datatype`` and, when that
        is ``types.PROCESS``, a ``default_output`` iterable of formats
    :param args: mapping of argument name to supplied value
    :param required_inputs: list of dicts with 'type' and 'max' keys; the
        last entry also governs any inputs beyond the declared ones
    :param required_args: list of dicts with 'name', 'type' and optionally
        'options' (allowed values)
    :param optional_args: same shape as required_args; an entry is only
        checked when the argument is supplied and not None
    :raises GaiaException: on missing/extra/mistyped inputs, a missing
        required argument, or an argument value outside its 'options'
    """
    input_types = []
    errors = []

    for procInput in inputs:
        inputDataType = procInput.datatype
        if inputDataType == types.PROCESS:
            # A process input is typed by whichever data type lists one of
            # its default output formats.
            for t in [i for i in dir(types) if not i.startswith("__")]:
                type_formats = getattr(formats, t, [])
                if any(x in type_formats for x in procInput.default_output):
                    inputDataType = getattr(types, t)
                    break
        input_types.append(inputDataType)

    for i, req_input in enumerate(required_inputs):
        if i >= len(input_types):
            errors.append("Not enough inputs for process")
        elif req_input['type'] != input_types[i]:
            errors.append("Input #{} is of incorrect type.".format(i + 1))

    n_inputs = len(input_types)
    n_required = len(required_inputs)
    if n_inputs > n_required:
        if not required_inputs:
            # Nothing declared, so there is no "last" entry to consult;
            # report the surplus instead of failing with IndexError.
            errors.append(
                "Incorrect # of inputs; expected {}".format(n_required))
        else:
            last_required = required_inputs[-1]
            max_count = last_required['max']
            if (max_count is not None and
                    n_inputs > n_required + max_count - 1):
                errors.append(
                    "Incorrect # of inputs; expected {}".format(n_required))
            else:
                # Surplus inputs must repeat the last required type
                for i in range(n_required - 1, n_inputs):
                    if input_types[i] != last_required['type']:
                        errors.append(
                            "Input #{} is of incorrect type.".format(i + 1))
    if errors:
        raise GaiaException('\n'.join(errors))

    for item in required_args:
        arg, arg_type = item['name'], item['type']
        if arg not in args or args[arg] is None:
            raise GaiaException('Missing required argument {}'.format(arg))
        test_arg_type(args, arg, arg_type)
        if 'options' in item and args[arg] not in item['options']:
            raise GaiaException('Invalid value for {}'.format(item['name']))

    for item in optional_args:
        arg, arg_type = item['name'], item['type']
        # Look the value up in the supplied args. The previous check used
        # the optional_args spec list itself, so it never matched and
        # optional arguments were never validated.
        if arg in args and args[arg] is not None:
            test_arg_type(args, arg, arg_type)
            if 'options' in item and args[arg] not in item['options']:
                raise GaiaException('Invalid value for {}'.format(
                    item['name']))
Exemplo n.º 5
0
    def validate(self):
        """
        Check this process's inputs and arguments against its declared
        requirements.

        Reads ``self.inputs``, ``self.required_inputs``,
        ``self.required_args`` and ``self.optional_args``; argument values
        are looked up as attributes on ``self``.

        :raises GaiaException: on missing/extra/mistyped inputs, a missing
            required argument, or an argument value outside its 'options'
        """
        resolved_types = []
        problems = []

        for proc_input in self.inputs:
            resolved = proc_input.type
            if resolved == types.PROCESS:
                # A process input takes the data type that lists one of its
                # default output formats.
                for name in (n for n in dir(types) if not n.startswith("__")):
                    known_formats = getattr(formats, name, [])
                    if any(fmt in known_formats
                           for fmt in proc_input.default_output):
                        resolved = getattr(types, name)
                        break
            resolved_types.append(resolved)

        expected = self.required_inputs
        supplied_count = len(resolved_types)
        expected_count = len(expected)

        for position, requirement in enumerate(expected):
            if position >= supplied_count:
                problems.append("Not enough inputs for process")
            elif requirement['type'] != resolved_types[position]:
                problems.append(
                    "Input #{} is of incorrect type.".format(position + 1))

        if supplied_count > expected_count:
            final_requirement = expected[-1]
            limit = final_requirement['max']
            if (limit is not None and
                    supplied_count > expected_count + limit - 1):
                problems.append("Incorrect # of inputs; expected {}".format(
                    expected_count))
            else:
                # Surplus inputs must repeat the last required type
                for position in range(expected_count - 1, supplied_count):
                    if resolved_types[position] != final_requirement['type']:
                        problems.append(
                            "Input #{} is of incorrect type.".format(
                                position + 1))

        if problems:
            raise GaiaException('\n'.join(problems))

        for spec in self.required_args:
            arg, arg_type = spec['name'], spec['type']
            if getattr(self, arg, None) is None:
                raise GaiaException('Missing required argument {}'.format(arg))
            self.test_arg_type(arg, arg_type)
            if 'options' in spec and getattr(self, arg) not in spec['options']:
                raise GaiaException('Invalid value for {}'.format(spec['name']))

        for spec in self.optional_args:
            arg, arg_type = spec['name'], spec['type']
            if not hasattr(self, arg) or getattr(self, arg) is None:
                continue
            self.test_arg_type(arg, arg_type)
            current = getattr(self, arg)
            if 'options' in spec and current not in spec['options']:
                raise GaiaException(
                    'Invalid value for {}'.format(spec['name']))
Exemplo n.º 6
0
    def __call__(cls, *args, **kwargs):
        """Create a reader instance, acting as a factory for GaiaReader.

        Calling any class other than GaiaReader instantiates that class
        directly. Calling GaiaReader itself selects a registered subclass:
        the one named by the ``reader_class`` keyword if present, otherwise
        the first registry entry whose ``can_read`` accepts the arguments.

        :return: the new instance, after its ``__init__`` has been run with
            the original args and kwargs
        :raises GaiaException: if GaiaReader is called and no registered
            subclass is selected
        """
        registry = GaiaReaderFactoryMetaclass._registry

        if cls is not GaiaReader:
            # Allow for direct subclass instantiation
            instance = cls.__new__(cls, args, kwargs)
        else:
            chosen = None
            if 'reader_class' in kwargs:
                requested = kwargs['reader_class']
                chosen = registry[requested] if requested in registry else None
            else:
                for candidate in registry.values():
                    if not hasattr(candidate, 'can_read'):
                        continue
                    if candidate.can_read(*args, **kwargs):
                        # FIXME: first match wins
                        chosen = candidate
                        break

            if not chosen:
                argsstr = 'args: %s, kwargs: %s' % (args, kwargs)
                raise GaiaException(
                    'Unable to find GaiaReader subclass for: %s' % argsstr)
            instance = chosen.__new__(chosen, args, kwargs)

        if instance is not None:
            instance.__init__(*args, **kwargs)

        return instance
Exemplo n.º 7
0
 def transform_data(self, outformat=None, epsg=None):
     """
     Return a copy of the IO data, reprojected and/or serialized as
     requested.

     :param outformat: Output format; ``formats.JSON`` for a GeoJSON
         string, ``formats.PANDAS`` or None for a GeoDataFrame
     :param epsg: target EPSG code; reprojection happens when it is truthy
         and differs from ``str(self.get_epsg())``
     :return: GeoJSON string or GeoDataFrame copy
     :raises GaiaException: if the requested format is not supported
     """
     frame = geopandas.GeoDataFrame.copy(self.data)

     if epsg and str(self.get_epsg()) != epsg:
         geometry_column = frame.geometry.name
         frame[geometry_column] = self.data.geometry.to_crs(epsg=epsg)
         frame.crs = fiona.crs.from_epsg(epsg)

     wants_json = outformat == formats.JSON and self.default_output in (
         formats.PANDAS, formats.JSON)
     if wants_json:
         serialized = frame.to_json()
         if not frame.crs:
             return serialized
         # Embed a named crs member in the GeoJSON document
         document = json.loads(serialized)
         document["crs"] = {
             "type": "name",
             "properties": {
                 "name": frame.crs["init"].upper()
             }
         }
         return json.dumps(document)

     if outformat in [formats.PANDAS, None]:
         return frame

     raise GaiaException("Format {} not supported".format(outformat))
Exemplo n.º 8
0
    def calc_postgis(self):
        """
        Calculate the union using PostGIS

        Builds ``(query0) UNION (query1)`` from the first two inputs. When
        their EPSG codes differ, the second input's quoted geometry column
        is wrapped in ``ST_Transform`` to the first input's EPSG.

        :return: union result as a GeoDataFrame
        :raises GaiaException: if the two inputs do not have the same
            columns
        """
        first = self.inputs[0]
        second = self.inputs[1]
        geom0, epsg = first.geom_column, first.epsg
        geom1, epsg1 = second.geom_column, second.epsg
        # Compare element-wise: joining the names into one string would
        # treat e.g. ['ab', 'c'] and ['a', 'bc'] as equal.
        if list(first.columns) != list(second.columns):
            raise GaiaException('Inputs must have the same columns')

        union_queries = []
        union_params = []
        # Only these two queries are used below, so only their parameters
        # may be collected.
        for pg_io in (first, second):
            io_query, params = pg_io.get_query()
            union_queries.append(io_query.rstrip(';'))
            union_params.extend(params)

        if epsg1 != epsg:
            geom1_query = 'ST_Transform({},{})'.format(geom1, epsg)
            union_queries[1] = union_queries[1].replace(
                '"{}"'.format(geom1), geom1_query)
        query = '({query0}) UNION ({query1})'\
            .format(query0=union_queries[0], query1=union_queries[1])
        return df_from_postgis(first.engine,
                               query, union_params, geom0, epsg)
Exemplo n.º 9
0
    def validator(inputs=[], args={}):
        """Require a GeoDataFrame first input, then defer to ``v``.

        :param inputs: process inputs; the first one's ``get_data()``
            result must be exactly a GeoDataFrame
        :param args: process arguments, passed through to ``v``
        :return: whatever ``v(inputs, args)`` returns
        :raises GaiaException: if the first input's data is not a
            GeoDataFrame
        """
        # First should check if input is compatible w/ pandas computation
        first_data = inputs[0].get_data()
        if type(first_data) is GeoDataFrame:
            # Call up the chain to let parent do common validation
            return v(inputs, args)

        raise GaiaException('pandas process requires a GeoDataFrame')
Exemplo n.º 10
0
    def get_epsg(self):
        """
        Get the EPSG code of the data

        Calls ``self.read()`` first when ``self.data`` is None. For a
        GeoDataFrame without a crs, the projection is guessed from the
        x-extent of the geometries; for one whose crs has no numeric
        'init' code, EPSG:4326 is assumed. In those cases the crs is
        written back to the data. For a raster 'Dataset' the code is read
        from the projection's AUTHORITY attribute.

        :return: EPSG code (integer); None if the data is neither a
            GeoDataFrame nor a Dataset
        :raises GaiaException: if the projection cannot be guessed from
            the coordinates, or the raster has no usable EPSG authority
            code
        """
        if self.data is None:
            self.read()

        data_class = self.data.__class__.__name__
        if data_class == 'GeoDataFrame':
            if self.data.crs is None:
                # Make educated guess about projection based on longitude coords
                minx = min(self.data.geometry.bounds['minx'])
                maxx = max(self.data.geometry.bounds['maxx'])
                if minx >= -180.0 and maxx <= 180.0:
                    self.epsg = 4326
                elif minx >= -20026376.39 and maxx <= 20026376.39:
                    self.epsg = 3857
                else:
                    raise GaiaException('Could not determine data projection.')
                self.data.crs = fiona_crs.from_epsg(self.epsg)
                return self.epsg

            crs = self.data.crs.get('init', None)
            if crs:
                if ':' in crs:
                    crs = crs.split(':')[1]
                if crs.isdigit():
                    self.epsg = int(crs)
                    return self.epsg

            # No usable code in the crs: assume EPSG:4326
            self.epsg = 4326
            self.data.crs = fiona_crs.from_epsg(self.epsg)
            return self.epsg

        if data_class == 'Dataset':
            projection = self.data.GetProjection()
            data_crs = osr.SpatialReference(wkt=projection)
            try:
                self.epsg = int(data_crs.GetAttrValue('AUTHORITY', 1))
            except (KeyError, TypeError, ValueError):
                # int(None) raises TypeError and a non-numeric value raises
                # ValueError; "except KeyError" alone let both escape
                # instead of reporting the failure.
                raise GaiaException("EPSG code coud not be determined")
            return self.epsg

        return None
Exemplo n.º 11
0
def compute(processName, inputs, args):
    """
    Find a registered process for ``processName`` and run it.

    Candidates from ``find_processes`` are tried in order; the result of
    the first one that does not raise GaiaException is returned.

    :param processName: name used to look up candidate processes
    :param inputs: inputs handed to each candidate
    :param args: arguments handed to each candidate
    :return: the result of the first candidate that succeeds
    :raises GaiaException: if no candidate exists, or every candidate
        raised GaiaException
    """
    candidates = find_processes(processName)

    if not candidates:
        list_processes(processName)
        raise GaiaException('Unable to find suitable %s process' % processName)

    # How will we choose between equally "valid" processes?  For now
    # just return the first one that accepts the inputs.
    for candidate in candidates:
        try:
            result = candidate(inputs, args)
        except GaiaException:
            continue
        return result

    raise GaiaException('No registered processes were able to validate inputs')
Exemplo n.º 12
0
 def compute(self):
     """
     Execute the Within process and write its result to the output.

     PostGIS is used only when ``get_input_classes()`` yields exactly one
     class name and it is 'PostgisIO'; otherwise pandas is used.

     :raises GaiaException: if the process does not have exactly 2 inputs
     """
     if len(self.inputs) != 2:
         raise GaiaException('WithinProcess requires 2 inputs')

     class_names = list(self.get_input_classes())
     if class_names == ['PostgisIO']:
         result = self.calc_postgis()
     else:
         result = self.calc_pandas()

     self.output.data = result
     self.output.write()
Exemplo n.º 13
0
def test_arg_type(args, arg, arg_type):
    """
    Check that a process argument can be cast to its required type.

    The cast result is discarded; only success or failure matters. Any
    exception, including a failed lookup of ``arg`` in ``args``, is
    reported as a GaiaException.

    :param args: mapping of argument names to values
    :param arg: The argument property
    :param arg_type: The required argument type (int, str, etc)
    :raises GaiaException: if the lookup or the cast fails
    """
    try:
        arg_type(args[arg])
    except Exception:
        message = 'Required argument {} must be of type {}'.format(
            arg, arg_type)
        raise GaiaException(message)
Exemplo n.º 14
0
    def calc_pandas(self):
        """
        Calculate the union using pandas GeoDataFrames

        The second input is read with the first input's EPSG code, then
        the two frames are concatenated.

        :return: union result as a GeoDataFrame
        :raises GaiaException: if the two frames do not have the same
            columns
        """
        first, second = self.inputs[0], self.inputs[1]
        first_df = first.read()
        second_df = second.read(epsg=first.get_epsg())
        # Compare labels element-wise: joining them into one string would
        # treat e.g. ['ab', 'c'] and ['a', 'bc'] as equal, and fails
        # outright on non-string labels.
        if list(first_df.columns) != list(second_df.columns):
            raise GaiaException('Inputs must have the same columns')
        return GeoDataFrame(pd.concat([first_df, second_df]))
Exemplo n.º 15
0
 def __init__(self, uri='', **kwargs):
     """
     Set up a file-backed IO object.

     :param uri: Filepath of IO object
     :param kwargs: forwarded unchanged to the parent initializer
     :raises GaiaException: if ``allowed_folder`` rejects a non-empty uri
     """
     if uri:
         if not self.allowed_folder(uri):
             folder = os.path.dirname(uri)
             raise GaiaException(
                 "Access to this directory is not permitted : {}".format(
                     folder))

     self.uri = uri
     super(FileIO, self).__init__(uri=uri, **kwargs)

     # ext is only assigned when a uri is present
     if not self.uri:
         return
     extension = os.path.splitext(self.uri)[1]
     self.ext = extension.lower()
Exemplo n.º 16
0
    def get_epsg(self):
        """
        Get the EPSG code of the raster data, computing it at most once.

        On first use the data is loaded via ``get_data()`` if needed and
        the code is read from the projection's AUTHORITY attribute; the
        result is kept in ``self.epsg`` and reused afterwards.

        :return: EPSG code (integer)
        :raises GaiaException: if the projection has no usable EPSG
            authority code
        """
        if not self._epsgComputed:
            if not self._data:
                self.get_data()

            projection = self._data.GetProjection()
            data_crs = osr.SpatialReference(wkt=projection)

            try:
                self.epsg = int(data_crs.GetAttrValue('AUTHORITY', 1))
            except (KeyError, TypeError, ValueError):
                # int(None) raises TypeError and a non-numeric value raises
                # ValueError; "except KeyError" alone let both escape
                # instead of reporting the failure.
                raise GaiaException("EPSG code coud not be determined")
            self._epsgComputed = True

        return self.epsg
Exemplo n.º 17
0
def gen_zonalstats(zones_json, raster):
    """
    Generator function that yields the statistics of a raster dataset
    within each polygon (zone) of a vector dataset.

    For every feature, the raster window covering the feature's bounding
    box is read from band 1 and masked with the rasterized zone layer.
    The feature's 'properties' then receive 'count', 'sum', 'mean',
    'median', 'min', 'max' and 'stddev'. All of them are None when the
    window cannot be read; all except 'count' are None when the zone holds
    no valid pixels. 'median' is only written when it is not NaN.

    :param zones_json: Polygons in GeoJSON format (str or parsed dict)
    :param raster: Raster dataset, resolved through ``get_dataset``
    :return: Polygons with additional properties for calculated raster stats.
    :raises GaiaException: if a feature geometry is neither a Polygon nor
        a MultiPolygon
    """
    # Open data
    raster = get_dataset(raster)
    if type(zones_json) is str:
        shp = ogr.Open(zones_json)
        zones_json = json.loads(zones_json)
    else:
        shp = ogr.Open(json.dumps(zones_json))

    lyr = shp.GetLayer()

    # Get raster georeference info
    transform = raster.GetGeoTransform()
    xOrigin = transform[0]
    yOrigin = transform[3]
    pixelWidth = transform[1]
    pixelHeight = transform[5]
    # Row size as a positive number; identical to pixelWidth for square
    # pixels, which is what the row maths below previously assumed.
    rowHeight = abs(pixelHeight)

    # Reproject vector geometry to same projection as raster
    sourceSR = lyr.GetSpatialRef()
    targetSR = osr.SpatialReference()
    targetSR.ImportFromWkt(raster.GetProjectionRef())
    coordTrans = osr.CoordinateTransformation(sourceSR, targetSR)
    # TODO: Use a multiprocessing pool to process features more quickly
    for feature in zones_json['features']:
        geom = ogr.CreateGeometryFromJson(json.dumps(feature['geometry']))
        if sourceSR.ExportToWkt() != targetSR.ExportToWkt():
            geom.Transform(coordTrans)

        # Collect the exterior ring of every polygon in the feature
        geom_name = geom.GetGeometryName()
        if geom_name == 'MULTIPOLYGON':
            rings = [geom.GetGeometryRef(index).GetGeometryRef(0)
                     for index, _ in enumerate(geom)]
        elif geom_name == 'POLYGON':
            rings = [geom.GetGeometryRef(0)]
        else:
            raise GaiaException(
                "ERROR: Geometry needs to be either Polygon or Multipolygon")

        # Get extent of feat, ignoring infinite coordinates
        pointsX = []
        pointsY = []
        for ring in rings:
            for p in range(ring.GetPointCount()):
                lon, lat, z = ring.GetPoint(p)
                if abs(lon) != float('inf'):
                    pointsX.append(lon)
                if abs(lat) != float('inf'):
                    pointsY.append(lat)

        xmin = min(pointsX)
        xmax = max(pointsX)
        ymin = min(pointsY)
        ymax = max(pointsY)

        # Specify offset and rows and columns to read. Columns use the
        # pixel width, rows the pixel height.
        xoff = int((xmin - xOrigin) / pixelWidth)
        yoff = int((yOrigin - ymax) / rowHeight)
        xcount = int((xmax - xmin) / pixelWidth) + 1
        ycount = int((ymax - ymin) / rowHeight) + 1

        # Create memory target raster
        target_ds = gdal.GetDriverByName('MEM').Create('', xcount, ycount, 1,
                                                       gdal.GDT_Byte)
        target_ds.SetGeoTransform((
            xmin,
            pixelWidth,
            0,
            ymax,
            0,
            pixelHeight,
        ))

        # Create for target raster the same projection as for the value raster
        raster_srs = osr.SpatialReference()
        raster_srs.ImportFromWkt(raster.GetProjectionRef())
        target_ds.SetProjection(raster_srs.ExportToWkt())

        # Rasterize zone polygon to raster
        gdal.RasterizeLayer(target_ds, [1], lyr, burn_values=[1])

        # Read raster as arrays
        banddataraster = raster.GetRasterBand(1)
        try:
            # Builtin float replaces the removed numpy.float alias, whose
            # AttributeError was being swallowed by the handler below.
            dataraster = banddataraster.ReadAsArray(xoff, yoff, xcount,
                                                    ycount).astype(float)
        except AttributeError:
            # Nothing within bounds (no array to convert), move on to
            # next polygon
            properties = feature['properties']
            for p in [
                    'count', 'sum', 'mean', 'median', 'min', 'max', 'stddev'
            ]:
                properties[p] = None
            yield feature
        else:
            # Get no data value of array
            noDataValue = banddataraster.GetNoDataValue()
            # Compare against None so a nodata value of 0 is honoured too
            if noDataValue is not None:
                # Update no data value in array with new value
                dataraster[dataraster == noDataValue] = numpy.nan

            bandmask = target_ds.GetRasterBand(1)
            datamask = bandmask.ReadAsArray(0, 0, xcount,
                                            ycount).astype(float)

            # Mask zone of raster
            zoneraster = numpy.ma.masked_array(dataraster,
                                               numpy.logical_not(datamask))

            properties = feature['properties']
            properties['count'] = zoneraster.count()
            properties['sum'] = numpy.nansum(zoneraster)
            if type(properties['sum']) == MaskedConstant:
                # No non-null values for raster data in polygon, skip
                for p in ['sum', 'mean', 'median', 'min', 'max', 'stddev']:
                    properties[p] = None
            else:
                properties['mean'] = numpy.nanmean(zoneraster)
                properties['min'] = numpy.nanmin(zoneraster)
                properties['max'] = numpy.nanmax(zoneraster)
                properties['stddev'] = numpy.nanstd(zoneraster)
                median = numpy.ma.median(zoneraster)
                # 'median' is left unset when it is NaN
                if hasattr(median, 'data') and not numpy.isnan(median.data):
                    properties['median'] = median.data.item()
            yield feature