Example #1
 def convert_code(in_file, out_file, in_alg='taudem', out_alg='arcgis', datatype=None):
     """
     Convert D8 flow direction code from one algorithm to another.
     Args:
         in_file: input raster file path
         out_file: output raster file path
         in_alg: input algorithm name; valid names are the keys of FlowModelConst.d8_dirs.
                 "taudem" is the default.
         out_alg: output algorithm name, with the same options as in_alg. "arcgis" is the default.
         datatype: GDAL data type of the output raster. The default is None, which means the
                   data type of in_file is used.
     """
     FileClass.check_file_exists(in_file)
     in_alg = in_alg.lower()
     out_alg = out_alg.lower()
     if in_alg not in FlowModelConst.d8_dirs or out_alg not in FlowModelConst.d8_dirs:
         raise RuntimeError('The input algorithm name should be one of %s' %
                            ', '.join(list(FlowModelConst.d8_dirs.keys())))
     convert_dict = dict()
     in_code = FlowModelConst.d8_dirs.get(in_alg)
     out_code = FlowModelConst.d8_dirs.get(out_alg)
     assert len(in_code) == len(out_code)
     for i, tmp_in_code in enumerate(in_code):
         convert_dict[tmp_in_code] = out_code[i]
     if datatype is not None and datatype in GDALDataType:
         RasterUtilClass.raster_reclassify(in_file, convert_dict, out_file, datatype)
     else:
         RasterUtilClass.raster_reclassify(in_file, convert_dict, out_file)
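For reference, a minimal self-contained sketch of the reclassification this function performs, using commonly cited TauDEM and ArcGIS D8 encodings. The literal code tables below are assumptions; the authoritative ones live in FlowModelConst.d8_dirs.

import numpy as np

# Assumed D8 code tables (the real ones are defined in FlowModelConst.d8_dirs):
# TauDEM counts 1-8 counter-clockwise from east; ArcGIS uses powers of two.
taudem_codes = [1, 2, 3, 4, 5, 6, 7, 8]       # E, NE, N, NW, W, SW, S, SE
arcgis_codes = [1, 128, 64, 32, 16, 8, 4, 2]  # same directions in ArcGIS encoding
convert_dict = dict(zip(taudem_codes, arcgis_codes))

flowdir_taudem = np.array([[1, 3], [5, 7]])
flowdir_arcgis = np.vectorize(convert_dict.get)(flowdir_taudem)
print(flowdir_arcgis)  # [[ 1 64] [16  4]]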
Example #2
    def mask_origin_delineated_data(cfg):
        """Mask the original delineated data by Subbasin raster."""
        subbasin_tau_file = cfg.taudems.subbsn
        geodata2dbdir = cfg.dirs.geodata2db
        UtilClass.mkdir(geodata2dbdir)
        mask_file = cfg.spatials.mask
        RasterUtilClass.get_mask_from_raster(subbasin_tau_file, mask_file)
        # Total 12 raster files
        original_files = [
            cfg.taudems.subbsn, cfg.taudems.d8flow, cfg.taudems.stream_raster,
            cfg.taudems.slp, cfg.taudems.filldem, cfg.taudems.d8acc,
            cfg.taudems.stream_order, cfg.taudems.dinf, cfg.taudems.dinf_d8dir,
            cfg.taudems.dinf_slp, cfg.taudems.dinf_weight,
            cfg.taudems.dist2stream_d8
        ]
        # output masked files
        output_files = [
            cfg.taudems.subbsn_m, cfg.taudems.d8flow_m, cfg.taudems.stream_m,
            cfg.spatials.slope, cfg.spatials.filldem, cfg.spatials.d8acc,
            cfg.spatials.stream_order, cfg.spatials.dinf,
            cfg.spatials.dinf_d8dir, cfg.spatials.dinf_slp,
            cfg.spatials.dinf_weight, cfg.spatials.dist2stream_d8
        ]

        default_values = list()
        for i in range(len(original_files)):
            default_values.append(DEFAULT_NODATA)

        # other input rasters need to be masked
        # soil and landuse
        FileClass.check_file_exists(cfg.soil)
        FileClass.check_file_exists(cfg.landuse)
        original_files.append(cfg.soil)
        output_files.append(cfg.spatials.soil_type)
        default_values.append(cfg.default_soil)
        original_files.append(cfg.landuse)
        output_files.append(cfg.spatials.landuse)
        default_values.append(cfg.default_landuse)

        # Additional raster file
        for k, v in cfg.additional_rs.items():
            org_v = v
            if not FileClass.is_file_exists(org_v):
                v = cfg.spatial_dir + os.path.sep + org_v
                if not FileClass.is_file_exists(v):
                    print('WARNING: The additional file %s MUST be located in '
                          'SPATIAL_DATA_DIR or provided as a full file path!' %
                          k)
                    continue
            original_files.append(v)
            output_files.append(cfg.dirs.geodata2db + os.path.sep + k + '.tif')
            default_values.append(DEFAULT_NODATA)

        config_file = cfg.logs.mask_cfg
        # run mask operation
        print('Mask original delineated data by Subbasin raster...')
        SpatialDelineation.mask_raster_cpp(cfg.seims_bin, mask_file,
                                           original_files, output_files,
                                           default_values, config_file)
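The three lists above are kept index-aligned so that each raster travels with its masked output path and its default nodata value. A tiny standalone sketch of that pairing follows; the file names and the printed format are purely illustrative and are not the config format expected by mask_raster_cpp.

original_files = ['subbsn.tif', 'flowd8.tif']   # assumed sample inputs
output_files = ['subbsn_m.tif', 'flowd8_m.tif']
default_values = [-9999, -9999]                 # stand-in for DEFAULT_NODATA

for src, dst, nodata in zip(original_files, output_files, default_values):
    print('%s -> %s (default: %s)' % (src, dst, nodata))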
Example #3
 def raster2shp(rasterfile, vectorshp, layername=None, fieldname=None):
     """Convert raster to ESRI shapefile"""
     FileClass.remove_files(vectorshp)
     FileClass.check_file_exists(rasterfile)
     # raster to polygon vector
     exepath = FileClass.get_executable_fullpath("gdal_polygonize.py")
     str_cmd = 'python %s -f "ESRI Shapefile" %s %s' % (exepath, rasterfile, vectorshp)
     if layername is not None and fieldname is not None:
         str_cmd += ' %s %s' % (layername, fieldname)
     print(str_cmd)
     print(UtilClass.run_command(str_cmd))
Example #4
    def mask_origin_delineated_data(cfg):
        """Mask the original delineated data by Subbasin raster."""
        subbasin_tau_file = cfg.taudems.subbsn
        geodata2dbdir = cfg.dirs.geodata2db
        UtilClass.mkdir(geodata2dbdir)
        mask_file = cfg.spatials.mask
        RasterUtilClass.get_mask_from_raster(subbasin_tau_file, mask_file)
        # Total 12 raster files
        original_files = [cfg.taudems.subbsn, cfg.taudems.d8flow, cfg.taudems.stream_raster,
                          cfg.taudems.slp, cfg.taudems.filldem, cfg.taudems.d8acc,
                          cfg.taudems.stream_order, cfg.taudems.dinf, cfg.taudems.dinf_d8dir,
                          cfg.taudems.dinf_slp, cfg.taudems.dinf_weight,
                          cfg.taudems.dist2stream_d8]
        # output masked files
        output_files = [cfg.taudems.subbsn_m, cfg.taudems.d8flow_m, cfg.taudems.stream_m,
                        cfg.spatials.slope, cfg.spatials.filldem, cfg.spatials.d8acc,
                        cfg.spatials.stream_order, cfg.spatials.dinf, cfg.spatials.dinf_d8dir,
                        cfg.spatials.dinf_slp, cfg.spatials.dinf_weight,
                        cfg.spatials.dist2stream_d8]

        default_values = list()
        for i in range(len(original_files)):
            default_values.append(DEFAULT_NODATA)

        # other input rasters need to be masked
        # soil and landuse
        FileClass.check_file_exists(cfg.soil)
        FileClass.check_file_exists(cfg.landuse)
        original_files.append(cfg.soil)
        output_files.append(cfg.spatials.soil_type)
        default_values.append(cfg.default_soil)
        original_files.append(cfg.landuse)
        output_files.append(cfg.spatials.landuse)
        default_values.append(cfg.default_landuse)

        # Additional raster file
        for k, v in cfg.additional_rs.items():
            org_v = v
            if not FileClass.is_file_exists(org_v):
                v = cfg.spatial_dir + os.path.sep + org_v
                if not FileClass.is_file_exists(v):
                    print('WARNING: The additional file %s MUST be located in '
                          'SPATIAL_DATA_DIR or provided as a full file path!' % k)
                    continue
            original_files.append(v)
            output_files.append(cfg.dirs.geodata2db + os.path.sep + k + '.tif')
            default_values.append(DEFAULT_NODATA)

        config_file = cfg.logs.mask_cfg
        # run mask operation
        print('Mask original delineated data by Subbasin raster...')
        SpatialDelineation.mask_raster_cpp(cfg.seims_bin, mask_file, original_files,
                                           output_files, default_values, config_file)
Example #5
 def raster2shp(rasterfile,
                vectorshp,
                layername=None,
                fieldname=None,
                band_num=1,
                mask='default'):
     """Convert raster to ESRI shapefile"""
     FileClass.remove_files(vectorshp)
     FileClass.check_file_exists(rasterfile)
     # this allows GDAL to throw Python Exceptions
     gdal.UseExceptions()
     src_ds = gdal.Open(rasterfile)
     if src_ds is None:
         print('Unable to open %s' % rasterfile)
         sys.exit(1)
     try:
         srcband = src_ds.GetRasterBand(band_num)
     except RuntimeError as e:
         # for example, try GetRasterBand(10)
         print('Band ( %i ) not found, %s' % (band_num, e))
         sys.exit(1)
     if mask == 'default':
         maskband = srcband.GetMaskBand()
     elif mask is None or mask.upper() == 'NONE':
         maskband = None
     else:
         mask_ds = gdal.Open(mask)
         maskband = mask_ds.GetRasterBand(1)
     #  create output datasource
     if layername is None:
         layername = FileClass.get_core_name_without_suffix(rasterfile)
     drv = ogr_GetDriverByName(str('ESRI Shapefile'))
     dst_ds = drv.CreateDataSource(vectorshp)
     srs = None
     if src_ds.GetProjection() != '':
         srs = osr_SpatialReference()
         srs.ImportFromWkt(src_ds.GetProjection())
     dst_layer = dst_ds.CreateLayer(str(layername), srs=srs)
     if fieldname is None:
         fieldname = layername.upper()
     fd = ogr_FieldDefn(str(fieldname), OFTInteger)
     dst_layer.CreateField(fd)
     dst_field = 0
     result = gdal.Polygonize(srcband,
                              maskband,
                              dst_layer,
                              dst_field, ['8CONNECTED=8'],
                              callback=None)
     return result
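A minimal usage sketch for the function above; the import path is an assumption (PyGeoC typically exposes it via VectorUtilClass in pygeoc.vector), and the file names are placeholders.

from pygeoc.vector import VectorUtilClass  # assumed import location

VectorUtilClass.raster2shp('subbasin.tif', 'subbasin.shp',
                           layername='subbasin', fieldname='SUBBASINID')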
Example #6
    def read_crop_lookup_table(crop_lookup_file):
        """read crop lookup table"""
        FileClass.check_file_exists(crop_lookup_file)
        data_items = read_data_items_from_txt(crop_lookup_file)
        attr_dic = dict()
        fields = data_items[0]
        n = len(fields)
        for i in range(n):
            attr_dic[fields[i]] = dict()
        for items in data_items[1:]:
            cur_id = int(items[0])

            for i in range(n):
                dic = attr_dic[fields[i]]
                try:
                    dic[cur_id] = float(items[i])
                except ValueError:
                    dic[cur_id] = items[i]
        return attr_dic
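A self-contained sketch of what the parsing above produces for a small, made-up lookup table; the real delimiter handling lives in read_data_items_from_txt, so the inline rows below are only illustrative.

sample = [['ICNUM', 'CROPNAME', 'BIO_E', 'HVSTI'],   # header row = field names
          ['1', 'AGRL', '39.0', '0.45'],
          ['2', 'AGRR', '39.0', '0.50']]

attr_dic = {fld: dict() for fld in sample[0]}
for items in sample[1:]:
    cur_id = int(items[0])
    for fld, val in zip(sample[0], items):
        try:
            attr_dic[fld][cur_id] = float(val)
        except ValueError:
            attr_dic[fld][cur_id] = val  # non-numeric values are kept as strings

print(attr_dic['CROPNAME'][1])  # AGRL
print(attr_dic['HVSTI'][2])     # 0.5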
Example #7
 def raster2shp(rasterfile, vectorshp, layername=None, fieldname=None,
                band_num=1, mask='default'):
     """Convert raster to ESRI shapefile"""
     FileClass.remove_files(vectorshp)
     FileClass.check_file_exists(rasterfile)
     # this allows GDAL to throw Python Exceptions
     gdal.UseExceptions()
     src_ds = gdal.Open(rasterfile)
     if src_ds is None:
         print('Unable to open %s' % rasterfile)
         sys.exit(1)
     try:
         srcband = src_ds.GetRasterBand(band_num)
     except RuntimeError as e:
         # for example, try GetRasterBand(10)
         print('Band ( %i ) not found, %s' % (band_num, e))
         sys.exit(1)
     if mask == 'default':
         maskband = srcband.GetMaskBand()
     elif mask is None or mask.upper() == 'NONE':
         maskband = None
     else:
         mask_ds = gdal.Open(mask)
         maskband = mask_ds.GetRasterBand(1)
     #  create output datasource
     if layername is None:
         layername = FileClass.get_core_name_without_suffix(rasterfile)
     drv = ogr_GetDriverByName(str('ESRI Shapefile'))
     dst_ds = drv.CreateDataSource(vectorshp)
     srs = None
     if src_ds.GetProjection() != '':
         srs = osr_SpatialReference()
         srs.ImportFromWkt(src_ds.GetProjection())
     dst_layer = dst_ds.CreateLayer(str(layername), srs=srs)
     if fieldname is None:
         fieldname = layername.upper()
     fd = ogr_FieldDefn(str(fieldname), OFTInteger)
     dst_layer.CreateField(fd)
     dst_field = 0
     result = gdal.Polygonize(srcband, maskband, dst_layer, dst_field,
                              ['8CONNECTED=8'], callback=None)
     return result
Example #8
def main(landusef, fieldf, fieldtxt, jsonout):
    """Construct hydrologically connected fields units data in JSON file format."""
    # Check the file existence
    FileClass.check_file_exists(landusef)
    FileClass.check_file_exists(fieldf)
    FileClass.check_file_exists(fieldtxt)
    # read raster data and check the extent based on landuse.
    landuser = RasterUtilClass.read_raster(landusef)
    data_landuse = landuser.data
    nrows = landuser.nRows
    ncols = landuser.nCols
    dx = landuser.dx
    nodata_landuse = landuser.noDataValue

    fieldr = RasterUtilClass.read_raster(fieldf)
    if fieldr.nRows != nrows or fieldr.nCols != ncols:
        raise ValueError(
            'The connected_fields raster MUST have the same dimensions'
            ' as the landuse raster!')
    data_fields = fieldr.data
    nodata_fields = fieldr.noDataValue

    # Read the initial relationships between fields
    fields_info = read_field_relationships(fieldtxt)

    # add landuse types and areas
    for m in range(nrows):
        for n in range(ncols):
            cur_lu = int(data_landuse[m][n])
            cur_fld = int(data_fields[m][n])
            if cur_fld == nodata_fields or cur_lu == nodata_landuse or cur_lu <= 0:
                continue
            if cur_fld not in fields_info['units']:
                raise ValueError(
                    '%d is not recorded in field relationship text!' % cur_fld)
            if cur_lu not in fields_info['units'][cur_fld]['landuse']:
                fields_info['units'][cur_fld]['landuse'][cur_lu] = 1
            else:
                fields_info['units'][cur_fld]['landuse'][cur_lu] += 1
    for k, v in viewitems(fields_info['units']):
        area_field = 0.
        area_max = 0.
        area_max_lu = 0
        for luid, luarea in viewitems(v['landuse']):
            v['landuse'][luid] = luarea * dx * dx * 1.e-6
            area_field += v['landuse'][luid]
            if v['landuse'][luid] > area_max:
                area_max = v['landuse'][luid]
                area_max_lu = luid
        v['area'] = area_field
        if v['primarylanduse'] != area_max_lu:
            print(k, v['primarylanduse'], area_max_lu)
            v['primarylanduse'] = area_max_lu

    # save to json
    json_updown_data = json.dumps(fields_info, indent=4)
    with open(jsonout, 'w', encoding='utf-8') as f:
        f.write('%s' % json_updown_data)
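A worked example of the area bookkeeping in the loop above, with made-up numbers: landuse cell counts per field are converted to km2 via dx * dx * 1.e-6, and the landuse with the largest area becomes the primary landuse of the field.

dx = 30.0  # assumed cell size in metres
unit = {'landuse': {1: 1200, 8: 300}, 'primarylanduse': 0, 'area': 0.}

area_field, area_max, area_max_lu = 0., 0., 0
for luid, count in unit['landuse'].items():
    unit['landuse'][luid] = count * dx * dx * 1.e-6  # cell count -> km2
    area_field += unit['landuse'][luid]
    if unit['landuse'][luid] > area_max:
        area_max, area_max_lu = unit['landuse'][luid], luid
unit['area'] = area_field
unit['primarylanduse'] = area_max_lu
print(unit)  # landuse 1: 1.08 km2, landuse 8: 0.27 km2 -> primary landuse 1, area 1.35 km2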
Example #9
    def __init__(self,
                 flowdirf,
                 subbsnf,
                 elevf,
                 rdgsrc,
                 flow_model=1,
                 prop=0.,
                 ws=None):
        """Initialize file names."""
        FileClass.check_file_exists(flowdirf)
        FileClass.check_file_exists(subbsnf)
        FileClass.check_file_exists(elevf)
        if ws is None:
            ws = os.path.dirname(flowdirf)
        self.ws = ws
        if flow_model == 1:
            suffix = '_dinf.tif'
        else:
            suffix = '_d8.tif'
        self.rdgorg = self.ws + os.sep + 'RdgOrgSrc' + suffix
        self.boundsrc = self.ws + os.sep + 'RdgPotSrc' + suffix
        self.boundsrcfilter = self.ws + os.sep + 'RdgPotSrcFilter' + suffix

        if rdgsrc is None:
            rdgsrc = self.ws + os.sep + 'rdgsrc' + suffix
        self.rdgsrc = rdgsrc
        self.flowmodel = flow_model
        self.prop = prop
        # read raster data
        flowdir_r = RasterUtilClass.read_raster(flowdirf)
        self.flowdir_data = flowdir_r.data
        self.nrows = flowdir_r.nRows
        self.ncols = flowdir_r.nCols
        self.nodata_flow = flowdir_r.noDataValue
        self.geotrans = flowdir_r.geotrans
        self.srs = flowdir_r.srs
        subbsn_r = RasterUtilClass.read_raster(subbsnf)
        self.subbsn_data = subbsn_r.data
        self.nodata_subbsn = subbsn_r.noDataValue
        elev_r = RasterUtilClass.read_raster(elevf)
        self.elev_data = elev_r.data
        self.nodata_elev = elev_r.noDataValue

        # initialize output arrays
        self.rdgsrc_data = numpy.ones((self.nrows, self.ncols))
        self.rdgpot = numpy.ones((self.nrows, self.ncols)) * DEFAULT_NODATA
Example #10
    def __init__(
            self,
            tag_names,  # type: List[Tuple[int, AnyStr]]
            slpposf,  # type: AnyStr
            reach_shp,  # type: AnyStr
            hillslpf,  # type: AnyStr
            landusef  # type: AnyStr
    ):
        # type: (...) -> None
        """Initialization.

        Args:
            tag_names: [(tag(integer), name(str)), ...]; tags should be in ascending order
                       from the upslope position to the downslope position.
            slpposf: Crisp classification of slope position full filename.
            reach_shp: Reach shapefile used to extract the up-down relationships of subbasins
            hillslpf: Delineated hillslope file by sd_hillslope.py.
            landusef: Landuse raster, used to compute the area of each landuse type within
                      slope position units.

        Attributes:
            slppos_tags(OrderedDict): {tag: name}
            subbsin_tree: up-down stream relationships of subbasins.
                          {subbsnID: {'upstream': [], 'downstream': []}}
            units_updwon: Output json data of slope position units.
                {"slppos_1": {id:{"downslope": [ids], "upslope": [ids], "landuse": {luID: area}
                                  "hillslope": [hillslpID], "subbasin": [subbsnID], "area": area
                                 }
                             }
                 "slppos_2": ...
                }
        """
        # Check the file existence
        FileClass.check_file_exists(slpposf)
        FileClass.check_file_exists(reach_shp)
        FileClass.check_file_exists(hillslpf)
        FileClass.check_file_exists(landusef)
        # Set inputs
        self.ws = os.path.dirname(slpposf)
        tag_names = sorted(tag_names, key=lambda x: x[0])
        # initialize slope position dict with up-down relationships
        self.slppos_tags = OrderedDict(
        )  # type: Dict[int, Dict[AnyStr, Union[int, AnyStr]]]
        for idx, tagname in enumerate(tag_names):
            tag, name = tagname
            if len(tag_names) > 1:
                if idx == 0:
                    self.slppos_tags[int(tag)] = {
                        'name': name,
                        'upslope': -1,
                        'downslope': tag_names[idx + 1][0]
                    }
                elif idx == len(tag_names) - 1:
                    self.slppos_tags[int(tag)] = {
                        'name': name,
                        'upslope': tag_names[idx - 1][0],
                        'downslope': -1
                    }
                else:
                    self.slppos_tags[int(tag)] = {
                        'name': name,
                        'upslope': tag_names[idx - 1][0],
                        'downslope': tag_names[idx + 1][0]
                    }
            else:
                self.slppos_tags[int(tag)] = {
                    'name': name,
                    'upslope': -1,
                    'downslope': -1
                }

        self.reach = reach_shp
        # read raster data and check the extent based on hillslope.
        hillslpr = RasterUtilClass.read_raster(hillslpf)
        self.data_hillslp = hillslpr.data
        self.nrows = hillslpr.nRows
        self.ncols = hillslpr.nCols
        self.dx = hillslpr.dx
        self.nodata_hillslp = hillslpr.noDataValue
        self.geotrans = hillslpr.geotrans
        self.srs = hillslpr.srs
        self.datatype = hillslpr.dataType
        slpposr = RasterUtilClass.read_raster(slpposf)
        if slpposr.nRows != self.nrows or slpposr.nCols != self.ncols:
            raise ValueError(
                'The slope position raster MUST have the same dimensions'
                ' as the hillslope raster!')
        self.data_slppos = slpposr.data
        self.nodata_slppos = slpposr.noDataValue
        landuser = RasterUtilClass.read_raster(landusef)
        if landuser.nRows != self.nrows or landuser.nCols != self.ncols:
            raise ValueError(
                'The landuse raster MUST have the same dimensions'
                ' as the hillslope raster!')
        self.data_landuse = landuser.data
        self.nodata_landuse = landuser.noDataValue

        # Set intermediate data
        self.subbsin_num = -1
        self.subbsin_tree = dict(
        )  # type: Dict[int, int]  # {subbsnID: dst_subbsnID}
        self.units_updwon = OrderedDict(
        )  # type: Dict[AnyStr, Dict[int, Dict[AnyStr, Union[List[float], AnyStr]]]]
        for tag in self.slppos_tags:
            self.units_updwon[self.slppos_tags.get(tag).get('name')] = dict()
        self.slppos_ids = numpy.ones((self.nrows, self.ncols)) * DEFAULT_NODATA
        self.hierarchy_units = dict(
        )  # type: Dict[int, Dict[int, Dict[AnyStr, int]]]

        # Set gene_values of outputs
        self.outf_units_origin = self.ws + os.path.sep + 'slppos_units_origin_uniqueid.tif'
        self.outshp_units_origin = self.ws + os.path.sep + 'origin_uniqueid.shp'
        self.json_units_origin = self.ws + os.path.sep + 'original_updown.json'
        self.outf_units_merged = self.ws + os.path.sep + 'slppos_units.tif'
        self.outshp_units_merged = self.ws + os.path.sep + 'slppos_units_merged.shp'
        self.json_units_merged = self.ws + os.path.sep + 'updown.json'
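To make the up-down chain construction concrete, a small standalone sketch with an assumed three-position configuration; tag values and names here are examples only.

from collections import OrderedDict

tag_names = sorted([(1, 'summit'), (4, 'backslope'), (16, 'valley')], key=lambda x: x[0])
slppos_tags = OrderedDict()
for idx, (tag, name) in enumerate(tag_names):
    up = tag_names[idx - 1][0] if idx > 0 else -1
    down = tag_names[idx + 1][0] if idx < len(tag_names) - 1 else -1
    slppos_tags[tag] = {'name': name, 'upslope': up, 'downslope': down}

print(slppos_tags[4])  # {'name': 'backslope', 'upslope': 1, 'downslope': 16}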
Example #11
    def __init__(self, tag_names, slpposf, reach_shp, hillslpf, landusef):
        """Initialization.

        Args:
            tag_names: [(tag(integer), name(str)), ...]; tags should be in ascending order
                       from the upslope position to the downslope position.
            slpposf: Crisp classification of slope position.
            reach_shp: Reach shapefile used to extract the up-down relationships of subbasins
            hillslpf: Delineated hillslope file by sd_hillslope.py.
            landusef: Landuse raster, used to compute the area of each landuse type within
                      slope position units.

        Attributes:
            slppos_tags(OrderedDict): {tag: name}
            subbsin_tree: up-down stream relationships of subbasins.
                          {subbsnID: {'upstream': [], 'downstream': []}}
            units_updown_info: Output json data of slope position units.
                {"slppos_1": {id:{"downslope": [ids], "upslope": [ids], "landuse": {luID: area}
                                  "hillslope": [hillslpID], "subbasin": [subbsnID], "area": area
                                 }
                             }
                 "slppos_2": ...
                }
        """
        # Check the file existence
        FileClass.check_file_exists(slpposf)
        FileClass.check_file_exists(reach_shp)
        FileClass.check_file_exists(hillslpf)
        FileClass.check_file_exists(landusef)
        # Set inputs
        self.ws = os.path.dirname(slpposf)
        tag_names = sorted(tag_names, key=lambda x: x[0])
        # initialize slope position dict with up-down relationships
        self.slppos_tags = OrderedDict()
        for idx, tagname in enumerate(tag_names):
            tag, name = tagname
            if len(tag_names) > 1:
                if idx == 0:
                    self.slppos_tags[int(tag)] = {'name': name, 'upslope': -1,
                                                  'downslope': tag_names[idx + 1][0]}
                elif idx == len(tag_names) - 1:
                    self.slppos_tags[int(tag)] = {'name': name, 'upslope': tag_names[idx - 1][0],
                                                  'downslope': -1}
                else:
                    self.slppos_tags[int(tag)] = {'name': name, 'upslope': tag_names[idx - 1][0],
                                                  'downslope': tag_names[idx + 1][0]}
            else:
                self.slppos_tags[int(tag)] = {'name': name, 'upslope': -1, 'downslope': -1}

        self.reach = reach_shp
        # read raster data and check the extent based on hillslope.
        hillslpr = RasterUtilClass.read_raster(hillslpf)
        self.data_hillslp = hillslpr.data
        self.nrows = hillslpr.nRows
        self.ncols = hillslpr.nCols
        self.dx = hillslpr.dx
        self.nodata_hillslp = hillslpr.noDataValue
        self.geotrans = hillslpr.geotrans
        self.srs = hillslpr.srs
        self.datatype = hillslpr.dataType
        slpposr = RasterUtilClass.read_raster(slpposf)
        if slpposr.nRows != self.nrows or slpposr.nCols != self.ncols:
            raise ValueError('The slope position raster MUST have the same dimensions'
                             ' as the hillslope raster!')
        self.data_slppos = slpposr.data
        self.nodata_slppos = slpposr.noDataValue
        landuser = RasterUtilClass.read_raster(landusef)
        if landuser.nRows != self.nrows or landuser.nCols != self.ncols:
            raise ValueError('The landuse raster MUST have the same dimensions'
                             ' as the hillslope raster!')
        self.data_landuse = landuser.data
        self.nodata_landuse = landuser.noDataValue

        # Set intermediate data
        self.subbsin_num = -1
        self.subbsin_tree = dict()
        self.units_updwon = OrderedDict()
        for tag in self.slppos_tags:
            self.units_updwon[self.slppos_tags.get(tag).get('name')] = dict()
        self.slppos_ids = numpy.ones((self.nrows, self.ncols)) * DEFAULT_NODATA
        self.hierarchy_units = dict()

        # Set gene_values of outputs
        self.outf_units_origin = self.ws + os.path.sep + 'slppos_units_origin_uniqueid.tif'
        self.outshp_units_origin = self.ws + os.path.sep + 'origin_uniqueid.shp'
        self.json_units_origin = self.ws + os.path.sep + 'original_updown.json'
        self.outf_units_merged = self.ws + os.path.sep + 'slppos_units.tif'
        self.outshp_units_merged = self.ws + os.path.sep + 'slppos_units_merged.shp'
        self.json_units_merged = self.ws + os.path.sep + 'updown.json'
Example #12
    def __init__(self, cf):
        """Initialization."""
        SAConfig.__init__(self, cf)  # initialize base class first
        # Handle self.bmps_info for the specific application
        # 1. Check the required keys and values
        requiredkeys = ['COLLECTION', 'DISTRIBUTION', 'SUBSCENARIO', 'UPDOWNJSON',
                        'ENVEVAL', 'BASE_ENV']
        for k in requiredkeys:
            if k not in self.bmps_info:
                raise ValueError('[%s]: MUST be provided!' % k)
        # 2. Slope position units information
        updownf = self.bmps_info.get('UPDOWNJSON')
        FileClass.check_file_exists(updownf)
        with open(updownf, 'r') as updownfo:
            self.units_infos = json.load(updownfo)
        self.units_infos = UtilClass.decode_strs_in_dict(self.units_infos)
        # 3. Get slope position sequence
        sptags = cf.get('BMPs', 'slppos_tag_name')
        self.slppos_tags = json.loads(sptags)
        self.slppos_tags = UtilClass.decode_strs_in_dict(self.slppos_tags)
        self.slppos_tagnames = sorted(list(self.slppos_tags.items()), key=operator.itemgetter(0))
        self.slppos_unit_num = self.units_infos['overview']['all_units']
        self.slppos_to_gene = OrderedDict()
        self.gene_to_slppos = dict()

        # method 1: (deprecated)
        #     gene index: 0, 1, 2, ..., n
        #     slppos unit: rdg1, rdg2,..., bks1, bks2,..., vly1, vly2...
        # idx = 0
        # for tag, sp in self.slppos_tagnames:
        #     for uid in self.units_infos[sp]:
        #         self.gene_to_slppos[idx] = uid
        #         self.slppos_to_gene[uid] = idx
        #         idx += 1
        # method 2:
        #     gene index: 0, 1, 2, ..., n
        #     slppos unit: rdg1, bks2, vly1,..., rdgn, bksn, vlyn
        idx = 0
        spname = self.slppos_tagnames[0][1]
        for uid, udict in self.units_infos[spname].items():
            spidx = 0
            self.gene_to_slppos[idx] = uid
            self.slppos_to_gene[uid] = idx
            idx += 1
            next_uid = udict['downslope']
            while next_uid > 0:
                self.gene_to_slppos[idx] = next_uid
                self.slppos_to_gene[next_uid] = idx
                idx += 1
                spidx += 1
                spname = self.slppos_tagnames[spidx][1]
                next_uid = self.units_infos[spname][next_uid]['downslope']

        assert (idx == self.slppos_unit_num)

        # 4. SubScenario IDs and parameters read from MongoDB
        self.bmps_subids = self.bmps_info.get('SUBSCENARIO')
        self.bmps_coll = self.bmps_info.get('COLLECTION')
        self.bmps_params = dict()
        self.slppos_suit_bmps = dict()

        self.read_bmp_parameters()
        self.get_suitable_bmps_for_slppos()
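A toy walk-through of the "method 2" gene ordering above: genes follow each top-position unit down its slope chain. The unit IDs, position names, and tags below are assumptions for illustration only.

units_infos = {
    'ridge':     {101: {'downslope': 201}, 102: {'downslope': 202}},
    'backslope': {201: {'downslope': 301}, 202: {'downslope': 302}},
    'valley':    {301: {'downslope': -1},  302: {'downslope': -1}},
}
slppos_tagnames = [(1, 'ridge'), (4, 'backslope'), (16, 'valley')]

gene_to_slppos, slppos_to_gene, idx = dict(), dict(), 0
for uid, udict in units_infos[slppos_tagnames[0][1]].items():
    spidx = 0
    gene_to_slppos[idx] = uid
    slppos_to_gene[uid] = idx
    idx += 1
    next_uid = udict['downslope']
    while next_uid > 0:
        gene_to_slppos[idx] = next_uid
        slppos_to_gene[next_uid] = idx
        idx += 1
        spidx += 1
        next_uid = units_infos[slppos_tagnames[spidx][1]][next_uid]['downslope']

print(gene_to_slppos)  # {0: 101, 1: 201, 2: 301, 3: 102, 4: 202, 5: 302}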
Example #13
def interpolate_observed_data_to_regular_interval(in_file, time_interval, start_time, end_time,
                                                  eliminate_zero=False,
                                                  time_sys_output='UTCTIME', day_divided_hour=0):
    """
    Interpolate irregularly observed data to regular time-interval data.
    Args:
        in_file: input data file, the basic format is as follows:
                 line 1: #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME
                 line 2: DATETIME,field1,field2,...
                 line 3: YYYY-mm-dd HH:MM:SS,field1_value,field2_value,...
                 line 4: ...
                 ...
                 Field names can be PCP, FLOW, or SED;
                 the units are mm/h, m3/s, and g/L (i.e., kg/m3), respectively.
        time_interval: time interval, unit is minute, e.g., daily output is 1440
        start_time: start time, the format must be 'YYYY-mm-dd HH:MM:SS', and the time system
                    is based on time_sys_output.
        end_time: end time, see also start_time.
        eliminate_zero: Boolean flag. If true, the time interval without original records will
                        not be output.
        time_sys_output: time system of the output data; the format must be
                  '<time_system> [<time_zone>]', e.g.,
                  'LOCALTIME'
                  'LOCALTIME 8'
                  'UTCTIME' (default)
        day_divided_hour: If the time_interval is equal to N*1440, this parameter should be
                          carefully specified. The value must range from 0 to 23. e.g.,
                          day_divided_hour ==> day ranges (all expressed as 2013-02-03)
                          0  ==> 2013-02-03 00:00:00 to 2013-02-03 23:59:59 (default)
                          8  ==> 2013-02-03 08:00:00 to 2013-02-04 07:59:59
                          20 ==> 2013-02-03 20:00:00 to 2013-02-04 19:59:59
    Returns:
        The output data files are located in the same directory as the input file.
        The nomenclature is: <field name>_<time system>_<time interval>_<nonzero>, e.g.,
        pcp_utctime_1440_nonzero.txt, flow_localtime_60.txt
    """
    FileClass.check_file_exists(in_file)
    time_sys_input, time_zone_input = HydroClimateUtilClass.get_time_system_from_data_file(in_file)
    data_items = read_data_items_from_txt(in_file)
    flds = data_items[0][:]
    data_items.remove(flds)
    if not 0 <= day_divided_hour <= 23:
        raise ValueError('Day divided hour must range from 0 to 23!')
    try:
        date_idx = flds.index('DATETIME')
        flds.remove('DATETIME')
    except ValueError:
        raise ValueError('DATETIME must be one of the fields!')
    # available field
    available_flds = ['FLOW', 'SED', 'PCP']

    def check_avaiable_field(cur_fld):
        """Check if the given field name is supported."""
        support_flag = False
        for fff in available_flds:
            if fff.lower() in cur_fld.lower():
                support_flag = True
                break
        return support_flag

    ord_data = OrderedDict()
    time_zone_output = time.timezone / -3600
    if time_sys_output.lower().find('local') >= 0:
        tmpstrs = StringClass.split_string(time_sys_output, [' '])
        if len(tmpstrs) == 2 and MathClass.isnumerical(tmpstrs[1]):
            time_zone_output = int(tmpstrs[1])
        time_sys_output = 'LOCALTIME'
    else:
        time_sys_output = 'UTCTIME'
        time_zone_output = 0
    for item in data_items:
        org_datetime = StringClass.get_datetime(item[date_idx])
        if time_sys_input == 'LOCALTIME':
            org_datetime -= timedelta(hours=time_zone_input)
        # now, org_datetime is UTC time.
        if time_sys_output == 'LOCALTIME':
            org_datetime += timedelta(hours=time_zone_output)
        # now, org_datetime is consistent with the output time system
        ord_data[org_datetime] = list()
        for i, v in enumerate(item):
            if i == date_idx:
                continue
            if MathClass.isnumerical(v):
                ord_data[org_datetime].append(float(v))
            else:
                ord_data[org_datetime].append(v)
    # print(ord_data)
    itp_data = OrderedDict()
    out_time_delta = timedelta(minutes=time_interval)
    sdatetime = StringClass.get_datetime(start_time)
    edatetime = StringClass.get_datetime(end_time)
    item_dtime = sdatetime
    if time_interval % 1440 == 0:
        item_dtime = sdatetime.replace(hour=0, minute=0, second=0) + \
                     timedelta(minutes=day_divided_hour * 60)
    while item_dtime <= edatetime:
        # print(item_dtime)
        # if item_dtime.month == 12 and item_dtime.day == 31:
        #     print("debug")
        sdt = item_dtime  # start datetime of records
        edt = item_dtime + out_time_delta  # end datetime of records
        # get original data items
        org_items = list()
        pre_dt = list(ord_data.keys())[0]
        pre_added = False
        for i, v in list(ord_data.items()):
            if sdt <= i < edt:
                if not pre_added and pre_dt < sdt < i and sdt - pre_dt < out_time_delta:
                    # only add one item that is earlier than sdt.
                    org_items.append([pre_dt] + ord_data.get(pre_dt))
                    pre_added = True
                org_items.append([i] + v)
            if i > edt:
                break
            pre_dt = i
        if len(org_items) > 0:
            org_items.append([edt])  # Append the end time for convenient computation
            if org_items[0][0] < sdt:
                org_items[0][0] = sdt  # set the begin datetime of current time interval
        # if eliminate time interval without original records
        # initial interpolated list
        itp_data[item_dtime] = [0.] * len(flds)
        if len(org_items) == 0:
            if eliminate_zero:
                itp_data.popitem()
            item_dtime += out_time_delta
            continue
        # core interpolation code
        flow_idx = -1
        for v_idx, v_name in enumerate(flds):
            if not check_avaiable_field(v_name):
                continue
            if 'SED' in v_name.upper():  # FLOW must exist to interpolate SED
                for v_idx2, v_name2 in enumerate(flds):
                    if 'FLOW' in v_name2.upper():
                        flow_idx = v_idx2
                        break
                if flow_idx < 0:
                    raise RuntimeError('To interpolate SED, FLOW must be provided!')
        for v_idx, v_name in enumerate(flds):
            if not check_avaiable_field(v_name):
                continue
            itp_value = 0.
            itp_auxiliary_value = 0.
            for org_item_idx, org_item_dtv in enumerate(org_items):
                if org_item_idx == 0:
                    continue
                org_item_dt = org_item_dtv[0]
                pre_item_dtv = org_items[org_item_idx - 1]
                pre_item_dt = pre_item_dtv[0]
                tmp_delta_dt = org_item_dt - pre_item_dt
                tmp_delta_secs = tmp_delta_dt.days * 86400 + tmp_delta_dt.seconds
                if 'SED' in v_name.upper():
                    itp_value += pre_item_dtv[v_idx + 1] * pre_item_dtv[flow_idx + 1] * \
                                 tmp_delta_secs
                    itp_auxiliary_value += pre_item_dtv[flow_idx + 1] * tmp_delta_secs
                else:
                    itp_value += pre_item_dtv[v_idx + 1] * tmp_delta_secs
            if 'SED' in v_name.upper():
                if MathClass.floatequal(itp_auxiliary_value, 0.):
                    itp_value = 0.
                    print('WARNING: Flow is 0 for %s, please check!' %
                          item_dtime.strftime('%Y-%m-%d %H:%M:%S'))
                itp_value /= itp_auxiliary_value
            elif 'FLOW' in v_name.upper():
                itp_value /= (out_time_delta.days * 86400 + out_time_delta.seconds)
            elif 'PCP' in v_name.upper():  # the input is mm/h, and output is mm
                itp_value /= 3600.
            itp_data[item_dtime][v_idx] = round(itp_value, 4)
        item_dtime += out_time_delta

    # for i, v in itp_data.items():
    #     print(i, v)
    # output to files
    work_path = os.path.dirname(in_file)
    header_str = '#' + time_sys_output
    if time_sys_output == 'LOCALTIME':
        header_str = header_str + ' ' + str(time_zone_output)
    for idx, fld in enumerate(flds):
        if not check_avaiable_field(fld):
            continue
        file_name = fld + '_' + time_sys_output + '_' + str(time_interval)
        if eliminate_zero:
            file_name += '_nonzero'
        file_name += '.txt'
        out_file = work_path + os.path.sep + file_name
        with open(out_file, 'w') as f:
            f.write(header_str + '\n')
            f.write('DATETIME,' + fld + '\n')
            for i, v in list(itp_data.items()):
                cur_line = i.strftime('%Y-%m-%d %H:%M:%S') + ',' + str(v[idx]) + '\n'
                f.write(cur_line)
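A worked example of the time weighting applied above, with made-up values: FLOW is reduced to a time-weighted mean over the interval, and PCP intensity (mm/h) integrated over seconds is divided by 3600 to give a depth in mm.

flow_values = [2.0, 4.0]   # m3/s, two records within one 60-minute interval
durations = [1800, 1800]   # seconds each record is assumed to hold
interval = 3600            # output time interval in seconds

flow_itp = sum(v * s for v, s in zip(flow_values, durations)) / interval
print(flow_itp)  # 3.0 m3/s (time-weighted mean)

pcp_intensity = [1.2, 0.0]  # mm/h
pcp_itp = sum(v * s for v, s in zip(pcp_intensity, durations)) / 3600.
print(pcp_itp)  # 0.6 mm accumulated over the interval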
Example #14
def main(landusef, unitsf, jsonout):
    """Construct common spatial units data in JSON file format."""
    # Check the file existence
    FileClass.check_file_exists(landusef)
    FileClass.check_file_exists(unitsf)
    # read raster data and check the extent based on landuse.
    landuser = RasterUtilClass.read_raster(landusef)
    data_landuse = landuser.data
    nrows = landuser.nRows
    ncols = landuser.nCols
    dx = landuser.dx
    nodata_landuse = landuser.noDataValue

    fieldr = RasterUtilClass.read_raster(unitsf)
    if fieldr.nRows != nrows or fieldr.nCols != ncols:
        raise ValueError(
            'The spatial units raster MUST have the same dimensions'
            ' as the landuse raster!')
    data_units = fieldr.data
    nodata_units = fieldr.noDataValue

    units_info = dict(
    )  # type: Dict[AnyStr, Dict[Union[int, AnyStr], Dict[AnyStr, Union[int, float, List[Union[int,float]], AnyStr, Dict[int, float]]]]]

    units_info.setdefault('units', dict())
    units_info.setdefault('overview', dict())

    units_ids = list()  # type: List[int]

    for m in range(nrows):
        for n in range(ncols):
            cur_lu = int(data_landuse[m][n])
            cur_unit = int(data_units[m][n])
            if cur_unit == nodata_units or cur_lu == nodata_landuse or cur_lu <= 0:
                continue
            if cur_unit not in units_ids:
                units_ids.append(cur_unit)
            if cur_unit not in units_info['units']:
                units_info['units'].setdefault(cur_unit, {
                    'landuse': dict(),
                    'primarylanduse': 0,
                    'area': 0.
                })
            if cur_lu not in units_info['units'][cur_unit]['landuse']:
                units_info['units'][cur_unit]['landuse'][cur_lu] = 1
            else:
                units_info['units'][cur_unit]['landuse'][cur_lu] += 1
    for k, v in viewitems(units_info['units']):
        area_field = 0.
        area_max = 0.
        area_max_lu = 0
        for luid, luarea in viewitems(v['landuse']):
            v['landuse'][luid] = luarea * dx * dx * 1.e-6
            area_field += v['landuse'][luid]
            if v['landuse'][luid] > area_max:
                area_max = v['landuse'][luid]
                area_max_lu = luid
        v['area'] = area_field
        v['primarylanduse'] = area_max_lu

    units_info['overview'].setdefault('all_units', len(units_ids))

    # save to json
    json_data = json.dumps(units_info, indent=4)
    with open(jsonout, 'w', encoding='utf-8') as f:
        f.write('%s' % json_data)
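For orientation, the serialized units_info has roughly the following shape; all numbers below are made up.

import json

units_info_example = {
    'units': {
        12: {'landuse': {1: 0.72, 8: 0.18}, 'primarylanduse': 1, 'area': 0.9},
        13: {'landuse': {6: 0.45}, 'primarylanduse': 6, 'area': 0.45},
    },
    'overview': {'all_units': 2},
}
print(json.dumps(units_info_example, indent=4))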
Example #15
def interpolate_observed_data_to_regular_interval(in_file, time_interval, start_time, end_time,
                                                  eliminate_zero=False,
                                                  time_sys_output='UTCTIME', day_divided_hour=0):
    """
    Interpolate irregularly observed data to regular time-interval data.

    Todo: Not tested yet!

    Args:
        in_file: input data file, the basic format is as follows:
                 line 1: #<time_system> [<time_zone>], e.g., #LOCALTIME 8, #UTCTIME
                 line 2: DATETIME,field1,field2,...
                 line 3: YYYY-mm-dd HH:MM:SS,field1_value,field2_value,...
                 line 4: ...
                 ...
                 Field names can be PCP, FLOW, or SED;
                 the units are mm/h, m3/s, and g/L (i.e., kg/m3), respectively.
        time_interval: time interval, unit is minute, e.g., daily output is 1440
        start_time: start time, the format must be 'YYYY-mm-dd HH:MM:SS', and the time system
                    is based on time_sys_output.
        end_time: end time, see also start_time.
        eliminate_zero: Boolean flag. If true, the time interval without original records will
                        not be output.
        time_sys_output: time system of the output data; the format must be
                  '<time_system> [<time_zone>]', e.g.,
                  'LOCALTIME'
                  'LOCALTIME 8'
                  'UTCTIME' (default)
        day_divided_hour: If the time_interval is equal to N*1440, this parameter should be
                          carefully specified. The value must range from 0 to 23. e.g.,
                          day_divided_hour ==> day ranges (all expressed as 2013-02-03)
                          0  ==> 2013-02-03 00:00:00 to 2013-02-03 23:59:59 (default)
                          8  ==> 2013-02-03 08:00:00 to 2013-02-04 07:59:59
                          20 ==> 2013-02-03 20:00:00 to 2013-02-04 19:59:59
    Returns:
        The output data files are located in the same directory as the input file.
        The nomenclature is: <field name>_<time system>_<time interval>_<nonzero>, e.g.,
        pcp_utctime_1440_nonzero.csv, flow_localtime_60.csv.
        Note that `.txt` format is also supported.
    """
    FileClass.check_file_exists(in_file)
    time_sys_input, time_zone_input = HydroClimateUtilClass.get_time_system_from_data_file(in_file)
    data_items = read_data_items_from_txt(in_file)
    flds = data_items[0][:]
    data_items.remove(flds)
    if not 0 <= day_divided_hour <= 23:
        raise ValueError('Day divided hour must range from 0 to 23!')
    try:
        date_idx = flds.index('DATETIME')
        flds.remove('DATETIME')
    except ValueError:
        raise ValueError('DATETIME must be one of the fields!')
    # available field
    available_flds = ['FLOW', 'SED', 'PCP']

    def check_avaiable_field(cur_fld):
        """Check if the given field name is supported."""
        support_flag = False
        for fff in available_flds:
            if fff.lower() in cur_fld.lower():
                support_flag = True
                break
        return support_flag

    ord_data = OrderedDict()
    time_zone_output = time.timezone // 3600
    if time_sys_output.lower().find('local') >= 0:
        tmpstrs = StringClass.split_string(time_sys_output, [' '])
        if len(tmpstrs) == 2 and MathClass.isnumerical(tmpstrs[1]):
            time_zone_output = -1 * int(tmpstrs[1])
        time_sys_output = 'LOCALTIME'
    else:
        time_sys_output = 'UTCTIME'
        time_zone_output = 0
    for item in data_items:
        org_datetime = StringClass.get_datetime(item[date_idx])
        if time_sys_input == 'LOCALTIME':
            org_datetime += timedelta(hours=time_zone_input)  # now, org_datetime is UTC time.
        if time_sys_output == 'LOCALTIME':
            org_datetime -= timedelta(hours=time_zone_output)
        # now, org_datetime is consistent with the output time system
        ord_data[org_datetime] = list()
        for i, v in enumerate(item):
            if i == date_idx:
                continue
            if MathClass.isnumerical(v):
                ord_data[org_datetime].append(float(v))
            else:
                ord_data[org_datetime].append(v)
    # print(ord_data)
    itp_data = OrderedDict()
    out_time_delta = timedelta(minutes=time_interval)
    sdatetime = StringClass.get_datetime(start_time)
    edatetime = StringClass.get_datetime(end_time)
    item_dtime = sdatetime
    if time_interval % 1440 == 0:
        item_dtime = sdatetime.replace(hour=0, minute=0, second=0) + \
                     timedelta(minutes=day_divided_hour * 60)
    while item_dtime <= edatetime:
        # print(item_dtime)
        # if item_dtime.month == 12 and item_dtime.day == 31:
        #     print("debug")
        sdt = item_dtime  # start datetime of records
        edt = item_dtime + out_time_delta  # end datetime of records
        # get original data items
        org_items = list()
        pre_dt = list(ord_data.keys())[0]
        pre_added = False
        for i, v in list(ord_data.items()):
            if sdt <= i < edt:
                if not pre_added and pre_dt < sdt < i and sdt - pre_dt < out_time_delta:
                    # only add one item that is earlier than sdt.
                    org_items.append([pre_dt] + ord_data.get(pre_dt))
                    pre_added = True
                org_items.append([i] + v)
            if i > edt:
                break
            pre_dt = i
        if len(org_items) > 0:
            org_items.append([edt])  # Append the end time for convenient computation
            if org_items[0][0] < sdt:
                org_items[0][0] = sdt  # set the begin datetime of current time interval
        # if eliminate time interval without original records
        # initial interpolated list
        itp_data[item_dtime] = [0.] * len(flds)
        if len(org_items) == 0:
            if eliminate_zero:
                itp_data.popitem()
            item_dtime += out_time_delta
            continue
        # core interpolation code
        flow_idx = -1
        for v_idx, v_name in enumerate(flds):
            if not check_avaiable_field(v_name):
                continue
            if 'SED' in v_name.upper():  # FLOW must exist to interpolate SED
                for v_idx2, v_name2 in enumerate(flds):
                    if 'FLOW' in v_name2.upper():
                        flow_idx = v_idx2
                        break
                if flow_idx < 0:
                    raise RuntimeError('To interpolate SED, FLOW must be provided!')
        for v_idx, v_name in enumerate(flds):
            if not check_avaiable_field(v_name):
                continue
            itp_value = 0.
            itp_auxiliary_value = 0.
            for org_item_idx, org_item_dtv in enumerate(org_items):
                if org_item_idx == 0:
                    continue
                org_item_dt = org_item_dtv[0]
                pre_item_dtv = org_items[org_item_idx - 1]
                pre_item_dt = pre_item_dtv[0]
                tmp_delta_dt = org_item_dt - pre_item_dt
                tmp_delta_secs = tmp_delta_dt.days * 86400 + tmp_delta_dt.seconds
                if 'SED' in v_name.upper():
                    itp_value += pre_item_dtv[v_idx + 1] * pre_item_dtv[flow_idx + 1] * \
                                 tmp_delta_secs
                    itp_auxiliary_value += pre_item_dtv[flow_idx + 1] * tmp_delta_secs
                else:
                    itp_value += pre_item_dtv[v_idx + 1] * tmp_delta_secs
            if 'SED' in v_name.upper():
                if MathClass.floatequal(itp_auxiliary_value, 0.):
                    itp_value = 0.
                    print('WARNING: Flow is 0 for %s, please check!' %
                          item_dtime.strftime('%Y-%m-%d %H:%M:%S'))
                itp_value /= itp_auxiliary_value
            elif 'FLOW' in v_name.upper():
                itp_value /= (out_time_delta.days * 86400 + out_time_delta.seconds)
            elif 'PCP' in v_name.upper():  # the input is mm/h, and output is mm
                itp_value /= 3600.
            itp_data[item_dtime][v_idx] = round(itp_value, 4)
        item_dtime += out_time_delta

    # for i, v in itp_data.items():
    #     print(i, v)
    # output to files
    work_path = os.path.dirname(in_file)
    header_str = '#' + time_sys_output
    if time_sys_output == 'LOCALTIME':
        header_str = header_str + ' ' + str(time_zone_output)
    for idx, fld in enumerate(flds):
        if not check_avaiable_field(fld):
            continue
        file_name = fld + '_' + time_sys_output + '_' + str(time_interval)
        if eliminate_zero:
            file_name += '_nonzero'
        file_name += '.csv'
        out_file = work_path + os.path.sep + file_name
        with open(out_file, 'w', encoding='utf-8') as f:
            f.write(header_str + '\n')
            f.write('DATETIME,' + fld + '\n')
            for i, v in list(itp_data.items()):
                cur_line = i.strftime('%Y-%m-%d %H:%M:%S') + ',' + str(v[idx]) + '\n'
                f.write(cur_line)