Example #1
0
    def _extract_subset(self, input_dir, output_dir, patterns, subset, subset_name, overwrite = False):
        """Extract a named HDF5 subset from each matching input file into a GeoTIFF.

        Parameters
        ----------
        input_dir : str
            Directory searched for input HDF5 files.
        output_dir : str
            Directory for the generated GeoTIFFs (created if missing).
        patterns : sequence
            Two-element sequence: patterns[0] is the input filename pattern,
            patterns[1] the output pattern for generate_output_filename.
        subset
            Unused in this snippet -- TODO confirm whether callers rely on it.
        subset_name : str
            Name of the HDF5 subset/dataset to extract.
        overwrite : bool
            When True, regenerate outputs that already exist.

        Returns -1 when no input files are found or a file cannot be opened.
        NOTE(review): the accumulated `new_files` list is never returned in
        this snippet -- confirm against the full source.
        """
        logger.info('entering _extract_subset')
        _all_files = directory_utils.get_matching_files(input_dir, patterns[0])
        if not _all_files:
            logger.debug('Extracting subset {0}. No files found in {1} with pattern {2}'.format(
                subset_name, input_dir, patterns[0]))
            logger.info('No files found in ' + input_dir + ', please check directory and try again')
            return -1

        # check output directory exists and create it if not
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        new_files = []
        # target spatial reference: WGS84 geographic (EPSG:4326)
        _sr = osr.SpatialReference()
        _sr.ImportFromEPSG(4326)
        for _ifl in _all_files:
            # generate output file
            _nfl = filename_utils.generate_output_filename(os.path.basename(_ifl), patterns[0], patterns[1])
            _ofl = os.path.join(output_dir, _nfl)
            if not os.path.exists(_ofl) or overwrite == True:
                try:
                    gdal.SetConfigOption( 'CPL_DEBUG', 'ON' )
                    gdal.UseExceptions()
                    # GDAL HDF5 subdataset naming: HDF5:"<file>"://<subset>
                    _name_str = 'HDF5:"{0}"://{1}'.format(_ifl, subset_name)
                    src_ds = gdal.Open(_name_str)
                    _proj = src_ds.GetProjection()
                    print _proj
                    _geotransform = src_ds.GetGeoTransform()
                    print _geotransform
                    # override source georeferencing with a fixed global
                    # 0.1-degree grid anchored at (-180, -90)
                    src_ds.SetProjection(_sr.ExportToWkt())
                    src_ds.SetGeoTransform([-180, 0.1, 0, -90, 0, 0.1])
                    print src_ds.RasterCount
                    print src_ds.GetMetadata()
                    t = src_ds.RasterXSize
                    _band = src_ds.GetRasterBand(1)
                    # transpose: presumably the HDF5 subset's axis order is
                    # swapped relative to the output raster -- TODO confirm
                    _data = _band.ReadAsArray().T
                    stats = _band.GetStatistics( True, True )
                    if stats is None:
                        # no statistics available for this band; skip the file
                        continue

                    logger.debug('[ STATS ] =  Minimum=%.3f, Maximum=%.3f, Mean=%.3f, StdDev=%.3f' %
                                 (stats[0], stats[1], stats[2], stats[3]))
                    ysize,xsize = _data.shape
                    tif = gdal.GetDriverByName('GTiff').Create(_ofl, xsize, ysize, eType=gdal.GDT_Float32)
                    tif.SetProjection(src_ds.GetProjection())
                    tif.SetGeoTransform(list(src_ds.GetGeoTransform()))
                    band = tif.GetRasterBand(1)
                    band.WriteArray(_data)
                    band.FlushCache()
                    band.SetNoDataValue(-9999.900390625)
                    tif = None # closes file
                except RuntimeError, e:
                    # gdal.UseExceptions() turns open/read failures into
                    # RuntimeError; abort the whole extraction on failure
                    logger.debug('Unable to open file')
                    return -1
                if src_ds is None:
                    logger.debug('Unable to open file {0}'.format(_ifl))
                    raise RuntimeError
                new_files.append(_ofl)
Example #2
0
    def calculate_impact_poverty(self, impact_file, impact_dir, impact_pattern,
                                 impact_field, impact_match_field,
                                 poor_file, poor_field, poor_multiplier, poor_match_field,
                                 output_file, output_dir, output_pattern, start_date, end_date):
        """Placeholder docstring -- replaced below; see full body."""
Example #3
0
    def mask_by_shapefile(self,
                          raster_file,
                          raster_dir,
                          raster_pattern,
                          polygon_file,
                          polygon_dir,
                          polygon_pattern,
                          output_file,
                          output_dir,
                          output_pattern,
                          nodata=False):
        """Mask a raster with a polygon shapefile via raster_utils.

        Either an explicit file or a dir+pattern pair must be supplied for
        both the raster and the polygon.  The output path comes from
        output_file, or is built from output_dir plus output_pattern applied
        to the raster filename.

        Raises
        ------
        ValueError
            If no matching raster/polygon file is found, or the output
            directory/pattern is missing when output_file is None.
        """
        if raster_file is None:
            _file_list = directory_utils.get_matching_files(
                raster_dir, raster_pattern)
            if _file_list:
                _raster_file = _file_list[0]
            else:
                raise ValueError("No matching raster file found.")
        else:
            _raster_file = raster_file

        if polygon_file is None:
            _file_list = directory_utils.get_matching_files(
                polygon_dir, polygon_pattern)
            # truthiness check: an empty match list must raise here rather
            # than fall through to an IndexError on _file_list[0]
            if _file_list:
                _polygon_file = _file_list[0]
            else:
                raise ValueError("No matching polygon file or polygon dir/pattern found.")
        else:
            _polygon_file = polygon_file

        if output_file is None:
            if output_dir is None:
                raise ValueError("No output directory provided.")
            if output_pattern is None:
                raise ValueError("No output pattern provided.")
            _output_file = os.path.join(
                output_dir,
                filename_utils.generate_output_filename(
                    os.path.basename(_raster_file), raster_pattern,
                    output_pattern, False))
        else:
            _output_file = output_file
        _gdal_path = self.vp.get('directories', 'gdal_dir')

        raster_utils.mask_by_shapefile(raster_file=_raster_file,
                                       polygon_file=_polygon_file,
                                       output_file=_output_file,
                                       gdal_path=_gdal_path,
                                       nodata=nodata)

        return None
Example #4
0
    def mask_by_raster(self,
                       raster_file,
                       raster_dir,
                       raster_pattern,
                       boundary_raster,
                       boundary_raster_dir,
                       boundary_raster_pattern,
                       output_file,
                       output_dir,
                       output_pattern,
                       nodata=False):
        """Mask a raster with a boundary raster via raster_utils.

        Either an explicit file or a dir+pattern pair must be supplied for
        both the input raster and the boundary raster.  The output path
        comes from output_file, or is built from output_dir plus
        output_pattern applied to the raster filename.

        Raises
        ------
        ValueError
            If no matching input/boundary raster is found, or the output
            directory/pattern is missing when output_file is None.
        """
        if raster_file is None:
            _file_list = directory_utils.get_matching_files(
                raster_dir, raster_pattern)
            if _file_list:
                _raster_file = _file_list[0]
            else:
                raise ValueError("No matching raster file found.")
        else:
            _raster_file = raster_file

        if boundary_raster is None:
            _file_list = directory_utils.get_matching_files(
                boundary_raster_dir, boundary_raster_pattern)
            # truthiness check: an empty match list must raise here rather
            # than fall through to an IndexError on _file_list[0]
            if _file_list:
                _boundary_raster = _file_list[0]
            else:
                raise ValueError("No matching raster boundary file or boundary dir/pattern found.")
        else:
            _boundary_raster = boundary_raster

        if output_file is None:
            if output_dir is None:
                raise ValueError("No output directory provided.")
            if output_pattern is None:
                raise ValueError("No output pattern provided.")
            _output_file = os.path.join(
                output_dir,
                filename_utils.generate_output_filename(
                    os.path.basename(_raster_file), raster_pattern,
                    output_pattern, False))
        else:
            _output_file = output_file

        # NOTE(review): `nodata` is accepted but not forwarded here --
        # confirm raster_utils.mask_by_raster does not take it
        raster_utils.mask_by_raster(raster_file=_raster_file,
                                    mask_file=_boundary_raster,
                                    output_file=_output_file)
        return None
Example #5
0
    def process(self):
        """Mosaic all rasters matching a pattern into a single output raster.

        Reads the task configuration from self.params:
        'input_dir', 'file_pattern', 'output_dir' and 'output_pattern' are
        mandatory; 'mosaic_method' is optional and defaults to 'MAXIMUM'.

        Raises
        ------
        BaseTaskImpl.ConfigFileError
            If any mandatory parameter is missing.
        ValueError
            If no raster matches the input pattern.
        """
        logger.debug("Mosaic list of rasters")

        def _required(key, description):
            # fetch a mandatory parameter or raise a config error
            if key not in self.params:
                raise BaseTaskImpl.ConfigFileError(
                    "No {0} '{1}' specified.".format(description, key), None)
            return self.params[key]

        _input_dir = _required('input_dir', 'input directory')
        _file_pattern = _required('file_pattern', 'file pattern')
        _output_dir = _required('output_dir', 'output directory')
        _output_pattern = _required('output_pattern', 'output pattern')
        _mosaic_method = self.params.get('mosaic_method', 'MAXIMUM')

        _file_list = directory_utils.get_matching_files(
            _input_dir, _file_pattern)
        if not _file_list:
            raise ValueError("No matching raster file found.")

        # output name is derived from the first matching file
        _base_name = os.path.basename(_file_list[0])
        _output_file = filename_utils.generate_output_filename(
            _base_name, _file_pattern, _output_pattern, False)

        calculate_statistics.mosaic_rasters(_file_list, _output_dir,
                                            _output_file, _mosaic_method)

        return None
Example #6
0
    def match_projection(self, master_file, master_dir, master_pattern,
                         slave_file, slave_dir, slave_pattern, output_file,
                         output_dir, output_pattern):
        """Reproject the slave raster to match the master raster.

        Either an explicit file or a dir+pattern pair must be supplied for
        both master and slave.  The output path comes from output_file, or
        is built from output_dir plus output_pattern applied to the slave
        filename.  The output directory is created if it does not exist.

        Raises
        ------
        ValueError
            If no matching master/slave file is found, or the output
            directory/pattern is missing when output_file is None.
        """
        if master_file is None:
            _file_list = directory_utils.get_matching_files(
                master_dir, master_pattern)
            # truthiness check: an empty match list must raise here rather
            # than fall through to an IndexError on _file_list[0]
            if _file_list:
                _master_file = _file_list[0]
            else:
                raise ValueError("No matching master file found.")
        else:
            _master_file = master_file

        if slave_file is None:
            _file_list = directory_utils.get_matching_files(
                slave_dir, slave_pattern)
            if _file_list:
                _slave_file = _file_list[0]
            else:
                raise ValueError("No matching slave file found.")
        else:
            _slave_file = slave_file

        if output_file is not None:
            _output_file = output_file
            _output_dir = os.path.dirname(_output_file)
        else:
            if output_dir is None:
                raise ValueError("No output directory provided.")
            if output_pattern is None:
                raise ValueError("No output pattern provided.")
            _output_dir = output_dir
            _output_file = os.path.join(
                _output_dir,
                filename_utils.generate_output_filename(
                    os.path.basename(_slave_file), slave_pattern,
                    output_pattern, False))

        if not os.path.isdir(_output_dir):
            # need to create output dir
            os.makedirs(_output_dir)

        raster_utils.reproject_image_to_master(_master_file, _slave_file,
                                               _output_file)

        return None
Example #7
0
    def calc_zonal_statistics(self, raster_file, raster_dir, raster_pattern,
                              polygon_file, polygon_dir, polygon_pattern,
                              zone_field, output_dir, output_file,
                              output_pattern):
        """Calculate zonal statistics of a raster over polygon zones.

        Either an explicit file or a dir+pattern pair must be supplied for
        both the raster and the polygon.  Results are written to the table
        output_file, or to a name built from output_dir plus output_pattern
        applied to the raster filename.

        Raises
        ------
        ValueError
            If no matching raster/polygon file is found, or the output
            directory/pattern is missing when output_file is None.
        """
        if raster_file is None:
            _file_list = directory_utils.get_matching_files(
                raster_dir, raster_pattern)
            # truthiness check: an empty match list must raise here rather
            # than fall through to an IndexError on _file_list[0]
            if _file_list:
                _raster_file = _file_list[0]
            else:
                raise ValueError("No matching raster file found.")
        else:
            _raster_file = raster_file

        if polygon_file is None:
            _file_list = directory_utils.get_matching_files(
                polygon_dir, polygon_pattern)
            if _file_list:
                _polygon_file = _file_list[0]
            else:
                raise ValueError("No matching polygon file found.")
        else:
            _polygon_file = polygon_file

        if output_file is None:
            if output_dir is None:
                raise ValueError("No output directory provided.")
            if output_pattern is None:
                raise ValueError("No output pattern provided.")
            _output_file = os.path.join(
                output_dir,
                filename_utils.generate_output_filename(
                    os.path.basename(_raster_file), raster_pattern,
                    output_pattern, False))
        else:
            _output_file = output_file

        calculate_statistics.calc_zonal_statistics(raster_file=_raster_file,
                                                   polygon_file=_polygon_file,
                                                   zone_field=zone_field,
                                                   output_table=_output_file)

        return None
Example #8
0
def crop_files(base_path,
               output_path,
               bounds,
               tools_path,
               patterns=None,
               overwrite=False,
               nodata=True):
    """Crop every raster in *base_path* matching patterns[0] to *bounds*.

    Parameters
    ----------
    base_path : str
        Directory searched for input rasters.
    output_path : str
        Directory receiving the cropped outputs.
    bounds : str
        Shapefile used as the crop boundary.
    tools_path : str
        Path to the GDAL tools, forwarded to clip_raster_to_shp.
    patterns : sequence
        Two-element sequence (input pattern, output pattern).  An empty
        input pattern matches all files.  NOTE(review): despite the None
        default, patterns must not be None -- patterns[0] and patterns[1]
        are always read; confirm with callers before tightening.
    overwrite : bool
        Re-crop files whose output already exists.
    nodata : bool
        Forwarded to clip_raster_to_shp.

    Returns
    -------
    list of str
        Basenames of the generated output files.
    """
    _fileslist = []
    # an empty input pattern means "match everything"
    _p = patterns[0] if patterns[0] else '*'

    _all_files = directory_utils.get_matching_files(base_path, _p)

    for ifl in _all_files:
        _f = os.path.basename(ifl)
        _new_filename = filename_utils.generate_output_filename(
            input_filename=_f,
            in_pattern=_p,
            out_pattern=patterns[1],
            ignore_leap_year=False)
        _out_raster = os.path.join(output_path, _new_filename)

        if not os.path.exists(_out_raster) or overwrite:
            # crop file here
            logger.debug("Cropping file: %s", ifl)
            if os.path.splitext(ifl)[1] == '.gz':
                # decompress first; the cropped source is the unzipped file
                directory_utils.unzip_file_list([ifl])
                ifl = ifl[:-3]  # remove .gz from filename
            clip_raster_to_shp(shpfile=bounds,
                               in_raster=ifl,
                               out_raster=_out_raster,
                               gdal_path=tools_path,
                               nodata=nodata)
            _fileslist.append(_new_filename)
    return _fileslist
Example #9
0
 def mosaic_tiles(self, files, output_dir, product, overwrite=False):
     """Mosaic a list of MODIS tiles into a single file using MRTools.

     Parameters
     ----------
     files : list of str
         Tile filenames to mosaic; the first file seeds the output name.
     output_dir : str
         Directory receiving the mosaic and the temporary file list.
     product : str
         MODIS product key used to look up the spectral subset.
     overwrite : bool
         Re-create the mosaic even if the output already exists.

     Returns
     -------
     str
         The generated mosaic filename (basename only).
     """
     logger.info('entering mosaic_tiles')
     # MRTool's mosaic tool reads its inputs from a list file on disk
     _list_filename = os.path.join(output_dir, "file_list.txt")
     self._write_mosaic_list(_list_filename, files)
     _new_filename = filename_utils.generate_output_filename(
         input_filename=os.path.basename(files[0]),
         in_pattern=self.vp.get('MODIS', 'modis_tile_pattern'),
         out_pattern=self.vp.get('MODIS', 'modis_mosaic_output_pattern'),
         ignore_leap_year=False)
     _spectral_subset = self.vp.get('MODIS_PRODUCTS', product)
     if not os.path.exists(
             os.path.normpath(os.path.join(output_dir,
                                           _new_filename))) or overwrite:
         self._mosaic_files(
             os.path.normpath(_list_filename),
             os.path.normpath(os.path.join(output_dir, _new_filename)),
             _spectral_subset)
     # the temporary list file is always cleaned up, mosaic or not
     os.remove(_list_filename)
     logger.info('leaving mosaic_tiles')
     return _new_filename
Example #10
0
    def calculate_impact_area(self, hazard_raster, hazard_dir, hazard_pattern, threshold, threshold_direction,
                              boundary, b_field, output_file, output_dir, output_pattern, start_date, end_date,
                              hazard_var='vhi'):
        """Calculate the area affected by a hazard within each boundary zone.

        Reclassifies the hazard raster against a threshold (from arguments
        or VampireDefaults), computes zonal statistics over *boundary*, and
        writes an output table with an 'area_aff' field (SUM scaled by the
        configured area multiplier) plus start/end date columns.

        Raises
        ------
        ValueError
            If neither hazard_raster nor hazard_pattern is given, or no
            output file/pattern is specified.
        """
        logger.debug("calculate_impact_area with hazard {0}, hazard dir {1}, hazard pattern {2}".format(hazard_raster,
                                                                                                        hazard_dir,
                                                                                                        hazard_pattern))
        if threshold is None:
            # get threshold from VampireDefaults
            _threshold = self.vp.get('hazard_impact', '{0}_threshold'.format(hazard_var))
        else:
            _threshold = threshold
        if threshold_direction is None:
            _threshold_direction = self.vp.get('hazard_impact', '{0}_threshold_direction'.format(hazard_var))
        else:
            _threshold_direction = threshold_direction

        if hazard_raster is None:
            if hazard_pattern is not None:
                _input_files = directory_utils.get_matching_files(hazard_dir, hazard_pattern)
                _hazard_raster = os.path.join(hazard_dir, _input_files[0])
                logger.debug("hazard files: {0}".format(_input_files))
            else:
                raise ValueError("Hazard raster is not specified")
        else:
            _hazard_raster = hazard_raster

        if output_file is None:
            if output_pattern is not None:
                _input_pattern = self.vp.get('hazard_impact', '{0}_input_pattern'.format(hazard_var))
                _output_file = filename_utils.generate_output_filename(os.path.basename(_hazard_raster),
                                                                       _input_pattern, output_pattern)
                _output_file = os.path.join(output_dir, _output_file)
                if not os.path.exists(output_dir):
                    os.makedirs(output_dir)
            else:
                raise ValueError("No output specified")
        else:
            _output_file = output_file
        logger.debug("Output file: {0}".format(_output_file))

        if _threshold == '':
            # no threshold configured: use the hazard raster as-is
            _reclass_raster = _hazard_raster
        else:
            if _threshold_direction == '':
                _threshold_direction = 'LESS_THAN'
            # reclassify hazard raster to generate mask of all <= threshold
            _reclass_raster = os.path.join(os.path.dirname(_output_file), 'hazard_area_reclass.tif')
            impact_analysis.reclassify_raster(raster=_hazard_raster, threshold=_threshold,
                                              threshold_direction=_threshold_direction, output_raster=_reclass_raster)

        # calculate impact on boundary
        calculate_statistics.calc_zonal_statistics(raster_file=_reclass_raster, polygon_file=boundary,
                                                   zone_field=b_field, output_table=_output_file)
        # convert to hectares
        # TODO: get multiplier from defaults depending on resolution of hazard raster
        _multiplier = float(self.vp.get('hazard_impact', '{0}_area_multiplier'.format(hazard_var)))
        csv_utils.calc_field(table_name=_output_file, new_field='area_aff', cal_field='SUM', multiplier=_multiplier)
        # add start and end date fields and set values
        csv_utils.add_field(table_name=_output_file, new_field='start_date', value=start_date)
        csv_utils.add_field(table_name=_output_file, new_field='end_date', value=end_date)

        csv_utils.copy_field(table_name=_output_file, new_field='kabupaten_id', copy_field=b_field)
        return None
Example #11
0
                _lst_min_filename = files_list[0]
            except IndexError, e:
                raise ValueError(
                    'Cannot find matching LST long-term minimum file in directory'
                )

        if dst_filename is None:
            # get new filename from directory and pattern
            if cur_pattern is None:
                _cur_pattern = self.vp.get('MODIS_LST', 'lst_regional_pattern')
            else:
                _cur_pattern = cur_pattern
            _dst_filename = os.path.join(
                dst_dir,
                filename_utils.generate_output_filename(
                    os.path.split(_cur_filename)[1], _cur_pattern,
                    dst_pattern))
        else:
            _dst_filename = dst_filename
        if not os.path.isdir(dst_dir):
            # make directory if not existing
            os.makedirs(dst_dir)

        temperature_analysis.calc_TCI(cur_filename=_cur_filename,
                                      lta_max_filename=_lst_max_filename,
                                      lta_min_filename=_lst_min_filename,
                                      dst_filename=_dst_filename)
        if _temp_file is not None:
            os.remove(os.path.join(_dst_dir, _temp_file))
        logger.info('leaving calc_tci')
        return None
Example #12
0
    def calculate_impact_popn(self, hazard_raster, hazard_dir, hazard_pattern, threshold,
                              population_raster, boundary, b_field, output_file,
                              output_dir, output_pattern, start_date, end_date, threshold_direction, hazard_var='vhi'):
        """Calculate the population affected by a hazard within each zone.

        Reclassifies the hazard raster against a threshold (from arguments
        or VampireDefaults), optionally masks the population raster with the
        reclassified hazard, computes zonal statistics over *boundary*, and
        writes an output table with a 'popn_aff' field (COUNT when no
        population raster is given, otherwise SUM) plus start/end dates.

        Raises
        ------
        ValueError
            If neither hazard_raster nor hazard_pattern is given, or no
            output file/pattern is specified.
        """
        if threshold is None:
            # get threshold from VampireDefaults
            _threshold = self.vp.get('hazard_impact', '{0}_threshold'.format(hazard_var))
        else:
            _threshold = threshold

        if threshold_direction is None:
            _threshold_direction = self.vp.get('hazard_impact', '{0}_threshold_direction'.format(hazard_var))
        else:
            _threshold_direction = threshold_direction

        if hazard_raster is None:
            if hazard_pattern is not None:
                _input_files = directory_utils.get_matching_files(hazard_dir, hazard_pattern)
                _hazard_raster = os.path.join(hazard_dir, _input_files[0])
            else:
                raise ValueError("Hazard raster is not specified")
        else:
            _hazard_raster = hazard_raster

        if output_file is None:
            if output_pattern is not None:
                _input_pattern = self.vp.get('hazard_impact', '{0}_input_pattern'.format(hazard_var))
                _output_file = filename_utils.generate_output_filename(os.path.basename(_hazard_raster),
                                                                       _input_pattern, output_pattern)
                _output_file = os.path.join(output_dir, _output_file)
                if not os.path.exists(output_dir):
                    os.makedirs(output_dir)
            else:
                raise ValueError("No output specified")
        else:
            _output_file = output_file

        if _threshold == '':
            # no threshold configured: use the hazard raster as-is
            _reclass_raster = _hazard_raster
        else:
            if _threshold_direction == '':
                _threshold_direction = 'LESS_THAN'
            # reclassify hazard raster to generate mask of all <= threshold
            _reclass_raster = os.path.join(os.path.dirname(_output_file), 'hazard_popn_reclass.tif')
            impact_analysis.reclassify_raster(raster=_hazard_raster, threshold=_threshold, output_raster=_reclass_raster,
                                              threshold_direction=_threshold_direction)

        if population_raster is None:
            _hazard_raster = _reclass_raster
        else:
            # calculate population from hazard raster and population raster intersection
            _hazard_raster = os.path.join(os.path.dirname(_output_file), 'hazard_popn.tif')
            impact_analysis.create_mask(raster=population_raster, mask=_reclass_raster, output_raster=_hazard_raster)
        # calculate impact on boundary
        calculate_statistics.calc_zonal_statistics(raster_file=_hazard_raster, polygon_file=boundary,
                                                   zone_field=b_field, output_table=_output_file)

        # add field to table and calculate total for each area
        if population_raster is None:
            # without a population raster, each affected cell counts as one
            csv_utils.calc_field(table_name=_output_file, new_field='popn_aff', cal_field='COUNT', type='LONG')
        else:
            csv_utils.calc_field(table_name=_output_file, new_field='popn_aff', cal_field='SUM', type='LONG')

        # add start and end date fields and set values
        csv_utils.add_field(table_name=_output_file, new_field='start_date', value=start_date)
        csv_utils.add_field(table_name=_output_file, new_field='end_date', value=end_date)
        csv_utils.copy_field(table_name=_output_file, new_field='kabupaten_id', copy_field=b_field)

        return None
Example #13
0
    def accumulate_data(self,
                        data_dir=None,
                        data_pattern=None,
                        output_dir=None,
                        output_pattern=None,
                        num_days=None,
                        dates=None,
                        overwrite=False):
        """Accumulate GFS precipitation forecasts into rolling num_days totals.

        Scans data_dir for files matching data_pattern (a regex that must
        define a named group 'forecast_hr'), and every time a full day of
        forecast hours has been seen, accumulates the trailing num_days days
        of files into a single output file named from output_pattern.

        Parameters
        ----------
        data_dir : str
            Directory containing the forecast files to accumulate.  When
            *dates* is None, the output date is parsed from this directory's
            basename minus its last two characters ('%Y%m%d').
        data_pattern : str
            Regex matching input filenames; must define a 'forecast_hr'
            group giving the forecast hour.
        output_dir : str
            Directory for the accumulated output files.
        output_pattern : str
            Output name pattern; the '{forecast_day}', '{year}', '{month}'
            and '{day}' placeholders are substituted before use.
        num_days : int
            Number of days accumulated into each output file.
        dates : list of datetime.datetime, optional
            Dates used in output names; defaults to the single date parsed
            from data_dir.
        overwrite : bool
            Re-create outputs that already exist.

        Returns
        -------
        list of str
            Names of the accumulated output files.
        """
        logger.info('entering accumulate_data')
        files_list = []
        _dates = []
        if dates is None:
            # get date from data directory name
            _dir_s = os.path.basename(data_dir)[:-2]
            _date = datetime.datetime.strptime(_dir_s, '%Y%m%d')
            _dates.append(_date)
        else:
            _dates = dates

        for d in _dates:
            # find all files
            # NOTE(review): the file scan does not depend on d -- the same
            # file set is re-processed per date, only output names differ
            _allfiles = directory_utils.get_matching_files(
                data_dir, data_pattern)
            _allfiles.sort()
            _cur_day = 0
            _cur_hr = 0
            _cur_accum_str = ''
            # indices into _allfiles bounding the current accumulation window
            _accum_window_start = 0
            _accum_window_end = 0
            # _day_ptrs[i] is the index of the file that ends day i+1
            _day_ptrs = []

            _pattern = re.compile(data_pattern)

            for idx, fname in enumerate(_allfiles):
                # get forecast hour from filename
                _base_fname = os.path.basename(fname)
                _result = _pattern.match(_base_fname)
                if _result:
                    _forecast_hr = _result.group('forecast_hr')
                    if int(_forecast_hr) % 24 == 0:
                        # end of day
                        _cur_day = _cur_day + 1
                        _day_ptrs.append(idx)
                        if _cur_day >= num_days:
                            # accumulate last num_days
                            # e.g. days 2..4 for num_days=3 -> '234'
                            _output_pattern = output_pattern.replace(
                                '{forecast_day}', ''.join(
                                    map(
                                        str,
                                        range(_cur_day - num_days + 1,
                                              _cur_day + 1))))
                            _output_pattern = _output_pattern.replace(
                                '{year}', '{0}'.format(d.year))
                            _output_pattern = _output_pattern.replace(
                                '{month}', '{0:0>2}'.format(d.month))
                            _output_pattern = _output_pattern.replace(
                                '{day}', '{0:0>2}'.format(d.day))
                            _new_filename = filename_utils.generate_output_filename(
                                _base_fname, data_pattern, _output_pattern)
                            if not os.path.exists(
                                    os.path.join(output_dir,
                                                 _new_filename)) or overwrite:
                                # sum the files in the current window
                                self._accumulate_data(
                                    _allfiles[
                                        _accum_window_start:_accum_window_end +
                                        1], output_dir, _new_filename)
                            # slide the window start past the oldest day
                            _accum_window_start = _day_ptrs[_cur_day -
                                                            num_days] + 1
                            files_list.append(_new_filename)
                    _accum_window_end = _accum_window_end + 1

        return files_list
Example #14
0
            _threshold_filename = threshold_filename
        if dst_dir is not None and not os.path.isdir(dst_dir):
            # destination directory does not exist, create it first
            os.makedirs(dst_dir)
        if output_value is None:
            _output_value = 1
        else:
            _output_value = output_value

        for f in _forecast_filenames:
            # calculate flood forecast for each file
            if dst_filename is None:
                # TODO this doesn't work for flood alert file names!! Not appropriate

                _dst_pattern = dst_pattern.replace(
                    '{num_years}', '{0:0>2}yrs'.format(num_years))
                # get new filename from directory and pattern
                _dst_filename = os.path.join(
                    dst_dir,
                    filename_utils.generate_output_filename(
                        os.path.split(f)[1], forecast_pattern, _dst_pattern))
            else:
                _dst_filename = dst_filename
            precipitation_analysis.calc_flood_alert(
                forecast_filename=f,
                threshold_filename=_threshold_filename,
                dst_filename=_dst_filename,
                value=_output_value)
        logger.info('leaving calc_flood_alert')
        return None
    def calc_days_since_last_rainfall(self, data_dir, data_pattern, dst_dir,
                                      start_date, threshold, max_days):
        """Generate days-since-last-rain rasters for a window of rainfall data.

        Collects rainfall rasters in data_dir dated within max_days before
        start_date and hands them to precipitation_analysis.days_since_last_rain
        to produce four outputs in dst_dir: days since last wet day, days since
        last dry day, number of wet days, and rainfall accumulation.

        Parameters
        ----------
        data_dir : str
            Directory containing the input rainfall rasters.
        data_pattern : str
            Regex with named groups 'year', 'month' and 'day'; used both to
            match input files and to derive output filenames.
        dst_dir : str
            Output directory; created if it does not exist.
        start_date : datetime.date
            End of the analysis window (exclusive).
        threshold : number
            Rainfall threshold (mm) distinguishing wet from dry days.
        max_days : int
            Length of the analysis window in days.

        Raises
        ------
        ValueError
            If no files in data_dir match data_pattern.

        Returns
        -------
        None
        """
        logger.info('entering calc_days_since_last_rainfall')
        # get list of files from start_date back max_days
        files_list = directory_utils.get_matching_files(data_dir, data_pattern)
        if not files_list:
            # fail early with a clear message: the reference-filename code
            # below would otherwise raise an opaque IndexError on files_list[0]
            raise ValueError(
                'Cannot find files matching {0} in {1}'.format(
                    data_pattern, data_dir))
        raster_list = []
        _r_in = regex.compile(data_pattern)
        # window start is loop-invariant; hoisted out of the loop
        max_date = start_date - datetime.timedelta(days=max_days)
        for f in files_list:
            _m = _r_in.match(os.path.basename(f))
            f_date = datetime.date(int(_m.group('year')),
                                   int(_m.group('month')),
                                   int(_m.group('day')))
            if max_date <= f_date < start_date:
                raster_list.append(f)

        if not os.path.exists(dst_dir):
            os.makedirs(dst_dir)

        def replace_closure(subgroup, replacement, m):
            # Replace the named subgroup of match m with `replacement`.
            # Returning the unmodified match text when the group is absent or
            # empty avoids handing None back to regex.sub, which would raise
            # a TypeError (the original closure fell through and returned None).
            if m.group(subgroup) not in [None, '']:
                start = m.start(subgroup)
                end = m.end(subgroup)
                return m.group()[:start] + replacement + m.group()[end:]
            return m.group()

        # build a reference filename carrying start_date's year/month/day,
        # based on the first matching input file
        _ref_file = regex.sub(
            data_pattern,
            functools.partial(replace_closure, 'year',
                              '{0}'.format(start_date.year)),
            os.path.basename(files_list[0]))
        _ref_file = regex.sub(
            data_pattern,
            functools.partial(replace_closure, 'month',
                              '{0:0>2}'.format(start_date.month)), _ref_file)
        _ref_file = regex.sub(
            data_pattern,
            functools.partial(replace_closure, 'day',
                              '{0:0>2}'.format(start_date.day)), _ref_file)

        def _output_path(pattern_option):
            # Expand one Days_Since_Last_Rain output pattern from config
            # (substituting {max_days} and {threshold}) and turn it into a
            # full path derived from the reference filename.
            _pattern = self.vp.get('Days_Since_Last_Rain', pattern_option)
            _pattern = _pattern.replace('{max_days}', '{0}d'.format(max_days))
            _pattern = _pattern.replace('{threshold}',
                                        '{0}mm'.format(threshold))
            return os.path.join(
                dst_dir,
                filename_utils.generate_output_filename(
                    _ref_file, data_pattern, _pattern))

        dslw_file = _output_path('regional_dslr_output_pattern')
        dsld_file = _output_path('regional_dsld_output_pattern')
        num_wet_file = _output_path('regional_wet_accum_output_pattern')
        ra_file = _output_path('regional_accum_output_pattern')
        _temp_dir = self.vp.get('directories', 'temp_dir')
        if not os.path.exists(_temp_dir):
            os.makedirs(_temp_dir)
        precipitation_analysis.days_since_last_rain(
            raster_list=raster_list,
            dslw_filename=dslw_file,
            dsld_filename=dsld_file,
            num_wet_days_filename=num_wet_file,
            rainfall_accum_filename=ra_file,
            temp_dir=_temp_dir,
            threshold=threshold,
            max_days=max_days)
        logger.info('leaving calc_days_since_last_rainfall')
        return None
                              cur_pattern=None, lta_pattern=None, dst_pattern=None,
                              dst_dir=None
                             ):
        logger.info('entering calc_rainfall_anomaly')
        if cur_filename is None:
            # get filename from pattern and directory
            files_list = directory_utils.get_matching_files(cur_dir, cur_pattern)
            try:
                cur_filename = files_list[0]
            except IndexError, e:
                raise ValueError('Cannot find matching rainfall file in directory')
        if lta_filename is None:
            # get filename from pattern and diretory
            files_list = directory_utils.get_matching_files(lta_dir, lta_pattern)
            try:
                lta_filename = files_list[0]
            except IndexError, e:
                raise ValueError('Cannot find matching long-term average file.')

        if dst_filename is None:
            # get new filename from directory and pattern
            dst_filename = os.path.join(dst_dir, filename_utils.generate_output_filename(
                os.path.split(cur_filename)[1], cur_pattern, dst_pattern))
        if not os.path.exists(dst_dir):
            os.makedirs(dst_dir)
        precipitation_analysis.calc_rainfall_anomaly(cur_filename=cur_filename,
                                                     lta_filename=lta_filename,
                                                     dst_filename=dst_filename)
        logger.info('leaving calc_rainfall_anomaly')
        return None
Пример #17
0
    def calc_longterm_stats(self,
                            input_dir,
                            output_dir,
                            product,
                            interval,
                            country=None,
                            input_pattern=None,
                            output_pattern=None,
                            start_date=None,
                            end_date=None,
                            function_list=None):
        """Compute long-term statistics across years for a MODIS product.

        Files in input_dir matching the input pattern are grouped by
        day-of-year; for each group the requested statistics (default
        ['AVG']) are computed across years and written to output_dir via
        self._calculate_stats. If the requested interval differs from the
        product's native interval, groups are merged (currently only the
        8-day to 16-day conversion is supported).

        Parameters
        ----------
        input_dir, output_dir : str
            Source and destination directories; output_dir is created if
            missing.
        product : str
            MODIS product name as configured in the [MODIS] section.
        interval : str or None
            Desired output interval; None means the product's native interval.
        country : str, optional
            Defaults to the configured home country.
        input_pattern, output_pattern : str, optional
            Filename regex/patterns; derived from configuration when omitted.
        start_date, end_date : date, datetime or str, optional
            Bounds on the files included in the statistics.
        function_list : list of str, optional
            Statistic codes understood by _calculate_stats (default ['AVG']).

        Raises
        ------
        ValueError
            If the product is unknown or a file pattern lacks month or
            day-of-year information.

        Returns
        -------
        None
        """
        logger.info('entering calc_longterm_stats')
        if output_pattern is None:
            if product == self.vp.get('MODIS', 'vegetation_product'):
                _output_pattern = self.vp.get('MODIS_EVI_Long_Term_Average',
                                              'lta_output_pattern')
            elif product == self.vp.get('MODIS',
                                        'land_surface_temperature_product'):
                _output_pattern = self.vp.get('MODIS_LST_Long_Term_Average',
                                              'lta_output_pattern')
            else:
                # bug fix: was a bare `raise` with no active exception
                raise ValueError('Unknown product: {0}'.format(product))
        else:
            _output_pattern = output_pattern
        if country is None:
            _country = self.vp.get('vampire_tmp', 'home_country')
        else:
            _country = country
        if input_pattern is None:
            # bug fix: test the defaulted _country; the original compared the
            # raw `country` argument, leaving _country computed but unused
            if _country == 'Global':
                if product == self.vp.get('MODIS', 'vegetation_product'):
                    _input_pattern = self.vp.get('MODIS', 'evi_pattern')
                elif product == self.vp.get(
                        'MODIS', 'land_surface_temperature_product'):
                    _input_pattern = self.vp.get('MODIS_LST', 'lst_pattern')
                else:
                    raise ValueError('Unknown product: {0}'.format(product))
            else:
                if product == self.vp.get('MODIS', 'vegetation_product'):
                    _input_pattern = self.vp.get('MODIS_EVI',
                                                 'evi_regional_pattern')
                elif product == self.vp.get(
                        'MODIS', 'land_surface_temperature_product'):
                    _input_pattern = self.vp.get('MODIS_LST',
                                                 'lst_regional_pattern')
                else:
                    raise ValueError('Unknown product: {0}'.format(product))
        else:
            _input_pattern = input_pattern

        # the product's native interval is always the working interval; if the
        # caller asked for something else, remember to convert the groups below
        _convert_interval = False
        _new_interval = None
        _interval = self.vp.get('MODIS_PRODUCTS',
                                '{0}.interval'.format(product))
        if interval is not None and interval != _interval:
            _convert_interval = True
            _new_interval = interval

        def _as_date(d):
            # Normalise a date/datetime/string bound to a datetime.date.
            # bug fix: the original called .date() on values that could be
            # plain datetime.date (AttributeError) and compared a date
            # directly to a datetime in the end_date branch.
            if isinstance(d, datetime.datetime):
                return d.date()
            if isinstance(d, datetime.date):
                return d
            return dateutil.parser.parse(d).date()

        _start = _as_date(start_date) if start_date is not None else None
        _end = _as_date(end_date) if end_date is not None else None

        # bug fix: was get_matching_files(input_dir, input_pattern), which
        # ignored the pattern derived above (and may have been None)
        _all_files = directory_utils.get_matching_files(
            input_dir, _input_pattern)
        _file_list = {}
        _yrs = []
        _doy = []
        for f in _all_files:
            _fname = os.path.basename(f)
            _result = re.match(_input_pattern, _fname)
            if 'month' not in _result.groupdict():
                if 'dayofyear' in _result.groupdict():
                    # derive calendar month/day from the day-of-year group
                    _dt = datetime.datetime(int(_result.group('year')), 1, 1) + \
                          datetime.timedelta(int(_result.group('dayofyear')) - 1)
                    _month = _dt.month
                    _day = _dt.day
                else:
                    raise ValueError('No month or day of year in file pattern')
            else:
                _month = int(_result.group('month'))
                _day = int(_result.group('day'))

            _f_date = datetime.date(int(_result.group('year')), _month, _day)
            if _start is not None:
                if _f_date < _start:
                    # NOTE(review): `break` (not `continue`) assumes the file
                    # list is date-ordered - kept from the original; confirm
                    break
            elif _end is not None:
                # NOTE(review): as in the original, end_date is only applied
                # when start_date is None
                if _f_date > _end:
                    break
            _yrs.append(_result.group('year'))
            _doy.append(_f_date.timetuple().tm_yday)
            _file_list.setdefault(_f_date.timetuple().tm_yday, []).append(f)

        _years = set(_yrs)
        _syr = min(_years)
        _eyr = max(_years)
        _num_yrs = str(int(_eyr) - int(_syr))
        # if converting 8-day data to 16-day data, merge the day-of-year
        # groups. NOTE: this only works for the 8-day -> 16-day conversion.
        # TODO: make this generic so it works for other conversions.
        if _convert_interval and _new_interval == '16-days':
            _new_file_list = {}
            _pending = []
            for x in sorted(set(_doy)):
                if (x - 1) % 16 == 0:
                    # start of a 16-day period: attach this group plus any
                    # intermediate 8-day groups accumulated since the last one
                    _new_file_list.setdefault(x, []).extend(_file_list[x])
                    _new_file_list.setdefault(x, []).extend(_pending)
                    _pending = []
                else:
                    _pending.extend(_file_list[x])
            if _pending:
                # wrap any trailing data around to the first period
                _new_file_list.setdefault(1, []).extend(_pending)
            _interval = _new_interval
            _file_list = _new_file_list

        _output_pattern = _output_pattern.replace('{yr_range}',
                                                  '{0}-{1}'.format(_syr, _eyr))
        _output_pattern = _output_pattern.replace('{num_yrs}',
                                                  '{0}yrs'.format(_num_yrs))
        _output_pattern = _output_pattern.replace(
            '{subset}', '{0}'.format(_interval.lower()))
        if function_list is None:
            _function_list = ['AVG']
        else:
            _function_list = function_list

        if not os.path.isdir(output_dir):
            # directory doesn't exist....create it first
            os.makedirs(output_dir)
        for d in _file_list:
            fl = _file_list[d]
            newfl = directory_utils.unzip_file_list(fl)
            if len(fl) != 0:
                for func in _function_list:
                    _fn_output_pattern = _output_pattern.replace(
                        '{statistic}', func.lower())
                    newfilename = filename_utils.generate_output_filename(
                        os.path.basename(fl[0]), _input_pattern,
                        _fn_output_pattern)
                    self._calculate_stats(newfl, newfilename, output_dir,
                                          [func])

        logger.info('leaving calc_longterm_stats')
        return None
Пример #18
0
    def _extract_subset(self,
                        input_dir,
                        output_dir,
                        patterns,
                        subset,
                        subset_name,
                        overwrite=False):
        logger.info('entering _extract_subset')
        _all_files = directory_utils.get_matching_files(input_dir, patterns[0])
        if not _all_files:
            logger.debug(
                'Extracting subset {0}. No files found in {1} with pattern {2}'
                .format(subset_name, input_dir, patterns[0]))
            print 'No files found in ' + input_dir + ', please check directory and try again'
            return -1

        # check output directory exists and create it if not
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        new_files = []
        for _ifl in _all_files:
            # generate parameter file
            _nfl = filename_utils.generate_output_filename(
                os.path.basename(_ifl), patterns[0], patterns[1])
            _ofl = os.path.join(output_dir, _nfl)
            _checkfl = "{0}.{1}{2}".format(
                os.path.splitext(_ofl)[0], subset_name,
                os.path.splitext(_ofl)[1])
            if not os.path.exists(_checkfl) or overwrite == True:
                try:
                    src_ds = gdal.Open(_ifl)
                except RuntimeError, e:
                    logger.debug('Unable to open file')
                    return -1
                sds = src_ds.GetSubDatasets()
                #                if logger: logger.debug("Number of bands: %s",src_ds.RasterCount)
                self._convert_to_tiff(_ifl, _ofl,
                                      self.vp.get('directories', 'gdal_dir'))
                if len(sds) == 0:
                    # no subdatasets - rename file to include subset name
                    _new_name = "{0}.{1}{2}".format(
                        os.path.splitext(os.path.basename(_ofl))[0],
                        subset_name,
                        os.path.splitext(_ofl)[1])
                    if not os.path.exists(os.path.join(
                            output_dir, _new_name)) or overwrite:
                        os.rename(_ofl, os.path.join(output_dir, _new_name))
                    if not os.path.exists(
                            os.path.join(
                                output_dir,
                                "{0}.aux.xml".format(_new_name))) or overwrite:
                        os.rename(
                            "{0}.aux.xml".format(_ofl),
                            os.path.join(output_dir,
                                         "{0}.aux.xml".format(_new_name)))
                for idx, sbs in enumerate(sds):
                    #                    if logger: logger.debug("Subdataset: %s", sbs[0])
                    # get subset name (without spaces)
                    _n = (sbs[0].rsplit(':', 1)[1]).replace(' ', '_')
                    _rf = "{0}.{1}{2}".format(
                        os.path.splitext(os.path.basename(_ofl))[0], _n,
                        os.path.splitext(_ofl)[1])
                    _cf = "{0}_{1}{2}".format(
                        os.path.splitext(os.path.basename(_ofl))[0],
                        str(idx + 1).zfill(2),
                        os.path.splitext(_ofl)[1])
                    if not os.path.exists(os.path.join(output_dir,
                                                       _cf)) or overwrite:
                        _cf = "{0}_{1}{2}".format(
                            os.path.splitext(os.path.basename(_ofl))[0],
                            str(idx + 1),
                            os.path.splitext(_ofl)[1])
                    if idx + 1 not in subset:
                        # remove un-needed files (including .aux & .aux.xml)
                        os.remove(os.path.join(output_dir, _cf))
                        _aux_f = os.path.join(output_dir,
                                              "{0}.aux.xml".format(_cf))
                        if os.path.exists(_aux_f):
                            os.remove(_aux_f)
                    else:
                        # keep this file - rename with subset name
                        if os.path.exists(os.path.join(output_dir, _rf)):
                            if overwrite:
                                # file already exists....delete first
                                os.remove(os.path.join(output_dir, _rf))
                                os.rename(os.path.join(output_dir, _cf),
                                          os.path.join(output_dir, _rf))
                            else:
                                # just remove the new file
                                os.remove(os.path.join(output_dir, _cf))
                        else:
                            os.rename(os.path.join(output_dir, _cf),
                                      os.path.join(output_dir, _rf))

                        _aux_f = os.path.join(output_dir,
                                              "{0}.aux.xml".format(_cf))
                        if os.path.exists(_aux_f):
                            if os.path.exists(
                                    os.path.join(output_dir,
                                                 "{0}.aux.xml".format(_rf))):
                                # can't rename, delete first
                                os.remove(
                                    os.path.join(output_dir,
                                                 "{0}.aux.xml".format(_rf)))
                            os.rename(
                                _aux_f,
                                os.path.join(output_dir,
                                             "{0}.aux.xml".format(_rf)))
                        new_files.append(_rf)