Example 1
    def test_subset_variables(self):
        fileName = str(self.datadir.join('example_jan.nc'))
        timestr = ['xtime_start', 'xtime_end']
        varList = ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature']

        # first, test loading the whole data set and then calling
        # subset_variables explicitly
        ds = xr.open_mfdataset(
            fileName,
            preprocess=lambda x: mpas_xarray.preprocess_mpas(x,
                                                             timestr=timestr,
                                                             yearoffset=1850))
        ds = mpas_xarray.subset_variables(ds, varList)
        self.assertEqual(sorted(ds.data_vars.keys()), sorted(varList))
        self.assertEqual(pd.Timestamp(ds.Time.values[0]),
                         pd.Timestamp('1855-01-16 12:22:30'))

        # next, test the same with the onlyvars argument
        ds = xr.open_mfdataset(
            fileName,
            preprocess=lambda x: mpas_xarray.preprocess_mpas(x,
                                                             timestr=timestr,
                                                             onlyvars=varList,
                                                             yearoffset=1850))
        self.assertEqual(list(ds.data_vars.keys()), varList)
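
For orientation, a minimal self-contained sketch of the behaviour these assertions check, with toy variable names and plain xarray list indexing standing in for ``subset_variables``:

import numpy as np
import xarray as xr

ds = xr.Dataset({'sst': ('Time', np.zeros(3)),
                 'sss': ('Time', np.ones(3))})
# selecting with a list of names keeps exactly those data variables
# (and their coordinates), which is what the tests assert
dsSub = ds[['sst']]
assert sorted(dsSub.data_vars.keys()) == ['sst']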
Example 2
    def test_subset_variables(self):
        fileName = str(self.datadir.join('example_jan.nc'))
        timestr = ['xtime_start', 'xtime_end']
        varList = ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature']

        # first, test loading the whole data set and then calling
        # subset_variables explicitly
        ds = xr.open_mfdataset(
            fileName,
            preprocess=lambda x: mpas_xarray.preprocess_mpas(
                x, timestr=timestr, yearoffset=1850))
        ds = mpas_xarray.subset_variables(ds, varList)
        self.assertEqual(sorted(ds.data_vars.keys()), sorted(varList))
        self.assertEqual(pd.Timestamp(ds.Time.values[0]),
                         pd.Timestamp('1855-01-16 12:22:30'))

        # next, test the same with the onlyvars argument
        ds = xr.open_mfdataset(
            fileName,
            preprocess=lambda x: mpas_xarray.preprocess_mpas(
                x, timestr=timestr, onlyvars=varList, yearoffset=1850))
        self.assertEqual(list(ds.data_vars.keys()), varList)

        with self.assertRaisesRegexp(AssertionError,
                                     'Empty dataset is returned.'):
            missingvars = ['foo', 'bar']
            ds = xr.open_mfdataset(
                fileName,
                preprocess=lambda x: mpas_xarray.preprocess_mpas(
                    x, timestr=timestr, onlyvars=missingvars, yearoffset=1850))
Example 3
    def test_subset_variables(self):
        fileName = str(self.datadir.join('example_jan.nc'))
        calendar = 'gregorian_noleap'
        timestr = ['xtime_start', 'xtime_end']
        variableList = \
            ['time_avg_avgValueWithinOceanRegion_avgSurfaceTemperature']

        # first, test loading the whole data set and then calling
        # subset_variables explicitly
        ds = mpas_xarray.open_multifile_dataset(fileNames=fileName,
                                                calendar=calendar,
                                                timeVariableName=timestr)
        ds = mpas_xarray.subset_variables(ds, variableList)
        dsVarList = list(ds.data_vars.keys()) + list(ds.coords.keys())
        assert (numpy.all([var in dsVarList for var in variableList]))
        self.assertEqual(
            days_to_datetime(days=ds.Time.values[0],
                             referenceDate='0001-01-01',
                             calendar=calendar),
            string_to_datetime('0005-01-16 12:22:30'))
        # next, test the same with the onlyvars argument
        ds = mpas_xarray.open_multifile_dataset(fileNames=fileName,
                                                calendar=calendar,
                                                timeVariableName=timestr,
                                                variableList=variableList)
        self.assertEqual(list(ds.data_vars.keys()), variableList)

        with six.assertRaisesRegex(self, ValueError,
                                   'Empty dataset is returned.'):
            missingvars = ['foo', 'bar']
            ds = mpas_xarray.open_multifile_dataset(fileNames=fileName,
                                                    calendar=calendar,
                                                    timeVariableName=timestr,
                                                    variableList=missingvars)
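
The timestamp assertion in this example depends on converting days since a reference date in a no-leap calendar. A rough stand-in using the cftime package (this is not the MPAS-Analysis ``days_to_datetime`` implementation):

import cftime

# 4 no-leap years (1460 days) plus 15.515625 days lands mid-January
# of year 5, approximately 0005-01-16 12:22:30
date = cftime.num2date(1460 + 15.515625, units='days since 0001-01-01',
                       calendar='noleap')
print(date)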
Example 4
    def _compute_area_vol(self):  # {{{
        '''
        Compute part of the time series of sea ice volume and area, given time
        indices to process.
        '''

        outFileNames = {}
        for hemisphere in ['NH', 'SH']:
            baseDirectory = build_config_full_path(self.config, 'output',
                                                   'timeSeriesSubdirectory')

            make_directories(baseDirectory)

            outFileName = '{}/seaIceAreaVol{}.nc'.format(
                baseDirectory, hemisphere)
            outFileNames[hemisphere] = outFileName

        dsTimeSeries = {}
        dsMesh = xr.open_dataset(self.restartFileName)
        dsMesh = subset_variables(dsMesh, variableList=['latCell', 'areaCell'])
        # Load data
        ds = open_mpas_dataset(fileName=self.inputFile,
                               calendar=self.calendar,
                               variableList=self.variableList,
                               startDate=self.startDate,
                               endDate=self.endDate)

        for hemisphere in ['NH', 'SH']:

            if hemisphere == 'NH':
                mask = dsMesh.latCell > 0
            else:
                mask = dsMesh.latCell < 0

            dsAreaSum = (ds.where(mask) * dsMesh.areaCell).sum('nCells')
            dsAreaSum = dsAreaSum.rename({
                'timeMonthly_avg_iceAreaCell':
                'iceArea',
                'timeMonthly_avg_iceVolumeCell':
                'iceVolume'
            })
            dsAreaSum['iceThickness'] = (dsAreaSum.iceVolume /
                                         dsMesh.areaCell.sum('nCells'))

            dsAreaSum['iceArea'].attrs['units'] = 'm$^2$'
            dsAreaSum['iceArea'].attrs['description'] = \
                'Total {} sea ice area'.format(hemisphere)
            dsAreaSum['iceVolume'].attrs['units'] = 'm$^3$'
            dsAreaSum['iceVolume'].attrs['description'] = \
                'Total {} sea ice volume'.format(hemisphere)
            dsAreaSum['iceThickness'].attrs['units'] = 'm'
            dsAreaSum['iceThickness'].attrs['description'] = \
                'Mean {} sea ice thickness'.format(hemisphere)

            dsTimeSeries[hemisphere] = dsAreaSum

            write_netcdf(dsAreaSum, outFileNames[hemisphere])

        return dsTimeSeries  # }}}
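
The hemisphere masking and area weighting above follow a common xarray pattern; a toy version with made-up values:

import numpy as np
import xarray as xr

dsMesh = xr.Dataset({'latCell': ('nCells', np.array([10., -20., 30.])),
                     'areaCell': ('nCells', np.array([1.0, 2.0, 3.0]))})
ds = xr.Dataset({'iceAreaCell': ('nCells', np.array([0.5, 0.25, 1.0]))})
mask = dsMesh.latCell > 0  # northern hemisphere
# where() turns southern cells into NaN; sum() skips NaN by default
areaSum = (ds.where(mask) * dsMesh.areaCell).sum('nCells')
print(float(areaSum.iceAreaCell))  # 0.5*1.0 + 1.0*3.0 = 3.5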
Example 5
    def build_observational_dataset(self, fileName):  # {{{
        '''
        read in the data sets for observations, and possibly rename some
        variables and dimensions

        Parameters
        ----------
        fileName : str
            observation file name

        Returns
        -------
        dsObs : ``xarray.Dataset``
            The observational dataset
        '''
        # Authors
        # -------
        # Xylar Asay-Davis, Luke Van Roekel

        # Load Argo observational data
        dsObs = xr.open_dataset(fileName)

        # Rename coordinates to be consistent with other datasets
        dsObs = dsObs.rename(
            {
                'month': 'calmonth',
                'LATITUDE': 'latCoord',
                'LONGITUDE': 'lonCoord',
                'DEPTH': 'depth'
            })
        dsObs.coords['LATITUDE'] = dsObs['latCoord']
        dsObs.coords['LONGITUDE'] = dsObs['lonCoord']
        dsObs.coords['DEPTH'] = dsObs['depth']
        dsObs.coords['month'] = ('Time', np.array(dsObs['calmonth'], int))

        # no meaningful year since this is already a climatology
        dsObs.coords['year'] = ('Time', np.ones(dsObs.dims['Time'], int))
        dsObs = mpas_xarray.subset_variables(dsObs, [self.fieldName, 'month'])

        slices = []
        field = dsObs[self.fieldName]
        for depth in self.depths:
            if depth == 'top':
                slices.append(
                    field.sel(method='nearest', depth=0.).drop('depth'))
            else:
                slices.append(
                    field.sel(method='nearest', depth=depth).drop('depth'))

        depthNames = [str(depth) for depth in self.depths]
        field = xr.concat(slices, dim='depthSlice')

        dsObs = xr.Dataset(data_vars={self.fieldName: field},
                           coords={'depthSlice': depthNames})

        return dsObs  # }}}
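
The nearest-depth selection and concatenation along a new ``depthSlice`` dimension, shown above, can be reproduced with a toy field:

import numpy as np
import xarray as xr

field = xr.DataArray(np.arange(8.).reshape(2, 4),
                     dims=('Time', 'depth'),
                     coords={'depth': [0., 10., 50., 100.]})
# pick the level nearest each requested depth, dropping the scalar
# depth coordinate so the slices can be concatenated
slices = [field.sel(depth=d, method='nearest').drop_vars('depth')
          for d in [0., 45.]]
field = xr.concat(slices, dim='depthSlice')
print(dict(field.sizes))  # {'depthSlice': 2, 'Time': 2}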
Example 6
    def run_task(self):  # {{{
        '''
        Compute climatologies of transect data from E3SM/MPAS output

        This function has been overridden to compute ``zMid`` based on data
        from a restart file for later use in vertically interpolating to
        reference depths.
        '''
        # Authors
        # -------
        # Xylar Asay-Davis

        # first, compute zMid and cell mask from the restart file
        with xr.open_dataset(self.restartFileName) as ds:
            ds = mpas_xarray.subset_variables(ds, ['maxLevelCell',
                                                   'bottomDepth',
                                                   'layerThickness'])
            ds = ds.isel(Time=0)

            self.maxLevelCell = ds.maxLevelCell - 1

            zMid = compute_zmid(ds.bottomDepth, ds.maxLevelCell,
                                ds.layerThickness)

            self.zMid = \
                xr.DataArray.from_dict({'dims': ('nCells', 'nVertLevels'),
                                        'data': zMid})

        # then, call run from the base class (RemapMpasClimatologySubtask),
        # which will perform the horizontal remapping
        super(ComputeTransectsSubtask, self).run_task()

        obsDatasets = self.obsDatasets.get_observations()

        self.logger.info('Interpolating each transect vertically...')
        # finally, vertically interpolate and write out each transect
        for season in self.seasons:

            remappedFileName = self.get_remapped_file_name(
                    season, comparisonGridName=self.transectCollectionName)

            with xr.open_dataset(remappedFileName) as ds:
                transectNames = list(obsDatasets.keys())
                for transectIndex, transectName in enumerate(transectNames):
                    self.logger.info('  {}'.format(transectName))
                    dsObs = obsDatasets[transectName]
                    outFileName = self.get_remapped_file_name(
                            season, comparisonGridName=transectName)
                    outObsFileName = self.obsDatasets.get_out_file_name(
                            transectName, self.verticalComparisonGridName)
                    self._vertical_interp(ds, transectIndex, dsObs,
                                          outFileName, outObsFileName)

        for transectName in obsDatasets:
            obsDatasets[transectName].close()
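
``compute_zmid`` itself is not reproduced here; the sketch below only illustrates the general idea of layer mid-depths for a single column, assuming layers stack downward from z = 0:

import numpy as np

layerThickness = np.array([10., 20., 30.])
# depth of the top of each layer (negative downward)
zTopOfLayer = -np.concatenate(([0.], np.cumsum(layerThickness[:-1])))
# the midpoint of each layer sits half a thickness below its top
zMid = zTopOfLayer - 0.5 * layerThickness
print(zMid)  # [ -5. -20. -45.]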
Example 7
    def _mask_climatologies(self, season, dsMask):  # {{{
        '''
        For each season, creates a masked version of the climatology

        Parameters
        ----------
        season : str
            The name of the season to be masked

        dsMask : ``xarray.Dataset`` object
            A data set (from the first input file) that can be used to
            determine the mask in MPAS output files.

        Author
        ------
        Xylar Asay-Davis
        '''

        climatologyFileName = self.mpasClimatologyTask.get_file_name(season)

        maskedClimatologyFileName = self.get_masked_file_name(season)

        if not os.path.exists(maskedClimatologyFileName):
            # slice and mask the data set
            climatology = xr.open_dataset(climatologyFileName)
            climatology = mpas_xarray.subset_variables(climatology,
                                                       self.variableList)
            iselValues = {}
            if 'Time' in climatology.dims:
                iselValues['Time'] = 0
            if self.iselValues is not None:
                iselValues.update(self.iselValues)
            # select only Time=0 and possibly only the desired vertical
            # slice
            if len(iselValues.keys()) > 0:
                climatology = climatology.isel(**iselValues)

            # add valid mask as a variable, useful for remapping later
            climatology['validMask'] = \
                xr.DataArray(numpy.ones(climatology.dims['nCells']),
                             dims=['nCells'])
            # mask the data set
            for variableName in self.variableList:
                climatology[variableName] = \
                    climatology[variableName].where(
                        dsMask[variableName] != self._fillValue)

            # customize (if this function has been overridden)
            climatology = self.customize_masked_climatology(
                climatology, season)

            write_netcdf(climatology, maskedClimatologyFileName)
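
The fill-value masking step can be seen in isolation with toy data (the sentinel value here is assumed, not taken from MPAS output):

import numpy as np
import xarray as xr

fillValue = -9.99e33  # assumed sentinel for invalid cells
dsMask = xr.Dataset(
    {'temperature': ('nCells', np.array([1., fillValue, 3.]))})
climatology = xr.Dataset(
    {'temperature': ('nCells', np.array([10., 20., 30.]))})
# cells flagged with the fill value become NaN in the climatology
climatology['temperature'] = climatology.temperature.where(
    dsMask.temperature != fillValue)
print(climatology.temperature.values)  # [10. nan 30.]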
Example 8
    def build_observational_dataset(self, fileName):  # {{{
        '''
        read in the data sets for observations, and possibly rename some
        variables and dimensions

        Parameters
        ----------
        fileName : str
            observation file name

        Returns
        -------
        dsObs : ``xarray.Dataset``
            The observational dataset
        '''
        # Authors
        # -------
        # Xylar Asay-Davis

        # Load MLD observational data
        dsObs = xr.open_dataset(fileName)

        # Increment month value to be consistent with the model output
        dsObs.iMONTH.values += 1
        # Rename the dimensions to be consistent with other obs. data sets
        dsObs = dsObs.rename(
            {
                'month': 'calmonth',
                'lat': 'latCoord',
                'lon': 'lonCoord',
                'mld_dt_mean': 'mld'
            })
        dsObs = dsObs.rename({
            'iMONTH': 'Time',
            'iLAT': 'lat',
            'iLON': 'lon'
        })

        # set the coordinates now that the dimensions have the same names
        dsObs.coords['lat'] = dsObs['latCoord']
        dsObs.coords['lon'] = dsObs['lonCoord']
        dsObs.coords['Time'] = dsObs['calmonth']
        dsObs.coords['month'] = ('Time', np.array(dsObs['calmonth'], int))

        # no meaningful year since this is already a climatology
        dsObs.coords['year'] = ('Time', np.ones(dsObs.dims['Time'], int))

        dsObs = mpas_xarray.subset_variables(dsObs, ['mld', 'month'])
        return dsObs  # }}}
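
Since ``rename`` returns a new dataset, assigning the result (as above) replaces the ``inplace=True`` form that newer xarray versions no longer accept; a toy round trip:

import xarray as xr

ds = xr.Dataset({'mld_dt_mean': ('iMONTH', [10., 20.])})
# rename the variable and the dimension in a single call
ds = ds.rename({'mld_dt_mean': 'mld', 'iMONTH': 'Time'})
print(list(ds.data_vars), list(ds.dims))  # ['mld'] ['Time']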
Example 9
    def build_observational_dataset(self, fileName):  # {{{
        '''
        read in the data sets for observations, and possibly rename some
        variables and dimensions

        Parameters
        ----------
        fileName : str
            observation file name

        Returns
        -------
        dsObs : ``xarray.Dataset``
            The observational dataset
        '''
        # Authors
        # -------
        # Xylar Asay-Davis

        # Load MLD observational data
        dsObs = xr.open_dataset(fileName)

        varList = [self.fieldName, 'month', 'year']

        if self.botFieldName is not None:
            varList.append(self.botFieldName)
        dsObs = mpas_xarray.subset_variables(dsObs, varList)

        if self.depths is not None:
            field = dsObs[self.fieldName]
            slices = []
            for depth in self.depths:
                if depth == 'top':
                    slices.append(field.sel(method='nearest', z=0.).drop(
                        'z'))
                elif depth == 'bot':
                    slices.append(dsObs[self.botFieldName])
                else:
                    level = field.sel(method='nearest', z=depth).drop(
                        'z')
                    slices.append(level)

            depthNames = [str(depth) for depth in self.depths]
            field = xr.concat(slices, dim='depthSlice')

            dsObs = xr.Dataset(data_vars={self.fieldName: field},
                               coords={'depthSlice': depthNames})

        return dsObs  # }}}
Example 10
def compute_mpas_region_masks(geojsonFileName, meshFileName, maskFileName,
                              featureList=None, logger=None, processCount=1,
                              chunkSize=1000, showProgress=True):
    '''
    Build a region mask file from the given MPAS mesh and geojson file defining
    a set of regions.
    '''
    if os.path.exists(maskFileName):
        return

    with xr.open_dataset(meshFileName) as dsMesh:
        dsMesh = mpas_xarray.subset_variables(dsMesh, ['lonCell', 'latCell'])
        latCell = numpy.rad2deg(dsMesh.latCell.values)

        # transform longitudes to [-180, 180)
        lonCell = numpy.mod(numpy.rad2deg(dsMesh.lonCell.values) + 180.,
                            360.) - 180.

    # create shapely geometry for lonCell and latCell
    cellPoints = [shapely.geometry.Point(x, y) for x, y in
                  zip(lonCell, latCell)]

    regionNames, masks, properties, nChar = compute_region_masks(
        geojsonFileName, cellPoints, maskFileName, featureList, logger,
        processCount, chunkSize, showProgress)

    nCells = len(cellPoints)

    # create a new data array for masks and another for mask names
    if logger is not None:
        logger.info('  Creating and writing masks dataset...')
    nRegions = len(regionNames)
    dsMasks = xr.Dataset()
    dsMasks['regionCellMasks'] = (('nRegions', 'nCells'),
                                  numpy.zeros((nRegions, nCells), dtype=bool))
    dsMasks['regionNames'] = (('nRegions'),
                              numpy.zeros((nRegions),
                                          dtype='|S{}'.format(nChar)))

    for index in range(nRegions):
        regionName = regionNames[index]
        mask = masks[index]
        dsMasks['regionCellMasks'][index, :] = mask
        dsMasks['regionNames'][index] = regionName

    for propertyName in properties:
        dsMasks[propertyName] = (('nRegions'), properties[propertyName])

    write_netcdf(dsMasks, maskFileName)
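
At its core the region mask reduces to shapely point-in-polygon tests; a minimal standalone version:

import shapely.geometry

region = shapely.geometry.shape({
    'type': 'Polygon',
    'coordinates': [[(-10., -10.), (10., -10.), (10., 10.), (-10., 10.)]]})
points = [shapely.geometry.Point(0., 0.), shapely.geometry.Point(20., 0.)]
mask = [region.contains(point) for point in points]
print(mask)  # [True, False]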
Example 11
    def run_task(self):  # {{{
        '''
        Compute the requested climatologies
        '''
        # Authors
        # -------
        # Xylar Asay-Davis

        self.logger.info('\nRemapping climatology {}'.format(
            self.climatologyName))

        dsMask = xr.open_dataset(self.mpasClimatologyTask.inputFiles[0])
        dsMask = mpas_xarray.subset_variables(dsMask, self.variableList)
        iselValues = {'Time': 0}
        if self.iselValues is not None:
            iselValues.update(self.iselValues)
        # select only Time=0 and possibly only the desired vertical
        # slice
        dsMask = dsMask.isel(**iselValues)

        for season in self.seasons:
            self._mask_climatologies(season, dsMask)

        for comparisonGridName in self.comparisonDescriptors:

            for season in self.seasons:

                maskedClimatologyFileName = self.get_masked_file_name(
                        season)

                remappedFileName = self.get_remapped_file_name(
                        season, comparisonGridName)

                if not os.path.exists(remappedFileName):
                    self._remap(inFileName=maskedClimatologyFileName,
                                outFileName=remappedFileName,
                                remapper=self.remappers[comparisonGridName],
                                comparisonGridName=comparisonGridName,
                                season=season)
Example 12
    def run_task(self):  # {{{
        """
        Compute climatologies of melt rates from E3SM/MPAS output

        This function has been overridden to load ``landIceMask`` from a
        restart file for later use in masking the melt rate.  It then simply
        calls the run function from the base class
        (``RemapMpasClimatologySubtask``).
        """
        # Authors
        # -------
        # Xylar Asay-Davis

        # first, load the land-ice mask from the restart file
        dsLandIceMask = xr.open_dataset(self.restartFileName)
        dsLandIceMask = mpas_xarray.subset_variables(dsLandIceMask,
                                                     ['landIceMask'])
        dsLandIceMask = dsLandIceMask.isel(Time=0)
        self.landIceMask = dsLandIceMask.landIceMask > 0.

        # then, call run from the base class (RemapMpasClimatologySubtask),
        # which will perform the main function of the task
        super(RemapMpasAntarcticMeltClimatology, self).run_task()
Example 13
import numpy
import xarray

from mpas_analysis.shared.interpolation import Remapper
from mpas_analysis.shared.grid import ProjectionGridDescriptor
from mpas_analysis.shared.mpas_xarray.mpas_xarray import subset_variables
from mpas_analysis.shared.climatology \
    import get_Antarctic_stereographic_comparison_descriptor
from mpas_analysis.configuration.MpasAnalysisConfigParser \
    import MpasAnalysisConfigParser

inFileName = '/media/xylar/extra_data/data_overflow/observations/Antarctica/' \
             'Rignot_et_al._2013/Ant_MeltingRate.nc'

config = MpasAnalysisConfigParser()
config.read('config.default')

ds = xarray.open_dataset(inFileName)
ds = subset_variables(ds, ['melt_actual', 'xaxis', 'yaxis'])
lx = numpy.abs(1e-3 * (ds.xaxis.values[-1] - ds.xaxis.values[0]))
ly = numpy.abs(1e-3 * (ds.yaxis.values[-1] - ds.yaxis.values[0]))

maskedMeltRate = numpy.ma.masked_array(ds.melt_actual,
                                       mask=(ds.melt_actual.values == 0.))

ds['meltRate'] = xarray.DataArray(maskedMeltRate,
                                  dims=ds.melt_actual.dims,
                                  coords=ds.melt_actual.coords,
                                  attrs=ds.melt_actual.attrs)

ds = ds.drop('melt_actual')

inGridName = '{}x{}km_1.0km_Antarctic_stereo'.format(lx, ly)
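
The zero-masking step uses ``numpy.ma``; in miniature:

import numpy

data = numpy.array([0., 1.5, 0., 2.0])
# treat exact zeros as missing melt-rate values
masked = numpy.ma.masked_array(data, mask=(data == 0.))
print(masked.mean())  # 1.75, the mean of the unmasked entries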
Example 14
    def run_task(self):  # {{{
        """
        Compute climatologies of T or S from ACME/MPAS output

        This function has been overridden to load ``maxLevelCell`` from a
        restart file for later use in indexing bottom T and S.
        ``verticalIndex`` is also computed for later indexing of
        the model level. It then simply calls the run function from
        ClimatologyMapOcean.
        """
        # Authors
        # -------
        # Xylar Asay-Davis

        # first, load the vertical mesh information from the restart file
        ds = xr.open_dataset(self.restartFileName)
        ds = mpas_xarray.subset_variables(
            ds, ['maxLevelCell', 'bottomDepth', 'layerThickness'])
        ds = ds.isel(Time=0)

        self.maxLevelCell = ds.maxLevelCell - 1

        depthNames = [str(depth) for depth in self.depths]

        zMid = compute_zmid(ds.bottomDepth, ds.maxLevelCell, ds.layerThickness)

        nVertLevels = zMid.shape[1]
        zMid.coords['verticalIndex'] = \
            ('nVertLevels',
             np.arange(nVertLevels))

        zTop = zMid.isel(nVertLevels=0)
        # after the "where" below, each cell has at most one non-NaN value
        # in the vertical, so the "sum" over nVertLevels simply collapses
        # the array in the vertical dimension
        zBot = zMid.where(zMid.verticalIndex == self.maxLevelCell).sum(
            dim='nVertLevels')

        verticalIndices = np.zeros((len(self.depths), ds.dims['nCells']), int)

        mask = np.zeros(verticalIndices.shape, bool)

        for depthIndex, depth in enumerate(self.depths):
            if depth == 'top':
                # the top layer has vertical index 0
                verticalIndices[depthIndex, :] = 0
                mask[depthIndex, :] = self.maxLevelCell.values >= 0
            elif depth == 'bot':
                # maxLevelCell was already converted to a zero-based index
                verticalIndices[depthIndex, :] = self.maxLevelCell.values
                mask[depthIndex, :] = self.maxLevelCell.values >= 0
            else:

                verticalIndex = np.argmin(np.abs(zMid - depth), axis=1)

                verticalIndices[depthIndex, :] = verticalIndex.values
                mask[depthIndex, :] = np.logical_and(depth <= zTop,
                                                     depth >= zBot).values

        self.verticalIndices = \
            xr.DataArray.from_dict({'dims': ('depthSlice', 'nCells'),
                                    'coords': {'depthSlice':
                                               {'dims': ('depthSlice',),
                                                'data': depthNames}},
                                    'data': verticalIndices})
        self.verticalIndexMask = \
            xr.DataArray.from_dict({'dims': ('depthSlice', 'nCells'),
                                    'coords': {'depthSlice':
                                               {'dims': ('depthSlice',),
                                                'data': depthNames}},
                                    'data': mask})

        # then, call run from the base class (RemapMpasClimatologySubtask),
        # which will perform the main function of the task
        super(RemapDepthSlicesSubtask, self).run_task()
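
The nearest-level search for a target depth is a single ``argmin``; with toy numbers:

import numpy as np

# (nCells, nVertLevels) layer mid-depths, negative downward
zMid = np.array([[-5., -20., -45.],
                 [-5., -20., -45.]])
depth = -18.
verticalIndex = np.argmin(np.abs(zMid - depth), axis=1)
print(verticalIndex)  # [1 1]: level 1 is nearest to -18 m in both cells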
Example 15
    def run_task(self):  # {{{
        '''
        Compute the requested climatologies
        '''
        # Authors
        # -------
        # Xylar Asay-Davis

        if self.maskExists:
            return

        self.logger.info('Creating masks file {}'.format(self.maskFileName))

        with xr.open_dataset(self.restartFileName) as dsRestart:
            dsRestart = mpas_xarray.subset_variables(dsRestart,
                                                     ['lonCell', 'latCell'])
            latCell = numpy.rad2deg(dsRestart.latCell.values)

            # transform longitudes to [-180, 180)
            lonCell = numpy.mod(
                numpy.rad2deg(dsRestart.lonCell.values) + 180., 360.) - 180.

        # create shapely geometry for lonCell and latCell
        cellPoints = [
            shapely.geometry.Point(x, y) for x, y in zip(lonCell, latCell)
        ]

        nCells = len(cellPoints)

        masks = []
        regionNames = []
        nChar = 0
        self.logger.info('  Computing masks from {}...'.format(
            self.geojsonFileName))
        with open(self.geojsonFileName) as f:
            featureData = json.load(f)

        for feature in featureData['features']:
            name = feature['properties']['name']
            if name not in self.featureList:
                continue

            self.logger.info('      {}'.format(name))

            shape = shapely.geometry.shape(feature['geometry'])
            mask = numpy.array([shape.contains(point) for point in cellPoints],
                               dtype=bool)

            nChar = max(nChar, len(name))

            masks.append(mask)
            regionNames.append(name)

        # create a new data array for masks and another for mask names
        self.logger.info('  Creating and writing masks dataset...')
        nRegions = len(regionNames)
        dsMasks = xr.Dataset()
        dsMasks['regionCellMasks'] = (('nRegions', 'nCells'),
                                      numpy.zeros((nRegions, nCells),
                                                  dtype=bool))
        dsMasks['regionNames'] = (('nRegions'),
                                  numpy.zeros((nRegions),
                                              dtype='|S{}'.format(nChar)))
        for index in range(nRegions):
            regionName = regionNames[index]
            mask = masks[index]
            dsMasks['regionCellMasks'][index, :] = mask
            dsMasks['regionNames'][index] = regionName

        write_netcdf(dsMasks, self.maskFileName)
Example 16
def compute_region_masks(geojsonFileName, meshFileName, maskFileName,
                         featureList=None, logger=None, processCount=1,
                         chunkSize=1000, showProgress=True):
    '''
    Build a region mask file from the given mesh and geojson file defining
    a set of regions.
    '''
    if os.path.exists(maskFileName):
        return

    if logger is not None:
        logger.info('Creating masks file {}'.format(maskFileName))

    if featureList is None:
        # get a list of features for use by other tasks (e.g. to determine
        # plot names)
        featureList = get_feature_list(geojsonFileName)

    with xr.open_dataset(meshFileName) as dsMesh:
        dsMesh = mpas_xarray.subset_variables(dsMesh, ['lonCell', 'latCell'])
        latCell = numpy.rad2deg(dsMesh.latCell.values)

        # transform longitudes to [-180, 180)
        lonCell = numpy.mod(numpy.rad2deg(dsMesh.lonCell.values) + 180.,
                            360.) - 180.

    # create shapely geometry for lonCell and latCell
    cellPoints = [shapely.geometry.Point(x, y) for x, y in
                  zip(lonCell, latCell)]

    nCells = len(cellPoints)

    masks = []
    regionNames = []
    nChar = 0
    if logger is not None:
        logger.info('  Computing masks from {}...'.format(geojsonFileName))
    with open(geojsonFileName) as f:
        featureData = json.load(f)

    properties = {}

    for feature in featureData['features']:
        name = feature['properties']['name']
        if name not in featureList:
            continue

        if logger is not None:
            logger.info('      {}'.format(name))

        shape = shapely.geometry.shape(feature['geometry'])
        if processCount == 1:
            mask = _contains(shape, cellPoints)
        else:
            nChunks = int(numpy.ceil(nCells / chunkSize))
            chunks = []
            indices = [0]
            for iChunk in range(nChunks):
                start = iChunk * chunkSize
                end = min((iChunk + 1) * chunkSize, nCells)
                chunks.append(cellPoints[start:end])
                indices.append(end)

            partial_func = partial(_contains, shape)
            pool = Pool(processCount)

            if showProgress:
                widgets = ['  ', progressbar.Percentage(), ' ',
                           progressbar.Bar(), ' ', progressbar.ETA()]
                bar = progressbar.ProgressBar(widgets=widgets,
                                              maxval=nChunks).start()

            mask = numpy.zeros((nCells,), bool)
            for iChunk, maskChunk in \
                    enumerate(pool.imap(partial_func, chunks)):
                mask[indices[iChunk]:indices[iChunk + 1]] = maskChunk
                if showProgress:
                    bar.update(iChunk + 1)
            if showProgress:
                bar.finish()
            pool.terminate()

        nChar = max(nChar, len(name))

        masks.append(mask)
        regionNames.append(name)

        for propertyName in feature['properties']:
            if propertyName not in ['name', 'author', 'tags', 'component',
                                    'object']:
                propertyVal = feature['properties'][propertyName]
                if propertyName in properties:
                    properties[propertyName].append(propertyVal)
                else:
                    properties[propertyName] = [propertyVal]

    # create a new data array for masks and another for mask names
    if logger is not None:
        logger.info('  Creating and writing masks dataset...')
    nRegions = len(regionNames)
    dsMasks = xr.Dataset()
    dsMasks['regionCellMasks'] = (('nRegions', 'nCells'),
                                  numpy.zeros((nRegions, nCells), dtype=bool))
    dsMasks['regionNames'] = (('nRegions'),
                              numpy.zeros((nRegions),
                                          dtype='|S{}'.format(nChar)))

    for index in range(nRegions):
        regionName = regionNames[index]
        mask = masks[index]
        dsMasks['regionCellMasks'][index, :] = mask
        dsMasks['regionNames'][index] = regionName

    for propertyName in properties:
        dsMasks[propertyName] = (('nRegions'), properties[propertyName])

    write_netcdf(dsMasks, maskFileName)
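
The chunked multiprocessing pattern generalizes beyond shapely containment tests; a stripped-down sketch with a stand-in worker (all names here are made up):

from functools import partial
from multiprocessing import Pool

def _worker(threshold, values):
    # stand-in for the expensive per-chunk containment test
    return [value > threshold for value in values]

if __name__ == '__main__':
    values = list(range(10))
    chunkSize = 4
    chunks = [values[start:start + chunkSize]
              for start in range(0, len(values), chunkSize)]
    with Pool(2) as pool:
        results = pool.map(partial(_worker, 4.5), chunks)
    # stitch the per-chunk results back into one flat mask
    mask = [flag for chunk in results for flag in chunk]
    print(sum(mask))  # 5 values exceed the threshold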
Example 17
def _preprocess(ds):
    # drop unused variables during preprocessing because only the
    # variables we want are guaranteed to be in all the files
    return subset_variables(ds, variableList)
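
A preprocess hook like this is normally handed to ``xarray.open_mfdataset``, which applies it to each file before concatenation; ``variableList`` must come from the enclosing scope. A hedged usage sketch (the file pattern is hypothetical):

import xarray as xr
from mpas_analysis.shared.mpas_xarray.mpas_xarray import subset_variables

variableList = ['temperature']  # assumed for illustration

def _preprocess(ds):
    # keep only the variables guaranteed to be present in every file
    return subset_variables(ds, variableList)

# with real output files in place, something like:
# ds = xr.open_mfdataset('output.*.nc', preprocess=_preprocess)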