def _update_time_series_bounds_from_file_names(self):  # {{{
        """
        Update the start and end years and dates for time series based on the
        years actually available in the list of files.
        """
        # Authors
        # -------
        # Xylar Asay-Davis

        config = self.config
        section = self.section

        requestedStartYear = config.getint(section, 'startYear')
        requestedEndYear = config.getint(section, 'endYear')

        fileNames = sorted(self.inputFiles)
        years, months = get_files_year_month(fileNames,
                                             self.historyStreams,
                                             'timeSeriesStatsMonthlyOutput')

        # search for the start of the first full year
        firstIndex = 0
        while firstIndex < len(years) and months[firstIndex] != 1:
            firstIndex += 1
        startYear = years[firstIndex]

        # search for the end of the last full year
        lastIndex = len(years)-1
        while lastIndex >= 0 and months[lastIndex] != 12:
            lastIndex -= 1
        endYear = years[lastIndex]

        if startYear != requestedStartYear or endYear != requestedEndYear:
            print("Warning: {} start and/or end year different from "
                  "requested\n"
                  "requestd: {:04d}-{:04d}\n"
                  "actual:   {:04d}-{:04d}\n".format(section,
                                                     requestedStartYear,
                                                     requestedEndYear,
                                                     startYear,
                                                     endYear))
            config.set(section, 'startYear', str(startYear))
            config.set(section, 'endYear', str(endYear))

            startDate = '{:04d}-01-01_00:00:00'.format(startYear)
            config.set(section, 'startDate', startDate)
            endDate = '{:04d}-12-31_23:59:59'.format(endYear)
            config.set(section, 'endDate', endDate)
        else:
            startDate = config.get(section, 'startDate')
            endDate = config.get(section, 'endDate')

        self.startDate = startDate
        self.endDate = endDate
        self.startYear = startYear
        self.endYear = endYear
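
# Editor's sketch: the full-year trimming above, extracted into a standalone,
# runnable helper. The sample years/months below are hypothetical, chosen only
# to show partial years being dropped at both ends.
def first_and_last_full_years(years, months):
    """Return (startYear, endYear) spanning only complete calendar years."""
    firstIndex = 0
    while firstIndex < len(years) and months[firstIndex] != 1:
        firstIndex += 1
    lastIndex = len(years) - 1
    while lastIndex >= 0 and months[lastIndex] != 12:
        lastIndex -= 1
    return years[firstIndex], years[lastIndex]

# a run starting mid-year (1850-07) and ending mid-year (1853-06)
years = [1850] * 6 + [1851] * 12 + [1852] * 12 + [1853] * 6
months = list(range(7, 13)) + 2 * list(range(1, 13)) + list(range(1, 7))
assert first_and_last_full_years(years, months) == (1851, 1852)
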
    def _create_symlinks(self):  # {{{
        """
        Create symlinks to monthly mean files so they have the expected file
        naming convention for ncclimo.

        Returns
        -------
        symlinkDirectory : str
            The path to the symlinks created for each timeSeriesStatsMonthly
            input file
        """
        # Authors
        # -------
        # Xylar Asay-Davis

        config = self.config

        fileNames = sorted(self.inputFiles)
        years, months = get_files_year_month(fileNames, self.historyStreams,
                                             'timeSeriesStatsMonthlyOutput')

        climatologyBaseDirectory = build_config_full_path(
            config, 'output', 'mpasClimatologySubdirectory')

        symlinkDirectory = '{}/source_symlinks'.format(
            climatologyBaseDirectory)

        make_directories(symlinkDirectory)

        for inFileName, year, month in zip(fileNames, years, months):
            outFileName = '{}/{}.hist.am.timeSeriesStatsMonthly.{:04d}-' \
                '{:02d}-01.nc'.format(symlinkDirectory, self.ncclimoModel,
                                      year, month)

            if not os.path.exists(outFileName):
                os.symlink(inFileName, outFileName)

        return symlinkDirectory
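
# Editor's sketch: the ncclimo naming convention the symlinks above produce.
# The 'mpaso' model name and all paths here are hypothetical stand-ins for
# self.ncclimoModel and the real input files.
import os

ncclimoModel = 'mpaso'
symlinkDirectory = '/tmp/source_symlinks'
inFileName = '/runs/case/output.0001-01-01.nc'
year, month = 1, 1

template = '{}/{}.hist.am.timeSeriesStatsMonthly.{:04d}-{:02d}-01.nc'
outFileName = template.format(symlinkDirectory, ncclimoModel, year, month)

os.makedirs(symlinkDirectory, exist_ok=True)
if not os.path.exists(outFileName):
    # the link may dangle if inFileName doesn't exist; fine for a demo
    os.symlink(inFileName, outFileName)
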
    def _create_symlinks(self):  # {{{
        """
        Create symlinks to monthly mean files so they have the expected file
        naming convention for ncclimo.

        Returns
        -------
        symlinkDirectory : str
            The path to the symlinks created for each timeSeriesStatsMonthly
            input file
        """
        # Authors
        # -------
        # Xylar Asay-Davis

        config = self.config

        fileNames = sorted(self.inputFiles)
        years, months = get_files_year_month(fileNames, self.historyStreams,
                                             self.streamName)

        climatologyOpDirectory = get_climatology_op_directory(config, self.op)

        symlinkDirectory = '{}/source_symlinks'.format(climatologyOpDirectory)

        make_directories(symlinkDirectory)

        for inFileName, year, month in zip(fileNames, years, months):
            outFileName = '{}/{}.hist.am.timeSeriesStatsMonthly.{:04d}-' \
                '{:02d}-01.nc'.format(symlinkDirectory, self.ncclimoModel,
                                      year, month)

            try:
                os.symlink(inFileName, outFileName)
            except OSError:
                pass

        return symlinkDirectory
    def _compute_climatologies_with_xarray(self, inDirectory, outDirectory):
        # {{{
        '''
        Uses xarray to compute seasonal and/or annual climatologies.

        Parameters
        ----------
        inDirectory : str
            The run directory containing timeSeriesStatsMonthly output

        outDirectory : str
            The output directory where climatologies will be written
        '''

        # Authors
        # -------
        # Xylar Asay-Davis

        def _preprocess(ds):
            # drop unused variables during preprocessing because only the
            # variables we want are guaranteed to be in all the files
            return ds[variableList]

        season = self.season
        parentTask = self.parentTask
        variableList = parentTask.variableList[season]

        chunkSize = self.config.getint('input', 'maxChunkSize')

        if season in constants.abrevMonthNames:
            # this is an individual month, so create a climatology from
            # timeSeriesStatsMonthlyOutput

            fileNames = sorted(parentTask.inputFiles)
            years, months = get_files_year_month(
                fileNames, self.historyStreams, 'timeSeriesStatsMonthlyOutput')

            with xarray.open_mfdataset(parentTask.inputFiles,
                                       combine='nested',
                                       concat_dim='Time',
                                       chunks={'nCells': chunkSize},
                                       decode_cf=False,
                                       decode_times=False,
                                       preprocess=_preprocess) as ds:

                ds.coords['year'] = ('Time', years)
                ds.coords['month'] = ('Time', months)
                month = constants.abrevMonthNames.index(season) + 1
                climatologyFileName = parentTask.get_file_name(season)
                self.logger.info('computing climatology {}'.format(
                    os.path.basename(climatologyFileName)))

                ds = ds.where(ds.month == month, drop=True)
                ds = ds.mean(dim='Time')
                ds.compute(num_workers=self.subprocessCount)
                write_netcdf(ds, climatologyFileName)
        else:
            outFileName = parentTask.get_file_name(season=season)
            self.logger.info('computing climatology {}'.format(
                os.path.basename(outFileName)))
            fileNames = []
            weights = []
            for month in constants.monthDictionary[season]:
                monthName = constants.abrevMonthNames[month - 1]
                fileNames.append(parentTask.get_file_name(season=monthName))
                weights.append(constants.daysInMonth[month - 1])

            with xarray.open_mfdataset(fileNames,
                                       concat_dim='weight',
                                       combine='nested',
                                       chunks={'nCells': chunkSize},
                                       decode_cf=False,
                                       decode_times=False,
                                       preprocess=_preprocess) as ds:
                ds.coords['weight'] = ('weight', weights)
                ds = ((ds.weight * ds).sum(dim='weight') /
                      ds.weight.sum(dim='weight'))
                ds.compute(num_workers=self.subprocessCount)
                write_netcdf(ds, outFileName)
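
# Editor's sketch: the seasonal branch above takes a days-in-month weighted
# mean of monthly climatologies. A toy xarray equivalent with hypothetical
# values stacked along a 'weight' dimension:
import numpy
import xarray

weights = [31, 31, 28]  # e.g. DJF: December, January, February
monthly = xarray.Dataset(
    {'sst': (('weight', 'nCells'), numpy.array([[1.0], [2.0], [3.0]]))})
monthly.coords['weight'] = ('weight', weights)

seasonal = ((monthly.weight * monthly).sum(dim='weight') /
            monthly.weight.sum(dim='weight'))
# (31*1 + 31*2 + 28*3) / 90 = 1.9666...
print(float(seasonal.sst))
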
    def run_task(self):  # {{{
        """
        Compute the regional-mean time series
        """
        # Authors
        # -------
        # Xylar Asay-Davis

        config = self.config

        self.logger.info("\nCompute time series of regional means...")

        startDate = '{:04d}-01-01_00:00:00'.format(self.startYear)
        endDate = '{:04d}-12-31_23:59:59'.format(self.endYear)

        regionGroup = self.regionGroup
        sectionSuffix = regionGroup[0].upper() + \
            regionGroup[1:].replace(' ', '')
        timeSeriesName = sectionSuffix[0].lower() + sectionSuffix[1:]
        sectionName = 'timeSeries{}'.format(sectionSuffix)

        outputDirectory = '{}/{}/'.format(
            build_config_full_path(config, 'output', 'timeseriesSubdirectory'),
            timeSeriesName)
        try:
            os.makedirs(outputDirectory)
        except OSError:
            pass

        outFileName = '{}/{}_{:04d}-{:04d}.nc'.format(outputDirectory,
                                                      timeSeriesName,
                                                      self.startYear,
                                                      self.endYear)

        inputFiles = sorted(
            self.historyStreams.readpath('timeSeriesStatsMonthlyOutput',
                                         startDate=startDate,
                                         endDate=endDate,
                                         calendar=self.calendar))

        years, months = get_files_year_month(inputFiles, self.historyStreams,
                                             'timeSeriesStatsMonthlyOutput')

        variables = config.getExpression(sectionName, 'variables')

        variableList = [var['mpas'] for var in variables] + \
            ['timeMonthly_avg_layerThickness']

        outputExists = os.path.exists(outFileName)
        outputValid = outputExists
        if outputExists:
            with open_mpas_dataset(fileName=outFileName,
                                   calendar=self.calendar,
                                   timeVariableNames=None,
                                   variableList=None,
                                   startDate=startDate,
                                   endDate=endDate) as dsOut:

                for inIndex in range(dsOut.dims['Time']):

                    mask = numpy.logical_and(
                        dsOut.year[inIndex].values == years,
                        dsOut.month[inIndex].values == months)
                    if numpy.count_nonzero(mask) == 0:
                        outputValid = False
                        break

        if outputValid:
            self.logger.info('  Time series exists -- Done.')
            return

        regionMaskFileName = '{}/depthMasks{}.nc'.format(
            outputDirectory, timeSeriesName)
        dsRegionMask = xarray.open_dataset(regionMaskFileName)
        nRegions = dsRegionMask.sizes['nRegions']
        areaCell = dsRegionMask.areaCell

        datasets = []
        nTime = len(inputFiles)
        for tIndex in range(nTime):
            self.logger.info('  {}/{}'.format(tIndex + 1, nTime))

            dsIn = open_mpas_dataset(fileName=inputFiles[tIndex],
                                     calendar=self.calendar,
                                     variableList=variableList,
                                     startDate=startDate,
                                     endDate=endDate).isel(Time=0)

            layerThickness = dsIn.timeMonthly_avg_layerThickness

            innerDatasets = []
            for regionIndex in range(nRegions):
                self.logger.info('    region: {}'.format(
                    self.regionNames[regionIndex]))
                dsRegion = dsRegionMask.isel(nRegions=regionIndex)
                cellMask = dsRegion.cellMask
                totalArea = dsRegion.totalArea
                depthMask = dsRegion.depthMask.where(cellMask, drop=True)
                localArea = areaCell.where(cellMask, drop=True)
                localThickness = layerThickness.where(cellMask, drop=True)

                volCell = (localArea * localThickness).where(depthMask)
                volCell = volCell.transpose('nCells', 'nVertLevels')
                totalVol = volCell.sum(dim='nVertLevels').sum(dim='nCells')
                self.logger.info('      totalVol (mil. km^3): {}'.format(
                    1e-15 * totalVol.values))

                dsOut = xarray.Dataset()
                dsOut['totalVol'] = totalVol
                dsOut.totalVol.attrs['units'] = 'm^3'

                for var in variables:
                    outName = var['name']
                    self.logger.info('      {}'.format(outName))
                    mpasVarName = var['mpas']
                    timeSeries = dsIn[mpasVarName].where(cellMask, drop=True)
                    units = timeSeries.units
                    description = timeSeries.long_name

                    is3d = 'nVertLevels' in timeSeries.dims
                    if is3d:
                        timeSeries = \
                            (volCell*timeSeries.where(depthMask)).sum(
                                dim='nVertLevels').sum(dim='nCells') / totalVol
                    else:
                        timeSeries = \
                            (localArea*timeSeries).sum(
                                dim='nCells') / totalArea

                    dsOut[outName] = timeSeries
                    dsOut[outName].attrs['units'] = units
                    dsOut[outName].attrs['description'] = description
                    dsOut[outName].attrs['is3d'] = str(is3d)

                innerDatasets.append(dsOut)

            datasets.append(innerDatasets)

        # combine data sets into a single data set
        dsOut = xarray.combine_nested(datasets, ['Time', 'nRegions'])

        dsOut['totalArea'] = dsRegionMask.totalArea
        dsOut.totalArea.attrs['units'] = 'm^2'
        dsOut['zbounds'] = dsRegionMask.zbounds
        dsOut.zbounds.attrs['units'] = 'm'
        dsOut.coords['regionNames'] = dsRegionMask.regionNames
        dsOut.coords['year'] = (('Time'), years)
        dsOut['year'].attrs['units'] = 'years'
        dsOut.coords['month'] = (('Time'), months)
        dsOut['month'].attrs['units'] = 'months'

        write_netcdf(dsOut, outFileName)  # }}}
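
# Editor's sketch: the core reductions above are an area-weighted mean for
# 2-D fields and a volume-weighted mean for 3-D fields. A plain-numpy version
# with hypothetical sizes and an all-True depth mask:
import numpy

nCells, nVertLevels = 5, 3
area = numpy.random.rand(nCells)
thickness = numpy.random.rand(nCells, nVertLevels)
depthMask = numpy.ones((nCells, nVertLevels), bool)

volCell = area[:, numpy.newaxis] * thickness * depthMask
totalVol = volCell.sum()

field3d = numpy.random.rand(nCells, nVertLevels)
volMean = (volCell * field3d).sum() / totalVol      # volume-weighted mean

field2d = numpy.random.rand(nCells)
areaMean = (area * field2d).sum() / area.sum()      # area-weighted mean
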
    def _compute_time_series_with_ncrcat(self):
        # {{{
        '''
        Uses ncrcat to extract time series from timeSeriesStatsMonthlyOutput
        files

        Raises
        ------
        OSError
            If ``ncrcat`` is not in the system path.

        Author
        ------
        Xylar Asay-Davis
        '''

        if find_executable('ncrcat') is None:
            raise OSError('ncrcat not found. Make sure the latest nco '
                          'package is installed: \n'
                          'conda install nco\n'
                          'Note: this presumes use of the conda-forge '
                          'channel.')

        inputFiles = self.inputFiles
        append = False
        if os.path.exists(self.outputFile):
            # make sure all the necessary variables are also present
            with xr.open_dataset(self.outputFile) as ds:
                if ds.sizes['Time'] == 0:
                    updateSubset = False
                else:
                    updateSubset = True
                    for variableName in self.variableList:
                        if variableName not in ds.variables:
                            updateSubset = False
                            break

                if updateSubset:
                    # add only input files with times that aren't already in
                    # the output file

                    append = True

                    fileNames = sorted(self.inputFiles)
                    inYears, inMonths = get_files_year_month(
                        fileNames, self.historyStreams,
                        'timeSeriesStatsMonthlyOutput')

                    inYears = numpy.array(inYears)
                    inMonths = numpy.array(inMonths)
                    totalMonths = 12 * inYears + inMonths

                    dates = decode_strings(ds.xtime_startMonthly)

                    lastDate = dates[-1]

                    lastYear = int(lastDate[0:4])
                    lastMonth = int(lastDate[5:7])
                    lastTotalMonths = 12 * lastYear + lastMonth

                    inputFiles = []
                    for index, inputFile in enumerate(fileNames):
                        if totalMonths[index] > lastTotalMonths:
                            inputFiles.append(inputFile)

                    if len(inputFiles) == 0:
                        # nothing to do
                        return
                else:
                    # there is an output file but it has the wrong variables,
                    # so we need to delete it.
                    self.logger.warning('Warning: deleting file {} because '
                                        'it is empty or some variables were '
                                        'missing'.format(self.outputFile))
                    os.remove(self.outputFile)

        variableList = self.variableList + ['xtime_startMonthly',
                                            'xtime_endMonthly']

        args = ['ncrcat', '-4', '--no_tmp_fl',
                '-v', ','.join(variableList)]

        if append:
            args.append('--record_append')

        printCommand = '{} {} ... {} {}'.format(' '.join(args), inputFiles[0],
                                                inputFiles[-1],
                                                self.outputFile)
        args.extend(inputFiles)
        args.append(self.outputFile)

        self.logger.info('running: {}'.format(printCommand))
        for handler in self.logger.handlers:
            handler.flush()

        process = subprocess.Popen(args, stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        stdout, stderr = process.communicate()

        if stdout:
            stdout = stdout.decode('utf-8')
            for line in stdout.split('\n'):
                self.logger.info(line)
        if stderr:
            stderr = stderr.decode('utf-8')
            for line in stderr.split('\n'):
                self.logger.error(line)

        if process.returncode != 0:
            raise subprocess.CalledProcessError(process.returncode,
                                                ' '.join(args))
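
# Editor's sketch: the append logic above flattens (year, month) into a
# single "total months" integer so new records compare against the last one
# already written with a scalar test. Hypothetical sample values:
lastYear, lastMonth = 3, 7            # last record in the existing output
lastTotalMonths = 12 * lastYear + lastMonth

inYears = [3, 3, 3, 4]
inMonths = [6, 7, 8, 1]
newIndices = [i for i, (y, m) in enumerate(zip(inYears, inMonths))
              if 12 * y + m > lastTotalMonths]
assert newIndices == [2, 3]           # only 0003-08 and 0004-01 get appended
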
    def run_task(self):  # {{{
        """
        Compute time series of regional profiles
        """
        # Authors
        # -------
        # Milena Veneziani, Mark Petersen, Phillip J. Wolfram, Xylar Asay-Davis

        self.logger.info("\nCompute time series of regional profiles...")

        startDate = '{:04d}-01-01_00:00:00'.format(self.startYear)
        endDate = '{:04d}-12-31_23:59:59'.format(self.endYear)

        timeSeriesName = self.masksSubtask.regionGroup.replace(' ', '')

        outputDirectory = '{}/{}/'.format(
            build_config_full_path(self.config, 'output',
                                   'timeseriesSubdirectory'),
            timeSeriesName)
        try:
            os.makedirs(outputDirectory)
        except OSError:
            pass

        outputFileName = '{}/regionalProfiles_{}_{:04d}-{:04d}.nc'.format(
            outputDirectory, timeSeriesName, self.startYear, self.endYear)

        inputFiles = sorted(self.historyStreams.readpath(
            'timeSeriesStatsMonthlyOutput', startDate=startDate,
            endDate=endDate, calendar=self.calendar))

        years, months = get_files_year_month(inputFiles,
                                             self.historyStreams,
                                             'timeSeriesStatsMonthlyOutput')

        variableList = [field['mpas'] for field in self.fields]

        outputExists = os.path.exists(outputFileName)
        outputValid = outputExists
        if outputExists:
            with open_mpas_dataset(fileName=outputFileName,
                                   calendar=self.calendar,
                                   timeVariableNames=None,
                                   variableList=None,
                                   startDate=startDate,
                                   endDate=endDate) as dsIn:

                for inIndex in range(dsIn.dims['Time']):

                    mask = np.logical_and(
                        dsIn.year[inIndex].values == years,
                        dsIn.month[inIndex].values == months)
                    if np.count_nonzero(mask) == 0:
                        outputValid = False
                        break

        if outputValid:
            self.logger.info('  Time series exists -- Done.')
            return

        # get areaCell
        restartFileName = \
            self.runStreams.readpath('restart')[0]

        dsRestart = xr.open_dataset(restartFileName)
        dsRestart = dsRestart.isel(Time=0)
        areaCell = dsRestart.areaCell

        nVertLevels = dsRestart.sizes['nVertLevels']

        vertIndex = \
            xr.DataArray.from_dict({'dims': ('nVertLevels',),
                                    'data': np.arange(nVertLevels)})

        vertMask = vertIndex < dsRestart.maxLevelCell

        # get region masks
        regionMaskFileName = self.masksSubtask.maskFileName
        dsRegionMask = xr.open_dataset(regionMaskFileName)

        # figure out the indices of the regions to plot
        regionNames = decode_strings(dsRegionMask.regionNames)

        regionIndices = []
        for regionToPlot in self.regionNames:
            for index, regionName in enumerate(regionNames):
                if regionToPlot == regionName:
                    regionIndices.append(index)
                    break

        # select only those regions we want to plot
        dsRegionMask = dsRegionMask.isel(nRegions=regionIndices)
        cellMasks = dsRegionMask.regionCellMasks
        regionNamesVar = dsRegionMask.regionNames

        totalArea = (cellMasks * areaCell * vertMask).sum('nCells')

        datasets = []
        for timeIndex, fileName in enumerate(inputFiles):

            dsLocal = open_mpas_dataset(
                fileName=fileName,
                calendar=self.calendar,
                variableList=variableList,
                startDate=startDate,
                endDate=endDate)
            dsLocal = dsLocal.isel(Time=0)
            time = dsLocal.Time.values
            date = days_to_datetime(time, calendar=self.calendar)

            self.logger.info('    date: {:04d}-{:02d}'.format(date.year,
                                                              date.month))

            # for each region and variable, compute area-weighted sum and
            # squared sum
            for field in self.fields:
                variableName = field['mpas']
                prefix = field['prefix']
                self.logger.info('      {}'.format(field['titleName']))

                var = dsLocal[variableName].where(vertMask)

                meanName = '{}_mean'.format(prefix)
                dsLocal[meanName] = \
                    (cellMasks * areaCell * var).sum('nCells') / totalArea

                meanSquaredName = '{}_meanSquared'.format(prefix)
                dsLocal[meanSquaredName] = \
                    (cellMasks * areaCell * var**2).sum('nCells') / totalArea

            # drop the original variables
            dsLocal = dsLocal.drop_vars(variableList)

            datasets.append(dsLocal)

        # combine data sets into a single data set
        dsOut = xr.concat(datasets, 'Time')

        dsOut.coords['regionNames'] = regionNamesVar
        dsOut['totalArea'] = totalArea
        dsOut.coords['year'] = (('Time',), years)
        dsOut['year'].attrs['units'] = 'years'
        dsOut.coords['month'] = (('Time',), months)
        dsOut['month'].attrs['units'] = 'months'

        # Note: restart file, not a mesh file because we need refBottomDepth,
        # not in a mesh file
        try:
            restartFile = self.runStreams.readpath('restart')[0]
        except ValueError:
            raise IOError('No MPAS-O restart file found: need at least one '
                          'restart file for plotting time series vs. depth')

        with xr.open_dataset(restartFile) as dsRestart:
            depths = dsRestart.refBottomDepth.values
            z = np.zeros(depths.shape)
            z[0] = -0.5 * depths[0]
            z[1:] = -0.5 * (depths[0:-1] + depths[1:])

        dsOut.coords['z'] = (('nVertLevels',), z)
        dsOut['z'].attrs['units'] = 'meters'

        write_netcdf(dsOut, outputFileName)
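
# Editor's sketch: the z coordinate above puts each level at the midpoint
# between adjacent layer bottoms (refBottomDepth), negated so depth increases
# downward. A standalone check with hypothetical depths:
import numpy as np

depths = np.array([10., 30., 60.])    # hypothetical refBottomDepth values
z = np.zeros(depths.shape)
z[0] = -0.5 * depths[0]               # top layer: halfway to the first bottom
z[1:] = -0.5 * (depths[0:-1] + depths[1:])
assert np.allclose(z, [-5., -20., -45.])
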
    def _compute_moc_time_series_postprocess(self):  # {{{
        '''compute MOC time series as a post-process'''

        # Compute and plot time series of Atlantic MOC at 26.5N (RAPID array)
        self.logger.info('\n  Compute and/or plot post-processed Atlantic MOC '
                         'time series...')
        self.logger.info('   Load data...')

        outputDirectory = build_config_full_path(self.config, 'output',
                                                 'timeseriesSubdirectory')
        try:
            os.makedirs(outputDirectory)
        except OSError:
            pass

        outputFileTseries = '{}/mocTimeSeries.nc'.format(outputDirectory)

        dvEdge, areaCell, refBottomDepth, latCell, nVertLevels, \
            refTopDepth, refLayerThickness = self._load_mesh()

        latAtlantic = self.lat['Atlantic']
        dLat = latAtlantic - 26.5
        indlat26 = np.where(np.abs(dLat) == np.amin(np.abs(dLat)))

        dictRegion = self.dictRegion['Atlantic']
        maxEdgesInTransect = dictRegion['maxEdgesInTransect']
        transectEdgeGlobalIDs = dictRegion['transectEdgeGlobalIDs']
        transectEdgeMaskSigns = dictRegion['transectEdgeMaskSigns']
        regionCellMask = dictRegion['cellMask']

        streamName = 'timeSeriesStatsMonthlyOutput'
        inputFilesTseries = sorted(
            self.historyStreams.readpath(streamName,
                                         startDate=self.startDateTseries,
                                         endDate=self.endDateTseries,
                                         calendar=self.calendar))

        years, months = get_files_year_month(inputFilesTseries,
                                             self.historyStreams,
                                             'timeSeriesStatsMonthlyOutput')

        mocRegion = np.zeros(len(inputFilesTseries))
        times = np.zeros(len(inputFilesTseries))
        computed = np.zeros(len(inputFilesTseries), bool)

        continueOutput = os.path.exists(outputFileTseries)
        if continueOutput:
            self.logger.info('   Read in previously computed MOC time series')
            with open_mpas_dataset(fileName=outputFileTseries,
                                   calendar=self.calendar,
                                   timeVariableNames=None,
                                   variableList=['mocAtlantic26'],
                                   startDate=self.startDateTseries,
                                   endDate=self.endDateTseries) as dsMOCIn:

                dsMOCIn.load()

                # first, copy all computed data
                for inIndex in range(dsMOCIn.dims['Time']):

                    mask = np.logical_and(
                        dsMOCIn.year[inIndex].values == years,
                        dsMOCIn.month[inIndex].values == months)

                    outIndex = np.where(mask)[0][0]

                    mocRegion[outIndex] = dsMOCIn.mocAtlantic26[inIndex]
                    times[outIndex] = dsMOCIn.Time[inIndex]
                    computed[outIndex] = True

                if np.all(computed):
                    # no need to waste time writing out the data set again
                    return dsMOCIn

        for timeIndex, fileName in enumerate(inputFilesTseries):
            if computed[timeIndex]:
                continue

            dsLocal = open_mpas_dataset(fileName=fileName,
                                        calendar=self.calendar,
                                        variableList=self.variableList,
                                        startDate=self.startDateTseries,
                                        endDate=self.endDateTseries)
            dsLocal = dsLocal.isel(Time=0)
            time = dsLocal.Time.values
            times[timeIndex] = time
            date = days_to_datetime(time, calendar=self.calendar)

            self.logger.info('     date: {:04d}-{:02d}'.format(
                date.year, date.month))

            if self.includeBolus:
                dsLocal['avgNormalVelocity'] = \
                    dsLocal['timeMonthly_avg_normalVelocity'] + \
                    dsLocal['timeMonthly_avg_normalGMBolusVelocity']

                dsLocal['avgVertVelocityTop'] = \
                    dsLocal['timeMonthly_avg_vertVelocityTop'] + \
                    dsLocal['timeMonthly_avg_vertGMBolusVelocityTop']
            else:
                # rename some variables for convenience
                dsLocal = dsLocal.rename({
                    'timeMonthly_avg_normalVelocity':
                    'avgNormalVelocity',
                    'timeMonthly_avg_vertVelocityTop':
                    'avgVertVelocityTop'
                })

            horizontalVel = dsLocal.avgNormalVelocity.values
            verticalVel = dsLocal.avgVertVelocityTop.values
            velArea = verticalVel * areaCell[:, np.newaxis]
            transportZ = self._compute_transport(maxEdgesInTransect,
                                                 transectEdgeGlobalIDs,
                                                 transectEdgeMaskSigns,
                                                 nVertLevels, dvEdge,
                                                 refLayerThickness,
                                                 horizontalVel)
            mocTop = self._compute_moc(latAtlantic, nVertLevels, latCell,
                                       regionCellMask, transportZ, velArea)
            mocRegion[timeIndex] = np.amax(mocTop[:, indlat26])

        description = 'Max MOC Atlantic streamfunction nearest to RAPID ' \
            'Array latitude (26.5N)'

        dictionary = {
            'dims': ['Time'],
            'coords': {
                'Time': {
                    'dims': ('Time'),
                    'data': times,
                    'attrs': {
                        'units': 'days since 0001-01-01'
                    }
                },
                'year': {
                    'dims': ('Time'),
                    'data': years,
                    'attrs': {
                        'units': 'year'
                    }
                },
                'month': {
                    'dims': ('Time'),
                    'data': months,
                    'attrs': {
                        'units': 'month'
                    }
                }
            },
            'data_vars': {
                'mocAtlantic26': {
                    'dims': ('Time'),
                    'data': mocRegion,
                    'attrs': {
                        'units': 'Sv (10^6 m^3/s)',
                        'description': description
                    }
                }
            }
        }
        dsMOCTimeSeries = xr.Dataset.from_dict(dictionary)
        write_netcdf(dsMOCTimeSeries, outputFileTseries)

        return dsMOCTimeSeries  # }}}
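
# Editor's sketch: a trimmed version of the xr.Dataset.from_dict pattern used
# above, with two hypothetical time samples, showing where dims, data and
# attrs land:
import numpy as np
import xarray as xr

ds = xr.Dataset.from_dict({
    'coords': {
        'Time': {'dims': ('Time',), 'data': np.array([15.5, 45.0]),
                 'attrs': {'units': 'days since 0001-01-01'}}},
    'data_vars': {
        'mocAtlantic26': {'dims': ('Time',), 'data': np.array([17.2, 16.8]),
                          'attrs': {'units': 'Sv (10^6 m^3/s)'}}}})
assert ds.mocAtlantic26.attrs['units'].startswith('Sv')
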
    def _compute_moc_time_series_analysismember(self):  # {{{
        '''compute MOC time series from analysis member'''

        # Compute and plot time series of Atlantic MOC at 26.5N (RAPID array)
        self.logger.info(
            '\n  Compute Atlantic MOC time series from analysis member...')
        self.logger.info('   Load data...')

        outputDirectory = build_config_full_path(self.config, 'output',
                                                 'timeseriesSubdirectory')
        try:
            os.makedirs(outputDirectory)
        except OSError:
            pass

        outputFileTseries = '{}/mocTimeSeries.nc'.format(outputDirectory)

        streamName = 'timeSeriesStatsMonthlyOutput'

        # Get bin latitudes and index of 26.5N
        binBoundaryMocStreamfunction = None
        # first try timeSeriesStatsMonthly for bin boundaries, then try
        # mocStreamfunctionOutput stream as a backup option
        for streamName in [
                'timeSeriesStatsMonthlyOutput', 'mocStreamfunctionOutput'
        ]:
            try:
                inputFile = self.historyStreams.readpath(streamName)[0]
            except ValueError:
                raise IOError('At least one file from stream {} is needed '
                              'to compute MOC'.format(streamName))

            with xr.open_dataset(inputFile) as ds:
                if 'binBoundaryMocStreamfunction' in ds.data_vars:
                    binBoundaryMocStreamfunction = \
                        ds.binBoundaryMocStreamfunction.values
                    break

        if binBoundaryMocStreamfunction is None:
            raise ValueError('Could not find binBoundaryMocStreamfunction in '
                             'either timeSeriesStatsMonthlyOutput or '
                             'mocStreamfunctionOutput streams')

        binBoundaryMocStreamfunction = np.rad2deg(binBoundaryMocStreamfunction)
        dLat = binBoundaryMocStreamfunction - 26.5
        indlat26 = np.where(np.abs(dLat) == np.amin(np.abs(dLat)))

        inputFilesTseries = sorted(
            self.historyStreams.readpath(streamName,
                                         startDate=self.startDateTseries,
                                         endDate=self.endDateTseries,
                                         calendar=self.calendar))

        years, months = get_files_year_month(inputFilesTseries,
                                             self.historyStreams,
                                             'timeSeriesStatsMonthlyOutput')

        mocRegion = np.zeros(len(inputFilesTseries))
        times = np.zeros(len(inputFilesTseries))
        computed = np.zeros(len(inputFilesTseries), bool)

        continueOutput = os.path.exists(outputFileTseries)
        if continueOutput:
            self.logger.info('   Read in previously computed MOC time series')
            with open_mpas_dataset(fileName=outputFileTseries,
                                   calendar=self.calendar,
                                   timeVariableNames=None,
                                   variableList=['mocAtlantic26'],
                                   startDate=self.startDateTseries,
                                   endDate=self.endDateTseries) as dsMOCIn:

                dsMOCIn.load()

                # first, copy all computed data
                for inIndex in range(dsMOCIn.dims['Time']):

                    mask = np.logical_and(
                        dsMOCIn.year[inIndex].values == years,
                        dsMOCIn.month[inIndex].values == months)

                    outIndex = np.where(mask)[0][0]

                    mocRegion[outIndex] = dsMOCIn.mocAtlantic26[inIndex]
                    times[outIndex] = dsMOCIn.Time[inIndex]
                    computed[outIndex] = True

                if np.all(computed):
                    # no need to waste time writing out the data set again
                    return dsMOCIn

        for timeIndex, fileName in enumerate(inputFilesTseries):
            if computed[timeIndex]:
                continue

            dsLocal = open_mpas_dataset(fileName=fileName,
                                        calendar=self.calendar,
                                        variableList=self.variableList,
                                        startDate=self.startDateTseries,
                                        endDate=self.endDateTseries)
            dsLocal = dsLocal.isel(Time=0)
            time = dsLocal.Time.values
            times[timeIndex] = time
            date = days_to_datetime(time, calendar=self.calendar)

            self.logger.info('     date: {:04d}-{:02d}'.format(
                date.year, date.month))

            # hard-wire region=0 (Atlantic) for now
            indRegion = 0
            mocTop = dsLocal.timeMonthly_avg_mocStreamvalLatAndDepthRegion[
                indRegion, :, :].values
            mocRegion[timeIndex] = np.amax(mocTop[:, indlat26])

        description = 'Max MOC Atlantic streamfunction nearest to RAPID ' \
            'Array latitude (26.5N)'

        dictionary = {
            'dims': ['Time'],
            'coords': {
                'Time': {
                    'dims': ('Time'),
                    'data': times,
                    'attrs': {
                        'units': 'days since 0001-01-01'
                    }
                },
                'year': {
                    'dims': ('Time'),
                    'data': years,
                    'attrs': {
                        'units': 'year'
                    }
                },
                'month': {
                    'dims': ('Time'),
                    'data': months,
                    'attrs': {
                        'units': 'month'
                    }
                }
            },
            'data_vars': {
                'mocAtlantic26': {
                    'dims': ('Time'),
                    'data': mocRegion,
                    'attrs': {
                        'units': 'Sv (10^6 m^3/s)',
                        'description': description
                    }
                }
            }
        }
        dsMOCTimeSeries = xr.Dataset.from_dict(dictionary)
        write_netcdf(dsMOCTimeSeries, outputFileTseries)

        return dsMOCTimeSeries  # }}}
    def run_task(self):  # {{{
        """
        Computes time-series of transport through transects.
        """
        # Authors
        # -------
        # Xylar Asay-Davis, Stephen Price

        self.logger.info("Computing time series of transport through "
                         "transects...")

        config = self.config

        startDate = '{:04d}-01-01_00:00:00'.format(self.startYear)
        endDate = '{:04d}-12-31_23:59:59'.format(self.endYear)

        outputDirectory = '{}/transport/'.format(
            build_config_full_path(config, 'output', 'timeseriesSubdirectory'))
        try:
            os.makedirs(outputDirectory)
        except OSError:
            pass

        outFileName = '{}/transport_{:04d}-{:04d}.nc'.format(
            outputDirectory, self.startYear, self.endYear)

        inputFiles = sorted(
            self.historyStreams.readpath('timeSeriesStatsMonthlyOutput',
                                         startDate=startDate,
                                         endDate=endDate,
                                         calendar=self.calendar))

        years, months = get_files_year_month(inputFiles, self.historyStreams,
                                             'timeSeriesStatsMonthlyOutput')

        variableList = ['timeMonthly_avg_layerThickness']
        with open_mpas_dataset(fileName=inputFiles[0],
                               calendar=self.calendar,
                               startDate=startDate,
                               endDate=endDate) as dsIn:
            if 'timeMonthly_avg_normalTransportVelocity' in dsIn:
                variableList.append('timeMonthly_avg_normalTransportVelocity')
            elif 'timeMonthly_avg_normalGMBolusVelocity' in dsIn:
                variableList = variableList + \
                    ['timeMonthly_avg_normalVelocity',
                     'timeMonthly_avg_normalGMBolusVelocity']
            else:
                self.logger.warning('Cannot compute transport velocity. '
                                    'Using advection velocity.')
                variableList.append('timeMonthly_avg_normalVelocity')

        outputExists = os.path.exists(outFileName)
        outputValid = outputExists
        if outputExists:
            with open_mpas_dataset(fileName=outFileName,
                                   calendar=self.calendar,
                                   timeVariableNames=None,
                                   variableList=None,
                                   startDate=startDate,
                                   endDate=endDate) as dsOut:

                for inIndex in range(dsOut.dims['Time']):

                    mask = numpy.logical_and(
                        dsOut.year[inIndex].values == years,
                        dsOut.month[inIndex].values == months)
                    if numpy.count_nonzero(mask) == 0:
                        outputValid = False
                        break

        if outputValid:
            self.logger.info('  Time series exists -- Done.')
            return

        transectMaskFileName = self.masksSubtask.maskFileName

        dsTransectMask = xarray.open_dataset(transectMaskFileName)

        # figure out the indices of the transects to plot
        maskTransectNames = decode_strings(dsTransectMask.transectNames)

        dsMesh = xarray.open_dataset(self.restartFileName)
        dvEdge = dsMesh.dvEdge
        cellsOnEdge = dsMesh.cellsOnEdge - 1

        timeDatasets = []
        self.logger.info('  Computing transport...')
        for fileName in inputFiles:
            self.logger.info('    input file: {}'.format(fileName))
            dsTimeSlice = open_mpas_dataset(fileName=fileName,
                                            calendar=self.calendar,
                                            variableList=variableList,
                                            startDate=startDate,
                                            endDate=endDate)

            transectDatasets = []
            transectIndices = []
            for transect in self.transectsToPlot:
                self.logger.info('    transect: {}'.format(transect))
                try:
                    transectIndex = maskTransectNames.index(transect)
                except ValueError:
                    self.logger.warning('      Not found in masks. '
                                        'Skipping.')
                    continue
                transectIndices.append(transectIndex)

                # select the current transect
                dsMask = dsTransectMask.isel(nTransects=[transectIndex])
                edgeIndices = dsMask.transectEdgeGlobalIDs - 1
                edgeIndices = edgeIndices.where(edgeIndices >= 0,
                                                drop=True).astype(int)
                edgeSign = dsMask.transectEdgeMaskSigns.isel(
                    nEdges=edgeIndices)

                dsIn = dsTimeSlice.isel(nEdges=edgeIndices)

                dv = dvEdge.isel(nEdges=edgeIndices)
                coe = cellsOnEdge.isel(nEdges=edgeIndices)

                # work on data from simulations
                if 'timeMonthly_avg_normalTransportVelocity' in dsIn:
                    vel = dsIn.timeMonthly_avg_normalTransportVelocity
                elif 'timeMonthly_avg_normalGMBolusVelocity' in dsIn:
                    vel = (dsIn.timeMonthly_avg_normalVelocity +
                           dsIn.timeMonthly_avg_normalGMBolusVelocity)
                else:
                    vel = dsIn.timeMonthly_avg_normalVelocity

                # get layer thickness on edges by averaging adjacent cells
                h = 0.5 * dsIn.timeMonthly_avg_layerThickness.isel(
                    nCells=coe).sum(dim='TWO')

                edgeTransport = edgeSign * vel * h * dv

                # convert from m^3/s to Sv
                transport = (constants.m3ps_to_Sv * edgeTransport.sum(
                    dim=['maxEdgesInTransect', 'nVertLevels']))

                dsOut = xarray.Dataset()
                dsOut['transport'] = transport
                dsOut.transport.attrs['units'] = 'Sv'
                dsOut.transport.attrs['description'] = \
                    'Transport through transects'
                transectDatasets.append(dsOut)

            dsOut = xarray.concat(transectDatasets, 'nTransects')
            timeDatasets.append(dsOut)

        # combine data sets into a single data set
        dsOut = xarray.concat(timeDatasets, 'Time')
        dsOut.coords['transectNames'] = dsTransectMask.transectNames.isel(
            nTransects=transectIndices)
        dsOut.coords['year'] = (('Time'), years)
        dsOut['year'].attrs['units'] = 'years'
        dsOut.coords['month'] = (('Time'), months)
        dsOut['month'].attrs['units'] = 'months'
        write_netcdf(dsOut, outFileName)
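
# Editor's sketch: per-edge transport above is sign * velocity * thickness *
# edge length, summed over edges and levels, then scaled from m^3/s to
# Sverdrups (1 Sv = 1e6 m^3/s). A toy numpy version with made-up numbers:
import numpy

m3ps_to_Sv = 1e-6                      # assumed value of constants.m3ps_to_Sv
edgeSign = numpy.array([1., -1., 1.])  # hypothetical transect of 3 edges
vel = numpy.full((3, 2), 0.1)          # m/s, 2 vertical levels
h = numpy.full((3, 2), 100.0)          # layer thickness on edges, m
dv = numpy.full(3, 5.0e4)              # edge lengths, m

edgeTransport = edgeSign[:, None] * vel * h * dv[:, None]
transport = m3ps_to_Sv * edgeTransport.sum()
print('{:.2f} Sv'.format(transport))   # 1.00 Sv for this toy setup
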
def update_time_bounds_from_file_names(config, section, componentName):  # {{{
    """
    Update the start and end years and dates for time series, climatologies or
    climate indices based on the years actually available in the list of files.
    """
    # Authors
    # -------
    # Xylar Asay-Davis

    # read parameters from config file
    # the run directory contains the restart files
    runDirectory = build_config_full_path(config, 'input', 'runSubdirectory')
    # if the history directory exists, use it; if not, fall back on
    # runDirectory
    historyDirectory = build_config_full_path(
        config,
        'input',
        '{}HistorySubdirectory'.format(componentName),
        defaultPath=runDirectory)

    errorOnMissing = config.getboolean('input', 'errorOnMissing')

    namelistFileName = build_config_full_path(
        config, 'input', '{}NamelistFileName'.format(componentName))
    try:
        namelist = NameList(namelistFileName)
    except (OSError, IOError):
        # this component likely doesn't have output in this run
        return

    streamsFileName = build_config_full_path(
        config, 'input', '{}StreamsFileName'.format(componentName))
    try:
        historyStreams = StreamsFile(streamsFileName,
                                     streamsdir=historyDirectory)
    except (OSError, IOError):
        # this component likely doesn't have output in this run
        return

    calendar = namelist.get('config_calendar_type')

    requestedStartYear = config.getint(section, 'startYear')
    requestedEndYear = config.get(section, 'endYear')
    if requestedEndYear == 'end':
        requestedEndYear = None
    else:
        # get it again as an integer
        requestedEndYear = config.getint(section, 'endYear')

    startDate = '{:04d}-01-01_00:00:00'.format(requestedStartYear)
    if requestedEndYear is None:
        endDate = None
    else:
        endDate = '{:04d}-12-31_23:59:59'.format(requestedEndYear)

    streamName = 'timeSeriesStatsMonthlyOutput'
    try:
        inputFiles = historyStreams.readpath(streamName,
                                             startDate=startDate,
                                             endDate=endDate,
                                             calendar=calendar)
    except ValueError:
        # this component likely doesn't have output in this run
        return

    if len(inputFiles) == 0:
        raise ValueError('No input files found for stream {} in {} between '
                         '{} and {}'.format(streamName, componentName,
                                            requestedStartYear,
                                            requestedEndYear))

    years, months = get_files_year_month(sorted(inputFiles), historyStreams,
                                         streamName)

    # search for the start of the first full year
    firstIndex = 0
    while firstIndex < len(years) and months[firstIndex] != 1:
        firstIndex += 1
    startYear = years[firstIndex]

    # search for the end of the last full year
    lastIndex = len(years) - 1
    while lastIndex >= 0 and months[lastIndex] != 12:
        lastIndex -= 1
    endYear = years[lastIndex]

    if requestedEndYear is None:
        config.set(section, 'endYear', str(endYear))
        requestedEndYear = endYear

    if startYear != requestedStartYear or endYear != requestedEndYear:
        if errorOnMissing:
            raise ValueError(
                "{} start and/or end year different from requested\n"
                "requested: {:04d}-{:04d}\n"
                "actual:   {:04d}-{:04d}\n".format(section, requestedStartYear,
                                                   requestedEndYear, startYear,
                                                   endYear))
        else:
            print("Warning: {} start and/or end year different from "
                  "requested\n"
                  "requested: {:04d}-{:04d}\n"
                  "actual:   {:04d}-{:04d}\n".format(section,
                                                     requestedStartYear,
                                                     requestedEndYear,
                                                     startYear, endYear))
            config.set(section, 'startYear', str(startYear))
            config.set(section, 'endYear', str(endYear))

    startDate = '{:04d}-01-01_00:00:00'.format(startYear)
    config.set(section, 'startDate', startDate)
    endDate = '{:04d}-12-31_23:59:59'.format(endYear)
    config.set(section, 'endDate', endDate)
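
# Editor's sketch: the function above treats endYear = 'end' as a sentinel
# for "run through the last complete year in the files". A stand-alone
# version of that resolution with hypothetical values:
def resolve_end_year(raw_value, last_year_in_files):
    if raw_value == 'end':
        return last_year_in_files
    return int(raw_value)

assert resolve_end_year('end', 1855) == 1855
assert resolve_end_year('1852', 1855) == 1852
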
    def run_task(self):  # {{{
        '''
        Compute the regional-mean time series
        '''
        # Authors
        # -------
        # Xylar Asay-Davis

        config = self.config

        self.logger.info("\nCompute time series of regional means...")

        startDate = '{:04d}-01-01_00:00:00'.format(self.startYear)
        endDate = '{:04d}-12-31_23:59:59'.format(self.endYear)

        regionGroup = self.regionGroup
        sectionSuffix = regionGroup[0].upper() + \
            regionGroup[1:].replace(' ', '')
        timeSeriesName = sectionSuffix[0].lower() + sectionSuffix[1:]
        sectionName = 'timeSeries{}'.format(sectionSuffix)

        outputDirectory = '{}/{}/'.format(
            build_config_full_path(config, 'output', 'timeseriesSubdirectory'),
            timeSeriesName)
        try:
            os.makedirs(outputDirectory)
        except OSError:
            pass

        outFileName = '{}/{}_{:04d}-{:04d}.nc'.format(outputDirectory,
                                                      timeSeriesName,
                                                      self.startYear,
                                                      self.endYear)

        inputFiles = sorted(
            self.historyStreams.readpath('timeSeriesStatsMonthlyOutput',
                                         startDate=startDate,
                                         endDate=endDate,
                                         calendar=self.calendar))

        years, months = get_files_year_month(inputFiles, self.historyStreams,
                                             'timeSeriesStatsMonthlyOutput')

        variables = config.getExpression(sectionName, 'variables')

        variableList = [var['mpas'] for var in variables] + \
            ['timeMonthly_avg_layerThickness']

        outputExists = os.path.exists(outFileName)
        outputValid = outputExists
        if outputExists:
            with open_mpas_dataset(fileName=outFileName,
                                   calendar=self.calendar,
                                   timeVariableNames=None,
                                   variableList=None,
                                   startDate=startDate,
                                   endDate=endDate) as dsOut:

                for inIndex in range(dsOut.dims['Time']):

                    mask = numpy.logical_and(
                        dsOut.year[inIndex].values == years,
                        dsOut.month[inIndex].values == months)
                    if numpy.count_nonzero(mask) == 0:
                        outputValid = False
                        break

        if outputValid:
            self.logger.info('  Time series exists -- Done.')
            return

        # Load mesh related variables
        try:
            restartFileName = self.runStreams.readpath('restart')[0]
        except ValueError:
            raise IOError('No MPAS-O restart file found: need at least one '
                          'restart file for ocean region time series')

        cellsChunk = 32768
        timeChunk = 1

        datasets = []
        for timeIndex, fileName in enumerate(inputFiles):

            dsTimeSlice = open_mpas_dataset(fileName=fileName,
                                            calendar=self.calendar,
                                            variableList=variableList,
                                            startDate=startDate,
                                            endDate=endDate)
            datasets.append(dsTimeSlice)

        chunk = {'Time': timeChunk, 'nCells': cellsChunk}

        if config.has_option(sectionName, 'zmin'):
            config_zmin = config.getfloat(sectionName, 'zmin')
        else:
            config_zmin = None

        if config.has_option(sectionName, 'zmax'):
            config_zmax = config.getfloat(sectionName, 'zmax')
        else:
            config_zmax = None

        with dask.config.set(scheduler='threads',
                             pool=ThreadPool(self.daskThreads)):
            # combine data sets into a single data set
            dsIn = xarray.concat(datasets, 'Time').chunk(chunk)

            chunk = {'nCells': cellsChunk}
            dsRestart = xarray.open_dataset(restartFileName)
            dsRestart = dsRestart.isel(Time=0).chunk(chunk)
            dsIn['areaCell'] = dsRestart.areaCell
            if 'landIceMask' in dsRestart:
                # only the region outside of ice-shelf cavities
                dsIn['openOceanMask'] = dsRestart.landIceMask == 0

            dsIn['zMid'] = compute_zmid(dsRestart.bottomDepth,
                                        dsRestart.maxLevelCell,
                                        dsRestart.layerThickness)

            regionMaskFileName = self.masksSubtask.maskFileName

            dsRegionMask = xarray.open_dataset(regionMaskFileName)

            maskRegionNames = decode_strings(dsRegionMask.regionNames)

            datasets = []
            regionIndices = []
            for regionName in self.regionNames:

                self.logger.info('    region: {}'.format(regionName))
                regionIndex = maskRegionNames.index(regionName)
                regionIndices.append(regionIndex)

                chunk = {'nCells': cellsChunk}
                dsMask = dsRegionMask.isel(nRegions=regionIndex).chunk(chunk)

                cellMask = dsMask.regionCellMasks == 1
                if 'openOceanMask' in dsIn:
                    cellMask = numpy.logical_and(cellMask, dsIn.openOceanMask)
                dsRegion = dsIn.where(cellMask, drop=True)

                totalArea = dsRegion['areaCell'].sum()
                self.logger.info('      totalArea: {} mil. km^2'.format(
                    1e-12 * totalArea.values))

                self.logger.info("Don't worry about the following dask "
                                 "warnings.")
                if config_zmin is None:
                    zmin = dsMask.zmin
                else:
                    zmin = config_zmin

                if config_zmax is None:
                    zmax = dsMask.zmax
                else:
                    zmax = config_zmax

                depthMask = numpy.logical_and(dsRegion.zMid >= zmin,
                                              dsRegion.zMid <= zmax)
                depthMask.compute()
                self.logger.info("Dask warnings should be done.")
                dsRegion['depthMask'] = depthMask

                layerThickness = dsRegion.timeMonthly_avg_layerThickness
                dsRegion['volCell'] = (dsRegion.areaCell *
                                       layerThickness).where(depthMask)
                totalVol = dsRegion.volCell.sum(dim='nVertLevels').sum(
                    dim='nCells')
                totalVol.compute()
                self.logger.info('      totalVol (mil. km^3): {}'.format(
                    1e-15 * totalVol.values))

                dsRegion = dsRegion.transpose('Time', 'nCells', 'nVertLevels')

                dsOut = xarray.Dataset()
                dsOut['totalVol'] = totalVol
                dsOut.totalVol.attrs['units'] = 'm^3'
                dsOut['totalArea'] = totalArea
                dsOut.totalArea.attrs['units'] = 'm^2'
                dsOut['zbounds'] = ('nbounds', [zmin, zmax])
                dsOut.zbounds.attrs['units'] = 'm'

                for var in variables:
                    outName = var['name']
                    self.logger.info('      {}'.format(outName))
                    mpasVarName = var['mpas']
                    timeSeries = dsRegion[mpasVarName]
                    units = timeSeries.units
                    description = timeSeries.long_name

                    is3d = 'nVertLevels' in timeSeries.dims
                    if is3d:
                        timeSeries = \
                            (dsRegion.volCell*timeSeries.where(depthMask)).sum(
                                dim='nVertLevels').sum(dim='nCells') / totalVol
                    else:
                        timeSeries = \
                            (dsRegion.areaCell*timeSeries).sum(
                                dim='nCells') / totalArea

                    timeSeries.compute()

                    dsOut[outName] = timeSeries
                    dsOut[outName].attrs['units'] = units
                    dsOut[outName].attrs['description'] = description
                    dsOut[outName].attrs['is3d'] = str(is3d)

                datasets.append(dsOut)

            # combine data sets into a single data set
            dsOut = xarray.concat(datasets, 'nRegions')

            dsOut.coords['regionNames'] = dsRegionMask.regionNames.isel(
                nRegions=regionIndices)
            dsOut.coords['year'] = (('Time'), years)
            dsOut['year'].attrs['units'] = 'years'
            dsOut.coords['month'] = (('Time'), months)
            dsOut['month'].attrs['units'] = 'months'

            write_netcdf(dsOut, outFileName)
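
# Editor's sketch: several run_task methods above reuse cached output only if
# every (year, month) already in the output file matches some input file.
# The same check with hypothetical numpy arrays:
import numpy

inYears = numpy.array([1, 1, 2])
inMonths = numpy.array([11, 12, 1])
outYears = numpy.array([1, 1])         # times already in the output file
outMonths = numpy.array([11, 12])

outputValid = True
for i in range(len(outYears)):
    mask = numpy.logical_and(outYears[i] == inYears, outMonths[i] == inMonths)
    if numpy.count_nonzero(mask) == 0:
        outputValid = False
        break
assert outputValid
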