def _update_time_series_bounds_from_file_names(self):  # {{{
    """
    Update the start and end years and dates for time series based on the
    years actually available in the list of files.

    Sets ``self.startYear``, ``self.endYear``, ``self.startDate`` and
    ``self.endDate``, and updates the corresponding config options if the
    available data does not cover the requested range.

    Raises
    ------
    ValueError
        If the input files do not contain at least one complete year.
    """
    # Authors
    # -------
    # Xylar Asay-Davis

    config = self.config
    section = self.section

    requestedStartYear = config.getint(section, 'startYear')
    requestedEndYear = config.getint(section, 'endYear')

    fileNames = sorted(self.inputFiles)
    years, months = get_files_year_month(fileNames,
                                         self.historyStreams,
                                         'timeSeriesStatsMonthlyOutput')

    # search for the start of the first full year (first January)
    firstIndex = 0
    while firstIndex < len(years) and months[firstIndex] != 1:
        firstIndex += 1

    # search for the end of the last full year (last December)
    lastIndex = len(years) - 1
    while lastIndex >= 0 and months[lastIndex] != 12:
        lastIndex -= 1

    # guard against inputs with no complete year: without this the
    # indexing below fails with an opaque IndexError
    if firstIndex >= len(years) or lastIndex < 0:
        raise ValueError('No complete years were found in the input '
                         'files for section {}'.format(section))

    startYear = years[firstIndex]
    endYear = years[lastIndex]

    if startYear != requestedStartYear or endYear != requestedEndYear:
        # note: fixed typo "requestd" -> "requested" in the message,
        # consistent with update_time_bounds_from_file_names
        print("Warning: {} start and/or end year different from "
              "requested\n"
              "requested: {:04d}-{:04d}\n"
              "actual: {:04d}-{:04d}\n".format(section,
                                               requestedStartYear,
                                               requestedEndYear,
                                               startYear,
                                               endYear))
        config.set(section, 'startYear', str(startYear))
        config.set(section, 'endYear', str(endYear))

        startDate = '{:04d}-01-01_00:00:00'.format(startYear)
        config.set(section, 'startDate', startDate)
        endDate = '{:04d}-12-31_23:59:59'.format(endYear)
        config.set(section, 'endDate', endDate)
    else:
        startDate = config.get(section, 'startDate')
        endDate = config.get(section, 'endDate')

    self.startDate = startDate
    self.endDate = endDate
    self.startYear = startYear
    self.endYear = endYear
def _create_symlinks(self):  # {{{
    """
    Create symlinks to monthly mean files so they have the expected file
    naming convention for ncclimo.

    Returns
    -------
    symlinkDirectory : str
        The path to the symlinks created for each timeSeriesStatsMonthly
        input file
    """
    # Authors
    # -------
    # Xylar Asay-Davis

    config = self.config

    fileNames = sorted(self.inputFiles)
    years, months = get_files_year_month(fileNames,
                                         self.historyStreams,
                                         'timeSeriesStatsMonthlyOutput')

    climatologyBaseDirectory = build_config_full_path(
        config, 'output', 'mpasClimatologySubdirectory')

    symlinkDirectory = '{}/source_symlinks'.format(
        climatologyBaseDirectory)

    make_directories(symlinkDirectory)

    for inFileName, year, month in zip(fileNames, years, months):
        # ncclimo expects <model>.hist.am.timeSeriesStatsMonthly.YYYY-MM-01.nc
        outFileName = '{}/{}.hist.am.timeSeriesStatsMonthly.{:04d}-' \
            '{:02d}-01.nc'.format(symlinkDirectory, self.ncclimoModel,
                                  year, month)

        # EAFP: the original exists-then-symlink check was racy if two
        # tasks create links concurrently; tolerate only "already exists"
        try:
            os.symlink(inFileName, outFileName)
        except FileExistsError:
            # the link was made by a previous run or a concurrent task
            pass

    return symlinkDirectory
def _create_symlinks(self):  # {{{
    """
    Create symlinks to monthly mean files so they have the expected file
    naming convention for ncclimo.

    Returns
    -------
    symlinkDirectory : str
        The path to the symlinks created for each timeSeriesStatsMonthly
        input file
    """
    # Authors
    # -------
    # Xylar Asay-Davis

    config = self.config

    fileNames = sorted(self.inputFiles)
    years, months = get_files_year_month(fileNames,
                                         self.historyStreams,
                                         self.streamName)

    climatologyOpDirectory = get_climatology_op_directory(config, self.op)

    symlinkDirectory = '{}/source_symlinks'.format(climatologyOpDirectory)

    make_directories(symlinkDirectory)

    for inFileName, year, month in zip(fileNames, years, months):
        # ncclimo expects <model>.hist.am.timeSeriesStatsMonthly.YYYY-MM-01.nc
        outFileName = '{}/{}.hist.am.timeSeriesStatsMonthly.{:04d}-' \
            '{:02d}-01.nc'.format(symlinkDirectory, self.ncclimoModel,
                                  year, month)

        try:
            os.symlink(inFileName, outFileName)
        except FileExistsError:
            # only an existing link is benign; the original caught all
            # OSError, which also hid permission and filesystem errors
            pass

    return symlinkDirectory
def _compute_climatologies_with_xarray(self, inDirectory, outDirectory):
    # {{{
    '''
    Uses xarray to compute seasonal and/or annual climatologies.

    Parameters
    ----------
    inDirectory : str
        The run directory containing timeSeriesStatsMonthly output

    outDirectory : str
        The output directory where climatologies will be written
    '''
    # Authors
    # -------
    # Xylar Asay-Davis

    def _preprocess(ds):
        # drop unused variables during preprocessing because only the
        # variables we want are guaranteed to be in all the files
        return ds[variableList]

    season = self.season
    parentTask = self.parentTask
    variableList = parentTask.variableList[season]

    chunkSize = self.config.getint('input', 'maxChunkSize')

    if season in constants.abrevMonthNames:
        # this is an individual month, so create a climatology from
        # timeSeriesStatsMonthlyOutput

        fileNames = sorted(parentTask.inputFiles)
        years, months = get_files_year_month(
            fileNames, self.historyStreams, 'timeSeriesStatsMonthlyOutput')

        with xarray.open_mfdataset(parentTask.inputFiles,
                                   combine='nested',
                                   concat_dim='Time',
                                   chunks={'nCells': chunkSize},
                                   decode_cf=False, decode_times=False,
                                   preprocess=_preprocess) as ds:

            ds.coords['year'] = ('Time', years)
            ds.coords['month'] = ('Time', months)
            month = constants.abrevMonthNames.index(season) + 1
            climatologyFileName = parentTask.get_file_name(season)
            self.logger.info('computing climatology {}'.format(
                os.path.basename(climatologyFileName)))

            # average over all time slices belonging to this month
            ds = ds.where(ds.month == month, drop=True)
            ds = ds.mean(dim='Time')
            # bug fix: Dataset.compute() is not in-place -- the original
            # discarded its return value, throwing away the parallel
            # computation (and the requested num_workers) and forcing
            # write_netcdf to recompute serially
            ds = ds.compute(num_workers=self.subprocessCount)
            write_netcdf(ds, climatologyFileName)
    else:
        # a multi-month season: combine the already-computed monthly
        # climatologies with day-count weights
        outFileName = parentTask.get_file_name(season=season)
        self.logger.info('computing climatology {}'.format(
            os.path.basename(outFileName)))

        fileNames = []
        weights = []
        for month in constants.monthDictionary[season]:
            monthName = constants.abrevMonthNames[month - 1]
            fileNames.append(parentTask.get_file_name(season=monthName))
            weights.append(constants.daysInMonth[month - 1])

        with xarray.open_mfdataset(fileNames, concat_dim='weight',
                                   combine='nested',
                                   chunks={'nCells': chunkSize},
                                   decode_cf=False, decode_times=False,
                                   preprocess=_preprocess) as ds:
            ds.coords['weight'] = ('weight', weights)
            # weighted mean over the months of the season
            ds = ((ds.weight * ds).sum(dim='weight') /
                  ds.weight.sum(dim='weight'))
            # bug fix: keep the computed result (see comment above)
            ds = ds.compute(num_workers=self.subprocessCount)
            write_netcdf(ds, outFileName)
def run_task(self):  # {{{
    """
    Compute the regional-mean time series
    """
    # Authors
    # -------
    # Xylar Asay-Davis

    config = self.config

    self.logger.info("\nCompute time series of regional means...")

    # full calendar years from startYear through endYear
    startDate = '{:04d}-01-01_00:00:00'.format(self.startYear)
    endDate = '{:04d}-12-31_23:59:59'.format(self.endYear)

    # derive config-section and output names from the region group,
    # e.g. 'Antarctic Regions' -> 'AntarcticRegions'/'antarcticRegions'
    regionGroup = self.regionGroup
    sectionSuffix = regionGroup[0].upper() + \
        regionGroup[1:].replace(' ', '')
    timeSeriesName = sectionSuffix[0].lower() + sectionSuffix[1:]
    sectionName = 'timeSeries{}'.format(sectionSuffix)

    outputDirectory = '{}/{}/'.format(
        build_config_full_path(config, 'output', 'timeseriesSubdirectory'),
        timeSeriesName)
    try:
        os.makedirs(outputDirectory)
    except OSError:
        # directory presumably already exists
        pass

    outFileName = '{}/{}_{:04d}-{:04d}.nc'.format(outputDirectory,
                                                  timeSeriesName,
                                                  self.startYear,
                                                  self.endYear)

    inputFiles = sorted(
        self.historyStreams.readpath('timeSeriesStatsMonthlyOutput',
                                     startDate=startDate,
                                     endDate=endDate,
                                     calendar=self.calendar))

    years, months = get_files_year_month(inputFiles,
                                         self.historyStreams,
                                         'timeSeriesStatsMonthlyOutput')

    variables = config.getExpression(sectionName, 'variables')

    # layer thickness is always needed for the volume weighting below
    variableList = [var['mpas'] for var in variables] + \
        ['timeMonthly_avg_layerThickness']

    # the existing output is only valid if it contains every year/month
    # present in the input files; otherwise it must be recomputed
    outputExists = os.path.exists(outFileName)
    outputValid = outputExists
    if outputExists:
        with open_mpas_dataset(fileName=outFileName,
                               calendar=self.calendar,
                               timeVariableNames=None,
                               variableList=None,
                               startDate=startDate,
                               endDate=endDate) as dsOut:

            for inIndex in range(dsOut.dims['Time']):

                mask = numpy.logical_and(
                    dsOut.year[inIndex].values == years,
                    dsOut.month[inIndex].values == months)
                if numpy.count_nonzero(mask) == 0:
                    outputValid = False
                    break

    if outputValid:
        self.logger.info(' Time series exists -- Done.')
        return

    # masks with per-region cell and depth-range information, written by
    # an earlier task into the same output directory
    regionMaskFileName = '{}/depthMasks{}.nc'.format(outputDirectory,
                                                     timeSeriesName)
    dsRegionMask = xarray.open_dataset(regionMaskFileName)
    nRegions = dsRegionMask.sizes['nRegions']
    areaCell = dsRegionMask.areaCell

    datasets = []
    nTime = len(inputFiles)
    # one input file per time slice (monthly means)
    for tIndex in range(nTime):
        self.logger.info(' {}/{}'.format(tIndex + 1, nTime))
        dsIn = open_mpas_dataset(fileName=inputFiles[tIndex],
                                 calendar=self.calendar,
                                 variableList=variableList,
                                 startDate=startDate,
                                 endDate=endDate).isel(Time=0)

        layerThickness = dsIn.timeMonthly_avg_layerThickness

        innerDatasets = []
        for regionIndex in range(nRegions):
            self.logger.info(' region: {}'.format(
                self.regionNames[regionIndex]))
            dsRegion = dsRegionMask.isel(nRegions=regionIndex)
            cellMask = dsRegion.cellMask
            totalArea = dsRegion.totalArea
            # restrict to the region's cells before the heavy arithmetic
            depthMask = dsRegion.depthMask.where(cellMask, drop=True)
            localArea = areaCell.where(cellMask, drop=True)
            localThickness = layerThickness.where(cellMask, drop=True)

            # cell volume (area x thickness), masked to the depth range
            volCell = (localArea * localThickness).where(depthMask)
            volCell = volCell.transpose('nCells', 'nVertLevels')
            totalVol = volCell.sum(dim='nVertLevels').sum(dim='nCells')
            self.logger.info(' totalVol (mil. km^3): {}'.format(
                1e-15 * totalVol.values))

            dsOut = xarray.Dataset()
            dsOut['totalVol'] = totalVol
            dsOut.totalVol.attrs['units'] = 'm^3'

            for var in variables:
                outName = var['name']
                self.logger.info(' {}'.format(outName))
                mpasVarName = var['mpas']
                timeSeries = dsIn[mpasVarName].where(cellMask, drop=True)
                units = timeSeries.units
                description = timeSeries.long_name

                # 3-d fields get a volume-weighted mean, 2-d fields an
                # area-weighted mean
                is3d = 'nVertLevels' in timeSeries.dims
                if is3d:
                    timeSeries = \
                        (volCell*timeSeries.where(depthMask)).sum(
                            dim='nVertLevels').sum(dim='nCells') / totalVol
                else:
                    timeSeries = \
                        (localArea*timeSeries).sum(
                            dim='nCells') / totalArea

                dsOut[outName] = timeSeries
                dsOut[outName].attrs['units'] = units
                dsOut[outName].attrs['description'] = description
                dsOut[outName].attrs['is3d'] = str(is3d)

            innerDatasets.append(dsOut)

        datasets.append(innerDatasets)

    # combine data sets into a single data set
    dsOut = xarray.combine_nested(datasets, ['Time', 'nRegions'])

    dsOut['totalArea'] = dsRegionMask.totalArea
    dsOut.totalArea.attrs['units'] = 'm^2'
    dsOut['zbounds'] = dsRegionMask.zbounds
    dsOut.zbounds.attrs['units'] = 'm'
    dsOut.coords['regionNames'] = dsRegionMask.regionNames
    dsOut.coords['year'] = (('Time'), years)
    dsOut['year'].attrs['units'] = 'years'
    dsOut.coords['month'] = (('Time'), months)
    dsOut['month'].attrs['units'] = 'months'

    write_netcdf(dsOut, outFileName)  # }}}
def _compute_time_series_with_ncrcat(self):  # {{{
    '''
    Uses ncrcat to extract time series from timeSeriesMonthlyOutput files

    Raises
    ------
    OSError
        If ``ncrcat`` is not in the system path.

    Author
    ------
    Xylar Asay-Davis
    '''

    # NOTE(review): distutils' find_executable is deprecated in newer
    # Python; shutil.which is the modern equivalent -- consider migrating
    if find_executable('ncrcat') is None:
        raise OSError('ncrcat not found. Make sure the latest nco '
                      'package is installed: \n'
                      'conda install nco\n'
                      'Note: this presumes use of the conda-forge '
                      'channel.')

    inputFiles = self.inputFiles
    append = False
    if os.path.exists(self.outputFile):
        # make sure all the necessary variables are also present
        with xr.open_dataset(self.outputFile) as ds:
            if ds.sizes['Time'] == 0:
                updateSubset = False
            else:
                updateSubset = True
                for variableName in self.variableList:
                    if variableName not in ds.variables:
                        updateSubset = False
                        break

            if updateSubset:
                # add only input files with times that aren't already in
                # the output file
                append = True

                fileNames = sorted(self.inputFiles)
                inYears, inMonths = get_files_year_month(
                    fileNames, self.historyStreams,
                    'timeSeriesStatsMonthlyOutput')

                inYears = numpy.array(inYears)
                inMonths = numpy.array(inMonths)
                # linearize (year, month) so times can be compared with a
                # single integer
                totalMonths = 12 * inYears + inMonths

                dates = decode_strings(ds.xtime_startMonthly)

                lastDate = dates[-1]

                # xtime strings are 'YYYY-MM-...'
                lastYear = int(lastDate[0:4])
                lastMonth = int(lastDate[5:7])
                lastTotalMonths = 12 * lastYear + lastMonth

                inputFiles = []
                for index, inputFile in enumerate(fileNames):
                    if totalMonths[index] > lastTotalMonths:
                        inputFiles.append(inputFile)

                if len(inputFiles) == 0:
                    # nothing to do
                    return
            else:
                # there is an output file but it has the wrong variables
                # so we need to delete it.
                self.logger.warning('Warning: deleting file {} because '
                                    'it is empty or some variables were '
                                    'missing'.format(self.outputFile))
                os.remove(self.outputFile)

    # always carry the xtime bounds along with the requested variables
    variableList = self.variableList + ['xtime_startMonthly',
                                        'xtime_endMonthly']

    args = ['ncrcat', '-4', '--no_tmp_fl',
            '-v', ','.join(variableList)]

    if append:
        args.append('--record_append')

    # log an abbreviated command line (first/last input file only)
    printCommand = '{} {} ... {} {}'.format(' '.join(args), inputFiles[0],
                                            inputFiles[-1],
                                            self.outputFile)
    args.extend(inputFiles)
    args.append(self.outputFile)

    self.logger.info('running: {}'.format(printCommand))
    for handler in self.logger.handlers:
        handler.flush()

    process = subprocess.Popen(args, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    stdout, stderr = process.communicate()

    # forward child output/errors to the task's logger
    if stdout:
        stdout = stdout.decode('utf-8')
        for line in stdout.split('\n'):
            self.logger.info(line)
    if stderr:
        stderr = stderr.decode('utf-8')
        for line in stderr.split('\n'):
            self.logger.error(line)

    if process.returncode != 0:
        raise subprocess.CalledProcessError(process.returncode,
                                            ' '.join(args))
def run_task(self):  # {{{
    """
    Compute time series of regional profiles
    """
    # Authors
    # -------
    # Milena Veneziani, Mark Petersen, Phillip J. Wolfram, Xylar Asay-Davis

    self.logger.info("\nCompute time series of regional profiles...")

    # full calendar years from startYear through endYear
    startDate = '{:04d}-01-01_00:00:00'.format(self.startYear)
    endDate = '{:04d}-12-31_23:59:59'.format(self.endYear)

    timeSeriesName = self.masksSubtask.regionGroup.replace(' ', '')

    outputDirectory = '{}/{}/'.format(
        build_config_full_path(self.config, 'output',
                               'timeseriesSubdirectory'),
        timeSeriesName)
    try:
        os.makedirs(outputDirectory)
    except OSError:
        # directory presumably already exists
        pass

    outputFileName = '{}/regionalProfiles_{}_{:04d}-{:04d}.nc'.format(
        outputDirectory, timeSeriesName, self.startYear, self.endYear)

    inputFiles = sorted(self.historyStreams.readpath(
        'timeSeriesStatsMonthlyOutput', startDate=startDate,
        endDate=endDate, calendar=self.calendar))

    years, months = get_files_year_month(inputFiles,
                                         self.historyStreams,
                                         'timeSeriesStatsMonthlyOutput')

    variableList = [field['mpas'] for field in self.fields]

    # the existing output is only valid if it covers every input
    # year/month; otherwise recompute
    outputExists = os.path.exists(outputFileName)
    outputValid = outputExists
    if outputExists:
        with open_mpas_dataset(fileName=outputFileName,
                               calendar=self.calendar,
                               timeVariableNames=None,
                               variableList=None,
                               startDate=startDate,
                               endDate=endDate) as dsIn:

            for inIndex in range(dsIn.dims['Time']):

                mask = np.logical_and(
                    dsIn.year[inIndex].values == years,
                    dsIn.month[inIndex].values == months)
                if np.count_nonzero(mask) == 0:
                    outputValid = False
                    break

    if outputValid:
        self.logger.info(' Time series exists -- Done.')
        return

    # get areaCell
    restartFileName = \
        self.runStreams.readpath('restart')[0]

    dsRestart = xr.open_dataset(restartFileName)
    dsRestart = dsRestart.isel(Time=0)
    areaCell = dsRestart.areaCell

    nVertLevels = dsRestart.sizes['nVertLevels']

    vertIndex = \
        xr.DataArray.from_dict({'dims': ('nVertLevels',),
                                'data': np.arange(nVertLevels)})

    # True for levels above the sea floor of each cell
    vertMask = vertIndex < dsRestart.maxLevelCell

    # get region masks
    regionMaskFileName = self.masksSubtask.maskFileName
    dsRegionMask = xr.open_dataset(regionMaskFileName)

    # figure out the indices of the regions to plot
    regionNames = decode_strings(dsRegionMask.regionNames)

    regionIndices = []
    for regionToPlot in self.regionNames:
        for index, regionName in enumerate(regionNames):
            if regionToPlot == regionName:
                regionIndices.append(index)
                break

    # select only those regions we want to plot
    dsRegionMask = dsRegionMask.isel(nRegions=regionIndices)
    cellMasks = dsRegionMask.regionCellMasks
    regionNamesVar = dsRegionMask.regionNames

    # per-region, per-level ocean area used as the weighting denominator
    totalArea = (cellMasks * areaCell * vertMask).sum('nCells')

    datasets = []
    for timeIndex, fileName in enumerate(inputFiles):
        dsLocal = open_mpas_dataset(
            fileName=fileName,
            calendar=self.calendar,
            variableList=variableList,
            startDate=startDate,
            endDate=endDate)
        dsLocal = dsLocal.isel(Time=0)
        time = dsLocal.Time.values
        date = days_to_datetime(time, calendar=self.calendar)

        self.logger.info(' date: {:04d}-{:02d}'.format(date.year,
                                                       date.month))

        # for each region and variable, compute area-weighted sum and
        # squared sum
        for field in self.fields:
            variableName = field['mpas']
            prefix = field['prefix']
            self.logger.info(' {}'.format(field['titleName']))

            var = dsLocal[variableName].where(vertMask)

            meanName = '{}_mean'.format(prefix)
            dsLocal[meanName] = \
                (cellMasks * areaCell * var).sum('nCells') / totalArea

            # mean of the square, kept so standard deviations can be
            # derived later
            meanSquaredName = '{}_meanSquared'.format(prefix)
            dsLocal[meanSquaredName] = \
                (cellMasks * areaCell * var**2).sum('nCells') / totalArea

        # drop the original variables
        dsLocal = dsLocal.drop_vars(variableList)

        datasets.append(dsLocal)

    # combine data sets into a single data set
    dsOut = xr.concat(datasets, 'Time')
    dsOut.coords['regionNames'] = regionNamesVar
    dsOut['totalArea'] = totalArea
    dsOut.coords['year'] = (('Time',), years)
    dsOut['year'].attrs['units'] = 'years'
    dsOut.coords['month'] = (('Time',), months)
    dsOut['month'].attrs['units'] = 'months'

    # Note: restart file, not a mesh file because we need refBottomDepth,
    # not in a mesh file
    try:
        restartFile = self.runStreams.readpath('restart')[0]
    except ValueError:
        raise IOError('No MPAS-O restart file found: need at least one '
                      'restart file for plotting time series vs. depth')

    with xr.open_dataset(restartFile) as dsRestart:
        depths = dsRestart.refBottomDepth.values
        # mid-layer depths (negative downward) from layer bottom depths
        z = np.zeros(depths.shape)
        z[0] = -0.5 * depths[0]
        z[1:] = -0.5 * (depths[0:-1] + depths[1:])

    dsOut.coords['z'] = (('nVertLevels',), z)
    dsOut['z'].attrs['units'] = 'meters'

    write_netcdf(dsOut, outputFileName)
def _compute_moc_time_series_postprocess(self):  # {{{
    '''compute MOC time series as a post-process'''

    # Compute and plot time series of Atlantic MOC at 26.5N (RAPID array)
    self.logger.info('\n Compute and/or plot post-processed Atlantic MOC '
                     'time series...')
    self.logger.info(' Load data...')

    outputDirectory = build_config_full_path(self.config, 'output',
                                             'timeseriesSubdirectory')
    try:
        os.makedirs(outputDirectory)
    except OSError:
        # directory presumably already exists
        pass

    outputFileTseries = '{}/mocTimeSeries.nc'.format(outputDirectory)

    dvEdge, areaCell, refBottomDepth, latCell, nVertLevels, \
        refTopDepth, refLayerThickness = self._load_mesh()

    # find the latitude bin closest to 26.5N
    latAtlantic = self.lat['Atlantic']
    dLat = latAtlantic - 26.5
    indlat26 = np.where(np.abs(dLat) == np.amin(np.abs(dLat)))

    dictRegion = self.dictRegion['Atlantic']
    maxEdgesInTransect = dictRegion['maxEdgesInTransect']
    transectEdgeGlobalIDs = dictRegion['transectEdgeGlobalIDs']
    transectEdgeMaskSigns = dictRegion['transectEdgeMaskSigns']
    regionCellMask = dictRegion['cellMask']

    streamName = 'timeSeriesStatsMonthlyOutput'
    inputFilesTseries = sorted(
        self.historyStreams.readpath(streamName,
                                     startDate=self.startDateTseries,
                                     endDate=self.endDateTseries,
                                     calendar=self.calendar))

    years, months = get_files_year_month(inputFilesTseries,
                                         self.historyStreams,
                                         'timeSeriesStatsMonthlyOutput')

    # per-time-slice accumulators; 'computed' marks slices recovered from
    # a previous run so they are not recomputed
    mocRegion = np.zeros(len(inputFilesTseries))
    times = np.zeros(len(inputFilesTseries))
    computed = np.zeros(len(inputFilesTseries), bool)

    continueOutput = os.path.exists(outputFileTseries)
    if continueOutput:
        self.logger.info(' Read in previously computed MOC time series')
        with open_mpas_dataset(fileName=outputFileTseries,
                               calendar=self.calendar,
                               timeVariableNames=None,
                               variableList=['mocAtlantic26'],
                               startDate=self.startDateTseries,
                               endDate=self.endDateTseries) as dsMOCIn:

            dsMOCIn.load()

            # first, copy all computed data
            for inIndex in range(dsMOCIn.dims['Time']):

                # match this output slice to its input file by year/month
                mask = np.logical_and(
                    dsMOCIn.year[inIndex].values == years,
                    dsMOCIn.month[inIndex].values == months)

                outIndex = np.where(mask)[0][0]

                mocRegion[outIndex] = dsMOCIn.mocAtlantic26[inIndex]
                times[outIndex] = dsMOCIn.Time[inIndex]
                computed[outIndex] = True

            if np.all(computed):
                # no need to waste time writing out the data set again
                return dsMOCIn

    for timeIndex, fileName in enumerate(inputFilesTseries):
        if computed[timeIndex]:
            continue

        dsLocal = open_mpas_dataset(fileName=fileName,
                                    calendar=self.calendar,
                                    variableList=self.variableList,
                                    startDate=self.startDateTseries,
                                    endDate=self.endDateTseries)
        dsLocal = dsLocal.isel(Time=0)
        time = dsLocal.Time.values
        times[timeIndex] = time
        date = days_to_datetime(time, calendar=self.calendar)

        self.logger.info(' date: {:04d}-{:02d}'.format(
            date.year, date.month))

        if self.includeBolus:
            # add the Gent-McWilliams bolus contribution to the resolved
            # velocities
            dsLocal['avgNormalVelocity'] = \
                dsLocal['timeMonthly_avg_normalVelocity'] + \
                dsLocal['timeMonthly_avg_normalGMBolusVelocity']

            dsLocal['avgVertVelocityTop'] = \
                dsLocal['timeMonthly_avg_vertVelocityTop'] + \
                dsLocal['timeMonthly_avg_vertGMBolusVelocityTop']
        else:
            # rename some variables for convenience
            dsLocal = dsLocal.rename({
                'timeMonthly_avg_normalVelocity': 'avgNormalVelocity',
                'timeMonthly_avg_vertVelocityTop': 'avgVertVelocityTop'})

        horizontalVel = dsLocal.avgNormalVelocity.values
        verticalVel = dsLocal.avgVertVelocityTop.values
        velArea = verticalVel * areaCell[:, np.newaxis]

        transportZ = self._compute_transport(maxEdgesInTransect,
                                             transectEdgeGlobalIDs,
                                             transectEdgeMaskSigns,
                                             nVertLevels, dvEdge,
                                             refLayerThickness,
                                             horizontalVel)
        mocTop = self._compute_moc(latAtlantic, nVertLevels, latCell,
                                   regionCellMask, transportZ, velArea)
        # max over depth at the latitude bin nearest 26.5N
        mocRegion[timeIndex] = np.amax(mocTop[:, indlat26])

    description = 'Max MOC Atlantic streamfunction nearest to RAPID ' \
        'Array latitude (26.5N)'

    # build the output data set from a dictionary spec
    dictonary = {
        'dims': ['Time'],
        'coords': {
            'Time': {
                'dims': ('Time'),
                'data': times,
                'attrs': {'units': 'days since 0001-01-01'}},
            'year': {
                'dims': ('Time'),
                'data': years,
                'attrs': {'units': 'year'}},
            'month': {
                'dims': ('Time'),
                'data': months,
                'attrs': {'units': 'month'}}},
        'data_vars': {
            'mocAtlantic26': {
                'dims': ('Time'),
                'data': mocRegion,
                'attrs': {'units': 'Sv (10^6 m^3/s)',
                          'description': description}}}}
    dsMOCTimeSeries = xr.Dataset.from_dict(dictonary)
    write_netcdf(dsMOCTimeSeries, outputFileTseries)

    return dsMOCTimeSeries  # }}}
def _compute_moc_time_series_analysismember(self):  # {{{
    '''compute MOC time series from analysis member'''

    # Compute and plot time series of Atlantic MOC at 26.5N (RAPID array)
    self.logger.info(
        '\n Compute Atlantic MOC time series from analysis member...')
    self.logger.info(' Load data...')

    outputDirectory = build_config_full_path(self.config, 'output',
                                             'timeseriesSubdirectory')
    try:
        os.makedirs(outputDirectory)
    except OSError:
        # directory presumably already exists
        pass

    outputFileTseries = '{}/mocTimeSeries.nc'.format(outputDirectory)

    streamName = 'timeSeriesStatsMonthlyOutput'

    # Get bin latitudes and index of 26.5N
    binBoundaryMocStreamfunction = None
    # first try timeSeriesStatsMonthly for bin boundaries, then try
    # mocStreamfunctionOutput stream as a backup option
    # NOTE(review): the except below raises on the *first* failing
    # stream, so the documented fallback to mocStreamfunctionOutput can
    # never be reached if timeSeriesStatsMonthlyOutput has no files --
    # confirm whether that is intended
    for streamName in ['timeSeriesStatsMonthlyOutput',
                       'mocStreamfunctionOutput']:
        try:
            inputFile = self.historyStreams.readpath(streamName)[0]
        except ValueError:
            raise IOError('At least one file from stream {} is needed '
                          'to compute MOC'.format(streamName))

        with xr.open_dataset(inputFile) as ds:
            if 'binBoundaryMocStreamfunction' in ds.data_vars:
                binBoundaryMocStreamfunction = \
                    ds.binBoundaryMocStreamfunction.values
                break

    if binBoundaryMocStreamfunction is None:
        raise ValueError('Could not find binBoundaryMocStreamfunction in '
                         'either timeSeriesStatsMonthlyOutput or '
                         'mocStreamfunctionOutput streams')

    # find the latitude bin closest to 26.5N
    binBoundaryMocStreamfunction = np.rad2deg(binBoundaryMocStreamfunction)
    dLat = binBoundaryMocStreamfunction - 26.5
    indlat26 = np.where(np.abs(dLat) == np.amin(np.abs(dLat)))

    # NOTE(review): streamName here is whatever the loop above last bound
    # (possibly 'mocStreamfunctionOutput'), while get_files_year_month
    # below hard-codes 'timeSeriesStatsMonthlyOutput' -- verify these are
    # meant to differ
    inputFilesTseries = sorted(
        self.historyStreams.readpath(streamName,
                                     startDate=self.startDateTseries,
                                     endDate=self.endDateTseries,
                                     calendar=self.calendar))

    years, months = get_files_year_month(inputFilesTseries,
                                         self.historyStreams,
                                         'timeSeriesStatsMonthlyOutput')

    # per-time-slice accumulators; 'computed' marks slices recovered from
    # a previous run so they are not recomputed
    mocRegion = np.zeros(len(inputFilesTseries))
    times = np.zeros(len(inputFilesTseries))
    computed = np.zeros(len(inputFilesTseries), bool)

    continueOutput = os.path.exists(outputFileTseries)
    if continueOutput:
        self.logger.info(' Read in previously computed MOC time series')
        with open_mpas_dataset(fileName=outputFileTseries,
                               calendar=self.calendar,
                               timeVariableNames=None,
                               variableList=['mocAtlantic26'],
                               startDate=self.startDateTseries,
                               endDate=self.endDateTseries) as dsMOCIn:

            dsMOCIn.load()

            # first, copy all computed data
            for inIndex in range(dsMOCIn.dims['Time']):

                # match this output slice to its input file by year/month
                mask = np.logical_and(
                    dsMOCIn.year[inIndex].values == years,
                    dsMOCIn.month[inIndex].values == months)

                outIndex = np.where(mask)[0][0]

                mocRegion[outIndex] = dsMOCIn.mocAtlantic26[inIndex]
                times[outIndex] = dsMOCIn.Time[inIndex]
                computed[outIndex] = True

            if np.all(computed):
                # no need to waste time writing out the data set again
                return dsMOCIn

    for timeIndex, fileName in enumerate(inputFilesTseries):
        if computed[timeIndex]:
            continue

        dsLocal = open_mpas_dataset(fileName=fileName,
                                    calendar=self.calendar,
                                    variableList=self.variableList,
                                    startDate=self.startDateTseries,
                                    endDate=self.endDateTseries)
        dsLocal = dsLocal.isel(Time=0)
        time = dsLocal.Time.values
        times[timeIndex] = time
        date = days_to_datetime(time, calendar=self.calendar)

        self.logger.info(' date: {:04d}-{:02d}'.format(
            date.year, date.month))

        # hard-wire region=0 (Atlantic) for now
        indRegion = 0
        mocTop = dsLocal.timeMonthly_avg_mocStreamvalLatAndDepthRegion[
            indRegion, :, :].values
        # max over depth at the latitude bin nearest 26.5N
        mocRegion[timeIndex] = np.amax(mocTop[:, indlat26])

    description = 'Max MOC Atlantic streamfunction nearest to RAPID ' \
        'Array latitude (26.5N)'

    # build the output data set from a dictionary spec
    dictonary = {
        'dims': ['Time'],
        'coords': {
            'Time': {
                'dims': ('Time'),
                'data': times,
                'attrs': {'units': 'days since 0001-01-01'}},
            'year': {
                'dims': ('Time'),
                'data': years,
                'attrs': {'units': 'year'}},
            'month': {
                'dims': ('Time'),
                'data': months,
                'attrs': {'units': 'month'}}},
        'data_vars': {
            'mocAtlantic26': {
                'dims': ('Time'),
                'data': mocRegion,
                'attrs': {'units': 'Sv (10^6 m^3/s)',
                          'description': description}}}}
    dsMOCTimeSeries = xr.Dataset.from_dict(dictonary)
    write_netcdf(dsMOCTimeSeries, outputFileTseries)
    return dsMOCTimeSeries  # }}}
def run_task(self):  # {{{
    """
    Computes time-series of transport through transects.
    """
    # Authors
    # -------
    # Xylar Asay-Davis, Stephen Price

    self.logger.info("Computing time series of transport through "
                     "transects...")

    config = self.config

    # full calendar years from startYear through endYear
    startDate = '{:04d}-01-01_00:00:00'.format(self.startYear)
    endDate = '{:04d}-12-31_23:59:59'.format(self.endYear)

    outputDirectory = '{}/transport/'.format(
        build_config_full_path(config, 'output', 'timeseriesSubdirectory'))
    try:
        os.makedirs(outputDirectory)
    except OSError:
        # directory presumably already exists
        pass

    outFileName = '{}/transport_{:04d}-{:04d}.nc'.format(
        outputDirectory, self.startYear, self.endYear)

    inputFiles = sorted(
        self.historyStreams.readpath('timeSeriesStatsMonthlyOutput',
                                     startDate=startDate,
                                     endDate=endDate,
                                     calendar=self.calendar))

    years, months = get_files_year_month(inputFiles,
                                         self.historyStreams,
                                         'timeSeriesStatsMonthlyOutput')

    variableList = ['timeMonthly_avg_layerThickness']

    # choose the best available velocity variable(s) by inspecting the
    # first input file: prefer the transport velocity, then resolved +
    # GM bolus, and finally the advection velocity alone
    with open_mpas_dataset(fileName=inputFiles[0],
                           calendar=self.calendar,
                           startDate=startDate,
                           endDate=endDate) as dsIn:
        if 'timeMonthly_avg_normalTransportVelocity' in dsIn:
            variableList.append('timeMonthly_avg_normalTransportVelocity')
        elif 'timeMonthly_avg_normalGMBolusVelocity' in dsIn:
            variableList = variableList + \
                ['timeMonthly_avg_normalVelocity',
                 'timeMonthly_avg_normalGMBolusVelocity']
        else:
            self.logger.warning('Cannot compute transport velocity. '
                                'Using advection velocity.')
            variableList.append('timeMonthly_avg_normalVelocity')

    # the existing output is only valid if it covers every input
    # year/month; otherwise recompute
    outputExists = os.path.exists(outFileName)
    outputValid = outputExists
    if outputExists:
        with open_mpas_dataset(fileName=outFileName,
                               calendar=self.calendar,
                               timeVariableNames=None,
                               variableList=None,
                               startDate=startDate,
                               endDate=endDate) as dsOut:

            for inIndex in range(dsOut.dims['Time']):

                mask = numpy.logical_and(
                    dsOut.year[inIndex].values == years,
                    dsOut.month[inIndex].values == months)
                if numpy.count_nonzero(mask) == 0:
                    outputValid = False
                    break

    if outputValid:
        self.logger.info(' Time series exists -- Done.')
        return

    transectMaskFileName = self.masksSubtask.maskFileName

    dsTransectMask = xarray.open_dataset(transectMaskFileName)

    # figure out the indices of the transects to plot
    maskTransectNames = decode_strings(dsTransectMask.transectNames)

    dsMesh = xarray.open_dataset(self.restartFileName)
    dvEdge = dsMesh.dvEdge
    # convert Fortran 1-based cell indices to 0-based
    cellsOnEdge = dsMesh.cellsOnEdge - 1

    timeDatasets = []
    self.logger.info(' Computing transport...')
    for fileName in inputFiles:
        self.logger.info(' input file: {}'.format(fileName))
        dsTimeSlice = open_mpas_dataset(fileName=fileName,
                                        calendar=self.calendar,
                                        variableList=variableList,
                                        startDate=startDate,
                                        endDate=endDate)

        transectDatasets = []
        transectIndices = []
        for transect in self.transectsToPlot:
            self.logger.info(' transect: {}'.format(transect))
            try:
                transectIndex = maskTransectNames.index(transect)
            except ValueError:
                self.logger.warning(' Not found in masks. '
                                    'Skipping.')
                continue
            transectIndices.append(transectIndex)

            # select the current transect
            dsMask = dsTransectMask.isel(nTransects=[transectIndex])
            # drop unused (padding) entries, marked by non-positive IDs
            edgeIndices = dsMask.transectEdgeGlobalIDs - 1
            edgeIndices = edgeIndices.where(edgeIndices >= 0,
                                            drop=True).astype(int)
            edgeSign = dsMask.transectEdgeMaskSigns.isel(
                nEdges=edgeIndices)

            dsIn = dsTimeSlice.isel(nEdges=edgeIndices)

            dv = dvEdge.isel(nEdges=edgeIndices)
            coe = cellsOnEdge.isel(nEdges=edgeIndices)

            # work on data from simulations
            if 'timeMonthly_avg_normalTransportVelocity' in dsIn:
                vel = dsIn.timeMonthly_avg_normalTransportVelocity
            elif 'timeMonthly_avg_normalGMBolusVelocity' in dsIn:
                vel = (dsIn.timeMonthly_avg_normalVelocity +
                       dsIn.timeMonthly_avg_normalGMBolusVelocity)
            else:
                vel = dsIn.timeMonthly_avg_normalVelocity

            # get layer thickness on edges by averaging adjacent cells
            h = 0.5 * dsIn.timeMonthly_avg_layerThickness.isel(
                nCells=coe).sum(dim='TWO')

            # signed volume flux through each edge and level
            edgeTransport = edgeSign * vel * h * dv

            # convert from m^3/s to Sv
            transport = (constants.m3ps_to_Sv * edgeTransport.sum(
                dim=['maxEdgesInTransect', 'nVertLevels']))

            dsOut = xarray.Dataset()
            dsOut['transport'] = transport
            dsOut.transport.attrs['units'] = 'Sv'
            dsOut.transport.attrs['description'] = \
                'Transport through transects'

            transectDatasets.append(dsOut)

        dsOut = xarray.concat(transectDatasets, 'nTransects')
        timeDatasets.append(dsOut)

    # combine data sets into a single data set
    dsOut = xarray.concat(timeDatasets, 'Time')
    dsOut.coords['transectNames'] = dsTransectMask.transectNames.isel(
        nTransects=transectIndices)
    dsOut.coords['year'] = (('Time'), years)
    dsOut['year'].attrs['units'] = 'years'
    dsOut.coords['month'] = (('Time'), months)
    dsOut['month'].attrs['units'] = 'months'
    write_netcdf(dsOut, outFileName)
def update_time_bounds_from_file_names(config, section, componentName):
    # {{{
    """
    Update the start and end years and dates for time series, climatologies
    or climate indices based on the years actually available in the list of
    files.

    Parameters
    ----------
    config : configparser-like
        Configuration object whose ``startYear``/``endYear``/``startDate``/
        ``endDate`` options in ``section`` are read and possibly rewritten

    section : str
        The config section for the analysis being bounded

    componentName : str
        The component ('ocean', 'seaIce', etc.) whose history streams are
        examined

    Raises
    ------
    ValueError
        If no input files are found, if the available files contain no
        complete year, or (when ``errorOnMissing`` is set) if the available
        years do not match the requested range.
    """
    # Authors
    # -------
    # Xylar Asay-Davis

    # read parameters from config file
    # the run directory contains the restart files
    runDirectory = build_config_full_path(config, 'input', 'runSubdirectory')
    # if the history directory exists, use it; if not, fall back on
    # runDirectory
    historyDirectory = build_config_full_path(
        config, 'input', '{}HistorySubdirectory'.format(componentName),
        defaultPath=runDirectory)

    errorOnMissing = config.getboolean('input', 'errorOnMissing')

    namelistFileName = build_config_full_path(
        config, 'input', '{}NamelistFileName'.format(componentName))
    try:
        namelist = NameList(namelistFileName)
    except (OSError, IOError):
        # this component likely doesn't have output in this run
        return

    streamsFileName = build_config_full_path(
        config, 'input', '{}StreamsFileName'.format(componentName))
    try:
        historyStreams = StreamsFile(streamsFileName,
                                     streamsdir=historyDirectory)
    except (OSError, IOError):
        # this component likely doesn't have output in this run
        return

    calendar = namelist.get('config_calendar_type')

    requestedStartYear = config.getint(section, 'startYear')
    requestedEndYear = config.get(section, 'endYear')
    if requestedEndYear == 'end':
        # 'end' means "use whatever the data provides"
        requestedEndYear = None
    else:
        # get it again as an integer
        requestedEndYear = config.getint(section, 'endYear')

    startDate = '{:04d}-01-01_00:00:00'.format(requestedStartYear)
    if requestedEndYear is None:
        endDate = None
    else:
        endDate = '{:04d}-12-31_23:59:59'.format(requestedEndYear)

    streamName = 'timeSeriesStatsMonthlyOutput'
    try:
        inputFiles = historyStreams.readpath(streamName,
                                             startDate=startDate,
                                             endDate=endDate,
                                             calendar=calendar)
    except ValueError:
        # this component likely doesn't have output in this run
        return

    if len(inputFiles) == 0:
        raise ValueError('No input files found for stream {} in {} between '
                         '{} and {}'.format(streamName, componentName,
                                            requestedStartYear,
                                            requestedEndYear))

    years, months = get_files_year_month(sorted(inputFiles),
                                         historyStreams,
                                         streamName)

    # search for the start of the first full year (first January)
    firstIndex = 0
    while (firstIndex < len(years) and months[firstIndex] != 1):
        firstIndex += 1

    # search for the end of the last full year (last December)
    lastIndex = len(years) - 1
    while (lastIndex >= 0 and months[lastIndex] != 12):
        lastIndex -= 1

    # guard against data with no complete year: without this, the
    # indexing below would fail with an opaque IndexError
    if firstIndex >= len(years) or lastIndex < 0:
        raise ValueError('No complete years found for stream {} in '
                         '{}'.format(streamName, componentName))

    startYear = years[firstIndex]
    endYear = years[lastIndex]

    if requestedEndYear is None:
        # 'end' was requested: adopt the last complete year in the data
        config.set(section, 'endYear', str(endYear))
        requestedEndYear = endYear

    if startYear != requestedStartYear or endYear != requestedEndYear:
        if errorOnMissing:
            raise ValueError(
                "{} start and/or end year different from requested\n"
                "requested: {:04d}-{:04d}\n"
                "actual:   {:04d}-{:04d}\n".format(section,
                                                   requestedStartYear,
                                                   requestedEndYear,
                                                   startYear, endYear))
        else:
            print("Warning: {} start and/or end year different from "
                  "requested\n"
                  "requested: {:04d}-{:04d}\n"
                  "actual: {:04d}-{:04d}\n".format(section,
                                                   requestedStartYear,
                                                   requestedEndYear,
                                                   startYear, endYear))
            config.set(section, 'startYear', str(startYear))
            config.set(section, 'endYear', str(endYear))

    startDate = '{:04d}-01-01_00:00:00'.format(startYear)
    config.set(section, 'startDate', startDate)
    endDate = '{:04d}-12-31_23:59:59'.format(endYear)
    config.set(section, 'endDate', endDate)
def run_task(self):  # {{{
    '''
    Compute the regional-mean time series

    Reads the monthly-mean history files between ``self.startYear`` and
    ``self.endYear``, masks each requested region (optionally excluding
    ice-shelf cavities and restricting to a depth range), computes
    volume-weighted (3-D) or area-weighted (2-D) means of each requested
    variable, and writes the result to a NetCDF file.  If a valid output
    file covering all input months already exists, the task is a no-op.
    '''
    # Authors
    # -------
    # Xylar Asay-Davis

    config = self.config

    self.logger.info("\nCompute time series of regional means...")

    startDate = '{:04d}-01-01_00:00:00'.format(self.startYear)
    endDate = '{:04d}-12-31_23:59:59'.format(self.endYear)

    # derive section and file names from the region group, e.g.
    # 'antarctic regions' -> 'timeSeriesAntarcticRegions' /
    # 'antarcticRegions'
    regionGroup = self.regionGroup
    sectionSuffix = regionGroup[0].upper() + \
        regionGroup[1:].replace(' ', '')
    timeSeriesName = sectionSuffix[0].lower() + sectionSuffix[1:]
    sectionName = 'timeSeries{}'.format(sectionSuffix)

    outputDirectory = '{}/{}/'.format(
        build_config_full_path(config, 'output', 'timeseriesSubdirectory'),
        timeSeriesName)
    try:
        os.makedirs(outputDirectory)
    except OSError:
        # directory already exists
        pass

    outFileName = '{}/{}_{:04d}-{:04d}.nc'.format(outputDirectory,
                                                  timeSeriesName,
                                                  self.startYear,
                                                  self.endYear)

    inputFiles = sorted(
        self.historyStreams.readpath('timeSeriesStatsMonthlyOutput',
                                     startDate=startDate,
                                     endDate=endDate,
                                     calendar=self.calendar))

    years, months = get_files_year_month(inputFiles,
                                         self.historyStreams,
                                         'timeSeriesStatsMonthlyOutput')

    variables = config.getExpression(sectionName, 'variables')

    # layerThickness is always needed for the volume weighting of 3-D fields
    variableList = [var['mpas'] for var in variables] + \
        ['timeMonthly_avg_layerThickness']

    # check whether an existing output file already covers every input
    # (year, month); if so, there is nothing to do
    outputExists = os.path.exists(outFileName)
    outputValid = outputExists
    if outputExists:
        with open_mpas_dataset(fileName=outFileName,
                               calendar=self.calendar,
                               timeVariableNames=None,
                               variableList=None,
                               startDate=startDate,
                               endDate=endDate) as dsOut:
            for inIndex in range(dsOut.dims['Time']):
                mask = numpy.logical_and(
                    dsOut.year[inIndex].values == years,
                    dsOut.month[inIndex].values == months)
                if numpy.count_nonzero(mask) == 0:
                    outputValid = False
                    break

    if outputValid:
        self.logger.info(' Time series exists -- Done.')
        return

    # Load mesh related variables
    try:
        restartFileName = self.runStreams.readpath('restart')[0]
    except ValueError:
        raise IOError('No MPAS-O restart file found: need at least one '
                      'restart file for ocean region time series')

    cellsChunk = 32768
    timeChunk = 1

    datasets = []
    for timeIndex, fileName in enumerate(inputFiles):
        dsTimeSlice = open_mpas_dataset(fileName=fileName,
                                        calendar=self.calendar,
                                        variableList=variableList,
                                        startDate=startDate,
                                        endDate=endDate)
        datasets.append(dsTimeSlice)

    chunk = {'Time': timeChunk, 'nCells': cellsChunk}

    # optional config overrides for the depth range; otherwise each
    # region's own zmin/zmax from the mask file are used
    if config.has_option(sectionName, 'zmin'):
        config_zmin = config.getfloat(sectionName, 'zmin')
    else:
        config_zmin = None

    if config.has_option(sectionName, 'zmax'):
        config_zmax = config.getfloat(sectionName, 'zmax')
    else:
        config_zmax = None

    # fix: the keyword was misspelled 'schedular', which made dask store a
    # meaningless config entry and ignore the intended threaded scheduler
    with dask.config.set(scheduler='threads',
                         pool=ThreadPool(self.daskThreads)):

        # combine data sets into a single data set
        dsIn = xarray.concat(datasets, 'Time').chunk(chunk)

        chunk = {'nCells': cellsChunk}
        dsRestart = xarray.open_dataset(restartFileName)
        dsRestart = dsRestart.isel(Time=0).chunk(chunk)
        dsIn['areaCell'] = dsRestart.areaCell
        if 'landIceMask' in dsRestart:
            # only the region outside of ice-shelf cavities
            dsIn['openOceanMask'] = dsRestart.landIceMask == 0

        dsIn['zMid'] = compute_zmid(dsRestart.bottomDepth,
                                    dsRestart.maxLevelCell,
                                    dsRestart.layerThickness)

        regionMaskFileName = self.masksSubtask.maskFileName
        dsRegionMask = xarray.open_dataset(regionMaskFileName)
        maskRegionNames = decode_strings(dsRegionMask.regionNames)

        datasets = []
        regionIndices = []
        for regionName in self.regionNames:
            self.logger.info(' region: {}'.format(regionName))
            regionIndex = maskRegionNames.index(regionName)
            regionIndices.append(regionIndex)

            chunk = {'nCells': cellsChunk}
            dsMask = dsRegionMask.isel(nRegions=regionIndex).chunk(chunk)
            cellMask = dsMask.regionCellMasks == 1
            if 'openOceanMask' in dsIn:
                cellMask = numpy.logical_and(cellMask, dsIn.openOceanMask)
            dsRegion = dsIn.where(cellMask, drop=True)

            totalArea = dsRegion['areaCell'].sum()
            self.logger.info(' totalArea: {} mil. km^2'.format(
                1e-12 * totalArea.values))
            self.logger.info("Don't worry about the following dask "
                             "warnings.")
            if config_zmin is None:
                zmin = dsMask.zmin
            else:
                zmin = config_zmin
            if config_zmax is None:
                zmax = dsMask.zmax
            else:
                zmax = config_zmax
            depthMask = numpy.logical_and(dsRegion.zMid >= zmin,
                                          dsRegion.zMid <= zmax)
            depthMask.compute()
            self.logger.info("Dask warnings should be done.")
            dsRegion['depthMask'] = depthMask

            layerThickness = dsRegion.timeMonthly_avg_layerThickness
            dsRegion['volCell'] = (dsRegion.areaCell *
                                   layerThickness).where(depthMask)
            totalVol = dsRegion.volCell.sum(dim='nVertLevels').sum(
                dim='nCells')
            totalVol.compute()
            self.logger.info(' totalVol (mil. km^3): {}'.format(
                1e-15 * totalVol.values))

            dsRegion = dsRegion.transpose('Time', 'nCells', 'nVertLevels')

            dsOut = xarray.Dataset()
            dsOut['totalVol'] = totalVol
            dsOut.totalVol.attrs['units'] = 'm^3'
            dsOut['totalArea'] = totalArea
            dsOut.totalArea.attrs['units'] = 'm^2'
            dsOut['zbounds'] = ('nbounds', [zmin, zmax])
            dsOut.zbounds.attrs['units'] = 'm'

            for var in variables:
                outName = var['name']
                self.logger.info(' {}'.format(outName))
                mpasVarName = var['mpas']
                timeSeries = dsRegion[mpasVarName]
                units = timeSeries.units
                description = timeSeries.long_name

                is3d = 'nVertLevels' in timeSeries.dims
                if is3d:
                    # volume-weighted mean over the masked depth range
                    timeSeries = \
                        (dsRegion.volCell*timeSeries.where(depthMask)).sum(
                            dim='nVertLevels').sum(dim='nCells') / totalVol
                else:
                    # area-weighted mean over the region
                    timeSeries = \
                        (dsRegion.areaCell*timeSeries).sum(
                            dim='nCells') / totalArea
                timeSeries.compute()

                dsOut[outName] = timeSeries
                dsOut[outName].attrs['units'] = units
                dsOut[outName].attrs['description'] = description
                dsOut[outName].attrs['is3d'] = str(is3d)

            datasets.append(dsOut)

        # combine data sets into a single data set
        dsOut = xarray.concat(datasets, 'nRegions')
        dsOut.coords['regionNames'] = dsRegionMask.regionNames.isel(
            nRegions=regionIndices)
        dsOut.coords['year'] = (('Time'), years)
        dsOut['year'].attrs['units'] = 'years'
        dsOut.coords['month'] = (('Time'), months)
        dsOut['month'].attrs['units'] = 'months'
        write_netcdf(dsOut, outFileName)