import glob
import math

import numpy as np
import xarray as xr

import pyncl.nclfuncs as ncl


def create_empty_array(yS, yE, mS, mE, opt_type):
    '''create blank array for use when something may be/is wrong.'''
    if yS is None or yE is None:
        yS = 1
        yE = 50
    timeT = ncl.yyyymm_time(yS, yE, int)
    time = timeT.sel(time=slice(yS * 100 + mS, yE * 100 + mE))
    if opt_type == 'time_lat_lon':
        blank_array_values = np.empty((time.sizes['time'], 90, 180), dtype=np.float32)
        lat = xr.DataArray(np.linspace(-89, 89, 90), dims=('lat',),
                           attrs={'units': 'degrees_north'})
        lon = xr.DataArray(np.linspace(0, 358, 180), dims=('lon',),
                           attrs={'units': 'degrees_east'})
        blank_array = xr.DataArray(blank_array_values,
                                   dims=('time', 'lat', 'lon'),
                                   coords={'time': time, 'lat': lat, 'lon': lon})
    elif opt_type == 'time_lev_lat':
        blank_array_values = np.empty((time.sizes['time'], 41, 90), dtype=np.float32)
        lat = xr.DataArray(np.linspace(-89, 89, 90), dims=('lat',),
                           attrs={'units': 'degrees_north'})
        lev = xr.DataArray(np.linspace(0, 500, 41), dims=('lev',),
                           attrs={'units': 'm', 'positive': 'down'})
        blank_array = xr.DataArray(blank_array_values,
                                   dims=('time', 'lev', 'lat'),
                                   coords={'time': time, 'lev': lev, 'lat': lat})
    blank_array.attrs['units'] = ''
    blank_array.attrs['is_all_missing'] = True
    return blank_array
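
# Illustrative sketch (not part of the original NCL port): exercise
# create_empty_array on its own. The 1979-1988 range below is made up;
# any yearS/yearE/month combination passed in from a namelist behaves
# the same way.
def _demo_create_empty_array():
    blank = create_empty_array(1979, 1988, 1, 12, 'time_lat_lon')
    print(blank.sizes)                      # expect 120 times x 90 lats x 180 lons
    print(blank.attrs['is_all_missing'])    # True marks the array as placeholder data
    return blank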
import pyncl.nclfuncs as ncl

#
# tests of yyyymm_time
#
t = ncl.yyyymm_time(1800, 1900)
print(len(t))
print(t)

t = ncl.yyyymm_time(1800, 1900, float)
print(len(t))
print(t.sel(time=slice(180001, 180012)))
def data_read_in_ice(zpath, vn, yearS, yearE):
    '''
    read in ice data from given files
    assign time coordinate variables, check for issues with the array, assign _FillValue (if needed)
    assign dimension names (for ease-of-use), check and modify units
    '''
    arr = None
    if vn == 'aice_nh':  # line 620 of ncl script
        vname = ("aice_nh", "aice", "sic", "SIC", "CN", "ice", "icec")
    if vn == 'aice_sh':
        vname = ("aice_sh", "aice", "sic", "SIC", "CN", "ice", "icec")
    if zpath is None:
        print(f'File missing, creating blank array of data. View {vn} namelist for details.')
        arr = create_empty_array(yearS, yearE, 1, 12, "time_lat_lon")
        sydata = yearS  # assign these variables based on yearS/yearE provided in namelist. Doesn't matter
        eydata = yearE  # as data array is totally missing..
        smdata = 1
        emdata = 12
    else:
        tfiles = sorted(glob.glob(zpath))  # line 638 of ncl script; sort so first/last file bracket the time span
        c = xr.open_mfdataset(tfiles)
        for v in vname:
            if v in c:
                if v == 'aice_nh' or v == 'aice_sh' or v == 'aice':  # CCSM/CESM file
                    arr = c[v]
                    if 'coordinates' in arr.attrs:
                        strarr = arr.attrs['coordinates'].split()
                        if 'TLON' in strarr:  # CESM longitude 2D coordinate
                            if len(c.TLON.dims) == 3:
                                arr.attrs['lon2d'] = c.TLON[0, :, :]
                            else:
                                arr.attrs['lon2d'] = c.TLON
                        if 'TLAT' in strarr:  # CESM latitude 2D coordinate
                            if len(c.TLAT.dims) == 3:
                                arr.attrs['lat2d'] = c.TLAT[0, :, :]
                            else:
                                arr.attrs['lat2d'] = c.TLAT
                    if 'cell_measures' in arr.attrs and 'tarea' in c:
                        # if an attribute named cell_measures exists, and tarea is on file(0)
                        if arr.attrs['cell_measures'] == 'area: tarea':
                            arr.attrs['area'] = c.tarea.values.astype(arr.dtype)  # in units of m^2
                else:
                    if v == 'CN':  # GFDL file line 674 in ncl script
                        arrT = c[v]
                        arr = arrT.sum(dim=arrT.dims[1], keep_attrs=True)  # sum over the ice category dimension
                        arr = xr.where(arr > 1, 1, arr)  # optional
                    else:
                        arr = c[v]
                    if 'coordinates' in arr.attrs:
                        strarr = arr.attrs['coordinates'].split()
                        if 'lon' in strarr:  # IPCC longitude 2D coordinate
                            arr.attrs['lon2d'] = c.lon
                        if 'lat' in strarr:  # IPCC latitude 2D coordinate
                            arr.attrs['lat2d'] = c.lat
                        if 'longitude' in strarr:  # NSIDC longitude 2D coordinate
                            arr.attrs['lon2d'] = c.longitude
                        if 'latitude' in strarr:  # NSIDC latitude 2D coordinate
                            arr.attrs['lat2d'] = c.latitude
                        # else:
                        #     print("2D coordinates for ice data are not detected")
                    dir_name = tfiles[0].split('/')  # line 700 ncl script
                    if len(dir_name) >= 8:
                        dir_name_new = "/".join(dir_name[:5]) + '/fx/areacello/' + dir_name[7] + '/r0i0p0/*.nc'
                        ufile = glob.glob(dir_name_new)
                    else:
                        ufile = ''
                    if len(ufile) > 0:
                        d = xr.open_dataset(ufile[0])
                        arr.attrs['area'] = d.areacello.values.astype(arr.dtype.name)
                        dimQ = arr.shape
                        if arr.attrs['area'].size != (dimQ[1] * dimQ[2]):  # the dimension sizes of areacello
                            arr.attrs.pop('area')  # do not match sizes of area j,i dimensions
                    if 'AREA' in c:  # check to see if there is an AREA array present and if so use it
                        areaT = c.AREA
                        if areaT.attrs['units'] == 'km^2':
                            area_unit_km2_to_m2 = True
                            areaT = areaT * 1000000.
                            areaT.attrs['units'] = "m^2"
                            areaT.attrs['_FillValue'] = 1.e20
                        arr.attrs['area'] = areaT.astype(arr.values.dtype.name)
                        if "pole_hole_area" in areaT.attrs:
                            # format of ystart, yend, value, ystart, yend, value
                            try:
                                area_unit_km2_to_m2  # ensure this variable exists...
                                extra_area = areaT.attrs['pole_hole_area'].astype("float32")
                                extra_area[2::3] = extra_area[2::3] * 1000000.
                                # convert pole hole area from km^2->m^2
                                arr.attrs['pole_hole_area'] = extra_area.astype(arr.values.dtype.name)
                            except NameError:
                                arr.attrs['pole_hole_area'] = areaT.attrs['pole_hole_area'].astype(arr.values.dtype.name)
                break
        cpathS = tfiles[0]  # line 745 of NCL script
        cpathE = tfiles[len(tfiles) - 1]
        ncharS = len(cpathS)
        ncharE = len(cpathE)
        sydata = int(cpathS[ncharS - 17:ncharS - 13])  # parse YYYYMM-YYYYMM span from the file names
        smdata = int(cpathS[ncharS - 13:ncharS - 11])
        eydata = int(cpathE[ncharE - 10:ncharE - 6])
        emdata = int(cpathE[ncharE - 6:ncharE - 4])
    if arr is None:
        print(f'Variable ({vn}) not found. Examine input file {zpath}. Creating empty array and continuing')
        arr = create_empty_array(yearS, yearE, 1, 12, "time_lat_lon")
    if 'area' not in arr.attrs:
        # calculate grid cell areas manually (not implemented)
        # print("Grid cell areas not found.")
        pass
    if arr.dtype == 'int16':  # line 765 of NCL script
        arr = arr.astype('float32')
    if "_FillValue" not in arr.attrs:  # assign _FillValue if one is not present
        if 'missing_value' in arr.attrs:
            arr.attrs['_FillValue'] = arr.attrs['missing_value']
        else:
            arr.attrs['_FillValue'] = ncl.default_fillvalues[arr.dtype.name]
    arr = arr.rename({arr.dims[0]: 'time', arr.dims[1]: 'j', arr.dims[2]: 'i'})  # line 778 of NCL script
    if 'lat2d' not in arr.attrs:  # if latitudes are 1D, make sure latitudes run from south to north
        if arr.coords['j'][0] >= 0:
            arr = arr[:, ::-1, :]
        # calculate area of 1D lat/lon arrays
        if arr.coords['i'].min() >= 0 and arr.coords['i'].max() <= 360:
            fctr = 111120  # how many meters per degree of latitude (approximate)
            pi = 4. * math.atan(1.0)
            rad = (pi / 180.)
            lat = arr.coords['j'].astype('float32')
            dimlat = lat.shape[0]
            latr = np.empty(dimlat, lat.dtype)
            for gg in range(0, dimlat):  # line 797 in NCL script
                if gg == 0:
                    latr[gg] = abs(-90 - (lat[1] + lat[0]) / 2.)
                elif gg == (dimlat - 1):
                    latr[gg] = abs(90 - (lat[dimlat - 2] + lat[dimlat - 1]) / 2.)
                else:
                    latr[gg] = abs((lat[gg - 1] + lat[gg]) / 2. - (lat[gg] + lat[gg + 1]) / 2.)
            lon = arr.coords['i'].astype('float32')
            dimlon = lon.shape[0]
            lonr = np.empty(dimlon, 'float32')
            for gg in range(0, dimlon):
                if gg == 0:
                    lonr[gg] = abs((lon[1] + lon[0]) / 2. - (((lon[dimlon - 1] + (lon[0] + 360)) / 2.) - 360))
                elif gg == (dimlon - 1):
                    lonr[gg] = abs(((lon[dimlon - 1] + (lon[0] + 360)) / 2.) - (lon[gg - 1] + lon[gg]) / 2.)
                else:
                    lonr[gg] = abs((lon[gg] + lon[gg + 1]) / 2. - (lon[gg - 1] + lon[gg]) / 2.)
            area = arr[0, :, :].astype('float32').load()  # writable (numpy-backed) copy
            area.values[:] = arr.attrs['_FillValue']
            area.attrs['long_name'] = "Area of grid box"
            area.attrs['units'] = "m2"
            # printVarSummary(area)
            for ff in range(0, dimlat):  # line 828 in NCL script
                for gg in range(0, dimlon):
                    ###RLB the following line had (/ ... /) around the expression ??
                    area[ff, gg] = (fctr * latr[ff]) * (math.cos(rad * lat[ff]) * lonr[gg] * fctr)  # cosine weighting
            # print("Total area = "+sum(area))
            arr.attrs['area'] = area.astype(arr.dtype.name)
    if 'is_all_missing' not in arr.attrs:  # erase data in hemisphere not specified via vn
        if 'lat2d' in arr.attrs:
            tlat2 = ncl.conform(arr, arr.attrs['lat2d'], (1, 2))
            tlon2 = ncl.conform(arr, arr.attrs['lon2d'], (1, 2))
            attrsT = arr.attrs  # xr.where drops attributes; save and restore them
            if vn == 'aice_nh':
                arr = xr.where(tlat2 >= 0, arr, attrsT['_FillValue'])
            if vn == 'aice_sh':
                arr = xr.where(tlat2 < 0, arr, attrsT['_FillValue'])
            arr.attrs = attrsT
        else:
            if vn == 'aice_nh':
                arr.loc[:, :-1., :] = arr.attrs['_FillValue']  ##RLB not so sure about coordinate indexing here?
            if vn == 'aice_sh':
                arr.loc[:, 0:, :] = arr.attrs['_FillValue']  ##RLB not so sure about coordinate indexing here?
    if yearS < int(sydata) or yearE > int(eydata):  # line 860 of NCL script...
        print(f'Requested {yearS}-{yearE} time span is outside the input file {zpath} time span of {sydata}-{eydata}')
        print("Setting array to all missing")
        arr = create_empty_array(yearS, yearE, smdata, emdata, "time_lat_lon")
        sydata = yearS  # assign these variables based on yearS/yearE provided in namelist. Doesn't matter
        eydata = yearE  # as data array is totally missing..
        smdata = 1
        emdata = 12
    else:
        timeT = ncl.yyyymm_time(sydata, eydata, int)
        time = timeT.sel(time=slice(sydata * 100 + smdata, eydata * 100 + emdata))
        if 'time' in arr.coords:
            arr = arr.drop_vars('time')
        dimz = arr.shape
        if dimz[0] == time.sizes['time']:
            arr.coords['time'] = time
        else:
            print('Possible mismatch detected between time specified in file name and file variables, setting array to missing')
            print(f'File = {zpath}')
            print(f'Read from file name: {int(time.min())}-{int(time.max())}')
            arr = create_empty_array(yearS, yearE, smdata, emdata, "time_lat_lon")
            sydata = yearS  # assign these variables based on yearS/yearE provided in namelist. Doesn't matter
            eydata = yearE  # as data array is totally missing..
            smdata = 1
            emdata = 12
    farr = arr.loc[yearS * 100 + 1:yearE * 100 + 12, :, :]
    # printVarSummary(farr)
    mocheck = np.array([(yearS * 100 + 1) - int(farr.coords['time'].min()),
                        (yearE * 100 + 12) - int(farr.coords['time'].max())])  # line 897 of NCL script
    if np.any(mocheck != 0):  # previously: if (mod(dimsizes(farr&time),12).ne.0) then
        if mocheck[0] != 0:
            print("First requested year is incomplete")
        if mocheck[1] != 0:
            print("Last requested year is incomplete")
        print(f'Incomplete data year(s) requested for file {zpath}, printing out time and creating blank array')
        print(f'Time requested: {yearS}-{yearE}')
        print(f'From file: Times present from {int(farr.coords["time"].min())}-{int(farr.coords["time"].max())}')
        farr = create_empty_array(yearS, yearE, 1, 12, "time_lat_lon")
    if farr.attrs['units'] == "0-1" or farr.attrs['units'] == "1":  # GFDL units, NSIDC units
        farr.values = farr.values * 100  ###RLB -- was (/ farr*100. /) metadata stripped?
        farr.attrs['units'] = "%"
    date = farr.coords['time']  # switch time to be CF-conforming
    yyyy = date // 100
    mm = date - (yyyy * 100)
    days = (ncl.days_in_month(yyyy, mm)) / 2
    hms = days * 0  # hours, minutes, seconds all the same (=0)
    time = ncl.cd_inv_calendar(yyyy, mm, days, hms, hms, hms,
                               f'months since {int(yyyy.min())}-01-15 00:00:00', 0)
    time.attrs['long_name'] = "Time"
    time.attrs['standard_name'] = "time"
    time = time.rename({time.dims[0]: 'time'})
    time.coords['time'] = time.values
    farr = farr.drop_vars('time')
    farr.coords['time'] = time
    return farr
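
# Hypothetical usage sketch of data_read_in_ice (the path, years, and file
# naming below are invented for illustration; real paths come from the ice
# namelist): read NH sea-ice concentration and inspect the result.
def _demo_data_read_in_ice():
    ice_path = '/data/model/ice/aice_nh.*.nc'  # hypothetical glob pattern
    aice = data_read_in_ice(ice_path, 'aice_nh', 1980, 1999)
    print(aice.sizes)               # time x j x i
    print('area' in aice.attrs)     # grid-cell areas attached when they could be found/derived
    return aice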
def data_read_in(zpath, vn, yearS, yearE):
    '''
    read in atmospheric / land data from selected files
    assign time coordinate variables, check for issues with the array, assign _FillValue (if needed)
    assign dimension names (for ease-of-use), check and modify units

    vname settings at top of this script can be modified if a different variable name is
    encountered. For instance, if a TS data file has the TS array named as "sfc_t", one
    could add "sfc_t" to the vname TS coding as follows:
        if (vn.eq."TS") then
            vname = (/ "TS", "ts", "sst", "sfc_t" /)
        end if
    '''
    # path for TS file(s), variable name, start year, and end year are read in.
    arr = None
    vname = None
    if vn == 'TS':
        vname = ("TS", "ts", "sst", "t_surf", "skt")
    elif vn == "PSL":
        vname = ("PSL", "psl", "slp", "SLP", "prmsl", "msl", "slp_dyn")
    elif vn == "TREFHT":
        vname = ("TREFHT", "tas", "temp", "air", "temperature_anomaly",
                 "temperature", "t2m", "t_ref", "T2", "tempanomaly")
    elif vn == "PRECT":
        vname = ("PRECC", "PRECL", "PRECT", "pr", "PPT", "ppt", "p", "P",
                 "precip", "PRECIP", "tp", "prcp", "prate")
    elif vn == "SNOWDP":
        vname = ("SNOWDP", "snd")
    if zpath is None:
        print(f'File missing, creating blank array of data. View {vn} namelist for details.')
        arr = create_empty_array(yearS, yearE, 1, 12, "time_lat_lon")
        sydata = yearS  # assign these variables based on yearS/yearE provided in namelist. Doesn't matter
        eydata = yearE  # as data array is totally missing..
        smdata = 1
        emdata = 12
    else:
        if '*' in zpath or '{' in zpath:  # check for "*" and "{" denoting multiple files
            tfiles = sorted(glob.glob(zpath))  # sort so first/last file bracket the time span
            if vn == 'PRECT':  # special section for precip, as might need to do PRECC+PRECL
                b = xr.open_dataset(tfiles[0])
                if 'PRECC' in b or 'PRECL' in b:  # PRECC/PRECL section
                    fils_precc = []
                    fils_precl = []
                    for f in tfiles:
                        with xr.open_dataset(f) as tf:
                            if 'PRECC' in tf:
                                fils_precc.append(f)
                            elif 'PRECL' in tf:
                                fils_precl.append(f)
                    if len(fils_precc) == 0 or len(fils_precl) == 0:
                        print('Fatal: Need both PRECC and PRECL file(s), creating blank array')
                        print(fils_precc)
                        print(fils_precl)
                        arr = create_empty_array(yearS, yearE, 1, 12, "time_lat_lon")
                        sydata = yearS  # assign these variables based on yearS/yearE provided in namelist. Doesn't matter
                        eydata = yearE  # as data array is totally missing..
                        smdata = 1
                        emdata = 12
                        ##
                        ## In the NCL code, there is a break here, however I don't
                        ## think it works as intended. Replacing with a return -- RLB
                        ##
                        return arr
                    with xr.open_mfdataset(fils_precc) as c:
                        arr1 = c.data_vars['PRECC'].load()
                    with xr.open_mfdataset(fils_precl) as c:
                        arr2 = c.data_vars['PRECL'].load()
                    arr = arr1 + arr2  # PRECT = PRECC + PRECL
                    arr.attrs = dict(arr1.attrs)  # arithmetic drops attributes; keep PRECC metadata (units etc.)
                    arr.attrs['long_name'] = 'Large-scale (stable) + convective precipitation rate (liq + ice)'
                else:  # pr, ppt, PPT, PRECT multiple/single file read-in here..
                    c = xr.open_mfdataset(tfiles)
                    for v in vname:
                        if v in c:
                            arr = c.data_vars[v]
                            break
            else:
                c = xr.open_mfdataset(tfiles)
                for v in vname:
                    if v in c:
                        arr = c.data_vars[v]
                        break
            cpathS = tfiles[0]
            cpathE = tfiles[len(tfiles) - 1]
            sydata = int(cpathS[len(cpathS) - 17:len(cpathS) - 13])  # parse YYYYMM-YYYYMM span from the file names
            smdata = int(cpathS[len(cpathS) - 13:len(cpathS) - 11])
            eydata = int(cpathE[len(cpathE) - 10:len(cpathE) - 6])
            emdata = int(cpathE[len(cpathE) - 6:len(cpathE) - 4])
        else:  # single file case...
            c = xr.open_dataset(zpath)
            for v in vname:
                if v in c:
                    arr = c.data_vars[v]
                    break
            sydata = int(zpath[len(zpath) - 17:len(zpath) - 13])
            smdata = int(zpath[len(zpath) - 13:len(zpath) - 11])
            eydata = int(zpath[len(zpath) - 10:len(zpath) - 6])
            emdata = int(zpath[len(zpath) - 6:len(zpath) - 4])
    if arr is None:
        print(f'Variable {vn} not found. Examine input file {zpath}. Creating empty array and continuing')
        arr = create_empty_array(yearS, yearE, 1, 12, "time_lat_lon")
    if arr.dtype == 'int16':
        arr = arr.astype('float32')
    #### HOW TO DEAL WITH MISSING_VALUES IN XARRAY? --RLB
    if '_FillValue' not in arr.attrs:  # assign _FillValue if one is not present
        if 'missing_value' in arr.attrs:
            arr.attrs['_FillValue'] = arr.attrs['missing_value']
        else:
            arr.attrs['_FillValue'] = ncl.default_fillvalues[arr.dtype.name]
    if any(size == 1 for size in arr.sizes.values()):
        arr = arr.squeeze()
    if len(arr.dims) <= 2:
        print('Possible curvilinear (or unstructured) grid detected. The CVDP cannot analyze curvilinear data. Please regrid to a rectilinear grid for inclusion in CVDP comparisons.')
        print(f'Input file: {zpath}')
        print('Setting array to all missing')
        arr = create_empty_array(yearS, yearE, smdata, emdata, "time_lat_lon")
        sydata = yearS  # assign these variables based on yearS/yearE provided in namelist. Doesn't matter
        eydata = yearE  # as data array is totally missing..
        smdata = 1
        emdata = 12
    ###RLB - is this the correct idiom in Xarrays for NCL's dimension (re)naming?
    arr = arr.rename({arr.dims[0]: 'time', arr.dims[1]: 'lat', arr.dims[2]: 'lon'})
    if 'valid_range' in arr.attrs:
        # check to make sure data is in valid range. Reset to stay within the valid range if needed.
        arr.data = xr.where(arr.data < arr.attrs['valid_range'][0], arr.attrs['valid_range'][0], arr.data)
        arr.data = xr.where(arr.data > arr.attrs['valid_range'][1], arr.attrs['valid_range'][1], arr.data)
    ###RLB - what are we doing with _FillValues?
    ### if arr.abs() > 1.e20:  # check for inf values or values way out of range, reset to _FillValue.
    ###     print(f'Values greater than 1.e20 or less than -1.e20 detected in {zpath}, resetting to _FillValue')
    ###     arr = xr.where(arr.abs() > 1.e20, arr@_FillValue, arr)
    print(arr)
    if yearS < int(sydata) or yearE > int(eydata):
        print(f'Requested {yearS}-{yearE} time span is outside the input file {zpath} time span of {sydata}-{eydata}')
        print('Setting array to all missing')
        arr = create_empty_array(yearS, yearE, smdata, emdata, "time_lat_lon")
        sydata = yearS  # assign these variables based on yearS/yearE provided in namelist. Doesn't matter
        eydata = yearE  # as data array is totally missing..
        smdata = 1
        emdata = 12
    else:
        timeT = ncl.yyyymm_time(sydata, eydata, int)
        time = timeT.sel(time=slice(sydata * 100 + smdata, eydata * 100 + emdata))
        if 'time' in arr.coords:
            arr = arr.drop_vars('time')
        dimz = arr.shape
        if dimz[0] == time.sizes['time']:
            arr = arr.assign_coords(time=time)
        else:
            print('Possible mismatch detected between time specified in file name and file variables, setting array to missing')
            print(f'File = {zpath}')
            print(f'Read from file name: {int(time.min())}-{int(time.max())}')
            arr = create_empty_array(yearS, yearE, smdata, emdata, "time_lat_lon")
            sydata = yearS  # assign these variables based on yearS/yearE provided in namelist. Doesn't matter
            eydata = yearE  # as data array is totally missing..
            smdata = 1
            emdata = 12
    if arr.coords['lat'][0] >= 0:
        arr = arr[:, ::-1, :]  # flip the latitudes so they run south to north
    farr = arr.loc[yearS * 100 + 1:yearE * 100 + 12, :, :]
    mocheck = np.array([(yearS * 100 + 1) - int(farr.coords['time'].min()),
                        (yearE * 100 + 12) - int(farr.coords['time'].max())])
    if np.any(mocheck != 0):  # previously: if (mod(dimsizes(farr&time),12).ne.0) then
        if mocheck[0] != 0:
            print("First requested year is incomplete")
        if mocheck[1] != 0:
            print("Last requested year is incomplete")
        print(f'Incomplete data year(s) requested for file {zpath}, printing out time and creating blank array')
        print(f'Time requested: {yearS}-{yearE}')
        print(farr.coords['time'])
        farr = create_empty_array(yearS, yearE, 1, 12, "time_lat_lon")
    if farr.coords['lon'][0] < 0:
        farr = ncl.lonFlip(farr)  # lon flip #####RLB: THIS IS STUBBED FOR RIGHT NOW!!!
    if farr.coords['lon'].min() < 0 or farr.coords['lon'].max() > 360:
        print(farr.coords['lon'])
        print(f'path = {zpath}')
        print("Fatal: Longitudes not in expected 0-360E range, creating blank array")
        farr = create_empty_array(yearS, yearE, 1, 12, "time_lat_lon")
    if vn == 'TREFHT' or vn == 'TS':  # units check
        if farr.attrs['units'] == 'K' or farr.attrs['units'] == 'Kelvin' or \
                farr.attrs['units'] == 'deg_k' or farr.attrs['units'] == 'deg_K':
            if farr.max() >= 100:
                # data sets can be anomalies with units of K, so check for range before subtracting
                farr.values = farr.values - 273.15
            farr.attrs['units'] = 'C'
        if farr.attrs['units'] == 'degrees_C' or farr.attrs['units'] == 'degrees C' or \
                farr.attrs['units'] == 'degree_C' or farr.attrs['units'] == 'degree C':
            farr.attrs['units'] = 'C'
    if vn == 'PSL':
        if farr.attrs['units'] == 'Pa' or farr.attrs['units'] == 'Pascals' or \
                farr.attrs['units'] == 'Pascal':
            farr.values = farr.values / 100.
            farr.attrs['units'] = 'hPa'
    if vn == 'PRECT':  # convert (if necessary) to mm/day
        if farr.attrs['units'] == 'm/s' or farr.attrs['units'] == 'm s-1':
            farr.values = farr.values * 86400000.
        if farr.attrs['units'] == 'kg m-2 s-1' or farr.attrs['units'] == 'kg/m2/s' or \
                farr.attrs['units'] == 'kg/m^2/s ' or farr.attrs['units'] == 'kg/(s*m2)' or \
                farr.attrs['units'] == 'mm/s':
            farr.values = farr.values * 86400.
        if farr.attrs['units'] == 'm' or farr.attrs['units'] == 'm/month' or \
                farr.attrs['units'] == 'cm' or farr.attrs['units'] == 'cm/month' or \
                farr.attrs['units'] == 'mm' or farr.attrs['units'] == 'mm/month':
            yr = farr.coords['time'].values.astype(int) // 100
            mo = farr.coords['time'].values.astype(int) - yr * 100  ##RLB THE WHOLE EXPRESSION WAS toint()!
            days = ncl.days_in_month(yr, mo)
            for gg in range(0, farr.sizes['time']):
                farr.values[gg, :, :] = farr.values[gg, :, :] / days[gg]  ##RLB IS THIS RIGHT? LINE 322
            if farr.attrs['units'] == 'cm' or farr.attrs['units'] == 'cm/month':
                farr.values = farr.values * 10.  # convert from cm/day to mm/day
            if farr.attrs['units'] == 'm' or farr.attrs['units'] == 'm/month':
                farr.values = farr.values * 1000.  # convert from m/day to mm/day
        if farr.attrs['units'] == 'm/day' or farr.attrs['units'] == 'm day-1':
            farr.values = farr.values * 1000.
        farr.attrs['units'] = 'mm/day'
    if vn == 'SNOWDP':
        if 'is_all_missing' not in farr.attrs:
            if farr.attrs['units'] != 'm' and farr.attrs['units'] != 'meters':
                print(f'Warning: SNOWDP/snd units may not be in meters. listed units = {farr.attrs["units"]}')
    date = farr.coords['time']  # switch time to be CF-conforming
    yyyy = date // 100
    mm = date - (yyyy * 100)
    days = (ncl.days_in_month(yyyy, mm)) / 2
    hms = days * 0  # hours, minutes, seconds all the same (=0)
    time = ncl.cd_inv_calendar(yyyy, mm, days, hms, hms, hms,
                               f'months since {int(yyyy.min())}-01-15 00:00:00', 0)
    time.attrs['long_name'] = 'Time'
    time.attrs['standard_name'] = 'time'
    time.attrs['actual_range'] = np.array([time.values.min(), time.values.max()])
    time = time.rename({time.dims[0]: 'time'})
    time.coords['time'] = time.values
    farr = farr.drop_vars('time')
    farr.coords['time'] = time
    return farr
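
# Hypothetical usage sketch of data_read_in (path and years invented for
# illustration): read surface temperature; the function is expected to hand
# back a south-to-north, 0-360E grid with CF-conforming monthly times and,
# for TS/TREFHT, units of C.
def _demo_data_read_in():
    ts_path = '/data/model/atm/TS.*.nc'  # hypothetical glob pattern
    ts = data_read_in(ts_path, 'TS', 1950, 2000)
    print(ts.coords['time'][0:2])
    print(ts.attrs.get('units'))
    return ts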
def data_read_in_ocean_MOC(zpath, vn, yearS, yearE):
    '''
    read in MOC ocean data from given files
    assign time coordinate variables, check for issues with the array, assign _FillValue (if needed)
    assign dimension names (for ease-of-use), check and modify units

    path for MOC file(s), variable name, start year, and end year are read in.
    '''
    arr = None
    vname = None
    if vn == 'MOC':  # line 375 in ncl version
        vname = ("MOC", "msftmyz", "stfmmc")
    if zpath is None:
        print(f'File missing, creating blank array of data. View {vn} namelist for details.')
        arr = create_empty_array(yearS, yearE, 1, 12, "time_lev_lat")
        sydata = yearS  # assign these variables based on yearS/yearE provided in namelist. Doesn't matter
        eydata = yearE  # as data array is totally missing..
        smdata = 1
        emdata = 12
    else:
        if '*' in zpath or '{' in zpath:  # check for "*" and "{" denoting multiple files
            tfiles = sorted(glob.glob(zpath))  # sort so first/last file bracket the time span
            c = xr.open_mfdataset(tfiles)
            for v in vname:
                if v in c:
                    dimC = c[v].shape
                    if v == 'MOC':  # CCSM/CESM file
                        if dimC[2] >= 2:
                            arrT = c[v][:, 1, :, :, :]
                            # select Atl+Med+Labrador+GIN sea+Arctic+Hudson Bay transport region and sum over moc_comp
                            arr = arrT.sum(dim=arrT.dims[1], keep_attrs=True)
                        else:
                            # select Atl+Med+Labrador+GIN sea+Arctic+Hudson Bay transport region and the only moc_comp dimension
                            arr = c[v][:, 1, 0, :, :]
                    else:  # CMIP file: 0th basin/region = atlantic_ocean (CMIP3) or atlantic_arctic_ocean (CMIP5)
                        arr = c[v][:, 0, :, :]
                    break
            cpathS = tfiles[0]
            cpathE = tfiles[len(tfiles) - 1]
            sydata = int(cpathS[len(cpathS) - 17:len(cpathS) - 13])  # parse YYYYMM-YYYYMM span from the file names
            smdata = int(cpathS[len(cpathS) - 13:len(cpathS) - 11])
            eydata = int(cpathE[len(cpathE) - 10:len(cpathE) - 6])
            emdata = int(cpathE[len(cpathE) - 6:len(cpathE) - 4])
        else:
            c = xr.open_dataset(zpath)
            for v in vname:
                if v in c:
                    dimC = c[v].shape
                    if v == 'MOC':  # CCSM/CESM file
                        if dimC[2] >= 2:
                            arrT = c[v][:, 1, :, :, :]
                            # select Atl+Med+Labrador+GIN sea+Arctic+Hudson Bay transport region and sum over moc_comp
                            arr = arrT.sum(dim=arrT.dims[1], keep_attrs=True)
                        else:
                            # select Atl+Med+Labrador+GIN sea+Arctic+Hudson Bay transport region
                            arr = c[v][:, 1, 0, :, :]
                    else:  # CMIP file: 0th basin/region = atlantic_ocean (CMIP3) or atlantic_arctic_ocean (CMIP5)
                        arr = c[v][:, 0, :, :]
                    break
            sydata = int(zpath[len(zpath) - 17:len(zpath) - 13])
            smdata = int(zpath[len(zpath) - 13:len(zpath) - 11])
            eydata = int(zpath[len(zpath) - 10:len(zpath) - 6])
            emdata = int(zpath[len(zpath) - 6:len(zpath) - 4])
    if arr is None:  # line 450 in ncl version
        print(f'Variable {vn} not found. Examine input file {zpath}. Creating empty array and continuing')
        arr = create_empty_array(yearS, yearE, 1, 12, "time_lev_lat")
    if arr.dtype == 'int16':
        arr = arr.astype('float32')
    if '_FillValue' not in arr.attrs:  # assign _FillValue if one is not present
        if 'missing_value' in arr.attrs:
            arr.attrs['_FillValue'] = arr.attrs['missing_value']
        else:
            arr.attrs['_FillValue'] = ncl.default_fillvalues[arr.dtype.name]
    arr = arr.rename({arr.dims[0]: 'time', arr.dims[1]: 'lev', arr.dims[2]: 'lat'})
    if 'coordinates' in arr.attrs:
        arr.attrs.pop('coordinates')
    if arr.coords['lev'].attrs.get('units') in ('centimeters', 'cm'):  # line 475 of ncl version
        arr = arr.assign_coords(lev=arr.coords['lev'].values / 100.)
        arr.coords['lev'].attrs['units'] = "m"
        ##RLB equivalent of NCL's  lev&lev = lev  and  arr&lev = lev
    if arr.coords['lev'][2] < 0:  # check for negative levels line 488 ncl script
        arr = arr.assign_coords(lev=arr.coords['lev'].values * -1.)
        if (arr.coords['lev'] < 0).any():
            print("Error detected in MOC level sign conversion")
            print(arr.coords['lev'])
        arr.coords['lev'].attrs['positive'] = 'down'
        ##RLB again, are these necessary?
        ##lev&lev = lev
        ##arr&lev = lev
    if 'valid_range' in arr.attrs:
        # check to make sure data is in valid range. Reset to stay within the valid range if needed.
        arr.data = xr.where(arr.data < arr.attrs['valid_range'][0], arr.attrs['valid_range'][0], arr.data)
        arr.data = xr.where(arr.data > arr.attrs['valid_range'][1], arr.attrs['valid_range'][1], arr.data)
    if (abs(arr) >= 1.e20).any():
        # check for inf values or values way out of range, reset to _FillValue.
        print(f'Values greater than 1.e20 or less than -1.e20 detected in {zpath}, resetting to _FillValue')
        arr.data = xr.where(abs(arr.data) >= 1.e20, arr.attrs['_FillValue'], arr.data)
    if yearS < int(sydata) or yearE > int(eydata):  # line 517 of ncl script
        print(f'Requested {yearS}-{yearE} time span is outside the input file {zpath} time span of {sydata}-{eydata}')
        print('Setting array to all missing')
        arr = create_empty_array(yearS, yearE, smdata, emdata, "time_lev_lat")
        sydata = yearS  # assign these variables based on yearS/yearE provided in namelist. Doesn't matter
        eydata = yearE  # as data array is totally missing..
        smdata = 1
        emdata = 12
    else:
        timeT = ncl.yyyymm_time(sydata, eydata, int)
        time = timeT.sel(time=slice(sydata * 100 + smdata, eydata * 100 + emdata))
        if 'time' in arr.coords:
            arr = arr.drop_vars('time')
        dimz = arr.shape
        if dimz[0] == time.sizes['time']:
            arr = arr.assign_coords(time=time)
        else:
            print('Possible mismatch detected between time specified in file name and file variables, setting array to missing')
            print(f'File = {zpath}')
            print(f'Read from file name: {int(time.min())}-{int(time.max())}')
            arr = create_empty_array(yearS, yearE, smdata, emdata, "time_lev_lat")
            sydata = yearS  # assign these variables based on yearS/yearE provided in namelist. Doesn't matter
            eydata = yearE  # as data array is totally missing..
            smdata = 1
            emdata = 12
    if arr.coords['lat'][0] >= 0:  # line 553 in ncl script
        arr = arr[:, :, ::-1]  # flip the latitudes so they run south to north
    farr = arr.loc[yearS * 100 + 1:yearE * 100 + 12, :, :]
    mocheck = np.array([(yearS * 100 + 1) - int(farr.coords['time'].min()),
                        (yearE * 100 + 12) - int(farr.coords['time'].max())])
    if np.any(mocheck != 0):  # previously: if (mod(dimsizes(farr&time),12).ne.0) then
        if mocheck[0] != 0:
            print("First requested year is incomplete")
        if mocheck[1] != 0:
            print("Last requested year is incomplete")
        print(f'Incomplete data year(s) requested for file {zpath}, printing out time and creating blank array')
        print(f'Time requested: {yearS}-{yearE}')
        print(farr.coords['time'])
        farr = create_empty_array(yearS, yearE, 1, 12, "time_lev_lat")
    # check units for MOC array. CMIP5 = "kg s-1"  CMIP3 = "m3 s-1"  CCSM3 = "Sverdrups"  CCSM4 = "Sverdrups"
    if farr.attrs['units'] == 'Sverdrups':  # line 579 of ncl script
        farr.attrs['units'] = "Sv"
    if farr.attrs['units'] == "kg s-1" or farr.attrs['units'] == "KG S-1" or \
            farr.attrs['units'] == "kg/s" or farr.attrs['units'] == "KG/S":  # 1 Sv = 1.e9 kg/s
        farr.values = farr.values / 1.e9  ###RLB -- original strips metadata???
        farr.attrs['units'] = "Sv"
    if farr.attrs['units'] == 'm3 s-1' or farr.attrs['units'] == 'M3 S-1' or \
            farr.attrs['units'] == 'm3/s' or farr.attrs['units'] == 'M3/S':  # 1 Sv = 1.e6 m3/s
        farr.values = farr.values / 1.e6  ###RLB -- original strips metadata???
        farr.attrs['units'] = "Sv"
    date = farr.coords['time']  # switch time to be CF-conforming
    yyyy = date // 100
    mm = date - (yyyy * 100)
    days = (ncl.days_in_month(yyyy, mm)) / 2
    hms = days * 0  # hours, minutes, seconds all the same (=0)
    time = ncl.cd_inv_calendar(yyyy, mm, days, hms, hms, hms,
                               f'months since {int(yyyy.min())}-01-15 00:00:00', 0)
    time.attrs['long_name'] = 'Time'
    time.attrs['standard_name'] = 'time'
    time.attrs['actual_range'] = np.array([time.values.min(), time.values.max()])
    time = time.rename({time.dims[0]: 'time'})
    time.coords['time'] = time.values
    farr = farr.drop_vars('time')
    farr.coords['time'] = time
    return farr