def subset_merra_data(url, llat=23., ulat=51., llon=-130., rlon=-65., year=2010):
    '''
    Slice MERRA Reanalysis precipitation data over a particular region
    (the default region is centered over the contiguous U.S.) for a
    specified year. Returns monthly and summertime average daily rainfall
    for plotting purposes.
    '''
    nc = Dataset(url)
    time = nc.variables['time']
    lon = nc.variables['lon'][:]
    lat = nc.variables['lat'][:]
    # indices for slicing the data over a particular year and region
    strt_time = int(date2num(datetime(year, 1, 1), time.units) - time[0])
    stp_time = int(date2num(datetime(year, 12, 31), time.units) - time[0])
    # +1 so the date list stays aligned with the precip time slice below
    dates = num2date(time[strt_time:stp_time + 1], time.units)
    llat_ind = np.argwhere(lat == llat)[0, 0]
    ulat_ind = np.argwhere(lat == ulat)[0, 0]
    llon_ind = np.argwhere(lon >= llon)[0, 0]
    rlon_ind = np.argwhere(lon >= rlon)[0, 0]
    # downloading sliced rainfall data
    precip = nc.variables['prectot'][strt_time:stp_time + 1,
                                     llat_ind:ulat_ind, llon_ind:rlon_ind]
    precip = 24 * 60 * 60 * precip  # converting to mm/day
    lon = lon[llon_ind:rlon_ind]
    lat = lat[llat_ind:ulat_ind]
    # computing monthly averages
    m_precip = []
    mm = np.array([dd.month for dd in dates])
    for month in np.arange(1, 13):
        m_precip.append(precip[np.argwhere(mm == month)].mean(axis=0)[0])
    m_precip = np.array(m_precip)
    # summertime (JJA) average daily rainfall for 2-D plotting over a map
    jja_precip = m_precip[5:8, :, :].mean(axis=0)
    return lat, lon, m_precip, jja_precip
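# Hypothetical usage sketch (not part of the original source): the OPeNDAP URL
# below is a placeholder, and matplotlib is only assumed for the quick-look plot.
import matplotlib.pyplot as plt

merra_url = 'https://example.gov/opendap/MERRA/prectot_daily.nc'  # placeholder URL
lat, lon, m_precip, jja_precip = subset_merra_data(merra_url, year=2010)

# quick-look map of the summertime (JJA) mean daily rainfall
plt.pcolormesh(lon, lat, jja_precip)
plt.colorbar(label='mm/day')
plt.title('JJA mean daily rainfall, 2010')
plt.show()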
def dictionary_of_data_to_netcdf(self, ncFileName, dataDictionary, timeBounds, timeStamp = None, posCnt = None): rootgrp = nc.Dataset(ncFileName, 'a') lowerTimeBound = timeBounds[0] upperTimeBound = timeBounds[1] if timeStamp == None: timeStamp = lowerTimeBound + (upperTimeBound - lowerTimeBound) / 2 # time date_time = rootgrp.variables['time'] if posCnt == None: posCnt = len(date_time) date_time[posCnt] = nc.date2num(timeStamp, date_time.units, date_time.calendar) # time bounds time_bounds = rootgrp.variables['time_bounds'] time_bounds[posCnt, 0] = nc.date2num(lowerTimeBound, date_time.units, date_time.calendar) time_bounds[posCnt, 1] = nc.date2num(upperTimeBound, date_time.units, date_time.calendar) shortVarNameList = dataDictionary.keys() for shortVarName in shortVarNameList: varField = dataDictionary[shortVarName] # flip variable if necessary (to follow cf_convention) if self.netcdf_y_orientation_follow_cf_convention: varField = np.flipud(varField) # the variable rootgrp.variables[shortVarName][posCnt,:,:] = varField rootgrp.sync() rootgrp.close()
def get_numtime(self, arr): """ :param arr: An array of ``datetime``-like objects to convert to numeric time. :type arr: :class:`numpy.ndarray` :returns: An array of numeric values with same shape as ``arr``. :rtype: :class:`numpy.ndarray` """ arr = np.atleast_1d(arr) try: ret = nc.date2num(arr, str(self.units), calendar=self.calendar) except (ValueError, TypeError): # Special behavior for conversion of time units with months. if self._has_months_units: ret = get_num_from_months_time_units(arr, self.units, dtype=None) else: # Odd behavior in netcdftime objects? Try with datetime objects. flat_arr = arr.flatten() fill = np.zeros(flat_arr.shape, dtype=object) for idx, element in enumerate(flat_arr): fill[idx] = datetime.datetime(element.year, element.month, element.day, element.hour, element.minute, element.second, element.microsecond) fill = fill.reshape(arr.shape) ret = np.atleast_1d(nc.date2num(fill, str(self.units), calendar=self.calendar)) return ret
def _check_dates_outside(ifile, start_date, end_date): """ Checks if the comparison data is outside of the dates for the plot Returns True if the dates of the data are completely outside of the desired dates. Returns False if the dates overlap at all, but prints a warning if it is only a subset. """ # Load data from file into Dataset object nc = Dataset(ifile, 'r') nc_time = nc.variables['time'] try: cal = nc_time.calendar except: cal = 'standard' # convert dates to datetime object start = datetime.datetime(*year_mon_day(start_date)) end = datetime.datetime(*year_mon_day(end_date)) # convert datetime objects to integers start = date2num(start, nc_time.units, calendar=cal) end = date2num(end, nc_time.units, calendar=cal) # get start and end dates of file compstart = nc_time[:][0] compend = nc_time[:][-1] # make comparison if compstart > end or compend < start: return True elif compstart > start or compend < end: with open('logs/log.txt', 'a') as outfile: outfile.write('WARNING: Comparison data does not cover entire time period... Used subset\n') return False
def convert_osu_file(file, sheet_names):
    for s in sheet_names:
        name = str(s)
        print('\nConverting sheet name: %s' % name)
        f = pd.read_excel(file, sheetname=name, skiprows=1)
        for i, j in f.iterrows():
            ctd_sn = int(j['SN'])
            ctd_uid = 'CGINS-CTDGVM-' + '{0:05d}'.format(ctd_sn)
            print(ctd_uid)
            ctd_inst = 'CTDGVM'
            ctd_caldate_str = str(j['Cal Date'])[0:10].replace('-', '')
            ctd_caldate_num = int(nc.date2num(j['Cal Date'], 'seconds since 1970-01-01')) * 1000
            ctd_sdir = os.path.join(dir, ctd_inst)
            create_dir(ctd_sdir)
            write_csv(ctd_uid, ctd_caldate_str, ctd_caldate_num, ctd_sn, ctd_sdir)

            do_sn = int(j['SN.3'])
            do_uid = 'CGINS-DOSTAM-' + '{0:05d}'.format(do_sn)
            print(do_uid)
            do_inst = 'DOSTAM'
            do_caldate_str = str(j['Cal Date.3'])[0:10].replace('-', '')
            do_caldate_num = int(nc.date2num(j['Cal Date.3'], 'seconds since 1970-01-01')) * 1000
            do_sdir = os.path.join(dir, do_inst)
            create_dir(do_sdir)
            write_csv(do_uid, do_caldate_str, do_caldate_num, do_sn, do_sdir)
def setUp(self): self.standardtime = self.TestTime(datetime(1950, 1, 1), 366, 24, 'hours since 1900-01-01', 'standard') self.file = tempfile.NamedTemporaryFile(suffix='.nc', delete=False).name f = Dataset(self.file, 'w') f.createDimension('time', None) time = f.createVariable('time', float, ('time',)) time.units = 'hours since 1900-01-01' time[:] = self.standardtime[:] f.createDimension('time2', 1) time2 = f.createVariable('time2', 'f8', ('time2',)) time2.units = 'days since 1901-01-01' self.first_timestamp = datetime(2000, 1, 1) time2[0] = date2num(self.first_timestamp, time2.units) ntimes = 21 f.createDimension("record", ntimes) time3 = f.createVariable("time3", numpy.int32, ("record", )) time3.units = "seconds since 1970-01-01 00:00:00" date = datetime(2037,1,1,0) dates = [date] for ndate in range(ntimes-1): date += (ndate+1)*timedelta(hours=1) dates.append(date) time3[:] = date2num(dates,time3.units) f.close()
def setUp(self): self.standardtime = self.TestTime(datetime(1950, 1, 1), 366, 24, "hours since 1900-01-01", "standard") self.file = tempfile.mktemp(".nc") f = Dataset(self.file, "w") f.createDimension("time", None) time = f.createVariable("time", float, ("time",)) time.units = "hours since 1900-01-01" time[:] = self.standardtime[:] f.createDimension("time2", 1) time2 = f.createVariable("time2", "f8", ("time2",)) time2.units = "days since 1901-01-01" self.first_timestamp = datetime(2000, 1, 1) time2[0] = date2num(self.first_timestamp, time2.units) ntimes = 21 f.createDimension("record", ntimes) time3 = f.createVariable("time3", numpy.int32, ("record",)) time3.units = "seconds since 1970-01-01 00:00:00" date = datetime(2037, 1, 1, 0) dates = [date] for ndate in range(ntimes - 1): date += (ndate + 1) * timedelta(hours=1) dates.append(date) time3[:] = date2num(dates, time3.units) f.close()
def test_select_nc(self): f = Dataset(self.file, 'r') nutime = f.variables['time'] dates = [datetime(1950, 1, 2, 6), datetime( 1950, 1, 3), datetime(1950, 1, 3, 18)] t = date2index(dates, nutime, select='before') assert_equal(t, [1, 2, 2]) t = date2index(dates, nutime, select='after') assert_equal(t, [2, 2, 3]) t = date2index(dates, nutime, select='nearest') assert_equal(t, [1, 2, 3]) # Test dates outside the support with select t = date2index(datetime(1949, 12, 1), nutime, select='nearest') assert_equal(t, 0) t = date2index(datetime(1978, 1, 1), nutime, select='nearest') assert_equal(t, 365) # Test dates outside the support with before self.assertRaises( ValueError, date2index, datetime(1949, 12, 1), nutime, select='before') t = date2index(datetime(1978, 1, 1), nutime, select='before') assert_equal(t, 365) # Test dates outside the support with after t = date2index(datetime(1949, 12, 1), nutime, select='after') assert_equal(t, 0) self.assertRaises( ValueError, date2index, datetime(1978, 1, 1), nutime, select='after') # test microsecond and millisecond units unix_epoch = "milliseconds since 1970-01-01T00:00:00Z" d = datetime(2038, 1, 19, 3, 14, 7) millisecs = int( date2num(d, unix_epoch, calendar='proleptic_gregorian')) assert_equal(millisecs, (2 ** 32 / 2 - 1) * 1000) unix_epoch = "microseconds since 1970-01-01T00:00:00Z" microsecs = int(date2num(d, unix_epoch)) assert_equal(microsecs, (2 ** 32 / 2 - 1) * 1000000) # test microsecond accuracy in date2num/num2date roundtrip # note: microsecond accuracy lost for time intervals greater # than about 270 years. units = 'microseconds since 1776-07-04 00:00:00-12:00' dates =\ [datetime(1962, 10, 27, 6, 1, 30, 9001), datetime( 1993, 11, 21, 12, 5, 25, 999), datetime(1995, 11, 25, 18, 7, 59, 999999)] times2 = date2num(dates, units) dates2 = num2date(times2, units) for date, date2 in zip(dates, dates2): assert_equal(date, date2) f.close()
def fix_time(cgfile, time):
    times = cgfile.createVariable('time', 'double', ('time',))
    times.units = 'seconds since 1970-01-01 00:00:00.0'
    times.calendar = 'gregorian'
    times.standard_name = 'time'
    print("time here is")
    print(time)
    print(netCDF4.date2num(time, units=times.units, calendar=times.calendar))
    times[0] = netCDF4.date2num(time, units=times.units, calendar=times.calendar)
    print(times)
def setup_getfeatureinfo(self, ncd, variable_object, request, location=None): location = location or 'face' try: latitude = request.GET['latitude'] longitude = request.GET['longitude'] # Find closest cell or node (only node for now) if location == 'face': tree = rtree.index.Index(self.face_tree_root) elif location == 'node': tree = rtree.index.Index(self.node_tree_root) else: raise NotImplementedError("No RTree for location '{}'".format(location)) nindex = list(tree.nearest((longitude, latitude, longitude, latitude), 1, objects=True))[0] closest_x, closest_y = tuple(nindex.bbox[2:]) geo_index = nindex.object except BaseException: raise finally: tree.close() # Get time indexes time_var_name = find_appropriate_time(variable_object, ncd.get_variables_by_attributes(standard_name='time')) time_var = ncd.variables[time_var_name] if hasattr(time_var, 'calendar'): calendar = time_var.calendar else: calendar = 'gregorian' start_nc_num = round(nc4.date2num(request.GET['starting'], units=time_var.units, calendar=calendar)) end_nc_num = round(nc4.date2num(request.GET['ending'], units=time_var.units, calendar=calendar)) all_times = time_var[:] start_nc_index = bisect.bisect_right(all_times, start_nc_num) end_nc_index = bisect.bisect_right(all_times, end_nc_num) try: all_times[start_nc_index] except IndexError: start_nc_index = all_times.size - 1 try: all_times[end_nc_index] except IndexError: end_nc_index = all_times.size - 1 if start_nc_index == end_nc_index: if start_nc_index > 0: start_nc_index -= 1 elif end_nc_index < all_times.size: end_nc_index += 1 return_dates = nc4.num2date(all_times[start_nc_index:end_nc_index], units=time_var.units, calendar=calendar) return geo_index, closest_x, closest_y, start_nc_index, end_nc_index, return_dates
def download_time(self, dataset): """ """ self.logger.debug("Downloading time") t = dataset['time'][:] if 't_ini' not in self.cfg['limits']: if 'd_ini' in self.cfg['limits']: assert type(self.cfg['limits']['d_ini']) == datetime, \ "limits:d_ini must be a datetime" d = date2num(self.cfg['limits']['d_ini'], dataset['time'].attributes['units']) self.cfg['limits']['t_ini'] = np.nonzero(t>=d)[0][0] else: self.cfg['limits']['t_ini'] = 0 self.logger.debug("Setting t_ini: %s" % self.cfg['limits']['t_ini']) if 't_step' not in self.cfg['limits']: self.cfg['limits']['t_step'] = 1 self.logger.debug("Setting t_step: %s" % self.cfg['limits']['t_step']) if 't_fin' not in self.cfg['limits']: if 'd_fin' in self.cfg['limits']: assert type(self.cfg['limits']['d_fin']) == datetime, \ "limits:d_ini must be a datetime" d = date2num(self.cfg['limits']['d_fin'], dataset['time'].attributes['units']) self.cfg['limits']['t_fin'] = np.nonzero(t>d)[0][0] else: self.cfg['limits']['t_fin'] = dataset['time'].shape[0] self.logger.debug("Setting t_fin: %s" % self.cfg['limits']['t_fin']) t_ini = self.cfg['limits']['t_ini'] t_fin = self.cfg['limits']['t_fin'] t_step = self.cfg['limits']['t_step'] # ---- data={} # #from coards import from_udunits #t0=datetime(1950,1,1) #if (re.match('^hours since \d{4}-\d{2}-\d{2}$',dataset_h['time'].attributes['units'])): #if (re.match('^hours since 1950-01-01',self.dataset['h']['time'].attributes['units'])): # t = self.dataset['h']['time'][t_ini:t_fin:t_step].tolist() # data['datetime'] = numpy.array([t0+timedelta(hours=h) for h in t]) #else: # self.logger.error("Problems interpreting the time") t = dataset['time'][t_ini:t_fin:t_step].tolist() self.nc.createDimension('time', len(range(t_ini,t_fin,t_step))) nct = self.nc.createVariable('time', 'f8', ('time', )) nct[:] = t nct.units = dataset['time'].attributes['units']
def __next_update_out_data(self): """ Determine the count for when the next write should occur """ # ------------------------------------------------------------ # # If monthly, write at (YYYY,MM,1,0,0) # b0 is first timestep of next period # b1 is end of last timestep of next period b0 = self._time_ord self._begtime = b0 if self._nhtfrq == 0: if self._timestamp.month == 12: b1 = date2num(datetime(self._timestamp.year + 1, 2, 1), TIMEUNITS, calendar=self._calendar) else: b1 = date2num(datetime(self._timestamp.year, self._timestamp.month + 1, 1), TIMEUNITS, calendar=self._calendar) # If some hours in the future elif self._nhtfrq < 0: b1 = b0 - (self._nhtfrq / HOURSPERDAY) # If some dts in the future else: b1 = b0 + (self._nhtfrq * self._dt / SECSPERDAY) # ------------------------------------------------------------ # # ------------------------------------------------------------ # # Get the number of timesteps and datestamp for the next write # next_ord is the ord_time when the write will happen self._update_count = int(round((b1 - b0) / (self._dt / SECSPERDAY))) # ------------------------------------------------------------ # # ------------------------------------------------------------ # # Get next file names and timeord if self._avgflag == 'I': self._write_ord = b1 self.filename = num2date(b1, TIMEUNITS, calendar=self._calendar).strftime(self._fname_format) else: self._time_bnds = np.array([[b0, b1]]) self._write_ord = np.average(self._time_bnds) self.filename = num2date(b0, TIMEUNITS, calendar=self._calendar).strftime(self._fname_format) self.rest_filename = num2date(b1, TIMEUNITS, calendar=self._calendar).strftime(self._fname_format) # ------------------------------------------------------------ # # ------------------------------------------------------------ # # Set the count to zero self._count = 0
def check_dates(beginDate, endDate):
    begin_DT = datetime.strptime(beginDate, '%Y-%m-%dT%H:%M:%SZ')
    beginDT = int(nc.date2num(begin_DT, 'seconds since 1970-01-01') * 1000)
    try:
        end_DT = datetime.strptime(endDate, '%Y-%m-%dT%H:%M:%SZ')
        endDT = int(nc.date2num(end_DT, 'seconds since 1970-01-01') * 1000)
        if endDT >= beginDT:
            return beginDT, endDT
        else:
            raise Exception('beginDate (%s) is after endDate (%s)' % (begin_DT, end_DT))
    except ValueError:
        endDT = ''
        return beginDT, endDT
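# Hypothetical usage sketch (not part of the original source): check_dates()
# expects ISO 8601 strings and returns integer milliseconds since 1970-01-01.
begin_ms, end_ms = check_dates('2016-01-01T00:00:00Z', '2016-06-30T00:00:00Z')
print(begin_ms, end_ms)  # 1451606400000 1467244800000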
def read_variable(source_file, variable, date, date_to=None):
    """Gets images from a netCDF file.

    Reads the image for a specific date. If date_to is given, it will return
    multiple images in a multidimensional numpy.ndarray

    Parameters
    ----------
    source_file : str
        Path to source file.
    variable : str
        Requested variable of image.
    date : datetime.datetime
        Date of the image, start date of data cube if date_to is set.
    date_to : datetime.date, optional
        End date of data cube to slice from NetCDF file.

    Returns
    -------
    image : numpy.ndarray
        Image for a specific date.
    lon : numpy.array
        Longitudes of the image.
    lat : numpy.array
        Latitudes of the image.
    metadata : dict of strings
        Metadata from source netCDF file.
    """
    with Dataset(source_file, 'r', format='NETCDF4') as nc:
        times = nc.variables['time']
        lon = nc.variables['lon'][:]
        lat = nc.variables['lat'][:]
        var = nc.variables[variable]
        metadata = {}
        for attr in var.ncattrs():
            if attr[0] != '_' and attr != 'scale_factor':
                metadata[attr] = var.getncattr(attr)
        numdate = date2num(date, units=times.units, calendar=times.calendar)
        if date_to is None:
            image = var[np.where(times[:] == numdate)[0][0]]
        else:
            numdate_to = date2num(date_to, units=times.units, calendar=times.calendar)
            subset = np.where((times[:] >= numdate) & (times[:] <= numdate_to))
            image = var[subset]
    return image, lon, lat, metadata
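# Hypothetical usage sketch (not part of the original source): the file path
# and variable name are placeholders for an actual gridded netCDF product.
import datetime

image, lon, lat, metadata = read_variable('data/soil_moisture.nc', 'sm',
                                          datetime.datetime(2015, 1, 1))
print(image.shape, metadata.get('units'))

# passing date_to returns a stack of images along time instead of a single 2-D image
cube, lon, lat, metadata = read_variable('data/soil_moisture.nc', 'sm',
                                         datetime.datetime(2015, 1, 1),
                                         date_to=datetime.datetime(2015, 1, 31))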
def plot_sta(self, ax, vname, station, date, label): print vname, station, date, label if label == 'Free': nc = self.free if self.t_free is None: ocean_time = nc.variables['ocean_time'][:] time = netCDF4.date2num(date, self.sta_JST) if type(time) == np.int64: t = np.where(ocean_time == time)[0][0] else: t0 = np.where(ocean_time == time[0])[0][0] t1 = np.where(ocean_time == time[1])[0][0] t = np.arange(t0, t1) self.t_free = t else: t = self.t_free elif label == 'Assi': nc = self.assi if self.t_assi is None: ocean_time = nc.variables['ocean_time'][:] time = netCDF4.date2num(date, self.sta_JST) t = np.where(ocean_time == time)[0][0] self.t_assi = t else: t = self.t_assi if vname == 'DIN': NH4 = nc.variables['NH4'][t,station-1,:] NO3 = nc.variables['NO3'][t,station-1,:] var = NH4 + NO3 elif vname == 'PON': LDeN = nc.variables['LdetritusN'][t,station-1,:] SDeN = nc.variables['SdetritusN'][t,station-1,:] var = LDeN + SDeN elif vname == 'POP': LDeP = nc.variables['LdetritusP'][t,station-1,:] SDeP = nc.variables['SdetritusP'][t,station-1,:] var = LDeP + SDeP else: var = nc.variables[vname][t,station-1,:] line = {'Free': '--', 'Assi': '-'} if type(t) == np.int64: depth = self.calculate_depth(nc, t, station) ax.plot(var, depth, line[label], label=label) else: depth = self.calculate_depth(nc, t[0], station) #ax.errorbar(np.mean(var, axis=0), depth, xerr=np.std(var, axis=0), fmt=line[label], label=label) mean = np.mean(var, axis=0) std = np.std(var, axis=0) #ax.fill_betweenx(depth, mean-std, mean+std, label=label) #ax.plot(mean, depth, line[label]) special.errorfill(mean, depth, xerr=std, label=label, ax=ax, alpha_fill=0.1)
def subset(self, **kwargs):
    newargs = {}
    for name, value in kwargs.items():
        if name in self.coords:
            coord = self.coords[name]
            #print("subset ", name, " using ", coord[:], coord._subset, value)
            if type(value) == tuple:
                start, stop = value
            else:
                start = stop = value
            # try and coerce into datetimes
            try:
                start = netCDF4.date2num(parser.parse(start), coord.attributes['units'])
            except:
                pass
            try:
                stop = netCDF4.date2num(parser.parse(stop), coord.attributes['units'])
            except:
                pass
            print("subset start, stop ", start, stop)
            # Get the actual coordinate values
            coord_vals = coord[:]
            # If we have dates then we try and convert coordinate values to dates:
            if type(start) == datetime.datetime:
                try:
                    coord_vals = netCDF4.num2date(coord_vals, units=coord.units, calendar=coord.calendar)
                except:
                    pass
            start_index = np.argmin(np.abs(coord_vals - start))
            stop_index = np.argmin(np.abs(coord_vals - stop))
            # We might need to swap around
            if stop_index < start_index:
                tmp = stop_index
                stop_index = start_index
                start_index = tmp
            newargs[name] = slice(start_index, stop_index + 1)
    # print('newargs = ', newargs)
    return self.isubset_copy(**newargs)
def get_climatologic_field(self, varname = "mrro", gcm = "", rcm = "", start_year = None, end_year = None, months = None ): """ for time t: start_year <= t <= end_year """ mfds = MFDataset("{0}/{1}-{2}/current/{3}_*.nc".format(self.folder_with_nc_data, gcm, rcm, varname)) self.lon2d = mfds.variables[self.lon_name][:].transpose() self.lat2d = mfds.variables[self.lat_name][:].transpose() self._init_kd_tree() cache_file = self._get_clim_cache_file_path(varname = varname, gcm=gcm, rcm = rcm, start_year=start_year, end_year=end_year, months=months) cache_file = os.path.join(self.cache_files_folder, cache_file) if os.path.isfile(cache_file): f = open(cache_file) mfds.close() return pickle.load(f) t = mfds.variables["time"] t_units = t.units t_calendar = t.calendar t_start = date2num(datetime(start_year, 1,1), t_units, calendar=t_calendar) t_end = date2num(datetime(end_year+1, 1,1), t_units, calendar=t_calendar) t = t[:] t_sel = t[(t_start <= t) & (t < t_end)] dates_sel = num2date(t_sel, t_units, calendar=t_calendar) bool_vect = np.array( [x.month in months for x in dates_sel], dtype=np.bool ) data_sel = mfds.variables[varname][ np.where( (t_start <= t) & (t < t_end) )[0],:,:] #save results to a cache file for reuse result = data_sel[bool_vect,:,:].mean(axis = 0).transpose() pickle.dump(result, open(cache_file,"w")) mfds.close() return result #because in the file the axes are inversed
def prepare_nc(trgFile, timeList, x, y, metadata, logger, units='Days since 1900-01-01 00:00:00', calendar='gregorian',Format="NETCDF4",complevel=9,zlib=True,least_significant_digit=None): """ This function prepares a NetCDF file with given metadata, for a certain year, daily basis data The function assumes a gregorian calendar and a time unit 'Days since 1900-01-01 00:00:00' """ import datetime as dt logger.info('Setting up netcdf output: ' + trgFile) startDayNr = netCDF4.date2num(timeList[0].replace(tzinfo=None), units=units, calendar=calendar) endDayNr = netCDF4.date2num(timeList[-1].replace(tzinfo=None), units=units, calendar=calendar) time = arange(startDayNr,endDayNr+1) nc_trg = netCDF4.Dataset(trgFile,'w',format=Format,zlib=zlib,complevel=complevel) logger.info('Setting up dimensions and attributes. Steps: ' + str(len(timeList)) + ' lat: ' + str(len(y))+ " lon: " + str(len(x))) if len(time) ==1: nc_trg.createDimension('time', 1) else: nc_trg.createDimension('time', 0) #NrOfDays*8 nc_trg.createDimension('lat', len(y)) nc_trg.createDimension('lon', len(x)) DateHour = nc_trg.createVariable('time','f8',('time',),fill_value=-9999., zlib=zlib,complevel=complevel) DateHour.units = units DateHour.calendar = calendar DateHour.standard_name = 'time' DateHour.long_name = 'time' DateHour.axis = 'T' DateHour[:] = time y_var = nc_trg.createVariable('lat','f4',('lat',),fill_value=-9999., zlib=zlib,complevel=complevel) y_var.standard_name = 'latitude' y_var.long_name = 'latitude' y_var.units = 'degrees_north' y_var.axis = 'Y' x_var = nc_trg.createVariable('lon','f4',('lon',),fill_value=-9999., zlib=zlib,complevel=complevel) x_var.standard_name = 'longitude' x_var.long_name = 'longitude' x_var.units = 'degrees_east' x_var.axis = 'X' y_var[:] = y x_var[:] = x projection= nc_trg.createVariable('projection','c') projection.long_name = 'wgs84' projection.EPSG_code = 'EPSG:4326' projection.proj4_params = '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs' projection.grid_mapping_name = 'latitude_longitude' # now add all attributes from user-defined metadata for attr in metadata: nc_trg.setncattr(attr, metadata[attr]) nc_trg.sync() nc_trg.close()
def add_time_bounds(self, delta=None, position=None): self.nc.createDimension("bounds") time_bounds = self.nc.createVariable('{}_bounds'.format(self.time_axis_name), "f8", ("time", "bounds",), chunksizes=(1000, 2,)) time_bounds.units = "seconds since 1970-01-01T00:00:00Z" time_bounds.calendar = "gregorian" time_objs = netCDF4.num2date(self.time[:], units=self.time.units, calendar=self.time.calendar) bounds_kwargs = dict(units=time_bounds.units, calendar=time_bounds.calendar) if position == "start": time_bounds[:] = np.asarray(zip(self.time[:], netCDF4.date2num(time_objs + delta, **bounds_kwargs))) elif position == "middle": time_bounds[:] = np.asarray(zip(netCDF4.date2num(time_objs - delta/2, **bounds_kwargs), netCDF4.date2num(time_objs + delta/2, **bounds_kwargs))) elif position == "end": time_bounds[:] = np.asarray(zip(netCDF4.date2num(time_objs - delta, **bounds_kwargs), self.time[:]))
def date2num(python_datetime, tzinfo=None):
    tz = tzinfo or pytz.utc
    try:
        python_datetime[0]
    except TypeError:
        python_datetime = [python_datetime]
    return netCDF4.date2num([
        d.astimezone(tz).replace(tzinfo=None) for d in python_datetime
    ], timevar_units, calendar='proleptic_gregorian')
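# Hypothetical usage sketch (not part of the original source): timevar_units is
# assumed to be a module-level CF units string such as
# 'seconds since 1970-01-01 00:00:00'.
import pytz
from datetime import datetime

eastern = pytz.timezone('US/Eastern')
d = eastern.localize(datetime(2020, 6, 1, 12, 0))
print(date2num(d))        # scalar input is wrapped into a list internally
print(date2num([d, d]))   # sequences of aware datetimes work as well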
def read_values(self, fieldname, slices=None): """ Read the values of a field. `slices` is optional. When provided, give for each dimension the corresponding python slice object to subset this dimension. Only the dimensions to be subsetted need to be provided, by default the full dilmension length is read. .. code-block:: python # extracting a subset of a field with slices data = fd.read_values('owiWindSpeed', slices={'row':slice(10,20), 'cell':slice(30, 40)}) :param fieldname: name of the field :type fieldname: str :param slices: dimensions slices, when reading a subset only for some dimensions :type slices: Dict<str, slice> """ if fieldname == 'time': # accounts for virtual variable timeval = self.get_start_time() return numpy.ma.array([date2num( timeval, TIME_CONVENTION) ]) else: return super(SAFEOCNNCFile, self).read_values(fieldname, slices=slices)
def write(self, f, idx, stid, date, meteo, ctx):
    """ Write single record to file """
    f.variables['Time'][idx] = date2num(date, units=f.variables['Time'].units,
                                        calendar='standard')
    f.variables['STID'][idx] = int(stid)
    for key, val in meteo.items():
        f.variables[key][idx, :len(val)] = val
    for key, val in ctx.items():
        if key == 'SOBS':
            f.variables[key][idx] = date2num(val, units=f.variables[key].units,
                                             calendar=f.variables[key].calendar)
        else:
            f.variables[key][idx] = val
def _unify_times_for_radars(radars):
    """ Return unified start times and units for a number of radars. """
    dates = [netCDF4.num2date(radar.time['data'][0], radar.time['units'])
             for radar in radars]
    units = make_time_unit_str(min(dates))
    times = netCDF4.date2num(dates, units)
    return times, units
def get_tind_from_bounds(self, var, bounds, convert=False, use_cache=True):
    assert var in self._current_variables
    time = self.gettimevar(var, use_cache)
    if convert:
        bounds = netCDF4.date2num(bounds, time._units + " since " + time.origin.isoformat())
    inds = np.where(np.logical_and(time.dates >= bounds[0].replace(tzinfo=time.dates[0].tzinfo),
                                   time.dates <= bounds[1].replace(tzinfo=time.dates[0].tzinfo)))
    return inds
def BIL2netCDF(BILfile, BILdtype='uint16', outdir=os.getcwd(), theme_name='undefined',
               theme_unit='undefined', long_name='undefined'):
    '''
    Convenience function converting the given BIL file to netCDF.

    @param BILfile: Input BIL file.
    @param outdir: Output directory.
    @param theme_name: Short name for the theme.
    @param theme_unit: Metric unit of the theme data.
    @param long_name: Descriptive name for the theme.

    @return: netCDF file in the output directory.
    '''
    ncfilename = os.path.splitext(os.path.basename(BILfile))[0] + '.nc'
    bd = BILdata(BILfile, BILdtype)
    bd.read()
    yy, mm, dd = "13", "09", "11"
    tstring = yy + '-' + mm + '-' + dd + ' 06:00:00'
    secs = date2num(iso2datetime(tstring), timeunit)
    mask = flipud(senorge_mask())
    # array needs to be flipped up-down and transposed to fit the coordinate system
    ncdata = flipud(int16(bd.data))
    ncdata[mask] = get_FillValue(ncdata.dtype)
    ncfile = NCdata(os.path.join(outdir, ncfilename))
    # ncfile.zip = True
    ncfile.new(secs)
    print(ncdata.dtype.str)
    ncfile.add_variable(theme_name, ncdata.dtype.str, theme_unit, long_name, ncdata, lsd=1)
    ncfile.close()
def read_osaka(csvfile):
    """
    csvfile is from dbfile.
    """
    global time_out
    data = pd.read_csv(csvfile, parse_dates=[['date', 'hour']],
                       date_parser=_parse, na_values='--')
    data = data.interpolate()
    data = data.fillna(method='bfill')
    data['mjd'] = data['date_hour'].apply(
        lambda t: netCDF4.date2num(t, 'days since 1968-05-23 09:00:00 GMT'))
    print(data.describe())
    # set data
    nt = len(data)
    time_out = data.mjd.values
    tair_out = np.ndarray(shape=[nt, eta_rho, xi_rho])
    pair_out = np.ndarray(shape=[nt, eta_rho, xi_rho])
    qair_out = np.ndarray(shape=[nt, eta_rho, xi_rho])
    cloud_out = np.ndarray(shape=[nt, eta_rho, xi_rho])
    rain_out = np.ndarray(shape=[nt, eta_rho, xi_rho])
    swrad_out = np.ndarray(shape=[nt, eta_rho, xi_rho])
    for eta in range(eta_rho):
        for xi in range(xi_rho):
            tair_out[:, eta, xi] = data.temperature              # C
            pair_out[:, eta, xi] = data.air_pressure             # 1 millibar = 1 hPa
            qair_out[:, eta, xi] = data.humidity                 # %
            cloud_out[:, eta, xi] = data.cloud / 10.0            # 0-10
            rain_out[:, eta, xi] = data.precipitation / 3600.0   # 1 kg/m2/s = 3600 mm/h
            swrad_out[:, eta, xi] = data.radiation * 10**6 / 3600  # 1 W/m2 = 3600/(10**6) MJ/m2/h
    return tair_out, pair_out, qair_out, cloud_out, rain_out, swrad_out
def read_values(self, fieldname, slices=None): """Read the data of a field. Args: fieldname (str): name of the field which to read the data from slices (list of slice, optional): list of slices for the field if subsetting is requested. A slice must then be provided for each field dimension. The slices are relative to the opened view (see :func:open) if a view was set when opening the file. Return: MaskedArray: array of data read. Array type is the same as the storage type. """ if fieldname == 'time': dummy = datetime.datetime(2000,1,1) # dummy = datetime.datetime.max values = netCDF4.date2num(dummy,VIRTUALFIELD_UNITS[fieldname]) elif fieldname =='landmask': values = super(LandMaskNCFile, self).read_values(VIRTUALFIELD_STDNAME[fieldname], slices) #apply convention sea=-1 and land=0 values[values==0] = -1 values[values==1] = 0 else: values = super(LandMaskNCFile, self).read_values(fieldname, slices) values.mask = False return values
def convertAllDataToNetcdf(self, pathToNetCDF='data/swe_ross_brown/swe.nc4'):
    ds = Dataset(pathToNetCDF, mode='w', format='NETCDF4_CLASSIC')
    ds.createDimension('time', len(self.all_dates))
    ds.createDimension('lon', self.lons2d.shape[0])
    ds.createDimension('lat', self.lons2d.shape[1])

    lonVariable = ds.createVariable('longitude', 'f4', ('lon', 'lat'))
    latVariable = ds.createVariable('latitude', 'f4', ('lon', 'lat'))
    sweVariable = ds.createVariable('SWE', 'f4', ('time', 'lon', 'lat'))
    sweVariable.units = 'mm of equivalent water'

    timeVariable = ds.createVariable('time', 'i4', ('time',))
    ncSinceFormat = '%Y-%m-%d %H:%M:%S'
    timeVariable.units = 'hours since ' + self.getStartDate().strftime(ncSinceFormat)

    lonVariable[:] = self.lons2d
    latVariable[:] = self.lats2d

    nDates = len(self.all_dates)
    for i, theDate in enumerate(self.all_dates):
        filePath = os.path.join(self.root_path, theDate.strftime(self.file_name_format) + ".txt")
        data = self._readFromTxtFile(filePath)
        sweVariable[i, :, :] = data[:, :]
        print(' {0} / {1} '.format(i, nDates))

    timeVariable[:] = date2num(self.all_dates, units=timeVariable.units)
    ds.close()
def encode_cf_datetime(dates, units=None, calendar=None):
    """Given an array of datetime objects, returns the tuple `(num, units, calendar)`
    suitable for a CF compliant time variable.

    Unlike encode_cf_datetime, this function does not (yet) speed up encoding
    of datetime64 arrays. However, unlike `date2num`, it can handle datetime64
    arrays.

    See also
    --------
    netCDF4.date2num
    """
    import netCDF4 as nc4
    dates = np.asarray(dates)
    if units is None:
        units = guess_time_units(dates)
    if calendar is None:
        calendar = 'proleptic_gregorian'
    if np.issubdtype(dates.dtype, np.datetime64):
        # for now, don't bother doing any trickery like decode_cf_datetime to
        # convert dates to numbers faster
        # note: numpy's broken datetime conversion only works for us precision
        dates = dates.astype('M8[us]').astype(datetime)
    if hasattr(dates, 'ndim') and dates.ndim == 0:
        # unpack dates because date2num doesn't like 0-dimensional arguments
        dates = dates.item()
    num = nc4.date2num(dates, units, calendar)
    return (num, units, calendar)
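# Hypothetical usage sketch (not part of the original source): guess_time_units()
# is an external helper assumed to exist in the same module when units is None,
# so explicit units are passed here.
import numpy as np
from datetime import datetime

num, units, calendar = encode_cf_datetime(
    [datetime(2000, 1, 1), datetime(2000, 1, 2)],
    units='days since 2000-01-01', calendar='standard')
print(num)  # [0. 1.]

# datetime64 arrays are converted to python datetimes before calling date2num
num, units, calendar = encode_cf_datetime(
    np.array(['2000-01-01', '2000-01-02'], dtype='datetime64[ns]'),
    units='days since 2000-01-01')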
def create_monthly_climate_file(self, time, prcp, temp, grad, hgt): """Creates a netCDF4 file with climate data. See climate.distribute_climate_data""" nc = netCDF4.Dataset(self.get_filepath('climate_monthly'), 'w', format='NETCDF4') nc.ref_hgt = hgt dtime = nc.createDimension('time', None) nc.author = 'OGGM' nc.author_info = 'Open Global Glacier Model' timev = nc.createVariable('time','i4',('time',)) timev.setncatts({'units':'days since 1801-01-01 00:00:00'}) timev[:] = netCDF4.date2num([t for t in time], 'days since 1801-01-01 00:00:00') v = nc.createVariable('prcp', 'f4', ('time',), zlib=True) v.units = 'kg m-2' v.long_name = 'total precipitation amount' v[:] = prcp v = nc.createVariable('temp', 'f4', ('time',), zlib=True) v.units = 'degC' v.long_name = '2m temperature at height ref_hgt' v[:] = temp v = nc.createVariable('grad', 'f4', ('time',), zlib=True) v.units = 'degC m-1' v.long_name = 'temperature gradient' v[:] = grad nc.close()
with Dataset(f_out, mode='w', format='NETCDF3_64BIT_OFFSET') as ds_dest: # Dimensions for name in ['latitude', 'longitude']: dim_src = ds_src.dimensions[name] ds_dest.createDimension(name, dim_src.size) var_src = ds_src.variables[name] var_dest = ds_dest.createVariable(name, var_src.datatype, (name, )) var_dest[:] = var_src[:] var_dest.setncattr('units', var_src.units) var_dest.setncattr('long_name', var_src.long_name) ds_dest.createDimension('time', None) var = ds_dest.createVariable('time', np.int32, ('time', )) time_units = 'hours since 1900-01-01 00:00:00' time_cal = 'gregorian' var[:] = date2num([d], units=time_units, calendar=time_cal) var.setncattr('units', time_units) var.setncattr('long_name', 'time') var.setncattr('calendar', time_cal) # Variables var = ds_dest.createVariable(var_tp.name, np.double, var_tp.dimensions) var[0, :, :] = data var.setncattr('units', var_tp.units) var.setncattr('long_name', var_tp.long_name) # Attributes ds_dest.setncattr('Conventions', 'CF-1.6') ds_dest.setncattr( 'history', '%s %s' % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), ' '.join(
# Convert the profile data to standard units in TAMOC profile, units = ambient.convert_units(data, var_units) # Create an empty netCDF4-classic dataset to store this CTD data __location__ = os.path.realpath( os.path.join(os.getcwd(), os.path.dirname(__file__), '../../test/output')) nc_file = os.path.join(__location__, 'BM54.nc') summary = 'Dataset created by profile_from_ctd in the ./bin directory' \ + ' of TAMOC' source = 'R/V Brooks McCall, station BM54' sea_name = 'Gulf of Mexico' p_lat = 28.0 + 43.945 / 60.0 p_lon = 360 - (88.0 + 22.607 / 60.0) p_time = date2num(datetime(2010, 5, 30, 18, 22, 12), units='seconds since 1970-01-01 00:00:00 0:00', calendar='julian') nc = ambient.create_nc_db(nc_file, summary, source, sea_name, p_lat, p_lon, p_time) # Insert the CTD data into the netCDF dataset comments = ['measured'] * len(var_names) nc = ambient.fill_nc_db(nc, profile, var_names, units, comments, z_col) # Create an ambient.Profile object for this dataset bm54 = ambient.Profile(nc, chem_names=['oxygen'], err=0.00001) # Close the netCDF dataset bm54.nc.close() # Since the netCDF file is now fully stored on the hard drive in the
# assign values to levels dimension variable. levels[:] = [1000., 850., 700., 500., 300., 250., 200., 150., 100., 50.] # fancy slicing tempdat = temp[::2, [1, 3, 6], lats > 0, lons > 0] print('shape of fancy temp slice = ', tempdat.shape) print(temp[0, 0, [0, 1, 2, 3], [0, 1, 2, 3]].shape) # fill in times. from datetime import datetime, timedelta from netCDF4 import num2date, date2num, date2index dates = [ datetime(2001, 3, 1) + n * timedelta(hours=12) for n in range(temp.shape[0]) ] times[:] = date2num(dates, units=times.units, calendar=times.calendar) print('time values (in units %s): ' % times.units + '\\n', times[:]) dates = num2date(times[:], units=times.units, calendar=times.calendar) print('dates corresponding to time values:\\n', dates) rootgrp.close() # create a series of netCDF files with a variable sharing # the same unlimited dimension. for nfile in range(10): f = Dataset('mftest' + repr(nfile) + '.nc', 'w', format='NETCDF4_CLASSIC') f.createDimension('x', None) x = f.createVariable('x', 'i', ('x', )) x[0:10] = numpy.arange(nfile * 10, 10 * (nfile + 1)) f.close() # now read all those files in at once, in one Dataset.
def remapBC(src_file, src_varname, wts_file, src_grd, dst_grd, dst_file, dxy=20, cdepth=0, kk=2, verbose=True): if src_varname == 'surf_el': pos = 't' Cpos = 'rho' Mp, Lp = dst_grd.hgrid.mask_rho.shape z = src_grd.z_t #wts_file = 'remap_weights_%s_to_%s_bilinear_t_to_rho.nc'%(src_name,dst_name) dst_varname = 'zeta' dimensions = ('ocean_time', 'eta_rho', 'xi_rho') long_name = 'free-surface' dst_varname_north = 'zeta_north' dimensions_north = ('ocean_time', 'xi_rho') long_name_north = 'free-surface north boundary condition' field_north = 'zeta_north, scalar, series' dst_varname_south = 'zeta_south' dimensions_south = ('ocean_time', 'xi_rho') long_name_south = 'free-surface south boundary condition' field_south = 'zeta_south, scalar, series' dst_varname_east = 'zeta_east' dimensions_east = ('ocean_time', 'eta_rho') long_name_east = 'free-surface east boundary condition' field_east = 'zeta_east, scalar, series' dst_varname_west = 'zeta_west' dimensions_west = ('ocean_time', 'eta_rho') long_name_west = 'free-surface west boundary condition' field_west = 'zeta_west, scalar, series' units = 'meter' elif src_varname == 'water_temp': pos = 't' Cpos = 'rho' Mp, Lp = dst_grd.hgrid.mask_rho.shape z = src_grd.z_t #wts_file = 'remap_weights_%s_to_%s_bilinear_t_to_rho.nc'%(src_name,dst_name) dst_varname = 'temp' dst_varname_north = 'temp_north' dimensions_north = ('ocean_time', 's_rho', 'xi_rho') long_name_north = 'potential temperature north boundary condition' field_north = 'temp_north, scalar, series' dst_varname_south = 'temp_south' dimensions_south = ('ocean_time', 's_rho', 'xi_rho') long_name_south = 'potential temperature south boundary condition' field_south = 'temp_south, scalar, series' dst_varname_east = 'temp_east' dimensions_east = ('ocean_time', 's_rho', 'eta_rho') long_name_east = 'potential temperature east boundary condition' field_east = 'temp_east, scalar, series' dst_varname_west = 'temp_west' dimensions_west = ('ocean_time', 's_rho', 'eta_rho') long_name_west = 'potential temperature west boundary condition' field_west = 'temp_west, scalar, series' units = 'Celsius' elif src_varname == 'salinity': pos = 't' Cpos = 'rho' Mp, Lp = dst_grd.hgrid.mask_rho.shape z = src_grd.z_t #wts_file = 'remap_weights_%s_to_%s_bilinear_t_to_rho.nc'%(src_name,dst_name) dst_varname = 'salt' dst_varname_north = 'salt_north' dimensions_north = ('ocean_time', 's_rho', 'xi_rho') long_name_north = 'salinity north boundary condition' field_north = 'salt_north, scalar, series' dst_varname_south = 'salt_south' dimensions_south = ('ocean_time', 's_rho', 'xi_rho') long_name_south = 'salinity south boundary condition' field_south = 'salt_south, scalar, series' dst_varname_east = 'salt_east' dimensions_east = ('ocean_time', 's_rho', 'eta_rho') long_name_east = 'salinity east boundary condition' field_east = 'salt_east, scalar, series' dst_varname_west = 'salt_west' dimensions_west = ('ocean_time', 's_rho', 'eta_rho') long_name_west = 'salinity west boundary condition' field_west = 'salt_west, scalar, series' units = 'PSU' else: raise ValueError('Undefined src_varname') src_nc = Dataset(src_file) # read time and change units to 'days' for ROMS dtime = num2date(src_nc.variables['time'][0], units=src_nc['time'].units, calendar=src_nc['time'].calendar) time = date2num(dtime, units='days since 2000-01-01 00:00:00', calendar='gregorian') # get time nctime.long_name = 'time' nctime.units = 'days' # for ROMS #nctime.calendar = 'gregorian' # create destination file print('==>Info: Creating file', dst_file) if 
os.path.exists(dst_file) is True: os.remove(dst_file) pyroms_toolbox.nc_create_roms_bdry_file(dst_file, dst_grd, nctime) # open BC file dst_nc = Dataset(dst_file, 'a', format='NETCDF3_64BIT') #load var & get missing value spval = src_nc[src_varname]._FillValue src_var = src_nc[src_varname][0] # remove time # determine variable dimension #ndim = len(src_var.dimensions) ndim = len(src_var.shape) if ndim == 3: # build intermediate zgrid zlevel = -z[::-1, 0, 0] nzlevel = len(zlevel) dst_zcoord = pyroms.vgrid.z_coordinate(dst_grd.vgrid.h, zlevel, nzlevel) dst_grdz = pyroms.grid.ROMS_Grid(dst_file + '_Z', dst_grd.hgrid, dst_zcoord) src_var = src_var[:, 1:-1, 1:-1] # remove 4 corners else: src_var = src_var[1:-1, 1:-1] # remove 4 corners # create variable in boudary file if verbose: print('==>Info: Creating variable', dst_varname_north) _ = dst_nc.createVariable(dst_varname_north, 'f8', dimensions_north, fill_value=spval) dst_nc.variables[dst_varname_north].long_name = long_name_north dst_nc.variables[dst_varname_north].units = units dst_nc.variables[dst_varname_north].field = field_north if verbose: print('==>Info: Creating variable', dst_varname_south) _ = dst_nc.createVariable(dst_varname_south, 'f8', dimensions_south, fill_value=spval) dst_nc.variables[dst_varname_south].long_name = long_name_south dst_nc.variables[dst_varname_south].units = units dst_nc.variables[dst_varname_south].field = field_south if verbose: print('==>Info: Creating variable', dst_varname_east) _ = dst_nc.createVariable(dst_varname_east, 'f8', dimensions_east, fill_value=spval) dst_nc.variables[dst_varname_east].long_name = long_name_east dst_nc.variables[dst_varname_east].units = units dst_nc.variables[dst_varname_east].field = field_east if verbose: print('==>Info: Creating variable', dst_varname_west) _ = dst_nc.createVariable(dst_varname_west, 'f8', dimensions_west, fill_value=spval) dst_nc.variables[dst_varname_west].long_name = long_name_west dst_nc.variables[dst_varname_west].units = units dst_nc.variables[dst_varname_west].field = field_west # remapping if verbose: print('==>Info: remapping', dst_varname, ', time =', time) if ndim == 3: # flood the grid if verbose: print('==>Info: flood the grid.', src_var.shape) src_varz = pyroms_toolbox.Grid_HYCOM.flood_fast(src_var, src_grd, pos=pos, spval=spval, \ dxy=dxy, cdepth=cdepth, kk=kk, verbose=verbose) else: src_varz = src_var # horizontal interpolation using scrip weights if verbose: print('==>Info: horizontal interpolation using scrip weights\n', ' about to call remap %s\n' % wts_file, ' ', src_varz.shape) dst_varz = pyroms.remapping.remap(src_varz, wts_file, spval=spval) if ndim == 3: # vertical interpolation from standard z level to sigma if verbose: print( '==>Info: vertical interpolation from standard z level to sigma' ) dst_var_north = pyroms.remapping.z2roms(dst_varz[::-1, Mp-1:Mp, :], \ dst_grdz, dst_grd, Cpos=Cpos, spval=spval, \ flood=False, irange=(0,Lp), jrange=(Mp-1,Mp)) dst_var_south = pyroms.remapping.z2roms(dst_varz[::-1, 0:1, :], \ dst_grdz, dst_grd, Cpos=Cpos, spval=spval, \ flood=False, irange=(0,Lp), jrange=(0,1)) dst_var_east = pyroms.remapping.z2roms(dst_varz[::-1, :, Lp-1:Lp], \ dst_grdz, dst_grd, Cpos=Cpos, spval=spval, \ flood=False, irange=(Lp-1,Lp), jrange=(0,Mp)) dst_var_west = pyroms.remapping.z2roms(dst_varz[::-1, :, 0:1], \ dst_grdz, dst_grd, Cpos=Cpos, spval=spval, \ flood=False, irange=(0,1), jrange=(0,Mp)) else: dst_var_north = dst_varz[-1, :] dst_var_south = dst_varz[0, :] dst_var_east = dst_varz[:, -1] dst_var_west = dst_varz[:, 
0] # write data in destination file if verbose: print('==>Info: write data in destination file') dst_nc.variables['ocean_time'][0] = time dst_nc.variables[dst_varname_north][0] = np.squeeze(dst_var_north) dst_nc.variables[dst_varname_south][0] = np.squeeze(dst_var_south) dst_nc.variables[dst_varname_east][0] = np.squeeze(dst_var_east) dst_nc.variables[dst_varname_west][0] = np.squeeze(dst_var_west) # close destination file src_nc.close() dst_nc.close() if src_varname == 'surf_el': return dst_varz
# pH ph_var = ds.createVariable('ph', 'f4', ( 'profile', 'z', ), fill_value=-99.) ph_var.long_name = "pH" ph_var.standard_name = "sea_water_ph_reported_on_total_scale" ph_var.units = "1" ph_var.coordinates = "time lat lon z" for idx, filename in enumerate(infiles): f = PFile(filename) f.remove_upcast() time_var[idx] = date2num(f.meta['timestamp'], time_var.units) lat_var[idx] = f.meta['latitude'] lon_var[idx] = f.meta['longitude'] profile_var[idx] = f.meta['id'] ship_var[idx] = f.meta['ship'] trip_var[idx] = f.meta['trip'] cast_var[idx] = f.meta['cast'] mapping = { 'temp': temp_var, 'sal': sal_var, 'cond': cond_var, 'sigt': sigmat_var, 'oxy': oxygen_var, 'flor': flor_var, 'par': par_var,
def write_group(b, dataset, sensor, day, logfile): """Write variables and attributes associated to each group.""" # create group related with the sensor and associate with time grp = dataset.createGroup(sensor) # get sampling frequency and seconds per file N = b._getsecperfile(sensor) fs = b._getsampfreq(sensor) # compute the number of samples per day samples_per_day = int(fs * 60 * 60 * 24) samples_per_file = int(fs * N) # create time dimension and assing attributes grp.createDimension("time", samples_per_day) # write global attributes for each sensor for attr, val in b.metadata["sensors"][sensor].items(): if attr not in ["variables", "seconds_per_file"]: grp.setncattr(attr, val) # create variables write_variables(b.metadata, sensor, grp) # loop for each data date, end = day, day + dt.timedelta(days=1, seconds=-N) i, j = 0, samples_per_file # while date <= end: # progress bar progress = 100 - 100 * (end - date).total_seconds() / (3600 * 24) sys.stdout.write(f" {sensor:10s}: {progress:5.1f}% ---> {date}\r") sys.stdout.flush() # load data dic = b.read(sensor, date, logfile=open(logfile, "a")) # assign data to netctd variable for name, value in dic.items(): # write variables if name not in ["time"]: # check if variables is 2d or 1d array if value.ndim == 1: grp[name][i:j] = value else: grp[name][i:j, :] = value # time variable else: grp["time"][i:j] = nc.date2num(value, global_time_units) # update counter i, j = j, j + samples_per_file # update date date += dt.timedelta(seconds=N) # new line in the progress bar sys.stdout.write("\n")
def from_roms(roms_file, bry_file, grid=None, records=None, clobber=False, cdl=None): """ Given a ROMS history, average, or climatology file, generate boundary conditions on the same grid. Parameters ---------- roms_file : string or list, ROMS source (history, average, climatology file) bry_file : string, output boundary file, grid : seapy.model.grid or string, optional, ROMS grid for boundaries records : array, optional, record indices to put into the boundary clobber: bool, optional If True, clobber any existing files and recreate. If False, use the existing file definition cdl: string, optional, Use the specified CDL file as the definition for the new netCDF file. Returns ------- None """ if grid is None: grid = seapy.model.asgrid(roms_file) else: grid = seapy.model.asgrid(grid) ncroms = seapy.netcdf(roms_file) src_ref, time = seapy.roms.get_reftime(ncroms) records = np.arange(0, len(ncroms.variables[time][:])) \ if records is None else records # Create the boundary file and fill up the descriptive data ncbry = seapy.roms.ncgen.create_bry(bry_file, eta_rho=grid.eta_rho, xi_rho=grid.xi_rho, s_rho=grid.n, reftime=src_ref, cdl=cdl, clobber=clobber, title="generated from " + roms_file) brytime = seapy.roms.get_timevar(ncbry) grid.to_netcdf(ncbry) ncbry.variables["bry_time"][:] = netCDF4.date2num( netCDF4.num2date(ncroms.variables[time][records], ncroms.variables[time].units), ncbry.variables[brytime].units) for var in seapy.roms.fields: if var in ncroms.variables: for bry in sides: ndim = seapy.roms.fields[var]["dims"] if ndim == 3: ncbry.variables["_".join((var, bry))][:] = \ ncroms.variables[var][records, :, sides[bry].indices[0], sides[bry].indices[1]] elif ndim == 2: ncbry.variables["_".join((var, bry))][:] = \ ncroms.variables[var][records, sides[bry].indices[0], sides[bry].indices[1]] ncbry.close() pass
# Save outputs to netCDF files #============================================================================== out_nc.createDimension(x_coords_lab, krige_x_coords.shape[0]) out_nc.createDimension(y_coords_lab, krige_y_coords.shape[0]) out_nc.createDimension(time_dim_lab, fin_date_range.shape[0]) x_coords_nc = out_nc.createVariable(x_coords_lab, 'd', dimensions=x_coords_lab) x_coords_nc[:] = krige_x_coords y_coords_nc = out_nc.createVariable(y_coords_lab, 'd', dimensions=y_coords_lab) y_coords_nc[:] = krige_y_coords time_nc = out_nc.createVariable(time_dim_lab, 'd', dimensions=time_dim_lab) time_nc[:] = nc.date2num(fin_date_range.to_pydatetime(), units=nc_time_units, calendar=nc_calendar) time_nc.units = nc_time_units time_nc.calendar = nc_calendar if ord_krige_flag: ok_nc = out_nc.createVariable('OK', 'd', dimensions=(time_dim_lab, y_coords_lab, x_coords_lab)) ok_nc[:] = ord_krige_flds ok_nc.units = var_units ok_nc.standard_name = var_name + ' (ordinary kriging)' if sim_krige_flag: sim_nc = out_nc.createVariable('SK',
def gen_nc_bom_wave_dm_deployment(filepath, metadata, output_path): """ generate a FV01 NetCDF file of current data. :param filepath: the path to a wave file to parse :param metadata: metadata output from metadata_info function :param output_path: NetCDF file output path :return: output file path """ wave_df = parse_bom_wave(filepath) # only one file # subtract timezone to be in UTC wave_df['datetime'] = wave_df['datetime'].dt.tz_localize(None).astype('O').values - \ datetime.timedelta(hours=metadata['timezone']) var_mapping = param_mapping_parser(BOM_WAVE_PARAMETER_MAPPING) site_code = metadata['site_code'] nc_file_name = 'BOM_W_{date_start}_{site_code}_WAVERIDER_FV01_END-{date_end}.nc'.format( date_start=wave_df.datetime.dt.strftime('%Y%m%dT%H%M%SZ').values.min(), site_code=site_code, date_end=wave_df.datetime.dt.strftime('%Y%m%dT%H%M%SZ').values.max() ) temp_dir = tempfile.mkdtemp() nc_file_path = os.path.join(temp_dir, nc_file_name) try: with Dataset(nc_file_path, 'w', format='NETCDF4') as nc_file_obj: nc_file_obj.createDimension("TIME", wave_df.datetime.shape[0]) nc_file_obj.createDimension("station_id_strlen", 30) nc_file_obj.createVariable("LATITUDE", "d", fill_value=99999.) nc_file_obj.createVariable("LONGITUDE", "d", fill_value=99999.) nc_file_obj.createVariable("STATION_ID", "S1", ("TIME", "station_id_strlen")) nc_file_obj["LATITUDE"][:] = metadata['latitude'] nc_file_obj["LONGITUDE"][:] = metadata['longitude'] nc_file_obj["STATION_ID"][:] = [stringtochar(np.array(metadata['site_name'], 'S30'))] * wave_df.shape[0] var_time = nc_file_obj.createVariable("TIME", "d", "TIME") # add gatts and variable attributes as stored in config files generate_netcdf_att(nc_file_obj, NC_ATT_CONFIG, conf_file_point_of_truth=True) time_val_dateobj = date2num(wave_df.datetime.dt.to_pydatetime(), var_time.units, var_time.calendar) var_time[:] = time_val_dateobj df_varname_ls = list(wave_df[list(wave_df.keys())].columns.values) df_varname_ls.remove("datetime") for df_varname in df_varname_ls: df_varname_mapped_equivalent = df_varname mapped_varname = var_mapping.loc[df_varname_mapped_equivalent]['VARNAME'] dtype = wave_df[df_varname].values.dtype if dtype == np.dtype('int64'): dtype = np.dtype('int16') # short else: dtype = np.dtype('f') nc_file_obj.createVariable(mapped_varname, dtype, "TIME") set_var_attr(nc_file_obj, var_mapping, mapped_varname, df_varname_mapped_equivalent, dtype) setattr(nc_file_obj[mapped_varname], 'coordinates', "TIME LATITUDE LONGITUDE") nc_file_obj[mapped_varname][:] = wave_df[df_varname].values set_glob_attr(nc_file_obj, wave_df, metadata) # we do this for pipeline v2 os.chmod(nc_file_path, 0o664) shutil.move(nc_file_path, output_path) except Exception as err: logger.error(err) shutil.rmtree(temp_dir) return os.path.join(output_path, os.path.basename(nc_file_path))
# Setting Dimensions based on template file
for dName, dSize in template.dimensions.items():
    pacman.createDimension(dName, (None if dSize.isunlimited() else len(dSize)))

# Setting Variable Attributes and Setting values for Latitude and Longitude
for vName, vValue in template.variables.items():
    ref = pacman.createVariable(vName, vValue.datatype, vValue.dimensions)
    pacman[vName].setncatts(template[vName].__dict__)
    if vName in ["lat", "lon"]:
        pacman[vName][:] = template[vName][:]

# Concatenating values for Canint, SWE, and time
count = 0
for target in targets:
    pellet = Dataset(target, 'r', format="NETCDF4")
    for vName in pellet.variables.keys():
        if vName in ["Canint", "SWE"]:
            pacman[vName][count] = pellet[vName][:]
        if vName == "time":
            pelletTime = num2date(pellet[vName][:], pellet[vName].units, pellet[vName].calendar)[0]
            pacman[vName][count] = date2num(pelletTime, pacman[vName].units, pacman[vName].calendar)
    count += 1
    pellet.close()

# Done
pacman.close()
def netcdf_writer(profile_data, output_dir): """ Create a NetCDF from a parsed BUFR csv file :param profile_data: :param output_dir: :return: """ template = ImosTemplate.from_json(NC_JSON_TEMPLATE_BUFR) template.global_attributes.update(profile_data['profile_metadata']) time_val_dateobj = date2num(profile_data['profile_geotime']['date_utc'], template.variables['TIME']['units'], template.variables['TIME']['calendar']) template.variables['TIME']['_data'] = time_val_dateobj template.variables['LATITUDE']['_data'] = profile_data['profile_geotime'][ 'latitude'] template.variables['LONGITUDE']['_data'] = profile_data['profile_geotime'][ 'longitude'] template.variables['TEMP']['_data'] = profile_data['profile_data'][ 'temp'].values template.variables['DEPTH']['_data'] = profile_data['profile_data'][ 'depth'].values template.variables['TEMP_quality_control']['_data'] = np.int8( profile_data['profile_data']['glob_gtspp_temp']) template.variables['DEPTH_quality_control']['_data'] = np.int8( profile_data['profile_data']['glob_gtspp_depth']) _, _, coef_a, coef_b = get_fallrate_eq_coef(profile_data) template.variables['DEPTH'].update( {'fallrate_equation_coefficient_a': coef_a}) template.variables['DEPTH'].update( {'fallrate_equation_coefficient_b': coef_b}) # issue with the python-aodntools NetCDF writer package not updating some attributes to the correct type. # The following does this manually att_to_convert = ["flag_values", "valid_min", "valid_max"] for att in att_to_convert: for var in ['TEMP_quality_control', 'DEPTH_quality_control']: template.variables[var][att] = np.int8( template.variables[var][att]) template.add_extent_attributes() template.add_date_created_attribute() if get_info_xbt_config( profile_data['profile_metadata']['XBT_line'].replace('-', '/')): info_xbt_config = get_info_xbt_config( profile_data['profile_metadata']['XBT_line'].replace('-', '/')) abstract = info_xbt_config.get('abstract', 'No abstract') title = info_xbt_config.get('title', 'No title') else: raise MissingConfigParameterError( 'No XBT line info found for {line} from {config}'.format( config=XBT_CONFIG, line=profile_data['profile_metadata']['XBT_line'])) template.global_attributes.update({ 'abstract': abstract, 'title': title, 'featureType': 'profile', 'history': "{date_created}: file created".format( date_created=datetime.datetime.utcnow().strftime( '%Y-%m-%dT%H:%M:%SZ')) }) # dealing with dirty files missing info if not 'imo_number' in profile_data['profile_metadata']: nc_filename = 'IMOS_SOOP-XBT_T_{date_start}_{xbt_line}_FV00.nc'.format( date_start=datetime.datetime.strftime( profile_data['profile_geotime']['date_utc'], '%Y%m%dT%H%M%SZ'), xbt_line=profile_data['profile_metadata']['XBT_line']) else: nc_filename = 'IMOS_SOOP-XBT_T_{date_start}_{xbt_line}_FV00_ID_{imo_number}.nc'.format( date_start=datetime.datetime.strftime( profile_data['profile_geotime']['date_utc'], '%Y%m%dT%H%M%SZ'), xbt_line=profile_data['profile_metadata']['XBT_line'], imo_number=profile_data['profile_metadata']['imo_number']) netcdf_path = os.path.join(output_dir, nc_filename) template.to_netcdf(netcdf_path) return netcdf_path
def runTest(self): """testing netcdftime""" # test mixed julian/gregorian calendar # check attributes. self.assertTrue(self.cdftime_mixed.units == 'hours') self.assertTrue( repr(self.cdftime_mixed.origin) == ' 1-01-01 00:00:00') self.assertTrue( self.cdftime_mixed.unit_string == 'hours since 0001-01-01 00:00:00') self.assertTrue(self.cdftime_mixed.calendar == 'standard') # check date2num method. (date before switch) d = datetime(1582, 10, 4, 23) t1 = self.cdftime_mixed.date2num(d) assert_almost_equal(t1, 13865687.0) # check num2date method. d2 = self.cdftime_mixed.num2date(t1) self.assertTrue(str(d) == str(d2)) # this is a non-existant date, should raise ValueError. d = datetime(1582, 10, 5, 0) self.assertRaises(ValueError, self.cdftime_mixed.date2num, d) # check date2num/num2date with date after switch. d = datetime(1582, 10, 15, 0) t2 = self.cdftime_mixed.date2num(d) assert_almost_equal(t2, 13865688.0) d2 = self.cdftime_mixed.num2date(t2) self.assertTrue(str(d) == str(d2)) # check day of year. ndayr = d.timetuple()[7] self.assertTrue(ndayr == 288) # test using numpy arrays. t = numpy.arange(t2, t2 + 240.0, 12.) t = numpy.reshape(t, (4, 5)) d = self.cdftime_mixed.num2date(t) self.assertTrue(d.shape == t.shape) d_check = "1582-10-15 00:00:001582-10-15 12:00:001582-10-16 00:00:001582-10-16 12:00:001582-10-17 00:00:001582-10-17 12:00:001582-10-18 00:00:001582-10-18 12:00:001582-10-19 00:00:001582-10-19 12:00:001582-10-20 00:00:001582-10-20 12:00:001582-10-21 00:00:001582-10-21 12:00:001582-10-22 00:00:001582-10-22 12:00:001582-10-23 00:00:001582-10-23 12:00:001582-10-24 00:00:001582-10-24 12:00:00" d2 = [str(dd) for dd in d.flat] self.assertTrue(d_check == ''.join(d2)) # test julian calendar with numpy arrays d = self.cdftime_julian.num2date(t) self.assertTrue(d.shape == t.shape) d_check = "1582-10-05 00:00:001582-10-05 12:00:001582-10-06 00:00:001582-10-06 12:00:001582-10-07 00:00:001582-10-07 12:00:001582-10-08 00:00:001582-10-08 12:00:001582-10-09 00:00:001582-10-09 12:00:001582-10-10 00:00:001582-10-10 12:00:001582-10-11 00:00:001582-10-11 12:00:001582-10-12 00:00:001582-10-12 12:00:001582-10-13 00:00:001582-10-13 12:00:001582-10-14 00:00:001582-10-14 12:00:00" d2 = [str(dd) for dd in d.flat] self.assertTrue(d_check == ''.join(d2)) # test proleptic gregorian calendar. self.assertTrue(self.cdftime_pg.units == 'seconds') self.assertTrue(repr(self.cdftime_pg.origin) == ' 1-01-01 00:00:00') self.assertTrue( self.cdftime_pg.unit_string == 'seconds since 0001-01-01 00:00:00') self.assertTrue(self.cdftime_pg.calendar == 'proleptic_gregorian') # check date2num method. d = datetime(1990, 5, 5, 2, 17) t1 = numpy.around(self.cdftime_pg.date2num(d)) self.assertTrue(t1 == 62777470620.0) # check num2date method. d2 = self.cdftime_pg.num2date(t1) self.assertTrue(str(d) == str(d2)) # check day of year. ndayr = d.timetuple()[7] self.assertTrue(ndayr == 125) # check noleap calendar. # this is a non-existant date, should raise ValueError. self.assertRaises( ValueError, utime, 'days since 1600-02-29 00:00:00', calendar='noleap') self.assertTrue(self.cdftime_noleap.units == 'days') self.assertTrue( repr(self.cdftime_noleap.origin) == '1600-02-28 00:00:00') self.assertTrue( self.cdftime_noleap.unit_string == 'days since 1600-02-28 00:00:00') self.assertTrue(self.cdftime_noleap.calendar == 'noleap') assert_almost_equal( self.cdftime_noleap.date2num(self.cdftime_noleap.origin), 0.0) # check date2num method. 
d1 = datetime(2000, 2, 28) d2 = datetime(1600, 2, 28) t1 = self.cdftime_noleap.date2num(d1) t2 = self.cdftime_noleap.date2num(d2) assert_almost_equal(t1, 400 * 365.) assert_almost_equal(t2, 0.) t12 = self.cdftime_noleap.date2num([d1, d2]) assert_almost_equal(t12, [400 * 365., 0]) # check num2date method. d2 = self.cdftime_noleap.num2date(t1) self.assertTrue(str(d1) == str(d2)) # check day of year. ndayr = d2.timetuple()[7] self.assertTrue(ndayr == 59) # non-existant date, should raise ValueError. date = datetime(2000, 2, 29) self.assertRaises(ValueError, self.cdftime_noleap.date2num, date) # check all_leap calendar. self.assertTrue(self.cdftime_leap.units == 'days') self.assertTrue( repr(self.cdftime_leap.origin) == '1600-02-29 00:00:00') self.assertTrue( self.cdftime_leap.unit_string == 'days since 1600-02-29 00:00:00') self.assertTrue(self.cdftime_leap.calendar == 'all_leap') assert_almost_equal( self.cdftime_leap.date2num(self.cdftime_leap.origin), 0.0) # check date2num method. d1 = datetime(2000, 2, 29) d2 = datetime(1600, 2, 29) t1 = self.cdftime_leap.date2num(d1) t2 = self.cdftime_leap.date2num(d2) assert_almost_equal(t1, 400 * 366.) assert_almost_equal(t2, 0.) # check num2date method. d2 = self.cdftime_leap.num2date(t1) self.assertTrue(str(d1) == str(d2)) # check day of year. ndayr = d2.timetuple()[7] self.assertTrue(ndayr == 60) # double check date2num,num2date methods. d = datetime(2000, 12, 31) t1 = self.cdftime_mixed.date2num(d) d2 = self.cdftime_mixed.num2date(t1) self.assertTrue(str(d) == str(d2)) ndayr = d2.timetuple()[7] self.assertTrue(ndayr == 366) # check 360_day calendar. self.assertTrue(self.cdftime_360day.units == 'days') self.assertTrue( repr(self.cdftime_360day.origin) == '1600-02-30 00:00:00') self.assertTrue( self.cdftime_360day.unit_string == 'days since 1600-02-30 00:00:00') self.assertTrue(self.cdftime_360day.calendar == '360_day') assert_almost_equal( self.cdftime_360day.date2num(self.cdftime_360day.origin), 0.0) # check date2num,num2date methods. # use datetime from netcdftime, since this date doesn't # exist in "normal" calendars. d = datetimex(2000, 2, 30) t1 = self.cdftime_360day.date2num(d) assert_almost_equal(t1, 360 * 400.) d2 = self.cdftime_360day.num2date(t1) assert_equal(str(d), str(d2)) # check day of year. d = datetime(2001, 12, 30) t = self.cdftime_360day.date2num(d) assert_almost_equal(t, 144660.0) date = self.cdftime_360day.num2date(t) self.assertTrue(str(d) == str(date)) ndayr = date.timetuple()[7] self.assertTrue(ndayr == 360) # Check fraction d = datetime(1969, 12, 30, 12) t = self.cdftime_360day.date2num(d) date = self.cdftime_360day.num2date(t) assert_equal(str(d), str(date)) # test proleptic julian calendar. d = datetime(1858, 11, 17, 12) t = self.cdftime_jul.date2num(d) assert_almost_equal(t, 7528932.0) d1 = datetime(1582, 10, 4, 23) d2 = datetime(1582, 10, 15, 0) assert_almost_equal( self.cdftime_jul.date2num(d1) + 241.0, self.cdftime_jul.date2num(d2)) date = self.cdftime_jul.num2date(t) self.assertTrue(str(d) == str(date)) # test julian day from date, date from julian day d = datetime(1858, 11, 17) mjd = JulianDayFromDate(d) assert_almost_equal(mjd, 2400000.5) date = DateFromJulianDay(mjd) self.assertTrue(str(date) == str(d)) # test iso 8601 units string d = datetime(1970, 1, 1, 1) t = self.cdftime_iso.date2num(d) assert_equal(numpy.around(t), 3600) # test fix for issue 75 (seconds hit 60 at end of month, # day goes out of range). 
t = 733499.0 d = num2date(t, units='days since 0001-01-01 00:00:00') dateformat = '%Y-%m-%d %H:%M:%S' assert_equal(d.strftime(dateformat), '2009-04-01 00:00:00') # test edge case of issue 75 for numerical problems for t in (733498.999, 733498.9999, 733498.99999, 733498.999999, 733498.9999999): d = num2date(t, units='days since 0001-01-01 00:00:00') t2 = date2num(d, units='days since 0001-01-01 00:00:00') assert(abs(t2 - t) < 1e-5) # values should be less than second # Check equality testing d1 = datetimex(1979, 6, 21, 9, 23, 12) d2 = datetime(1979, 6, 21, 9, 23, 12) assert(d1 == d2) # check timezone offset d = datetime(2012, 2, 29, 15) # mixed_tz is -6 hours from UTC, mixed is UTC so # difference in elapsed time is 6 hours. assert(self.cdftime_mixed_tz.date2num( d) - self.cdftime_mixed.date2num(d) == 6) # Check comparisons with Python datetime types d1 = num2date(0, 'days since 1000-01-01', 'standard') d2 = datetime(2000, 1, 1) d3 = num2date(0, 'days since 3000-01-01', 'standard') assert d1 < d2 assert d2 < d3 # check all comparisons assert d1 != d2 assert d1 <= d2 assert d2 > d1 assert d2 >= d1 # check datetime hash d1 = datetimex(1995, 1, 1) d2 = datetime(1995, 1, 1) d3 = datetimex(2001, 2, 30) assert hash(d1) == hash(d1) assert hash(d1) == hash(d2) assert hash(d1) != hash(d3) assert hash(d3) == hash(d3) # check datetime immutability with self.assertRaises(AttributeError): d1.year = 1999 with self.assertRaises(AttributeError): d1.month = 6 with self.assertRaises(AttributeError): d1.day = 5 with self.assertRaises(AttributeError): d1.hour = 10 with self.assertRaises(AttributeError): d1.minute = 33 with self.assertRaises(AttributeError): d1.second = 45 with self.assertRaises(AttributeError): d1.dayofwk = 1 with self.assertRaises(AttributeError): d1.dayofyr = 52 with self.assertRaises(AttributeError): d1.format = '%Y' # Check leading white space self.assertEqual( repr(self.cdftime_leading_space.origin), ' 850-01-01 00:00:00') #issue 330 units = "seconds since 1970-01-01T00:00:00Z" t = utime(units) for n in range(10): assert n == int(round(t.date2num(t.num2date(n)))) #issue 344 units = 'hours since 2013-12-12T12:00:00' assert(1.0 == date2num(num2date(1.0, units), units)) # test rountrip accuracy # also tests error found in issue #349 calendars=['standard', 'gregorian', 'proleptic_gregorian', 'noleap', 'julian',\ 'all_leap', '365_day', '366_day', '360_day'] dateformat = '%Y-%m-%d %H:%M:%S' dateref = datetime(2015,2,28,12) ntimes = 1001 for calendar in calendars: eps = 100. units = 'microseconds since 1800-01-30 01:01:01' microsecs1 = date2num(dateref,units,calendar=calendar) for n in range(ntimes): microsecs1 += 1. 
date1 = num2date(microsecs1, units, calendar=calendar) microsecs2 = date2num(date1, units, calendar=calendar) date2 = num2date(microsecs2, units, calendar=calendar) err = numpy.abs(microsecs1 - microsecs2) assert(err < eps) assert(date1.strftime(dateformat) == date2.strftime(dateformat)) units = 'milliseconds since 1800-01-30 01:01:01' eps = 0.1 millisecs1 = date2num(dateref,units,calendar=calendar) for n in range(ntimes): millisecs1 += 0.001 date1 = num2date(millisecs1, units, calendar=calendar) millisecs2 = date2num(date1, units, calendar=calendar) date2 = num2date(millisecs2, units, calendar=calendar) err = numpy.abs(millisecs1 - millisecs2) assert(err < eps) assert(date1.strftime(dateformat) == date2.strftime(dateformat)) eps = 1.e-4 units = 'seconds since 0001-01-30 01:01:01' secs1 = date2num(dateref,units,calendar=calendar) for n in range(ntimes): secs1 += 0.1 date1 = num2date(secs1, units, calendar=calendar) secs2 = date2num(date1, units, calendar=calendar) date2 = num2date(secs2, units, calendar=calendar) err = numpy.abs(secs1 - secs2) assert(err < eps) assert(date1.strftime(dateformat) == date2.strftime(dateformat)) eps = 1.e-5 units = 'minutes since 0001-01-30 01:01:01' mins1 = date2num(dateref,units,calendar=calendar) for n in range(ntimes): mins1 += 0.01 date1 = num2date(mins1, units, calendar=calendar) mins2 = date2num(date1, units, calendar=calendar) date2 = num2date(mins2, units, calendar=calendar) err = numpy.abs(mins1 - mins2) assert(err < eps) assert(date1.strftime(dateformat) == date2.strftime(dateformat)) eps = 1.e-5 units = 'hours since 0001-01-30 01:01:01' hrs1 = date2num(dateref,units,calendar=calendar) for n in range(ntimes): hrs1 += 0.001 date1 = num2date(hrs1, units, calendar=calendar) hrs2 = date2num(date1, units, calendar=calendar) date2 = num2date(hrs2, units, calendar=calendar) err = numpy.abs(hrs1 - hrs2) assert(err < eps) assert(date1.strftime(dateformat) == date2.strftime(dateformat)) eps = 1.e-5 units = 'days since 0001-01-30 01:01:01' days1 = date2num(dateref,units,calendar=calendar) for n in range(ntimes): days1 += 0.00001 date1 = num2date(days1, units, calendar=calendar) days2 = date2num(date1, units, calendar=calendar) date2 = num2date(days2, units, calendar=calendar) err = numpy.abs(days1 - days2) assert(err < eps) assert(date1.strftime(dateformat) == date2.strftime(dateformat)) # issue 353 assert (num2date(0, 'hours since 2000-01-01 0') == datetime(2000,1,1,0)) # issue 354 num1 = numpy.array([[0, 1], [2, 3]]) num2 = numpy.array([[0, 1], [2, 3]]) dates1 = num2date(num1, 'days since 0001-01-01') dates2 = num2date(num2, 'days since 2001-01-01') assert( dates1.shape == (2,2) ) assert( dates2.shape == (2,2) ) num1b = date2num(dates1, 'days since 0001-01-01') num2b = date2num(dates2, 'days since 2001-01-01') assert( num1b.shape == (2,2) ) assert( num2b.shape == (2,2) ) assert_almost_equal(num1,num1b) assert_almost_equal(num2,num2b) # issue 357 (make sure time zone offset in units done correctly) # Denver time, 7 hours behind UTC units = 'hours since 1682-10-15 -07:00 UTC' # date after gregorian switch, python datetime used date = datetime(1682,10,15) # assumed UTC num = date2num(date,units) # UTC is 7 hours ahead of units, so num should be 7 assert (num == 7) assert (num2date(num, units) == date) units = 'hours since 1482-10-15 -07:00 UTC' # date before gregorian switch, netcdftime datetime used date = datetime(1482,10,15) num = date2num(date,units) date2 = num2date(num, units) assert (num == 7) assert (date2.year == date.year) assert (date2.month == 
date.month) assert (date2.day == date.day) assert (date2.hour == date.hour) assert (date2.minute == date.minute) assert (date2.second == date.second) # issue 362: case insensitive calendars self.assertTrue(self.cdftime_mixed_capcal.calendar == 'standard') self.assertTrue(self.cdftime_noleap_capcal.calendar == 'noleap') d = datetime(2015, 3, 4, 12, 18, 30) units = 'days since 0001-01-01' for cap_cal, low_cal in (('STANDARD', 'standard'), ('NoLeap', 'noleap'), ('Gregorian', 'gregorian'), ('ALL_LEAP', 'all_leap')): d1 = date2num(d, units, cap_cal) d2 = date2num(d, units, low_cal) self.assertEqual(d1, d2) self.assertEqual(num2date(d1, units, cap_cal), num2date(d1, units, low_cal)) # issue 415 t = datetimex(2001, 12, 1, 2, 3, 4) self.assertEqual(t, copy.deepcopy(t)) # issue 442 units = "days since 0000-01-01 00:00:00" # this should fail (year zero not allowed with real-world calendars) try: date2num(datetime(1, 1, 1), units, calendar='standard') except ValueError: pass # this should not fail (year zero allowed in 'fake' calendars) t = date2num(datetime(1, 1, 1), units, calendar='360_day') self.assertEqual(t, 360) d = num2date(t, units, calendar='360_day') self.assertEqual(d, datetimex(1,1,1)) d = num2date(0, units, calendar='360_day') self.assertEqual(d, datetimex(0,1,1)) # list around missing dates in Gregorian calendar # scalar units = 'days since 0001-01-01 12:00:00' t1 = date2num(datetime(1582, 10, 4), units, calendar='gregorian') t2 = date2num(datetime(1582, 10, 15), units, calendar='gregorian') self.assertEqual(t1+1, t2) # list t1, t2 = date2num([datetime(1582, 10, 4), datetime(1582, 10, 15)], units, calendar='gregorian') self.assertEqual(t1+1, t2) t1, t2 = date2num([datetime(1582, 10, 4), datetime(1582, 10, 15)], units, calendar='standard') self.assertEqual(t1+1, t2) # this should fail: days missing in Gregorian calendar try: t1, t2, t3 = date2num([datetime(1582, 10, 4), datetime(1582, 10, 10), datetime(1582, 10, 15)], units, calendar='standard') except ValueError: pass
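# A minimal stand-alone sketch (not part of the test above) of the mixed Julian/Gregorian
# behaviour exercised in runTest: with the 'standard' calendar, 1582-10-04 is followed
# directly by 1582-10-15, so the two dates are only one numeric day apart.
from datetime import datetime
from netCDF4 import date2num, num2date

units = 'days since 0001-01-01 12:00:00'
t1, t2 = date2num([datetime(1582, 10, 4), datetime(1582, 10, 15)], units, calendar='standard')
assert t2 - t1 == 1  # the ten "missing" Gregorian days are skipped
print(num2date([t1, t2], units, calendar='standard'))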
def calc_moy(ddirout,ncfile,fshape,deb,fin,pays,niveau,types,sat,prod,res_temp,res,varname,level): # parse the start and end dates datedeb = datetime.strptime(deb,"%Y-%m-%d") datefin = datetime.strptime(fin,"%Y-%m-%d") ddirin = os.path.join(ddirDB, types, sat, prod, res) os.chdir(ddirin) geodf = gpd.GeoDataFrame.from_file(fshape) nbdist = len(geodf[geodf.columns[1]]) # number of districts/areas listdist='//'.join(geodf[geodf.columns[1]].tolist())# list of health districts/areas, stored in the .nc file as an attribute of the index dimension ############################ NETCDF CREATION ################################################################## ############################################################################################################### output = os.path.join(ddirout, varname + '_' + os.path.basename(ncfile)[:-5] + '_' + niveau + '_' + pays + '_' + deb.replace('-','') + fin.replace('-','') + '_' + res_temp + '.nc') ncnew = Dataset(output, 'w') # dimensions##### ncnew.createDimension('time', None) ncnew.createDimension('index_dist', nbdist) # variables##### tp = ncnew.createVariable('time','f8',('time',)) index = ncnew.createVariable('index_dist','f4',('index_dist',)) index[:] = range(nbdist) nbpx = ncnew.createVariable('count','f4',('time','index_dist')) vmin = ncnew.createVariable('min','f4',('time','index_dist')) vmax = ncnew.createVariable('max','f4',('time','index_dist')) vmean = ncnew.createVariable('mean','f4',('time','index_dist')) vstd = ncnew.createVariable('std','f4',('time','index_dist')) #vmed = ncnew.createVariable('median','f4',('time','index_dist')) # attributes##### ncnew.Conventions ='CF-1.5' ncnew.description = 'district means for the variable :',varname ncnew.history = 'Created ' + time.ctime(time.time()) ncnew.source = ' ' index.standard_name = listdist tp.standard_name = 'time' tp.calendar = 'gregorian' #fillvalue = np.nan ################################################################################################################ ################################################################################################################ # initialise the per-variable lists, extended after each loop iteration nbpx_tmp = [] vmin_tmp = [] vmax_tmp = [] vmean_tmp = [] vstd_tmp = [] vmed_tmp = [] nc = Dataset(ncfile, 'r') var_in = nc.variables[varname] dates = nc.variables['time'] # start and end dates in numeric form, based on the time units of the .nc file ndatedeb = date2num(datedeb,dates.units) ndatefin = date2num(datefin,dates.units) if datetime.strftime(num2date(dates[0],dates.units),"%H") != "0": # check the hour of the first record (0h, 3h, 6h, ...)
ndatedeb += 24-int(datetime.strftime(num2date(dates[0],dates.units),"%H")) ndatefin += 24-int(datetime.strftime(num2date(dates[0],dates.units),"%H")) # find the indices of the start and end dates in the array iddeb = np.abs(dates[:]-ndatedeb).argmin() idfin = np.abs(dates[:]-ndatefin).argmin()-1 # extract the block of dates to be appended to the time variable (tp) of the new .nc serie_dates = dates[iddeb:idfin+1] if level == -1: var = np.array(var_in[iddeb:idfin+1,...]) else: var = np.array(var_in[iddeb:idfin+1,level,...]) # handle the array's fill value, scale factor and add offset if sat == 'toms': var[var==var_in._FillValue]=-999 else: var[var==var_in._FillValue]=np.nan if "scale_factor" in var_in.ncattrs(): var = (var[:]-var_in.add_offset)*var_in.scale_factor # geographic characteristics: transform, spatial resolution, max latitude and min longitude lat = nc.variables['latitude'][:] lon = nc.variables['longitude'][:] xo = min(lon) yo = max(lat) resx = np.abs(np.mean(np.diff(lon))) resy = np.abs(np.mean(np.diff(lat))) transform = [xo, 0.01, 0.0, yo, 0.0, -0.01] ############################################################################################################# ############################################################################################################# idt = len(serie_dates)//8 if idt == 0: idt = 1 ndt = range(0,len(serie_dates),idt) nb_mat_in = [var[ix:ix+(idt),...] for ix in ndt]# split the array into time blocks (roughly one eighth of the series each) res = Parallel(n_jobs=-1)(delayed(calc_stats)(resx,resy,geodf,nbdist,transform,temps_x) for temps_x in nb_mat_in)# call calc_stats on the blocks in parallel # store the results in the temporary lists nbpx_tmp.append(np.concatenate([res[n][0] for n in range(0,len(ndt))], axis=0)) vmax_tmp.append(np.concatenate([res[n][1] for n in range(0,len(ndt))], axis=0)) vmean_tmp.append(np.concatenate([res[n][2] for n in range(0,len(ndt))], axis=0)) vmed_tmp.append(np.concatenate([res[n][3] for n in range(0,len(ndt))], axis=0)) vmin_tmp.append(np.concatenate([res[n][4] for n in range(0,len(ndt))], axis=0)) vstd_tmp.append(np.concatenate([res[n][5] for n in range(0,len(ndt))], axis=0)) # write the variables to the .nc file tp[:] = np.append(tp[:],serie_dates) tp.units = dates.units nbpx[:] = np.concatenate([nbpx_tmp[d_t] for d_t in range(0,len(nbpx_tmp))], axis=0) vmax[:] = np.concatenate([vmax_tmp[d_t] for d_t in range(0,len(vmax_tmp))], axis=0) vmean[:] = np.concatenate([vmean_tmp[d_t] for d_t in range(0,len(vmean_tmp))], axis=0) vmin[:] = np.concatenate([vmin_tmp[d_t] for d_t in range(0,len(vmin_tmp))], axis=0) vstd[:] = np.concatenate([vstd_tmp[d_t] for d_t in range(0,len(vstd_tmp))], axis=0) index = [num2date(d,dates.units).date() for d in serie_dates] columns_name = geodf.name.values.tolist() tmpvar_dict = {"nbpx":nbpx_tmp,"vmax":vmax_tmp,"vmean":vmean_tmp,"vmin":vmin_tmp,"vstd":vstd_tmp} list_df = {} for n in tmpvar_dict: list_df[n] = pd.DataFrame(np.concatenate([tmpvar_dict[n][d_t] for d_t in range(0,len(tmpvar_dict[n]))], axis=0), index=index, columns=columns_name).round(4) #df.to_csv(ddirout+'/'+output[:-3]+'_'+n+'.csv', header=True) nc.close() ncnew.close() return list_df, output
def ToPyartRadar(self): dts = self.get_scan_time() units = make_time_unit_str(min(dts)) time = get_metadata('time') time['units'] = units time['data'] = date2num(dts, units).astype('float32') # range _range = get_metadata('range') # assume that the number of gates and spacing from the first ray is # representative of the entire volume _range['data'] = self.range _range['meters_to_center_of_first_gate'] = self.CC.header['CutConfig'][ 'usBindWidth'][0] * 2 _range['meters_between_gates'] = self.CC.header['CutConfig'][ 'usBindWidth'][0] * 2 latitude = get_metadata('latitude') longitude = get_metadata('longitude') altitude = get_metadata('altitude') latitude['data'] = np.array([self.latitude], dtype='float64') longitude['data'] = np.array([self.longitude], dtype='float64') altitude['data'] = np.array([self.altitude], dtype='float64') metadata = get_metadata('metadata') metadata['original_container'] = 'CINRAD/CC' metadata['site_name'] = self.sitename metadata['radar_name'] = "CINRAD/CC" sweep_start_ray_index = get_metadata('sweep_start_ray_index') sweep_end_ray_index = get_metadata('sweep_end_ray_index') sweep_start_ray_index['data'] = self.sweep_start_ray_index sweep_end_ray_index['data'] = self.sweep_end_ray_index sweep_number = get_metadata('sweep_number') sweep_number['data'] = np.arange(self.nsweeps, dtype='int32') scan_type = self.scan_type sweep_mode = get_metadata('sweep_mode') if self.scan_type == "ppi": sweep_mode['data'] = np.array(self.nsweeps * ['azimuth_surveillance'], dtype='S') elif self.scan_type == "rhi": sweep_mode['data'] = np.array(self.nsweeps * ['rhi'], dtype='S') else: sweep_mode['data'] = np.array(self.nsweeps * ['sector'], dtype='S') # elevation elevation = get_metadata('elevation') elevation['data'] = self.elevation # azimuth azimuth = get_metadata('azimuth') azimuth['data'] = self.azimuth # fixed_angle fixed_angle = get_metadata('fixed_angle') fixed_angle['data'] = self.get_fixed_angle() # instrument_parameters instrument_parameters = self._get_instrument_parameters() # fields fields = {} for field_name_abbr in self.fields.keys(): field_name = CINRAD_field_mapping[field_name_abbr] if field_name is None: continue field_dic = get_metadata(field_name) field_dic['data'] = np.ma.masked_array(self.fields[field_name_abbr],\ mask=np.isnan(self.fields[field_name_abbr]), fill_value=get_fillvalue()) field_dic['_FillValue'] = get_fillvalue() fields[field_name] = field_dic return Radar(time, _range, fields, metadata, scan_type, latitude, longitude, altitude, sweep_number, sweep_mode, fixed_angle, sweep_start_ray_index, sweep_end_ray_index, azimuth, elevation, instrument_parameters=instrument_parameters)
def smooth(files): output_names = [] now = datetime.utcnow() for filepath in files: fn_new = filepath dirname = os.path.dirname(fn_new) basename = os.path.basename(fn_new) if basename.startswith("IMOS"): fn_split = basename.split('_') # IMOS_ABOS-SOTS_CPT_20090922_SOFS_FV01_Pulse-6-2009-SBE37SM-RS232-6962-100m_END-20100323_C-20200227.nc # 0 1 2 3 4 5 6 7 8 # rename the file FV00 to FV01 fn_split[6] = fn_split[6] + "-Smooth" # Change the creation date in the filename to today fn_split[8] = now.strftime("C-%Y%m%d.nc") fn_new = os.path.join(dirname, "_".join(fn_split)) # Add the new file name to the list of new file names output_names.append(fn_new) print() print("output", fn_new) ds = Dataset(filepath, 'r') # deal with TIME var_time = ds.variables["TIME"] date_time_start = datetime.strptime( ds.getncattr('time_deployment_start'), '%Y-%m-%dT%H:%M:%SZ') date_time_end = datetime.strptime(ds.getncattr('time_deployment_end'), '%Y-%m-%dT%H:%M:%SZ') time = var_time[:] # create mask for deployment time msk = (time > date2num(date_time_start, units=var_time.units)) & ( time < date2num(date_time_end, units=var_time.units)) t_dt = num2date(time, units=var_time.units) time_masked = var_time[msk] t0 = time_masked[0] t1 = time_masked[1] tend = time_masked[-1] sample_rate = np.round(24 * 3600 * (tend - t0) / len(time_masked)) print('dt ', (t1 - t0) * 24 * 3600, 'sec, sample_rate', sample_rate) # fine number of samples to make 3 hrs of data i = 0 while time_masked[i] < (t0 + 3 / 24): i = i + 1 window = np.max([i, 3]) print('window (points)', window) # create a new time array to sample to d0 = np.ceil(t0 * 24) / 24 dend = np.floor(tend * 24) / 24 d = np.arange(d0, dend, 1 / 24) d_dt = num2date(d, units=var_time.units) # output data to new file ds_new = Dataset(fn_new, 'w') # copy global vars attr_dict = {} for a in ds.ncattrs(): attr_dict[a] = ds.getncattr(a) ds_new.setncatts(attr_dict) # copy dimension ds_new.createDimension(ds.dimensions['TIME'].name, len(d_dt)) # create new time time_var = ds_new.createVariable('TIME', 'f8', 'TIME', fill_value=np.NaN, zlib=True) # variable to smooth degree = 3 in_vars = set([x for x in ds.variables]) # print('input file vars', in_vars) z = in_vars.intersection(['TEMP', 'PSAL', 'DENSITY', 'DOX2']) print('vars to smooth', z) for smooth_var in z: var_to_smooth_in = ds.variables[smooth_var] smooth_in = var_to_smooth_in[msk] print(var_to_smooth_in[msk]) # do the smoothing loess = Loess.Loess(np.array(time_masked), np.array(smooth_in)) y = [ loess.estimate(x, window=int(window), use_matrix=False, degree=degree) for x in d ] print(t0, t1) #y = loess.estimate(t_msk, window=7, use_matrix=False, degree=1) #print(y) # copy times attributes attr_dict = {} for a in var_time.ncattrs(): attr_dict[a] = var_time.getncattr(a) time_var.setncatts(attr_dict) time_var[:] = d # create output variables var_smooth_out = ds_new.createVariable(smooth_var, 'f4', 'TIME', fill_value=np.NaN, zlib=True) attr_dict = {} for a in var_to_smooth_in.ncattrs(): attr_dict[a] = var_to_smooth_in.getncattr(a) var_smooth_out.setncatts(attr_dict) var_smooth_out[:] = y # create history ds_new.history += '\n' + now.strftime( "%Y-%m-%d : ") + 'resampled data created from ' + os.path.basename( filepath) + ' window=' + str(window) + ' degree=' + str(degree) ds_new.close() ds.close() return output_names
for i in range(len(outvar)): vkey = outvar[i] var = outnc.createVariable(vkey,np.float32,("time","level","lat","lon")) var.standard_name = stname[i] var.units = unit[i] outvar_dict[vkey] = var print(outvar_dict) in_scl = netCDF4.Dataset(datadir/nc,'r') print(in_scl.variables["time"].units) outvar_dict["level"][:] = in_scl.variables["level"][:] outvar_dict["lat"][:] = in_scl.variables["latitude"][:] outvar_dict["lon"][:] = in_scl.variables["longitude"][:] for t in range(len(in_scl.variables["time"][:])): date = netCDF4.num2date(in_scl.variables["time"][t],in_scl.variables["time"].units) t0 = netCDF4.date2num(date,outvar_dict["time"].units) print(date,t0) outvar_dict["time"][t] = t0 for name in in_scl.variables.keys(): if(name == "time" or name == "latitude" \ or name == "longitude" or name == "level"): continue if(name in outvar_dict): print(name) outvar_dict[name][t,:] = in_scl.variables[name][t,:] print(outvar_dict[name][:].shape) else: print(int(name[-5:-2])) lev=0 if(int(name[-5:-2]) < 850): lev+=1
def get_ctd_profile(): """ Load the ASCII CTD Data into an 'ambient.Profile' object. This function performs the steps in ./profile_from_ctd.py to read in the CTD data and create a Profile object. This is the data set that will be used to demonstrate how to append data to a Profiile object. """ # Get the path to the input file __location__ = os.path.realpath( os.path.join(os.getcwd(), os.path.dirname(__file__), '../../tamoc/data')) dat_file = os.path.join(__location__, 'ctd_BM54.cnv') # Load in the data using numpy.loadtxt raw = np.loadtxt(dat_file, comments='#', skiprows=175, usecols=(0, 1, 3, 8, 9, 10, 12)) # Describe the organization of the data in raw. var_names = [ 'temperature', 'pressure', 'wetlab_fluorescence', 'z', 'salinity', 'density', 'oxygen' ] var_units = ['deg C', 'db', 'mg/m^3', 'm', 'psu', 'kg/m^3', 'mg/l'] z_col = 3 # Clean the profile to remove reversals in the depth coordinate data = ambient.extract_profile(raw, z_col, 50.0) # Convert the profile data to standard units in TAMOC profile, units = ambient.convert_units(data, var_units) # Create an empty netCDF4-classic dataset to store this CTD data __location__ = os.path.realpath( os.path.join(os.getcwd(), os.path.dirname(__file__), '../../test/output')) nc_file = os.path.join(__location__, 'BM54.nc') summary = 'Dataset created by profile_from_ctd in the ./bin directory' \ + ' of TAMOC' source = 'R/V Brooks McCall, station BM54' sea_name = 'Gulf of Mexico' p_lat = 28.0 + 43.945 / 60.0 p_lon = 360 - (88.0 + 22.607 / 60.0) p_time = date2num(datetime(2010, 5, 30, 18, 22, 12), units='seconds since 1970-01-01 00:00:00 0:00', calendar='julian') nc = ambient.create_nc_db(nc_file, summary, source, sea_name, p_lat, p_lon, p_time) # Insert the CTD data into the netCDF dataset comments = ['measured'] * len(var_names) nc = ambient.fill_nc_db(nc, profile, var_names, units, comments, z_col) # Create an ambient.Profile object for this dataset bm54 = ambient.Profile(nc, chem_names=['oxygen']) # Return the Profile object return bm54
def pack_fmc(hdf_file, date, mean_arr, std_arr, q_mask, dest): with netCDF4.Dataset(dest, 'w', format='NETCDF4_CLASSIC') as ds: with open('nc_metadata.json') as data_file: attrs = json.load(data_file) for key in attrs: setattr(ds, key, attrs[key]) setattr(ds, "date_created", datetime.now().strftime("%Y%m%dT%H%M%S")) rast = gdal.Open( 'HDF4_EOS:EOS_GRID:"{}":MOD_Grid_BRDF:Nadir_Reflectance_Band1'. format(hdf_file)) proj_wkt = rast.GetProjection() geot = rast.GetGeoTransform() t_dim = ds.createDimension("time", 1) x_dim = ds.createDimension("x", rast.RasterXSize) y_dim = ds.createDimension("y", rast.RasterYSize) var = ds.createVariable("time", "f8", ("time", )) var.units = "seconds since 1970-01-01 00:00:00.0" var.calendar = "standard" var.long_name = "Time, unix time-stamp" var.standard_name = "time" var[:] = netCDF4.date2num([date], units="seconds since 1970-01-01 00:00:00.0", calendar="standard") var = ds.createVariable("x", "f8", ("x", )) var.units = "m" var.long_name = "x coordinate of projection" var.standard_name = "projection_x_coordinate" var[:] = np.linspace(geot[0], geot[0] + (geot[1] * rast.RasterXSize), rast.RasterXSize) var = ds.createVariable("y", "f8", ("y", )) var.units = "m" var.long_name = "y coordinate of projection" var.standard_name = "projection_y_coordinate" var[:] = np.linspace(geot[3], geot[3] + (geot[5] * rast.RasterYSize), rast.RasterYSize) var = ds.createVariable("lfmc_mean", 'f4', ("time", "y", "x"), fill_value=-9999.9) var.long_name = "LFMC Arithmetic Mean" var.units = '%' var.grid_mapping = "sinusoidal" var[:] = mean_arr[None, ...] var = ds.createVariable("lfmc_stdv", 'f4', ("time", "y", "x"), fill_value=-9999.9) var.long_name = "LFMC Standard Deviation" var.units = '%' var.grid_mapping = "sinusoidal" var[:] = std_arr[None, ...] var = ds.createVariable("quality_mask", 'i1', ("time", "y", "x"), fill_value=0) var.long_name = "Combined Bands Quality Mask" var.units = 'Cat' var.grid_mapping = "sinusoidal" var[:] = q_mask.astype(np.int8)[None, ...] var = ds.createVariable("sinusoidal", 'S1', ()) var.grid_mapping_name = "sinusoidal" var.false_easting = 0.0 var.false_northing = 0.0 var.longitude_of_central_meridian = 0.0 var.longitude_of_prime_meridian = 0.0 var.semi_major_axis = 6371007.181 var.inverse_flattening = 0.0 var.spatial_ref = proj_wkt var.GeoTransform = "{} {} {} {} {} {} ".format( *[geot[i] for i in range(6)])
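# A short, self-contained sketch of the time-coordinate pattern used in pack_fmc above:
# create a size-1 "time" dimension, attach CF units/calendar attributes, and let
# netCDF4.date2num produce the numeric value. The file name is illustrative only.
from datetime import datetime
import netCDF4

with netCDF4.Dataset('example_time.nc', 'w', format='NETCDF4_CLASSIC') as ds:
    ds.createDimension("time", 1)
    t = ds.createVariable("time", "f8", ("time",))
    t.units = "seconds since 1970-01-01 00:00:00.0"
    t.calendar = "standard"
    t.standard_name = "time"
    t[:] = netCDF4.date2num([datetime(2020, 1, 1)], units=t.units, calendar=t.calendar)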
def pack_flammability_mosaic(date, flam, anom, q_mask, dest): lat0 = -10. lat1 = -44. lon0 = 113. lon1 = 154. res = 0.005 x_size = int((lon1 - lon0) / res) y_size = int((lat1 - lat0) / (-1 * res)) geot = [lon0, res, 0., lat0, 0., -1 * res] with netCDF4.Dataset(dest, 'w', format='NETCDF4_CLASSIC') as ds: with open('nc_metadata.json') as data_file: attrs = json.load(data_file) for key in attrs: setattr(ds, key, attrs[key]) setattr(ds, "date_created", datetime.now().strftime("%Y%m%dT%H%M%S")) t_dim = ds.createDimension("time", 1) x_dim = ds.createDimension("longitude", flam.shape[1]) y_dim = ds.createDimension("latitude", flam.shape[0]) var = ds.createVariable("time", "f8", ("time", )) var.units = "seconds since 1970-01-01 00:00:00.0" var.calendar = "standard" var.long_name = "Time, unix time-stamp" var.standard_name = "time" var[:] = netCDF4.date2num([date], units="seconds since 1970-01-01 00:00:00.0", calendar="standard") var = ds.createVariable("longitude", "f8", ("longitude", )) var.units = "degrees" var.long_name = "longitude" var.standard_name = "longitude" var[:] = np.linspace(lon0, lon1 - res, num=x_size) var = ds.createVariable("latitude", "f8", ("latitude", )) var.units = "degrees" var.long_name = "latitude" var.standard_name = "latitude" var[:] = np.linspace(lat0, lat1 + res, num=y_size) var = ds.createVariable("flammability", 'f4', ("time", "latitude", "longitude"), fill_value=-9999.9) var.long_name = "Flammability Index" var.units = '%' var[:] = flam[None, ...] var = ds.createVariable("anomaly", 'f4', ("time", "latitude", "longitude"), fill_value=-9999.9) var.long_name = "FMC Anomaly" var.units = '%' var[:] = anom[None, ...] var = ds.createVariable("quality_mask", 'i1', ("time", "latitude", "longitude"), fill_value=0) var.long_name = "Quality Mask" var.units = 'Cat' var[:] = q_mask[None, ...]
def aggregate(files, varNames): # split this into createCatalog - copy needed information into structure # createTimeArray (1D, OBS) - from list of structures # createNewFile # copyAttributes # updateAttributes # copyData # # createCatalog - copy needed information into structure # # look over all files, create a time array from all files # TODO: maybe delete files here without variables we're not interested in # TODO: Create set of variables in all files files_to_process = [] for path_file in files: nc = Dataset(path_file, mode="r") if varNames[0] in nc.variables: files_to_process.append(path_file) nc.close() filen = 0 for path_file in files_to_process: print("input file %s" % path_file) nc = Dataset(path_file, mode="r") ncTime = nc.get_variables_by_attributes(standard_name='time') time_deployment_start = nc.time_deployment_start time_deployment_end = nc.time_deployment_end tStart = parse(time_deployment_start) tEnd = parse(time_deployment_end) tStartnum = date2num(tStart.replace(tzinfo=None), units=ncTime[0].units) tEndnum = date2num(tEnd.replace(tzinfo=None), units=ncTime[0].units) maTime = ma.array(ncTime[0][:]) #msk = (maTime < tStartnum) | (maTime > tEndnum) #maTime.mask = msk maTime.mask = 0 timeLen = 1 if len(ncTime[0].shape) > 0: timeLen = ncTime[0].shape[0] if filen == 0: maTimeAll = maTime instrumentIndex = ma.ones(timeLen) * filen else: maTimeAll = ma.append(maTimeAll, maTime) instrumentIndex = ma.append(instrumentIndex, ma.ones(timeLen) * filen) nc.close() filen += 1 instrumentIndex.mask = maTimeAll.mask # same mask for instrument index idx = maTimeAll.argsort(0) # sort by time dimension # # createTimeArray (1D, OBS) - from list of structures # dsTime = Dataset(files_to_process[0], mode="r") ncTime = dsTime.get_variables_by_attributes(standard_name='time') dates = num2date(maTimeAll[idx].compressed(), units=ncTime[0].units, calendar=ncTime[0].calendar) # # createNewFile # # create a new filename # IMOS_<Facility-Code>_<Data-Code>_<Start-date>_<Platform-Code>_FV<File-Version>_ <Product-Type>_END-<End-date>_C-<Creation_date>_<PARTX>.nc # TODO: what to do with <Data-Code> with a reduced number of variables ncTimeFormat = "%Y-%m-%dT%H:%M:%SZ" file_name = os.path.basename(files_to_process[0]) if file_name.startswith('IMOS'): splitParts = file_name.split("_") # get the last path item (the file nanme), split by _ tStartMaksed = num2date(maTimeAll[idx].compressed()[0], units=ncTime[0].units, calendar=ncTime[0].calendar) tEndMaksed = num2date(maTimeAll[idx].compressed()[-1], units=ncTime[0].units, calendar=ncTime[0].calendar) fileProductTypeSplit = splitParts[6].split("-") fileProductType = fileProductTypeSplit[0] # could use the global attribute site_code for the product type fileTimeFormat = "%Y%m%d" outputName = splitParts[0] + "_" + splitParts[1] + "_" + splitParts[2] \ + "_" + tStartMaksed.strftime(fileTimeFormat) \ + "_" + splitParts[4] \ + "_" + "FV02" \ + "_" + fileProductType + "-Aggregate-" + varNames[0] \ + "_END-" + tEndMaksed.strftime(fileTimeFormat) \ + "_C-" + datetime.utcnow().strftime(fileTimeFormat) \ + ".nc" else: outputName = '_'.join(varNames) + '-Aggregate.nc' print("output file : %s" % outputName) ncOut = Dataset(outputName, 'w', format='NETCDF4') # # create additional dimensions needed # # for d in nc.dimensions: # print("Dimension %s " % d) # ncOut.createDimension(nc.dimensions[d].name, size=nc.dimensions[d].size) # tDim = ncOut.createDimension("OBS", len(maTimeAll.compressed())) iDim = ncOut.createDimension("instrument", len(files_to_process)) strDim = 
ncOut.createDimension("strlen", 256) # netcdf4 allow variable length strings, should we use them, probably not # # copyAttributes # # some of these need re-creating from the combined source data globalAttributeBlackList = ['time_coverage_end', 'time_coverage_start', 'time_deployment_end', 'time_deployment_start', 'compliance_checks_passed', 'compliance_checker_version', 'compliance_checker_imos_version', 'date_created', 'deployment_code', 'geospatial_lat_max', 'geospatial_lat_min', 'geospatial_lon_max', 'geospatial_lon_min', 'geospatial_vertical_max', 'geospatial_vertical_min', 'instrument', 'instrument_nominal_depth', 'instrument_sample_interval', 'instrument_serial_number', 'quality_control_log', 'history', 'netcdf_version'] # global attributes # TODO: get list of variables, global attributes and dimensions from first pass above dsIn = Dataset(files_to_process[0], mode='r') for a in dsIn.ncattrs(): if not (a in globalAttributeBlackList): #print("Attribute %s value %s" % (a, dsIn.getncattr(a))) ncOut.setncattr(a, dsIn.getncattr(a)) for d in dsIn.dimensions: if not(d in 'TIME'): ncOut.createDimension(d, dsIn.dimensions[d].size) varList = dsIn.variables # add the ancillary variables for the ones requested for v in dsIn.variables: if v in varNames: if hasattr(dsIn.variables[v], 'ancillary_variables'): varNames.extend(dsIn.variables[v].ancillary_variables.split(" ")) ncOut.setncattr("data_mode", "A") # something to indicate its an aggregate # TIME variable # TODO: get TIME attributes from first pass above ncTimesOut = ncOut.createVariable("TIME", ncTime[0].dtype, ("OBS",)) # copy TIME variable attributes for a in ncTime[0].ncattrs(): if a not in ('comment',): print("TIME Attribute %s value %s" % (a, ncTime[0].getncattr(a))) ncTimesOut.setncattr(a, ncTime[0].getncattr(a)) ncTimesOut[:] = maTimeAll[idx].compressed() ncOut.setncattr("time_coverage_start", dates[0].strftime(ncTimeFormat)) ncOut.setncattr("time_coverage_end", dates[-1].strftime(ncTimeFormat)) ncOut.setncattr("date_created", datetime.utcnow().strftime(ncTimeFormat)) ncOut.setncattr("history", datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC : Create Aggregate")) # instrument index indexVarType = "i1" if len(files_to_process) > 128: indexVarType = "i2" if len(files_to_process) > 32767: # your really keen then indexVarType = "i4" # # create new variables needed # ncInstrumentIndexVar = ncOut.createVariable("instrument_index", indexVarType, ("OBS",)) ncInstrumentIndexVar.setncattr("long_name", "which instrument this obs is for") ncInstrumentIndexVar.setncattr("instance_dimension", "instrument") ncInstrumentIndexVar[:] = instrumentIndex[idx].compressed() # create a variable with the source file name ncFileNameVar = ncOut.createVariable("source_file", "S1", ("instrument", "strlen")) ncFileNameVar.setncattr("long_name", "source file for this instrument") ncInstrumentTypeVar = ncOut.createVariable("instrument_id", "S1", ("instrument", "strlen")) ncInstrumentTypeVar.setncattr("long_name", "source instrument make, model, serial_number") filen = 0 data = numpy.empty(len(files_to_process), dtype="S256") instrument = numpy.empty(len(files_to_process), dtype="S256") for path_file in files_to_process: data[filen] = path_file ncType = Dataset(path_file, mode='r') instrument[filen] = ncType.deployment_code + " ; " + ncType.instrument + ' ; ' + ncType.instrument_serial_number filen += 1 ncFileNameVar[:] = stringtochar(data) ncInstrumentTypeVar[:] = stringtochar(instrument) # # create a list of variables needed # filen = 0 # variables we want 
regardless varNames += ['LATITUDE', 'LONGITUDE', 'NOMINAL_DEPTH'] # remove any duplicates varNamesOut = set(varNames) # # copyData # # copy variable data from all files_to_process into output file # should we add uncertainty to variables here if they don't have one from a default set for v in varNamesOut: varOrder = -1 filen = 0 if (v != 'TIME') & (v in varList): # TODO: need to deal with files_to_process that don't have v variable in it for path_file in files_to_process: print("%d : %s file %s" % (filen, v, path_file)) nc1 = Dataset(path_file, mode="r") maVariable = nc1.variables[v][:] varDims = varList[v].dimensions varOrder = len(varDims) if len(varDims) > 0: # need to replace the TIME dimension with the now extended OBS dimension # should we extend this to the CTD case where the variables have a DEPTH dimension and no TIME if varList[v].dimensions[0] == 'TIME': if filen == 0: maVariableAll = maVariable dim = ('OBS',) + varDims[1:len(varDims)] ncVariableOut = ncOut.createVariable(v, varList[v].dtype, dim) else: maVariableAll = ma.append(maVariableAll, maVariable, axis=0) # add new data to end along OBS axis else: if filen == 0: maVariableAll = maVariable maVariableAll.shape = (1,) + maVariable.shape dim = ('instrument',) + varDims[0:len(varDims)] varOrder += 1 ncVariableOut = ncOut.createVariable(v, varList[v].dtype, dim) else: vdata = maVariable vdata.shape = (1,) + maVariable.shape maVariableAll = ma.append(maVariableAll, vdata, axis=0) else: if filen == 0: maVariableAll = maVariable dim = ('instrument',) + varDims[0:len(varDims)] ncVariableOut = ncOut.createVariable(v, varList[v].dtype, dim) else: maVariableAll = ma.append(maVariableAll, maVariable) # copy the variable attributes # this ends up as the superset of all files_to_process for a in varList[v].ncattrs(): if a not in ('comment', '_FillValue') and not re.match(r"calibration.*", a): #print("%s Attribute %s value %s" % (v, a, varList[v].getncattr(a))) ncVariableOut.setncattr(a, varList[v].getncattr(a)) nc1.close() filen += 1 # write the aggregated data to the output file if varOrder == 2: maVariableAll.mask = maTimeAll.mask # apply the time mask ncVariableOut[:] = maVariableAll[idx][:].compressed() elif varOrder == 1: maVariableAll.mask = maTimeAll.mask # apply the time mask ncVariableOut[:] = maVariableAll[idx].compressed() elif varOrder == 0: ncVariableOut[:] = maVariableAll # create the output global attributes if hasattr(ncVariableOut, 'standard_name'): if ncVariableOut.standard_name == 'latitude': laMax = maVariableAll.max(0) laMin = maVariableAll.min(0) ncOut.setncattr("geospatial_lat_max", laMax) ncOut.setncattr("geospatial_lat_min", laMin) if ncVariableOut.standard_name == 'longitude': loMax = maVariableAll.max(0) loMin = maVariableAll.min(0) ncOut.setncattr("geospatial_lon_max", loMax) ncOut.setncattr("geospatial_lon_min", loMin) if ncVariableOut.standard_name == 'depth': dMax = maVariableAll.max(0) dMin = maVariableAll.min(0) ncOut.setncattr("geospatial_vertical_max", dMax) ncOut.setncattr("geospatial_vertical_min", dMin) dsIn.close() # we're done with the varList now ncOut.close() return outputName
def eco_parse(files, model='FLNTUS', serial='1215'): time = [] value = [] if not model: model = 'FLNTUS' if not serial: serial = '1215' filepath = files[0] number_samples = 0 with open(filepath, 'r', errors='ignore') as fp: hdr_line = fp.readline().replace("\n", "") hdr_split = hdr_line.split("\t") hdr = [re.sub("\(.*\)\n?", "", x) for x in hdr_split[3:]] print (hdr) print(hdr_split) line = fp.readline() while line: line_split = line.split("\t") print(line_split) t = line_split[1][0:4] + " 20" + line_split[1][4:] + " " + line_split[2] #print(t) try: ts = datetime.strptime(t, "%m%d %Y %H%M%S") #print(ts) v = [float(x) for x in line_split[3:]] if len(v) == len(hdr): time.append(ts) value.append(v) number_samples += 1 except ValueError: pass line = fp.readline() print("nSamples %d " % number_samples) # # build the netCDF file # ncTimeFormat = "%Y-%m-%dT%H:%M:%SZ" outputName = filepath + ".nc" print("output file : %s" % outputName) ncOut = Dataset(outputName, 'w', format='NETCDF4_CLASSIC') ncOut.instrument = "WetLABs ; " + model ncOut.instrument_model = model ncOut.instrument_serial_number = serial # TIME:axis = "T"; # TIME:calendar = "gregorian"; # TIME:long_name = "time"; # TIME:units = "days since 1950-01-01 00:00:00 UTC"; tDim = ncOut.createDimension("TIME", number_samples) ncTimesOut = ncOut.createVariable("TIME", "d", ("TIME",), zlib=True) ncTimesOut.long_name = "time" ncTimesOut.units = "days since 1950-01-01 00:00:00 UTC" ncTimesOut.calendar = "gregorian" ncTimesOut.axis = "T" ncTimesOut[:] = date2num(time, calendar=ncTimesOut.calendar, units=ncTimesOut.units) for i in range(0, len(hdr)): print(i, hdr[i], len(value[-1])) ncVarOut = ncOut.createVariable(hdr[i], "f4", ("TIME",), zlib=True) ncVarOut[:] = [v[i] for v in value] # add timespan attributes ncOut.setncattr("time_coverage_start", num2date(ncTimesOut[0], units=ncTimesOut.units, calendar=ncTimesOut.calendar).strftime(ncTimeFormat)) ncOut.setncattr("time_coverage_end", num2date(ncTimesOut[-1], units=ncTimesOut.units, calendar=ncTimesOut.calendar).strftime(ncTimeFormat)) # add creating and history entry ncOut.setncattr("date_created", datetime.utcnow().strftime(ncTimeFormat)) ncOut.setncattr("history", datetime.utcnow().strftime("%Y-%m-%d") + " created from file " + filepath) ncOut.close() return outputName
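# A brief sketch (file and variable names are illustrative) of the pattern eco_parse uses for
# the time_coverage_* attributes: convert the first and last numeric times back to dates with
# num2date and format them as ISO 8601 strings.
from netCDF4 import Dataset, num2date

nc_time_format = "%Y-%m-%dT%H:%M:%SZ"
with Dataset('example_time.nc', 'a') as ds:
    t = ds.variables['time']
    ds.time_coverage_start = num2date(t[0], units=t.units, calendar=t.calendar).strftime(nc_time_format)
    ds.time_coverage_end = num2date(t[-1], units=t.units, calendar=t.calendar).strftime(nc_time_format)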
def _create_tst_data_set(self, tmp_handle, tmp_name, key): ds = Dataset(tmp_name, 'w') time = ds.createDimension('time', None) lat = ds.createDimension('lat', 80) lon_len = 60 if key == "DS_DIM_SIZE_CHANGED": lon_len = 70 time_len = 10 if key == "DS_ADDITIONAL_TIMES": time_len = 12 lon = ds.createDimension('lon', lon_len) ds.creator = "ocean observing initiative" if key == "DS_GLOBAL_ATT_CHANGED": ds.history = "first history entry; second history entry" else: ds.history = "first history entry" if key == "DS_TIME_DIM_VAR_DIFFER": time = ds.createVariable('record', 'i8', ('time', ), fill_value=-1) else: time = ds.createVariable('time', 'i8', ('time', ), fill_value=-1) time.standard_name = "time" time.long_name = "time" time.units = "seconds since 1970-01-01 00:00:00" time.calendar = "gregorian" time.comment = "estimated time of observation" latitudes = ds.createVariable('latitude', 'f4', ('lat', )) latitudes.standard_name = "latitude" if key == "DS_VAR_ATT_CHANGED": latitudes.long_name = "latitude has changed" else: latitudes.long_name = "latitude" latitudes.units = "degrees_north" latitudes[:] = numpy.arange(10, 50, 0.5) longitudes = ds.createVariable('longitude', 'f4', ('lon', )) longitudes.standard_name = "longitude" longitudes.long_name = "longitude" longitudes.units = "degrees_east" if key == "DS_DIM_SIZE_CHANGED": longitudes[:] = numpy.arange(-90, -55, 0.5) else: longitudes[:] = numpy.arange(-90, -60, 0.5) byte_data = ds.createVariable('bytedata', 'i1', ( 'time', 'lat', 'lon', )) byte_data.standard_name = "bytes" byte_data.long_name = "byte_data" byte_data.units = "byte" array_data = numpy.array( numpy.arange( 0, time_len * len(ds.dimensions["lat"]) * len(ds.dimensions["lon"]))) byte_data[0:time_len, :, :] = numpy.reshape( array_data, (time_len, len(ds.dimensions["lat"]), len(ds.dimensions["lon"]))) short_data = ds.createVariable('shortdata', 'i2', ( 'time', 'lat', 'lon', )) short_data.standard_name = "shorts" short_data.long_name = "short_data" short_data.units = "short" short_data[0:time_len, :, :] = numpy.reshape( array_data, (time_len, len(ds.dimensions["lat"]), len(ds.dimensions["lon"]))) int_data = ds.createVariable('intdata', 'i4', ( 'time', 'lat', 'lon', )) int_data.standard_name = "integers" int_data.long_name = "integer_data" int_data.units = "int" int_data[0:time_len, :, :] = numpy.reshape( array_data, (time_len, len(ds.dimensions["lat"]), len(ds.dimensions["lon"]))) array_data = numpy.linspace(start=0, stop=10000, num=time_len * len(ds.dimensions["lat"]) * len(ds.dimensions["lon"])) float_data = ds.createVariable('floatdata', 'f4', ( 'time', 'lat', 'lon', )) float_data.standard_name = "floats" float_data.long_name = "float_data" float_data.units = "flt" float_data[0:time_len, :, :] = numpy.reshape( array_data, (time_len, len(ds.dimensions["lat"]), len(ds.dimensions["lon"]))) double_data = ds.createVariable('doubledata', 'f8', ( 'time', 'lat', 'lon', )) double_data.standard_name = "doubles" double_data.long_name = "double_data" double_data.units = "dbl" double_data[0:time_len, :, :] = numpy.reshape( array_data, (time_len, len(ds.dimensions["lat"]), len(ds.dimensions["lon"]))) # fill in the temporal data from datetime import datetime, timedelta from netCDF4 import date2num dates = [ datetime(2011, 12, 1) + n * timedelta(hours=1) for n in range(float_data.shape[0]) ] time[:] = date2num(dates, units=time.units, calendar=time.calendar) ds.close() os.close(tmp_handle)
longitude = era5_file.variables["longitude"][:] lat_resolution = abs(latitude[-1] - latitude[0]) / (len(latitude) - 1) lon_resolution = abs(longitude[-1] - longitude[0]) / (len(longitude) - 1) # get the time and convert to Python datetime object era5_time = era5_file.variables["time"][:] time_units = getattr(era5_file.variables["time"], "units") dt_era5 = netCDF4.num2date(era5_time, time_units) start_date_era5 = dt_era5[0] end_date_era5 = dt_era5[-1] hour_utc = numpy.array([dt.hour for dt in dt_era5]) # get the datetime in the middle of the accumulation period era5_offset = datetime.timedelta(minutes=float(era5_timestep) / 2) dt_era5_cor = [x - era5_offset for x in dt_era5] # get a series of time, corrected for the offset # NOTE: netCDF4.date2num doesn't handle timezone-aware datetimes era5_time_1hr = netCDF4.date2num(dt_era5_cor, time_units) # make utc_dt timezone aware so we can generate local times later dt_era5_utc_cor = [x.replace(tzinfo=pytz.utc) for x in dt_era5_cor] #site_list = ["Tumbarumba"] # now loop over the sites for site_name in site_list: # get the output file name out_site_path = os.path.join(out_base_path, site_name, "Data", "ERA5") if not os.path.exists(out_site_path): os.makedirs(out_site_path) out_file_name = site_name + "_ERA5_" + start_date_era5.strftime( "%Y%m%d") out_file_name = out_file_name + "_" + end_date_era5.strftime( "%Y%m%d") + ".nc" out_file_path = os.path.join(out_site_path, out_file_name) # get the metadata from the control file
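# As the note above says, netCDF4.date2num does not handle timezone-aware datetimes, which is
# why the ERA5 code keeps the offset-corrected timestamps naive and only attaches tzinfo
# afterwards. A minimal sketch of that workaround (the units string is illustrative):
import datetime
import pytz
import netCDF4

units = "hours since 1900-01-01 00:00:00.0"
aware = datetime.datetime(2020, 1, 1, 0, 30, tzinfo=pytz.utc)
naive = aware.replace(tzinfo=None)        # strip tzinfo before the conversion
num = netCDF4.date2num(naive, units)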
fileOutName = "ndw_speed_intensity.nc" latvar = [] lonvar = [] timevardata = [] station = np.array([], dtype='object') for loc in measurements: latvar.append(measurements[loc]['lat']) lonvar.append(measurements[loc]['lon']) inputdate = measurements[loc]['time'] + 'CEST' date = dateutil.parser.parse(inputdate) #,"%Y-%m-%d %M:%H:%S") utctimestring = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(time.mktime(date.timetuple()))) netcdfDate = netCDF4.date2num(dateutil.parser.parse(utctimestring), "seconds since 1970-01-01 00:00:00") timevardata.append(netcdfDate) station = np.append(station, loc) numpoints = len(station) ncfile = netCDF4.Dataset(fileOutName, 'w') obs_dim = ncfile.createDimension('obs', numpoints) # latitude axis time_dim = ncfile.createDimension('time', 1) lat = ncfile.createVariable('lat', 'd', ('obs')) lat.units = 'degrees_north' lat.standard_name = 'latitude' lon = ncfile.createVariable('lon', 'd', ('obs')) lon.units = 'degrees_east' lon.standard_name = 'longitude'
def append_to_netcdf(fn, data_list, metadata_list): """Append RADOLAN data and metadata to existing NetCDF WIP! Does currently only work for RADOLAN RW data """ if type(data_list) != list: data_list = [ data_list, ] if type(metadata_list) != list: metadata_list = [ metadata_list, ] with netCDF4.Dataset(fn, "a") as nc_fh: current_length = len(nc_fh["time"][:]) for i, (data, metadata) in enumerate(zip(data_list, metadata_list)): i_new = i + current_length nc_fh["time"][i_new] = netCDF4.date2num( metadata["datetime"], units=nc_fh["time"].units, calendar=nc_fh["time"].calendar, ) product_name = metadata["producttype"] product_config_dict = radolan_product_netcdf_config[product_name] if product_name != nc_fh.producttype: raise ValueError("RADOLAN product of data is `%s` and " "is `%s` in existing NetCDF" % (product_name, nc_fh.producttype)) variable_names = list(product_config_dict["variables"].keys()) if len(variable_names) != 1: raise NotImplementedError("Writting the actual RADOLAN data " "to NetCDF is only supported for " "one `variable`.") variable_name = variable_names[0] variable_config = product_config_dict["variables"][variable_name] if "fill_value" in variable_config["variable_parameters"]: fill_value = variable_config["variable_parameters"][ "fill_value"] offset = variable_config["attributes"]["add_offset"] scale_factor = variable_config["attributes"]["scale_factor"] fill_value_float = fill_value * scale_factor + offset temp_data = data.copy() temp_data[np.isnan(temp_data)] = fill_value_float else: temp_data = data nc_fh[variable_name][i_new, :, :] = temp_data # TODO: Remove this hardcoding of writing `secondary` and `nodatamask` secondary = np.zeros_like(data, dtype="bool").flatten() secondary[metadata["secondary"]] = True nc_fh["secondary"][i_new, :, :] = secondary.reshape(data.shape) nodatamask = np.zeros_like(data, dtype="bool").flatten() nodatamask[metadata["nodatamask"]] = True nc_fh["nodatamask"][i_new, :, :] = nodatamask.reshape(data.shape) cluttermask = np.zeros_like(data, dtype="bool").flatten() cluttermask[metadata["cluttermask"]] = True nc_fh["cluttermask"][i_new, :, :] = cluttermask.reshape(data.shape) # TODO make this more flexible and also test for it !!! nc_fh["maxrange"][i_new] = int(metadata["maxrange"].split(" ")[0]) nc_fh["radarlocations"][i_new] = " ".join( metadata["radarlocations"])
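# A condensed sketch of the append pattern used above: open an existing NetCDF file in append
# mode, take the current length of the unlimited time dimension as the write index, and convert
# the new timestamp with the file's own units and calendar. Path and timestamp are examples only.
import datetime
import netCDF4

with netCDF4.Dataset("radolan_rw.nc", "a") as nc_fh:
    i_new = len(nc_fh["time"][:])
    nc_fh["time"][i_new] = netCDF4.date2num(
        datetime.datetime(2020, 1, 1, 0, 50),
        units=nc_fh["time"].units,
        calendar=nc_fh["time"].calendar,
    )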
def process_ccsm_data(gdir, PI_path): """ First attempt at a method to process the CCSM data into temperature/precip anomalies. """ #Put this in the function def (see process_cesm_data) filesuffix='' if not (('climate_file' in cfg.PATHS) and os.path.exists(cfg.PATHS['climate_file'])): raise IOError('Custom climate file not found') #open dataset for precp use fpath = cfg.PATHS['climate_file'] xr_ccsm = xr.open_dataset(fpath, decode_times=False) #open dataset for tmp use xr_ccsm_ts = xr.open_dataset(fpath) #repeating for pi xr_pi = xr.open_dataset(PI_path, decode_times=False) # selecting location lon = gdir.cenlon lat = gdir.cenlat #Setting the longitude to a 0-360 grid [I think...] "CESM files are in 0-360" if lon <= 0: lon += 360 #"take the closest" #"TODO: consider GCM interpolation?" precp = xr_ccsm.PRECC.sel(lat=lat, lon=lon, method='nearest') + xr_ccsm.PRECL.sel(lat=lat, lon=lon, method='nearest') temp = xr_ccsm_ts.TS.sel(lat=lat, lon=lon, method='nearest') precp_pi = xr_pi.PRECC.sel(lat=lat, lon=lon, method='nearest') + xr_pi.PRECL.sel(lat=lat, lon=lon, method='nearest') temp_pi = xr_pi.TS.sel(lat=lat, lon=lon, method='nearest') #convert temp from K to C temp = temp - 273.15 temp_pi = temp_pi - 273.15 #Take anomaly for CCSM data (with preindustrial control) for i in range(12): temp.values[i] = temp.values[i] - temp_pi.values[i] for i in range(12): precp.values[i] = precp.values[i] - precp_pi.values[i] #from normal years to hydrological years sm = cfg.PARAMS['hydro_month_'+gdir.hemisphere] em = sm - 1 if (sm > 1) else 12 y0 = int(pd.to_datetime(str(temp.time.values[0])).strftime('%Y')) y1 = int(pd.to_datetime(str(temp.time.values[-1])).strftime('%Y')) #time string for temp/precip (hydro years) time = pd.period_range('{}-{:02d}'.format(y0, sm),'{}-{:02d}'.format(y1, em), freq='M') #reorder single year of equil data & add correct time #calculate place in array to concat from (2 for ccsm data) conc_start = sm-2 conc_end = sm-14 temp_hydro = xr.concat([temp[conc_start:],temp[:conc_end]],dim="time") precp_hydro = xr.concat([precp[conc_start:],precp[:conc_end]],dim="time") temp_hydro['time'] = time precp_hydro['time'] = time temp = temp_hydro precp = precp_hydro # Workaround for https://github.com/pydata/xarray/issues/1565 temp['month'] = ('time', time.month) precp['month'] = ('time', time.month) temp['year'] = ('time', time.year) temp['year'] = ('time', time.year) ny, r = divmod(len(temp.time), 12) assert r == 0 #Convert m s-1 to mm mth-1 (for precp) ndays = np.tile(cfg.DAYS_IN_MONTH, y1-y0) precp = precp * ndays * (60 * 60 * 24 * 1000) #"Get CRU to apply the anomaly to" fpath = gdir.get_filepath('climate_monthly') ds_cru = xr.open_dataset(fpath) #"Add the climate anomaly to CRU clim" dscru = ds_cru.sel(time=slice('1961', '1990')) # temp loc_temp = dscru.temp.groupby('time.month').mean() #Oct-Sept format preserved ts_tmp = temp.groupby(temp.month) + loc_temp #for prcp loc_pre = dscru.prcp.groupby('time.month').mean() ts_pre = precp.groupby(precp.month) + loc_pre #load dates into save format fpath = cfg.PATHS['climate_file'] dsindex = salem.GeoNetcdf(fpath, monthbegin=True) time1 = dsindex.variables['time'] #weird recursive way to getting the dates in the correct format to save #only nessisary for 1 year of data, in order to rearrange the months and #get the correct year. 
time_array = [datetime(temp.time.year[i], temp.time.month[i],1) for i in range(12)] time_nums = netCDF4.date2num(time_array, time1.units, calendar='noleap') time2 = netCDF4.num2date(time_nums[:], time1.units, calendar='noleap') assert np.all(np.isfinite(ts_pre.values)) assert np.all(np.isfinite(ts_tmp.values)) #"back to -180 - 180" loc_lon = precp.lon if precp.lon <= 180 else precp.lon - 360 #write to netcdf gdir.write_monthly_climate_file(time2, ts_pre.values, ts_tmp.values, float(dscru.ref_hgt), loc_lon, precp.lat.values, time_unit=time1.units, file_name='gcm_data', filesuffix=filesuffix) #dsindex._nc.close() xr_ccsm.close() xr_ccsm_ts.close() xr_pi.close() ds_cru.close()
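# A small sketch of the 'noleap' round trip used above for the CESM/CCSM timestamps: both
# date2num and num2date take a calendar argument, so 365-day model time axes can be converted
# without touching the real-world calendar. The units string here is illustrative only.
import datetime
import netCDF4

units = 'days since 0850-01-01 00:00:00'
nums = netCDF4.date2num([datetime.datetime(1000, m, 1) for m in range(1, 13)], units, calendar='noleap')
dates = netCDF4.num2date(nums, units, calendar='noleap')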
def from_stations(station_file, bry_file, grid=None): """ Construct a boundary forcing file from a stations file generated by a parent-grid. The stations.in file must have been generated by the seapy.roms.gen_stations method; otherwise, the order will be incorrect. Parameters ========== station_file : string Filename of the stations file that is the source for the boundary data bry_file : string Filename of the boundary conditions file to generate grid : string or seapy.model.grid Grid that the boundary conditions are created for Returns ------- None """ grid = seapy.model.asgrid(grid) ncstation = netCDF4.Dataset(station_file) src_ref, time = seapy.roms.get_reftime(ncstation) # Create the boundary file and fill up the descriptive data ncbry = seapy.roms.ncgen.create_bry(bry_file, eta_rho=grid.eta_rho, xi_rho=grid.xi_rho, s_rho=grid.n, reftime=src_ref, clobber=False, title="generated from " + station_file) grid.to_netcdf(ncbry) # Load the times: we need to see if the times are duplicated # because if using assimilation, they may be duplicated for every # outer-loop. Currently, it overwrites the first one each time (this # will need to be fixed if ROMS is fixed). statime = ncstation.variables[time][:] dup = np.where(statime[1:] == statime[0])[0] rng = np.s_[:] if dup.size > 0: rng = np.s_[0:np.min(dup)] statime = statime[rng] brytime = seapy.roms.get_timevar(ncbry) ncbry.variables["bry_time"][:] = netCDF4.date2num( netCDF4.num2date(statime, ncstation.variables[time].units), ncbry.variables[brytime].units) # Set up the indices bry = { "south": range(0, grid.lm), "north": range(grid.lm, 2 * grid.lm), "west": range(2 * grid.lm, 2 * grid.lm + grid.ln), "east": range(2 * grid.lm + grid.ln, 2 * (grid.lm + grid.ln)) } # Get the information to construct the depths of the station data sta_vt = ncstation.variables["Vtransform"][:] sta_hc = ncstation.variables["hc"][:] sta_s_rho = ncstation.variables["s_rho"][:] sta_cs_r = ncstation.variables["Cs_r"][:] sta_h = ncstation.variables["h"][:] sta_angle = ncstation.variables["angle"][:] sta_lon = ncstation.variables["lon_rho"][:] sta_lat = ncstation.variables["lat_rho"][:] sta_mask = np.ones(sta_lat.shape) sta_mask[sta_lon * sta_lat > 1e10] = 0 # Load the station data as we need to manipulate it sta_zeta = np.ma.masked_greater(ncstation.variables["zeta"][rng], 100) sta_ubar = np.ma.masked_greater(ncstation.variables["ubar"][rng], 100) sta_vbar = np.ma.masked_greater(ncstation.variables["vbar"][rng], 100) sta_temp = np.ma.masked_greater(ncstation.variables["temp"][rng], 100) sta_salt = np.ma.masked_greater(ncstation.variables["salt"][rng], 100) sta_u = np.ma.masked_greater(ncstation.variables["u"][rng], 100) sta_v = np.ma.masked_greater(ncstation.variables["v"][rng], 100) ncstation.close() # Create the true positions and mask grid_h = np.concatenate( [grid.h[0, :], grid.h[-1, :], grid.h[:, 0], grid.h[:, -1]]) grid_lon = np.concatenate([ grid.lon_rho[0, :], grid.lon_rho[-1, :], grid.lon_rho[:, 0], grid.lon_rho[:, -1] ]) grid_lat = np.concatenate([ grid.lat_rho[0, :], grid.lat_rho[-1, :], grid.lat_rho[:, 0], grid.lat_rho[:, -1] ]) grid_mask = np.concatenate([ grid.mask_rho[0, :], grid.mask_rho[-1, :], grid.mask_rho[:, 0], grid.mask_rho[:, -1] ]) grid_angle = np.concatenate([ grid.angle[0, :], grid.angle[-1, :], grid.angle[:, 0], grid.angle[:, -1] ]) # Search for bad stations due to child grid overlaying parent mask. # Unfortunately, ROMS will give points that are not at the locations # you specify if those points conflict with the mask. 
So, these points # are simply replaced with the nearest. dist = np.sqrt((sta_lon - grid_lon)**2 + (sta_lat - grid_lat)**2) bad_pts = np.where(np.logical_and(dist > 0.001, grid_mask == 1))[0] good_pts = np.where(np.logical_and(dist < 0.001, grid_mask == 1))[0] for i in bad_pts: didx = np.sqrt((sta_lon[i] - sta_lon[good_pts])**2 + (sta_lat[i] - sta_lat[good_pts])**2).argmin() index = good_pts[didx] sta_h[i] = sta_h[index] sta_angle[i] = sta_angle[index] sta_lon[i] = sta_lon[index] sta_lat[i] = sta_lat[index] sta_zeta[:, i] = sta_zeta[:, index] sta_ubar[:, i] = sta_ubar[:, index] sta_vbar[:, i] = sta_vbar[:, index] sta_temp[:, i, :] = sta_temp[:, index, :] sta_salt[:, i, :] = sta_salt[:, index, :] sta_u[:, i, :] = sta_u[:, index, :] sta_v[:, i, :] = sta_v[:, index, :] # Construct the boundaries: a dictionary of boundary side and two element # array whether the u[0] or v[1] dimensions need to be averaged sides = { "north": [True, False], "south": [True, False], "east": [False, True], "west": [False, True] } delta_angle = sta_angle - grid_angle sta_ubar, sta_vbar = seapy.rotate(sta_ubar, sta_vbar, delta_angle) sta_u, sta_v = seapy.rotate(sta_u, sta_v, np.tile(delta_angle, (sta_u.shape[-1], 1)).T) # Set up the parameters for depth-interpolated wght = 5 nx = 3 ny = 9 # Build a non-extrapolating field to interpolate. Generate the # position and depth def __expand_field(x): shp = x.shape y = np.zeros((shp[0] + 2, shp[1] + 2)) y[1:-1, 1:-1] = x y[1:-1, 0] = x[:, 0] y[1:-1, -1] = x[:, -1] y[0, :] = y[1, :] y[-1, :] = y[-2, :] return y for side in sides: print(side) # Masks sta_ocean = np.where(sta_mask[bry[side]] == 1)[0] ocean = np.where(grid_mask[bry[side]] == 1)[0] # If we have a masked boundary, skip it if not np.any(ocean): continue # 1) Zeta ncbry.variables["zeta_" + side][:, ocean] = sta_zeta[:, bry[side]][:, ocean] # 2) Ubar if sides[side][0]: ncbry.variables["ubar_" + side][:] = 0.5 * (sta_ubar[:, bry[side][0:-1]] + sta_ubar[:, bry[side][1:]]) else: ncbry.variables["ubar_" + side][:] = sta_ubar[:, bry[side]] # 3) Vbar if sides[side][1]: ncbry.variables["vbar_" + side][:] = 0.5 * (sta_vbar[:, bry[side][0:-1]] + sta_vbar[:, bry[side][1:]]) else: ncbry.variables["vbar_" + side][:] = sta_vbar[:, bry[side]] # For 3D variables, we need to loop through time and interpolate # onto the child grid. 
Construct the distances x = np.zeros(len(bry[side])) x[1:] = np.cumsum( seapy.earth_distance(grid_lon[bry[side][0:-1]], grid_lat[bry[side][0:-1]], grid_lon[bry[side][1:]], grid_lat[bry[side][1:]])) sta_x = seapy.adddim(x, len(sta_s_rho)) x = seapy.adddim(x, len(grid.s_rho)) for n, t in seapy.progressbar.progress(enumerate(statime), statime.size): sta_depth = seapy.roms.depth(sta_vt, sta_h[bry[side]], sta_hc, sta_s_rho, sta_cs_r, sta_zeta[n, bry[side]]) depth = seapy.roms.depth(grid.vtransform, grid_h[bry[side]], grid.hc, grid.s_rho, grid.cs_r, sta_zeta[n, bry[side]]) in_x = __expand_field(sta_x[:, sta_ocean]) in_x[:, 0] = in_x[:, 0] - 3600 in_x[:, -1] = in_x[:, -1] + 3600 in_depth = __expand_field(sta_depth[:, sta_ocean]) in_depth[0, :] = in_depth[0, :] - 1000 in_depth[-1, :] = in_depth[-1, :] + 10 # 4) Temp in_data = __expand_field( np.transpose(sta_temp[n, bry[side], :][sta_ocean, :])) ncbry.variables["temp_" + side][n, :] = 0.0 ncbry.variables["temp_" + side][n, :, ocean], pmap = seapy.oa.oasurf( in_x, in_depth, in_data, x[:, ocean], depth[:, ocean], nx=nx, ny=ny, weight=wght) # 5) Salt in_data = __expand_field( np.transpose(sta_salt[n, bry[side], :][sta_ocean, :])) ncbry.variables["salt_" + side][n, :] = 0.0 ncbry.variables["salt_" + side][n, :, ocean], pmap = seapy.oa.oasurf( in_x, in_depth, in_data, x[:, ocean], depth[:, ocean], pmap=pmap, nx=nx, ny=ny, weight=wght) # 6) U in_data = __expand_field( np.transpose(sta_u[n, bry[side], :][sta_ocean, :])) data = np.zeros(x.shape) data[:, ocean], pmap = seapy.oa.oasurf(in_x, in_depth, in_data, x[:, ocean], depth[:, ocean], pmap=pmap, nx=nx, ny=ny, weight=wght) if sides[side][0]: ncbry.variables["u_" + side][n, :] = 0.5 * (data[:, 0:-1] + data[:, 1:]) else: ncbry.variables["u_" + side][n, :] = data # 7) V in_data = __expand_field( np.transpose(sta_v[n, bry[side], :][sta_ocean, :])) data = data * 0 data[:, ocean], pmap = seapy.oa.oasurf(in_x, in_depth, in_data, x[:, ocean], depth[:, ocean], pmap=pmap, nx=nx, ny=ny, weight=wght) if sides[side][1]: ncbry.variables["v_" + side][n, :] = 0.5 * (data[:, 0:-1] + data[:, 1:]) else: ncbry.variables["v_" + side][n, :] = data ncbry.sync() ncbry.close() pass
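The bry_time assignment above re-references the station times from the source file's units to the boundary file's units by going through datetime objects. A hedged, standalone sketch of that pattern, with both unit strings invented for illustration:

import numpy as np
import netCDF4

src_units = 'seconds since 2000-01-01 00:00:00'  # assumed source units
dst_units = 'days since 1968-05-23 00:00:00'     # assumed destination units
src_times = np.array([0.0, 86400.0, 172800.0])   # 0, 1 and 2 days, in seconds
dates = netCDF4.num2date(src_times, src_units)
dst_times = netCDF4.date2num(dates, dst_units)
print(dst_times)  # numeric days relative to the destination epoch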
def getTime(confM2R, year, month, day, ntime): """ Create a date object to keep track of Julian dates etc. Also create a reference date starting at 1948/01/01. Go here to check results: http://lena.gsfc.nasa.gov/lenaDEV/html/doy_conv.html """ if confM2R.indatatype == 'SODA': filename = fc.getSODAfilename(confM2R, year, month, None) if confM2R.indatatype == 'SODA3': filename = fc.getSODA3filename(confM2R, year, month, None) if confM2R.indatatype == 'SODAMONTHLY': filename = fc.getSODAMONTHLYfilename(confM2R, year, month, None) if confM2R.indatatype == 'GLORYS': filename = fc.getGLORYSfilename(confM2R, year, month, "S") if confM2R.indatatype == 'WOAMONTHLY': filename = fc.getWOAMONTHLYfilename(confM2R, year, month, "temperature") if confM2R.indatatype == 'NORESM': filename = fc.getNORESMfilename(confM2R, year, month, "salnlvl") if confM2R.indatatype == 'NS8KM': filename = fc.getNS8KMfilename(confM2R, year, month, "salt") if confM2R.indatatype == 'NS8KMZ': filename, readFromOneFile = fc.getNS8KMZfilename( confM2R, year, month, "salt") # Now open the input file and get the time cdf = Dataset(filename) if confM2R.indatatype == 'NORESM': jdref = date2num(datetime(1948, 1, 1), units="days since 1948-01-01 00:00:00", calendar="standard") elif confM2R.indatatype == 'NS8KMZ': jdref = date2num(datetime(1948, 1, 1), units="days since 1948-01-01 00:00:00", calendar="standard") elif confM2R.indatatype == 'GLORYS': jdref = date2num(datetime(1948, 1, 1), cdf.variables["time_counter"].units, calendar=cdf.variables["time_counter"].calendar) elif confM2R.indatatype == 'NS8KM': jdref = date2num(datetime(1948, 1, 1), cdf.variables["ocean_time"].units, calendar=cdf.variables["ocean_time"].calendar) elif confM2R.indatatype == 'SODA3': jdref = date2num(datetime(1948, 1, 1), units="days since 1948-01-01 00:00:00", calendar="standard") else: jdref = date2num(datetime(1948, 1, 1), cdf.variables["time"].units, calendar=cdf.variables["time"].calendar) if confM2R.indatatype == 'SODAMONTHLY': # Find the day and month that the SODAMONTHLY file represents based on the year and ID number. # Each SODA file represents a 1 month average. mycalendar = cdf.variables["time"].calendar myunits = cdf.variables["time"].units currentdate = datetime(year, month, day) jd = date2num(currentdate, myunits, calendar=mycalendar) if confM2R.indatatype == 'SODA3': # Each SODA file represents 12 month averages. myunits = cdf.variables["time"].units currentdate = datetime(year, month, day) jd = date2num(currentdate, units="days since 1948-01-01 00:00:00", calendar="standard") if confM2R.indatatype == 'GLORYS': # Find the day and month that the GLORYS file represents based on the year and ID number. # Each file represents a 1 month average. mycalendar = cdf.variables["time_counter"].calendar myunits = cdf.variables["time_counter"].units currentdate = datetime(year, month, day) jd = date2num(currentdate, myunits, calendar=mycalendar) if confM2R.indatatype == 'NS8KM': # Find the day and month that the NS8KM file represents based on the year and ID number. # Each file represents a 1 month average. mycalendar = cdf.variables["ocean_time"].calendar myunits = cdf.variables["ocean_time"].units currentdate = datetime(year, month, day) jd = date2num(currentdate, myunits, calendar=mycalendar) if confM2R.indatatype == 'NS8KMZ': # Find the day and month that the NS8KMZ file represents based on the year and ID number. # Each file represents a 1 month average. 
currentdate = datetime(year, month, day) myunits = cdf.variables["time"].units jd = date2num(currentdate, myunits, calendar="gregorian") print("Days:", jd, currentdate, year, month, day) if confM2R.indatatype == 'NORESM': # Find the day and month that the NORESM file represents. We need to use the time modules from # netcdf4 for python as they handle calendars that are no_leap. # http://www.esrl.noaa.gov/psd/people/jeffrey.s.whitaker/python/netcdftime.html#datetime mydays = cdf.variables["time"][ntime] mycalendar = cdf.variables["time"].calendar myunits = cdf.variables["time"].units # For NORESM we switch from input units referenced to 1800-01-01 to output units referenced to 1948-01-01 currentdate = num2date(mydays, units=myunits, calendar=mycalendar) jd = date2num(currentdate, 'days since 1948-01-01 00:00:00', calendar='noleap') confM2R.grdROMS.time = (jd - jdref) confM2R.grdROMS.reftime = jdref confM2R.grdROMS.timeunits = myunits cdf.close() print("-------------------------------") print('\nCurrent time of %s file: %s' % (confM2R.indatatype, currentdate)) print("-------------------------------")
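A small self-contained sketch of the jd - jdref bookkeeping done in getTime: both the target date and the reference date are expressed in the same units and calendar, and their difference gives the model time in days since the reference. The calendar name is an assumption here; getTime reads it from the input file where available:

from datetime import datetime
from netCDF4 import date2num

ref_units = 'days since 1948-01-01 00:00:00'
jdref = date2num(datetime(1948, 1, 1), ref_units, calendar='standard')  # reference epoch
jd = date2num(datetime(2009, 7, 15), ref_units, calendar='standard')    # current model date
print(jd - jdref)  # days elapsed since the 1948-01-01 reference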
# fetch NCEP NARR data grid_date = num2date(grid.time['data'], grid.time['units'])[0] y_m_d = grid_date.strftime('%Y%m%d') y_m = grid_date.strftime('%Y%m') url = ('http://nomads.ncdc.noaa.gov/dods/NCEP_NARR_DAILY/' + y_m + '/' + y_m_d + '/narr-a_221_' + y_m_d + '_0000_000') # Use a local copy of the online NCEP NARR data; this file can be created with # the command: # nccopy http://nomads.ncdc.noaa.gov/dods/NCEP_NARR_DAILY/201105/20110520/narr-a_221_20110520_0000_000?lon,lat,time,prmsl narr-a_221_20110520_0000_000.nc # comment out the next line to retrieve the data from the OPeNDAP server. url = 'narr-a_221_20110520_0000_000.nc' data = Dataset(url) # extract data at the correct time data_time = data.variables['time'] t_idx = abs(data_time[:] - date2num(grid_date, data_time.units)).argmin() # convert prmsl from Pa to hPa prmsl = 0.01 * data.variables['prmsl'][t_idx] # plot the reanalysis on the basemap lons, lats = np.meshgrid(data.variables['lon'][:], data.variables['lat'][:]) x, y = display.basemap(lons, lats) clevs = np.arange(900, 1100., 1.) display.basemap.contour(x, y, prmsl, clevs, colors='k', linewidths=1.) # colorbar cbax = fig.add_axes(colorbar_panel_axes) display.plot_colorbar(cax=cbax) # panel 2, longitude slice. ax2 = fig.add_axes(x_cut_panel_axes) display.plot_longitude_slice('REF',
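The time lookup above converts the radar grid's timestamp into the reanalysis file's own units before taking the nearest index. A hedged sketch of that nearest-time selection, with the unit string and sample offsets made up for illustration:

import numpy as np
from datetime import datetime
from netCDF4 import date2num

time_units = 'hours since 2011-05-20 00:00:00'  # assumed file units
file_times = np.array([0.0, 3.0, 6.0, 9.0])     # numeric offsets stored in the file
target = datetime(2011, 5, 20, 5, 30)
t_idx = np.abs(file_times - date2num(target, time_units)).argmin()
print(t_idx)  # -> 2, since 06:00 is closest to 05:30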
tunit = 'seconds since 1970-01-01' # Write ensemble forecast to netcdf with Dataset(outfile, 'w') as dset: dset.createDimension('time', None) dset.createDimension('lat', state.ny()) dset.createDimension('lon', state.nx()) dset.createDimension('ens', state.nmems()) dset.createVariable('time', 'i4', ('time', )) dset.createVariable('lat', 'f8', ('lat', )) dset.createVariable('lon', 'f8', ('lon', )) dset.createVariable('ens', 'i4', ('ens', )) dset.variables['time'].units = tunit dset.variables['lat'].units = 'degrees_north' dset.variables['lon'].units = 'degrees_east' dset.variables['ens'].units = 'member_number' dset.variables['time'][:] = date2num(state.ensemble_times(), tunit) dset.variables['lat'][:] = state['lat'].values[:, 0] dset.variables['lon'][:] = state['lon'].values[0, :] dset.variables['ens'][:] = state['mem'].values for var in state.vars(): print('Writing variable {}'.format(var)) dset.createVariable(var, 'f8', ( 'time', 'lat', 'lon', 'ens', )) dset.variables[var].units = ut.get_units(var) dset.variables[var][:] = state[var].values # Get the required packages
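A short read-back sketch for the ensemble file written above, assuming the coordinate variables created there ('time', 'lat', 'lon', 'ens'); any forecast variable name would have to match whatever state.vars() produced:

from netCDF4 import Dataset, num2date

with Dataset(outfile) as dset:
    times = num2date(dset.variables['time'][:], dset.variables['time'].units)
    nlat = len(dset.dimensions['lat'])
    nlon = len(dset.dimensions['lon'])
    print('forecast times:', times)
    print('grid size: {} x {}'.format(nlat, nlon))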