def add_ssh_tides(obs, tide_file, tide_error, tide_start=None, provenance=None,
                  reftime=seapy.default_epoch):
    """
    Apply predicted barotropic tides to the SSH values of the given
    observations using the given tidal forcing file.

    Parameters
    ----------
    obs : seapy.roms.obs.obs or string,
        The observations to add the tidal signal to.
    tide_file : string,
        The name of the ROMS tidal forcing file to use for predicting the
        barotropic tides.
    tide_error : np.masked_array
        A two dimensional array of the tidal fit errors to apply to the SSH
        errors when adding the tides. This should be the same size as the
        rho-grid. The units of the error must be in meters. If it is masked,
        the mask will be honored and obs that fall within the mask will be
        removed. This allows you to filter out regions of high error.
    tide_start : datetime, optional,
        The tide_start of the tide file. If not specified, it is read from
        the attributes of the tidal forcing file.
    provenance : list of int or string, optional,
        The provenance to apply the tides to (ignore other observations
        of the same type, but different instrument)
    reftime : datetime,
        Reference time for the observation times

    Returns
    -------
    None:
        The obs structure is mutable and is changed in place

    Examples
    --------
    >>> obs = obs('observation_file.nc')
    >>> add_ssh_tides(obs, 'tide_frc.nc', errmap)

    The resulting 'obs' variable will have modified data. To save it:

    >>> obs.to_netcdf()
    """
    # Load tidal file data
    frc = seapy.roms.tide.load_forcing(tide_file)
    if not tide_start:
        tide_start = frc['tide_start']

    # Make sure that the sizes are the same
    if frc['Eamp'].shape[1:] != tide_error.shape:
        raise ValueError(
            "The error array is not the same size as the tidal grid")

    # Gather the observations that need tidal information
    obs = seapy.roms.obs.asobs(obs)
    pro = seapy.roms.obs.asprovenance(provenance) if provenance else None
    if pro is not None:
        l = np.where(np.logical_and(obs.type == 1,
                                    np.in1d(obs.provenance, pro)))
    else:
        l = np.where(obs.type == 1)

    # If we have any, then do tidal predictions and add the signal
    # and error to the observations
    bad = []
    if l[0].any():
        ox = np.rint(obs.x[l]).astype(int)
        oy = np.rint(obs.y[l]).astype(int)
        idx = seapy.unique_rows((ox, oy))
        for cur in seapy.progressbar.progress(idx):
            pts = np.where(np.logical_and(ox == ox[cur], oy == oy[cur]))
            # If this point is masked, remove it from the observations
            if not tide_error[oy[cur], ox[cur]]:
                bad.append(l[0][pts].tolist())
            else:
                time = [reftime + datetime.timedelta(t)
                        for t in obs.time[l][pts]]
                amppha = seapy.tide.pack_amp_phase(
                    frc['tides'], frc['Eamp'][:, oy[cur], ox[cur]],
                    frc['Ephase'][:, oy[cur], ox[cur]])
                zpred = seapy.tide.predict(time, amppha,
                                           lat=obs.lat[l][cur],
                                           tide_start=tide_start)
                # Add the tidal prediction to the values and inflate the
                # error by the tidal fit variance
                obs.value[l[0][pts]] += zpred
                obs.error[l[0][pts]] = np.maximum(
                    obs.error[l[0][pts]],
                    tide_error[oy[cur], ox[cur]]**2)

    # If any were bad, then remove them
    if bad:
        obs.delete(seapy.flatten(bad))
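
# A minimal usage sketch for add_ssh_tides (illustrative, not part of the
# module API): the grid and file names below are hypothetical, and the 5 cm
# error magnitude is an assumed value. Masked cells in tide_error cause the
# obs at those grid points to be deleted.
#
#   import numpy as np
#   import seapy
#
#   grid = seapy.model.asgrid('grid.nc')             # hypothetical grid file
#   tide_error = np.ma.masked_array(
#       np.full(grid.lat_rho.shape, 0.05))           # 5 cm tidal fit error
#   tide_error[grid.h < 20] = np.ma.masked           # drop obs in shallow cells
#   sla_obs = seapy.roms.obs.obs('aviso_obs.nc')     # hypothetical obs file
#   add_ssh_tides(sla_obs, 'tide_frc.nc', tide_error)
#   sla_obs.to_netcdf()                              # save the modified obs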
def merge_files(obs_files, out_files, days, dt, reftime, limits=None,
                clobber=True):
    """
    Merge a group of observation files into combined new files containing
    only the observations that lie within the corresponding dates.

    Parameters
    ----------
    obs_files : list,
        List of files to merge together (a single file will work; it will
        just be filtered by the dates)
    out_files : list or string,
        List of the filenames to create for each of the output periods.
        If a single string is given, the character '#' will be replaced
        by the starting time of the observation (e.g. out_files="out_#.nc"
        will become out_03234.nc)
    days : list of tuples,
        List of starting and ending day numbers for each cycle to process.
        The first value is the start day, the second is the end day. The
        number of tuples is the number of files to output.
    dt : float,
        Time separation of observations. Observations that are less than
        dt apart in time will be set to the same time.
    reftime : datetime,
        Reference time used to process the observations. The merged files
        are timed relative to the beginning of the assimilation cycle.
    limits : dict, optional
        Set the limits of the grid points that observations are allowed
        within, {'north':i, 'south':i, 'east':i, 'west':i }. As obs near
        the boundaries are not advisable, this allows you to specify the
        valid grid range to accept obs within.
    clobber : bool, optional
        If True, output files are overwritten. If False, they are skipped.

    Returns
    -------
    None

    Examples
    --------
    Put together three files into 5 separate files in two-day intervals
    from day 10 through day 20:

    >>> merge_files(["obs_1.nc", "obs_2.nc", "obs_3.nc"], "new_#.nc",
    ...             [(i, i + 2) for i in range(10, 20, 2)],
    ...             dt=1 / 24, reftime=seapy.default_epoch)

    Put together the same three files into 3 overlapping separate files
    in five-day intervals with one overlapping day:

    >>> merge_files(["obs_1.nc", "obs_2.nc", "obs_3.nc"], "new_#.nc",
    ...             [(i, i + 5) for i in range(10, 20, 4)],
    ...             dt=1 / 24, reftime=seapy.default_epoch)
    """
    import re
    import os

    # Only unique files
    obs_files = set().union(seapy.flatten(obs_files))
    outtime = False
    if isinstance(out_files, str):
        outtime = True
        time = re.compile(r'\#')

    # Go through the files to determine which periods they cover
    myobs = list()
    sdays = list()
    edays = list()
    for file in obs_files:
        nc = seapy.netcdf(file)
        fdays = nc.variables['survey_time'][:]
        nc.close()
        l = np.where(np.logical_and(fdays >= np.min(days),
                                    fdays <= np.max(days)))[0]
        if not l.size:
            continue
        myobs.append(file)
        sdays.append(fdays[0])
        edays.append(fdays[-1])
    sdays = np.asarray(sdays)
    edays = np.asarray(edays)

    # Loop over the dates in pairs
    for n, t in enumerate(seapy.progressbar.progress(days)):
        # Set output file name
        if outtime:
            outfile = time.sub("{:05d}".format(t[0]), out_files)
        else:
            outfile = out_files[n]
        if os.path.exists(outfile) and not clobber:
            continue

        # Find the files that cover the current period
        fidx = np.where(np.logical_and(sdays <= t[1], edays >= t[0]))[0]
        if not fidx.size:
            continue

        # Create new observations for this time period
        nobs = obs(myobs[fidx[0]])
        l = np.where(np.logical_or(nobs.time < t[0], nobs.time > t[1]))
        nobs.delete(l)
        for idx in fidx[1:]:
            o = obs(myobs[idx])
            l = np.where(np.logical_and(o.time >= t[0], o.time <= t[1]))
            nobs.add(o[l])

        # Remove any obs that fall outside of the given limits
        if limits is not None:
            l = np.where(np.logical_or.reduce((
                nobs.x < limits['west'],
                nobs.x > limits['east'],
                nobs.y < limits['south'],
                nobs.y > limits['north'])))
            nobs.delete(l)

        # Make time relative to the assimilation window
        nobs.reftime = reftime

        # Save out the new observations
        nobs.to_netcdf(outfile, dt=dt)
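
# A minimal usage sketch for merge_files (illustrative assumptions: the file
# names, the 1-hour dt, the epoch, and the limit indices are hypothetical):
# split three observation files into two-day assimilation windows, keeping
# only obs inside an interior region of the grid.
#
#   import datetime
#
#   merge_files(["obs_1.nc", "obs_2.nc", "obs_3.nc"], "new_#.nc",
#               [(i, i + 2) for i in range(10, 20, 2)],
#               dt=1 / 24,                    # obs within 1 hour share a time
#               reftime=datetime.datetime(2000, 1, 1),
#               limits={'north': 190, 'south': 10,
#                       'east': 290, 'west': 10})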