def test_interpolate_gps(self):
    est_lat, est_lon = interpolate_gps(
        timestamps=masked_epoch(self.df.t),
        latitude=self.df.y,
        longitude=self.df.x
    )
    assert len(est_lat) == len(est_lon)
    assert len(est_lat) == self.df.y.size
    assert len(est_lon) == self.df.x.size
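For reference, `interpolate_gps` and `masked_epoch` are helpers imported by these tests; the assertions above only check array lengths. Below is a minimal sketch of what they might look like, assuming `masked_epoch` yields float epoch seconds (NaN where the timestamp is missing) and `interpolate_gps` linearly interpolates latitude/longitude over the valid GPS fixes. The bodies are illustrative assumptions inferred from the call sites, not the library's actual implementations:

```python
import numpy as np
import pandas as pd


def masked_epoch(timestamps):
    """Assumed behavior: datetime Series -> float epoch seconds, NaN for NaT."""
    ts = pd.to_datetime(timestamps)
    return ((ts - pd.Timestamp('1970-01-01')) / pd.Timedelta('1s')).to_numpy()


def interpolate_gps(timestamps, latitude, longitude):
    """Assumed behavior: fill gaps between GPS fixes by linear interpolation."""
    lat = np.asarray(latitude, dtype='float64')
    lon = np.asarray(longitude, dtype='float64')
    valid = ~(np.isnan(timestamps) | np.isnan(lat) | np.isnan(lon))
    if not valid.any():
        # Matches the ValueError that standardize() catches below
        raise ValueError('No valid GPS fixes to interpolate from')
    est_lat = np.interp(timestamps, timestamps[valid], lat[valid])
    est_lon = np.interp(timestamps, timestamps[valid], lon[valid])
    return est_lat, est_lon
```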
def test_density(self):
    sr = SlocumReader(ctd_filepath)
    df = sr.standardize()

    salinity = calculate_practical_salinity(
        sr.data.sci_water_cond,
        sr.data.sci_water_temp,
        sr.data.sci_water_pressure,
    )
    assert sr.data.sci_m_present_time.size == salinity.size

    est_lat, est_lon = interpolate_gps(
        timestamps=masked_epoch(df.t),
        latitude=df.y,
        longitude=df.x
    )

    density = calculate_density(
        sr.data.sci_water_temp,
        sr.data.sci_water_pressure,
        salinity,
        est_lat,
        est_lon
    )
    assert sr.data.sci_m_present_time.size == density.size
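`calculate_practical_salinity` and `calculate_density` are presumably thin wrappers over the TEOS-10 `gsw` toolkit (an assumption; only their call signatures above are known from this file). A sketch consistent with those signatures, further assuming raw Slocum units of S/m for conductivity and bar for pressure, hence the ×10 conversions to mS/cm and dbar:

```python
import gsw  # TEOS-10 Gibbs SeaWater toolkit


def calculate_practical_salinity(conductivity, temperature, pressure):
    """Sketch: practical salinity from raw Slocum CTD sensors.
    Assumes conductivity in S/m and pressure in bar."""
    return gsw.SP_from_C(conductivity * 10, temperature, pressure * 10)


def calculate_density(temperature, pressure, salinity, latitude, longitude):
    """Sketch: in-situ density via Absolute Salinity and Conservative
    Temperature. Assumes pressure in bar."""
    sa = gsw.SA_from_SP(salinity, pressure * 10, longitude, latitude)
    ct = gsw.CT_from_t(sa, temperature, pressure * 10)
    return gsw.rho(sa, ct, pressure * 10)
```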
def standardize(self, gps_prefix=None):

    df = self.data.copy()

    # Convert NMEA coordinates to decimal degrees
    for col in df.columns:
        # Ignore if the m_gps_lat and/or m_gps_lon value is the default masterdata value
        if col.endswith('_lat'):
            df[col] = df[col].map(lambda x: get_decimal_degrees(x) if x <= 9000 else np.nan)
        elif col.endswith('_lon'):
            df[col] = df[col].map(lambda x: get_decimal_degrees(x) if x < 18000 else np.nan)

    # Standardize 'time' to the 't' column
    for t in self.TIMESTAMP_SENSORS:
        if t in df.columns:
            df['t'] = pd.to_datetime(df[t], unit='s')
            break

    # Interpolate GPS coordinates
    if 'm_gps_lat' in df.columns and 'm_gps_lon' in df.columns:

        df['drv_m_gps_lat'] = df.m_gps_lat.copy()
        df['drv_m_gps_lon'] = df.m_gps_lon.copy()

        # Fill in data with nulls where the value is the default masterdata value
        masterdatas = (df.drv_m_gps_lon >= 18000) | (df.drv_m_gps_lat > 9000)
        df.loc[masterdatas, 'drv_m_gps_lat'] = np.nan
        df.loc[masterdatas, 'drv_m_gps_lon'] = np.nan

        try:
            # Interpolate the filled in 'x' and 'y'
            y_interp, x_interp = interpolate_gps(
                masked_epoch(df.t),
                df.drv_m_gps_lat,
                df.drv_m_gps_lon
            )
        except (ValueError, IndexError):
            L.warning("Raw GPS values not found!")
            y_interp = np.empty(df.drv_m_gps_lat.size) * np.nan
            x_interp = np.empty(df.drv_m_gps_lon.size) * np.nan

        df['y'] = y_interp
        df['x'] = x_interp

    """
    ---- Option 1: Always calculate Z from pressure ----
    It's really a matter of data provider preference and varies from one
    provider to another. That being said, the sci_water_pressure or
    m_water_pressure variables, if present in the raw data files, will
    typically have more non-NaN values than m_depth. For example, all
    MARACOOS gliders typically have both m_depth and sci_water_pressure
    contained in them. However, m_depth is typically heavily decimated
    while sci_water_pressure contains a more complete pressure record.
    So, while we transmit both m_depth and sci_water_pressure, I calculate
    depth from pressure & (interpolated) latitude and use that as my
    NetCDF depth variable.
        - Kerfoot
    """
    # Search for a 'pressure' column
    for p in self.PRESSURE_SENSORS:
        if p in df.columns:
            # Convert bar to dbar here
            df['pressure'] = df[p].copy() * 10
            # Calculate depth from pressure and latitude
            # Negate the results so that increasing values denote increasing depths
            df['z'] = -z_from_p(df.pressure, df.y)
            break

    if 'z' not in df and 'pressure' not in df:
        # Search for a 'z' column
        for p in self.DEPTH_SENSORS:
            if p in df.columns:
                df['z'] = df[p].copy()
                # Calculate pressure from depth and latitude
                # Negate the results so that increasing values denote increasing depth
                df['pressure'] = -p_from_z(df.z, df.y)
                break
    # End Option 1

    """
    ---- Option 2: Use raw pressure/depth data that was sent across ----
    # Standardize to the 'pressure' column
    for p in self.PRESSURE_SENSORS:
        if p in df.columns:
            # Convert bar to dbar here
            df['pressure'] = df[p].copy() * 10
            break

    # Standardize to the 'z' column
    for p in self.DEPTH_SENSORS:
        if p in df.columns:
            df['z'] = df[p].copy()
            break

    # Don't calculate Z from pressure if a metered depth column exists already
    if 'pressure' in df and 'z' not in df:
        # Calculate depth from pressure and latitude
        # Negate the results so that increasing values denote increasing depths
        df['z'] = -z_from_p(df.pressure, df.y)

    if 'z' in df and 'pressure' not in df:
        # Calculate pressure from depth and latitude
        # Negate the results so that increasing values denote increasing depth
        df['pressure'] = -p_from_z(df.z, df.y)
    # End Option 2
    """

    rename_columns = {
        'm_water_vx': 'u_orig',
        'm_water_vy': 'v_orig',
    }

    # These need to be standardized so we can compute salinity and density!
    for vname in self.TEMPERATURE_SENSORS:
        if vname in df.columns:
            rename_columns[vname] = 'temperature'
            break
    for vname in self.CONDUCTIVITY_SENSORS:
        if vname in df.columns:
            rename_columns[vname] = 'conductivity'
            break

    # Standardize columns
    df = df.rename(columns=rename_columns)

    # Compute additional columns
    df = self.compute(df)

    return df
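A note on the negations in Options 1 and 2: if `z_from_p` and `p_from_z` are the TEOS-10 routines (e.g. from the `gsw` package; the imports aren't shown here, so this is an assumption), both use a positive-up convention in which height below the sea surface is negative, so negating their output gives the positive-down depth the NetCDF output expects:

```python
import gsw

# gsw.z_from_p returns height relative to the sea surface (negative underwater),
# so negating it yields a positive-down depth, as in df['z'] = -z_from_p(...)
height = gsw.z_from_p(100.0, 45.0)  # ~ -99.1 (m) for 100 dbar at 45N
depth = -height                     # ~ 99.1 m, increasing downward
```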
def assign_profiles(df, tsint=1):
    profile_df = df.copy()
    profile_df['profile'] = np.nan  # Fill profile with nans
    tmp_df = df.copy()

    if tsint is None:
        tsint = 1

    # Make 't' epochs and not a DateTimeIndex
    tmp_df['t'] = masked_epoch(tmp_df.t)
    # Set negative depth values to NaN
    tmp_df.loc[tmp_df.z <= 0, 'z'] = np.nan

    # Remove any rows where time or z is NaN
    tmp_df = tmp_df.dropna(subset=['t', 'z'], how='any')

    if len(tmp_df) < 2:
        return None

    # Create the fixed timestamp array from the min timestamp to the max timestamp
    # spaced by tsint intervals
    ts = np.arange(tmp_df.t.min(), tmp_df.t.max(), tsint)

    # Stretch estimated values for interpolation to span entire dataset
    interp_z = np.interp(
        ts,
        tmp_df.t,
        tmp_df.z,
        left=tmp_df.z.iloc[0],
        right=tmp_df.z.iloc[-1]
    )
    del tmp_df

    if len(interp_z) < 2:
        return None

    filtered_z = boxcar_smooth_dataset(interp_z, max(tsint // 2, 1))
    delta_depth = calculate_delta_depth(filtered_z)

    # Find where the depth indexes (-1 and 1) flip
    inflections = np.where(np.diff(delta_depth) != 0)[0]

    # Do we have any profiles?
    if inflections.size < 1:
        return profile_df

    # Prepend a zero at the beginning to start the series of profiles
    p_inds = np.insert(inflections, 0, 0)
    # Append the size of the time array to end the series of profiles
    p_inds = np.append(p_inds, ts.size - 1)
    # Zip up neighbors to get the ranges of each profile in interpolated space
    p_inds = list(zip(p_inds[0:-1], p_inds[1:]))
    # Convert the profile indexes into datetime objects
    p_inds = [
        (
            pd.to_datetime(ts[int(p0)], unit='s'),
            pd.to_datetime(ts[int(p1)], unit='s')
        )
        for p0, p1 in p_inds
    ]

    # We have the profiles in interpolated space, now associate this
    # space with the actual data using the datetimes.
    # Iterate through the profile start/stop indices
    for profile_index, (min_time, max_time) in enumerate(p_inds):
        # Get rows between the min and max time
        time_between = profile_df.t.between(min_time, max_time, inclusive=True)
        # Get indexes of the between rows since we can't assign by the range due to NaT values
        ixs = profile_df.loc[time_between].index.tolist()
        # Set the rows' profile column to the profile id
        if len(ixs) > 1:
            profile_df.loc[ixs[0]:ixs[-1], 'profile'] = profile_index
        elif len(ixs) == 1:
            profile_df.loc[ixs[0], 'profile'] = profile_index
        else:
            L.debug('No data rows matched the time range of this profile. Skipping.')

    # Remove rows that were not assigned a profile
    # profile_df = profile_df.loc[~profile_df.profile.isnull()]

    return profile_df
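`boxcar_smooth_dataset` and `calculate_delta_depth` are imported helpers. Plausible sketches consistent with how they are used here (a running mean to suppress sensor noise in the interpolated depths, then the per-step direction of travel so that `np.diff(delta_depth) != 0` marks dive/climb inflections); treat both bodies as assumptions about their behavior:

```python
import numpy as np


def boxcar_smooth_dataset(dataset, window_size):
    """Assumed: running-mean (boxcar) filter over the interpolated depths."""
    window = np.ones(window_size) / window_size
    return np.convolve(dataset, window, mode='same')


def calculate_delta_depth(interp_data):
    """Assumed: direction of travel per step (+1 descending, -1 ascending).
    np.sign also emits 0 for flat segments; the real helper may handle those
    differently."""
    return np.sign(np.diff(interp_data))
```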
def assign_profiles(df, tsint=None):
    """Assigns a profile number to each row of a glider depth timeseries by
    finding inflection points in the smoothed depth record.

    Parameters:
        df: DataFrame with a datetime 't' column and a positive-down 'z' column
        tsint: interpolation timestep in seconds (defaults to 2)

    Returns:
        A copy of df with a 'profile' column identifying each profile,
        or None if there are fewer than two valid rows

    Use filter_yo_extrema to remove invalid/incomplete profiles
    """
    profile_df = df.copy()
    profile_df['profile'] = np.nan  # Fill profile with nans
    tmp_df = df.copy()

    if tsint is None:
        tsint = 2

    # Make 't' epochs and not a DateTimeIndex
    tmp_df['t'] = masked_epoch(tmp_df.t)
    # Set negative depth values to NaN
    tmp_df.loc[tmp_df.z <= 0, 'z'] = np.nan

    # Remove NaN rows
    tmp_df = tmp_df.dropna(subset=['t', 'z'], how='any')

    if len(tmp_df) < 2:
        return None

    # Create the fixed timestamp array from the min timestamp to the max timestamp
    # spaced by tsint intervals
    ts = np.arange(tmp_df.t.min(), tmp_df.t.max(), tsint)

    # Stretch estimated values for interpolation to span entire dataset
    interp_z = np.interp(
        ts,
        tmp_df.t,
        tmp_df.z,
        left=tmp_df.z.iloc[0],
        right=tmp_df.z.iloc[-1]
    )
    del tmp_df

    if len(interp_z) < 2:
        return None

    filtered_z = boxcar_smooth_dataset(interp_z, max(tsint // 2, 1))
    delta_depth = calculate_delta_depth(filtered_z)

    p_inds = np.empty((0, 2))
    inflections = np.where(np.diff(delta_depth) != 0)[0]
    if inflections.size < 1:
        return profile_df

    p_inds = np.append(p_inds, [[0, inflections[0]]], axis=0)
    for p in range(len(inflections) - 1):
        p_inds = np.append(p_inds, [[inflections[p], inflections[p + 1]]], axis=0)
    p_inds = np.append(p_inds, [[inflections[-1], len(ts) - 1]], axis=0)

    # Start profile index
    profile_index = 0
    ts_window = tsint * 2

    # Iterate through the profile start/stop indices
    for p0, p1 in p_inds:
        min_time = pd.to_datetime(ts[int(p0)] - ts_window, unit='s')
        max_time = pd.to_datetime(ts[int(p1)] + ts_window, unit='s')

        # Get rows between the min and max time
        time_between = profile_df.t.between(min_time, max_time, inclusive=True)
        # Get indexes of the between rows since we can't assign by the range due to NaT values
        ixs = profile_df.loc[time_between].index.tolist()
        # Set the rows' profile column to the profile id
        if len(ixs) > 1:
            profile_df.loc[ixs[0]:ixs[-1], 'profile'] = profile_index
        elif len(ixs) == 1:
            profile_df.loc[ixs[0], 'profile'] = profile_index
        else:
            L.debug('No data rows matched the time range of this profile. Skipping.')

        # Increment the profile index
        profile_index += 1

    # Remove rows that were not assigned a profile
    # profile_df = profile_df.loc[~profile_df.profile.isnull()]

    # L.info(
    #     list(zip(
    #         profile_df.t,
    #         profile_df.profile,
    #         profile_df.z,
    #     ))[0:20]
    # )

    return profile_df
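Either version can be exercised with a synthetic yo. This usage sketch assumes a DataFrame with a datetime 't' column and positive-down depths in 'z' (and a pandas version that still accepts `between(..., inclusive=True)`, as the code above does):

```python
import numpy as np
import pandas as pd

# Three synthetic dive/climb cycles over ten minutes, one sample per second
t = pd.date_range('2020-01-01', periods=600, freq='s')
z = 25 * (1 - np.cos(np.linspace(0, 6 * np.pi, 600)))  # 0-50 m, positive down
df = pd.DataFrame({'t': t, 'z': z})

profiles = assign_profiles(df, tsint=2)
# Each dive and each climb becomes its own profile, so roughly six here
print(profiles.profile.nunique())
```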