def subset_by_time(self, mintime: float = None, maxtime: float = None):
    """
    We save the line start/end time as an attribute within each raw_ping record.  Use this method to pull out
    just the data that is within the mintime/maxtime range (inclusive mintime, exclusive maxtime).  The class
    will then only have access to data within that time period.

    To return to the full original dataset, use restore_subset

    Parameters
    ----------
    mintime
        minimum time of the subset, if not provided and maxtime is, use the minimum time of the datasets
    maxtime
        maximum time of the subset, if not provided and mintime is, use the maximum time of the datasets

    Returns
    -------
    bool
        True if the subset could not be applied (an empty slice was found), False if the subset succeeded
    """

    if mintime is None and maxtime is not None:
        mintime = np.min([rp.time.values[0] for rp in self.fqpr.multibeam.raw_ping])
    if maxtime is None and mintime is not None:
        maxtime = np.max([rp.time.values[-1] for rp in self.fqpr.multibeam.raw_ping])
    if mintime is None and maxtime is None:
        raise ValueError('subset_by_time: either mintime or maxtime must be provided to subset by time')

    slice_raw_ping = []
    for ra in self.fqpr.multibeam.raw_ping:
        slice_ra = slice_xarray_by_dim(ra, dimname='time', start_time=mintime, end_time=maxtime)
        slice_raw_ping.append(slice_ra)
    if any([slce is None for slce in slice_raw_ping]):
        print('Warning: Subset by time found empty slice at {}-{}, skipping subset'.format(mintime, maxtime))
        return True

    self._prepare_subset()
    self.subset_mintime = mintime
    self.subset_maxtime = maxtime
    self.fqpr.multibeam.raw_ping = slice_raw_ping
    self.fqpr.multibeam.raw_att = slice_xarray_by_dim(self.fqpr.multibeam.raw_att, dimname='time',
                                                      start_time=mintime, end_time=maxtime)
    return False
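# Minimal usage sketch of subset_by_time (hedged): `subsetter` stands in for an instance of the class
# defining the method above (it only needs the self.fqpr attribute the method uses), and the UTC-second
# times are placeholder values.  restore_subset is the companion method named in the docstring.
failed = subsetter.subset_by_time(mintime=1626354000.0, maxtime=1626354600.0)
if not failed:
    # raw_ping/raw_att now only cover the requested window; do time-bounded work here, then restore
    subsetter.restore_subset()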
def build_raw_attitude_for_line(self, line: str, subset: bool = True):
    """
    With the given line name, return the raw attitude dataset from the fqpr_generation.Fqpr instance that
    contains the line.  If subset is True, the returned attitude will only be the raw attitude that covers
    the line.

    Parameters
    ----------
    line
        line name
    subset
        if True will only return the dataset cut to the min/max time of the multibeam line

    Returns
    -------
    xr.Dataset
        the raw attitude either for the whole Fqpr instance that contains the line, or subset to the min/max time of the line
    """

    line_att = None
    fq_inst = self.return_line_owner(line)
    if fq_inst is not None:
        line_att = fq_inst.multibeam.raw_att
        if subset:
            # attributes are all the same across raw_ping datasets, just use the first
            # multibeam_files entries start with [start time, end time, ...]; only the first two are needed here
            line_start_time, line_end_time = fq_inst.multibeam.raw_ping[0].multibeam_files[line][:2]
            line_att = slice_xarray_by_dim(line_att, dimname='time', start_time=line_start_time,
                                           end_time=line_end_time)
    return line_att
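# Hedged example of build_raw_attitude_for_line: `project` stands in for an instance of the class above
# (something exposing return_line_owner), and the line name is a placeholder.
line_att = project.build_raw_attitude_for_line('0001_20210715_123456_testline.all', subset=True)
if line_att is not None:
    # attitude now spans only the line's min/max time
    print(line_att.time.values[0], line_att.time.values[-1])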
def test_slice_xarray_by_dim(self):
    data_arr = np.arange(100)
    test_data = xr.Dataset({'data': (['time'], data_arr)}, coords={'time': data_arr})

    # this method lets you slice by dim values that are not in the actual data
    ans = slice_xarray_by_dim(test_data, dimname='time', start_time=28.7, end_time=29.4)
    assert ans['data'].values == 29

    # can also slice with numbers beyond the data range, will clip to the limits
    ans = slice_xarray_by_dim(test_data, dimname='time', start_time=98.2, end_time=104)
    assert ans['data'].shape == (2,)
    assert ans['data'].values[0] == 98
    assert ans['data'].values[1] == 99
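# A small standalone sketch of slice_xarray_by_dim on synthetic data, mirroring the test above.
# It assumes each bound snaps to the nearest coordinate value, which is consistent with the
# assertions above (28.7-29.4 returning 29, and 98.2-104 returning 98 and 99).
demo = xr.Dataset({'data': (['time'], np.arange(10))}, coords={'time': np.arange(10)})
middle = slice_xarray_by_dim(demo, dimname='time', start_time=2.6, end_time=6.4)
print(middle['data'].values)  # expected: [3 4 5 6]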
def set_filter_by_polygon(self, polygon: np.ndarray, geographic: bool = True):
    """
    Set the ping_filter attribute so that you can then use the set_variable_by_filter and
    get_variable_by_filter methods to get other variables or set data within the polygon selection.  This is
    an alternative to return_soundings_in_polygon that you can use if you want to set the filter without
    loading/returning a lot of data.

    Parameters
    ----------
    polygon
        (N, 2) array of points that make up the selection polygon
    geographic
        if True, the coordinates provided are geographic (latitude/longitude)
    """

    if 'horizontal_crs' not in self.fqpr.multibeam.raw_ping[0].attrs or 'z' not in self.fqpr.multibeam.raw_ping[0].variables.keys():
        raise ValueError('Georeferencing has not been run yet, you must georeference before you can get soundings')
    geo_polygon, proj_polygon = self._build_polygons(polygon, geographic)

    self.ping_filter = []
    polypath = mpl_path.Path(proj_polygon)
    for cnt, rp in enumerate(self.fqpr.multibeam.raw_ping):
        insidedata, intersectdata = filter_subset_by_polygon(rp, geo_polygon)
        base_filter = np.zeros(rp.x.shape[0] * rp.x.shape[1], dtype=bool)
        if insidedata or intersectdata:
            if insidedata:
                for mline, mdata in insidedata.items():
                    linemask, startidx, endidx, starttime, endtime = mdata
                    base_filter[startidx:endidx] = linemask
            if intersectdata:
                for mline, mdata in intersectdata.items():
                    linemask, startidx, endidx, starttime, endtime = mdata
                    # only brute force check those points that are in intersecting geohash regions
                    slice_pd = slice_xarray_by_dim(rp, dimname='time', start_time=starttime, end_time=endtime)
                    xintersect, yintersect = np.ravel(slice_pd.x), np.ravel(slice_pd.y)
                    filt = polypath.contains_points(np.c_[xintersect[linemask], yintersect[linemask]])
                    base_filter[startidx:endidx][linemask] = filt
        self.ping_filter.append(base_filter)
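# Hedged sketch of the polygon-filter workflow described in the docstring above: `subsetter` is an
# instance of the class defining set_filter_by_polygon, the polygon vertices are placeholder geographic
# coordinates, and get_variable_by_filter/set_variable_by_filter are referenced only because the
# docstring names them; their exact signatures are assumed here.
poly = np.array([[-70.20, 42.10], [-70.10, 42.10], [-70.10, 42.20], [-70.20, 42.20], [-70.20, 42.10]])
subsetter.set_filter_by_polygon(poly, geographic=True)
depths = subsetter.get_variable_by_filter('z')        # pull a variable for just the selected soundings
subsetter.set_variable_by_filter('detectioninfo', 2)  # e.g. flag the selected soundings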
def export_pings_to_file(self, output_directory: str = None, file_format: str = 'csv', csv_delimiter=' ',
                         filter_by_detection: bool = True, z_pos_down: bool = True,
                         export_by_identifiers: bool = True):
    """
    Uses the output of georef_along_across_depth to build sounding exports.  Currently you can export to csv,
    las or entwine file formats, see the file_format argument.

    This will use all soundings in the dataset.  If you export to las and want to retain rejected soundings
    under the noise classification, set filter_by_detection to False.

    Filters using the detectioninfo variable if it is present in the multibeam data and filter_by_detection
    is set.

    Set z_pos_down to False if you want positive up.  Otherwise you get positive down.

    entwine export will build las first, and then entwine from las

    Parameters
    ----------
    output_directory
        optional, destination directory for the xyz exports, otherwise will auto export next to converted data
    file_format
        optional, destination file format, default is csv file, options include ['csv', 'las', 'entwine']
    csv_delimiter
        optional, if you choose file_format=csv, this will control the delimiter
    filter_by_detection
        optional, if True will only write soundings that are not rejected
    z_pos_down
        if True, will export soundings with z positive down (this is the native Kluster convention), only for csv export
    export_by_identifiers
        if True, will generate separate files for each combination of serial number/sector/frequency

    Returns
    -------
    list
        list of written file paths
    """

    chunksize, fldr_path, entwine_fldr_path, suffix = self._validate_export(output_directory, file_format)
    if not chunksize:
        return []

    self.fqpr.logger.info('****Exporting xyz data to {}****'.format(file_format))
    starttime = perf_counter()

    chunk_count = 0
    written_files = []
    for rp in self.fqpr.multibeam.raw_ping:
        self.fqpr.logger.info('Operating on system {}'.format(rp.system_identifier))
        # build list of lists for the mintime and maxtime (inclusive) for each chunk, each chunk will contain number of pings equal to chunksize
        chunktimes = [[float(rp.time.isel(time=int(i * chunksize))),
                       float(rp.time.isel(time=int(min((i + 1) * chunksize - 1, rp.time.size - 1))))]
                      for i in range(int(np.ceil(rp.time.size / chunksize)))]
        for mintime, maxtime in chunktimes:
            chunk_count += 1
            if suffix:
                new_suffix = suffix + '_{}'.format(chunk_count)
            else:
                new_suffix = '{}'.format(chunk_count)
            new_files = None
            slice_rp = slice_xarray_by_dim(rp, dimname='time', start_time=mintime, end_time=maxtime)
            if file_format == 'csv':
                new_files = self._export_pings_to_csv(rp=slice_rp, output_directory=fldr_path, suffix=new_suffix,
                                                      csv_delimiter=csv_delimiter,
                                                      filter_by_detection=filter_by_detection,
                                                      z_pos_down=z_pos_down,
                                                      export_by_identifiers=export_by_identifiers)
            elif file_format in ['las', 'entwine']:
                new_files = self._export_pings_to_las(rp=slice_rp, output_directory=fldr_path, suffix=new_suffix,
                                                      filter_by_detection=filter_by_detection,
                                                      export_by_identifiers=export_by_identifiers)
            if new_files:
                written_files += new_files

    if file_format == 'entwine':
        build_entwine_points(fldr_path, entwine_fldr_path)
        written_files = [entwine_fldr_path]

    endtime = perf_counter()
    self.fqpr.logger.info('****Exporting xyz data to {} complete: {}****\n'.format(
        file_format, seconds_to_formatted_string(int(endtime - starttime))))
    return written_files
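# Hedged usage sketch of export_pings_to_file: `exporter` stands in for an instance of the class above,
# and the output directory is a placeholder path.
written = exporter.export_pings_to_file(output_directory='/data/exports', file_format='csv',
                                        filter_by_detection=True, z_pos_down=True,
                                        export_by_identifiers=False)
print('{} file(s) written'.format(len(written)))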
def filter_subset_by_polygon(ping_dataset: xr.Dataset, polygon: np.array):
    """
    Given the provided polygon coordinates, return masks for the part of the ping dataset that is completely
    within the polygon and the part of the dataset that intersects with the polygon

    Parameters
    ----------
    ping_dataset
        one of the multibeam.raw_ping datasets, containing the ping variables
    polygon
        coordinates of a polygon ex: np.array([[lon1, lat1], [lon2, lat2], ...]), first and last coordinate
        must be the same

    Returns
    -------
    dict
        dictionary of {line name: [1dim flattened bool mask, start index, end index, line start time,
        line end time]} for soundings in geohash regions that are completely within the polygon
    dict
        dictionary of {line name: [1dim flattened bool mask, start index, end index, line start time,
        line end time]} for soundings in geohash regions that intersect with the polygon
    """

    if 'geohash' in ping_dataset.variables:
        if 'geohashes' in ping_dataset.attrs:
            inside_mask_lines = {}
            intersect_mask_lines = {}
            gprecision = int(ping_dataset.geohash.dtype.str[2:])  # ex: dtype='|S7', precision=7
            innerhash, intersecthash = polygon_to_geohashes(polygon, precision=gprecision)
            for mline, mhashes in ping_dataset.attrs['geohashes'].items():
                if mline in ping_dataset.attrs['multibeam_files']:  # this line might not exist in the lookup if this is a subset
                    linestart, lineend = ping_dataset.attrs['multibeam_files'][mline][0], ping_dataset.attrs['multibeam_files'][mline][1]
                    mhashes = [x.encode() for x in mhashes]
                    inside_geohash = [x for x in innerhash if x in mhashes]
                    intersect_geohash = [x for x in intersecthash if x in mhashes and x not in inside_geohash]
                    if inside_geohash or intersect_geohash:
                        slice_pd = slice_xarray_by_dim(ping_dataset, dimname='time', start_time=linestart,
                                                       end_time=lineend)
                        ghash = np.ravel(slice_pd.geohash)
                        filt_start = int(np.where(ping_dataset.time == slice_pd.time[0])[0]) * ping_dataset.geohash.shape[1]
                        filt_end = filt_start + ghash.shape[0]
                        if inside_geohash:
                            linemask = np.in1d(ghash, inside_geohash)
                            inside_mask_lines[mline] = [linemask, filt_start, filt_end, linestart, lineend]
                        if intersect_geohash:
                            linemask = np.in1d(ghash, intersect_geohash)
                            intersect_mask_lines[mline] = [linemask, filt_start, filt_end, linestart, lineend]
            return inside_mask_lines, intersect_mask_lines
        else:
            print('Warning: Unable to filter by polygon, cannot find the "geohashes" attribute in the ping record')
            return None, None
    else:
        print('Warning: Unable to filter by polygon, geohash variable not found')
        return None, None
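# Illustration (hedged) of how the two dictionaries returned above are consumed: callers in this module
# unpack each value as [1d beam mask, flattened start index, flattened end index, line start time,
# line end time].  `rp` and `geo_polygon` are placeholders for a raw_ping dataset and a geographic polygon.
insidedata, intersectdata = filter_subset_by_polygon(rp, geo_polygon)
if insidedata:
    for mline, (linemask, startidx, endidx, starttime, endtime) in insidedata.items():
        # soundings under linemask fall in geohash cells fully inside the polygon, no point-in-polygon test needed
        print(mline, int(linemask.sum()), startidx, endidx)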
def _soundings_by_poly(self, geo_polygon: np.ndarray, proj_polygon: np.ndarray, variable_selection: tuple,
                       isolate_head: int = None):
    """
    Return soundings and sounding attributes that are within the polygon formed by the provided coordinates.

    Parameters
    ----------
    geo_polygon
        (N, 2) array of points that make up the selection polygon, (longitude, latitude) in geographic coordinates
    proj_polygon
        (N, 2) array of points that make up the selection polygon, (x, y) in the projected CRS of the Fqpr instance
    variable_selection
        list of the variables that you want to return for the soundings in the polygon
    isolate_head
        only used with return_soundings, if provided will only return soundings corresponding to this head index,
        0 = port, 1 = starboard

    Returns
    -------
    list
        list of numpy arrays for each variable in variable selection
    """

    data_vars = [[] for _ in variable_selection]
    self.ping_filter = []
    polypath = mpl_path.Path(proj_polygon)
    for rpcnt, rp in enumerate(self.fqpr.multibeam.raw_ping):
        if rp is None or 'z' not in rp or (isolate_head is not None and isolate_head != rpcnt):
            self.ping_filter.append(None)
            continue
        insidedata, intersectdata = filter_subset_by_polygon(rp, geo_polygon)
        base_filter = np.zeros(rp.x.shape[0] * rp.x.shape[1], dtype=bool)
        if insidedata or intersectdata:
            if insidedata:
                for mline, mdata in insidedata.items():
                    linemask, startidx, endidx, starttime, endtime = mdata
                    slice_pd = slice_xarray_by_dim(rp, dimname='time', start_time=starttime, end_time=endtime)
                    base_filter[startidx:endidx][linemask] = True
                    stacked_slice = slice_pd.stack({'sounding': ('time', 'beam')})
                    for cnt, dvarname in enumerate(variable_selection):
                        if dvarname == 'head':
                            data_vars[cnt].append(np.full(stacked_slice.beampointingangle[linemask].shape,
                                                          rpcnt, dtype=np.int8))
                        else:
                            data_vars[cnt].append(stacked_slice[dvarname][linemask].values)
            if intersectdata:
                for mline, mdata in intersectdata.items():
                    linemask, startidx, endidx, starttime, endtime = mdata
                    # only brute force check those points that are in intersecting geohash regions
                    slice_pd = slice_xarray_by_dim(rp, dimname='time', start_time=starttime, end_time=endtime)
                    xintersect, yintersect = np.ravel(slice_pd.x), np.ravel(slice_pd.y)
                    filt = polypath.contains_points(np.c_[xintersect[linemask], yintersect[linemask]])
                    base_filter[startidx:endidx][linemask] = filt
                    stacked_slice = slice_pd.stack({'sounding': ('time', 'beam')})
                    for cnt, dvarname in enumerate(variable_selection):
                        if dvarname == 'head':
                            data_vars[cnt].append(np.full(stacked_slice.beampointingangle[linemask][filt].shape,
                                                          rpcnt, dtype=np.int8))
                        else:
                            data_vars[cnt].append(stacked_slice[dvarname][linemask][filt].values)
        self.ping_filter.append(base_filter)
    return data_vars
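# Hedged sketch of consuming the _soundings_by_poly output: each entry of data_vars is a list of per-line
# numpy arrays for one requested variable, so a caller would typically concatenate them.  `subsetter`,
# the polygons and the variable names are placeholders; 'head' and 'z' are variables referenced by the
# method above.
data_vars = subsetter._soundings_by_poly(geo_polygon, proj_polygon, ('head', 'x', 'y', 'z'))
head, x, y, z = [np.concatenate(dvar) if dvar else np.array([]) for dvar in data_vars]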
def subset_by_times(self, time_segments: list):
    """
    Only retain the portions of this Fqpr object that are within the time segments given in the list provided.
    The resultant datasets will be the portions of the datasets that lie within the list of time segments
    concatenated together.

    To return to the full original dataset, use restore_subset

    Parameters
    ----------
    time_segments
        list of lists, where each sub list is the [start time of the segment in utc seconds, end time of the
        segment in utc seconds]
    """

    if not isinstance(time_segments, (list, tuple)) or not isinstance(time_segments[0], (list, tuple)) or len(time_segments[0]) != 2:
        raise ValueError('Expected a list of lists where each sub list is 2 elements long and contains start/end times in utc seconds')

    # ensure the time segments are sorted, so that the resultant concatenated datasets are in time order
    time_segments = sorted(time_segments, key=lambda x: x[0])
    slice_raw_ping = []
    for ra in self.fqpr.multibeam.raw_ping:
        final_ra = None
        for starttime, endtime in time_segments:
            slice_ra = slice_xarray_by_dim(ra, dimname='time', start_time=starttime, end_time=endtime)
            if final_ra:
                final_ra = xr.concat([final_ra, slice_ra], dim='time')
            else:
                final_ra = slice_ra
        slice_raw_ping.append(final_ra)
    if any([slce is None for slce in slice_raw_ping]):
        print('Warning: Subset by time found empty slice, skipping subset')
        return

    self._prepare_subset()
    self.subset_times = time_segments
    self.fqpr.multibeam.raw_ping = slice_raw_ping

    final_att = None
    for starttime, endtime in time_segments:
        slice_nav = slice_xarray_by_dim(self.fqpr.multibeam.raw_att, dimname='time', start_time=starttime,
                                        end_time=endtime)
        if final_att:
            final_att = xr.concat([final_att, slice_nav], dim='time')
        else:
            final_att = slice_nav
    self.fqpr.multibeam.raw_att = final_att

    # ensure the multibeam files that we say are in this dataset match the subset of files
    removelines = []
    mfiles = deepcopy(self.fqpr.multibeam.raw_ping[0].multibeam_files)
    for mfil in mfiles.keys():
        # any intersections with the given time segments?
        intersect = any([t[1] >= mfiles[mfil][0] >= t[0] or t[1] >= mfiles[mfil][1] >= t[0] or
                         (t[0] >= mfiles[mfil][0] and t[1] <= mfiles[mfil][1]) for t in time_segments])
        if not intersect:
            removelines.append(mfil)
    [mfiles.pop(mfil) for mfil in removelines]
    for ra in self.fqpr.multibeam.raw_ping:
        ra.attrs['multibeam_files'] = mfiles
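# Hedged usage sketch of subset_by_times: `subsetter` is an instance of the class defining the method
# above, and the UTC-second segments are placeholder values.  restore_subset is the companion method
# named in the docstring.
segments = [[1626354000.0, 1626354300.0], [1626355000.0, 1626355300.0]]
subsetter.subset_by_times(segments)
# ... work with the concatenated subset here ...
subsetter.restore_subset()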