def subset_by_time(self, mintime: float = None, maxtime: float = None):
    """
    We save the line start/end time as an attribute within each raw_ping record.  Use this method to pull out
    just the data that is within the mintime/maxtime range (inclusive mintime, exclusive maxtime).  The class
    will then only have access to data within that time period.

    To return to the full original dataset, use restore_subset

    Parameters
    ----------
    mintime
        minimum time of the subset, if not provided and maxtime is, use the minimum time of the datasets
    maxtime
        maximum time of the subset, if not provided and mintime is, use the maximum time of the datasets

    Returns
    -------
    bool
        True if the subset could not be applied (an empty slice was found), False if the subset succeeded
    """

    if mintime is None and maxtime is not None:
        mintime = np.min([rp.time.values[0] for rp in self.fqpr.multibeam.raw_ping])
    if maxtime is None and mintime is not None:
        maxtime = np.max([rp.time.values[-1] for rp in self.fqpr.multibeam.raw_ping])
    if mintime is None and maxtime is None:
        raise ValueError('subset_by_time: either mintime or maxtime must be provided to subset by time')

    slice_raw_ping = []
    for ra in self.fqpr.multibeam.raw_ping:
        slice_ra = slice_xarray_by_dim(ra, dimname='time', start_time=mintime, end_time=maxtime)
        slice_raw_ping.append(slice_ra)
    if any([slce is None for slce in slice_raw_ping]):
        print('Warning: Subset by time found empty slice at {}-{}, skipping subset'.format(mintime, maxtime))
        return True

    self._prepare_subset()
    self.subset_mintime = mintime
    self.subset_maxtime = maxtime
    self.fqpr.multibeam.raw_ping = slice_raw_ping
    self.fqpr.multibeam.raw_att = slice_xarray_by_dim(self.fqpr.multibeam.raw_att, dimname='time',
                                                      start_time=mintime, end_time=maxtime)
    return False
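# Minimal usage sketch of subset_by_time (hedged): `subsetter` stands in for an instance of the class
# defining the method above (it only needs the self.fqpr attribute the method uses), and the UTC-second
# times are placeholder values.  restore_subset is the companion method named in the docstring.
failed = subsetter.subset_by_time(mintime=1626354000.0, maxtime=1626354600.0)
if not failed:
    # raw_ping/raw_att now only cover the requested window; do time-bounded work here, then restore
    subsetter.restore_subset()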
def build_raw_attitude_for_line(self, line: str, subset: bool = True):
    """
    With the given line name, return the raw attitude dataset from the fqpr_generation.Fqpr instance that
    contains the line.  If subset is True, the returned attitude will only be the raw attitude that covers
    the line.

    Parameters
    ----------
    line
        line name
    subset
        if True will only return the dataset cut to the min/max time of the multibeam line

    Returns
    -------
    xr.Dataset
        the raw attitude either for the whole Fqpr instance that contains the line, or subset to the min/max time of the line
    """

    line_att = None
    fq_inst = self.return_line_owner(line)
    if fq_inst is not None:
        line_att = fq_inst.multibeam.raw_att
        if subset:
            # attributes are all the same across raw_ping datasets, just use the first
            # multibeam_files entries start with [start time, end time, ...]; only the first two are needed here
            line_start_time, line_end_time = fq_inst.multibeam.raw_ping[0].multibeam_files[line][:2]
            line_att = slice_xarray_by_dim(line_att, dimname='time', start_time=line_start_time,
                                           end_time=line_end_time)
    return line_att
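# Hedged example of build_raw_attitude_for_line: `project` stands in for an instance of the class above
# (something exposing return_line_owner), and the line name is a placeholder.
line_att = project.build_raw_attitude_for_line('0001_20210715_123456_testline.all', subset=True)
if line_att is not None:
    # attitude now spans only the line's min/max time
    print(line_att.time.values[0], line_att.time.values[-1])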
def test_slice_xarray_by_dim(self):
    data_arr = np.arange(100)
    test_data = xr.Dataset({'data': (['time'], data_arr)}, coords={'time': data_arr})

    # this method lets you slice by dim values that are not in the actual data
    ans = slice_xarray_by_dim(test_data, dimname='time', start_time=28.7, end_time=29.4)
    assert ans['data'].values == 29

    # can also slice with numbers beyond the data range, will clip to the limits
    ans = slice_xarray_by_dim(test_data, dimname='time', start_time=98.2, end_time=104)
    assert ans['data'].shape == (2,)
    assert ans['data'].values[0] == 98
    assert ans['data'].values[1] == 99
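# A small standalone sketch of slice_xarray_by_dim on synthetic data, mirroring the test above.
# It assumes each bound snaps to the nearest coordinate value, which is consistent with the
# assertions above (28.7-29.4 returning 29, and 98.2-104 returning 98 and 99).
demo = xr.Dataset({'data': (['time'], np.arange(10))}, coords={'time': np.arange(10)})
middle = slice_xarray_by_dim(demo, dimname='time', start_time=2.6, end_time=6.4)
print(middle['data'].values)  # expected: [3 4 5 6]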
def set_filter_by_polygon(self, polygon: np.ndarray, geographic: bool = True):
    """
    Set the ping_filter attribute so that you can then use the set_variable_by_filter and
    get_variable_by_filter methods to get other variables or set data within the polygon selection.  This is
    an alternative to return_soundings_in_polygon that you can use if you want to set the filter without
    loading/returning a lot of data.

    Parameters
    ----------
    polygon
        (N, 2) array of points that make up the selection polygon
    geographic
        if True, the coordinates provided are geographic (latitude/longitude)
    """

    if 'horizontal_crs' not in self.fqpr.multibeam.raw_ping[0].attrs or 'z' not in self.fqpr.multibeam.raw_ping[0].variables.keys():
        raise ValueError('Georeferencing has not been run yet, you must georeference before you can get soundings')
    geo_polygon, proj_polygon = self._build_polygons(polygon, geographic)

    self.ping_filter = []
    polypath = mpl_path.Path(proj_polygon)
    for cnt, rp in enumerate(self.fqpr.multibeam.raw_ping):
        insidedata, intersectdata = filter_subset_by_polygon(rp, geo_polygon)
        base_filter = np.zeros(rp.x.shape[0] * rp.x.shape[1], dtype=bool)
        if insidedata or intersectdata:
            if insidedata:
                for mline, mdata in insidedata.items():
                    linemask, startidx, endidx, starttime, endtime = mdata
                    base_filter[startidx:endidx] = linemask
            if intersectdata:
                for mline, mdata in intersectdata.items():
                    linemask, startidx, endidx, starttime, endtime = mdata
                    # only brute force check those points that are in intersecting geohash regions
                    slice_pd = slice_xarray_by_dim(rp, dimname='time', start_time=starttime, end_time=endtime)
                    xintersect, yintersect = np.ravel(slice_pd.x), np.ravel(slice_pd.y)
                    filt = polypath.contains_points(np.c_[xintersect[linemask], yintersect[linemask]])
                    base_filter[startidx:endidx][linemask] = filt
        self.ping_filter.append(base_filter)
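# Hedged sketch of the polygon-filter workflow described in the docstring above: `subsetter` is an
# instance of the class defining set_filter_by_polygon, the polygon vertices are placeholder geographic
# coordinates, and get_variable_by_filter/set_variable_by_filter are referenced only because the
# docstring names them; their exact signatures are assumed here.
poly = np.array([[-70.20, 42.10], [-70.10, 42.10], [-70.10, 42.20], [-70.20, 42.20], [-70.20, 42.10]])
subsetter.set_filter_by_polygon(poly, geographic=True)
depths = subsetter.get_variable_by_filter('z')        # pull a variable for just the selected soundings
subsetter.set_variable_by_filter('detectioninfo', 2)  # e.g. flag the selected soundings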
def export_pings_to_file(self, output_directory: str = None, file_format: str = 'csv', csv_delimiter=' ',
                         filter_by_detection: bool = True, z_pos_down: bool = True,
                         export_by_identifiers: bool = True):
    """
    Uses the output of georef_along_across_depth to build sounding exports.  Currently you can export to csv,
    las or entwine file formats, see the file_format argument.

    This will use all soundings in the dataset.  If you export to las and want to retain rejected soundings
    under the noise classification, set filter_by_detection to False.

    Filters using the detectioninfo variable if it is present in the multibeam data and filter_by_detection
    is set.

    Set z_pos_down to False if you want positive up.  Otherwise you get positive down.

    entwine export will build las first, and then entwine from las

    Parameters
    ----------
    output_directory
        optional, destination directory for the xyz exports, otherwise will auto export next to converted data
    file_format
        optional, destination file format, default is csv file, options include ['csv', 'las', 'entwine']
    csv_delimiter
        optional, if you choose file_format=csv, this will control the delimiter
    filter_by_detection
        optional, if True will only write soundings that are not rejected
    z_pos_down
        if True, will export soundings with z positive down (this is the native Kluster convention), only for csv export
    export_by_identifiers
        if True, will generate separate files for each combination of serial number/sector/frequency

    Returns
    -------
    list
        list of written file paths
    """

    chunksize, fldr_path, entwine_fldr_path, suffix = self._validate_export(output_directory, file_format)
    if not chunksize:
        return []

    self.fqpr.logger.info('****Exporting xyz data to {}****'.format(file_format))
    starttime = perf_counter()

    chunk_count = 0
    written_files = []
    for rp in self.fqpr.multibeam.raw_ping:
        self.fqpr.logger.info('Operating on system {}'.format(rp.system_identifier))
        # build list of lists for the mintime and maxtime (inclusive) for each chunk, each chunk will contain number of pings equal to chunksize
        chunktimes = [[float(rp.time.isel(time=int(i * chunksize))),
                       float(rp.time.isel(time=int(min((i + 1) * chunksize - 1, rp.time.size - 1))))]
                      for i in range(int(np.ceil(rp.time.size / chunksize)))]
        for mintime, maxtime in chunktimes:
            chunk_count += 1
            if suffix:
                new_suffix = suffix + '_{}'.format(chunk_count)
            else:
                new_suffix = '{}'.format(chunk_count)
            new_files = None
            slice_rp = slice_xarray_by_dim(rp, dimname='time', start_time=mintime, end_time=maxtime)
            if file_format == 'csv':
                new_files = self._export_pings_to_csv(rp=slice_rp, output_directory=fldr_path, suffix=new_suffix,
                                                      csv_delimiter=csv_delimiter,
                                                      filter_by_detection=filter_by_detection,
                                                      z_pos_down=z_pos_down,
                                                      export_by_identifiers=export_by_identifiers)
            elif file_format in ['las', 'entwine']:
                new_files = self._export_pings_to_las(rp=slice_rp, output_directory=fldr_path, suffix=new_suffix,
                                                      filter_by_detection=filter_by_detection,
                                                      export_by_identifiers=export_by_identifiers)
            if new_files:
                written_files += new_files

    if file_format == 'entwine':
        build_entwine_points(fldr_path, entwine_fldr_path)
        written_files = [entwine_fldr_path]

    endtime = perf_counter()
    self.fqpr.logger.info('****Exporting xyz data to {} complete: {}****\n'.format(
        file_format, seconds_to_formatted_string(int(endtime - starttime))))
    return written_files
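# Hedged usage sketch of export_pings_to_file: `exporter` stands in for an instance of the class above,
# and the output directory is a placeholder path.
written = exporter.export_pings_to_file(output_directory='/data/exports', file_format='csv',
                                        filter_by_detection=True, z_pos_down=True,
                                        export_by_identifiers=False)
print('{} file(s) written'.format(len(written)))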
def filter_subset_by_polygon(ping_dataset: xr.Dataset, polygon: np.array):
    """
    Given the provided polygon coordinates, return masks for the part of the ping dataset that is completely
    within the polygon and the part of the dataset that intersects with the polygon

    Parameters
    ----------
    ping_dataset
        one of the multibeam.raw_ping datasets, containing the ping variables
    polygon
        coordinates of a polygon ex: np.array([[lon1, lat1], [lon2, lat2], ...]), first and last coordinate
        must be the same

    Returns
    -------
    dict
        dictionary of {line name: [1dim flattened bool mask, start index, end index, line start time,
        line end time]} for soundings in geohash regions that are completely within the polygon
    dict
        dictionary of {line name: [1dim flattened bool mask, start index, end index, line start time,
        line end time]} for soundings in geohash regions that intersect with the polygon
    """

    if 'geohash' in ping_dataset.variables:
        if 'geohashes' in ping_dataset.attrs:
            inside_mask_lines = {}
            intersect_mask_lines = {}
            gprecision = int(ping_dataset.geohash.dtype.str[2:])  # ex: dtype='|S7', precision=7
            innerhash, intersecthash = polygon_to_geohashes(polygon, precision=gprecision)
            for mline, mhashes in ping_dataset.attrs['geohashes'].items():
                if mline in ping_dataset.attrs['multibeam_files']:  # this line might not exist in the lookup if this is a subset
                    linestart, lineend = ping_dataset.attrs['multibeam_files'][mline][0], ping_dataset.attrs['multibeam_files'][mline][1]
                    mhashes = [x.encode() for x in mhashes]
                    inside_geohash = [x for x in innerhash if x in mhashes]
                    intersect_geohash = [x for x in intersecthash if x in mhashes and x not in inside_geohash]
                    if inside_geohash or intersect_geohash:
                        slice_pd = slice_xarray_by_dim(ping_dataset, dimname='time', start_time=linestart,
                                                       end_time=lineend)
                        ghash = np.ravel(slice_pd.geohash)
                        filt_start = int(np.where(ping_dataset.time == slice_pd.time[0])[0]) * ping_dataset.geohash.shape[1]
                        filt_end = filt_start + ghash.shape[0]
                        if inside_geohash:
                            linemask = np.in1d(ghash, inside_geohash)
                            inside_mask_lines[mline] = [linemask, filt_start, filt_end, linestart, lineend]
                        if intersect_geohash:
                            linemask = np.in1d(ghash, intersect_geohash)
                            intersect_mask_lines[mline] = [linemask, filt_start, filt_end, linestart, lineend]
            return inside_mask_lines, intersect_mask_lines
        else:
            print('Warning: Unable to filter by polygon, cannot find the "geohashes" attribute in the ping record')
            return None, None
    else:
        print('Warning: Unable to filter by polygon, geohash variable not found')
        return None, None
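# Illustration (hedged) of how the two dictionaries returned above are consumed: callers in this module
# unpack each value as [1d beam mask, flattened start index, flattened end index, line start time,
# line end time].  `rp` and `geo_polygon` are placeholders for a raw_ping dataset and a geographic polygon.
insidedata, intersectdata = filter_subset_by_polygon(rp, geo_polygon)
if insidedata:
    for mline, (linemask, startidx, endidx, starttime, endtime) in insidedata.items():
        # soundings under linemask fall in geohash cells fully inside the polygon, no point-in-polygon test needed
        print(mline, int(linemask.sum()), startidx, endidx)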
def _soundings_by_poly(self, geo_polygon: np.ndarray, proj_polygon: np.ndarray, variable_selection: tuple,
                       isolate_head: int = None):
    """
    Return soundings and sounding attributes that are within the polygon formed by the provided coordinates.

    Parameters
    ----------
    geo_polygon
        (N, 2) array of points that make up the selection polygon, (longitude, latitude) in geographic coordinates
    proj_polygon
        (N, 2) array of points that make up the selection polygon, (x, y) in the projected CRS of the Fqpr instance
    variable_selection
        list of the variables that you want to return for the soundings in the polygon
    isolate_head
        only used with return_soundings, if provided will only return soundings corresponding to this head index,
        0 = port, 1 = starboard

    Returns
    -------
    list
        list of numpy arrays for each variable in variable selection
    """

    data_vars = [[] for _ in variable_selection]
    self.ping_filter = []
    polypath = mpl_path.Path(proj_polygon)
    for rpcnt, rp in enumerate(self.fqpr.multibeam.raw_ping):
        if rp is None or 'z' not in rp or (isolate_head is not None and isolate_head != rpcnt):
            self.ping_filter.append(None)
            continue
        insidedata, intersectdata = filter_subset_by_polygon(rp, geo_polygon)
        base_filter = np.zeros(rp.x.shape[0] * rp.x.shape[1], dtype=bool)
        if insidedata or intersectdata:
            if insidedata:
                for mline, mdata in insidedata.items():
                    linemask, startidx, endidx, starttime, endtime = mdata
                    slice_pd = slice_xarray_by_dim(rp, dimname='time', start_time=starttime, end_time=endtime)
                    base_filter[startidx:endidx][linemask] = True
                    stacked_slice = slice_pd.stack({'sounding': ('time', 'beam')})
                    for cnt, dvarname in enumerate(variable_selection):
                        if dvarname == 'head':
                            data_vars[cnt].append(np.full(stacked_slice.beampointingangle[linemask].shape,
                                                          rpcnt, dtype=np.int8))
                        else:
                            data_vars[cnt].append(stacked_slice[dvarname][linemask].values)
            if intersectdata:
                for mline, mdata in intersectdata.items():
                    linemask, startidx, endidx, starttime, endtime = mdata
                    # only brute force check those points that are in intersecting geohash regions
                    slice_pd = slice_xarray_by_dim(rp, dimname='time', start_time=starttime, end_time=endtime)
                    xintersect, yintersect = np.ravel(slice_pd.x), np.ravel(slice_pd.y)
                    filt = polypath.contains_points(np.c_[xintersect[linemask], yintersect[linemask]])
                    base_filter[startidx:endidx][linemask] = filt
                    stacked_slice = slice_pd.stack({'sounding': ('time', 'beam')})
                    for cnt, dvarname in enumerate(variable_selection):
                        if dvarname == 'head':
                            data_vars[cnt].append(np.full(stacked_slice.beampointingangle[linemask][filt].shape,
                                                          rpcnt, dtype=np.int8))
                        else:
                            data_vars[cnt].append(stacked_slice[dvarname][linemask][filt].values)
        self.ping_filter.append(base_filter)
    return data_vars
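# Hedged sketch of consuming the _soundings_by_poly output: each entry of data_vars is a list of per-line
# numpy arrays for one requested variable, so a caller would typically concatenate them.  `subsetter`,
# the polygons and the variable names are placeholders; 'head' and 'z' are variables referenced by the
# method above.
data_vars = subsetter._soundings_by_poly(geo_polygon, proj_polygon, ('head', 'x', 'y', 'z'))
head, x, y, z = [np.concatenate(dvar) if dvar else np.array([]) for dvar in data_vars]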
def subset_by_times(self, time_segments: list):
    """
    Only retain the portions of this Fqpr object that are within the time segments given in the list provided.
    The resultant datasets will be the portions of the datasets that lie within the list of time segments
    concatenated together.

    To return to the full original dataset, use restore_subset

    Parameters
    ----------
    time_segments
        list of lists, where each sub list is the [start time of the segment in utc seconds, end time of the
        segment in utc seconds]
    """

    if not isinstance(time_segments, (list, tuple)) or not isinstance(time_segments[0], (list, tuple)) or len(time_segments[0]) != 2:
        raise ValueError('Expected a list of lists where each sub list is 2 elements long and contains start/end times in utc seconds')

    # ensure the time segments are sorted, so that the resultant concatenated datasets are in time order
    time_segments = sorted(time_segments, key=lambda x: x[0])
    slice_raw_ping = []
    for ra in self.fqpr.multibeam.raw_ping:
        final_ra = None
        for starttime, endtime in time_segments:
            slice_ra = slice_xarray_by_dim(ra, dimname='time', start_time=starttime, end_time=endtime)
            if final_ra:
                final_ra = xr.concat([final_ra, slice_ra], dim='time')
            else:
                final_ra = slice_ra
        slice_raw_ping.append(final_ra)
    if any([slce is None for slce in slice_raw_ping]):
        print('Warning: Subset by time found empty slice, skipping subset')
        return

    self._prepare_subset()
    self.subset_times = time_segments
    self.fqpr.multibeam.raw_ping = slice_raw_ping

    final_att = None
    for starttime, endtime in time_segments:
        slice_nav = slice_xarray_by_dim(self.fqpr.multibeam.raw_att, dimname='time', start_time=starttime,
                                        end_time=endtime)
        if final_att:
            final_att = xr.concat([final_att, slice_nav], dim='time')
        else:
            final_att = slice_nav
    self.fqpr.multibeam.raw_att = final_att

    # ensure the multibeam files that we say are in this dataset match the subset of files
    removelines = []
    mfiles = deepcopy(self.fqpr.multibeam.raw_ping[0].multibeam_files)
    for mfil in mfiles.keys():
        # any intersections with the given time segments?
        intersect = any([t[1] >= mfiles[mfil][0] >= t[0] or t[1] >= mfiles[mfil][1] >= t[0] or
                         (t[0] >= mfiles[mfil][0] and t[1] <= mfiles[mfil][1]) for t in time_segments])
        if not intersect:
            removelines.append(mfil)
    [mfiles.pop(mfil) for mfil in removelines]
    for ra in self.fqpr.multibeam.raw_ping:
        ra.attrs['multibeam_files'] = mfiles
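# Hedged usage sketch of subset_by_times: `subsetter` is an instance of the class defining the method
# above, and the UTC-second segments are placeholder values.  restore_subset is the companion method
# named in the docstring.
segments = [[1626354000.0, 1626354300.0], [1626355000.0, 1626355300.0]]
subsetter.subset_by_times(segments)
# ... work with the concatenated subset here ...
subsetter.restore_subset()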