示例#1
0
    def eval(self, coordinates, output=None):
        """Evaluates this node using the supplied coordinates.

        Each input node is evaluated at the requested coordinates, the evaluated inputs are handed to
        ``algorithm`` as a dict keyed by input name, and the algorithm result is either returned as a new
        array or written into ``output`` when one is supplied.

        Parameters
        ----------
        coordinates : podpac.Coordinates
            {requested_coordinates}
        output : podpac.UnitsDataArray, optional
            {eval_output}

        Returns
        -------
        {eval_return}

        Raises
        ------
        NodeException
            If ``algorithm`` returns a type other than UnitsDataArray, xarray.DataArray or numpy.ndarray.
        """

        self._requested_coordinates = coordinates

        inputs = {key: node.eval(coordinates) for key, node in self._inputs.items()}

        # accumulate output coordinates (used below to wrap a bare numpy result)
        coords_list = [Coordinates.from_xarray(a.coords) for a in inputs.values()]
        output_coordinates = union([coordinates] + coords_list)

        result = self.algorithm(inputs)

        # UnitsDataArray is tested first: in podpac it subclasses xarray.DataArray, so testing
        # xr.DataArray first would make this branch unreachable and a UnitsDataArray result would be
        # needlessly re-wrapped in a new output array.
        if isinstance(result, UnitsDataArray):
            if output is None:
                output = result
            else:
                # copy raw values, which is what the xr.DataArray branch did for these results before
                output[:] = result.data
        elif isinstance(result, xr.DataArray):
            if output is None:
                output = self.create_output_array(Coordinates.from_xarray(result.coords), data=result.data)
            else:
                output[:] = result.data
        elif isinstance(result, np.ndarray):
            if output is None:
                output = self.create_output_array(output_coordinates, data=result)
            else:
                output.data[:] = result
        else:
            raise NodeException("algorithm returned unsupported type '%s'" % type(result))

        return output
示例#2
0
    def composite(self, coordinates, data_arrays, result=None):
        """Composites data_arrays in the order that they appear.

        The first array is taken as the starting point and every following array is merged in with
        ``combine_first``, so earlier sources take precedence over later ones. All arrays supplied by
        ``data_arrays`` are consumed; there is no early exit.

        Parameters
        ----------
        coordinates : :class:`podpac.Coordinates`
            {requested_coordinates}
            Not used directly by this method.
        data_arrays : generator
            Evaluated source data, in the same order as the sources. Must yield at least one array,
            otherwise ``next`` raises StopIteration.
        result : podpac.UnitsDataArray, optional
            {eval_output}
            When supplied, the composited values are transposed to the dimension order of ``result`` and
            copied into it.

        Returns
        -------
        {eval_return} This is ``result`` filled in place when it was supplied, otherwise a new
        UnitsDataArray holding the composite of all sources.
        """

        # TODO: Fix boundary information on the combined data arrays
        res = next(data_arrays)
        for arr in data_arrays:
            res = res.combine_first(arr)
        res = UnitsDataArray(res)

        # record the bounds of the combined coordinates on the composited array
        coords = Coordinates.from_xarray(res.coords)
        res.attrs["bounds"] = coords.bounds

        if result is not None:
            # align the dimension order with the caller's array before copying the raw values
            result.data[:] = res.transpose(*result.dims).data
            return result
        return res
示例#3
0
    def eval(self, coordinates, **kwargs):
        """Initialize the zarr array for this request, then delegate evaluation to the parent class.

        Parameters
        ----------
        coordinates : podpac.Coordinates
            Requested coordinates. They define the zarr array shape unless ``zarr_shape`` is set.
        **kwargs
            Only ``output`` is read; it is forwarded to the parent ``eval``.

        Returns
        -------
        The output of the parent ``eval`` when it is not None, otherwise the zarr file object returned by
        ``initialize_zarr_array``.
        """
        output = kwargs.get("output")

        # an explicit zarr_shape takes precedence over the shape of the request
        if self.zarr_shape is not None:
            self._shape = tuple(self.zarr_shape.values())
        else:
            self._shape = coordinates.shape

        # initialize zarr file
        chunk_sizes = self.chunks if self.zarr_chunks is None else self.zarr_chunks
        chunks = [chunk_sizes[d] for d in coordinates]
        self._chunks = chunks
        zarr_file, data_key, zarr_node = self.initialize_zarr_array(self._shape, chunks)
        self.dataset = zarr_file
        self.zarr_data_key = data_key
        self.zarr_node = zarr_node
        # NOTE(review): bare attribute access kept from the original; presumably it forces the node to
        # load its keys -- confirm before removing.
        zarr_node.keys

        # eval
        _log.debug("Starting parallel eval.")
        chunked_dims = self.chunks.keys()
        missing_dims = [d for d in coordinates.dims if d not in chunked_dims]
        if self.zarr_coordinates is None:
            set_coords = coordinates.drop(missing_dims)
        else:
            zarr_only = [d for d in self.zarr_coordinates.dims if d not in missing_dims]
            missing_dims = missing_dims + zarr_only
            set_coords = merge_dims([coordinates.drop(missing_dims), self.zarr_coordinates])
        # NOTE(review): the return value is discarded here; if transpose is not in-place this only
        # validates the dims and leaves set_coords in its current order -- confirm the intent.
        set_coords.transpose(*coordinates.dims)

        self.set_zarr_coordinates(set_coords, data_key)
        if self.list_dir:
            # a list of data keys is represented by its first entry
            dk = data_key[0] if isinstance(data_key, list) else data_key
            self._list_dir = self.zarr_node.list_dir(dk)

        output = super(ZarrOutputMixin, self).eval(coordinates, output=output)

        if output is None:
            return zarr_file

        # fill in the coordinates, this is guaranteed to be correct even if the user messed up.
        self.set_zarr_coordinates(Coordinates.from_xarray(output), data_key)
        return output
示例#4
0
    def eval(self, coordinates, **kwargs):
        """
        Wraps the parent eval so that it is called with reduced, native-crs coordinates.

        The request is first validated against this node's coordinates, then stripped of dimensions this
        node does not use and transformed into the node's own crs before the parent eval is called (the
        original author notes that this is what makes the parent's caching use the correct coordinates).
        If a crs transform was applied, the result is re-wrapped on coordinates transformed back to the
        requested crs.

        Raises
        ------
        ValueError
            If a 1d coordinate of this node is absent from the request, or if none of the dims of a
            stacked coordinate of this node are present in the request.
        """

        # check for missing dimensions
        for c in self.coordinates.values():
            if isinstance(c, Coordinates1d):
                if c.name in coordinates.udims:
                    continue
                raise ValueError("Cannot evaluate these coordinates, missing dim '%s'" % c.name)
            if isinstance(c, StackedCoordinates):
                if any(dim in coordinates.udims for dim in c.udims):
                    continue
                raise ValueError("Cannot evaluate these coordinates, missing at least one dim in '%s'" % c.name)

        # store original requested coordinates
        requested_coordinates = coordinates

        # remove extra dimensions: 1d coordinates this node does not use, and stacked coordinates
        # that share no dim with this node
        extra = []
        for c in coordinates.values():
            if isinstance(c, Coordinates1d) and c.name not in self.udims:
                extra.append(c.name)
            elif isinstance(c, StackedCoordinates) and not any(dim in self.udims for dim in c.dims):
                extra.append(c.name)
        coordinates = coordinates.drop(extra)

        # transform coordinates into native crs if different
        if coordinates.crs.lower() != self._crs.lower():
            coordinates = coordinates.transform(self._crs)

        # note: super().eval (not self._eval)
        # This call already sub-selects an 'output' if specified
        output = super().eval(coordinates, **kwargs)

        # transform back to requested coordinates, if necessary
        if coordinates.crs.lower() != requested_coordinates.crs.lower():
            # need to use the already-selected output, if it exists
            try:
                outputs = output["output"].data.tolist()
            except KeyError:
                # 'output' does not exist in the data, so outputs should be empty
                outputs = []
            except Exception:
                # any other failure falls back to the node's declared outputs
                outputs = self.outputs
            else:
                if isinstance(outputs, str):
                    # single output (not a dim): an empty list makes the create function
                    # behave as if no outputs were given, which is what we want here
                    outputs = []

            native_coords = Coordinates.from_xarray(output, crs=output.attrs.get("crs", None))
            requested_crs_coords = native_coords.transform(requested_coordinates.crs)
            output = self.create_output_array(requested_crs_coords, data=output.data, outputs=outputs)

        if settings["DEBUG"]:
            self._requested_coordinates = requested_coordinates

        return output
示例#5
0
    def _eval(self, coordinates, output=None, _selector=None):
        """Evaluates this node using the supplied coordinates.

        All input nodes are evaluated first, in a thread pool when multithreading is enabled and more
        than one thread is granted, otherwise serially. The evaluated inputs are then passed to
        ``algorithm`` together with the coordinates, and the result is returned or copied into ``output``.

        Parameters
        ----------
        coordinates : podpac.Coordinates
            {requested_coordinates}
        output : podpac.UnitsDataArray, optional
            {eval_output}
        _selector: callable(coordinates, request_coordinates)
            {eval_selector}

        Returns
        -------
        {eval_return}

        Raises
        ------
        NodeException
            If ``algorithm`` returns something that is not an xarray.DataArray, or if the supplied
            ``output`` lacks a dimension present in the algorithm result.
        """

        self._requested_coordinates = coordinates

        inputs = {}

        if settings["MULTITHREADING"]:
            n_threads = thread_manager.request_n_threads(len(self.inputs))
            if n_threads == 1:
                # a single thread gives no parallelism; hand it back and evaluate serially
                thread_manager.release_n_threads(n_threads)
        else:
            n_threads = 0

        if settings["MULTITHREADING"] and n_threads > 1:
            # Create a function for each thread to execute asynchronously
            def f(node):
                return node.eval(coordinates, _selector=_selector)

            try:
                # Create pool of size n_threads, note, this may be created from a sub-thread (i.e. not the main thread)
                pool = thread_manager.get_thread_pool(processes=n_threads)
                try:
                    # Evaluate nodes in parallel/asynchronously
                    results = [pool.apply_async(f, [node]) for node in self.inputs.values()]

                    # Collect the results in dictionary; get() re-raises any exception from the worker
                    for key, res in zip(self.inputs.keys(), results):
                        inputs[key] = res.get()
                finally:
                    # This prevents any more tasks from being submitted to the pool, and will close the
                    # workers once done. Done in finally so a failing input does not leak the pool.
                    pool.close()
            finally:
                # Release these number of threads back to the thread pool, even when an input failed
                thread_manager.release_n_threads(n_threads)
            self._multi_threaded = True
        else:
            # Evaluate nodes in serial
            for key, node in self.inputs.items():
                inputs[key] = node.eval(coordinates, output=output, _selector=_selector)
            self._multi_threaded = False

        result = self.algorithm(inputs, coordinates)

        if not isinstance(result, xr.DataArray):
            raise NodeException("algorithm returned unsupported type '%s'" % type(result))

        # sub-select a single output when this node is configured with one
        if "output" in result.dims and self.output is not None:
            result = result.sel(output=self.output)

        if output is not None:
            missing = [dim for dim in result.dims if dim not in output.dims]
            if missing:
                raise NodeException("provided output is missing dims %s" % missing)

            # move the result dims to the end so the raw values line up for assignment, then restore
            # the caller's dimension order
            output_dims = output.dims
            output = output.transpose(..., *result.dims)
            output[:] = result.data
            output = output.transpose(*output_dims)
        elif isinstance(result, UnitsDataArray):
            output = result
        else:
            output_coordinates = Coordinates.from_xarray(result)
            output = self.create_output_array(output_coordinates, data=result.data)

        return output
示例#6
0
    def eval(self, coordinates, **kwargs):
        """Evaluate the request chunk by chunk using a pool of worker threads.

        The coordinates are split with ``iterchunks`` using the sizes in ``self.chunks`` (dimensions
        without a chunk size use their full size). Each chunk at or after ``self.start_i`` is submitted
        to ``self.eval_source`` on the pool. Results are then collected in submission order; a chunk
        whose worker raised is recorded in ``self.errors``, logged, and skipped.

        Parameters
        ----------
        coordinates : podpac.Coordinates
            Requested coordinates.
        **kwargs
            Only ``output`` is read: an optional array that receives the chunk results when
            ``self.fill_output`` is set.

        Returns
        -------
        The filled output array when ``self.fill_output`` is set, otherwise the ``output`` argument
        unchanged (None if it was not given).
        """
        output = kwargs.get("output")
        # Make a thread pool to manage queue
        pool = ThreadPool(processes=self.number_of_workers)

        try:
            if output is None and self.fill_output:
                output = self.create_output_array(coordinates)

            # chunk shape: configured chunk size where available, else the whole dimension
            shape = [self.chunks[d] if d in self.chunks else coordinates[d].size for d in coordinates.dims]

            results = []
            for i, (coords, slc) in enumerate(coordinates.iterchunks(shape, True)):
                if i < self.start_i:
                    _log.debug("Skipping {} since it is less than self.start_i ({})".format(i, self.start_i))
                    continue

                out = None
                if self.fill_output and output is not None:
                    out = output[slc]
                with self._lock:
                    _log.debug("Added {} to worker pool".format(i))
                    _log.debug("Node eval with coords: {}, {}".format(slc, coords))
                    results.append(pool.apply_async(self.eval_source, [coords, slc, out, i]))

            _log.info("Added all chunks to worker pool. Now waiting for results.")
            start_time = time.time()

            def elapsed():
                # time spent waiting so far, formatted as a timedelta string with ms resolution
                return str(np.timedelta64(int(1000 * (time.time() - start_time)), "ms").astype(object))

            # NOTE: i below is the index into results, which is offset from the chunk index by the
            # number of skipped chunks.
            for i, res in enumerate(results):
                _log.info("({}): Waiting for results: {} / {}".format(elapsed(), i + 1, len(results)))

                # Try to get the results / wait for the results. Worker failures are deliberately
                # tolerated so that one bad chunk does not abort the whole evaluation.
                try:
                    o, slc = res.get()
                except Exception as e:
                    self.errors.append((i, res, e))
                    _log.warning("({}) {} failed with exception {}".format(elapsed(), i, e))
                    chunk_failed = True
                else:
                    chunk_failed = False

                _log.info("({}) Finished result: {} / {}".format(elapsed(), i + 1, len(results)))

                # Fill output; a failed chunk has neither data nor a slice, so there is nothing to fill
                if chunk_failed or not self.fill_output:
                    continue

                if output is None:
                    # build the output from the request dims that are chunked plus the remaining dims
                    # reported by the first available chunk result
                    missing_dims = [d for d in coordinates.dims if d not in self.chunks.keys()]
                    coords = coordinates.drop(missing_dims)
                    missing_coords = Coordinates.from_xarray(o).drop(list(self.chunks.keys()))
                    coords = merge_dims([coords, missing_coords])
                    coords = coords.transpose(*coordinates.dims)
                    output = self.create_output_array(coords)
                output[slc] = o

            _log.info("Completed parallel execution.")
        finally:
            # stop accepting tasks and let the workers exit, also when an error escapes above
            pool.close()

        return output
示例#7
0
    def _eval(self, coordinates, output=None, _selector=None):
        """Evaluates this node using the supplied coordinates.

        All input nodes are evaluated first, in a thread pool when multithreading is enabled and more
        than one thread is granted, otherwise serially. The evaluated inputs are passed to ``algorithm``
        and the result is returned as a new array or copied into ``output``.

        Parameters
        ----------
        coordinates : podpac.Coordinates
            {requested_coordinates}
        output : podpac.UnitsDataArray, optional
            {eval_output}
        _selector: callable(coordinates, request_coordinates)
            {eval_selector}

        Returns
        -------
        {eval_return}

        Raises
        ------
        NodeException
            If ``algorithm`` returns a type other than UnitsDataArray, xarray.DataArray or numpy.ndarray.
        """

        self._requested_coordinates = coordinates

        inputs = {}

        if settings["MULTITHREADING"]:
            n_threads = thread_manager.request_n_threads(len(self.inputs))
            if n_threads == 1:
                # a single thread gives no parallelism; hand it back and evaluate serially
                thread_manager.release_n_threads(n_threads)
        else:
            n_threads = 0

        if settings["MULTITHREADING"] and n_threads > 1:
            # Create a function for each thread to execute asynchronously
            def f(node):
                return node.eval(coordinates, _selector=_selector)

            try:
                # Create pool of size n_threads, note, this may be created from a sub-thread (i.e. not the main thread)
                pool = thread_manager.get_thread_pool(processes=n_threads)
                try:
                    # Evaluate nodes in parallel/asynchronously
                    results = [pool.apply_async(f, [node]) for node in self.inputs.values()]

                    # Collect the results in dictionary; get() re-raises any exception from the worker
                    for key, res in zip(self.inputs.keys(), results):
                        inputs[key] = res.get()
                finally:
                    # This prevents any more tasks from being submitted to the pool, and will close the
                    # workers once done. Done in finally so a failing input does not leak the pool.
                    pool.close()
            finally:
                # Release these number of threads back to the thread pool, even when an input failed
                thread_manager.release_n_threads(n_threads)
            self._multi_threaded = True
        else:
            # Evaluate nodes in serial
            for key, node in self.inputs.items():
                inputs[key] = node.eval(coordinates, output=output, _selector=_selector)
            self._multi_threaded = False

        # accumulate output coordinates (used below to wrap a bare numpy result)
        coords_list = [Coordinates.from_xarray(a.coords, crs=a.attrs.get("crs")) for a in inputs.values()]
        output_coordinates = union([coordinates] + coords_list)

        result = self.algorithm(inputs)
        # UnitsDataArray is tested before xr.DataArray on purpose: the more specific type must win
        if isinstance(result, UnitsDataArray):
            if output is None:
                output = result
            else:
                output[:] = result.data[:]
        elif isinstance(result, xr.DataArray):
            if output is None:
                output = self.create_output_array(
                    Coordinates.from_xarray(result.coords, crs=result.attrs.get("crs")), data=result.data
                )
            else:
                output[:] = result.data
        elif isinstance(result, np.ndarray):
            if output is None:
                output = self.create_output_array(output_coordinates, data=result)
            else:
                output.data[:] = result
        else:
            raise NodeException("algorithm returned unsupported type '%s'" % type(result))

        # sub-select a single output when this node is configured with one
        if "output" in output.dims and self.output is not None:
            output = output.sel(output=self.output)

        return output
示例#8
0
    def _eval(self, coordinates, output=None, _selector=None):
        """Evaluates this data source at the supplied coordinates.

        The requested coordinates are copied to `_evaluated_coordinates`, the interpolation is reset, and
        the source is evaluated through `_source_eval` with the interpolation's `select_coordinates` as
        the selector. Request dimensions that the source data does not have are dropped, the request is
        transformed into the crs of the source data when the two differ, and the source data is then
        interpolated onto the request. If the crs of the request was changed along the way, the
        interpolated values are re-wrapped in a new output array on the originally requested coordinates.

        Parameters
        ----------
        coordinates : :class:`podpac.Coordinates`
            {requested_coordinates}

            Extra dimensions in the requested coordinates are dropped.
        output : :class:`podpac.UnitsDataArray`, optional
            {eval_output}
        _selector :
            {eval_selector}
            Not read by this method; the interpolation's own selector is always used.

        Returns
        -------
        {eval_return}

        Raises
        ------
        ValueError
            Cannot evaluate these coordinates. Not raised directly in this method; presumably raised
            further down when the request lacks dimensions of the data source -- confirm.
        """

        _logger.debug("Evaluating {} data source".format(self.__class__.__name__))

        # keep the untouched request around when debugging
        if settings["DEBUG"]:
            self._original_requested_coordinates = coordinates

        # work on a private copy of the request
        self._evaluated_coordinates = deepcopy(coordinates)

        # reset interpolation, then fetch the source data using its coordinate selector
        self._set_interpolation()
        source_data = self._source_eval(self._evaluated_coordinates, self._interpolation.select_coordinates)
        native_coords = Coordinates.from_xarray(source_data.coords, crs=source_data.crs)

        # drop request dims that the source data does not have
        unused_dims = []
        for dim in coordinates.udims:
            if dim not in native_coords.udims:
                unused_dims.append(dim)
        coordinates = coordinates.drop(unused_dims)

        # interpolation happens in the coordinate system of the source data
        native_crs = native_coords.crs
        if native_crs.lower() != coordinates.crs.lower():
            coordinates = coordinates.transform(native_crs)

        if output is None:
            if "output" in source_data.dims:
                output_names = source_data.coords["output"].data.tolist()
                self.set_trait("outputs", output_names)
            output = self.create_output_array(coordinates)

        # short cut: no source data, nothing to interpolate
        if source_data.size == 0:
            return output

        # interpolate data into output
        output = self._interpolation.interpolate(native_coords, source_data, coordinates, output)

        # if the request crs differs from the crs used for interpolation, fabricate a new output on
        # the original coordinates carrying the interpolated values
        # NOTE(review): this comparison is case-sensitive while the one above is not -- confirm intent.
        if self._evaluated_coordinates.crs != coordinates.crs:
            original_coords = self._evaluated_coordinates.drop(unused_dims)
            output = self.create_output_array(original_coords, data=output[:].values)

        # save output to private for debugging
        if settings["DEBUG"]:
            self._output = output
            self._source_xr = source_data

        return output