Exemplo n.º 1
0
 def _unique_segments(self):
     """Shared helper behind unique, unique_count and value_counts.

     Sorts a mask-free copy of this column in ascending order and runs
     ``cudautils.find_segments`` over the sorted values.

     Returns
     -------
     (segs, sortedvals)
         - segs : first result of ``cudautils.find_segments`` on the
           sorted values (presumably the start offset of each run of
           equal values -- confirm against ``cudautils``).
         - sortedvals : the sorted values, as produced by
           ``to_gpu_array()``.
     """
     # Rebuild the column on top of its dense buffer, with no mask.
     dense_buffer = self.to_dense_buffer()
     unmasked = self.replace(data=dense_buffer, mask=None)
     # Only the sorted column matters here; the second result of
     # sort_by_values is not used.
     ordered, _ = unmasked.sort_by_values(ascending=True)
     sorted_values = ordered.to_gpu_array()
     # The second result of find_segments is not needed by callers.
     segments, _ = cudautils.find_segments(sorted_values)
     return segments, sorted_values
Exemplo n.º 2
0
 def _unique_segments(self):
     """Shared helper behind unique, unique_count and value_counts.

     Takes the result of ``dropna()``, sorts it with the defaults of
     ``sort_by_values`` and runs ``cudautils.find_segments`` over the
     sorted data view.

     Returns
     -------
     (segs, sortedvals)
         - segs : first result of ``cudautils.find_segments`` on the
           sorted values (presumably the start offset of each run of
           equal values -- confirm against ``cudautils``).
         - sortedvals : the ``data_array_view`` of the sorted column.
     """
     without_na = self.dropna()
     # Only the sorted column matters here; the second result of
     # sort_by_values is not used.
     ordered, _ = without_na.sort_by_values()
     sorted_view = ordered.data_array_view
     # The second result of find_segments is not needed by callers.
     segments, _ = cudautils.find_segments(sorted_view)
     return segments, sorted_view
Exemplo n.º 3
0
    def _group_inner_levels(self, columns, rowidcol, segs, markers):
        """Group the second and onwards level.

        Parameters
        ----------
        columns : sequence[str]
            Group keys, looked up in ``self._df``.  The order is
            important.
        rowidcol : column-like
            The special column holding the original row ids.  It must
            provide ``take()`` and ``to_gpu_array()``; it is used to
            determine the shuffling order and is reshuffled after every
            key.
        segs : Series
            First level group begin offsets.  Cast to int32 before use.
        markers
            Forwarded to ``cudautils.find_segments`` for every key and
            replaced by the markers that call returns (exact layout is
            defined by ``cudautils`` -- TODO confirm).

        Returns
        -------
        (sorted_keys, reordering_indices, segments)
            - sorted_keys : list[Series]
                List of sorted key columns.
                Column order is same as arg *columns*.
            - reordering_indices : device array
                The indices to gather on to shuffle the dataframe
                into the grouped sequence.
            - segments : Series
                Group begin offsets.
        """
        # The GPU helpers take the segment offsets as raw int32 memory.
        seg_offsets = segs.astype(dtype=np.int32).data.mem
        key_columns = []
        # Segmented-sort plans keyed by (length, key dtype, index dtype)
        # so a plan can be reused by a later key with the same signature.
        plans = {}
        for name in columns:
            # Gather this key in the row order left by the previous levels.
            gathered = self._df[name].take(rowidcol.to_gpu_array(),
                                           ignore_index=True)
            nrows = len(gathered)
            # Identity permutation handed to the segmented sort together
            # with the keys.
            perm = Column(Buffer(cudautils.arange(nrows)))

            signature = (nrows, gathered.dtype, perm.dtype)
            reusable = plans.get(signature)
            # NOTE(review): apply_segsort is expected to reorder the keys
            # and the permutation in place -- only the plan is returned.
            plans[signature] = apply_segsort(gathered._column, perm,
                                             seg_offsets, plan=reusable)

            key_columns.append(gathered)
            # find_segments receives the current offsets and markers and
            # hands back the updated pair for the next level.
            seg_offsets, markers = cudautils.find_segments(
                gathered.to_gpu_array(), seg_offsets, markers=markers)
            # Carry the new ordering over to the row ids.
            rowidcol = rowidcol.take(perm.to_gpu_array(), ignore_index=True)

        return key_columns, rowidcol.to_gpu_array(), Series(seg_offsets)
Exemplo n.º 4
0
 def _find_segments(self):
     """Run ``cudautils.find_segments`` over ``self.gpu_values``.

     Returns
     -------
     (segments, markers)
         - segments : NumericalColumn wrapping the first result in a
           ``Buffer``, with that result's dtype.
         - markers : the second result, passed through unchanged.
     """
     seg_array, markers = cudautils.find_segments(self.gpu_values)
     segment_column = NumericalColumn(
         data=Buffer(seg_array),
         dtype=seg_array.dtype,
     )
     return segment_column, markers
Exemplo n.º 5
0
 def _find_segments(self):
     """Run ``cudautils.find_segments`` over ``self.gpu_values``.

     Returns
     -------
     (segments, markers)
         - segments : column built by ``column.build_column`` from the
           first result wrapped in a ``Buffer``, with that result's
           dtype.
         - markers : the second result, passed through unchanged.
     """
     seg_array, markers = cudautils.find_segments(self.gpu_values)
     segment_column = column.build_column(
         data=Buffer(seg_array), dtype=seg_array.dtype
     )
     return segment_column, markers