Example #1
    def test_datetimetz_dtype(self):
        dtype = DatetimeTZDtype(unit='ns', tz='US/Eastern')
        assert find_common_type([dtype, dtype]) == 'datetime64[ns, US/Eastern]'

        for dtype2 in [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'),
                       np.dtype('datetime64[ns]'), np.object, np.int64]:
            assert find_common_type([dtype, dtype2]) == np.object
            assert find_common_type([dtype2, dtype]) == np.object
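The two tests above pin down the contract that recurs throughout these examples: identical dtypes are preserved, while any mixture degrades to object. A minimal standalone sketch of that behaviour (using the same private import path as the snippets on this page; it may move between pandas versions):

import numpy as np
from pandas import DatetimeTZDtype
from pandas.core.dtypes.cast import find_common_type

tz = DatetimeTZDtype(unit="ns", tz="US/Eastern")
print(find_common_type([tz, tz]))                 # datetime64[ns, US/Eastern]
print(find_common_type([tz, np.dtype("int64")]))  # object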
Example #2
    def test_period_dtype(self):
        dtype = PeriodDtype(freq='D')
        assert find_common_type([dtype, dtype]) == 'period[D]'

        for dtype2 in [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'),
                       PeriodDtype(freq='2D'), PeriodDtype(freq='H'),
                       np.dtype('datetime64[ns]'), np.object, np.int64]:
            assert find_common_type([dtype, dtype2]) == np.object
            assert find_common_type([dtype2, dtype]) == np.object
Example #3
        def func(self, other, sort=True):
            other = self._as_like_interval_index(other)

            # GH 19016: ensure set op will not return a prohibited dtype
            subtypes = [self.dtype.subtype, other.dtype.subtype]
            common_subtype = find_common_type(subtypes)
            if is_object_dtype(common_subtype):
                msg = ('can only do {op} between two IntervalIndex '
                       'objects that have compatible dtypes')
                raise TypeError(msg.format(op=op_name))

            if op_name == 'difference':
                result = getattr(self._multiindex, op_name)(other._multiindex,
                                                            sort)
            else:
                result = getattr(self._multiindex, op_name)(other._multiindex)
            result_name = get_op_result_name(self, other)

            # GH 19101: ensure empty results have correct dtype
            if result.empty:
                result = result.values.astype(self.dtype.subtype)
            else:
                result = result.values

            return type(self).from_tuples(result, closed=self.closed,
                                          name=result_name)
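The guard at the top of this wrapper is the central find_common_type idiom in the IntervalIndex set ops: if the two interval subtypes only meet at object, the operation is refused. A hedged sketch of just that check, with plain numpy dtypes standing in for the interval subtypes:

import numpy as np
from pandas.core.dtypes.cast import find_common_type
from pandas.core.dtypes.common import is_object_dtype

compatible = [np.dtype("int64"), np.dtype("float64")]
prohibited = [np.dtype("float64"), np.dtype("object")]
print(is_object_dtype(find_common_type(compatible)))  # False -> set op allowed
print(is_object_dtype(find_common_type(prohibited)))  # True  -> TypeError raised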
Example #4
        def func(self, other, sort=sort):
            self._assert_can_do_setop(other)
            other = ensure_index(other)
            if not isinstance(other, IntervalIndex):
                result = getattr(self.astype(object), op_name)(other)
                if op_name in ('difference',):
                    result = result.astype(self.dtype)
                return result
            elif self.closed != other.closed:
                msg = ('can only do set operations between two IntervalIndex '
                       'objects that are closed on the same side')
                raise ValueError(msg)

            # GH 19016: ensure set op will not return a prohibited dtype
            subtypes = [self.dtype.subtype, other.dtype.subtype]
            common_subtype = find_common_type(subtypes)
            if is_object_dtype(common_subtype):
                msg = ('can only do {op} between two IntervalIndex '
                       'objects that have compatible dtypes')
                raise TypeError(msg.format(op=op_name))

            result = getattr(self._multiindex, op_name)(other._multiindex,
                                                        sort=sort)
            result_name = get_op_result_name(self, other)

            # GH 19101: ensure empty results have correct dtype
            if result.empty:
                result = result.values.astype(self.dtype.subtype)
            else:
                result = result.values

            return type(self).from_tuples(result, closed=self.closed,
                                          name=result_name)
Example #5
def _sparse_array_op(left, right, op, name):
    if name.startswith('__'):
        # For lookups in _libs.sparse we need non-dunder op name
        name = name[2:-2]

    # dtype used to find corresponding sparse method
    if not is_dtype_equal(left.dtype, right.dtype):
        dtype = find_common_type([left.dtype, right.dtype])
        left = left.astype(dtype)
        right = right.astype(dtype)
    else:
        dtype = left.dtype

    # dtype the result must have
    result_dtype = None

    if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0:
        with np.errstate(all='ignore'):
            result = op(left.get_values(), right.get_values())
            fill = op(_get_fill(left), _get_fill(right))

        if left.sp_index.ngaps == 0:
            index = left.sp_index
        else:
            index = right.sp_index
    elif left.sp_index.equals(right.sp_index):
        with np.errstate(all='ignore'):
            result = op(left.sp_values, right.sp_values)
            fill = op(_get_fill(left), _get_fill(right))
        index = left.sp_index
    else:
        if name[0] == 'r':
            left, right = right, left
            name = name[1:]

        if name in ('and', 'or') and dtype == 'bool':
            opname = 'sparse_{name}_uint8'.format(name=name)
            # to make template simple, cast here
            left_sp_values = left.sp_values.view(np.uint8)
            right_sp_values = right.sp_values.view(np.uint8)
            result_dtype = np.bool
        else:
            opname = 'sparse_{name}_{dtype}'.format(name=name, dtype=dtype)
            left_sp_values = left.sp_values
            right_sp_values = right.sp_values

        sparse_op = getattr(splib, opname)
        with np.errstate(all='ignore'):
            result, index, fill = sparse_op(left_sp_values, left.sp_index,
                                            left.fill_value, right_sp_values,
                                            right.sp_index, right.fill_value)

    if result_dtype is None:
        result_dtype = result.dtype

    return _wrap_result(name, result, index, fill, dtype=result_dtype)
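Before dispatching to a dtype-specialized sparse kernel, the function above unifies mismatched operand dtypes via find_common_type. A rough end-to-end illustration through the public API (assuming a pandas version where pd.arrays.SparseArray is available):

import pandas as pd

left = pd.arrays.SparseArray([0, 1, 2], fill_value=0)           # int64 subtype
right = pd.arrays.SparseArray([0.0, 0.5, 0.0], fill_value=0.0)  # float64 subtype
# int64 and float64 are first unified to float64, then the sparse op runs:
print((left + right).dtype)  # Sparse[float64, 0.0]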
Example #6
    def to_coo(self):
        """
        Return the contents of the frame as a sparse SciPy COO matrix.

        .. versionadded:: 0.20.0

        Returns
        -------
        coo_matrix : scipy.sparse.spmatrix
            If the caller is heterogeneous and contains booleans or objects,
            the result will be of dtype=object. See Notes.

        Notes
        -----
        The dtype will be the lowest-common-denominator type (implicit
        upcasting); that is to say if the dtypes (even of numeric types)
        are mixed, the one that accommodates all will be chosen.

        e.g. If the dtypes are float16 and float32, dtype will be upcast to
        float32. By numpy.find_common_type convention, mixing int64 and
        uint64 will result in a float64 dtype.
        """
        try:
            from scipy.sparse import coo_matrix
        except ImportError:
            raise ImportError('Scipy is not installed')

        dtype = find_common_type(self.dtypes)
        if isinstance(dtype, SparseDtype):
            dtype = dtype.subtype

        cols, rows, datas = [], [], []
        for col, name in enumerate(self):
            s = self[name]
            row = s.sp_index.to_int_index().indices
            cols.append(np.repeat(col, len(row)))
            rows.append(row)
            datas.append(s.sp_values.astype(dtype, copy=False))

        cols = np.concatenate(cols)
        rows = np.concatenate(rows)
        datas = np.concatenate(datas)
        return coo_matrix((datas, (rows, cols)), shape=self.shape)
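A small demonstration of the docstring's upcasting rule, written against the modern DataFrame.sparse accessor rather than the legacy caller above (requires scipy; the data and dtypes are illustrative, and exact behaviour may vary by pandas version):

import numpy as np
import pandas as pd

df = pd.DataFrame({
    "a": pd.arrays.SparseArray([0.0, 1.0], dtype=np.float16),
    "b": pd.arrays.SparseArray([0.0, 2.0], dtype=np.float32),
})
print(df.sparse.to_coo().dtype)  # float32, the common type of float16/float32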
Example #7
    def na_op(x, y):
        import pandas.core.computation.expressions as expressions

        try:
            result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
        except TypeError:
            if isinstance(y, (np.ndarray, ABCSeries, pd.Index)):
                dtype = find_common_type([x.dtype, y.dtype])
                result = np.empty(x.size, dtype=dtype)
                mask = notna(x) & notna(y)
                result[mask] = op(x[mask], com._values_from_object(y[mask]))
            else:
                assert isinstance(x, np.ndarray)
                result = np.empty(len(x), dtype=x.dtype)
                mask = notna(x)
                result[mask] = op(x[mask], y)

            result, changed = maybe_upcast_putmask(result, ~mask, np.nan)

        result = missing.fill_zeros(result, x, y, name, fill_zeros)
        return result
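Stripped of the pandas internals, the TypeError fallback above builds a result array with the common dtype and applies the op only where both operands are non-null. A self-contained sketch of that pattern:

import numpy as np
from pandas import notna
from pandas.core.dtypes.cast import find_common_type

x = np.array([1.0, np.nan, 3.0])
y = np.array([2.0, 2.0, np.nan])
result = np.empty(x.size, dtype=find_common_type([x.dtype, y.dtype]))
mask = notna(x) & notna(y)
result[mask] = x[mask] + y[mask]
result[~mask] = np.nan  # stand-in for maybe_upcast_putmask
print(result)           # [ 3. nan nan]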
Example #8
        def func(intvidx_self, other, sort=False):
            intvidx_self._assert_can_do_setop(other)
            other = ensure_index(other)

            if not isinstance(other, IntervalIndex):
                result = getattr(intvidx_self.astype(object),
                                 self.op_name)(other)
                if self.op_name in ('difference',):
                    result = result.astype(intvidx_self.dtype)
                return result
            elif intvidx_self.closed != other.closed:
                msg = ('can only do set operations between two IntervalIndex '
                       'objects that are closed on the same side')
                raise ValueError(msg)

            # GH 19016: ensure set op will not return a prohibited dtype
            subtypes = [intvidx_self.dtype.subtype, other.dtype.subtype]
            common_subtype = find_common_type(subtypes)
            if is_object_dtype(common_subtype):
                msg = ('can only do {op} between two IntervalIndex '
                       'objects that have compatible dtypes')
                raise TypeError(msg.format(op=self.op_name))

            return setop(intvidx_self, other, sort)
Example #9
        def func(self, other):
            msg = ('can only do set operations between two IntervalIndex '
                   'objects that are closed on the same side')
            other = self._as_like_interval_index(other, msg)

            # GH 19016: ensure set op will not return a prohibited dtype
            subtypes = [self.dtype.subtype, other.dtype.subtype]
            common_subtype = find_common_type(subtypes)
            if is_object_dtype(common_subtype):
                msg = ('can only do {op} between two IntervalIndex '
                       'objects that have compatible dtypes')
                raise TypeError(msg.format(op=op_name))

            result = getattr(self._multiindex, op_name)(other._multiindex)
            result_name = self.name if self.name == other.name else None

            # GH 19101: ensure empty results have correct dtype
            if result.empty:
                result = result.values.astype(self.dtype.subtype)
            else:
                result = result.values

            return type(self).from_tuples(result, closed=self.closed,
                                          name=result_name)
Example #10
def _sparse_array_op(left, right, op, name, series=False):

    if series and is_integer_dtype(left) and is_integer_dtype(right):
        # series coerces to float64 if result should have NaN/inf
        if name in ('floordiv', 'mod') and (right.values == 0).any():
            left = left.astype(np.float64)
            right = right.astype(np.float64)
        elif name in ('rfloordiv', 'rmod') and (left.values == 0).any():
            left = left.astype(np.float64)
            right = right.astype(np.float64)

    # dtype used to find corresponding sparse method
    if not is_dtype_equal(left.dtype, right.dtype):
        dtype = find_common_type([left.dtype, right.dtype])
        left = left.astype(dtype)
        right = right.astype(dtype)
    else:
        dtype = left.dtype

    # dtype the result must have
    result_dtype = None

    if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0:
        with np.errstate(all='ignore'):
            result = op(left.get_values(), right.get_values())
            fill = op(_get_fill(left), _get_fill(right))

        if left.sp_index.ngaps == 0:
            index = left.sp_index
        else:
            index = right.sp_index
    elif left.sp_index.equals(right.sp_index):
        with np.errstate(all='ignore'):
            result = op(left.sp_values, right.sp_values)
            fill = op(_get_fill(left), _get_fill(right))
        index = left.sp_index
    else:
        if name[0] == 'r':
            left, right = right, left
            name = name[1:]

        if name in ('and', 'or') and dtype == 'bool':
            opname = 'sparse_{name}_uint8'.format(name=name)
            # to make template simple, cast here
            left_sp_values = left.sp_values.view(np.uint8)
            right_sp_values = right.sp_values.view(np.uint8)
            result_dtype = np.bool
        else:
            opname = 'sparse_{name}_{dtype}'.format(name=name, dtype=dtype)
            left_sp_values = left.sp_values
            right_sp_values = right.sp_values

        sparse_op = getattr(splib, opname)
        with np.errstate(all='ignore'):
            result, index, fill = sparse_op(left_sp_values, left.sp_index,
                                            left.fill_value, right_sp_values,
                                            right.sp_index, right.fill_value)

    if result_dtype is None:
        result_dtype = result.dtype

    return _wrap_result(name, result, index, fill, dtype=result_dtype)
Example #11
def test_period_dtype_match():
    dtype = PeriodDtype(freq="D")
    assert find_common_type([dtype, dtype]) == "period[D]"
Example #12
def test_datetimetz_dtype_match():
    dtype = DatetimeTZDtype(unit="ns", tz="US/Eastern")
    assert find_common_type([dtype, dtype]) == "datetime64[ns, US/Eastern]"
Example #13
def test_raises_empty_input():
    with pytest.raises(ValueError, match="no types given"):
        find_common_type([])
Example #14
def test_period_dtype_mismatch(dtype2):
    dtype = PeriodDtype(freq="D")
    assert find_common_type([dtype, dtype2]) == np.object
    assert find_common_type([dtype2, dtype]) == np.object
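This test is parametrized (dtype2 comes from a pytest parametrization that is not shown). One concrete instantiation, paired with the matching case from Example #11:

import numpy as np
from pandas import PeriodDtype
from pandas.core.dtypes.cast import find_common_type

dtype = PeriodDtype(freq="D")
assert find_common_type([dtype, dtype]) == "period[D]"                  # match
assert find_common_type([dtype, np.dtype("datetime64[ns]")]) == object  # mismatch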
Example #15
 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
     if not isinstance(dtype, IntervalDtype):
         return False
     common_subtype = find_common_type([self.dtype, dtype])
     return not is_object_dtype(common_subtype)
Example #16
def test_period_dtype_mismatch(dtype2):
    dtype = PeriodDtype(freq="D")
    assert find_common_type([dtype, dtype2]) == object
    assert find_common_type([dtype2, dtype]) == object
Example #17
    def read_json(
        cls,
        path_or_buf=None,
        orient=None,
        typ="frame",
        dtype=True,
        convert_axes=True,
        convert_dates=True,
        keep_default_dates=True,
        numpy=False,
        precise_float=False,
        date_unit=None,
        encoding=None,
        lines=False,
        chunksize=None,
        compression="infer",
    ):
        kwargs = {
            "path_or_buf": path_or_buf,
            "orient": orient,
            "typ": typ,
            "dtype": dtype,
            "convert_axes": convert_axes,
            "convert_dates": convert_dates,
            "keep_default_dates": keep_default_dates,
            "numpy": numpy,
            "precise_float": precise_float,
            "date_unit": date_unit,
            "encoding": encoding,
            "lines": lines,
            "chunksize": chunksize,
            "compression": compression,
        }
        if cls.read_json_remote_task is None:
            return super(RayIO, cls).read_json(**kwargs)

        if not lines:
            ErrorMessage.default_to_pandas(
                "`read_json` only optimized with `lines=True`")
            return super(RayIO, cls).read_json(**kwargs)
        else:
            # TODO: Pick up the columns in an optimized way from all data
            # All rows must be read because some rows may have missing data
            # Currently assumes all rows have the same columns
            from io import BytesIO

            columns = pandas.read_json(
                BytesIO(b"" + open(path_or_buf, "rb").readline()),
                lines=True).columns
            kwargs["columns"] = columns
            empty_pd_df = pandas.DataFrame(columns=columns)

            path_or_buf = kwargs.pop("path_or_buf")

            with file_open(path_or_buf, "rb",
                           kwargs.get("compression", "infer")) as f:
                total_bytes = file_size(f)
                from modin.pandas import DEFAULT_NPARTITIONS

                num_partitions = DEFAULT_NPARTITIONS
                num_splits = min(len(columns), num_partitions)
                chunk_size = max(1, (total_bytes - f.tell()) // num_partitions)

                partition_ids = []
                index_ids = []
                dtypes_ids = []

                column_chunksize = compute_chunksize(empty_pd_df,
                                                     num_splits,
                                                     axis=1)
                if column_chunksize > len(columns):
                    column_widths = [len(columns)]
                    num_splits = 1
                else:
                    column_widths = [
                        column_chunksize if i != num_splits - 1 else
                        len(columns) - (column_chunksize * (num_splits - 1))
                        for i in range(num_splits)
                    ]

                while f.tell() < total_bytes:
                    start = f.tell()
                    f.seek(chunk_size, os.SEEK_CUR)
                    f.readline()
                    partition_id = cls.read_json_remote_task._remote(
                        args=(path_or_buf, num_splits, start, f.tell(),
                              kwargs),
                        num_return_vals=num_splits + 3,
                    )
                    partition_ids.append(partition_id[:-3])
                    index_ids.append(partition_id[-3])
                    dtypes_ids.append(partition_id[-2])

            row_lengths = ray.get(index_ids)
            new_index = pandas.RangeIndex(sum(row_lengths))

            dtypes = (pandas.concat(ray.get(dtypes_ids), axis=1).apply(
                lambda row: find_common_type(row.values),
                axis=1).squeeze(axis=0))

            partition_ids = [[
                cls.frame_partition_cls(
                    partition_ids[i][j],
                    length=row_lengths[i],
                    width=column_widths[j],
                ) for j in range(len(partition_ids[i]))
            ] for i in range(len(partition_ids))]

            if isinstance(dtypes, pandas.Series):
                dtypes.index = columns
            else:
                dtypes = pandas.Series(dtypes, index=columns)

            new_frame = cls.frame_cls(
                np.array(partition_ids),
                new_index,
                columns,
                row_lengths,
                column_widths,
                dtypes=dtypes,
            )
            new_frame._apply_index_objs(axis=0)
            return cls.query_compiler_cls(new_frame)
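The dtype-combination step above is easy to miss: each worker reports a per-partition dtypes Series, and the exact dtype of every column is then computed row-wise with find_common_type. The same step in isolation (column names and dtypes are illustrative):

import pandas
from pandas.api.types import pandas_dtype
from pandas.core.dtypes.cast import find_common_type

part1 = pandas.Series({"a": "int64", "b": "float64"}).map(pandas_dtype)
part2 = pandas.Series({"a": "float32", "b": "float64"}).map(pandas_dtype)
dtypes = pandas.concat([part1, part2], axis=1).apply(
    lambda row: find_common_type(row.tolist()), axis=1)
print(dtypes["a"], dtypes["b"])  # float64 float64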
Example #18
    def _read_csv_from_file_ray(cls, filepath, kwargs={}):
        """Constructs a DataFrame from a CSV file.

        Args:
            filepath (str): path to the CSV file.
            kwargs (dict): args excluding filepath provided to read_csv.

        Returns:
            DataFrame or Series constructed from CSV file.
        """
        names = kwargs.get("names", None)
        index_col = kwargs.get("index_col", None)
        if names is None:
            # For the sake of the empty df, we assume no `index_col` to get the correct
            # column names before we build the index. Because we pass `names` in, this
            # step has to happen without removing the `index_col` otherwise it will not
            # be assigned correctly
            kwargs["index_col"] = None
            names = pandas.read_csv(filepath,
                                    **dict(kwargs, nrows=0,
                                           skipfooter=0)).columns
            kwargs["index_col"] = index_col

        empty_pd_df = pandas.read_csv(filepath,
                                      **dict(kwargs, nrows=0, skipfooter=0))
        column_names = empty_pd_df.columns
        skipfooter = kwargs.get("skipfooter", None)
        skiprows = kwargs.pop("skiprows", None)

        usecols = kwargs.get("usecols", None)
        usecols_md = _validate_usecols_arg(kwargs.get("usecols", None))
        if usecols is not None and usecols_md[1] != "integer":
            del kwargs["usecols"]
            all_cols = pandas.read_csv(file_open(filepath, "rb"),
                                       **dict(kwargs, nrows=0,
                                              skipfooter=0)).columns
            usecols = all_cols.get_indexer_for(list(usecols_md[0]))
        parse_dates = kwargs.pop("parse_dates", False)
        partition_kwargs = dict(
            kwargs,
            header=None,
            names=names,
            skipfooter=0,
            skiprows=None,
            parse_dates=parse_dates,
            usecols=usecols,
        )
        with file_open(filepath, "rb", kwargs.get("compression",
                                                  "infer")) as f:
            # Get the BOM if necessary
            prefix = b""
            if kwargs.get("encoding", None) is not None:
                prefix = f.readline()
                partition_kwargs["skiprows"] = 1
                f.seek(0, os.SEEK_SET)  # Return to beginning of file

            prefix_id = ray.put(prefix)
            partition_kwargs_id = ray.put(partition_kwargs)
            # Skip the header since we already have the header information and skip the
            # rows we are told to skip.
            kwargs["skiprows"] = skiprows
            cls._skip_header(f, kwargs)
            # Launch tasks to read partitions
            partition_ids = []
            index_ids = []
            dtypes_ids = []
            total_bytes = file_size(f)
            # Max number of partitions available
            from modin.pandas import DEFAULT_NPARTITIONS

            num_partitions = DEFAULT_NPARTITIONS
            # This is the number of splits for the columns
            num_splits = min(len(column_names), num_partitions)
            # This is the chunksize each partition will read
            chunk_size = max(1, (total_bytes - f.tell()) // num_partitions)

            # Metadata
            column_chunksize = compute_chunksize(empty_pd_df,
                                                 num_splits,
                                                 axis=1)
            if column_chunksize > len(column_names):
                column_widths = [len(column_names)]
                # This prevents us from unnecessarily serializing a bunch of empty
                # objects.
                num_splits = 1
            else:
                column_widths = [
                    column_chunksize if len(column_names) >
                    (column_chunksize * (i + 1)) else 0 if len(column_names) <
                    (column_chunksize * i) else len(column_names) -
                    (column_chunksize * i) for i in range(num_splits)
                ]

            while f.tell() < total_bytes:
                start = f.tell()
                f.seek(chunk_size, os.SEEK_CUR)
                f.readline()  # Read a whole number of lines
                # The workers return multiple objects for each part of the file read:
                # - The first n - 2 objects are partitions of data
                # - The n - 1 object is the length of the partition or the index if
                #   `index_col` is specified. We compute the index below.
                # - The nth object is the dtypes of the partition. We combine these to
                #   form the final dtypes below.
                partition_id = cls.read_csv_remote_task._remote(
                    args=(
                        filepath,
                        num_splits,
                        start,
                        f.tell(),
                        partition_kwargs_id,
                        prefix_id,
                    ),
                    num_return_vals=num_splits + 2,
                )
                partition_ids.append(partition_id[:-2])
                index_ids.append(partition_id[-2])
                dtypes_ids.append(partition_id[-1])

        # Compute the index based on a sum of the lengths of each partition (by default)
        # or based on the column(s) that were requested.
        if index_col is None:
            row_lengths = ray.get(index_ids)
            new_index = pandas.RangeIndex(sum(row_lengths))
        else:
            index_objs = ray.get(index_ids)
            row_lengths = [len(o) for o in index_objs]
            new_index = index_objs[0].append(index_objs[1:])
            new_index.name = empty_pd_df.index.name

        # Compute dtypes by getting collecting and combining all of the partitions. The
        # reported dtypes from differing rows can be different based on the inference in
        # the limited data seen by each worker. We use pandas to compute the exact dtype
        # over the whole column for each column. The index is set below.
        dtypes = (pandas.concat(ray.get(dtypes_ids), axis=1).apply(
            lambda row: find_common_type(row.values), axis=1).squeeze(axis=0))
        partition_ids = [[
            cls.frame_partition_cls(partition_ids[i][j],
                                    length=row_lengths[i],
                                    width=column_widths[j])
            for j in range(len(partition_ids[i]))
        ] for i in range(len(partition_ids))]
        # If parse_dates is present, the column names that we have might not be
        # the same length as the returned column names. If we do need to modify
        # the column names, we remove the old names from the column names and
        # insert the new one at the front of the Index.
        if parse_dates is not None:
            # We have to recompute the column widths if `parse_dates` is set because
            # we are not guaranteed to have the correct information regarding how many
            # columns are on each partition.
            column_widths = None
            # Check if is list of lists
            if isinstance(parse_dates, list) and isinstance(
                    parse_dates[0], list):
                for group in parse_dates:
                    new_col_name = "_".join(group)
                    column_names = column_names.drop(group).insert(
                        0, new_col_name)
            # Check if it is a dictionary
            elif isinstance(parse_dates, dict):
                for new_col_name, group in parse_dates.items():
                    column_names = column_names.drop(group).insert(
                        0, new_col_name)
        # Set the index for the dtypes to the column names
        if isinstance(dtypes, pandas.Series):
            dtypes.index = column_names
        else:
            dtypes = pandas.Series(dtypes, index=column_names)
        new_frame = cls.frame_cls(
            partition_ids,
            new_index,
            column_names,
            row_lengths,
            column_widths,
            dtypes=dtypes,
        )
        new_query_compiler = cls.query_compiler_cls(new_frame)

        if skipfooter:
            new_query_compiler = new_query_compiler.drop(
                new_query_compiler.index[-skipfooter:])
        if kwargs.get("squeeze", False) and len(
                new_query_compiler.columns) == 1:
            return new_query_compiler[new_query_compiler.columns[0]]
        if index_col is None:
            new_query_compiler._modin_frame._apply_index_objs(axis=0)
        return new_query_compiler
Example #19
def test_categorical_dtype(dtypes, exp_type):
    assert find_common_type(dtypes) == exp_type
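Another parametrized test with the fixture elided. Plausible concrete cases, consistent with the categorical behaviour spelled out in Example #32 below:

import numpy as np
from pandas import CategoricalDtype
from pandas.core.dtypes.cast import find_common_type

assert find_common_type([CategoricalDtype()]) == "category"
assert find_common_type([CategoricalDtype(), np.dtype("int64")]) == object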
Example #20
def test_numpy_dtypes(source_dtypes, expected_common_dtype):
    assert find_common_type(source_dtypes) == expected_common_dtype
Example #21
    def test_numpy_dtypes(self):
        # (source_types, destination_type)
        testcases = (
            # identity
            ((np.int64,), np.int64),
            ((np.uint64,), np.uint64),
            ((np.float32,), np.float32),
            ((np.object,), np.object),

            # into ints
            ((np.int16, np.int64), np.int64),
            ((np.int32, np.uint32), np.int64),
            ((np.uint16, np.uint64), np.uint64),

            # into floats
            ((np.float16, np.float32), np.float32),
            ((np.float16, np.int16), np.float32),
            ((np.float32, np.int16), np.float32),
            ((np.uint64, np.int64), np.float64),
            ((np.int16, np.float64), np.float64),
            ((np.float16, np.int64), np.float64),

            # into others
            ((np.complex128, np.int32), np.complex128),
            ((np.object, np.float32), np.object),
            ((np.object, np.int16), np.object),

            # bool with int
            ((np.dtype('bool'), np.int64), np.object),
            ((np.dtype('bool'), np.int32), np.object),
            ((np.dtype('bool'), np.int16), np.object),
            ((np.dtype('bool'), np.int8), np.object),
            ((np.dtype('bool'), np.uint64), np.object),
            ((np.dtype('bool'), np.uint32), np.object),
            ((np.dtype('bool'), np.uint16), np.object),
            ((np.dtype('bool'), np.uint8), np.object),

            # bool with float
            ((np.dtype('bool'), np.float64), np.object),
            ((np.dtype('bool'), np.float32), np.object),

            ((np.dtype('datetime64[ns]'), np.dtype('datetime64[ns]')),
             np.dtype('datetime64[ns]')),
            ((np.dtype('timedelta64[ns]'), np.dtype('timedelta64[ns]')),
             np.dtype('timedelta64[ns]')),

            ((np.dtype('datetime64[ns]'), np.dtype('datetime64[ms]')),
             np.dtype('datetime64[ns]')),
            ((np.dtype('timedelta64[ms]'), np.dtype('timedelta64[ns]')),
             np.dtype('timedelta64[ns]')),

            ((np.dtype('datetime64[ns]'), np.dtype('timedelta64[ns]')),
             np.object),
            ((np.dtype('datetime64[ns]'), np.int64), np.object)
        )
        for src, common in testcases:
            assert find_common_type(src) == common

        with pytest.raises(ValueError):
            # empty
            find_common_type([])
Example #22
def get_dtype(X):
    try:
        from pandas.core.dtypes.cast import find_common_type
        return find_common_type(X.dtypes) if is_DataFrame(X) else X.dtype
    except ImportError:
        return getattr(X, "dtype", None)
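A self-contained variant of the helper above; isinstance stands in for the project's own is_DataFrame check, and the function name is illustrative:

import pandas as pd
from pandas.core.dtypes.cast import find_common_type

def get_dtype_sketch(X):
    # DataFrame: collapse the per-column dtypes to their common type.
    if isinstance(X, pd.DataFrame):
        return find_common_type(list(X.dtypes))
    # Series/ndarray: a single dtype already exists.
    return getattr(X, "dtype", None)

print(get_dtype_sketch(pd.DataFrame({"a": [1], "b": [1.5]})))  # float64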
Example #23
def concat_compat(to_concat, axis: int = 0):
    """
    provide concatenation of an array of arrays each of which is a single
    'normalized' dtypes (in that for example, if it's object, then it is a
    non-datetimelike and provide a combined dtype for the resulting array that
    preserves the overall dtype if possible)

    Parameters
    ----------
    to_concat : array of arrays
    axis : axis to provide concatenation

    Returns
    -------
    a single array, preserving the combined dtypes
    """

    # filter empty arrays
    # 1-d dtypes always are included here
    def is_nonempty(x) -> bool:
        if x.ndim <= axis:
            return True
        return x.shape[axis] > 0

    # If all arrays are empty, there's nothing to convert, just short-cut to
    # the concatenation, #3121.
    #
    # Creating an empty array directly is tempting, but the winnings would be
    # marginal given that it would still require shape & dtype calculation and
    # np.concatenate which has them both implemented is compiled.
    non_empties = [x for x in to_concat if is_nonempty(x)]
    if non_empties and axis == 0:
        to_concat = non_empties

    typs = get_dtype_kinds(to_concat)
    _contains_datetime = any(typ.startswith("datetime") for typ in typs)

    all_empty = not len(non_empties)
    single_dtype = len({x.dtype for x in to_concat}) == 1
    any_ea = any(is_extension_array_dtype(x.dtype) for x in to_concat)

    if any_ea:
        if not single_dtype:
            target_dtype = find_common_type([x.dtype for x in to_concat])
            to_concat = [
                _cast_to_common_type(arr, target_dtype) for arr in to_concat
            ]

        if isinstance(to_concat[0], ExtensionArray):
            cls = type(to_concat[0])
            return cls._concat_same_type(to_concat)
        else:
            return np.concatenate(to_concat)

    elif _contains_datetime or "timedelta" in typs:
        return concat_datetime(to_concat, axis=axis, typs=typs)

    elif all_empty:
        # we have all empties, but may need to coerce the result dtype to
        # object if we have non-numeric type operands (numpy would otherwise
        # cast this to float)
        typs = get_dtype_kinds(to_concat)
        if len(typs) != 1:

            if not len(typs - {"i", "u", "f"}) or not len(typs -
                                                          {"bool", "i", "u"}):
                # let numpy coerce
                pass
            else:
                # coerce to object
                to_concat = [x.astype("object") for x in to_concat]

    return np.concatenate(to_concat, axis=axis)
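The extension-array branch is where find_common_type does the work: mixed EA dtypes are aligned to one target dtype before concatenation. That step in isolation (assuming a pandas version whose nullable dtypes implement the common-type protocol, e.g. pandas >= 1.2 for Float64):

import pandas as pd
from pandas.core.dtypes.cast import find_common_type

arrs = [pd.array([1, 2], dtype="Int64"), pd.array([0.5], dtype="Float64")]
target = find_common_type([a.dtype for a in arrs])
print(target)                                  # Float64
print([a.astype(target).dtype for a in arrs])  # [Float64Dtype(), Float64Dtype()]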
Example #24
def _sparse_array_op(left, right, op, name, series=False):

    if series and is_integer_dtype(left) and is_integer_dtype(right):
        # series coerces to float64 if result should have NaN/inf
        if name in ('floordiv', 'mod') and (right.values == 0).any():
            left = left.astype(np.float64)
            right = right.astype(np.float64)
        elif name in ('rfloordiv', 'rmod') and (left.values == 0).any():
            left = left.astype(np.float64)
            right = right.astype(np.float64)

    # dtype used to find corresponding sparse method
    if not is_dtype_equal(left.dtype, right.dtype):
        dtype = find_common_type([left.dtype, right.dtype])
        left = left.astype(dtype)
        right = right.astype(dtype)
    else:
        dtype = left.dtype

    # dtype the result must have
    result_dtype = None

    if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0:
        with np.errstate(all='ignore'):
            result = op(left.get_values(), right.get_values())
            fill = op(_get_fill(left), _get_fill(right))

        if left.sp_index.ngaps == 0:
            index = left.sp_index
        else:
            index = right.sp_index
    elif left.sp_index.equals(right.sp_index):
        with np.errstate(all='ignore'):
            result = op(left.sp_values, right.sp_values)
            fill = op(_get_fill(left), _get_fill(right))
        index = left.sp_index
    else:
        if name[0] == 'r':
            left, right = right, left
            name = name[1:]

        if name in ('and', 'or') and dtype == 'bool':
            opname = 'sparse_{name}_uint8'.format(name=name)
            # to make template simple, cast here
            left_sp_values = left.sp_values.view(np.uint8)
            right_sp_values = right.sp_values.view(np.uint8)
            result_dtype = np.bool
        else:
            opname = 'sparse_{name}_{dtype}'.format(name=name, dtype=dtype)
            left_sp_values = left.sp_values
            right_sp_values = right.sp_values

        sparse_op = getattr(splib, opname)
        with np.errstate(all='ignore'):
            result, index, fill = sparse_op(left_sp_values, left.sp_index,
                                            left.fill_value, right_sp_values,
                                            right.sp_index, right.fill_value)

    if result_dtype is None:
        result_dtype = result.dtype

    return _wrap_result(name, result, index, fill, dtype=result_dtype)
Example #25
def _masked_arith_op(x: np.ndarray, y, op):
    """
    If the given arithmetic operation fails, attempt it again on
    only the non-null elements of the input array(s).

    Parameters
    ----------
    x : np.ndarray
    y : np.ndarray, Series, Index
    op : binary operator
    """
    # For Series `x` is 1D so ravel() is a no-op; calling it anyway makes
    # the logic valid for both Series and DataFrame ops.
    xrav = x.ravel()
    assert isinstance(x, np.ndarray), type(x)
    if isinstance(y, np.ndarray):
        dtype = find_common_type([x.dtype, y.dtype])
        # error: Argument "dtype" to "empty" has incompatible type
        # "Union[dtype, ExtensionDtype]"; expected "Union[dtype, None, type,
        # _SupportsDtype, str, Tuple[Any, int], Tuple[Any, Union[int,
        # Sequence[int]]], List[Any], _DtypeDict, Tuple[Any, Any]]"
        result = np.empty(x.size, dtype=dtype)  # type: ignore[arg-type]

        if len(x) != len(y):
            raise ValueError(x.shape, y.shape)
        else:
            ymask = notna(y)

        # NB: ravel() is only safe since y is ndarray; for e.g. PeriodIndex
        #  we would get int64 dtype, see GH#19956
        yrav = y.ravel()
        mask = notna(xrav) & ymask.ravel()

        # See GH#5284, GH#5035, GH#19448 for historical reference
        if mask.any():
            with np.errstate(all="ignore"):
                result[mask] = op(xrav[mask], yrav[mask])

    else:
        if not is_scalar(y):
            raise TypeError(
                f"Cannot broadcast np.ndarray with operand of type { type(y) }"
            )

        # mask is only meaningful for x
        result = np.empty(x.size, dtype=x.dtype)
        mask = notna(xrav)

        # 1 ** np.nan is 1. So we have to unmask those.
        if op is pow:
            mask = np.where(x == 1, False, mask)
        elif op is rpow:
            mask = np.where(y == 1, False, mask)

        if mask.any():
            with np.errstate(all="ignore"):
                result[mask] = op(xrav[mask], y)

    result = maybe_upcast_putmask(result, ~mask)
    result = result.reshape(x.shape)  # 2D compat
    return result
Example #26
 def test_raises_empty_input(self):
     with pytest.raises(ValueError):
         find_common_type([])
Example #27
def putmask_smart(values: np.ndarray, mask: np.ndarray, new) -> np.ndarray:
    """
    Return a new ndarray, try to preserve dtype if possible.

    Parameters
    ----------
    values : np.ndarray
        `values`, updated in-place.
    mask : np.ndarray[bool]
        Applies to both sides (array like).
    new : `new values` either scalar or an array like aligned with `values`

    Returns
    -------
    values : ndarray with updated values
        this *may* be a copy of the original

    See Also
    --------
    ndarray.putmask
    """
    # we cannot use np.asarray() here as we cannot have conversions
    # that numpy does when numeric are mixed with strings

    # n should be the length of the mask or a scalar here
    if not is_list_like(new):
        new = np.broadcast_to(new, mask.shape)

    # see if we are only masking values that, if put into `values`,
    # will work in the current dtype
    try:
        nn = new[mask]
    except TypeError:
        # TypeError: only integer scalar arrays can be converted to a scalar index
        pass
    else:
        # make sure that we have a nullable type if we have nulls
        if not isna_compat(values, nn[0]):
            pass
        elif not (is_float_dtype(nn.dtype) or is_integer_dtype(nn.dtype)):
            # only compare integers/floats
            pass
        elif not (is_float_dtype(values.dtype) or is_integer_dtype(values.dtype)):
            # only compare integers/floats
            pass
        else:

            # we ignore ComplexWarning here
            with warnings.catch_warnings(record=True):
                warnings.simplefilter("ignore", np.ComplexWarning)
                nn_at = nn.astype(values.dtype)

            comp = nn == nn_at
            if is_list_like(comp) and comp.all():
                nv = values.copy()
                nv[mask] = nn_at
                return nv

    new = np.asarray(new)

    if values.dtype.kind == new.dtype.kind:
        # preserves dtype if possible
        return _putmask_preserve(values, new, mask)

    dtype = find_common_type([values.dtype, new.dtype])
    # error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible type
    # "Union[dtype[Any], ExtensionDtype]"; expected "Union[dtype[Any], None, type,
    # _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]],
    # List[Any], _DTypeDict, Tuple[Any, Any]]]"
    values = values.astype(dtype)  # type: ignore[arg-type]

    return _putmask_preserve(values, new, mask)
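The final fallback of putmask_smart, reduced to its essence: upcast values to the common dtype, then put. A standalone sketch:

import numpy as np
from pandas.core.dtypes.cast import find_common_type

values = np.array([1, 2, 3])
new = np.array([0.5, 0.5, 0.5])
values = values.astype(find_common_type([values.dtype, new.dtype]))
np.putmask(values, np.array([True, False, True]), new)
print(values)  # [0.5 2.  0.5]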
Example #28
def _sparse_array_op(
    left: "SparseArray", right: "SparseArray", op: Callable, name: str
) -> Any:
    """
    Perform a binary operation between two arrays.

    Parameters
    ----------
    left : Union[SparseArray, ndarray]
    right : Union[SparseArray, ndarray]
    op : Callable
        The binary operation to perform
    name : str
        Name of the callable.

    Returns
    -------
    SparseArray
    """
    if name.startswith("__"):
        # For lookups in _libs.sparse we need non-dunder op name
        name = name[2:-2]

    # dtype used to find corresponding sparse method
    ltype = left.dtype.subtype
    rtype = right.dtype.subtype

    if not is_dtype_equal(ltype, rtype):
        subtype = find_common_type([ltype, rtype])
        ltype = SparseDtype(subtype, left.fill_value)
        rtype = SparseDtype(subtype, right.fill_value)

        # TODO(GH-23092): pass copy=False. Need to fix astype_nansafe
        left = left.astype(ltype)
        right = right.astype(rtype)
        dtype = ltype.subtype
    else:
        dtype = ltype

    # dtype the result must have
    result_dtype = None

    if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0:
        with np.errstate(all="ignore"):
            result = op(left.to_dense(), right.to_dense())
            fill = op(_get_fill(left), _get_fill(right))

        if left.sp_index.ngaps == 0:
            index = left.sp_index
        else:
            index = right.sp_index
    elif left.sp_index.equals(right.sp_index):
        with np.errstate(all="ignore"):
            result = op(left.sp_values, right.sp_values)
            fill = op(_get_fill(left), _get_fill(right))
        index = left.sp_index
    else:
        if name[0] == "r":
            left, right = right, left
            name = name[1:]

        if name in ("and", "or", "xor") and dtype == "bool":
            opname = f"sparse_{name}_uint8"
            # to make template simple, cast here
            left_sp_values = left.sp_values.view(np.uint8)
            right_sp_values = right.sp_values.view(np.uint8)
            result_dtype = bool
        else:
            opname = f"sparse_{name}_{dtype}"
            left_sp_values = left.sp_values
            right_sp_values = right.sp_values

        sparse_op = getattr(splib, opname)

        with np.errstate(all="ignore"):
            result, index, fill = sparse_op(
                left_sp_values,
                left.sp_index,
                left.fill_value,
                right_sp_values,
                right.sp_index,
                right.fill_value,
            )

    if result_dtype is None:
        result_dtype = result.dtype

    return _wrap_result(name, result, index, fill, dtype=result_dtype)
Example #29
def concat_compat(to_concat, axis: int = 0, ea_compat_axis: bool = False):
    """
    provide concatenation of an array of arrays each of which is a single
    'normalized' dtypes (in that for example, if it's object, then it is a
    non-datetimelike and provide a combined dtype for the resulting array that
    preserves the overall dtype if possible)

    Parameters
    ----------
    to_concat : array of arrays
    axis : axis to provide concatenation
    ea_compat_axis : bool, default False
        For ExtensionArray compat, behave as if axis == 1 when determining
        whether to drop empty arrays.

    Returns
    -------
    a single array, preserving the combined dtypes
    """

    # filter empty arrays
    # 1-d dtypes always are included here
    def is_nonempty(x) -> bool:
        if x.ndim <= axis:
            return True
        return x.shape[axis] > 0

    # If all arrays are empty, there's nothing to convert, just short-cut to
    # the concatenation, #3121.
    #
    # Creating an empty array directly is tempting, but the winnings would be
    # marginal given that it would still require shape & dtype calculation and
    # np.concatenate which has them both implemented is compiled.
    non_empties = [x for x in to_concat if is_nonempty(x)]
    if non_empties and axis == 0 and not ea_compat_axis:
        # ea_compat_axis see GH#39574
        to_concat = non_empties

    kinds = {obj.dtype.kind for obj in to_concat}
    contains_datetime = any(kind in ["m", "M"] for kind in kinds) or any(
        isinstance(obj, ABCExtensionArray) and obj.ndim > 1
        for obj in to_concat)

    all_empty = not len(non_empties)
    single_dtype = len({x.dtype for x in to_concat}) == 1
    any_ea = any(isinstance(x.dtype, ExtensionDtype) for x in to_concat)

    if contains_datetime:
        return _concat_datetime(to_concat, axis=axis)

    if any_ea:
        # we ignore axis here, as internally concatting with EAs is always
        # for axis=0
        if not single_dtype:
            target_dtype = find_common_type([x.dtype for x in to_concat])
            to_concat = [
                cast_to_common_type(arr, target_dtype) for arr in to_concat
            ]

        if isinstance(to_concat[0], ABCExtensionArray):
            # TODO: what about EA-backed Index?
            cls = type(to_concat[0])
            return cls._concat_same_type(to_concat)
        else:
            return np.concatenate(to_concat)

    elif all_empty:
        # we have all empties, but may need to coerce the result dtype to
        # object if we have non-numeric type operands (numpy would otherwise
        # cast this to float)
        if len(kinds) != 1:

            if not len(kinds - {"i", "u", "f"}) or not len(kinds -
                                                           {"b", "i", "u"}):
                # let numpy coerce
                pass
            else:
                # coerce to object
                to_concat = [x.astype("object") for x in to_concat]

    return np.concatenate(to_concat, axis=axis)
Example #30
def test_datetimetz_dtype_mismatch(dtype2):
    dtype = DatetimeTZDtype(unit="ns", tz="US/Eastern")
    assert find_common_type([dtype, dtype2]) == np.object
    assert find_common_type([dtype2, dtype]) == np.object
Example #31
def masked_arith_op(x: np.ndarray, y, op):
    """
    If the given arithmetic operation fails, attempt it again on
    only the non-null elements of the input array(s).

    Parameters
    ----------
    x : np.ndarray
    y : np.ndarray, Series, Index
    op : binary operator
    """
    # For Series `x` is 1D so ravel() is a no-op; calling it anyway makes
    # the logic valid for both Series and DataFrame ops.
    xrav = x.ravel()
    assert isinstance(x, np.ndarray), type(x)
    if isinstance(y, np.ndarray):
        dtype = find_common_type([x.dtype, y.dtype])
        result = np.empty(x.size, dtype=dtype)

        if len(x) != len(y):
            raise ValueError(x.shape, y.shape)
        else:
            ymask = notna(y)

        # NB: ravel() is only safe since y is ndarray; for e.g. PeriodIndex
        #  we would get int64 dtype, see GH#19956
        yrav = y.ravel()
        mask = notna(xrav) & ymask.ravel()

        if yrav.shape != mask.shape:
            # FIXME: GH#5284, GH#5035, GH#19448
            # Without specifically raising here we get mismatched
            # errors in Py3 (TypeError) vs Py2 (ValueError)
            # Note: Only = an issue in DataFrame case
            raise ValueError("Cannot broadcast operands together.")

        if mask.any():
            with np.errstate(all="ignore"):
                result[mask] = op(xrav[mask], yrav[mask])

    else:
        if not is_scalar(y):
            raise TypeError(
                f"Cannot broadcast np.ndarray with operand of type { type(y) }"
            )

        # mask is only meaningful for x
        result = np.empty(x.size, dtype=x.dtype)
        mask = notna(xrav)

        # 1 ** np.nan is 1. So we have to unmask those.
        if op is pow:
            mask = np.where(x == 1, False, mask)
        elif op is rpow:
            mask = np.where(y == 1, False, mask)

        if mask.any():
            with np.errstate(all="ignore"):
                result[mask] = op(xrav[mask], y)

    result, _ = maybe_upcast_putmask(result, ~mask, np.nan)
    result = result.reshape(x.shape)  # 2D compat
    return result
Example #32
 def test_categorical_dtype(self):
     dtype = CategoricalDtype()
     assert find_common_type([dtype]) == 'category'
     assert find_common_type([dtype, dtype]) == 'category'
     assert find_common_type([np.object, dtype]) == np.object
Example #33
def _sparse_array_op(left, right, op, name):
    # type: (SparseArray, SparseArray, Callable, str) -> Any
    """
    Perform a binary operation between two arrays.

    Parameters
    ----------
    left : Union[SparseArray, ndarray]
    right : Union[SparseArray, ndarray]
    op : Callable
        The binary operation to perform
    name : str
        Name of the callable.

    Returns
    -------
    SparseArray
    """
    if name.startswith('__'):
        # For lookups in _libs.sparse we need non-dunder op name
        name = name[2:-2]

    # dtype used to find corresponding sparse method
    ltype = left.dtype.subtype
    rtype = right.dtype.subtype

    if not is_dtype_equal(ltype, rtype):
        subtype = find_common_type([ltype, rtype])
        ltype = SparseDtype(subtype, left.fill_value)
        rtype = SparseDtype(subtype, right.fill_value)

        # TODO(GH-23092): pass copy=False. Need to fix astype_nansafe
        left = left.astype(ltype)
        right = right.astype(rtype)
        dtype = ltype.subtype
    else:
        dtype = ltype

    # dtype the result must have
    result_dtype = None

    if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0:
        with np.errstate(all='ignore'):
            result = op(left.get_values(), right.get_values())
            fill = op(_get_fill(left), _get_fill(right))

        if left.sp_index.ngaps == 0:
            index = left.sp_index
        else:
            index = right.sp_index
    elif left.sp_index.equals(right.sp_index):
        with np.errstate(all='ignore'):
            result = op(left.sp_values, right.sp_values)
            fill = op(_get_fill(left), _get_fill(right))
        index = left.sp_index
    else:
        if name[0] == 'r':
            left, right = right, left
            name = name[1:]

        if name in ('and', 'or') and dtype == 'bool':
            opname = 'sparse_{name}_uint8'.format(name=name)
            # to make template simple, cast here
            left_sp_values = left.sp_values.view(np.uint8)
            right_sp_values = right.sp_values.view(np.uint8)
            result_dtype = np.bool
        else:
            opname = 'sparse_{name}_{dtype}'.format(name=name, dtype=dtype)
            left_sp_values = left.sp_values
            right_sp_values = right.sp_values

        sparse_op = getattr(splib, opname)

        with np.errstate(all='ignore'):
            result, index, fill = sparse_op(
                left_sp_values, left.sp_index, left.fill_value,
                right_sp_values, right.sp_index, right.fill_value)

    if result_dtype is None:
        result_dtype = result.dtype

    return _wrap_result(name, result, index, fill, dtype=result_dtype)
Example #34
def test_datetimetz_dtype_mismatch(dtype2):
    dtype = DatetimeTZDtype(unit="ns", tz="US/Eastern")
    assert find_common_type([dtype, dtype2]) == object
    assert find_common_type([dtype2, dtype]) == object
Example #35
 def test_categorical_dtype(self):
     dtype = CategoricalDtype()
     self.assertEqual(find_common_type([dtype]), 'category')
     self.assertEqual(find_common_type([dtype, dtype]), 'category')
     self.assertEqual(find_common_type([np.object, dtype]), np.object)
Example #36
    def postprocess(self, index_info: IndexInfo,
                    context: IndexHandlerContext) -> None:
        if not self.need_postprocess(index_info, context):
            # do not need postprocess
            return

        chunks, nsplits = context.out_chunks, context.out_nsplits
        index_to_chunks = {c.index: c for c in chunks}

        axis = index_info.output_axis
        new_out_chunks = []
        chunk_axis_shapes = dict()
        for chunk_index in itertools.product(*(range(len(ns))
                                               for ax, ns in enumerate(nsplits)
                                               if ax != axis)):
            to_concat_chunks = []
            for i in range(len(nsplits[axis])):
                if axis == 0:
                    to_concat_index = (i, ) + chunk_index
                else:
                    to_concat_index = chunk_index + (i, )
                to_concat_chunks.append(index_to_chunks[to_concat_index])
            concat_chunk = context.concat_chunks(to_concat_chunks, axis)
            chunk_op = context.op.copy().reset_key()
            indexes = [slice(None)] * len(nsplits)
            indexes[axis] = index_info.raw_index
            params = concat_chunk.params
            if np.isscalar(index_info.raw_index):
                assert axis == 0
                if "columns_value" in params:
                    params["index_value"] = params.pop("columns_value")
                    params["dtype"] = find_common_type(
                        params["dtypes"].tolist())
                    del params["dtypes"]
                    if getattr(context.op.outputs[0], "name",
                               None) is not None:
                        params["name"] = context.op.outputs[0].name
                if len(params["index"]) == chunks[0].ndim:
                    index = list(params["index"])
                    index.pop(index_info.output_axis)
                    params["index"] = tuple(index)
                    shape = list(params["shape"])
                    shape.pop(index_info.output_axis)
                    params["shape"] = tuple(shape)
                if context.op.outputs[0].ndim == 0:
                    del params["index_value"]
            elif axis == 0:
                pd_index = pd.Index(index_info.raw_index)
                params["index_value"] = parse_index(pd_index, store_data=False)
                shape = list(params["shape"])
                shape[0] = len(pd_index)
                params["shape"] = shape
            else:
                if context.op.can_index_miss:
                    # reindex
                    params["dtypes"] = dtypes = to_concat_chunks[0].dtypes
                else:
                    params["dtypes"] = dtypes = concat_chunk.dtypes.loc[
                        index_info.raw_index]
                params["columns_value"] = parse_index(dtypes.index,
                                                      store_data=True)
                shape = list(params["shape"])
                shape[1] = len(dtypes)
                params["shape"] = tuple(shape)
            chunk_op._indexes = indexes
            chunk_op.stage = OperandStage.agg
            out_chunk = chunk_op.new_chunk([concat_chunk], kws=[params])
            if len(out_chunk.shape) != 0:
                chunk_axis_shapes[
                    out_chunk.index[axis]] = out_chunk.shape[axis]
            new_out_chunks.append(out_chunk)

        new_nsplits = list(nsplits)
        if np.isscalar(index_info.raw_index):
            new_nsplits = new_nsplits[:axis] + new_nsplits[axis + 1:]
        else:
            new_nsplits[axis] = (sum(chunk_axis_shapes.values()), )
        context.out_chunks = new_out_chunks
        context.out_nsplits = new_nsplits
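The find_common_type call in this handler covers the case where indexing collapses a DataFrame chunk to a Series: the Series' single dtype must be the common type of the frame's column dtypes. That computation in isolation:

import pandas as pd
from pandas.api.types import pandas_dtype
from pandas.core.dtypes.cast import find_common_type

dtypes = pd.Series([pandas_dtype("int64"), pandas_dtype("float32")])
print(find_common_type(dtypes.tolist()))  # float64 (int64 + float32 upcasts)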