Example #1
def dict_to_mgr(data: Dict, index, columns, dtype: Optional[DtypeObj],
                typ: str) -> Manager:
    """
    Segregate Series based on type and coerce into matrices.
    Needs to handle a lot of exceptional cases.

    Used in DataFrame.__init__
    """
    arrays: Union[Sequence[Any], Series]

    if columns is not None:
        from pandas.core.series import Series

        arrays = Series(data, index=columns, dtype=object)
        data_names = arrays.index

        missing = arrays.isna()
        if index is None:
            # GH10856
            # raise ValueError if only scalars in dict
            index = extract_index(arrays[~missing])
        else:
            index = ensure_index(index)

        # no obvious "empty" int column
        if missing.any() and not is_integer_dtype(dtype):
            nan_dtype: DtypeObj

            if dtype is None or (isinstance(dtype, np.dtype)
                                 and np.issubdtype(dtype, np.flexible)):
                # GH#1783
                nan_dtype = np.dtype("object")
            else:
                nan_dtype = dtype
            val = construct_1d_arraylike_from_scalar(np.nan, len(index),
                                                     nan_dtype)
            arrays.loc[missing] = [val] * missing.sum()

    else:
        keys = list(data.keys())
        columns = data_names = Index(keys)
        arrays = [com.maybe_iterable_to_list(data[k]) for k in keys]
        # GH#24096 need copy to be deep for datetime64tz case
        # TODO: See if we can avoid these copies
        arrays = [
            arr if not isinstance(arr, ABCIndex) else arr._data
            for arr in arrays
        ]
        arrays = [
            arr if not is_datetime64tz_dtype(arr) else arr.copy()
            for arr in arrays
        ]
    return arrays_to_mgr(arrays,
                         data_names,
                         index,
                         columns,
                         dtype=dtype,
                         typ=typ)
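
The missing-column handling above can be observed from the public constructor. A hedged sketch (the filler dtype can vary between pandas versions, but on the versions these examples come from the filler column is object, per GH#1783):

import pandas as pd

# "b" is requested via `columns` but absent from the dict, so the
# constructor path above fills it with NaN values.
df = pd.DataFrame({"a": [1, 2, 3]}, columns=["a", "b"])
print(df["b"].isna().all())  # True
print(df["b"].dtype)         # object on the versions shown here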
Example #2
def init_dict(data, index, columns, dtype=None):
    """
    Segregate Series based on type and coerce into matrices.
    Needs to handle a lot of exceptional cases.
    """
    if columns is not None:
        from pandas.core.series import Series

        arrays = Series(data, index=columns, dtype=object)
        data_names = arrays.index

        missing = arrays.isna()
        if index is None:
            # GH10856
            # raise ValueError if only scalars in dict
            index = extract_index(arrays[~missing])
        else:
            index = ensure_index(index)

        # no obvious "empty" int column
        if missing.any() and not is_integer_dtype(dtype):
            if dtype is None or np.issubdtype(dtype, np.flexible):
                # GH#1783
                nan_dtype = object
            else:
                nan_dtype = dtype
            val = construct_1d_arraylike_from_scalar(np.nan, len(index),
                                                     nan_dtype)
            arrays.loc[missing] = [val] * missing.sum()

    else:
        keys = com.dict_keys_to_ordered_list(data)
        columns = data_names = Index(keys)
        arrays = (com.maybe_iterable_to_list(data[k]) for k in keys)
        # GH#24096 need copy to be deep for datetime64tz case
        # TODO: See if we can avoid these copies
        arrays = [
            arr if not isinstance(arr, ABCIndexClass) else arr._data
            for arr in arrays
        ]
        arrays = [
            arr if not is_datetime64tz_dtype(arr) else arr.copy()
            for arr in arrays
        ]
    return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
Example #3
def dict_to_mgr(
    data: dict,
    index,
    columns,
    *,
    dtype: DtypeObj | None = None,
    typ: str = "block",
    copy: bool = True,
) -> Manager:
    """
    Segregate Series based on type and coerce into matrices.
    Needs to handle a lot of exceptional cases.

    Used in DataFrame.__init__
    """
    arrays: Sequence[Any] | Series

    if columns is not None:
        from pandas.core.series import Series

        arrays = Series(data, index=columns, dtype=object)
        missing = arrays.isna()
        if index is None:
            # GH10856
            # raise ValueError if only scalars in dict
            index = _extract_index(arrays[~missing])
        else:
            index = ensure_index(index)

        # no obvious "empty" int column
        if missing.any() and not is_integer_dtype(dtype):
            nan_dtype: DtypeObj

            if dtype is not None:
                # calling sanitize_array ensures we don't mix-and-match
                #  NA dtypes
                midxs = missing.values.nonzero()[0]
                for i in midxs:
                    arr = sanitize_array(arrays.iat[i], index, dtype=dtype)
                    arrays.iat[i] = arr
            else:
                # GH#1783
                nan_dtype = np.dtype("object")
                val = construct_1d_arraylike_from_scalar(
                    np.nan, len(index), nan_dtype)
                arrays.loc[missing] = [val] * missing.sum()

        arrays = list(arrays)
        columns = ensure_index(columns)

    else:
        keys = list(data.keys())
        columns = Index(keys)
        arrays = [com.maybe_iterable_to_list(data[k]) for k in keys]
        arrays = [
            arr if not isinstance(arr, Index) else arr._data for arr in arrays
        ]

    if copy:
        if typ == "block":
            # We only need to copy arrays that will not get consolidated, i.e.
            #  only EA arrays
            arrays = [
                x.copy() if isinstance(x, ExtensionArray) else x
                for x in arrays
            ]
        else:
            # dtype check to exclude e.g. range objects, scalars
            arrays = [x.copy() if hasattr(x, "dtype") else x for x in arrays]

    return arrays_to_mgr(arrays,
                         columns,
                         index,
                         dtype=dtype,
                         typ=typ,
                         consolidate=copy)
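
The `copy` keyword introduced in this variant only deep-copies extension arrays for the default "block" manager, since block consolidation copies the rest anyway. A small illustration via the public constructor (a sketch; the exact copy semantics depend on the pandas version and on copy-on-write settings):

import pandas as pd

arr = pd.array([1, 2, 3], dtype="Int64")  # an ExtensionArray
df = pd.DataFrame({"a": arr})             # dict input defaults to copying

arr[0] = 99                               # mutate the source afterwards
print(df["a"].iloc[0])                    # 1 on recent pandas: the EA was copied at construction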
Example #4
def dict_to_mgr(data: Dict, index, columns, dtype: Optional[DtypeObj],
                typ: str) -> Manager:
    """
    Segregate Series based on type and coerce into matrices.
    Needs to handle a lot of exceptional cases.

    Used in DataFrame.__init__
    """
    arrays: Union[Sequence[Any], Series]

    if columns is not None:
        from pandas.core.series import Series

        arrays = Series(data, index=columns, dtype=object)
        data_names = arrays.index

        missing = arrays.isna()
        if index is None:
            # GH10856
            # raise ValueError if only scalars in dict
            index = extract_index(arrays[~missing])
        else:
            index = ensure_index(index)

        # no obvious "empty" int column
        if missing.any() and not is_integer_dtype(dtype):
            if dtype is None or (
                    not is_extension_array_dtype(dtype)
                    # error: Argument 1 to "issubdtype" has incompatible type
                    # "Union[dtype, ExtensionDtype]"; expected "Union[dtype, None,
                    # type, _SupportsDtype, str, Tuple[Any, int], Tuple[Any,
                    # Union[int, Sequence[int]]], List[Any], _DtypeDict, Tuple[Any,
                    # Any]]"
                    and np.issubdtype(dtype,
                                      np.flexible)  # type: ignore[arg-type]
            ):
                # GH#1783

                # error: Value of type variable "_DTypeScalar" of "dtype" cannot be
                # "object"
                nan_dtype = np.dtype(object)  # type: ignore[type-var]
            else:
                # error: Incompatible types in assignment (expression has type
                # "Union[dtype, ExtensionDtype]", variable has type "dtype")
                nan_dtype = dtype  # type: ignore[assignment]
            val = construct_1d_arraylike_from_scalar(np.nan, len(index),
                                                     nan_dtype)
            arrays.loc[missing] = [val] * missing.sum()

    else:
        keys = list(data.keys())
        columns = data_names = Index(keys)
        arrays = [com.maybe_iterable_to_list(data[k]) for k in keys]
        # GH#24096 need copy to be deep for datetime64tz case
        # TODO: See if we can avoid these copies
        arrays = [
            arr if not isinstance(arr, ABCIndex) else arr._data
            for arr in arrays
        ]
        arrays = [
            arr if not is_datetime64tz_dtype(arr) else arr.copy()
            for arr in arrays
        ]
    return arrays_to_mgr(arrays,
                         data_names,
                         index,
                         columns,
                         dtype=dtype,
                         typ=typ)
Example #5
def dict_to_mgr(
    data: dict,
    index,
    columns,
    *,
    dtype: DtypeObj | None = None,
    typ: str = "block",
    copy: bool = True,
) -> Manager:
    """
    Segregate Series based on type and coerce into matrices.
    Needs to handle a lot of exceptional cases.

    Used in DataFrame.__init__
    """
    arrays: Sequence[Any] | Series

    if columns is not None:
        from pandas.core.series import Series

        arrays = Series(data, index=columns, dtype=object)
        data_names = arrays.index
        missing = arrays.isna()
        if index is None:
            # GH10856
            # raise ValueError if only scalars in dict
            index = _extract_index(arrays[~missing])
        else:
            index = ensure_index(index)

        # no obvious "empty" int column
        if missing.any() and not is_integer_dtype(dtype):
            nan_dtype: DtypeObj

            if dtype is None or (isinstance(dtype, np.dtype)
                                 and np.issubdtype(dtype, np.flexible)):
                # GH#1783
                nan_dtype = np.dtype("object")
            else:
                nan_dtype = dtype
            val = construct_1d_arraylike_from_scalar(np.nan, len(index),
                                                     nan_dtype)
            arrays.loc[missing] = [val] * missing.sum()

        arrays = list(arrays)

    else:
        keys = list(data.keys())
        columns = data_names = Index(keys)
        arrays = [com.maybe_iterable_to_list(data[k]) for k in keys]
        # GH#24096 need copy to be deep for datetime64tz case
        # TODO: See if we can avoid these copies
        arrays = [
            arr if not isinstance(arr, ABCIndex) else arr._data
            for arr in arrays
        ]
        arrays = [
            arr if not is_datetime64tz_dtype(arr) else arr.copy()
            for arr in arrays
        ]

    if copy:
        # arrays_to_mgr (via form_blocks) won't make copies for EAs
        # dtype attr check to exclude EADtype-castable strs
        arrays = [
            x if not hasattr(x, "dtype")
            or not isinstance(x.dtype, ExtensionDtype) else x.copy()
            for x in arrays
        ]
        # TODO: can we get rid of the dt64tz special case above?

    return arrays_to_mgr(arrays,
                         data_names,
                         index,
                         columns,
                         dtype=dtype,
                         typ=typ,
                         consolidate=copy)
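
The GH10856 comment that appears in every variant covers the case where the dict contains only scalars, leaving extract_index/_extract_index nothing to infer a row index from. A short demonstration through DataFrame.__init__:

import pandas as pd

try:
    pd.DataFrame({"a": 1, "b": 2})        # all scalars, no index
except ValueError as err:
    print(err)                            # "If using all scalar values, you must pass an index"

df = pd.DataFrame({"a": 1, "b": 2}, index=[0])  # fine once an index is supplied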