    def test_1d_bool(self):
        """take_1d on a bool array: plain take, then a take with a -1 slot."""
        flags = np.array([0, 1, 0], dtype=bool)

        # Without a -1 entry the result must match ndarray.take exactly.
        positions = [0, 2, 2, 1]
        tm.assert_numpy_array_equal(algos.take_1d(flags, positions),
                                    flags.take(positions))

        # With a -1 entry the test expects the result to be object dtype.
        with_missing = algos.take_1d(flags, [0, 2, -1])
        assert with_missing.dtype == np.object_
    def wrapper(left, right, name=name, na_op=na_op):
        # Binary-operation wrapper for `left <op> right`. `name` and `na_op`
        # are bound as defaults from the enclosing scope at definition time.
        # NOTE(review): this excerpt appears to have lost lines (several calls
        # are never closed and there is no `else:` before the fall-through
        # branches) -- compare with upstream before relying on the control
        # flow exactly as shown here.

        if isinstance(right, pd.DataFrame):
            # Returning NotImplemented lets Python try the other operand
            # (presumably the DataFrame's reflected operator -- confirm).
            return NotImplemented

        # NOTE(review): the call below is truncated in this excerpt.
        time_converted = _TimeOp.maybe_convert_for_time_op(left, right, name,

        if time_converted is None:
            # No time-specific conversion: operate on the operands as given.
            lvalues, rvalues = left, right
            dtype = None
            wrap_results = lambda x: x
        elif time_converted is NotImplemented:
            return NotImplemented
            # NOTE(review): the lines below look like a lost `else:` branch
            # that unpacks the converted operands from `time_converted`.
            left, right = time_converted.left, time_converted.right
            lvalues, rvalues = time_converted.lvalues, time_converted.rvalues
            dtype = time_converted.dtype
            wrap_results = time_converted.wrap_results
            na_op = time_converted.na_op

        if isinstance(rvalues, ABCSeries):
            # Series operand: work on the underlying values and decide which
            # index the result should carry.
            rindex = getattr(rvalues, 'index', rvalues)
            name = _maybe_match_name(left, rvalues)
            lvalues = getattr(lvalues, 'values', lvalues)
            rvalues = getattr(rvalues, 'values', rvalues)
            if left.index.equals(rindex):
                index = left.index
                # NOTE(review): the join call below is truncated; it looks
                # like the start of the branch for non-equal indexes.
                index, lidx, ridx = left.index.join(rindex, how='outer',

                # A None indexer means that side needs no reordering.
                if lidx is not None:
                    lvalues = algos.take_1d(lvalues, lidx)

                if ridx is not None:
                    rvalues = algos.take_1d(rvalues, ridx)

            arr = na_op(lvalues, rvalues)

            return left._constructor(wrap_results(arr), index=index,
                                     name=name, dtype=dtype)
            # scalars
            if (hasattr(lvalues, 'values') and
                    not isinstance(lvalues, pd.DatetimeIndex)):
                lvalues = lvalues.values

            # NOTE(review): the constructor call below is truncated.
            return left._constructor(wrap_results(na_op(lvalues, rvalues)),
                                     index=left.index, name=left.name,
文件: merge.py 项目: clamus/pandas
    def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
        # insert group keys
        # NOTE(review): this excerpt is incomplete -- several `if` statements
        # have no body and three statements are cut off mid-expression.
        # Restore from upstream before relying on it.

        # Walk the join keys in parallel: output name, left key, right key.
        keys = zip(self.join_names, self.left_on, self.right_on)
        for i, (name, lname, rname) in enumerate(keys):
            if not _should_fill(lname, rname):

            if name in result:
                # The key column already exists in the result; locate it.
                key_indexer = result.columns.get_loc(name)

                if left_indexer is not None and right_indexer is not None:

                    if name in self.left:
                        if len(self.left) == 0:

                        # Positions where the left indexer is -1.
                        na_indexer = (left_indexer == -1).nonzero()[0]
                        if len(na_indexer) == 0:

                        right_na_indexer = right_indexer.take(na_indexer)
                        result.iloc[na_indexer, key_indexer] = (
                    elif name in self.right:
                        if len(self.right) == 0:

                        # Positions where the right indexer is -1.
                        na_indexer = (right_indexer == -1).nonzero()[0]
                        if len(na_indexer) == 0:

                        left_na_indexer = left_indexer.take(na_indexer)
                        result.iloc[na_indexer, key_indexer] = (
            elif left_indexer is not None \
                    and isinstance(self.left_join_keys[i], np.ndarray):

                # Unnamed keys get a positional placeholder name.
                if name is None:
                    name = 'key_%d' % i

                # a faster way?
                key_col = algos.take_1d(self.left_join_keys[i], left_indexer)
                na_indexer = (left_indexer == -1).nonzero()[0]
                right_na_indexer = right_indexer.take(na_indexer)
                # NOTE(review): the `put` call below is truncated.
                key_col.put(na_indexer, algos.take_1d(self.right_join_keys[i],
                result.insert(i, name, key_col)
    def test_1d_fill_nonna(self, dtype_fill_out_dtype):
        """Check take_1d values and result dtype when a fill_value is given."""
        dtype, fill_value, out_dtype = dtype_fill_out_dtype
        data = np.random.randint(0, 2, 4).astype(dtype)

        # Indexer containing -1: the last slot must equal fill_value and the
        # result dtype must be out_dtype.
        with_fill = algos.take_1d(data, [2, 1, 0, -1], fill_value=fill_value)
        assert (with_fill[[0, 1, 2]] == data[[2, 1, 0]]).all()
        assert with_fill[3] == fill_value
        assert with_fill.dtype == out_dtype

        # Indexer without -1: values match plain indexing and the dtype is
        # the input dtype.
        positions = [2, 1, 0, 1]
        no_fill = algos.take_1d(data, positions, fill_value=fill_value)
        assert (no_fill[[0, 1, 2, 3]] == data[positions]).all()
        assert no_fill.dtype == dtype
    def _delegate_property_get(self, name):
        """
        Fetch attribute ``name`` from ``self.values`` and wrap it as a Series.

        Values that are neither an ndarray nor list-like are returned as-is.
        Anything else is converted to an ndarray (integer ndarrays are cast
        to int64 first), re-indexed through the original's category codes
        when ``self.orig`` is set, and returned as a new Series on
        ``self.index`` named ``self.name``.
        """
        from pandas import Series

        raw = getattr(self.values, name)

        if not isinstance(raw, np.ndarray):
            # Non-list-like results are handed back untouched.
            if not is_list_like(raw):
                return raw
        elif is_integer_dtype(raw):
            # maybe need to upcast (ints)
            raw = raw.astype('int64')

        data = np.asarray(raw)

        # Re-expand through the category codes of the original object.
        if self.orig is not None:
            data = take_1d(data, self.orig.cat.codes)

        # The Series built here is a new object, not a view on the source.
        out = Series(data, index=self.index, name=self.name)

        # Message stored so that setting on this object triggers a
        # SettingWithCopyWarning/Error.
        out.is_copy = ("modifications to a property of a datetimelike "
                       "object are not supported and are discarded. "
                       "Change values on the original.")

        return out
    def get_indexer(self, target, method=None, limit=None, tolerance=None):
        # Compute an indexer for `target` against this index.
        # NOTE(review): this excerpt is incomplete -- the condition starting
        # at `if (isinstance(target, CategoricalIndex) and` is never closed,
        # the `_recode_for_categories(` call is cut off, and `else:` lines
        # appear to be missing. Restore from upstream before relying on it.
        from pandas.core.arrays.categorical import _recode_for_categories

        method = missing.clean_reindex_fill_method(method)
        target = ibase._ensure_index(target)

        # Fast path: identical unique index maps position-to-position.
        if self.is_unique and self.equals(target):
            return np.arange(len(self), dtype='intp')

        # No fill method is supported here.
        if method == 'pad' or method == 'backfill':
            raise NotImplementedError("method='pad' and method='backfill' not "
                                      "implemented yet for CategoricalIndex")
        elif method == 'nearest':
            raise NotImplementedError("method='nearest' not implemented yet "
                                      'for CategoricalIndex')

        if (isinstance(target, CategoricalIndex) and
            if self.values.equals(target.values):
                # we have the same codes
                codes = target.codes
                codes = _recode_for_categories(target.codes,
            if isinstance(target, CategoricalIndex):
                # Translate target's codes into this index's category
                # positions; -1 is used as the fill for unmatched codes.
                code_indexer = self.categories.get_indexer(target.categories)
                codes = take_1d(code_indexer, target.codes, fill_value=-1)
                codes = self.categories.get_indexer(target)

        # Look the codes up in the engine; only the indexer half of the
        # returned pair is used.
        indexer, _ = self._engine.get_indexer_non_unique(codes)
        return _ensure_platform_int(indexer)
    def _reindex_with_indexers(self, reindexers, method=None, fill_value=None,
                               limit=None, copy=False, allow_dups=False):
        # Build a new object from per-axis (index, indexer) pairs in
        # `reindexers` (key 0 = rows, key 1 = columns).
        # NOTE(review): this excerpt is incomplete -- `if col not in self:`
        # has no body, the `take_1d(` call and the final constructor call are
        # cut off, and an `else:` appears to be missing. Restore from
        # upstream before relying on it.

        if method is not None or limit is not None:
            raise NotImplementedError("cannot reindex with a method or limit "
                                      "with sparse")

        if fill_value is None:
            fill_value = np.nan

        # Missing axes default to (None, None), i.e. no reindexing requested.
        index, row_indexer = reindexers.get(0, (None, None))
        columns, col_indexer = reindexers.get(1, (None, None))

        if columns is None:
            columns = self.columns

        new_arrays = {}
        for col in columns:
            if col not in self:
            if row_indexer is not None:
                new_arrays[col] = algos.take_1d(self[col].get_values(),
                new_arrays[col] = self[col]

        return self._constructor(new_arrays, index=index,
def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None):
    # Align each element of `data` to `columns` and stack the aligned values
    # into a single 2-D array.
    # NOTE(review): this excerpt is incomplete -- `else:` lines appear to be
    # missing (the cache lookup is immediately overwritten, and the final
    # `return` follows another `return`), and the `_convert_object_array(`
    # call is cut off. Restore from upstream before relying on it.
    if columns is None:
        columns = _get_objs_combined_axis(data, sort=False)

    # Indexers are cached by the identity of the index object so that inputs
    # sharing one index object reuse a single get_indexer result.
    indexer_cache = {}

    aligned_values = []
    for s in data:
        index = getattr(s, 'index', None)
        if index is None:
            # No index attribute: fall back to a default index of len(s).
            index = ibase.default_index(len(s))

        if id(index) in indexer_cache:
            indexer = indexer_cache[id(index)]
            indexer = indexer_cache[id(index)] = index.get_indexer(columns)

        values = com.values_from_object(s)
        aligned_values.append(algorithms.take_1d(values, indexer))

    values = np.vstack(aligned_values)

    if values.dtype == np.object_:
        content = list(values.T)
        return _convert_object_array(content, columns, dtype=dtype,
        return values.T, columns
    def test_1d_other_dtypes(self):
        """take_1d on float32 data is expected to put NaN in the -1 slot."""
        values = np.random.randn(10).astype(np.float32)
        positions = [1, 2, 3, -1]

        taken = algos.take_1d(values, positions)

        # ndarray.take reads -1 as "last element", so overwrite that slot
        # with NaN to build the expected array.
        wanted = values.take(positions)
        wanted[-1] = np.nan

        tm.assert_almost_equal(taken, wanted)
def _take_new_index(obj, indexer, new_index, axis=0):
    """
    Reorder ``obj`` by ``indexer`` and attach ``new_index`` to the result.

    Parameters
    ----------
    obj : Series or DataFrame
        Object whose values are taken.
    indexer : array-like of int
        Positions passed to ``take_1d`` / ``reindex_indexer``.
    new_index : Index
        Index of the returned object.
    axis : int, default 0
        Only ``0`` is supported for a DataFrame.

    Returns
    -------
    Series or DataFrame
        Same kind of object as ``obj``.

    Raises
    ------
    NotImplementedError
        If ``obj`` is a DataFrame and ``axis == 1``.
    ValueError
        If ``obj`` is neither a Series nor a DataFrame.
    """
    from pandas.core.api import Series, DataFrame

    if isinstance(obj, Series):
        new_values = algos.take_1d(obj.values, indexer)
        return Series(new_values, index=new_index, name=obj.name)
    elif isinstance(obj, DataFrame):
        if axis == 1:
            raise NotImplementedError("axis 1 is not supported")
        # NOTE(review): the manager call uses axis=1 for a row reindex --
        # presumably the block manager's axes are ordered opposite to the
        # DataFrame's; confirm.
        return DataFrame(obj._data.reindex_indexer(
            new_axis=new_index, indexer=indexer, axis=1))
    else:
        # Previously unreachable (it followed the ``return`` above), so
        # unsupported inputs silently produced None.
        raise ValueError("'obj' should be either a Series or a DataFrame")
    def test_1d_with_out(self, dtype_can_hold_na, writeable):
        """
        Exercise ``take_1d`` with a caller-supplied ``out`` buffer.

        Without a -1 entry the result must match ``ndarray.take``. With a -1
        entry, dtypes that can hold NA must produce NaN in that slot; other
        dtypes must raise ``TypeError`` matching ``self.fill_error``, while
        ``ndarray.take`` with the same arguments still succeeds.
        """
        dtype, can_hold_na = dtype_can_hold_na

        data = np.random.randint(0, 2, 4).astype(dtype)
        data.flags.writeable = writeable

        indexer = [2, 1, 0, 1]
        out = np.empty(4, dtype=dtype)
        algos.take_1d(data, indexer, out=out)

        expected = data.take(indexer)
        tm.assert_almost_equal(out, expected)

        indexer = [2, 1, 0, -1]
        out = np.empty(4, dtype=dtype)

        if can_hold_na:
            algos.take_1d(data, indexer, out=out)
            expected = data.take(indexer)
            expected[3] = np.nan
            tm.assert_almost_equal(out, expected)
        else:
            # The failure check belongs only to dtypes that cannot hold NA;
            # it was previously run inside the ``can_hold_na`` branch.
            with pytest.raises(TypeError, match=self.fill_error):
                algos.take_1d(data, indexer, out=out)

            # No Exception otherwise.
            data.take(indexer, out=out)
def union_categoricals(to_union):
    """
    Combine list-like of Categoricals, unioning categories. All
    must have the same dtype, and none can be ordered.

    .. versionadded:: 0.19.0

    Parameters
    ----------
    to_union : list-like of Categoricals

    Returns
    -------
    Categorical
       A single array, categories will be ordered as they
       appear in the list

    Raises
    ------
    TypeError
        If any of the categoricals are ordered or all do not
        have the same dtype
    ValueError
        Empty list of categoricals passed
    """
    from pandas import Index, Categorical

    if len(to_union) == 0:
        raise ValueError("No Categoricals to union")

    first = to_union[0]
    if any(c.ordered for c in to_union):
        raise TypeError("Can only combine unordered Categoricals")

    if not all(is_dtype_equal(c.categories.dtype, first.categories.dtype)
               for c in to_union):
        raise TypeError("dtype of categories must be the same")

    # Union of all categories, in order of first appearance.
    cats = first.categories
    unique_cats = cats.append([c.categories for c in to_union[1:]]).unique()
    categories = Index(unique_cats)

    new_codes = []
    for c in to_union:
        if len(c.categories) > 0:
            # Map this input's codes onto positions in the unioned
            # categories; -1 is the fill for missing codes.
            indexer = categories.get_indexer(c.categories)
            new_codes.append(take_1d(indexer, c.codes, fill_value=-1))
        else:
            # must be all NaN
            new_codes.append(c.codes)

    new_codes = np.concatenate(new_codes)
    return Categorical(new_codes, categories=categories, ordered=False,
                       fastpath=True)
        def _test_dtype(dtype, can_hold_na, writeable=True):
            """
            Check ``take_1d`` with an ``out`` buffer for one dtype.

            Dtypes that can hold NA must yield NaN in the -1 slot; other
            dtypes must raise ``TypeError`` matching ``self.fill_error``
            while ``ndarray.take`` with the same arguments still succeeds.
            """
            data = np.random.randint(0, 2, 4).astype(dtype)
            data.flags.writeable = writeable

            indexer = [2, 1, 0, 1]
            out = np.empty(4, dtype=dtype)
            algos.take_1d(data, indexer, out=out)
            expected = data.take(indexer)
            tm.assert_almost_equal(out, expected)

            indexer = [2, 1, 0, -1]
            out = np.empty(4, dtype=dtype)
            if can_hold_na:
                algos.take_1d(data, indexer, out=out)
                expected = data.take(indexer)
                expected[3] = np.nan
                tm.assert_almost_equal(out, expected)
            else:
                # The failure check belongs only to dtypes that cannot hold
                # NA; it was previously run inside the can_hold_na branch.
                with tm.assertRaisesRegexp(TypeError, self.fill_error):
                    algos.take_1d(data, indexer, out=out)
                # no exception o/w
                data.take(indexer, out=out)
 def parallel_take1d():
     """Call take_1d on the "col" column values with the outer `indexer`."""
     column_values = df["col"].values
     take_1d(column_values, indexer)
文件: base.py 项目: zheewang/pandas
    def _map_values(self, mapper, na_action=None):
        """An internal function that maps values using the input
        correspondence (which can be a dict, Series, or function).

        Parameters
        ----------
        mapper : function, dict, or Series
            The input correspondence object
        na_action : {None, 'ignore'}
            If 'ignore', propagate NA values, without passing them to the
            mapping function

        Returns
        -------
        applied : Union[Index, MultiIndex], inferred
            The output of the mapping function applied to the index.
            If the function returns a tuple with more than one element
            a MultiIndex will be returned.

        Raises
        ------
        NotImplementedError
            If ``na_action`` is given for an extension-type dtype.
        """

        # we can fastpath dict/Series to an efficient map
        # as we know that we are not going to have to yield
        # python types
        if isinstance(mapper, dict):
            if hasattr(mapper, '__missing__'):
                # If a dictionary subclass defines a default value method,
                # convert mapper to a lookup function (GH #15999).
                dict_with_default = mapper
                mapper = lambda x: dict_with_default[x]
            else:
                # Dictionary does not have a default. Thus it's safe to
                # convert to an Series for efficiency.
                # we specify the keys here to handle the
                # possibility that they are tuples
                from pandas import Series
                mapper = Series(mapper)

        if isinstance(mapper, ABCSeries):
            # Since values were input this means we came from either
            # a dict or a series and mapper should be an index
            if is_extension_type(self.dtype):
                values = self._values
            else:
                values = self.values

            indexer = mapper.index.get_indexer(values)
            new_values = algorithms.take_1d(mapper._values, indexer)

            return new_values

        # we must convert to python types
        if is_extension_type(self.dtype):
            values = self._values
            if na_action is not None:
                raise NotImplementedError
            map_f = lambda values, f: values.map(f)
        else:
            values = self.astype(object)
            values = getattr(values, 'values', values)
            if na_action == 'ignore':

                def map_f(values, f):
                    # NOTE(review): the mask argument was cut off in this
                    # copy; restored to match the newer `_map_values` in
                    # this file -- confirm `isna` is in scope here.
                    return lib.map_infer_mask(values, f,
                                              isna(values).view(np.uint8))
            else:
                map_f = lib.map_infer

        # mapper is a function
        new_values = map_f(values, mapper)

        return new_values
文件: concat.py 项目: zhuw1989/pandas
def union_categoricals(to_union, sort_categories=False, ignore_order=False):
    Combine list-like of Categorical-like, unioning categories. All
    categories must have the same dtype.

    .. versionadded:: 0.19.0

    to_union : list-like of Categorical, CategoricalIndex,
               or Series with dtype='category'
    sort_categories : boolean, default False
        If true, resulting categories will be lexsorted, otherwise
        they will be ordered as they appear in the data.
    ignore_order: boolean, default False
        If true, the ordered attribute of the Categoricals will be ignored.
        Results in an unordered categorical.

        .. versionadded:: 0.20.0

    result : Categorical

        - all inputs do not have the same dtype
        - all inputs do not have the same ordered property
        - all inputs are ordered and their categories are not identical
        - sort_categories=True and Categoricals are ordered
        Empty list of categoricals passed


    To learn more about categories, see `link


    >>> from pandas.api.types import union_categoricals

    If you want to combine categoricals that do not necessarily have
    the same categories, `union_categoricals` will combine a list-like
    of categoricals. The new categories will be the union of the
    categories being combined.

    >>> a = pd.Categorical(["b", "c"])
    >>> b = pd.Categorical(["a", "b"])
    >>> union_categoricals([a, b])
    [b, c, a, b]
    Categories (3, object): [b, c, a]

    By default, the resulting categories will be ordered as they appear
    in the `categories` of the data. If you want the categories to be
    lexsorted, use `sort_categories=True` argument.

    >>> union_categoricals([a, b], sort_categories=True)
    [b, c, a, b]
    Categories (3, object): [a, b, c]

    `union_categoricals` also works with the case of combining two
    categoricals of the same categories and order information (e.g. what
    you could also `append` for).

    >>> a = pd.Categorical(["a", "b"], ordered=True)
    >>> b = pd.Categorical(["a", "b", "a"], ordered=True)
    >>> union_categoricals([a, b])
    [a, b, a, b, a]
    Categories (2, object): [a < b]

    Raises `TypeError` because the categories are ordered and not identical.

    >>> a = pd.Categorical(["a", "b"], ordered=True)
    >>> b = pd.Categorical(["a", "b", "c"], ordered=True)
    >>> union_categoricals([a, b])
    TypeError: to union ordered Categoricals, all categories must be the same

    New in version 0.20.0

    Ordered categoricals with different categories or orderings can be
    combined by using the `ignore_order=True` argument.

    >>> a = pd.Categorical(["a", "b", "c"], ordered=True)
    >>> b = pd.Categorical(["c", "b", "a"], ordered=True)
    >>> union_categoricals([a, b], ignore_order=True)
    [a, b, c, c, b, a]
    Categories (3, object): [a, b, c]

    `union_categoricals` also works with a `CategoricalIndex`, or `Series`
    containing categorical data, but note that the resulting array will
    always be a plain `Categorical`

    >>> a = pd.Series(["b", "c"], dtype='category')
    >>> b = pd.Series(["a", "b"], dtype='category')
    >>> union_categoricals([a, b])
    [b, c, a, b]
    Categories (3, object): [b, c, a]
    from pandas import Index, Categorical, CategoricalIndex, Series
    from pandas.core.categorical import _recode_for_categories

    if len(to_union) == 0:
        raise ValueError('No Categoricals to union')

    def _maybe_unwrap(x):
        if isinstance(x, (CategoricalIndex, Series)):
            return x.values
        elif isinstance(x, Categorical):
            return x
            raise TypeError("all components to combine must be Categorical")

    to_union = [_maybe_unwrap(x) for x in to_union]
    first = to_union[0]

    if not all(
            is_dtype_equal(other.categories.dtype, first.categories.dtype)
            for other in to_union[1:]):
        raise TypeError("dtype of categories must be the same")

    ordered = False
    if all(first.is_dtype_equal(other) for other in to_union[1:]):
        # identical categories - fastpath
        categories = first.categories
        ordered = first.ordered
        new_codes = np.concatenate([c.codes for c in to_union])

        if sort_categories and not ignore_order and ordered:
            raise TypeError("Cannot use sort_categories=True with "
                            "ordered Categoricals")

        if sort_categories and not categories.is_monotonic_increasing:
            categories = categories.sort_values()
            indexer = categories.get_indexer(first.categories)

            from pandas.core.algorithms import take_1d
            new_codes = take_1d(indexer, new_codes, fill_value=-1)
    elif ignore_order or all(not c.ordered for c in to_union):
        # different categories - union and recode
        cats = first.categories.append([c.categories for c in to_union[1:]])
        categories = Index(cats.unique())
        if sort_categories:
            categories = categories.sort_values()

        new_codes = []
        for c in to_union:
                _recode_for_categories(c.codes, c.categories, categories))
        new_codes = np.concatenate(new_codes)
        # ordered - to show a proper error message
        if all(c.ordered for c in to_union):
            msg = ("to union ordered Categoricals, "
                   "all categories must be the same")
            raise TypeError(msg)
            raise TypeError('Categorical.ordered must be the same')

    if ignore_order:
        ordered = False

    return Categorical(new_codes,
文件: base.py 项目: japython/pandas
    def _map_values(self, mapper, na_action=None):
        """
        An internal function that maps values using the input
        correspondence (which can be a dict, Series, or function).

        Parameters
        ----------
        mapper : function, dict, or Series
            The input correspondence object
        na_action : {None, 'ignore'}
            If 'ignore', propagate NA values, without passing them to the
            mapping function

        Returns
        -------
        Union[Index, MultiIndex], inferred
            The output of the mapping function applied to the index.
            If the function returns a tuple with more than one element
            a MultiIndex will be returned.

        Raises
        ------
        NotImplementedError
            If ``na_action`` is given and the values are an extension array
            that provides its own ``map``.
        """
        # we can fastpath dict/Series to an efficient map
        # as we know that we are not going to have to yield
        # python types
        if is_dict_like(mapper):
            if isinstance(mapper, dict) and hasattr(mapper, "__missing__"):
                # If a dictionary subclass defines a default value method,
                # convert mapper to a lookup function (GH #15999).
                dict_with_default = mapper
                mapper = lambda x: dict_with_default[x]
            else:
                # Dictionary does not have a default. Thus it's safe to
                # convert to an Series for efficiency.
                # we specify the keys here to handle the
                # possibility that they are tuples

                # The return value of mapping with an empty mapper is
                # expected to be pd.Series(np.nan, ...). As np.nan is
                # of dtype float64 the return value of this method should
                # be float64 as well
                mapper = create_series_with_explicit_dtype(
                    mapper, dtype_if_empty=np.float64
                )

        if isinstance(mapper, ABCSeries):
            # Since values were input this means we came from either
            # a dict or a series and mapper should be an index
            if is_categorical_dtype(self._values):
                # use the built in categorical series mapper which saves
                # time by mapping the categories instead of all values
                return self._values.map(mapper)
            if is_extension_array_dtype(self.dtype):
                values = self._values
            else:
                values = self.values

            indexer = mapper.index.get_indexer(values)
            new_values = algorithms.take_1d(mapper._values, indexer)

            return new_values

        # we must convert to python types
        if is_extension_array_dtype(self.dtype) and hasattr(self._values, "map"):
            # GH#23179 some EAs do not have `map`
            values = self._values
            if na_action is not None:
                raise NotImplementedError
            map_f = lambda values, f: values.map(f)
        else:
            values = self.astype(object)
            values = getattr(values, "values", values)
            if na_action == "ignore":

                def map_f(values, f):
                    return lib.map_infer_mask(values, f, isna(values).view(np.uint8))

            else:
                map_f = lib.map_infer

        # mapper is a function
        new_values = map_f(values, mapper)

        return new_values
 def parallel_take1d():
     """Call take_1d on the 'col' column values with the outer `indexer`."""
     column_values = df['col'].values
     take_1d(column_values, indexer)
文件: concat.py 项目: zmyer/pandas
def union_categoricals(to_union, sort_categories=False, ignore_order=False):
    """
    Combine list-like of Categorical-like, unioning categories. All
    categories must have the same dtype.

    .. versionadded:: 0.19.0

    Parameters
    ----------
    to_union : list-like of Categorical, CategoricalIndex,
               or Series with dtype='category'
    sort_categories : boolean, default False
        If true, resulting categories will be lexsorted, otherwise
        they will be ordered as they appear in the data.
    ignore_order: boolean, default False
        If true, the ordered attribute of the Categoricals will be ignored.
        Results in an unordered categorical.

        .. versionadded:: 0.20.0

    Returns
    -------
    result : Categorical

    Raises
    ------
    TypeError
        - all inputs do not have the same dtype
        - all inputs do not have the same ordered property
        - all inputs are ordered and their categories are not identical
        - sort_categories=True and Categoricals are ordered
    ValueError
        Empty list of categoricals passed
    """
    from pandas import Index, Categorical, CategoricalIndex, Series

    if len(to_union) == 0:
        raise ValueError('No Categoricals to union')

    def _maybe_unwrap(x):
        # Reduce every accepted container to its underlying Categorical.
        if isinstance(x, (CategoricalIndex, Series)):
            return x.values
        elif isinstance(x, Categorical):
            return x
        else:
            raise TypeError("all components to combine must be Categorical")

    to_union = [_maybe_unwrap(x) for x in to_union]
    first = to_union[0]

    if not all(
            is_dtype_equal(other.categories.dtype, first.categories.dtype)
            for other in to_union[1:]):
        raise TypeError("dtype of categories must be the same")

    ordered = False
    if all(first.is_dtype_equal(other) for other in to_union[1:]):
        # identical categories - fastpath
        categories = first.categories
        ordered = first.ordered
        new_codes = np.concatenate([c.codes for c in to_union])

        if sort_categories and not ignore_order and ordered:
            raise TypeError("Cannot use sort_categories=True with "
                            "ordered Categoricals")

        if sort_categories and not categories.is_monotonic_increasing:
            # Re-point the concatenated codes at the sorted categories.
            categories = categories.sort_values()
            indexer = categories.get_indexer(first.categories)
            new_codes = take_1d(indexer, new_codes, fill_value=-1)
    elif ignore_order or all(not c.ordered for c in to_union):
        # different categories - union and recode
        cats = first.categories.append([c.categories for c in to_union[1:]])
        categories = Index(cats.unique())
        if sort_categories:
            categories = categories.sort_values()

        new_codes = []
        for c in to_union:
            if len(c.categories) > 0:
                indexer = categories.get_indexer(c.categories)
                new_codes.append(take_1d(indexer, c.codes, fill_value=-1))
            else:
                # must be all NaN
                new_codes.append(c.codes)
        new_codes = np.concatenate(new_codes)
    else:
        # ordered - to show a proper error message
        if all(c.ordered for c in to_union):
            msg = ("to union ordered Categoricals, "
                   "all categories must be the same")
            raise TypeError(msg)
        else:
            raise TypeError('Categorical.ordered must be the same')

    if ignore_order:
        ordered = False

    # NOTE(review): the constructor call was cut off after ``new_codes,``;
    # the remaining arguments follow the locals computed above and the
    # one-argument ``union_categoricals`` in this file -- confirm.
    return Categorical(new_codes, categories=categories, ordered=ordered,
                       fastpath=True)
 def get_level_values(num):
     # Expand level `num` of `vals` (from the enclosing scope) by taking the
     # level's unique values at the positions given by its labels.
     # NOTE(review): the take_1d call below is cut off in this excerpt;
     # its remaining arguments are unknown.
     unique = vals.levels[num]  # .values
     labels = vals.labels[num]
     filled = algos.take_1d(unique._values, labels,
     return filled
 def parallel_take1d():
     """Call take_1d on the 'col' column values with the outer `indexer`."""
     column_values = df['col'].values
     take_1d(column_values, indexer)
文件: concat.py 项目: zer0kg/pandas
def _get_mgr_concatenation_plan(mgr, indexers):
    # NOTE(review): this excerpt is incomplete -- the docstring delimiters
    # and section headers are missing (the prose lines below are bare text),
    # the `for` header over get_blkno_placements is cut off, and `else:`
    # lines appear to be missing. Restore from upstream before relying on it.
    Construct concatenation plan for given block manager and indexers.

    mgr : BlockManager
    indexers : dict of {axis: indexer}

    plan : list of (BlockPlacement, JoinUnit) tuples

    # Calculate post-reindex shape , save for item axis which will be separate
    # for each block anyway.
    mgr_shape_list = list(mgr.shape)
    for ax, indexer in indexers.items():
        mgr_shape_list[ax] = len(indexer)
    mgr_shape = tuple(mgr_shape_list)

    if 0 in indexers:
        # The axis-0 indexer is popped from `indexers` (this mutates the
        # dict passed in) and applied to blknos/blklocs, with -1 as the
        # fill for missing positions.
        ax0_indexer = indexers.pop(0)
        blknos = algos.take_1d(mgr.blknos, ax0_indexer, fill_value=-1)
        blklocs = algos.take_1d(mgr.blklocs, ax0_indexer, fill_value=-1)

        if mgr._is_single_block:
            blk = mgr.blocks[0]
            return [(blk.mgr_locs, JoinUnit(blk, mgr_shape, indexers))]

        ax0_indexer = None
        blknos = mgr.blknos
        blklocs = mgr.blklocs

    plan = []
    for blkno, placements in libinternals.get_blkno_placements(blknos,

        assert placements.is_slice_like

        # Each join unit gets its own copy so that edits below do not leak
        # into other units.
        join_unit_indexers = indexers.copy()

        shape_list = list(mgr_shape)
        shape_list[0] = len(placements)
        shape = tuple(shape_list)

        if blkno == -1:
            # Block number -1: a unit with no backing block.
            unit = JoinUnit(None, shape)
            blk = mgr.blocks[blkno]
            ax0_blk_indexer = blklocs[placements.indexer]

            unit_no_ax0_reindexing = (
                len(placements) == len(blk.mgr_locs) and
                # Fastpath detection of join unit not
                # needing to reindex its block: no ax0
                # reindexing took place and block
                # placement was sequential before.
                ((ax0_indexer is None and blk.mgr_locs.is_slice_like
                  and blk.mgr_locs.as_slice.step == 1) or
                 # Slow-ish detection: all indexer locs
                 # are sequential (and length match is
                 # checked above).
                 (np.diff(ax0_blk_indexer) == 1).all()))

            # Omit indexer if no item reindexing is required.
            if unit_no_ax0_reindexing:
                join_unit_indexers.pop(0, None)
                join_unit_indexers[0] = ax0_blk_indexer

            unit = JoinUnit(blk, shape, join_unit_indexers)

        plan.append((placements, unit))

    return plan
def union_categoricals(to_union, sort_categories=False, ignore_order=False):
    """
    Combine list-like of Categorical-like, unioning categories. All
    categories must have the same dtype.

    .. versionadded:: 0.19.0

    Parameters
    ----------
    to_union : list-like of Categorical, CategoricalIndex,
               or Series with dtype='category'
    sort_categories : boolean, default False
        If true, resulting categories will be lexsorted, otherwise
        they will be ordered as they appear in the data.
    ignore_order: boolean, default False
        If true, the ordered attribute of the Categoricals will be ignored.
        Results in an unordered categorical.

        .. versionadded:: 0.20.0

    Returns
    -------
    result : Categorical

    Raises
    ------
    TypeError
        - all inputs do not have the same dtype
        - all inputs do not have the same ordered property
        - all inputs are ordered and their categories are not identical
        - sort_categories=True and Categoricals are ordered
    ValueError
        Empty list of categoricals passed
    """
    from pandas import Index, Categorical, CategoricalIndex, Series

    if len(to_union) == 0:
        raise ValueError('No Categoricals to union')

    def _maybe_unwrap(x):
        # Reduce each accepted container to the array it wraps; anything
        # else is rejected up front.
        if isinstance(x, (CategoricalIndex, Series)):
            return x.values
        elif isinstance(x, Categorical):
            return x
        else:
            raise TypeError("all components to combine must be Categorical")

    to_union = [_maybe_unwrap(x) for x in to_union]
    first = to_union[0]

    if not all(is_dtype_equal(other.categories.dtype, first.categories.dtype)
               for other in to_union[1:]):
        raise TypeError("dtype of categories must be the same")

    ordered = False
    if all(first.is_dtype_equal(other) for other in to_union[1:]):
        # identical categories - fastpath
        categories = first.categories
        ordered = first.ordered
        new_codes = np.concatenate([c.codes for c in to_union])

        if sort_categories and not ignore_order and ordered:
            raise TypeError("Cannot use sort_categories=True with "
                            "ordered Categoricals")

        if sort_categories and not categories.is_monotonic_increasing:
            categories = categories.sort_values()
            # Map positions in the old category order to the sorted order,
            # then translate every code through that mapping.
            indexer = categories.get_indexer(first.categories)
            new_codes = take_1d(indexer, new_codes, fill_value=-1)
    elif ignore_order or all(not c.ordered for c in to_union):
        # different categories - union and recode
        cats = first.categories.append([c.categories for c in to_union[1:]])
        categories = Index(cats.unique())
        if sort_categories:
            categories = categories.sort_values()

        new_codes = []
        for c in to_union:
            if len(c.categories) > 0:
                indexer = categories.get_indexer(c.categories)
                new_codes.append(take_1d(indexer, c.codes, fill_value=-1))
            else:
                # must be all NaN
                new_codes.append(c.codes)
        new_codes = np.concatenate(new_codes)
    else:
        # ordered - to show a proper error message
        if all(c.ordered for c in to_union):
            msg = ("to union ordered Categoricals, "
                   "all categories must be the same")
            raise TypeError(msg)
        else:
            raise TypeError('Categorical.ordered must be the same')

    if ignore_order:
        ordered = False

    # NOTE(review): the trailing argument of this call was cut off in the
    # source; ``fastpath=True`` is reconstructed - confirm.
    return Categorical(new_codes, categories=categories, ordered=ordered,
                       fastpath=True)
# File: base.py | Project: xtowerlink/pandas
    def _map_values(self, mapper, na_action=None):
        """
        An internal function that maps values using the input
        correspondence (which can be a dict, Series, or function).

        Parameters
        ----------
        mapper : function, dict, or Series
            The input correspondence object
        na_action : {None, 'ignore'}
            If 'ignore', propagate NA values, without passing them to the
            mapping function

        Returns
        -------
        Union[Index, MultiIndex], inferred
            The output of the mapping function applied to the index.
            If the function returns a tuple with more than one element
            a MultiIndex will be returned.

        Raises
        ------
        NotImplementedError
            If ``na_action`` is given for an extension array that provides
            its own ``map``.
        ValueError
            If ``na_action`` is neither 'ignore' nor None.
        """
        # we can fastpath dict/Series to an efficient map
        # as we know that we are not going to have to yield
        # python types
        if is_dict_like(mapper):
            if isinstance(mapper, dict) and hasattr(mapper, "__missing__"):
                # If a dictionary subclass defines a default value method,
                # convert mapper to a lookup function (GH #15999).
                dict_with_default = mapper
                mapper = lambda x: dict_with_default[x]
            else:
                # Dictionary does not have a default. Thus it's safe to
                # convert to an Series for efficiency.
                # we specify the keys here to handle the
                # possibility that they are tuples

                # The return value of mapping with an empty mapper is
                # expected to be pd.Series(np.nan, ...). As np.nan is
                # of dtype float64 the return value of this method should
                # be float64 as well
                mapper = create_series_with_explicit_dtype(
                    mapper, dtype_if_empty=np.float64)

        if isinstance(mapper, ABCSeries):
            # Since values were input this means we came from either
            # a dict or a series and mapper should be an index
            if is_categorical_dtype(self.dtype):
                # use the built in categorical series mapper which saves
                # time by mapping the categories instead of all values

                # pandas\core\base.py:893: error: Incompatible types in
                # assignment (expression has type "Categorical", variable has
                # type "IndexOpsMixin")  [assignment]
                self = cast("Categorical", self)  # type: ignore[assignment]
                # pandas\core\base.py:894: error: Item "ExtensionArray" of
                # "Union[ExtensionArray, Any]" has no attribute "map"
                # [union-attr]
                return self._values.map(mapper)  # type: ignore[union-attr]

            values = self._values

            # Look every value up in the mapper's index and pull the mapped
            # value from the matching position.
            indexer = mapper.index.get_indexer(values)
            new_values = algorithms.take_1d(mapper._values, indexer)

            return new_values

        # we must convert to python types
        if is_extension_array_dtype(self.dtype) and hasattr(
                self._values, "map"):
            # GH#23179 some EAs do not have `map`
            values = self._values
            if na_action is not None:
                raise NotImplementedError
            map_f = lambda values, f: values.map(f)
        else:
            # pandas\core\base.py:1142: error: "IndexOpsMixin" has no attribute
            # "astype"  [attr-defined]
            values = self.astype(object)._values  # type: ignore[attr-defined]
            if na_action == "ignore":
                # NOTE(review): the mask argument was cut off in the source;
                # ``isna(values).view(np.uint8)`` is reconstructed - confirm.
                map_f = lambda values, f: lib.map_infer_mask(
                    values, f, isna(values).view(np.uint8))
            elif na_action is None:
                map_f = lib.map_infer
            else:
                msg = ("na_action must either be 'ignore' or None, "
                       f"{na_action} was passed")
                raise ValueError(msg)

        # mapper is a function
        new_values = map_f(values, mapper)

        return new_values
# File: base.py | Project: MasonGallo/pandas
    def _map_values(self, mapper, na_action=None):
        """An internal function that maps values using the input
        correspondence (which can be a dict, Series, or function).

        Parameters
        ----------
        mapper : function, dict, or Series
            The input correspondence object
        na_action : {None, 'ignore'}
            If 'ignore', propagate NA values, without passing them to the
            mapping function

        Returns
        -------
        applied : Union[Index, MultiIndex], inferred
            The output of the mapping function applied to the index.
            If the function returns a tuple with more than one element
            a MultiIndex will be returned.

        Raises
        ------
        NotImplementedError
            If ``na_action`` is given for an extension-typed object.
        """

        # we can fastpath dict/Series to an efficient map
        # as we know that we are not going to have to yield
        # python types
        if isinstance(mapper, dict):
            if hasattr(mapper, '__missing__'):
                # If a dictionary subclass defines a default value method,
                # convert mapper to a lookup function (GH #15999).
                dict_with_default = mapper
                mapper = lambda x: dict_with_default[x]
            else:
                # Dictionary does not have a default. Thus it's safe to
                # convert to an Series for efficiency.
                # we specify the keys here to handle the
                # possibility that they are tuples
                from pandas import Series
                mapper = Series(mapper)

        if isinstance(mapper, ABCSeries):
            # Since values were input this means we came from either
            # a dict or a series and mapper should be an index
            if is_extension_type(self.dtype):
                values = self._values
            else:
                values = self.values

            # Look every value up in the mapper's index and pull the mapped
            # value from the matching position.
            indexer = mapper.index.get_indexer(values)
            new_values = algorithms.take_1d(mapper._values, indexer)

            return new_values

        # we must convert to python types
        if is_extension_type(self.dtype):
            values = self._values
            if na_action is not None:
                raise NotImplementedError
            map_f = lambda values, f: values.map(f)
        else:
            values = self.astype(object)
            values = getattr(values, 'values', values)
            if na_action == 'ignore':
                # NOTE(review): the mask argument was cut off in the source;
                # ``isna(values).view(np.uint8)`` is reconstructed - confirm
                # the null-check helper name against this file's imports.
                def map_f(values, f):
                    return lib.map_infer_mask(values, f,
                                              isna(values).view(np.uint8))
            else:
                map_f = lib.map_infer

        # mapper is a function
        new_values = map_f(values, mapper)

        return new_values
 def _get_ilevel_values(index, level):
     """Expand one level of ``index`` into a flat index of per-row values.

     ``level`` is used positionally (a level number, not a level name) to
     select from ``index.levels``, ``index.codes`` and ``index.names``.
     """
     level_index = index.levels[level]
     codes = index.codes[level]
     # Positions flagged as missing by ``take_1d`` receive the level's own
     # NA value.
     expanded = take_1d(
         level_index._values, codes, fill_value=level_index._na_value
     )
     return level_index._shallow_copy(expanded, name=index.names[level])
def union_categoricals(to_union, sort_categories=False, ignore_order=False):
    """
    Combine list-like of Categorical-like, unioning categories. All
    categories must have the same dtype.

    .. versionadded:: 0.19.0

    Parameters
    ----------
    to_union : list-like of Categorical, CategoricalIndex,
               or Series with dtype='category'
    sort_categories : boolean, default False
        If true, resulting categories will be lexsorted, otherwise
        they will be ordered as they appear in the data.
    ignore_order: boolean, default False
        If true, the ordered attribute of the Categoricals will be ignored.
        Results in an unordered categorical.

        .. versionadded:: 0.20.0

    Returns
    -------
    result : Categorical

    Raises
    ------
    TypeError
        - all inputs do not have the same dtype
        - all inputs do not have the same ordered property
        - all inputs are ordered and their categories are not identical
        - sort_categories=True and Categoricals are ordered
    ValueError
        Empty list of categoricals passed

    Notes
    -----
    To learn more about categories, see the section on unioning in the
    pandas categorical-data user guide.

    Examples
    --------

    >>> from pandas.api.types import union_categoricals

    If you want to combine categoricals that do not necessarily have
    the same categories, `union_categoricals` will combine a list-like
    of categoricals. The new categories will be the union of the
    categories being combined.

    >>> a = pd.Categorical(["b", "c"])
    >>> b = pd.Categorical(["a", "b"])
    >>> union_categoricals([a, b])
    [b, c, a, b]
    Categories (3, object): [b, c, a]

    By default, the resulting categories will be ordered as they appear
    in the `categories` of the data. If you want the categories to be
    lexsorted, use `sort_categories=True` argument.

    >>> union_categoricals([a, b], sort_categories=True)
    [b, c, a, b]
    Categories (3, object): [a, b, c]

    `union_categoricals` also works with the case of combining two
    categoricals of the same categories and order information (e.g. what
    you could also `append` for).

    >>> a = pd.Categorical(["a", "b"], ordered=True)
    >>> b = pd.Categorical(["a", "b", "a"], ordered=True)
    >>> union_categoricals([a, b])
    [a, b, a, b, a]
    Categories (2, object): [a < b]

    Raises `TypeError` because the categories are ordered and not identical.

    >>> a = pd.Categorical(["a", "b"], ordered=True)
    >>> b = pd.Categorical(["a", "b", "c"], ordered=True)
    >>> union_categoricals([a, b])
    TypeError: to union ordered Categoricals, all categories must be the same

    New in version 0.20.0

    Ordered categoricals with different categories or orderings can be
    combined by using the `ignore_order=True` argument.

    >>> a = pd.Categorical(["a", "b", "c"], ordered=True)
    >>> b = pd.Categorical(["c", "b", "a"], ordered=True)
    >>> union_categoricals([a, b], ignore_order=True)
    [a, b, c, c, b, a]
    Categories (3, object): [a, b, c]

    `union_categoricals` also works with a `CategoricalIndex`, or `Series`
    containing categorical data, but note that the resulting array will
    always be a plain `Categorical`

    >>> a = pd.Series(["b", "c"], dtype='category')
    >>> b = pd.Series(["a", "b"], dtype='category')
    >>> union_categoricals([a, b])
    [b, c, a, b]
    Categories (3, object): [b, c, a]
    """
    from pandas import Index, Categorical, CategoricalIndex, Series
    from pandas.core.categorical import _recode_for_categories

    if len(to_union) == 0:
        raise ValueError('No Categoricals to union')

    def _maybe_unwrap(x):
        # Reduce each accepted container to the array it wraps; anything
        # else is rejected up front.
        if isinstance(x, (CategoricalIndex, Series)):
            return x.values
        elif isinstance(x, Categorical):
            return x
        else:
            raise TypeError("all components to combine must be Categorical")

    to_union = [_maybe_unwrap(x) for x in to_union]
    first = to_union[0]

    if not all(is_dtype_equal(other.categories.dtype, first.categories.dtype)
               for other in to_union[1:]):
        raise TypeError("dtype of categories must be the same")

    ordered = False
    if all(first.is_dtype_equal(other) for other in to_union[1:]):
        # identical categories - fastpath
        categories = first.categories
        ordered = first.ordered
        new_codes = np.concatenate([c.codes for c in to_union])

        if sort_categories and not ignore_order and ordered:
            raise TypeError("Cannot use sort_categories=True with "
                            "ordered Categoricals")

        if sort_categories and not categories.is_monotonic_increasing:
            categories = categories.sort_values()
            # Map positions in the old category order to the sorted order,
            # then translate every code through that mapping.
            indexer = categories.get_indexer(first.categories)

            from pandas.core.algorithms import take_1d
            new_codes = take_1d(indexer, new_codes, fill_value=-1)
    elif ignore_order or all(not c.ordered for c in to_union):
        # different categories - union and recode
        cats = first.categories.append([c.categories for c in to_union[1:]])
        categories = Index(cats.unique())
        if sort_categories:
            categories = categories.sort_values()

        # NOTE(review): the last argument of this call was cut off in the
        # source; the new ``categories`` is reconstructed - confirm.
        new_codes = []
        for c in to_union:
            new_codes.append(_recode_for_categories(c.codes, c.categories,
                                                    categories))
        new_codes = np.concatenate(new_codes)
    else:
        # ordered - to show a proper error message
        if all(c.ordered for c in to_union):
            msg = ("to union ordered Categoricals, "
                   "all categories must be the same")
            raise TypeError(msg)
        else:
            raise TypeError('Categorical.ordered must be the same')

    if ignore_order:
        ordered = False

    # NOTE(review): the trailing argument of this call was cut off in the
    # source; ``fastpath=True`` is reconstructed - confirm.
    return Categorical(new_codes, categories=categories, ordered=ordered,
                       fastpath=True)
def union_categoricals(to_union):
    """
    Combine list-like of Categoricals, unioning categories. All
    must have the same dtype, and none can be ordered.

    .. versionadded:: 0.19.0

    Parameters
    ----------
    to_union : list-like of Categoricals

    Returns
    -------
    Categorical
       A single array, categories will be ordered as they
       appear in the list

    Raises
    ------
    TypeError
        - all inputs do not have the same dtype
        - all inputs do not have the same ordered property
        - all inputs are ordered and their categories are not identical
    ValueError
        Empty list of categoricals passed
    """
    from pandas import Index, Categorical

    if len(to_union) == 0:
        raise ValueError('No Categoricals to union')

    first = to_union[0]

    if not all(
            is_dtype_equal(c.categories.dtype, first.categories.dtype)
            for c in to_union):
        raise TypeError("dtype of categories must be the same")

    if all(first.is_dtype_equal(other) for other in to_union[1:]):
        # Identical dtypes: the codes are already compatible, so they can be
        # concatenated directly.
        # NOTE(review): the trailing arguments of this call were cut off in
        # the source; reconstructed - confirm ``fastpath=True``.
        return Categorical(np.concatenate([c.codes for c in to_union]),
                           categories=first.categories, ordered=first.ordered,
                           fastpath=True)
    elif all(not c.ordered for c in to_union):
        # not ordered
        pass
    else:
        # to show a proper error message
        if all(c.ordered for c in to_union):
            msg = ("to union ordered Categoricals, "
                   "all categories must be the same")
            raise TypeError(msg)
        else:
            raise TypeError('Categorical.ordered must be the same')

    cats = first.categories
    unique_cats = cats.append([c.categories for c in to_union[1:]]).unique()
    categories = Index(unique_cats)

    new_codes = []
    for c in to_union:
        if len(c.categories) > 0:
            # Translate this input's codes into positions in the unioned
            # categories.
            indexer = categories.get_indexer(c.categories)
            new_codes.append(take_1d(indexer, c.codes, fill_value=-1))
        else:
            # must be all NaN
            new_codes.append(c.codes)

    new_codes = np.concatenate(new_codes)
    # NOTE(review): trailing arguments reconstructed as above - confirm.
    return Categorical(new_codes, categories=categories, ordered=False,
                       fastpath=True)
def _(groupby):
    """Return a Series of ``groupby.obj`` values taken at each group id.

    The ids come from ``groupby._orig_grouper.group_info``; the result
    reuses the index and name of ``groupby._orig_obj``.
    """
    source = groupby._orig_obj
    # ``group_info`` is unpacked as a 3-tuple; only the ids are needed here.
    group_ids, _unused, _ngroups = groupby._orig_grouper.group_info
    taken = algorithms.take_1d(groupby.obj._values, group_ids)
    return Series(taken, index=source.index, name=source.name)
def get_mgr_concatenation_plan(mgr, indexers):
    """
    Construct concatenation plan for given block manager and indexers.

    Parameters
    ----------
    mgr : BlockManager
    indexers : dict of {axis: indexer}
        Note that an entry for axis 0, if present, is popped from this
        dict.

    Returns
    -------
    plan : list of (BlockPlacement, JoinUnit) tuples

    """
    # Calculate post-reindex shape , save for item axis which will be separate
    # for each block anyway.
    mgr_shape_list = list(mgr.shape)
    for ax, indexer in indexers.items():
        mgr_shape_list[ax] = len(indexer)
    mgr_shape = tuple(mgr_shape_list)

    if 0 in indexers:
        ax0_indexer = indexers.pop(0)
        blknos = algos.take_1d(mgr._blknos, ax0_indexer, fill_value=-1)
        blklocs = algos.take_1d(mgr._blklocs, ax0_indexer, fill_value=-1)
    else:

        if mgr._is_single_block:
            # No item reindexing and a single block: the whole manager is
            # one join unit.
            blk = mgr.blocks[0]
            return [(blk.mgr_locs, JoinUnit(blk, mgr_shape, indexers))]

        ax0_indexer = None
        blknos = mgr._blknos
        blklocs = mgr._blklocs

    plan = []
    # NOTE(review): the arguments after ``blknos`` were cut off in the
    # source; ``mgr.nblocks, group=False`` is reconstructed - confirm against
    # the signature of ``libinternals.get_blkno_placements``.
    for blkno, placements in libinternals.get_blkno_placements(blknos,
                                                               mgr.nblocks,
                                                               group=False):

        assert placements.is_slice_like

        join_unit_indexers = indexers.copy()

        shape_list = list(mgr_shape)
        shape_list[0] = len(placements)
        shape = tuple(shape_list)

        if blkno == -1:
            # -1 is the fill value used above for items with no source
            # block, so the unit carries no block.
            unit = JoinUnit(None, shape)
        else:
            blk = mgr.blocks[blkno]
            ax0_blk_indexer = blklocs[placements.indexer]

            unit_no_ax0_reindexing = (len(placements) == len(blk.mgr_locs) and
                                      # Fastpath detection of join unit not
                                      # needing to reindex its block: no ax0
                                      # reindexing took place and block
                                      # placement was sequential before.
                                      ((ax0_indexer is None and
                                        blk.mgr_locs.is_slice_like and
                                        blk.mgr_locs.as_slice.step == 1) or
                                       # Slow-ish detection: all indexer locs
                                       # are sequential (and length match is
                                       # checked above).
                                       (np.diff(ax0_blk_indexer) == 1).all()))

            # Omit indexer if no item reindexing is required.
            if unit_no_ax0_reindexing:
                join_unit_indexers.pop(0, None)
            else:
                join_unit_indexers[0] = ax0_blk_indexer

            unit = JoinUnit(blk, shape, join_unit_indexers)

        plan.append((placements, unit))

    return plan
# File: concat.py | Project: abrockwa/pandas
def union_categoricals(to_union):
    """
    Combine list-like of Categoricals, unioning categories. All
    must have the same dtype, and none can be ordered.

    .. versionadded:: 0.19.0

    Parameters
    ----------
    to_union : list-like of Categoricals

    Returns
    -------
    Categorical
       A single array, categories will be ordered as they
       appear in the list

    Raises
    ------
    TypeError
        - all inputs do not have the same dtype
        - all inputs do not have the same ordered property
        - all inputs are ordered and their categories are not identical
    ValueError
        Empty list of categoricals passed
    """
    from pandas import Index, Categorical

    if len(to_union) == 0:
        raise ValueError('No Categoricals to union')

    first = to_union[0]

    if not all(is_dtype_equal(c.categories.dtype, first.categories.dtype)
               for c in to_union):
        raise TypeError("dtype of categories must be the same")

    if all(first.is_dtype_equal(other) for other in to_union[1:]):
        # Identical dtypes: the codes are already compatible, so they can be
        # concatenated directly.
        # NOTE(review): the final argument of this call was cut off in the
        # source; ``fastpath=True`` is reconstructed - confirm.
        return Categorical(np.concatenate([c.codes for c in to_union]),
                           categories=first.categories, ordered=first.ordered,
                           fastpath=True)
    elif all(not c.ordered for c in to_union):
        # not ordered
        pass
    else:
        # to show a proper error message
        if all(c.ordered for c in to_union):
            msg = ("to union ordered Categoricals, "
                   "all categories must be the same")
            raise TypeError(msg)
        else:
            raise TypeError('Categorical.ordered must be the same')

    cats = first.categories
    unique_cats = cats.append([c.categories for c in to_union[1:]]).unique()
    categories = Index(unique_cats)

    new_codes = []
    for c in to_union:
        if len(c.categories) > 0:
            # Translate this input's codes into positions in the unioned
            # categories.
            indexer = categories.get_indexer(c.categories)
            new_codes.append(take_1d(indexer, c.codes, fill_value=-1))
        else:
            # must be all NaN
            new_codes.append(c.codes)

    new_codes = np.concatenate(new_codes)
    # NOTE(review): final argument reconstructed as above - confirm.
    return Categorical(new_codes, categories=categories, ordered=False,
                       fastpath=True)
def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False,
              verify=True):
    """
    Sort ``values`` and reorder corresponding ``labels``.
    ``values`` should be unique if ``labels`` is not None.
    Safe for use with mixed types (int, str), orders ints before strs.

    .. versionadded:: 0.19.0

    Parameters
    ----------
    values : list-like
        Sequence; must be unique if ``labels`` is not None.
    labels : list_like
        Indices to ``values``. All out of bound indices are treated as
        "not found" and will be masked with ``na_sentinel``.
    na_sentinel : int, default -1
        Value in ``labels`` to mark "not found".
        Ignored when ``labels`` is None.
    assume_unique : bool, default False
        When True, ``values`` are assumed to be unique, which can speed up
        the calculation. Ignored when ``labels`` is None.
    verify : bool, default True
        Check if labels are out of bound for the values and put out of bound
        labels equal to na_sentinel. If ``verify=False``, it is assumed there
        are no out of bound labels. Ignored when ``labels`` is None.

        .. versionadded:: 0.25.0

    Returns
    -------
    ordered : ndarray
        Sorted ``values``
    new_labels : ndarray
        Reordered ``labels``; returned when ``labels`` is not None.

    Raises
    ------
    TypeError
        * If ``values`` is not list-like or if ``labels`` is neither None
        nor list-like
        * If ``values`` cannot be sorted
    ValueError
        * If ``labels`` is not None and ``values`` contain duplicates.
    """
    if not is_list_like(values):
        raise TypeError("Only list-like objects are allowed to be passed to "
                        "safe_sort as values")

    if (not isinstance(values, np.ndarray)
            and not is_extension_array_dtype(values)):
        # don't convert to string types
        dtype, _ = infer_dtype_from_array(values)
        values = np.asarray(values, dtype=dtype)

    def sort_mixed(values):
        # order ints before strings, safe in py3
        str_pos = np.array([isinstance(x, str) for x in values], dtype=bool)
        nums = np.sort(values[~str_pos])
        strs = np.sort(values[str_pos])
        return np.concatenate([nums, np.asarray(strs, dtype=object)])

    sorter = None
    if (not is_extension_array_dtype(values)
            and lib.infer_dtype(values, skipna=False) == 'mixed-integer'):
        # unorderable in py3 if mixed str/int
        ordered = sort_mixed(values)
    else:
        try:
            sorter = values.argsort()
            ordered = values.take(sorter)
        except TypeError:
            # try this anyway
            ordered = sort_mixed(values)

    # labels:

    if labels is None:
        return ordered

    if not is_list_like(labels):
        raise TypeError("Only list-like objects or None are allowed to be "
                        "passed to safe_sort as labels")
    labels = ensure_platform_int(np.asarray(labels))

    from pandas import Index
    if not assume_unique and not Index(values).is_unique:
        raise ValueError("values should be unique if labels is not None")

    if sorter is None:
        # mixed types
        # NOTE(review): this lookup was mangled in the source (the tuple
        # target and the ``map_locations`` call were lost); reconstructed -
        # confirm against ``algorithms._get_data_algo``.
        (hash_klass, _), values = algorithms._get_data_algo(
            values, algorithms._hashtables)
        t = hash_klass(len(values))
        t.map_locations(values)
        sorter = ensure_platform_int(t.lookup(ordered))

    if na_sentinel == -1:
        # take_1d is faster, but only works for na_sentinels of -1
        order2 = sorter.argsort()
        new_labels = algorithms.take_1d(order2, labels, fill_value=-1)
        if verify:
            mask = (labels < -len(values)) | (labels >= len(values))
        else:
            mask = None
    else:
        reverse_indexer = np.empty(len(sorter), dtype=np.int_)
        reverse_indexer.put(sorter, np.arange(len(sorter)))
        # Out of bound indices will be masked with `na_sentinel` next, so we
        # may deal with them here without performance loss using `mode='wrap'`
        new_labels = reverse_indexer.take(labels, mode='wrap')

        mask = labels == na_sentinel
        if verify:
            mask = mask | (labels < -len(values)) | (labels >= len(values))

    if mask is not None:
        np.putmask(new_labels, mask, na_sentinel)

    return ordered, ensure_platform_int(new_labels)
def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False,
              verify=True):
    """
    Sort ``values`` and reorder corresponding ``labels``.

    ``values`` should be unique if ``labels`` is not None.
    Safe for use with mixed types (int, str), orders ints before strs.

    .. versionadded:: 0.19.0

    Parameters
    ----------
    values : list-like
        Sequence; must be unique if ``labels`` is not None.
    labels : list_like
        Indices to ``values``. All out of bound indices are treated as
        "not found" and will be masked with ``na_sentinel``.
    na_sentinel : int, default -1
        Value in ``labels`` to mark "not found".
        Ignored when ``labels`` is None.
    assume_unique : bool, default False
        When True, ``values`` are assumed to be unique, which can speed up
        the calculation. Ignored when ``labels`` is None.
    verify : bool, default True
        Check if labels are out of bound for the values and put out of bound
        labels equal to na_sentinel. If ``verify=False``, it is assumed there
        are no out of bound labels. Ignored when ``labels`` is None.

        .. versionadded:: 0.25.0

    Returns
    -------
    ordered : ndarray
        Sorted ``values``
    new_labels : ndarray
        Reordered ``labels``; returned when ``labels`` is not None.

    Raises
    ------
    TypeError
        * If ``values`` is not list-like or if ``labels`` is neither None
          nor list-like
        * If ``values`` cannot be sorted
    ValueError
        * If ``labels`` is not None and ``values`` contain duplicates.
    """
    if not is_list_like(values):
        raise TypeError("Only list-like objects are allowed to be passed to "
                        "safe_sort as values")

    if (not isinstance(values, np.ndarray)
            and not is_extension_array_dtype(values)):
        # don't convert to string types
        dtype, _ = infer_dtype_from_array(values)
        values = np.asarray(values, dtype=dtype)

    def sort_mixed(values):
        # order ints before strings, safe in py3
        str_pos = np.array([isinstance(x, str) for x in values],
                           dtype=bool)
        nums = np.sort(values[~str_pos])
        strs = np.sort(values[str_pos])
        return np.concatenate([nums, np.asarray(strs, dtype=object)])

    # ``sorter`` stays None whenever ``ordered`` came from ``sort_mixed``;
    # in that case the permutation is recovered further down via a hashtable.
    sorter = None
    if (not is_extension_array_dtype(values)
            and lib.infer_dtype(values, skipna=False) == 'mixed-integer'):
        # unorderable in py3 if mixed str/int
        ordered = sort_mixed(values)
    else:
        try:
            sorter = values.argsort()
            ordered = values.take(sorter)
        except TypeError:
            # try this anyway
            ordered = sort_mixed(values)

    # labels:

    if labels is None:
        return ordered

    if not is_list_like(labels):
        raise TypeError("Only list-like objects or None are allowed to be "
                        "passed to safe_sort as labels")
    labels = ensure_platform_int(np.asarray(labels))

    # imported locally, as in the original block
    from pandas import Index
    if not assume_unique and not Index(values).is_unique:
        raise ValueError("values should be unique if labels is not None")

    if sorter is None:
        # mixed types
        (hash_klass, _), values = algorithms._get_data_algo(
            values, algorithms._hashtables)
        t = hash_klass(len(values))
        # the table must be populated before it can be queried
        t.map_locations(values)
        sorter = ensure_platform_int(t.lookup(ordered))

    if na_sentinel == -1:
        # take_1d is faster, but only works for na_sentinels of -1
        order2 = sorter.argsort()
        new_labels = algorithms.take_1d(order2, labels, fill_value=-1)
        if verify:
            mask = (labels < -len(values)) | (labels >= len(values))
        else:
            mask = None
    else:
        reverse_indexer = np.empty(len(sorter), dtype=np.int_)
        reverse_indexer.put(sorter, np.arange(len(sorter)))
        # Out of bound indices will be masked with `na_sentinel` next, so we
        # may deal with them here without performance loss using `mode='wrap'`
        new_labels = reverse_indexer.take(labels, mode='wrap')

        mask = labels == na_sentinel
        if verify:
            mask = mask | (labels < -len(values)) | (labels >= len(values))

    if mask is not None:
        np.putmask(new_labels, mask, na_sentinel)

    return ordered, ensure_platform_int(new_labels)
    def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
        """
        Fill in, or insert, the join-key columns of ``result`` in place.

        For every ``(name, lname, rname)`` triple taken from
        ``self.join_names``, ``self.left_on`` and ``self.right_on`` that
        passes ``_should_fill``, a key column is built from the left-hand
        values, falling back to the right-hand values at positions where
        ``left_indexer == -1``. The column overwrites ``result[name]`` when
        it already exists and is otherwise inserted at position ``i``.

        Parameters
        ----------
        result : object supporting ``in``, item assignment and ``insert``
            Modified in place. Presumably the merged DataFrame; confirm
            against the caller.
        left_indexer, right_indexer : array-like or None
            Positions to take from the left/right keys. Entries equal to -1
            are treated as missing.

        Returns
        -------
        None
        """
        # computed lazily, at most once each, and reused across all keys
        left_has_missing = None
        right_has_missing = None

        keys = zip(self.join_names, self.left_on, self.right_on)
        for i, (name, lname, rname) in enumerate(keys):
            if not _should_fill(lname, rname):
                continue

            take_left, take_right = None, None

            if name in result:

                if left_indexer is not None and right_indexer is not None:
                    if name in self.left:

                        if left_has_missing is None:
                            left_has_missing = (left_indexer == -1).any()

                        if left_has_missing:
                            take_right = self.right_join_keys[i]

                            # re-take from the left column only when the
                            # result column's dtype no longer matches it
                            if not com.is_dtype_equal(result[name].dtype,
                                                      self.left[name].dtype):
                                take_left = self.left[name]._values

                    elif name in self.right:

                        if right_has_missing is None:
                            right_has_missing = (right_indexer == -1).any()

                        if right_has_missing:
                            take_left = self.left_join_keys[i]

                            if not com.is_dtype_equal(result[name].dtype,
                                                      self.right[name].dtype):
                                take_right = self.right[name]._values

            elif (left_indexer is not None
                    and isinstance(self.left_join_keys[i], np.ndarray)):

                take_left = self.left_join_keys[i]
                take_right = self.right_join_keys[i]

            if take_left is not None or take_right is not None:

                if take_left is None:
                    lvals = result[name]._values
                else:
                    lfill = na_value_for_dtype(take_left.dtype)
                    lvals = algos.take_1d(take_left, left_indexer,
                                          fill_value=lfill)

                if take_right is None:
                    rvals = result[name]._values
                else:
                    rfill = na_value_for_dtype(take_right.dtype)
                    rvals = algos.take_1d(take_right, right_indexer,
                                          fill_value=rfill)

                # if we have an all missing left_indexer
                # make sure to just use the right values
                mask = left_indexer == -1
                if mask.all():
                    key_col = rvals
                else:
                    key_col = Index(lvals).where(~mask, rvals)

                if name in result:
                    result[name] = key_col
                else:
                    result.insert(i, name or 'key_%d' % i, key_col)