Exemplo n.º 1
0
def isin(comps, values):
    """
    Build a boolean membership mask of `comps` against `values`.

    Parameters
    ----------
    comps: array-like
        Elements to test; converted with ``np.asarray``.
    values: array-like
        Candidate values that elements of `comps` are looked up in.

    Returns
    -------
    boolean array same length as comps

    Raises
    ------
    TypeError
        If `comps` or `values` is not list-like.
    """
    if not com.is_list_like(comps):
        bad_type = type(comps).__name__
        raise TypeError("only list-like objects are allowed to be passed"
                        " to isin(), you passed a "
                        "[{0}]".format(bad_type))
    comps = np.asarray(comps)
    if not com.is_list_like(values):
        bad_type = type(values).__name__
        raise TypeError("only list-like objects are allowed to be passed"
                        " to isin(), you passed a "
                        "[{0}]".format(bad_type))

    # GH11232
    # np.in1d is the work-around for numpy < 1.8 comparisons on py3 and is
    # also preferred for large inputs.  The decision is taken up front but
    # only applies to the int64-backed paths below.
    prefer_in1d = ((_np_version_under1p8 and compat.PY3) or
                   len(comps) > 1000000)

    # datetimes / timedeltas are compared through their i8 representation
    if com.is_datetime64_dtype(comps):
        from pandas.tseries.tools import to_datetime

        values = to_datetime(values)._values.view("i8")
        comps = comps.view("i8")
    elif com.is_timedelta64_dtype(comps):
        from pandas.tseries.timedeltas import to_timedelta

        values = to_timedelta(values)._values.view("i8")
        comps = comps.view("i8")
    elif not com.is_int64_dtype(comps):
        # everything else goes through the generic membership routine
        return lib.ismember(comps, set(values))

    if prefer_in1d:
        return np.in1d(comps, np.asarray(list(values)))
    return lib.ismember_int64(comps, set(values))
Exemplo n.º 2
0
    def _get_wom_rule(self):
        """
        Return a week-of-month rule string for ``self.index``, or None.

        None is returned unless every difference between consecutive week
        numbers is 4 or 5 and all entries share a single weekday.
        """
        week_steps = unique(np.diff(self.index.week))
        allowed_steps = set([4, 5])
        steps_ok = lib.ismember(week_steps, allowed_steps).all()
        if not steps_ok:
            return None

        distinct_weekdays = unique(self.index.weekday)
        if len(distinct_weekdays) > 1:
            return None

        # week of the month (1-based) taken from the first entry's day
        first_day = self.index[0].day
        week_of_month = (first_day - 1) // 7 + 1
        weekday_alias = _weekday_rule_aliases[distinct_weekdays[0]]

        return "WOM-%d%s" % (week_of_month, weekday_alias)
    def _get_wom_rule(self):
        """
        Return a week-of-month rule string for ``self.index``, or None.

        None is returned unless every difference between consecutive week
        numbers is 4 or 5 and all entries share a single weekday.
        """
        week_steps = unique(np.diff(self.index.week))
        allowed_steps = set([4, 5])
        steps_ok = lib.ismember(week_steps, allowed_steps).all()
        if not steps_ok:
            return None

        distinct_weekdays = unique(self.index.weekday)
        if len(distinct_weekdays) > 1:
            return None

        # week of the month (1-based) taken from the first entry's day
        first_day = self.index[0].day
        week_of_month = (first_day - 1) // 7 + 1
        weekday_alias = _weekday_rule_aliases[distinct_weekdays[0]]

        return 'WOM-%d%s' % (week_of_month, weekday_alias)
Exemplo n.º 4
0
def isin(comps, values):
    """
    Build a boolean membership mask of `comps` against `values`.

    Parameters
    ----------
    comps: array-like
        Elements to test; converted with ``np.asarray``.
    values: array-like
        Candidate values that elements of `comps` are looked up in.
        Anything that is not already an ndarray is materialised as a list.

    Returns
    -------
    boolean array same length as comps

    Raises
    ------
    TypeError
        If `comps` or `values` is not list-like.
    """
    if not is_list_like(comps):
        bad_type = type(comps).__name__
        raise TypeError("only list-like objects are allowed to be passed"
                        " to isin(), you passed a "
                        "[{0}]".format(bad_type))
    comps = np.asarray(comps)
    if not is_list_like(values):
        bad_type = type(values).__name__
        raise TypeError("only list-like objects are allowed to be passed"
                        " to isin(), you passed a "
                        "[{0}]".format(bad_type))
    if not isinstance(values, np.ndarray):
        values = list(values)

    # GH11232
    # np.in1d is the work-around for numpy < 1.8 comparisons on py3 and is
    # also preferred for large inputs.  The decision is taken up front but
    # only applies to the int64-backed paths below.
    prefer_in1d = ((_np_version_under1p8 and compat.PY3) or
                   len(comps) > 1000000)

    # datetimes / timedeltas are compared through their i8 representation
    if is_datetime64_dtype(comps):
        from pandas.tseries.tools import to_datetime
        values = to_datetime(values)._values.view('i8')
        comps = comps.view('i8')
    elif is_timedelta64_dtype(comps):
        from pandas.tseries.timedeltas import to_timedelta
        values = to_timedelta(values)._values.view('i8')
        comps = comps.view('i8')
    elif not is_int64_dtype(comps):
        # everything else goes through the generic membership routine
        return lib.ismember(comps, set(values))

    if prefer_in1d:
        return np.in1d(comps, np.asarray(list(values)))
    return lib.ismember_int64(comps, set(values))
Exemplo n.º 5
0
def _attempt_YYYYMMDD(arg, errors):
    """ try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like,
        arg is a passed in as an object dtype, but could really be ints/strings
        with nan-like/or floats (e.g. with nan)

    Parameters
    ----------
    arg : passed value
    errors : 'raise','ignore','coerce'
    """

    def calc(carg):
        # calculate the actual result
        carg = carg.astype(object)
        parsed = lib.try_parse_year_month_day(carg / 10000,
                                              carg / 100 % 100,
                                              carg % 100)
        return tslib.array_to_datetime(parsed, errors=errors)

    def calc_with_mask(carg, mask):
        result = np.empty(carg.shape, dtype='M8[ns]')
        iresult = result.view('i8')
        iresult[~mask] = tslib.iNaT
        result[mask] = calc(carg[mask].astype(np.float64).astype(np.int64)). \
            astype('M8[ns]')
        return result

    # try intlike / strings that are ints
    try:
        return calc(arg.astype(np.int64))
    except:
        pass

    # a float with actual np.nan
    try:
        carg = arg.astype(np.float64)
        return calc_with_mask(carg, notnull(carg))
    except:
        pass

    # string with NaN-like
    try:
        mask = ~lib.ismember(arg, tslib._nat_strings)
        return calc_with_mask(arg, mask)
    except:
        pass

    return None
Exemplo n.º 6
0
def _attempt_YYYYMMDD(arg, errors):
    """ try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like,
        arg is a passed in as an object dtype, but could really be ints/strings
        with nan-like/or floats (e.g. with nan)

    Parameters
    ----------
    arg : passed value
    errors : 'raise','ignore','coerce'
    """

    def calc(carg):
        # calculate the actual result
        carg = carg.astype(object)
        parsed = lib.try_parse_year_month_day(carg / 10000,
                                              carg / 100 % 100,
                                              carg % 100)
        return tslib.array_to_datetime(parsed, errors=errors)

    def calc_with_mask(carg, mask):
        result = np.empty(carg.shape, dtype='M8[ns]')
        iresult = result.view('i8')
        iresult[~mask] = tslib.iNaT
        result[mask] = calc(carg[mask].astype(np.float64).astype(np.int64)).\
            astype('M8[ns]')
        return result

    # try intlike / strings that are ints
    try:
        return calc(arg.astype(np.int64))
    except:
        pass

    # a float with actual np.nan
    try:
        carg = arg.astype(np.float64)
        return calc_with_mask(carg, notnull(carg))
    except:
        pass

    # string with NaN-like
    try:
        mask = ~lib.ismember(arg, tslib._nat_strings)
        return calc_with_mask(arg, mask)
    except:
        pass

    return None
Exemplo n.º 7
0
    def drop(self, labels, axis=0, level=None):
        """
        Return new object with labels in requested axis removed

        Parameters
        ----------
        labels : array-like
        axis : int
        level : int or name, default None
            For MultiIndex

        Returns
        -------
        dropped : type of caller

        Raises
        ------
        AssertionError
            If `level` is given but the requested axis is not a MultiIndex.
        """
        axis_name = self._get_axis_name(axis)
        # positional axis index, used wherever a list of axes is indexed
        axis_number = self._get_axis_number(axis_name)
        axis = self._get_axis(axis)

        if level is not None and not isinstance(axis, MultiIndex):
            raise AssertionError('axis must be a MultiIndex')

        if axis.is_unique:
            if level is not None:
                new_axis = axis.drop(labels, level=level)
            else:
                new_axis = axis.drop(labels)
            dropped = self.reindex(**{axis_name: new_axis})
            try:
                # carry the axis names over to the result; index by position
                # because `axes` is looked up with an integer
                dropped.axes[axis_number].names = axis.names
            except AttributeError:
                pass
            return dropped

        # Non-unique axis: build a boolean "keep" mask and slice with it.
        # ``~`` is the boolean inversion; unary ``-`` on a boolean ndarray
        # is rejected by current NumPy.
        if level is not None:
            indexer = ~lib.ismember(axis.get_level_values(level),
                                    set(labels))
        else:
            indexer = ~axis.isin(labels)

        slicer = [slice(None)] * self.ndim
        slicer[axis_number] = indexer

        return self.ix[tuple(slicer)]
Exemplo n.º 8
0
    def drop(self, labels, axis=0, level=None):
        """
        Return new object with labels in requested axis removed

        Parameters
        ----------
        labels : array-like
        axis : int
        level : int or name, default None
            For MultiIndex

        Returns
        -------
        dropped : type of caller

        Raises
        ------
        AssertionError
            If `level` is given but the requested axis is not a MultiIndex.
        """
        axis_name = self._get_axis_name(axis)
        # positional axis index, used wherever a list of axes is indexed
        axis_number = self._get_axis_number(axis_name)
        axis = self._get_axis(axis)

        if level is not None and not isinstance(axis, MultiIndex):
            raise AssertionError('axis must be a MultiIndex')

        if axis.is_unique:
            if level is not None:
                new_axis = axis.drop(labels, level=level)
            else:
                new_axis = axis.drop(labels)
            dropped = self.reindex(**{axis_name: new_axis})
            try:
                # carry the axis names over to the result; index by position
                # because `axes` is looked up with an integer
                dropped.axes[axis_number].names = axis.names
            except AttributeError:
                pass
            return dropped

        # Non-unique axis: build a boolean "keep" mask and slice with it.
        # ``~`` is the boolean inversion; unary ``-`` on a boolean ndarray
        # is rejected by current NumPy.
        if level is not None:
            indexer = ~lib.ismember(axis.get_level_values(level),
                                    set(labels))
        else:
            indexer = ~axis.isin(labels)

        slicer = [slice(None)] * self.ndim
        slicer[axis_number] = indexer

        return self.ix[tuple(slicer)]
Exemplo n.º 9
0
    def isin(self, values):
        """
        Report, for each index value, whether it occurs in `values`.

        Parameters
        ----------
        values : set or sequence of values
            Converted to this index's own type when it is not one already;
            if that conversion raises ValueError the check is delegated to
            ``self.asobject.isin``.

        Returns
        -------
        is_contained : ndarray (boolean dtype)
        """
        own_type = type(self)
        if not isinstance(values, own_type):
            try:
                values = own_type(values)
            except ValueError:
                return self.asobject.isin(values)

        # membership is tested on the ``asi8`` representations
        candidates = set(values.asi8)
        return lib.ismember(self.asi8, candidates)
Exemplo n.º 10
0
    def isin(self, values):
        """
        Report, for each index value, whether it occurs in `values`.

        Parameters
        ----------
        values : set or sequence of values
            Converted to this index's own type when it is not one already;
            if that conversion raises ValueError the check is delegated to
            ``self.asobject.isin``.

        Returns
        -------
        is_contained : ndarray (boolean dtype)
        """
        own_type = type(self)
        if not isinstance(values, own_type):
            try:
                values = own_type(values)
            except ValueError:
                return self.asobject.isin(values)

        # membership is tested on the ``asi8`` representations
        candidates = set(values.asi8)
        return lib.ismember(self.asi8, candidates)
Exemplo n.º 11
0
def _convert_types(values, na_values):
    """
    Convert `values`, treating entries found in `na_values` as missing.

    Numeric / boolean arrays have their NA matches overwritten with NaN
    (integer arrays are cast to float64 first).  Other arrays go through
    ``lib.maybe_convert_numeric``; if that raises, the objects are sanitized
    instead, and an object-dtype result is finally passed through
    ``lib.maybe_convert_bool``.

    Returns
    -------
    (converted values, number of NA entries counted)
    """
    if issubclass(values.dtype.type, (np.number, np.bool_)):
        na_mask = lib.ismember(values, na_values)
        n_missing = na_mask.sum()
        if n_missing > 0:
            if com.is_integer_dtype(values):
                # integers cannot hold NaN
                values = values.astype(np.float64)
            np.putmask(values, na_mask, np.nan)
        return values, n_missing

    n_missing = 0
    try:
        converted = lib.maybe_convert_numeric(values, na_values, False)
    except Exception:
        n_missing = lib.sanitize_objects(values, na_values, False)
        converted = values

    if converted.dtype == np.object_:
        converted = lib.maybe_convert_bool(values)

    return converted, n_missing
Exemplo n.º 12
0
def _convert_types(values, na_values):
    """
    Convert `values`, treating entries found in `na_values` as missing.

    Numeric / boolean arrays have their NA matches overwritten with NaN
    (integer arrays are cast to float64 first).  Other arrays go through
    ``lib.maybe_convert_numeric``; if that raises, the objects are sanitized
    instead, and an object-dtype result is finally passed through
    ``lib.maybe_convert_bool``.

    Returns
    -------
    (converted values, number of NA entries counted)
    """
    if issubclass(values.dtype.type, (np.number, np.bool_)):
        na_mask = lib.ismember(values, na_values)
        n_missing = na_mask.sum()
        if n_missing > 0:
            if com.is_integer_dtype(values):
                # integers cannot hold NaN
                values = values.astype(np.float64)
            np.putmask(values, na_mask, np.nan)
        return values, n_missing

    n_missing = 0
    try:
        converted = lib.maybe_convert_numeric(values, na_values)
    except Exception:
        n_missing = lib.sanitize_objects(values, na_values)
        converted = values

    if converted.dtype == np.object_:
        converted = lib.maybe_convert_bool(values)

    return converted, n_missing
Exemplo n.º 13
0
def _attempt_YYYYMMDD(arg, coerce):
    """ try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like,
        arg is a passed in as an object dtype, but could really be ints/strings with nan-like/or floats (e.g. with nan) """

    def calc(carg):
        # calculate the actual result
        carg = carg.astype(object)
        return tslib.array_to_datetime(
            lib.try_parse_year_month_day(carg / 10000, carg / 100 % 100, carg % 100), coerce=coerce
        )

    def calc_with_mask(carg, mask):
        result = np.empty(carg.shape, dtype="M8[ns]")
        iresult = result.view("i8")
        iresult[~mask] = tslib.iNaT
        result[mask] = calc(carg[mask].astype(np.float64).astype(np.int64)).astype("M8[ns]")
        return result

    # try intlike / strings that are ints
    try:
        return calc(arg.astype(np.int64))
    except:
        pass

    # a float with actual np.nan
    try:
        carg = arg.astype(np.float64)
        return calc_with_mask(carg, com.notnull(carg))
    except:
        pass

    # string with NaN-like
    try:
        mask = ~lib.ismember(arg, tslib._nat_strings)
        return calc_with_mask(arg, mask)
    except:
        pass

    return None
Exemplo n.º 14
0
    def drop(self, labels, axis=0, level=None):
        """
        Return new object with labels in requested axis removed

        Parameters
        ----------
        labels : array-like
        axis : int
        level : int or name, default None
            For MultiIndex

        Returns
        -------
        dropped : type of caller

        Raises
        ------
        AssertionError
            If `level` is given but the requested axis is not a MultiIndex.
        """
        axis_name = self._get_axis_name(axis)
        axis = self._get_axis(axis)

        # explicit raise instead of ``assert`` so the check survives ``-O``
        if level is not None and not isinstance(axis, MultiIndex):
            raise AssertionError('axis must be a MultiIndex')

        if axis.is_unique:
            if level is not None:
                new_axis = axis.drop(labels, level=level)
            else:
                new_axis = axis.drop(labels)

            return self.reindex(**{axis_name: new_axis})

        # Non-unique axis: build a boolean "keep" mask and slice with it.
        # ``~`` is the boolean inversion; unary ``-`` on a boolean ndarray
        # is rejected by current NumPy.
        if level is not None:
            indexer = ~lib.ismember(axis.get_level_values(level),
                                    set(labels))
        else:
            indexer = ~axis.isin(labels)

        slicer = [slice(None)] * self.ndim
        slicer[self._get_axis_number(axis_name)] = indexer

        return self.ix[tuple(slicer)]
Exemplo n.º 15
0
def _attempt_YYYYMMDD(arg):
    """ try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like,
        arg is a passed in as an object dtype, but could really be ints/strings with nan-like/or floats (e.g. with nan) """
    def calc(carg):
        # calculate the actual result
        carg = carg.astype(object)
        return lib.try_parse_year_month_day(carg / 10000, carg / 100 % 100,
                                            carg % 100)

    def calc_with_mask(carg, mask):
        result = np.empty(carg.shape, dtype='M8[ns]')
        iresult = result.view('i8')
        iresult[-mask] = tslib.iNaT
        result[mask] = calc(carg[mask].astype(np.float64).astype(
            np.int64)).astype('M8[ns]')
        return result

    # try intlike / strings that are ints
    try:
        return calc(arg.astype(np.int64))
    except:
        pass

    # a float with actual np.nan
    try:
        carg = arg.astype(np.float64)
        return calc_with_mask(carg, com.notnull(carg))
    except:
        pass

    # string with NaN-like
    try:
        mask = ~lib.ismember(arg, tslib._nat_strings)
        return calc_with_mask(arg, mask)
    except:
        pass

    return None
Exemplo n.º 16
0
    def drop(self, labels, axis=0, level=None):
        """
        Return new object with labels in requested axis removed

        Parameters
        ----------
        labels : array-like
        axis : int
        level : int or name, default None
            For MultiIndex

        Returns
        -------
        dropped : type of caller

        Raises
        ------
        AssertionError
            If `level` is given but the requested axis is not a MultiIndex.
        """
        axis_name = self._get_axis_name(axis)
        axis = self._get_axis(axis)

        # explicit raise instead of ``assert`` so the check survives ``-O``
        if level is not None and not isinstance(axis, MultiIndex):
            raise AssertionError('axis must be a MultiIndex')

        if axis.is_unique:
            if level is not None:
                new_axis = axis.drop(labels, level=level)
            else:
                new_axis = axis.drop(labels)

            return self.reindex(**{axis_name: new_axis})

        # Non-unique axis: build a boolean "keep" mask and slice with it.
        # ``~`` is the boolean inversion; unary ``-`` on a boolean ndarray
        # is rejected by current NumPy.
        if level is not None:
            indexer = ~lib.ismember(axis.get_level_values(level),
                                    set(labels))
        else:
            indexer = ~axis.isin(labels)

        slicer = [slice(None)] * self.ndim
        slicer[self._get_axis_number(axis_name)] = indexer

        return self.ix[tuple(slicer)]