Пример #1
0
    def _align_frame(self, indexer, df):
        is_frame = self.obj.ndim == 2
        is_panel = self.obj.ndim >= 3
        if isinstance(indexer, tuple):
            idx, cols = None, None
            sindexers = []
            for i, ix in enumerate(indexer):
                ax = self.obj.axes[i]
                if com._is_sequence(ix) or isinstance(ix, slice):
                    if idx is None:
                        idx = ax[ix].ravel()
                    elif cols is None:
                        cols = ax[ix].ravel()
                    else:
                        break
                else:
                    sindexers.append(i)

            # panel
            if is_panel:
                if len(sindexers) == 1 and idx is None and cols is None:
                    if sindexers[0] == 0:
                        df = df.T
                    return self.obj.conform(df, axis=sindexers[0])
                df = df.T

            if idx is not None and cols is not None:
                if df.index.equals(idx) and df.columns.equals(cols):
                    val = df.copy().values
                else:
                    val = df.reindex(idx, columns=cols).values
                return val

        elif (isinstance(indexer, slice) or com.is_list_like(indexer)) and is_frame:
            ax = self.obj.index[indexer]
            if df.index.equals(ax):
                val = df.copy().values
            else:
                val = df.reindex(ax).values
            return val

        elif np.isscalar(indexer) and not is_frame:
            idx = self.obj.axes[1]
            cols = self.obj.axes[2]

            # by definition we are indexing on the 0th axis
            if is_panel:
                df = df.T

            if idx.equals(df.index) and cols.equals(df.columns):
                return df.copy().values

            # a passed in dataframe which is actually a transpose
            # of what is needed
            elif idx.equals(df.columns) and cols.equals(df.index):
                return df.T.copy().values

            return df.reindex(idx, columns=cols).values

        raise ValueError("Incompatible indexer with DataFrame")
Пример #2
0
def _convert_to_list_like(list_like):
    if hasattr(list_like, "dtype"):
        return list_like
    if isinstance(list_like, list):
        return list_like
    if (com._is_sequence(list_like) or isinstance(list_like, tuple)
                                    or isinstance(list_like, types.GeneratorType)):
        return list(list_like)
    elif np.isscalar(list_like):
        return [list_like]
    else:
        # is this reached?
        return [list_like]
Пример #3
0
def _convert_to_list_like(list_like):
    if hasattr(list_like, "dtype"):
        return list_like
    if isinstance(list_like, list):
        return list_like
    if (com._is_sequence(list_like) or isinstance(list_like, tuple)
            or isinstance(list_like, types.GeneratorType)):
        return list(list_like)
    elif np.isscalar(list_like):
        return [list_like]
    else:
        # is this reached?
        return [list_like]
Пример #4
0
    def _align_frame(self, indexer, df):
        from pandas import DataFrame
        is_frame = isinstance(self.obj, DataFrame)
        if not is_frame:
            df = df.T
        if isinstance(indexer, tuple):
            idx, cols = None, None
            for i, ix in enumerate(indexer):
                ax = self.obj.axes[i]
                if com._is_sequence(ix) or isinstance(ix, slice):
                    if idx is None:
                        idx = ax[ix].ravel()
                    elif cols is None:
                        cols = ax[ix].ravel()
                    else:
                        break

            if idx is not None and cols is not None:
                if df.index.equals(idx) and df.columns.equals(cols):
                    val = df.copy().values
                else:
                    val = df.reindex(idx, columns=cols).values
                return val

        elif ((isinstance(indexer, slice) or com.is_list_like(indexer))
              and is_frame):
            ax = self.obj.index[indexer]
            if df.index.equals(ax):
                val = df.copy().values
            else:
                val = df.reindex(ax).values
            return val

        elif np.isscalar(indexer) and not is_frame:
            idx = self.obj.axes[1]
            cols = self.obj.axes[2]

            if idx.equals(df.index) and cols.equals(df.columns):
                return df.copy().values
            return df.reindex(idx, columns=cols).values

        raise ValueError('Incompatible indexer with DataFrame')
Пример #5
0
    def _align_frame(self, indexer, df):
        from pandas import DataFrame
        is_frame = isinstance(self.obj, DataFrame)
        if not is_frame:
            df = df.T
        if isinstance(indexer, tuple):
            idx, cols = None, None
            for i, ix in enumerate(indexer):
                ax = self.obj.axes[i]
                if com._is_sequence(ix) or isinstance(ix, slice):
                    if idx is None:
                        idx = ax[ix]
                    elif cols is None:
                        cols = ax[ix]
                    else:
                        break

            if idx is not None and cols is not None:
                if df.index.equals(idx) and df.columns.equals(cols):
                    val = df.copy().values
                else:
                    val = df.reindex(idx, columns=cols).values
                return val

        elif ((isinstance(indexer, slice) or com.is_list_like(indexer))
              and is_frame):
            ax = self.obj.index[indexer]
            if df.index.equals(ax):
                val = df.copy().values
            else:
                val = df.reindex(ax).values
            return val

        elif np.isscalar(indexer) and not is_frame:
            idx = self.obj.axes[1]
            cols = self.obj.axes[2]

            if idx.equals(df.index) and cols.equals(df.columns):
                return df.copy().values
            return df.reindex(idx, columns=cols).values

        raise ValueError('Incompatible indexer with DataFrame')
Пример #6
0
    def _align_series(self, indexer, ser):
        # indexer to assign Series can be tuple or scalar
        if isinstance(indexer, tuple):
            for i, idx in enumerate(indexer):
                ax = self.obj.axes[i]
                if com._is_sequence(idx) or isinstance(idx, slice):
                    new_ix = ax[idx]
                    if ser.index.equals(new_ix):
                        return ser.values.copy()
                    return ser.reindex(new_ix).values

        elif np.isscalar(indexer):
            ax = self.obj._get_axis(1)

            if ser.index.equals(ax):
                return ser.values.copy()

            return ser.reindex(ax).values

        raise ValueError('Incompatible indexer with Series')
Пример #7
0
    def _align_series(self, indexer, ser):
        # indexer to assign Series can be tuple or scalar
        if isinstance(indexer, tuple):
            for i, idx in enumerate(indexer):
                ax = self.obj.axes[i]
                if com._is_sequence(idx) or isinstance(idx, slice):
                    new_ix = ax[idx]
                    if ser.index.equals(new_ix):
                        return ser.values.copy()
                    return ser.reindex(new_ix).values

        elif np.isscalar(indexer):
            ax = self.obj._get_axis(1)

            if ser.index.equals(ax):
                return ser.values.copy()

            return ser.reindex(ax).values

        raise ValueError('Incompatible indexer with Series')
Пример #8
0
def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None,
                    idx_type=None):
    """Create an index/multiindex with given dimensions, levels, names, etc.

    nentries - number of entries in index
    nlevels - number of levels (> 1 produces multiindex)
    prefix - a string prefix for labels
    names - (Optional), bool or list of strings. If True, default names are
       used; if False, no names are used; if a list is given, the name of
       each level in the index is taken from the list.
    ndupe_l - (Optional), list of ints, the number of rows for which the
       label will be repeated at the corresponding level. You can specify
       just the first few; the rest use the default of 1.
       len(ndupe_l) <= nlevels. The passed sequence is never modified.
    idx_type - "i"/"f"/"s"/"u"/"dt"/"p".
       If idx_type is not None, `nlevels` must be 1.
       "i"/"f" creates an integer/float index,
       "s"/"u" creates a string/unicode index,
       "dt" creates a datetime index,
       "p" dispatches to makePeriodIndex.

       If unspecified, string labels will be generated.

    Raises AssertionError when the arguments violate the constraints above.
    """
    import re

    if ndupe_l is None:
        ndupe_l = [1] * nlevels
    assert (_is_sequence(ndupe_l) and len(ndupe_l) <= nlevels)
    # compare by value: `is` on ints only works through CPython's
    # small-integer cache
    assert (names is None or names is False
            or names is True or len(names) == nlevels)
    assert idx_type is None or \
        (idx_type in ('i', 'f', 's', 'u', 'dt', 'p') and nlevels == 1)

    if names is True:
        # build default names
        names = [prefix + str(i) for i in range(nlevels)]
    if names is False:
        # pass None to index constructor for no name
        names = None

    # make singleton case uniform
    if isinstance(names, compat.string_types) and nlevels == 1:
        names = [names]

    # specific 1D index type requested?
    idx_func = dict(i=makeIntIndex, f=makeFloatIndex, s=makeStringIndex,
                    u=makeUnicodeIndex, dt=makeDateIndex, p=makePeriodIndex).get(idx_type)
    if idx_func:
        idx = idx_func(nentries)
        # but we need to fill in the name
        if names:
            idx.name = names[0]
        return idx
    elif idx_type is not None:
        # only reachable when assertions are disabled
        raise ValueError('"%s" is not a legal value for `idx_type`, use  '
                         '"i"/"f"/"s"/"u"/"dt/"p".' % idx_type)

    # pad a private copy up to nlevels so the caller's sequence is left
    # untouched (and tuples, which have no .extend, are accepted)
    ndupe_l = list(ndupe_l) + [1] * (nlevels - len(ndupe_l))
    assert len(ndupe_l) == nlevels

    assert all(x > 0 for x in ndupe_l)

    def keyfunc(x):
        # sort labels by the integers embedded in them, not lexically
        numeric_tuple = re.sub(r"[^\d_]_?", "", x).split("_")
        return lmap(int, numeric_tuple)

    tuples = []
    for i in range(nlevels):
        # build a list of lists to create the index from
        div_factor = nentries // ndupe_l[i] + 1
        cnt = Counter()
        for j in range(div_factor):
            label = prefix + '_l%d_g' % i + str(j)
            cnt[label] = ndupe_l[i]
        # cute Counter trick: elements() repeats each label `count` times
        result = sorted(cnt.elements(), key=keyfunc)[:nentries]
        tuples.append(result)

    tuples = lzip(*tuples)

    # convert tuples to index
    if nentries == 1:
        # names is None when no names were requested; don't subscript it
        name = names[0] if names is not None else None
        index = Index(tuples[0], name=name)
    else:
        index = MultiIndex.from_tuples(tuples, names=names)
    return index
Пример #9
0
def makeCustomIndex(nentries,
                    nlevels,
                    prefix='#',
                    names=False,
                    ndupe_l=None,
                    idx_type=None):
    """Create an index/multiindex with given dimensions, levels, names, etc.

    nentries - number of entries in index
    nlevels - number of levels (> 1 produces multiindex)
    prefix - a string prefix for labels
    names - (Optional), bool or list of strings. If True, default names are
       used; if False, no names are used; if a list is given, the name of
       each level in the index is taken from the list.
    ndupe_l - (Optional), list of ints, the number of rows for which the
       label will be repeated at the corresponding level. You can specify
       just the first few; the rest use the default of 1.
       len(ndupe_l) <= nlevels. The passed sequence is never modified.
    idx_type - "i"/"f"/"s"/"u"/"dt"/"p".
       If idx_type is not None, `nlevels` must be 1.
       "i"/"f" creates an integer/float index,
       "s"/"u" creates a string/unicode index,
       "dt" creates a datetime index,
       "p" dispatches to makePeriodIndex.

       If unspecified, string labels will be generated.

    Raises AssertionError when the arguments violate the constraints above.
    """
    import re

    if ndupe_l is None:
        ndupe_l = [1] * nlevels
    assert (_is_sequence(ndupe_l) and len(ndupe_l) <= nlevels)
    # value comparison; identity (`is`) between ints is only reliable for
    # the small integers CPython happens to cache
    assert (names is None or names is False or names is True
            or len(names) == nlevels)
    assert idx_type is None or \
        (idx_type in ('i', 'f', 's', 'u', 'dt', 'p') and nlevels == 1)

    if names is True:
        # build default names
        names = [prefix + str(i) for i in range(nlevels)]
    if names is False:
        # pass None to index constructor for no name
        names = None

    # make singleton case uniform
    if isinstance(names, compat.string_types) and nlevels == 1:
        names = [names]

    # specific 1D index type requested?
    idx_func = dict(i=makeIntIndex,
                    f=makeFloatIndex,
                    s=makeStringIndex,
                    u=makeUnicodeIndex,
                    dt=makeDateIndex,
                    p=makePeriodIndex).get(idx_type)
    if idx_func:
        idx = idx_func(nentries)
        # but we need to fill in the name
        if names:
            idx.name = names[0]
        return idx
    elif idx_type is not None:
        # only reachable when assertions are disabled
        raise ValueError('"%s" is not a legal value for `idx_type`, use  '
                         '"i"/"f"/"s"/"u"/"dt/"p".' % idx_type)

    # work on a padded private copy: the caller's sequence must not grow,
    # and a tuple argument has no .extend to call
    ndupe_l = list(ndupe_l) + [1] * (nlevels - len(ndupe_l))
    assert len(ndupe_l) == nlevels

    assert all(x > 0 for x in ndupe_l)

    def keyfunc(x):
        # order labels by their embedded integers rather than as strings
        numeric_tuple = re.sub(r"[^\d_]_?", "", x).split("_")
        return lmap(int, numeric_tuple)

    tuples = []
    for i in range(nlevels):
        # build a list of lists to create the index from
        div_factor = nentries // ndupe_l[i] + 1
        cnt = Counter()
        for j in range(div_factor):
            label = prefix + '_l%d_g' % i + str(j)
            cnt[label] = ndupe_l[i]
        # cute Counter trick: elements() repeats each label `count` times
        result = sorted(cnt.elements(), key=keyfunc)[:nentries]
        tuples.append(result)

    tuples = lzip(*tuples)

    # convert tuples to index
    if nentries == 1:
        # names is None when no names were requested; don't subscript it
        name = names[0] if names is not None else None
        index = Index(tuples[0], name=name)
    else:
        index = MultiIndex.from_tuples(tuples, names=names)
    return index
Пример #10
0
    def _align_series(self, indexer, ser):
        """Return the values of ``ser`` aligned to the target of ``indexer``.

        ``indexer`` is either a tuple with one entry per axis of
        ``self.obj`` or a scalar.  Depending on the branch taken, the
        result is a copy of ``ser.values``, the values of ``ser``
        reindexed to the selected labels, or those values tiled
        (broadcast) across further dimensions.

        Raises ``ValueError`` when the indexer is neither a tuple nor a
        scalar, or when no tuple entry yields a result.
        """
        # indexer to assign Series can be tuple or scalar
        if isinstance(indexer, tuple):

            # one flag per axis: True where the entry is not a null slice
            aligners = [ not _is_null_slice(idx) for idx in indexer ]
            sum_aligners = sum(aligners)
            single_aligner = sum_aligners == 1
            is_frame = self.obj.ndim == 2
            is_panel = self.obj.ndim >= 3
            obj = self.obj

            # are we a single alignable value on a non-primary
            # dim (e.g. panel: 1,2, or frame: 0) ?
            # hence need to align to a single axis dimension
            # rather that find all valid dims

            # frame
            if is_frame:
                single_aligner = single_aligner and aligners[0]

            # panel
            elif is_panel:
                single_aligner = single_aligner and (aligners[1] or aligners[2])

            # we have a frame, with multiple indexers on both axes; and a series,
            # so need to broadcast (see GH5206)
            # NOTE(review): this reads self.ndim while the rest of the method
            # uses self.obj.ndim - presumably the same value; confirm.
            if sum_aligners == self.ndim and all([ com._is_sequence(_) for _ in indexer ]):

                # align ser to the axis-0 labels picked by indexer[0], then
                # repeat it once per element of indexer[1]; after the final
                # transpose each repetition sits along the second dimension
                ser = ser.reindex(obj.axes[0][indexer[0].ravel()],copy=True).values
                l = len(indexer[1].ravel())
                ser = np.tile(ser,l).reshape(l,-1).T
                return ser

            for i, idx in enumerate(indexer):
                ax = obj.axes[i]

                # multiple aligners (or null slices)
                if com._is_sequence(idx) or isinstance(idx, slice):
                    if single_aligner and _is_null_slice(idx):
                        continue
                    new_ix = ax[idx]
                    # wrap a single selected label so it can act as an Index
                    if not is_list_like(new_ix):
                        new_ix = Index([new_ix])
                    else:
                        new_ix = Index(new_ix.ravel())
                    if ser.index.equals(new_ix):
                        return ser.values.copy()
                    return ser.reindex(new_ix).values

                # 2 dims
                elif single_aligner and is_frame:

                    # reindex along index
                    ax = self.obj.axes[1]
                    if ser.index.equals(ax):
                        return ser.values.copy()
                    return ser.reindex(ax).values

                # >2 dims
                elif single_aligner:

                    # (position, length) of each plane axis that shares no
                    # label with ser and so has to be broadcast over
                    broadcast = []
                    for n, labels in enumerate(self.obj._get_plane_axes(i)):

                        # reindex along the matching dimensions
                        if len(labels & ser.index):
                            ser = ser.reindex(labels)
                        else:
                            broadcast.append((n,len(labels)))

                    # broadcast along other dims
                    ser = ser.values.copy()
                    for (axis,l) in broadcast:
                        # tile the values l times, then reshape so that
                        # position `axis` of the shape has length l
                        shape = [ -1 ] * (len(broadcast)+1)
                        shape[axis] = l
                        ser = np.tile(ser,l).reshape(shape)

                    if self.obj.ndim == 3:
                        ser = ser.T

                    return ser

        elif np.isscalar(indexer):
            # a scalar indexer aligns against axis 1 of the target object
            ax = self.obj._get_axis(1)

            if ser.index.equals(ax):
                return ser.values.copy()

            return ser.reindex(ax).values

        raise ValueError('Incompatible indexer with Series')
Пример #11
0
    def _align_frame(self, indexer, df):
        is_frame = self.obj.ndim == 2
        is_panel = self.obj.ndim >= 3
        if isinstance(indexer, tuple):
            idx, cols = None, None
            sindexers = []
            for i, ix in enumerate(indexer):
                ax = self.obj.axes[i]
                if com._is_sequence(ix) or isinstance(ix, slice):
                    if idx is None:
                        idx = ax[ix].ravel()
                    elif cols is None:
                        cols = ax[ix].ravel()
                    else:
                        break
                else:
                    sindexers.append(i)

            # panel
            if is_panel:
                if len(sindexers) == 1 and idx is None and cols is None:
                    if sindexers[0] == 0:
                        df = df.T
                    return self.obj.conform(df, axis=sindexers[0])
                df = df.T

            if idx is not None and cols is not None:
                if df.index.equals(idx) and df.columns.equals(cols):
                    val = df.copy().values
                else:
                    val = df.reindex(idx, columns=cols).values
                return val

        elif ((isinstance(indexer, slice) or com.is_list_like(indexer))
              and is_frame):
            ax = self.obj.index[indexer]
            if df.index.equals(ax):
                val = df.copy().values
            else:
                val = df.reindex(ax).values
            return val

        elif np.isscalar(indexer) and not is_frame:
            idx = self.obj.axes[1]
            cols = self.obj.axes[2]

            # by definition we are indexing on the 0th axis
            if is_panel:
                df = df.T

            if idx.equals(df.index) and cols.equals(df.columns):
                return df.copy().values

            # a passed in dataframe which is actually a transpose
            # of what is needed
            elif idx.equals(df.columns) and cols.equals(df.index):
                return df.T.copy().values

            return df.reindex(idx, columns=cols).values

        raise ValueError('Incompatible indexer with DataFrame')
Пример #12
0
    def _align_series(self, indexer, ser):
        """Return the values of ``ser`` aligned to the target of ``indexer``.

        ``indexer`` is either a tuple with one entry per axis of
        ``self.obj`` or a scalar.  Depending on the branch taken, the
        result is a copy of ``ser.values``, the values of ``ser``
        reindexed to the selected labels, or those values tiled
        (broadcast) across further dimensions.

        Raises ``ValueError`` when the indexer is neither a tuple nor a
        scalar, or when no tuple entry yields a result.
        """
        # indexer to assign Series can be tuple or scalar
        if isinstance(indexer, tuple):

            # one flag per axis: True where the entry is not a null slice
            aligners = [not _is_null_slice(idx) for idx in indexer]
            single_aligner = sum(aligners) == 1
            is_frame = self.obj.ndim == 2
            is_panel = self.obj.ndim >= 3

            # are we a single alignable value on a non-primary
            # dim (e.g. panel: 1,2, or frame: 0) ?
            # hence need to align to a single axis dimension
            # rather that find all valid dims

            # frame
            if is_frame:
                single_aligner = single_aligner and aligners[0]

            # panel
            elif is_panel:
                single_aligner = single_aligner and (aligners[1]
                                                     or aligners[2])

            obj = self.obj
            for i, idx in enumerate(indexer):
                ax = obj.axes[i]

                # multiple aligners (or null slices)
                if com._is_sequence(idx) or isinstance(idx, slice):
                    if single_aligner and _is_null_slice(idx):
                        continue
                    new_ix = ax[idx]
                    # wrap a single selected label so it can act as an Index
                    if not is_list_like(new_ix):
                        new_ix = Index([new_ix])
                    if ser.index.equals(new_ix):
                        return ser.values.copy()
                    return ser.reindex(new_ix).values

                # 2 dims
                elif single_aligner and is_frame:

                    # reindex along index
                    ax = self.obj.axes[1]
                    if ser.index.equals(ax):
                        return ser.values.copy()
                    return ser.reindex(ax).values

                # >2 dims
                elif single_aligner:

                    # (position, length) of each plane axis that shares no
                    # label with ser and so has to be broadcast over
                    broadcast = []
                    for n, labels in enumerate(self.obj._get_plane_axes(i)):

                        # reindex along the matching dimensions
                        if len(labels & ser.index):
                            ser = ser.reindex(labels)
                        else:
                            broadcast.append((n, len(labels)))

                    # broadcast along other dims
                    ser = ser.values.copy()
                    for (axis, l) in broadcast:
                        # tile the values l times, then reshape so that
                        # position `axis` of the shape has length l
                        shape = [-1] * (len(broadcast) + 1)
                        shape[axis] = l
                        ser = np.tile(ser, l).reshape(shape)

                    if self.obj.ndim == 3:
                        ser = ser.T

                    return ser

        elif np.isscalar(indexer):
            # a scalar indexer aligns against axis 1 of the target object
            ax = self.obj._get_axis(1)

            if ser.index.equals(ax):
                return ser.values.copy()

            return ser.reindex(ax).values

        raise ValueError('Incompatible indexer with Series')