Exemplo n.º 1
0
def dict_to_manager(sdict, columns, index):
    """ create and return the block manager from a dict of series, columns, index """

    # from BlockManager perspective
    axes = [_ensure_index(columns), _ensure_index(index)]

    return create_block_manager_from_arrays([sdict[c] for c in columns], columns, axes)
Exemplo n.º 2
0
def _convert_frames(frames, index, columns, fill_value=np.nan, kind='block'):
    from pandas.core.panel import _get_combined_index
    output = {}
    for item, df in compat.iteritems(frames):
        if not isinstance(df, SparseDataFrame):
            df = SparseDataFrame(df, default_kind=kind,
                                 default_fill_value=fill_value)

        output[item] = df

    if index is None:
        all_indexes = [df.index for df in output.values()]
        index = _get_combined_index(all_indexes)
    if columns is None:
        all_columns = [df.columns for df in output.values()]
        columns = _get_combined_index(all_columns)

    index = _ensure_index(index)
    columns = _ensure_index(columns)

    for item, df in compat.iteritems(output):
        if not (df.index.equals(index) and df.columns.equals(columns)):
            output[item] = df.reindex(index=index, columns=columns)

    return output, index, columns
Exemplo n.º 3
0
    def __setstate__(self, state):
        frames, items, major, minor, fv, kind = state

        self.default_fill_value = fv
        self.default_kind = kind
        self._items = _ensure_index(com._unpickle_array(items))
        self._major_axis = _ensure_index(com._unpickle_array(major))
        self._minor_axis = _ensure_index(com._unpickle_array(minor))
        self._frames = frames
Exemplo n.º 4
0
    def __setstate__(self, state):
        frames, items, major, minor, fv, kind = state

        from pandas.io.pickle import _unpickle_array
        self.default_fill_value = fv
        self.default_kind = kind
        self._items = _ensure_index(_unpickle_array(items))
        self._major_axis = _ensure_index(_unpickle_array(major))
        self._minor_axis = _ensure_index(_unpickle_array(minor))
        self._frames = frames
Exemplo n.º 5
0
    def __init__(self, values, items, ref_items, ndim=2):
        if issubclass(values.dtype.type, basestring):
            values = np.array(values, dtype=object)

        assert(values.ndim == ndim)
        assert(len(items) == len(values))

        self._ref_locs = None
        self.values = values
        self.ndim = ndim
        self.items = _ensure_index(items)
        self.ref_items = _ensure_index(ref_items)
Exemplo n.º 6
0
    def __init__(self, values, items, ref_items, ndim=2, do_integrity_check=False):
        if issubclass(values.dtype.type, basestring):
            values = np.array(values, dtype=object)

        assert values.ndim == ndim
        assert len(items) == len(values)

        self.values = values
        self.ndim = ndim
        self.items = _ensure_index(items)
        self.ref_items = _ensure_index(ref_items)

        if do_integrity_check:
            self._check_integrity()
Exemplo n.º 7
0
    def __init__(self, values, items, ref_items, ndim=2):
        if issubclass(values.dtype.type, basestring):
            values = np.array(values, dtype=object)

        if values.ndim != ndim:
            raise AssertionError('Wrong number of dimensions')

        if len(items) != len(values):
            raise AssertionError('Wrong number of items passed')

        self._ref_locs = None
        self.values = values
        self.ndim = ndim
        self.items = _ensure_index(items)
        self.ref_items = _ensure_index(ref_items)
Exemplo n.º 8
0
 def __init__(self, bins, binlabels, filter_empty=False, mutated=False,
              indexer=None):
     self.bins = _ensure_int64(bins)
     self.binlabels = _ensure_index(binlabels)
     self._filter_empty_groups = filter_empty
     self.mutated = mutated
     self.indexer = indexer
Exemplo n.º 9
0
    def _interleave(self, items):
        """
        Return ndarray from blocks with specified item order
        Items must be contained in the blocks
        """
        dtype = _interleaved_dtype(self.blocks)
        items = _ensure_index(items)

        result = np.empty(self.shape, dtype=dtype)
        itemmask = np.zeros(len(items), dtype=bool)

        # By construction, all of the item should be covered by one of the
        # blocks
        if items.is_unique:
            for block in self.blocks:
                indexer = items.get_indexer(block.items)
                if (indexer == -1).any():
                    raise AssertionError('Items must contain all block items')
                result[indexer] = block.get_values(dtype)
                itemmask[indexer] = 1
        else:
            for block in self.blocks:
                mask = items.isin(block.items)
                indexer = mask.nonzero()[0]
                if (len(indexer) != len(block.items)):
                    raise AssertionError('All items must be in block items')
                result[indexer] = block.get_values(dtype)
                itemmask[indexer] = 1

        if not itemmask.all():
            raise AssertionError('Some items were not contained in blocks')

        return result
Exemplo n.º 10
0
    def _set_levels(self, levels):
        from pandas.core.index import _ensure_index

        levels = _ensure_index(levels)
        if not levels.is_unique:
            raise ValueError("Categorical levels must be unique")
        self._levels = levels
Exemplo n.º 11
0
    def __init__(self, frames, items=None, major_axis=None, minor_axis=None,
                 default_fill_value=np.nan, default_kind='block'):
        assert(isinstance(frames, dict))

        self.default_fill_value = fill_value = default_fill_value
        self.default_kind = kind = default_kind

        # pre-filter, if necessary
        if items is None:
            items = Index(sorted(frames.keys()))
        items = _ensure_index(items)

        (clean_frames,
         major_axis,
         minor_axis) = _convert_frames(frames, major_axis,
                                       minor_axis, kind=kind,
                                       fill_value=fill_value)

        self._frames = clean_frames

        # do we want to fill missing ones?
        for item in items:
            if item not in clean_frames:
                raise Exception('column %s not found in data' % item)

        self._items = items
        self.major_axis = major_axis
        self.minor_axis = minor_axis
Exemplo n.º 12
0
    def reindex(self, index=None, method=None, copy=True, limit=None):
        """
        Conform SparseSeries to new Index

        See Series.reindex docstring for general behavior

        Returns
        -------
        reindexed : SparseSeries
        """
        new_index = _ensure_index(index)

        if self.index.equals(new_index):
            if copy:
                return self.copy()
            else:
                return self

        if len(self.index) == 0:
            # FIXME: inelegant / slow
            values = np.empty(len(new_index), dtype=np.float64)
            values.fill(nan)
            return SparseSeries(values, index=new_index, fill_value=self.fill_value)

        new_index, fill_vec = self.index.reindex(index, method=method, limit=limit)
        new_values = common.take_1d(self.values, fill_vec)
        return SparseSeries(new_values, index=new_index, fill_value=self.fill_value, name=self.name)
Exemplo n.º 13
0
    def __init__(self, frame, buf=None, columns=None, col_space=None,
                 header=True, index=True, na_rep='NaN', formatters=None,
                 justify=None, float_format=None, sparsify=None,
                 index_names=True, line_width=None, **kwds):
        self.frame = frame
        self.buf = buf if buf is not None else StringIO()
        self.show_index_names = index_names

        if sparsify is None:
            sparsify = get_option("print.multi_sparse")

        self.sparsify = sparsify

        self.float_format = float_format
        self.formatters = formatters if formatters is not None else {}
        self.na_rep = na_rep
        self.col_space = col_space
        self.header = header
        self.index = index
        self.line_width = line_width

        if justify is None:
            self.justify = get_option("print.colheader_justify")
        else:
            self.justify = justify

        self.kwds = kwds

        if columns is not None:
            self.columns = _ensure_index(columns)
            self.frame = self.frame[self.columns]
        else:
            self.columns = frame.columns
Exemplo n.º 14
0
    def __init__(self, data=None, index=None, columns=None,
                 default_kind='block', default_fill_value=None):
        if default_fill_value is None:
            default_fill_value = np.nan

        self.default_kind = default_kind
        self.default_fill_value = default_fill_value

        if isinstance(data, dict):
            sdict, columns, index = self._init_dict(data, index, columns)
        elif isinstance(data, (np.ndarray, list)):
            sdict, columns, index = self._init_matrix(data, index, columns)
        elif isinstance(data, DataFrame):
            sdict, columns, index = self._init_dict(data, data.index,
                                                    data.columns)
        elif data is None:
            sdict = {}

            if index is None:
                index = Index([])
            else:
                index = _ensure_index(index)

            if columns is None:
                columns = Index([])
            else:
                for c in columns:
                    sdict[c] = SparseSeries(np.nan, index=index,
                                            kind=self.default_kind,
                                            fill_value=self.default_fill_value)

        self._series = sdict
        self.columns = columns
        self.index = index
Exemplo n.º 15
0
    def __init__(
        self,
        frame,
        buf=None,
        columns=None,
        col_space=None,
        na_rep="NaN",
        formatters=None,
        float_format=None,
        sparsify=True,
        index_names=True,
    ):

        self.frame = frame
        self.buf = buf if buf is not None else StringIO()
        self.show_index_names = index_names
        self.sparsify = sparsify
        self.float_format = float_format
        self.formatters = formatters
        self.na_rep = na_rep
        self.col_space = col_space

        if columns is not None:
            self.columns = _ensure_index(columns)
        else:
            self.columns = frame.columns
Exemplo n.º 16
0
    def __init__(self, frame, buf=None, columns=None, col_space=None,
                 header=True, index=True, na_rep='NaN', formatters=None,
                 justify=None, float_format=None, sparsify=True,
                 index_names=True, **kwds):
        self.frame = frame
        self.buf = buf if buf is not None else StringIO()
        self.show_index_names = index_names
        self.sparsify = sparsify
        self.float_format = float_format
        self.formatters = formatters
        self.na_rep = na_rep
        self.col_space = col_space
        self.header = header
        self.index = index

        if justify is None:
            self.justify = com.print_config.colheader_justify
        else:
            self.justify = justify

        self.kwds = kwds

        if columns is not None:
            self.columns = _ensure_index(columns)
        else:
            self.columns = frame.columns
Exemplo n.º 17
0
    def reindex_items(self, new_items, copy=True):
        """

        """
        new_items = _ensure_index(new_items)
        data = self
        if not data.is_consolidated():
            data = data.consolidate()
            return data.reindex_items(new_items)

        # TODO: this part could be faster (!)
        new_items, indexer = self.items.reindex(new_items)

        # could have some pathological (MultiIndex) issues here
        new_blocks = []
        if indexer is None:
            for blk in self.blocks:
                if copy:
                    new_blocks.append(blk.reindex_items_from(new_items))
                else:
                    new_blocks.append(blk)
        else:
            for block in self.blocks:
                newb = block.reindex_items_from(new_items, copy=copy)
                if len(newb.items) > 0:
                    new_blocks.append(newb)

            mask = indexer == -1
            if mask.any():
                extra_items = new_items[mask]
                na_block = self._make_na_block(extra_items, new_items)
                new_blocks.append(na_block)
                new_blocks = _consolidate(new_blocks, new_items)

        return BlockManager(new_blocks, [new_items] + self.axes[1:])
Exemplo n.º 18
0
    def _has_valid_type(self, key, axis):
        ax = self.obj._get_axis(axis)

        # valid for a label where all labels are in the index
        # slice of lables (where start-end in labels)
        # slice of integers (only if in the lables)
        # boolean

        if isinstance(key, slice):

            if ax.is_floating():

                # allowing keys to be slicers with no fallback
                pass

            else:
                if key.start is not None:
                    if key.start not in ax:
                        raise KeyError("start bound [%s] is not the [%s]" % (key.start,self.obj._get_axis_name(axis)))
                if key.stop is not None:
                    if key.stop not in ax:
                        raise KeyError("stop bound [%s] is not in the [%s]" % (key.stop,self.obj._get_axis_name(axis)))

        elif com._is_bool_indexer(key):
                return True

        elif _is_list_like(key):

            # mi is just a passthru
            if isinstance(key, tuple) and isinstance(ax, MultiIndex):
                return True

            # require all elements in the index
            idx = _ensure_index(key)
            if not idx.isin(ax).all():
                raise KeyError("[%s] are not in ALL in the [%s]" % (key,self.obj._get_axis_name(axis)))

            return True

        else:

            def error():
                if isnull(key):
                    raise ValueError("cannot use label indexing with a null key")
                raise KeyError("the label [%s] is not in the [%s]" % (key,self.obj._get_axis_name(axis)))

            try:
                key = self._convert_scalar_indexer(key, axis)
                if not key in ax:
                    error()
            except (TypeError) as e:

                # python 3 type errors should be raised
                if 'unorderable' in str(e):  # pragma: no cover
                    error()
                raise
            except:
                error()

        return True
Exemplo n.º 19
0
    def _get_concat_axis(self):
        if self._is_series:
            if self.axis == 0:
                indexes = [x.index for x in self.objs]
            elif self.keys is None:
                names = []
                for x in self.objs:
                    if not isinstance(x, Series):
                        raise TypeError(
                            "Cannot concatenate type 'Series' " "with object of type " "%r" % type(x).__name__
                        )
                    if x.name is not None:
                        names.append(x.name)
                    else:
                        return Index(np.arange(len(self.objs)))
                return Index(names)
            else:
                return _ensure_index(self.keys)
        else:
            indexes = [x._data.axes[self.axis] for x in self.objs]

        if self.keys is None:
            concat_axis = _concat_indexes(indexes)
        else:
            concat_axis = _make_concat_multiindex(indexes, self.keys, self.levels, self.names)

        self._maybe_check_integrity(concat_axis)

        return concat_axis
Exemplo n.º 20
0
    def remove_unused_categories(self, inplace=False):
        """ Removes categories which are not used.

        Parameters
        ----------
        inplace : boolean (default: False)
           Whether or not to drop unused categories inplace or return a copy of this categorical
           with unused categories dropped.

        Returns
        -------
        cat : Categorical with unused categories dropped or None if inplace.

        See also
        --------
        rename_categories
        reorder_categories
        add_categories
        remove_categories
        set_categories
        """
        cat = self if inplace else self.copy()
        _used = sorted(np.unique(cat._codes))
        new_categories = cat.categories.take(com._ensure_platform_int(_used))
        new_categories = _ensure_index(new_categories)
        cat._codes = _get_codes_for_values(cat.__array__(), new_categories)
        cat._categories = new_categories
        if not inplace:
            return cat
Exemplo n.º 21
0
def _extract_axis(data, axis=0, intersect=False):
    if len(data) == 0:
        index = Index([])
    elif len(data) > 0:
        raw_lengths = []
        indexes = []

        have_raw_arrays = False
        have_frames = False

        for v in data.values():
            if isinstance(v, DataFrame):
                have_frames = True
                indexes.append(v._get_axis(axis))
            else:
                have_raw_arrays = True
                raw_lengths.append(v.shape[axis])

        if have_frames:
            index = _get_combined_index(indexes, intersect=intersect)

        if have_raw_arrays:
            lengths = list(set(raw_lengths))
            if len(lengths) > 1:
                raise ValueError('ndarrays must match shape on axis %d' % axis)

            if have_frames:
                assert(lengths[0] == len(index))
            else:
                index = Index(np.arange(lengths[0]))

    return _ensure_index(index)
Exemplo n.º 22
0
    def _get_concat_axis(self):
        if self._is_series:
            if self.axis == 0:
                indexes = [x.index for x in self.objs]
            elif self.keys is None:
                names = []
                for x in self.objs:
                    if x.name is not None:
                        names.append(x.name)
                    else:
                        return Index(np.arange(len(self.objs)))
                return Index(names)
            else:
                return _ensure_index(self.keys)
        else:
            indexes = [x._data.axes[self.axis] for x in self.objs]

        if self.keys is None:
            concat_axis = _concat_indexes(indexes)
        else:
            concat_axis = _make_concat_multiindex(indexes, self.keys,
                                                  self.levels, self.names)

        self._maybe_check_integrity(concat_axis)

        return concat_axis
Exemplo n.º 23
0
    def reindex_axis(self, new_axis, method=None, axis=0, copy=True):
        new_axis = _ensure_index(new_axis)
        cur_axis = self.axes[axis]

        if new_axis.equals(cur_axis):
            if copy:
                result = self.copy(deep=True)
                result.axes[axis] = new_axis

                if axis == 0:
                    # patch ref_items, #1823
                    for blk in result.blocks:
                        blk.ref_items = new_axis

                return result
            else:
                return self

        if axis == 0:
            if method is not None:
                raise AssertionError('method argument not supported for '
                                     'axis == 0')
            return self.reindex_items(new_axis)

        new_axis, indexer = cur_axis.reindex(new_axis, method)
        return self.reindex_indexer(new_axis, indexer, axis=axis)
Exemplo n.º 24
0
    def _interleave(self, items):
        """
        Return ndarray from blocks with specified item order
        Items must be contained in the blocks
        """
        dtype = _interleaved_dtype(self.blocks)
        items = _ensure_index(items)

        result = np.empty(self.shape, dtype=dtype)
        itemmask = np.zeros(len(items), dtype=bool)

        # By construction, all of the item should be covered by one of the
        # blocks
        if items.is_unique:
            for block in self.blocks:
                indexer = items.get_indexer(block.items)
                assert((indexer != -1).all())
                result[indexer] = block.get_values(dtype)
                itemmask[indexer] = 1
        else:
            for block in self.blocks:
                mask = items.isin(block.items)
                indexer = mask.nonzero()[0]
                assert(len(indexer) == len(block.items))
                result[indexer] = block.get_values(dtype)
                itemmask[indexer] = 1

        assert(itemmask.all())

        return result
Exemplo n.º 25
0
    def set_axis(self, axis, value):
        cur_axis = self.axes[axis]
        if len(value) != len(cur_axis):
            raise Exception("Length mismatch (%d vs %d)" % (len(value), len(cur_axis)))
        self.axes[axis] = _ensure_index(value)

        if axis == 0:
            for block in self.blocks:
                block.set_ref_items(self.items, maybe_rename=True)
Exemplo n.º 26
0
    def remove_unused_levels(self):
        """ Removes levels which are not used.

        The level removal is done inplace.
        """
        _used = sorted(np.unique(self._codes))
        new_levels = self.levels.take(_used)
        new_levels = _ensure_index(new_levels)
        self._codes = _get_codes_for_values(self.__array__(), new_levels)
        self._levels = new_levels
Exemplo n.º 27
0
    def __init__(self, labels, levels, name=None):
        from pandas.core.index import _ensure_index

        levels = _ensure_index(levels)
        if not levels.is_unique:
            raise ValueError('Factor levels must be unique')

        self.labels = labels
        self.levels = levels
        self.name = name
Exemplo n.º 28
0
    def __set__(self, obj, value):
        value = _ensure_index(value)

        if isinstance(value, MultiIndex):
            raise NotImplementedError

        for v in compat.itervalues(obj._frames):
            setattr(v, self.frame_attr, value)

        setattr(obj, self.cache_field, value)
Exemplo n.º 29
0
    def __init__(self, blocks, axes, do_integrity_check=True):
        self.axes = [_ensure_index(ax) for ax in axes]
        self.blocks = blocks

        ndim = len(axes)
        for block in blocks:
            assert(ndim == block.values.ndim)

        if do_integrity_check:
            self._verify_integrity()
Exemplo n.º 30
0
    def __setstate__(self, state):
        # discard anything after 3rd, support beta pickling format for a little
        # while longer
        ax_arrays, bvalues, bitems = state[:3]

        self.axes = [_ensure_index(ax) for ax in ax_arrays]
        blocks = []
        for values, items in zip(bvalues, bitems):
            blk = make_block(values, items, self.axes[0], do_integrity_check=True)
            blocks.append(blk)
        self.blocks = blocks
Exemplo n.º 31
0
    def __new__(cls,
                data,
                index=None,
                sparse_index=None,
                kind='block',
                fill_value=None,
                name=None,
                copy=False):

        is_sparse_array = isinstance(data, SparseArray)
        if fill_value is None:
            if is_sparse_array:
                fill_value = data.fill_value
            else:
                fill_value = nan

        if is_sparse_array:
            if isinstance(data, SparseSeries) and index is None:
                index = data.index
            elif index is not None:
                assert (len(index) == len(data))

            sparse_index = data.sp_index
            values = np.asarray(data)
        elif isinstance(data, (Series, dict)):
            if index is None:
                index = data.index

            data = Series(data)
            values, sparse_index = make_sparse(data,
                                               kind=kind,
                                               fill_value=fill_value)
        elif np.isscalar(data):  # pragma: no cover
            if index is None:
                raise Exception('must pass index!')

            values = np.empty(len(index))
            values.fill(data)

            # TODO: more efficient

            values, sparse_index = make_sparse(values,
                                               kind=kind,
                                               fill_value=fill_value)

        else:
            # array-like
            if sparse_index is None:
                values, sparse_index = make_sparse(data,
                                                   kind=kind,
                                                   fill_value=fill_value)
            else:
                values = data
                assert (len(values) == sparse_index.npoints)

        if index is None:
            index = Index(np.arange(sparse_index.length))
        index = _ensure_index(index)

        # Create array, do *not* copy data by default
        if copy:
            subarr = np.array(values, dtype=np.float64, copy=True)
        else:
            subarr = np.asarray(values, dtype=np.float64)

        if index.is_all_dates:
            cls = SparseTimeSeries

        # Change the class of the array to be the subclass type.
        output = subarr.view(cls)
        output.sp_index = sparse_index
        output.fill_value = np.float64(fill_value)
        output.index = index
        output.name = name
        return output
Exemplo n.º 32
0
def _make_concat_multiindex(indexes, keys, levels=None, names=None):
    if ((levels is None and isinstance(keys[0], tuple))
            or (levels is not None and len(levels) > 1)):
        zipped = zip(*keys)
        if names is None:
            names = [None] * len(zipped)

        if levels is None:
            levels = [Factor(zp).levels for zp in zipped]
        else:
            levels = [_ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]

        if levels is None:
            levels = [_ensure_index(keys)]
        else:
            levels = [_ensure_index(x) for x in levels]

    if not _all_indexes_same(indexes):
        label_list = []

        # things are potentially different sizes, so compute the exact labels
        # for each level and pass those to MultiIndex.from_arrays

        for hlevel, level in zip(zipped, levels):
            to_concat = []
            for key, index in zip(hlevel, indexes):
                i = level.get_loc(key)
                to_concat.append(np.repeat(i, len(index)))
            label_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            label_list.extend(concat_index.labels)
        else:
            factor = Factor(concat_index)
            levels.append(factor.levels)
            label_list.append(factor.labels)

        # also copies
        names = names + _get_consensus_names(indexes)

        return MultiIndex(levels=levels, labels=label_list, names=names)

    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct labels
    new_labels = []

    # do something a bit more speedy

    for hlevel, level in zip(zipped, levels):
        mapped = level.get_indexer(hlevel)
        new_labels.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_labels.extend([np.tile(lab, kpieces) for lab in new_index.labels])
        new_names.extend(new_index.names)
    else:
        new_levels.append(new_index)
        new_names.append(new_index.name)
        new_labels.append(np.tile(np.arange(n), kpieces))

    return MultiIndex(levels=new_levels, labels=new_labels, names=new_names)
Exemplo n.º 33
0
 def __setstate__(self, state):
     items, ref_items, values = state
     self.items = _ensure_index(items)
     self.ref_items = _ensure_index(ref_items)
     self.values = values
     self.ndim = values.ndim
Exemplo n.º 34
0
    def set_items_norename(self, value):
        value = _ensure_index(value)
        self.axes[0] = value

        for block in self.blocks:
            block.set_ref_items(value, maybe_rename=False)
Exemplo n.º 35
0
    def __init__(self,
                 data=None,
                 index=None,
                 sparse_index=None,
                 kind='block',
                 fill_value=None,
                 name=None,
                 dtype=None,
                 copy=False,
                 fastpath=False):

        # we are called internally, so short-circuit
        if fastpath:

            # data is an ndarray, index is defined

            if not isinstance(data, SingleBlockManager):
                data = SingleBlockManager(data, index, fastpath=True)
            if copy:
                data = data.copy()

        else:

            if data is None:
                data = []

            if isinstance(data, Series) and name is None:
                name = data.name

            if isinstance(data, SparseArray):
                if index is not None:
                    assert (len(index) == len(data))
                sparse_index = data.sp_index
                if fill_value is None:
                    fill_value = data.fill_value

                data = np.asarray(data)

            elif isinstance(data, SparseSeries):
                if index is None:
                    index = data.index.view()
                if fill_value is None:
                    fill_value = data.fill_value
                # extract the SingleBlockManager
                data = data._data

            elif isinstance(data, (Series, dict)):
                data = Series(data, index=index)
                index = data.index.view()

                res = make_sparse(data, kind=kind, fill_value=fill_value)
                data, sparse_index, fill_value = res

            elif isinstance(data, (tuple, list, np.ndarray)):
                # array-like
                if sparse_index is None:
                    res = make_sparse(data, kind=kind, fill_value=fill_value)
                    data, sparse_index, fill_value = res
                else:
                    assert (len(data) == sparse_index.npoints)

            elif isinstance(data, SingleBlockManager):
                if dtype is not None:
                    data = data.astype(dtype)
                if index is None:
                    index = data.index.view()
                else:

                    data = data.reindex(index, copy=False)

            else:
                length = len(index)

                if data == fill_value or (isna(data) and isna(fill_value)):
                    if kind == 'block':
                        sparse_index = BlockIndex(length, [], [])
                    else:
                        sparse_index = IntIndex(length, [])
                    data = np.array([])

                else:
                    if kind == 'block':
                        locs, lens = ([0], [length]) if length else ([], [])
                        sparse_index = BlockIndex(length, locs, lens)
                    else:
                        sparse_index = IntIndex(length, index)
                    v = data
                    data = np.empty(length)
                    data.fill(v)

            if index is None:
                index = com._default_index(sparse_index.length)
            index = _ensure_index(index)

            # create/copy the manager
            if isinstance(data, SingleBlockManager):

                if copy:
                    data = data.copy()
            else:

                # create a sparse array
                if not isinstance(data, SparseArray):
                    data = SparseArray(data,
                                       sparse_index=sparse_index,
                                       fill_value=fill_value,
                                       dtype=dtype,
                                       copy=copy)

                data = SingleBlockManager(data, index)

        generic.NDFrame.__init__(self, data)

        self.index = index
        self.name = name
Exemplo n.º 36
0
def _make_concat_multiindex(indexes, keys, levels=None, names=None):
    if ((levels is None and isinstance(keys[0], tuple))
            or (levels is not None and len(levels) > 1)):
        zipped = lzip(*keys)
        if names is None:
            names = [None] * len(zipped)

        if levels is None:
            levels = [Categorical.from_array(zp).levels for zp in zipped]
        else:
            levels = [_ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]

        if levels is None:
            levels = [_ensure_index(keys)]
        else:
            levels = [_ensure_index(x) for x in levels]

    if not _all_indexes_same(indexes):
        label_list = []

        # things are potentially different sizes, so compute the exact labels
        # for each level and pass those to MultiIndex.from_arrays

        for hlevel, level in zip(zipped, levels):
            to_concat = []
            for key, index in zip(hlevel, indexes):
                try:
                    i = level.get_loc(key)
                except KeyError:
                    raise ValueError('Key %s not in level %s' %
                                     (str(key), str(level)))

                to_concat.append(np.repeat(i, len(index)))
            label_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            label_list.extend(concat_index.labels)
        else:
            factor = Categorical.from_array(concat_index)
            levels.append(factor.levels)
            label_list.append(factor.labels)

        if len(names) == len(levels):
            names = list(names)
        else:
            # make sure that all of the passed indices have the same nlevels
            if not len(set([i.nlevels for i in indexes])) == 1:
                raise AssertionError("Cannot concat indices that do"
                                     " not have the same number of levels")

            # also copies
            names = names + _get_consensus_names(indexes)

        return MultiIndex(levels=levels, labels=label_list, names=names)

    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct labels
    new_labels = []

    # do something a bit more speedy

    for hlevel, level in zip(zipped, levels):
        hlevel = _ensure_index(hlevel)
        mapped = level.get_indexer(hlevel)

        mask = mapped == -1
        if mask.any():
            raise ValueError('Values not found in passed level: %s' %
                             str(hlevel[mask]))

        new_labels.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_labels.extend([np.tile(lab, kpieces) for lab in new_index.labels])
    else:
        new_levels.append(new_index)
        new_labels.append(np.tile(np.arange(n), kpieces))

    if len(new_names) < len(new_levels):
        new_names.extend(new_index.names)

    return MultiIndex(levels=new_levels, labels=new_labels, names=new_names)
Exemplo n.º 37
0
    def __init__(self,
                 data=None,
                 index=None,
                 columns=None,
                 default_kind=None,
                 default_fill_value=None,
                 dtype=None,
                 copy=False):

        # pick up the defaults from the Sparse structures
        if isinstance(data, SparseDataFrame):
            if index is None:
                index = data.index
            if columns is None:
                columns = data.columns
            if default_fill_value is None:
                default_fill_value = data.default_fill_value
            if default_kind is None:
                default_kind = data.default_kind
        elif isinstance(data, (SparseSeries, SparseArray)):
            if index is None:
                index = data.index
            if default_fill_value is None:
                default_fill_value = data.fill_value
            if columns is None and hasattr(data, 'name'):
                columns = [data.name]
            if columns is None:
                raise Exception("cannot pass a series w/o a name or columns")
            data = {columns[0]: data}

        if default_fill_value is None:
            default_fill_value = np.nan
        if default_kind is None:
            default_kind = 'block'

        self._default_kind = default_kind
        self._default_fill_value = default_fill_value

        if is_scipy_sparse(data):
            mgr = self._init_spmatrix(data,
                                      index,
                                      columns,
                                      dtype=dtype,
                                      fill_value=default_fill_value)
        elif isinstance(data, dict):
            mgr = self._init_dict(data, index, columns, dtype=dtype)
        elif isinstance(data, (np.ndarray, list)):
            mgr = self._init_matrix(data, index, columns, dtype=dtype)
        elif isinstance(data, SparseDataFrame):
            mgr = self._init_mgr(data._data,
                                 dict(index=index, columns=columns),
                                 dtype=dtype,
                                 copy=copy)
        elif isinstance(data, DataFrame):
            mgr = self._init_dict(data, data.index, data.columns, dtype=dtype)
        elif isinstance(data, BlockManager):
            mgr = self._init_mgr(data,
                                 axes=dict(index=index, columns=columns),
                                 dtype=dtype,
                                 copy=copy)
        elif data is None:
            data = DataFrame()

            if index is None:
                index = Index([])
            else:
                index = _ensure_index(index)

            if columns is None:
                columns = Index([])
            else:
                for c in columns:
                    data[c] = SparseArray(np.nan,
                                          index=index,
                                          kind=self._default_kind,
                                          fill_value=self._default_fill_value)
            mgr = to_manager(data, columns, index)
            if dtype is not None:
                mgr = mgr.astype(dtype)

        generic.NDFrame.__init__(self, mgr)
Exemplo n.º 38
0
    def __new__(cls,
                data,
                index=None,
                sparse_index=None,
                kind='block',
                fill_value=None,
                name=None,
                copy=False):

        is_sparse_array = isinstance(data, SparseArray)
        if fill_value is None:
            if is_sparse_array:
                fill_value = data.fill_value
            else:
                fill_value = nan

        if is_sparse_array:
            if isinstance(data, SparseSeries) and index is None:
                index = data.index
            elif index is not None:
                if not (len(index) == len(data)):
                    raise AssertionError()

            sparse_index = data.sp_index
            values = np.asarray(data)
        elif isinstance(data, (Series, dict)):
            if index is None:
                index = data.index

            data = Series(data)
            values, sparse_index = make_sparse(data,
                                               kind=kind,
                                               fill_value=fill_value)
        elif isinstance(data, (tuple, list, np.ndarray)):
            # array-like
            if sparse_index is None:
                values, sparse_index = make_sparse(data,
                                                   kind=kind,
                                                   fill_value=fill_value)
            else:
                values = data
                if not (len(values) == sparse_index.npoints):
                    raise AssertionError()
        else:
            if index is None:
                raise TypeError('must pass index!')

            length = len(index)

            if data == fill_value or (isnull(data) and isnull(fill_value)):
                if kind == 'block':
                    sparse_index = BlockIndex(length, [], [])
                else:
                    sparse_index = IntIndex(length, [])
                values = np.array([])
            else:
                if kind == 'block':
                    locs, lens = ([0], [length]) if length else ([], [])
                    sparse_index = BlockIndex(length, locs, lens)
                else:
                    sparse_index = IntIndex(length, index)
                values = np.empty(length)
                values.fill(data)

        if index is None:
            index = com._default_index(sparse_index.length)
        index = _ensure_index(index)

        # Create array, do *not* copy data by default
        if copy:
            subarr = np.array(values, dtype=np.float64, copy=True)
        else:
            subarr = np.asarray(values, dtype=np.float64)

        if index.is_all_dates:
            cls = SparseTimeSeries

        # Change the class of the array to be the subclass type.
        output = subarr.view(cls)
        output.sp_index = sparse_index
        output.fill_value = np.float64(fill_value)
        output.index = index
        output.name = name
        return output
Exemplo n.º 39
0
 def _set_columns(self, cols):
     if len(cols) != len(self._series):
         raise Exception('Columns length %d did not match data %d!' %
                         (len(cols), len(self._series)))
     self._columns = _ensure_index(cols)
Exemplo n.º 40
0
    def _has_valid_type(self, key, axis):
        ax = self.obj._get_axis(axis)

        # valid for a label where all labels are in the index
        # slice of lables (where start-end in labels)
        # slice of integers (only if in the lables)
        # boolean

        if isinstance(key, slice):

            if ax.is_floating():

                # allowing keys to be slicers with no fallback
                pass

            else:
                if key.start is not None:
                    if key.start not in ax:
                        raise KeyError(
                            "start bound [%s] is not the [%s]" %
                            (key.start, self.obj._get_axis_name(axis)))
                if key.stop is not None:
                    if key.stop not in ax:
                        raise KeyError(
                            "stop bound [%s] is not in the [%s]" %
                            (key.stop, self.obj._get_axis_name(axis)))

        elif com._is_bool_indexer(key):
            return True

        elif _is_list_like(key):

            # mi is just a passthru
            if isinstance(key, tuple) and isinstance(ax, MultiIndex):
                return True

            # require all elements in the index
            idx = _ensure_index(key)
            if not idx.isin(ax).all():
                raise KeyError("[%s] are not in ALL in the [%s]" %
                               (key, self.obj._get_axis_name(axis)))

            return True

        else:

            def error():
                if isnull(key):
                    raise ValueError(
                        "cannot use label indexing with a null key")
                raise KeyError("the label [%s] is not in the [%s]" %
                               (key, self.obj._get_axis_name(axis)))

            try:
                key = self._convert_scalar_indexer(key, axis)
                if not key in ax:
                    error()
            except (TypeError) as e:

                # python 3 type errors should be raised
                if 'unorderable' in str(e):  # pragma: no cover
                    error()
                raise
            except:
                error()

        return True
Exemplo n.º 41
0
 def _validate_levels(cls, levels):
     """" Validates that we have good levels """
     levels = _ensure_index(levels)
     if not levels.is_unique:
         raise ValueError('Categorical levels must be unique')
     return levels
Exemplo n.º 42
0
 def _set_index(self, index):
     self._index = _ensure_index(index)
     for v in self._series.values():
         v.index = self._index