Пример #1
0
class TestFactor(unittest.TestCase):
    """Checks Factor indexing, construction and per-level aggregation."""

    def setUp(self):
        # Fixture shared by every test: eight values over three distinct items.
        values = ["a", "b", "b", "a", "a", "c", "c", "c"]
        self.factor = Factor(values)

    def test_getitem(self):
        # Scalar positions (including negative ones) give back the raw value.
        for position, expected in ((0, "a"), (-1, "c")):
            self.assertEqual(self.factor[position], expected)

        # Indexing with a list of positions yields an object exposing labels.
        by_position = self.factor[[0, 1, 2]]
        tm.assert_almost_equal(by_position.labels, [0, 1, 1])

        # Indexing with the result of comparing asarray() against "c".
        is_c = self.factor.asarray() == "c"
        by_mask = self.factor[is_c]
        tm.assert_almost_equal(by_mask.labels, [2, 2, 2])

    def test_constructor_unsortable(self):
        # Smoke test only: integers mixed with a datetime in an object array
        # must not make the constructor raise.
        mixed = np.array([1, 2, 3, datetime.now()], dtype="O")
        Factor(mixed)

    def test_factor_agg(self):
        import pandas.core.frame as frame

        data = np.arange(len(self.factor))
        reducer = np.sum

        result = frame.factor_agg(self.factor, data, reducer)
        codes = self.factor.labels

        # Entry i of the result must equal the reduction over the rows whose
        # label is i.
        for code, _level in enumerate(self.factor.levels):
            expected = reducer(data[codes == code])
            self.assertEqual(expected, result[code])
Пример #2
0
class TestFactor(unittest.TestCase):
    """Checks Factor indexing and per-level aggregation."""

    def setUp(self):
        # Fixture shared by every test: eight values over three distinct items.
        values = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
        self.factor = Factor(values)

    def test_getitem(self):
        # Scalar positions (including negative ones) give back the raw value.
        for position, expected in ((0, 'a'), (-1, 'c')):
            self.assertEqual(self.factor[position], expected)

        # Indexing with a list of positions yields an object exposing labels.
        by_position = self.factor[[0, 1, 2]]
        tm.assert_almost_equal(by_position.labels, [0, 1, 1])

        # Indexing with the result of comparing asarray() against 'c'.
        is_c = self.factor.asarray() == 'c'
        by_mask = self.factor[is_c]
        tm.assert_almost_equal(by_mask.labels, [2, 2, 2])

    def test_factor_agg(self):
        import pandas.core.frame as frame

        data = np.arange(len(self.factor))
        reducer = np.sum

        result = frame.factor_agg(self.factor, data, reducer)
        codes = self.factor.labels

        # Entry i of the result must equal the reduction over the rows whose
        # label is i.
        for code, _level in enumerate(self.factor.levels):
            expected = reducer(data[codes == code])
            self.assertEqual(expected, result[code])
Пример #3
0
class TestFactor(unittest.TestCase):
    """Checks Factor indexing, construction and per-level aggregation."""

    def setUp(self):
        # Fixture shared by every test: eight values over three distinct items.
        values = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
        self.factor = Factor(values)

    def test_getitem(self):
        # Scalar positions (including negative ones) give back the raw value.
        for position, expected in ((0, 'a'), (-1, 'c')):
            self.assertEqual(self.factor[position], expected)

        # Indexing with a list of positions yields an object exposing labels.
        by_position = self.factor[[0, 1, 2]]
        tm.assert_almost_equal(by_position.labels, [0, 1, 1])

        # Indexing with the result of comparing asarray() against 'c'.
        is_c = self.factor.asarray() == 'c'
        by_mask = self.factor[is_c]
        tm.assert_almost_equal(by_mask.labels, [2, 2, 2])

    def test_constructor_unsortable(self):
        # Smoke test only: integers mixed with a datetime in an object array
        # must not make the constructor raise.
        mixed = np.array([1, 2, 3, datetime.now()], dtype='O')
        Factor(mixed)

    def test_factor_agg(self):
        import pandas.core.frame as frame

        data = np.arange(len(self.factor))
        reducer = np.sum

        result = frame.factor_agg(self.factor, data, reducer)
        codes = self.factor.labels

        # Entry i of the result must equal the reduction over the rows whose
        # label is i.
        for code, _level in enumerate(self.factor.levels):
            expected = reducer(data[codes == code])
            self.assertEqual(expected, result[code])
Пример #4
0
def panel_index(time, panels, names=['time', 'panel']):
    """
    Build a two-level MultiIndex for a panel-like DataFrame.

    Parameters
    ----------
    time : array-like
        Values for the time level; they do not need to be repeated
    panels : array-like
        Values for the panel level; they do not need to be repeated
    names : list, optional
        Names given to the two levels of the resulting index

    Returns
    -------
    multi_index : MultiIndex
        The first level holds the time values, the second the panels.

    Examples
    --------
    >>> years = range(1960,1963)
    >>> panels = ['A', 'B', 'C']
    >>> panel_idx = panel_index(years, panels)
    >>> panel_idx
    MultiIndex([(1960, 'A'), (1961, 'A'), (1962, 'A'), (1960, 'B'),
                (1961, 'B'), (1962, 'B'), (1960, 'C'), (1961, 'C'),
                (1962, 'C')], dtype=object)

    or

    >>> import numpy as np
    >>> years = np.repeat(range(1960,1963), 3)
    >>> panels = np.tile(['A', 'B', 'C'], 3)
    >>> panel_idx = panel_index(years, panels)
    >>> panel_idx
    MultiIndex([(1960, 'A'), (1960, 'B'), (1960, 'C'), (1961, 'A'),
                (1961, 'B'), (1961, 'C'), (1962, 'A'), (1962, 'B'),
                (1962, 'C')], dtype=object)
    """
    time, panels = _ensure_like_indices(time, panels)

    # One Factor per axis, time first so it becomes the outer level.
    factors = (Factor(time), Factor(panels))

    labels = [axis.labels for axis in factors]
    levels = [axis.levels for axis in factors]
    return MultiIndex(levels, labels, sortorder=None, names=names)
Пример #5
0
    def fromRecords(cls, data, major_field, minor_field,
                    exclude=None):
        """
        Build a LongPanel out of a DataFrame, a record / structured
        ndarray, or a dict of columns

        Parameters
        ----------
        data : DataFrame, structured or record array, or dict
        major_field : string
            Name of the field used for the first index level
        minor_field : string
            Name of the field used for the second index level
        exclude : list-like, default None
            Fields removed from the data before the panel is built

        Returns
        -------
        LongPanel
        """
        if isinstance(data, np.ndarray):
            # Only arrays with a void (structured / record) dtype have fields
            if not issubclass(data.dtype.type, np.void):
                raise ValueError('Input was not a structured array!')

            by_field = {}
            for name in data.dtype.names:
                by_field[name] = data[name]
            data = by_field
        elif isinstance(data, DataFrame):
            data = data._series.copy()
        elif isinstance(data, dict):
            # work on a copy; the pops below must not touch the caller's dict
            data = data.copy()

        for col in (set() if exclude is None else set(exclude)):
            del data[col]

        # The two index fields are removed from the data and factorized;
        # whatever is left becomes the panel's values.
        major_factor = Factor.fromarray(data.pop(major_field))
        minor_factor = Factor.fromarray(data.pop(minor_field))

        return LongPanel(
            data,
            index=MultiIndex(
                levels=[major_factor.levels, minor_factor.levels],
                labels=[major_factor.labels, minor_factor.labels]))
Пример #6
0
 def setUp(self):
     # Fixture shared by the tests: eight values over three distinct items.
     values = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
     self.factor = Factor(values)
Пример #7
0
 def setUp(self):
     # Fixture shared by the tests: eight values over three distinct items.
     values = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
     self.factor = Factor(values)
Пример #8
0
def _make_concat_multiindex(indexes, keys, levels=None, names=None):
    """
    Build the MultiIndex of a keyed concatenation: the key level(s) come
    first, followed by the level(s) of the concatenated indexes.

    Parameters
    ----------
    indexes : sequence of Index
        The index of each piece, paired positionally with ``keys``
    keys : sequence
        One key per piece; when the keys are tuples every tuple position
        becomes its own outer level
    levels : list of sequences, optional
        Explicit values for the key level(s); derived from ``keys`` when
        omitted
    names : list, optional
        Names of the key level(s); defaults to None for each of them

    Returns
    -------
    MultiIndex
    """
    if ((levels is None and isinstance(keys[0], tuple))
            or (levels is not None and len(levels) > 1)):
        # Materialize the transposed keys: the result is measured with len()
        # and iterated more than once below, which a one-shot zip iterator
        # (Python 3) cannot support.
        zipped = list(zip(*keys))
        if names is None:
            names = [None] * len(zipped)

        if levels is None:
            levels = [Factor(zp).levels for zp in zipped]
        else:
            levels = [_ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]

        if levels is None:
            levels = [_ensure_index(keys)]
        else:
            levels = [_ensure_index(x) for x in levels]

    if not _all_indexes_same(indexes):
        label_list = []

        # things are potentially different sizes, so compute the exact labels
        # for each level and pass those to the MultiIndex constructor

        for hlevel, level in zip(zipped, levels):
            to_concat = []
            for key, index in zip(hlevel, indexes):
                # position of the key in its level, repeated once per row
                # of the piece it belongs to
                i = level.get_loc(key)
                to_concat.append(np.repeat(i, len(index)))
            label_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            label_list.extend(concat_index.labels)
        else:
            factor = Factor(concat_index)
            levels.append(factor.levels)
            label_list.append(factor.labels)

        # also copies
        names = names + _get_consensus_names(indexes)

        return MultiIndex(levels=levels, labels=label_list, names=names)

    # Remaining case (_all_indexes_same was truthy): the first index is used
    # as the template for every piece.
    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct labels
    new_labels = []

    # do something a bit more speedy

    for hlevel, level in zip(zipped, levels):
        # each key's position in its level, repeated for the n rows of a piece
        mapped = level.get_indexer(hlevel)
        new_labels.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_labels.extend([np.tile(lab, kpieces) for lab in new_index.labels])
        new_names.extend(new_index.names)
    else:
        new_levels.append(new_index)
        new_names.append(new_index.name)
        new_labels.append(np.tile(np.arange(n), kpieces))

    return MultiIndex(levels=new_levels, labels=new_labels, names=new_names)
Пример #9
0
 def setUp(self):
     # Fixture shared by the tests: eight values over three distinct items.
     values = ["a", "b", "b", "a", "a", "c", "c", "c"]
     self.factor = Factor(values)
Пример #10
0
 def setUp(self):
     # Fixture shared by the tests, built through the fromarray constructor.
     values = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
     self.factor = Factor.fromarray(values)
Пример #11
0
    def _read_panel_table(self, group, where=None):
        """
        Read the table stored under ``group`` and rebuild a panel from it.

        Parameters
        ----------
        group : object
            Node exposing a ``table`` attribute; the table's ``_v_attrs``
            supply ``fields``, ``columns_kind`` and ``index_kind``
        where : optional
            Selection criteria, handed unchanged to ``Selection``

        Returns
        -------
        wp
            ``Panel(mgr)`` when every (index, column) pair occurs once,
            otherwise the result of ``DataFrame.to_panel()`` after dropping
            duplicates; reindexed on the minor axis when the selection
            carries a column filter
        """
        from pandas.core.index import unique_int64, Factor
        from pandas.core.common import _asarray_tuplesafe
        from pandas.core.internals import BlockManager
        from pandas.core.reshape import block2d_to_block3d

        table = getattr(group, 'table')

        # create the selection
        sel = Selection(table, where)
        sel.select()
        fields = table._v_attrs.fields

        columns = _maybe_convert(sel.values['column'],
                                 table._v_attrs.columns_kind)
        index = _maybe_convert(sel.values['index'], table._v_attrs.index_kind)
        values = sel.values['values']

        major = Factor(index)
        minor = Factor(columns)

        # Fold the two label arrays into one integer per row.
        # NOTE(review): this is only collision-free if minor.labels are
        # 0-based codes smaller than K -- confirm against Factor.
        J, K = len(major.levels), len(minor.levels)
        key = major.labels * K + minor.labels

        # As many unique keys as rows means no (index, column) pair repeats
        if len(unique_int64(key)) == len(key):
            sorter, _ = lib.groupsort_indexer(key, J * K)

            # the data need to be sorted
            sorted_values = values.take(sorter, axis=0)
            major_labels = major.labels.take(sorter)
            minor_labels = minor.labels.take(sorter)

            block = block2d_to_block3d(sorted_values, fields, (J, K),
                                       major_labels, minor_labels)

            mgr = BlockManager([block],
                               [block.items, major.levels, minor.levels])
            wp = Panel(mgr)
        else:
            if not self._quiet:  # pragma: no cover
                print(
                    'Duplicate entries in table, taking most recently '
                    'appended')

            # reconstruct
            long_index = MultiIndex.from_arrays([index, columns])
            lp = DataFrame(values, index=long_index, columns=fields)

            # need a better algorithm
            # NOTE(review): per the message printed above, the index map is
            # presumably expected to keep the last position of each
            # duplicated tuple -- confirm in lib.map_indices_object.
            tuple_index = long_index.get_tuple_index()
            index_map = lib.map_indices_object(tuple_index)

            unique_tuples = lib.fast_unique(tuple_index)
            unique_tuples = _asarray_tuplesafe(unique_tuples)

            # one position per unique tuple, used to pick the surviving rows
            indexer = lib.merge_indexer_object(unique_tuples, index_map)

            new_index = long_index.take(indexer)
            new_values = lp.values.take(indexer, axis=0)

            lp = DataFrame(new_values, index=new_index, columns=lp.columns)
            wp = lp.to_panel()

        # Restrict the minor axis to the filtered columns that are present,
        # in sorted order
        if sel.column_filter:
            new_minor = sorted(set(wp.minor_axis) & sel.column_filter)
            wp = wp.reindex(minor=new_minor)
        return wp
Пример #12
0
    def test_constructor_unsortable(self):
        # Smoke test only: integers mixed with a datetime in an object array
        # must not make the constructor raise.
        mixed = np.array([1, 2, 3, datetime.now()], dtype='O')
        Factor(mixed)