Example No. 1
    def test_constructor_empty(self, input_class):
        empty = Series()
        empty2 = Series(input_class())

        # these are Index() and RangeIndex() which don't compare type equal
        # but are just .equals
        assert_series_equal(empty, empty2, check_index_type=False)

        # With explicit dtype:
        empty = Series(dtype='float64')
        empty2 = Series(input_class(), dtype='float64')
        assert_series_equal(empty, empty2, check_index_type=False)

        # GH 18515 : with dtype=category:
        empty = Series(dtype='category')
        empty2 = Series(input_class(), dtype='category')
        assert_series_equal(empty, empty2, check_index_type=False)

        if input_class is not list:
            # With index:
            empty = Series(index=lrange(10))
            empty2 = Series(input_class(), index=lrange(10))
            assert_series_equal(empty, empty2)

            # With index and dtype float64:
            empty = Series(np.nan, index=lrange(10))
            empty2 = Series(input_class(), index=lrange(10), dtype='float64')
            assert_series_equal(empty, empty2)

            # GH 19853 : with empty string, index and dtype str
            empty = Series('', dtype=str, index=range(3))
            empty2 = Series('', index=range(3))
            assert_series_equal(empty, empty2)
Example No. 2
    def test_setitem_ndarray_1d(self):
        # GH5508

        # len of indexer vs length of the 1d ndarray
        df = DataFrame(index=Index(lrange(1, 11)))
        df['foo'] = np.zeros(10, dtype=np.float64)
        df['bar'] = np.zeros(10, dtype=np.complex)

        # invalid
        def f():
            df.loc[df.index[2:5], 'bar'] = np.array([2.33j, 1.23 + 0.1j,
                                                     2.2, 1.0])

        pytest.raises(ValueError, f)

        # valid
        df.loc[df.index[2:6], 'bar'] = np.array([2.33j, 1.23 + 0.1j,
                                                 2.2, 1.0])

        result = df.loc[df.index[2:6], 'bar']
        expected = Series([2.33j, 1.23 + 0.1j, 2.2, 1.0], index=[3, 4, 5, 6],
                          name='bar')
        tm.assert_series_equal(result, expected)

        # dtype getting changed?
        df = DataFrame(index=Index(lrange(1, 11)))
        df['foo'] = np.zeros(10, dtype=np.float64)
        df['bar'] = np.zeros(10, dtype=np.complex)

        def f():
            df[2:5] = np.arange(1, 4) * 1j

        pytest.raises(ValueError, f)
Example No. 3
def test_loc_getitem_setitem_integer_slice_keyerrors():
    s = Series(np.random.randn(10), index=lrange(0, 20, 2))

    # this is OK
    cp = s.copy()
    cp.iloc[4:10] = 0
    assert (cp.iloc[4:10] == 0).all()

    # so is this
    cp = s.copy()
    cp.iloc[3:11] = 0
    assert (cp.iloc[3:11] == 0).values.all()

    result = s.iloc[2:6]
    result2 = s.loc[3:11]
    expected = s.reindex([4, 6, 8, 10])

    assert_series_equal(result, expected)
    assert_series_equal(result2, expected)

    # non-monotonic, raise KeyError
    s2 = s.iloc[lrange(5) + lrange(5, 10)[::-1]]
    with pytest.raises(KeyError, match=r"^3L?$"):
        s2.loc[3:11]
    with pytest.raises(KeyError, match=r"^3L?$"):
        s2.loc[3:11] = 0
Example No. 4
    def test_parse_dates_column_list(self):
        from pandas.core.datetools import to_datetime

        data = '''date;destination;ventilationcode;unitcode;units;aux_date
01/01/2010;P;P;50;1;12/1/2011
01/01/2010;P;R;50;1;13/1/2011
15/01/2010;P;P;50;1;14/1/2011
01/05/2010;P;P;50;1;15/1/2011'''

        expected = self.read_csv(StringIO(data), sep=";", index_col=lrange(4))

        lev = expected.index.levels[0]
        levels = list(expected.index.levels)
        levels[0] = lev.to_datetime(dayfirst=True)
        # hack to get this to work - remove for final test
        levels[0].name = lev.name
        expected.index.set_levels(levels, inplace=True)
        expected['aux_date'] = to_datetime(expected['aux_date'],
                                           dayfirst=True)
        expected['aux_date'] = lmap(Timestamp, expected['aux_date'])
        tm.assertIsInstance(expected['aux_date'][0], datetime)

        df = self.read_csv(StringIO(data), sep=";", index_col=lrange(4),
                           parse_dates=[0, 5], dayfirst=True)
        tm.assert_frame_equal(df, expected)

        df = self.read_csv(StringIO(data), sep=";", index_col=lrange(4),
                           parse_dates=['date', 'aux_date'], dayfirst=True)
        tm.assert_frame_equal(df, expected)
Example No. 5
def test_parse_ragged_csv(c_parser_only):
    parser = c_parser_only
    data = """1,2,3
1,2,3,4
1,2,3,4,5
1,2
1,2,3,4"""

    nice_data = """1,2,3,,
1,2,3,4,
1,2,3,4,5
1,2,,,
1,2,3,4,"""
    result = parser.read_csv(StringIO(data), header=None,
                             names=["a", "b", "c", "d", "e"])

    expected = parser.read_csv(StringIO(nice_data), header=None,
                               names=["a", "b", "c", "d", "e"])

    tm.assert_frame_equal(result, expected)

    # too many columns, cause segfault if not careful
    data = "1,2\n3,4,5"

    result = parser.read_csv(StringIO(data), header=None,
                             names=lrange(50))
    expected = parser.read_csv(StringIO(data), header=None,
                               names=lrange(3)).reindex(columns=lrange(50))

    tm.assert_frame_equal(result, expected)
Example No. 6
def _get_skiprows_iter(skiprows):
    """Get an iterator given an integer, slice or container.

    Parameters
    ----------
    skiprows : int, slice, container
        The iterator to use to skip rows; can also be a slice.

    Raises
    ------
    TypeError
        * If `skiprows` is not a slice, integer, or Container

    Returns
    -------
    it : iterable
        A proper iterator to use to skip rows of a DataFrame.
    """
    if isinstance(skiprows, slice):
        return lrange(skiprows.start or 0, skiprows.stop, skiprows.step or 1)
    elif isinstance(skiprows, numbers.Integral):
        return lrange(skiprows)
    elif isinstance(skiprows, collections.Container):
        return skiprows
    else:
        raise TypeError('{0} is not a valid type for skipping'
                        ' rows'.format(type(skiprows)))
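A minimal usage sketch of the helper above (hypothetical calls; assumes lrange materializes range() into a list), showing how each accepted skiprows type is normalized:

# integer: skip the first n rows
_get_skiprows_iter(3)               # -> [0, 1, 2]
# slice: expanded with a default start of 0 and step of 1
_get_skiprows_iter(slice(2, 8, 2))  # -> [2, 4, 6]
# any other container is returned unchanged
_get_skiprows_iter([0, 3, 5])       # -> [0, 3, 5]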
Example No. 7
    def test_parse_ragged_csv(self):
        data = """1,2,3
1,2,3,4
1,2,3,4,5
1,2
1,2,3,4"""

        nice_data = """1,2,3,,
1,2,3,4,
1,2,3,4,5
1,2,,,
1,2,3,4,"""
        result = self.read_csv(StringIO(data), header=None,
                               names=['a', 'b', 'c', 'd', 'e'])

        expected = self.read_csv(StringIO(nice_data), header=None,
                                 names=['a', 'b', 'c', 'd', 'e'])

        tm.assert_frame_equal(result, expected)

        # too many columns, cause segfault if not careful
        data = "1,2\n3,4,5"

        result = self.read_csv(StringIO(data), header=None,
                               names=lrange(50))
        expected = self.read_csv(StringIO(data), header=None,
                                 names=lrange(3)).reindex(columns=lrange(50))

        tm.assert_frame_equal(result, expected)
Example No. 8
def test_is_():
    mi = MultiIndex.from_tuples(lzip(range(10), range(10)))
    assert mi.is_(mi)
    assert mi.is_(mi.view())
    assert mi.is_(mi.view().view().view().view())
    mi2 = mi.view()
    # names are metadata, they don't change id
    mi2.names = ["A", "B"]
    assert mi2.is_(mi)
    assert mi.is_(mi2)

    assert mi.is_(mi.set_names(["C", "D"]))
    mi2 = mi.view()
    mi2.set_names(["E", "F"], inplace=True)
    assert mi.is_(mi2)
    # levels are inherent properties, they change identity
    mi3 = mi2.set_levels([lrange(10), lrange(10)])
    assert not mi3.is_(mi2)
    # shouldn't change
    assert mi2.is_(mi)
    mi4 = mi3.view()

    # GH 17464 - Remove duplicate MultiIndex levels
    mi4.set_levels([lrange(10), lrange(10)], inplace=True)
    assert not mi4.is_(mi3)
    mi5 = mi.view()
    mi5.set_levels(mi5.levels, inplace=True)
    assert not mi5.is_(mi)
Example No. 9
    def test_to_csv_from_csv1(self):

        with ensure_clean('__tmp_to_csv_from_csv1__') as path:
            self.frame['A'][:5] = nan

            self.frame.to_csv(path)
            self.frame.to_csv(path, columns=['A', 'B'])
            self.frame.to_csv(path, header=False)
            self.frame.to_csv(path, index=False)

            # test roundtrip
            self.tsframe.to_csv(path)
            recons = DataFrame.from_csv(path)

            assert_frame_equal(self.tsframe, recons)

            self.tsframe.to_csv(path, index_label='index')
            recons = DataFrame.from_csv(path, index_col=None)
            assert(len(recons.columns) == len(self.tsframe.columns) + 1)

            # no index
            self.tsframe.to_csv(path, index=False)
            recons = DataFrame.from_csv(path, index_col=None)
            assert_almost_equal(self.tsframe.values, recons.values)

            # corner case
            dm = DataFrame({'s1': Series(lrange(3), lrange(3)),
                            's2': Series(lrange(2), lrange(2))})
            dm.to_csv(path)
            recons = DataFrame.from_csv(path)
            assert_frame_equal(dm, recons)
Example No. 10
    def test_multi_assign(self):

        # GH 3626, an assignment of a sub-df to a df
        df = DataFrame({'FC': ['a', 'b', 'a', 'b', 'a', 'b'],
                        'PF': [0, 0, 0, 0, 1, 1],
                        'col1': lrange(6),
                        'col2': lrange(6, 12)})
        df.ix[1, 0] = np.nan
        df2 = df.copy()

        mask = ~df2.FC.isnull()
        cols = ['col1', 'col2']

        dft = df2 * 2
        dft.ix[3, 3] = np.nan

        expected = DataFrame({'FC': ['a', np.nan, 'a', 'b', 'a', 'b'],
                              'PF': [0, 0, 0, 0, 1, 1],
                              'col1': Series([0, 1, 4, 6, 8, 10]),
                              'col2': [12, 7, 16, np.nan, 20, 22]})

        # frame on rhs
        df2.ix[mask, cols] = dft.ix[mask, cols]
        assert_frame_equal(df2, expected)

        df2.ix[mask, cols] = dft.ix[mask, cols]
        assert_frame_equal(df2, expected)

        # with an ndarray on rhs
        df2 = df.copy()
        df2.ix[mask, cols] = dft.ix[mask, cols].values
        assert_frame_equal(df2, expected)
        df2.ix[mask, cols] = dft.ix[mask, cols].values
        assert_frame_equal(df2, expected)
Example No. 11
    def _reindex_multi(self, axes, copy, fill_value):
        """ we are guaranteed non-Nones in the axes! """
        items = axes['items']
        major = axes['major_axis']
        minor = axes['minor_axis']
        a0, a1, a2 = len(items), len(major), len(minor)

        values = self.values
        new_values = np.empty((a0, a1, a2), dtype=values.dtype)

        new_items, indexer0 = self.items.reindex(items)
        new_major, indexer1 = self.major_axis.reindex(major)
        new_minor, indexer2 = self.minor_axis.reindex(minor)

        if indexer0 is None:
            indexer0 = lrange(len(new_items))

        if indexer1 is None:
            indexer1 = lrange(len(new_major))

        if indexer2 is None:
            indexer2 = lrange(len(new_minor))

        for i, ind in enumerate(indexer0):
            com.take_2d_multi(values[ind], (indexer1, indexer2),
                              out=new_values[i])

        return Panel(new_values, items=new_items, major_axis=new_major,
                     minor_axis=new_minor)
Example No. 12
    def test_fillna(self):
        df = self.zframe.reindex(lrange(5))
        dense = self.zorig.reindex(lrange(5))

        result = df.fillna(0)
        expected = dense.fillna(0)
        tm.assert_sp_frame_equal(result, expected.to_sparse(fill_value=0),
                                 exact_indices=False)
        tm.assert_frame_equal(result.to_dense(), expected)

        result = df.copy()
        result.fillna(0, inplace=True)
        expected = dense.fillna(0)

        tm.assert_sp_frame_equal(result, expected.to_sparse(fill_value=0),
                                 exact_indices=False)
        tm.assert_frame_equal(result.to_dense(), expected)

        result = df.copy()
        result = df['A']
        result.fillna(0, inplace=True)

        expected = dense['A'].fillna(0)
        # this changes internal SparseArray repr
        # tm.assert_sp_series_equal(result, expected.to_sparse(fill_value=0))
        tm.assert_series_equal(result.to_dense(), expected)
Example No. 13
def test_get_loc_level():
    index = MultiIndex(
        levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))],
        labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]),
                np.array([0, 1, 0, 0, 0, 1, 0, 1]),
                np.array([1, 0, 1, 1, 0, 0, 1, 0])])

    loc, new_index = index.get_loc_level((0, 1))
    expected = slice(1, 2)
    exp_index = index[expected].droplevel(0).droplevel(0)
    assert loc == expected
    assert new_index.equals(exp_index)

    loc, new_index = index.get_loc_level((0, 1, 0))
    expected = 1
    assert loc == expected
    assert new_index is None

    pytest.raises(KeyError, index.get_loc_level, (2, 2))
    # GH 22221: unused label
    pytest.raises(KeyError, index.drop(2).get_loc_level, 2)
    # Unused label on unsorted level:
    pytest.raises(KeyError, index.drop(1, level=2).get_loc_level, 2, 2)

    index = MultiIndex(levels=[[2000], lrange(4)],
                       labels=[np.array([0, 0, 0, 0]),
                               np.array([0, 1, 2, 3])])
    result, new_index = index.get_loc_level((2000, slice(None, None)))
    expected = slice(None, None)
    assert result == expected
    assert new_index.equals(index.droplevel(0))
Example No. 14
def test_first_last_nth_dtypes(df_mixed_floats):

    df = df_mixed_floats.copy()
    df['E'] = True
    df['F'] = 1

    # tests for first / last / nth
    grouped = df.groupby('A')
    first = grouped.first()
    expected = df.loc[[1, 0], ['B', 'C', 'D', 'E', 'F']]
    expected.index = Index(['bar', 'foo'], name='A')
    expected = expected.sort_index()
    assert_frame_equal(first, expected)

    last = grouped.last()
    expected = df.loc[[5, 7], ['B', 'C', 'D', 'E', 'F']]
    expected.index = Index(['bar', 'foo'], name='A')
    expected = expected.sort_index()
    assert_frame_equal(last, expected)

    nth = grouped.nth(1)
    expected = df.loc[[3, 2], ['B', 'C', 'D', 'E', 'F']]
    expected.index = Index(['bar', 'foo'], name='A')
    expected = expected.sort_index()
    assert_frame_equal(nth, expected)

    # GH 2763, first/last shifting dtypes
    idx = lrange(10)
    idx.append(9)
    s = Series(data=lrange(11), index=idx, name='IntCol')
    assert s.dtype == 'int64'
    f = s.groupby(level=0).first()
    assert f.dtype == 'int64'
Example No. 15
    def test_dropna(self):
        df = DataFrame(np.random.randn(6, 4))
        df[2][:2] = np.nan

        dropped = df.dropna(axis=1)
        expected = df.loc[:, [0, 1, 3]]
        inp = df.copy()
        inp.dropna(axis=1, inplace=True)
        assert_frame_equal(dropped, expected)
        assert_frame_equal(inp, expected)

        dropped = df.dropna(axis=0)
        expected = df.loc[lrange(2, 6)]
        inp = df.copy()
        inp.dropna(axis=0, inplace=True)
        assert_frame_equal(dropped, expected)
        assert_frame_equal(inp, expected)

        # threshold
        dropped = df.dropna(axis=1, thresh=5)
        expected = df.loc[:, [0, 1, 3]]
        inp = df.copy()
        inp.dropna(axis=1, thresh=5, inplace=True)
        assert_frame_equal(dropped, expected)
        assert_frame_equal(inp, expected)

        dropped = df.dropna(axis=0, thresh=4)
        expected = df.loc[lrange(2, 6)]
        inp = df.copy()
        inp.dropna(axis=0, thresh=4, inplace=True)
        assert_frame_equal(dropped, expected)
        assert_frame_equal(inp, expected)

        dropped = df.dropna(axis=1, thresh=4)
        assert_frame_equal(dropped, df)

        dropped = df.dropna(axis=1, thresh=3)
        assert_frame_equal(dropped, df)

        # subset
        dropped = df.dropna(axis=0, subset=[0, 1, 3])
        inp = df.copy()
        inp.dropna(axis=0, subset=[0, 1, 3], inplace=True)
        assert_frame_equal(dropped, df)
        assert_frame_equal(inp, df)

        # all
        dropped = df.dropna(axis=1, how='all')
        assert_frame_equal(dropped, df)

        df[2] = np.nan
        dropped = df.dropna(axis=1, how='all')
        expected = df.loc[:, [0, 1, 3]]
        assert_frame_equal(dropped, expected)

        # bad input
        msg = ("No axis named 3 for object type"
               " <class 'pandas.core.frame.DataFrame'>")
        with pytest.raises(ValueError, match=msg):
            df.dropna(axis=3)
Example No. 16
    def test_fillna_dtype_conversion(self):
        # make sure that fillna on an empty frame works
        df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
        result = df.get_dtype_counts().sort_values()
        expected = Series({'object': 5})
        assert_series_equal(result, expected)

        result = df.fillna(1)
        expected = DataFrame(1, index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
        result = result.get_dtype_counts().sort_values()
        expected = Series({'int64': 5})
        assert_series_equal(result, expected)

        # empty block
        df = DataFrame(index=lrange(3), columns=['A', 'B'], dtype='float64')
        result = df.fillna('nan')
        expected = DataFrame('nan', index=lrange(3), columns=['A', 'B'])
        assert_frame_equal(result, expected)

        # equiv of replace
        df = DataFrame(dict(A=[1, np.nan], B=[1., 2.]))
        for v in ['', 1, np.nan, 1.0]:
            expected = df.replace(np.nan, v)
            result = df.fillna(v)
            assert_frame_equal(result, expected)
Example No. 17
def test_where_unsafe_float(float_dtype):
    s = Series(np.arange(10), dtype=float_dtype)
    mask = s < 5

    s[mask] = lrange(2, 7)
    expected = Series(lrange(2, 7) + lrange(5, 10), dtype=float_dtype)

    assert_series_equal(s, expected)
Example No. 18
    def test_dropna(self):
        df = DataFrame(np.random.randn(6, 4))
        df[2][:2] = nan

        dropped = df.dropna(axis=1)
        expected = df.loc[:, [0, 1, 3]]
        inp = df.copy()
        inp.dropna(axis=1, inplace=True)
        assert_frame_equal(dropped, expected)
        assert_frame_equal(inp, expected)

        dropped = df.dropna(axis=0)
        expected = df.loc[lrange(2, 6)]
        inp = df.copy()
        inp.dropna(axis=0, inplace=True)
        assert_frame_equal(dropped, expected)
        assert_frame_equal(inp, expected)

        # threshold
        dropped = df.dropna(axis=1, thresh=5)
        expected = df.loc[:, [0, 1, 3]]
        inp = df.copy()
        inp.dropna(axis=1, thresh=5, inplace=True)
        assert_frame_equal(dropped, expected)
        assert_frame_equal(inp, expected)

        dropped = df.dropna(axis=0, thresh=4)
        expected = df.loc[lrange(2, 6)]
        inp = df.copy()
        inp.dropna(axis=0, thresh=4, inplace=True)
        assert_frame_equal(dropped, expected)
        assert_frame_equal(inp, expected)

        dropped = df.dropna(axis=1, thresh=4)
        assert_frame_equal(dropped, df)

        dropped = df.dropna(axis=1, thresh=3)
        assert_frame_equal(dropped, df)

        # subset
        dropped = df.dropna(axis=0, subset=[0, 1, 3])
        inp = df.copy()
        inp.dropna(axis=0, subset=[0, 1, 3], inplace=True)
        assert_frame_equal(dropped, df)
        assert_frame_equal(inp, df)

        # all
        dropped = df.dropna(axis=1, how='all')
        assert_frame_equal(dropped, df)

        df[2] = nan
        dropped = df.dropna(axis=1, how='all')
        expected = df.loc[:, [0, 1, 3]]
        assert_frame_equal(dropped, expected)

        # bad input
        pytest.raises(ValueError, df.dropna, axis=3)
Example No. 19
    def test_lrange(self):
        results = lrange(10),
        expecteds = list(builtins.range(10)),
        lengths = 10,

        results += lrange(1, 10, 2),
        lengths += 5,
        expecteds += list(builtins.range(1, 10, 2)),
        self.check_results(results, expecteds, lengths)
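For context, a minimal sketch of the compat helper these tests exercise, assuming lrange is just a list-returning wrapper around range() (which is what the expected values above imply):

def lrange(*args, **kwargs):
    # list-returning range(): the same result on Python 2 and Python 3
    return list(range(*args, **kwargs))

lrange(10)        # -> [0, 1, 2, ..., 9]
lrange(1, 10, 2)  # -> [1, 3, 5, 7, 9]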
Example No. 20
def test_indexing():
    idx = date_range("2001-1-1", periods=20, freq='M')
    ts = Series(np.random.rand(len(idx)), index=idx)

    # getting

    # GH 3070, make sure semantics work on Series/Frame
    expected = ts['2001']
    expected.name = 'A'

    df = DataFrame(dict(A=ts))
    result = df['2001']['A']
    assert_series_equal(expected, result)

    # setting
    ts['2001'] = 1
    expected = ts['2001']
    expected.name = 'A'

    df.loc['2001', 'A'] = 1

    result = df['2001']['A']
    assert_series_equal(expected, result)

    # GH3546 (not including times on the last day)
    idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:00',
                     freq='H')
    ts = Series(lrange(len(idx)), index=idx)
    expected = ts['2013-05']
    assert_series_equal(expected, ts)

    idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:59',
                     freq='S')
    ts = Series(lrange(len(idx)), index=idx)
    expected = ts['2013-05']
    assert_series_equal(expected, ts)

    idx = [Timestamp('2013-05-31 00:00'),
           Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999))]
    ts = Series(lrange(len(idx)), index=idx)
    expected = ts['2013']
    assert_series_equal(expected, ts)

    # GH14826, indexing with a seconds resolution string / datetime object
    df = DataFrame(np.random.rand(5, 5),
                   columns=['open', 'high', 'low', 'close', 'volume'],
                   index=date_range('2012-01-02 18:01:00',
                                    periods=5, tz='US/Central', freq='s'))
    expected = df.loc[[df.index[2]]]

    # this is a single date, so will raise
    with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"):
        df['2012-01-02 18:01:02']
    msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central', freq='S'\)"
    with pytest.raises(KeyError, match=msg):
        df[df.index[2]]
Example No. 21
def test_where_unsafe():
    # see gh-9731
    s = Series(np.arange(10), dtype="int64")
    values = [2.5, 3.5, 4.5, 5.5]

    mask = s > 5
    expected = Series(lrange(6) + values, dtype="float64")

    s[mask] = values
    assert_series_equal(s, expected)

    # see gh-3235
    s = Series(np.arange(10), dtype='int64')
    mask = s < 5
    s[mask] = lrange(2, 7)
    expected = Series(lrange(2, 7) + lrange(5, 10), dtype='int64')
    assert_series_equal(s, expected)
    assert s.dtype == expected.dtype

    s = Series(np.arange(10), dtype='int64')
    mask = s > 5
    s[mask] = [0] * 4
    expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype='int64')
    assert_series_equal(s, expected)

    s = Series(np.arange(10))
    mask = s > 5

    def f():
        s[mask] = [5, 4, 3, 2, 1]

    pytest.raises(ValueError, f)

    def f():
        s[mask] = [0] * 5

    pytest.raises(ValueError, f)

    # dtype changes
    s = Series([1, 2, 3, 4])
    result = s.where(s > 2, np.nan)
    expected = Series([np.nan, np.nan, 3, 4])
    assert_series_equal(result, expected)

    # GH 4667
    # setting with None changes dtype
    s = Series(range(10)).astype(float)
    s[8] = None
    result = s[8]
    assert isna(result)

    s = Series(range(10)).astype(float)
    s[s > 8] = None
    result = s[isna(s)]
    expected = Series(np.nan, index=[9])
    assert_series_equal(result, expected)
Example No. 22
    def test_constructor_generator(self):
        gen = (i for i in range(10))

        result = Series(gen)
        exp = Series(lrange(10))
        assert_series_equal(result, exp)

        gen = (i for i in range(10))
        result = Series(gen, index=lrange(10, 20))
        exp.index = lrange(10, 20)
        assert_series_equal(result, exp)
Example No. 23
    def test_iloc_getitem_frame(self):
        df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2),
                       columns=lrange(0, 8, 2))

        result = df.iloc[2]
        with catch_warnings(record=True):
            exp = df.ix[4]
        tm.assert_series_equal(result, exp)

        result = df.iloc[2, 2]
        with catch_warnings(record=True):
            exp = df.ix[4, 4]
        assert result == exp

        # slice
        result = df.iloc[4:8]
        with catch_warnings(record=True):
            expected = df.ix[8:14]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, 2:3]
        with catch_warnings(record=True):
            expected = df.ix[:, 4:5]
        tm.assert_frame_equal(result, expected)

        # list of integers
        result = df.iloc[[0, 1, 3]]
        with catch_warnings(record=True):
            expected = df.ix[[0, 2, 6]]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[[0, 1, 3], [0, 1]]
        with catch_warnings(record=True):
            expected = df.ix[[0, 2, 6], [0, 2]]
        tm.assert_frame_equal(result, expected)

        # negative indices
        result = df.iloc[[-1, 1, 3], [-1, 1]]
        with catch_warnings(record=True):
            expected = df.ix[[18, 2, 6], [6, 2]]
        tm.assert_frame_equal(result, expected)

        # duplicate indices
        result = df.iloc[[-1, -1, 1, 3], [-1, 1]]
        with catch_warnings(record=True):
            expected = df.ix[[18, 18, 2, 6], [6, 2]]
        tm.assert_frame_equal(result, expected)

        # with index-like
        s = Series(index=lrange(1, 5))
        result = df.iloc[s.index]
        with catch_warnings(record=True):
            expected = df.ix[[2, 4, 6, 8]]
        tm.assert_frame_equal(result, expected)
Example No. 24
    def test_insert_benchmark(self):
        # from the vb_suite/frame_methods/frame_insert_columns
        N = 10
        K = 5
        df = DataFrame(index=lrange(N))
        new_col = np.random.randn(N)
        for i in range(K):
            df[i] = new_col
        expected = DataFrame(np.repeat(new_col, K).reshape(N, K),
                             index=lrange(N))
        assert_frame_equal(df, expected)
Example No. 25
    def test_constructor_empty(self):
        empty = Series()
        empty2 = Series([])

        # these are Index() and RangeIndex() which don't compare type equal
        # but are just .equals
        assert_series_equal(empty, empty2, check_index_type=False)

        empty = Series(index=lrange(10))
        empty2 = Series(np.nan, index=lrange(10))
        assert_series_equal(empty, empty2)
Example No. 26
    def test_iloc_mask(self):

        # GH 3631, iloc with a mask (of a series) should raise
        df = DataFrame(lrange(5), list('ABCDE'), columns=['a'])
        mask = (df.a % 2 == 0)
        self.assertRaises(ValueError, df.iloc.__getitem__, tuple([mask]))
        mask.index = lrange(len(mask))
        self.assertRaises(NotImplementedError, df.iloc.__getitem__,
                          tuple([mask]))

        # ndarray ok
        result = df.iloc[np.array([True] * len(mask), dtype=bool)]
        assert_frame_equal(result, df)

        # the possibilities
        locs = np.arange(4)
        nums = 2**locs
        reps = lmap(bin, nums)
        df = DataFrame({'locs': locs, 'nums': nums}, reps)

        expected = {
            (None,'')     : '0b1100',
            (None,'.loc')  : '0b1100',
            (None,'.iloc') : '0b1100',
            ('index','')  : '0b11',
            ('index','.loc')  : '0b11',
            ('index','.iloc') : 'iLocation based boolean indexing cannot use an indexable as a mask',
            ('locs','')      : 'Unalignable boolean Series key provided',
            ('locs','.loc')   : 'Unalignable boolean Series key provided',
            ('locs','.iloc')  : 'iLocation based boolean indexing on an integer type is not available',
            }

        import warnings
        warnings.filterwarnings(action='ignore', category=UserWarning)
        result = dict()
        for idx in [None, 'index', 'locs']:
            mask = (df.nums > 2).values
            if idx:
                mask = Series(mask, list(reversed(getattr(df, idx))))
            for method in ['', '.loc', '.iloc']:
                try:
                    if method:
                        accessor = getattr(df, method[1:])
                    else:
                        accessor = df
                    ans = str(bin(accessor[mask]['nums'].sum()))
                except Exception as e:
                    ans = str(e)

                key = tuple([idx, method])
                r = expected.get(key)
                if r != ans:
                    raise AssertionError("[%s] does not match [%s], received [%s]" %
                                         (key, ans, r))
        warnings.filterwarnings(action='always', category=UserWarning)
Example No. 27
    def test_constructor_map(self):
        # GH8909
        m = map(lambda x: x, range(10))

        result = Series(m)
        exp = Series(lrange(10))
        assert_series_equal(result, exp)

        m = map(lambda x: x, range(10))
        result = Series(m, index=lrange(10, 20))
        exp.index = lrange(10, 20)
        assert_series_equal(result, exp)
Example No. 28
    def test_constructor_pass_none(self):
        s = Series(None, index=lrange(5))
        assert s.dtype == np.float64

        s = Series(None, index=lrange(5), dtype=object)
        assert s.dtype == np.object_

        # GH 7431
        # inference on the index
        s = Series(index=np.array([None]))
        expected = Series(index=Index([None]))
        assert_series_equal(s, expected)
Example No. 29
    def test_bar_log_no_subplots(self):
        # GH3254, GH3298 matplotlib/matplotlib#1882, #1892
        # regressions in 1.2.1
        expected = np.array([1., 10.])

        if not self.mpl_le_1_2_1:
            expected = np.hstack((.1, expected, 100))

        # no subplots
        df = DataFrame({'A': [3] * 5, 'B': lrange(1, 6)}, index=lrange(5))
        ax = df.plot(kind='bar', grid=True, log=True)
        assert_array_equal(ax.yaxis.get_ticklocs(), expected)
Example No. 30
    def test_sort_index_duplicates(self):

        # with 9816, these are all translated to .sort_values

        df = DataFrame([lrange(5, 9), lrange(4)],
                       columns=['a', 'a', 'b', 'b'])

        with assertRaisesRegexp(ValueError, 'duplicate'):
            # use .sort_values #9816
            with tm.assert_produces_warning(FutureWarning):
                df.sort_index(by='a')
        with assertRaisesRegexp(ValueError, 'duplicate'):
            df.sort_values(by='a')

        with assertRaisesRegexp(ValueError, 'duplicate'):
            # use .sort_values #9816
            with tm.assert_produces_warning(FutureWarning):
                df.sort_index(by=['a'])
        with assertRaisesRegexp(ValueError, 'duplicate'):
            df.sort_values(by=['a'])

        with assertRaisesRegexp(ValueError, 'duplicate'):
            # use .sort_values #9816
            with tm.assert_produces_warning(FutureWarning):
                # multi-column 'by' is separate codepath
                df.sort_index(by=['a', 'b'])
        with assertRaisesRegexp(ValueError, 'duplicate'):
            # multi-column 'by' is separate codepath
            df.sort_values(by=['a', 'b'])

        # with multi-index
        # GH4370
        df = DataFrame(np.random.randn(4, 2),
                       columns=MultiIndex.from_tuples([('a', 0), ('a', 1)]))
        with assertRaisesRegexp(ValueError, 'levels'):
            # use .sort_values #9816
            with tm.assert_produces_warning(FutureWarning):
                df.sort_index(by='a')
        with assertRaisesRegexp(ValueError, 'levels'):
            df.sort_values(by='a')

        # convert tuples to a list of tuples
        # use .sort_values #9816
        with tm.assert_produces_warning(FutureWarning):
            df.sort_index(by=[('a', 1)])
        expected = df.sort_values(by=[('a', 1)])

        # use .sort_values #9816
        with tm.assert_produces_warning(FutureWarning):
            df.sort_index(by=('a', 1))
        result = df.sort_values(by=('a', 1))
        assert_frame_equal(result, expected)
Example No. 31
    def test_float_none_comparison(self):
        df = DataFrame(np.random.randn(8, 3),
                       index=lrange(8),
                       columns=['A', 'B', 'C'])

        pytest.raises(TypeError, df.__eq__, None)
Example No. 32
    def test_map_type_inference(self):
        s = Series(lrange(3))
        s2 = s.map(lambda x: np.where(x == 0, 0, 1))
        self.assertTrue(issubclass(s2.dtype.type, np.integer))
Example No. 33
    def test_valid_object_plot(self):
        s = Series(lrange(10), dtype=object)
        for kind in plotting._core._common_kinds:
            if not _ok_for_gaussian_kde(kind):
                continue
            _check_plot_works(s.plot, kind=kind)
Example No. 34
    def get_forward_data(self, months, call=True, put=False, near=False,
                         above_below=2):
        """
        Gets either call, put, or both data for months starting in the current
        month and going out in the future a specified amount of time.

        Parameters
        ----------
        months: number, int
            How many months to go out in the collection of the data. This is
            inclusive.

        call: bool, optional (default=True)
            Whether or not to collect data for call options

        put: bool, optional (default=False)
            Whether or not to collect data for put options.

        near: bool, optional (default=False)
            Whether this function should get only the data near the
            current stock price. Uses Options.get_near_stock_price

        above_below: number, int, optional (default=2)
            The number of strike prices above and below the stock price that
            should be taken if the near option is set to True

        Returns
        -------
        data : dict of str, DataFrame
        """
        warnings.warn("get_forward_data() is deprecated", FutureWarning)
        in_months = lrange(CUR_MONTH, CUR_MONTH + months + 1)
        in_years = [CUR_YEAR] * (months + 1)

        # Figure out how many items in in_months go past 12
        to_change = 0
        for i in range(months):
            if in_months[i] > 12:
                in_months[i] -= 12
                to_change += 1

        # Change the corresponding items in the in_years list.
        for i in range(1, to_change + 1):
            in_years[-i] += 1

        to_ret = Series({'calls': call, 'puts': put})
        to_ret = to_ret[to_ret].index
        data = {}

        for name in to_ret:
            all_data = DataFrame()

            for mon in range(months):
                m2 = in_months[mon]
                y2 = in_years[mon]

                if not near:
                    m1 = _two_char_month(m2)
                    nam = name + str(m1) + str(y2)[2:]

                    try:  # Try to access on the instance
                        frame = getattr(self, nam)
                    except AttributeError:
                        meth_name = 'get_{0}_data'.format(name[:-1])
                        frame = getattr(self, meth_name)(m2, y2)
                else:
                    frame = self.get_near_stock_price(call=call, put=put,
                                                      above_below=above_below,
                                                      month=m2, year=y2)
                tick = str(frame.Symbol[0])
                start = len(self.symbol)
                year = tick[start:start + 2]
                month = tick[start + 2:start + 4]
                day = tick[start + 4:start + 6]
                expiry = month + '-' + day + '-' + year
                frame['Expiry'] = expiry

                if not mon:
                    all_data = all_data.join(frame, how='right')
                else:
                    all_data = concat([all_data, frame])
            data[name] = all_data
        ret = [data[k] for k in to_ret]
        if len(ret) == 1:
            return ret.pop()
        if len(ret) != 2:
            raise AssertionError("should be len 2")
        return ret
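The call/put selection above uses boolean indexing of a small Series as a dispatch table; a standalone sketch of just that step (flag values here are illustrative):

from pandas import Series

to_ret = Series({'calls': True, 'puts': False})
to_ret = to_ret[to_ret].index  # keep only the option types whose flag is True
list(to_ret)                   # -> ['calls']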
Example No. 35
    def test_groupby_level_index_names(self):
        # GH4014 this used to raise ValueError since 'exp'>1 (in py2)
        df = DataFrame({'exp': ['A'] * 3 + ['B'] * 3,
                        'var1': lrange(6)}).set_index('exp')
        df.groupby(level='exp')
        pytest.raises(ValueError, df.groupby, level='foo')
Example No. 36
    def tolist(self):
        return lrange(self._start, self._stop, self._step)
Example No. 37
    def test_fillna_col_reordering(self):
        cols = ["COL." + str(i) for i in range(5, 0, -1)]
        data = np.random.rand(20, 5)
        df = DataFrame(index=lrange(20), columns=cols, data=data)
        filled = df.fillna(method='ffill')
        self.assertEqual(df.columns.tolist(), filled.columns.tolist())
Example No. 38
    def test_int_name_format(self):
        index = Index(['a', 'b', 'c'], name=0)
        s = Series(lrange(3), index)
        df = DataFrame(lrange(3), index=index)
        repr(s)
        repr(df)
Example No. 39
def makeIntIndex(k=10):
    return Index(lrange(k))
Example No. 40
    def test_series_from_json_to_json(self):
        def _check_orient(series,
                          orient,
                          dtype=None,
                          numpy=False,
                          check_index_type=True):
            series = series.sort_index()
            unser = read_json(series.to_json(orient=orient),
                              typ='series',
                              orient=orient,
                              numpy=numpy,
                              dtype=dtype)
            unser = unser.sort_index()
            if orient == "records" or orient == "values":
                assert_almost_equal(series.values, unser.values)
            else:
                if orient == "split":
                    assert_series_equal(series,
                                        unser,
                                        check_index_type=check_index_type)
                else:
                    assert_series_equal(series,
                                        unser,
                                        check_names=False,
                                        check_index_type=check_index_type)

        def _check_all_orients(series, dtype=None, check_index_type=True):
            _check_orient(series,
                          "columns",
                          dtype=dtype,
                          check_index_type=check_index_type)
            _check_orient(series,
                          "records",
                          dtype=dtype,
                          check_index_type=check_index_type)
            _check_orient(series,
                          "split",
                          dtype=dtype,
                          check_index_type=check_index_type)
            _check_orient(series,
                          "index",
                          dtype=dtype,
                          check_index_type=check_index_type)
            _check_orient(series, "values", dtype=dtype)

            _check_orient(series,
                          "columns",
                          dtype=dtype,
                          numpy=True,
                          check_index_type=check_index_type)
            _check_orient(series,
                          "records",
                          dtype=dtype,
                          numpy=True,
                          check_index_type=check_index_type)
            _check_orient(series,
                          "split",
                          dtype=dtype,
                          numpy=True,
                          check_index_type=check_index_type)
            _check_orient(series,
                          "index",
                          dtype=dtype,
                          numpy=True,
                          check_index_type=check_index_type)
            _check_orient(series,
                          "values",
                          dtype=dtype,
                          numpy=True,
                          check_index_type=check_index_type)

        # basic
        _check_all_orients(self.series)
        self.assertEqual(self.series.to_json(),
                         self.series.to_json(orient="index"))

        objSeries = Series([str(d) for d in self.objSeries],
                           index=self.objSeries.index,
                           name=self.objSeries.name)
        _check_all_orients(objSeries, dtype=False)

        # empty_series has empty index with object dtype
        # which cannot be reverted
        self.assertEqual(self.empty_series.index.dtype, np.object_)
        _check_all_orients(self.empty_series, check_index_type=False)

        _check_all_orients(self.ts)

        # dtype
        s = Series(lrange(6), index=['a', 'b', 'c', 'd', 'e', 'f'])
        _check_all_orients(Series(s, dtype=np.float64), dtype=np.float64)
        _check_all_orients(Series(s, dtype=np.int), dtype=np.int)
Example No. 41
    def test_bytestring_with_unicode(self):
        idx = Index(lrange(1000))
        if PY3:
            bytes(idx)
        else:
            str(idx)
Example No. 42
def test_to_html_multiindex_odd_even_truncate(max_rows, expected, datapath):
    # GH 14882 - Issue on truncation with odd length DataFrame
    index = MultiIndex.from_product([[100, 200, 300],
                                     [10, 20, 30],
                                     [1, 2, 3, 4, 5, 6, 7]],
                                    names=['a', 'b', 'c'])
    df = DataFrame({'n': range(len(index))}, index=index)
    result = df.to_html(max_rows=max_rows)
    expected = expected_html(datapath, expected)
    assert result == expected


@pytest.mark.parametrize('df,formatters,expected', [
    (DataFrame(
        [[0, 1], [2, 3], [4, 5], [6, 7]],
        columns=['foo', None], index=lrange(4)),
     {'__index__': lambda x: 'abcd' [x]},
     'index_formatter'),

    (DataFrame(
        {'months': [datetime(2016, 1, 1), datetime(2016, 2, 2)]}),
     {'months': lambda x: x.strftime('%Y-%m')},
     'datetime64_monthformatter'),

    (DataFrame({'hod': pd.to_datetime(['10:10:10.100', '12:12:12.120'],
                                      format='%H:%M:%S.%f')}),
     {'hod': lambda x: x.strftime('%H:%M')},
     'datetime64_hourformatter')
])
def test_to_html_formatters(df, formatters, expected, datapath):
    expected = expected_html(datapath, expected)
Example No. 43
    def test_constructor_nan(self, input_arg):
        empty = Series(dtype='float64', index=lrange(10))
        empty2 = Series(input_arg, index=lrange(10))

        assert_series_equal(empty, empty2, check_index_type=False)
Example No. 44
    def test_constructor_dtype_datetime64(self):

        s = Series(iNaT, dtype='M8[ns]', index=lrange(5))
        assert isna(s).all()

        # in theory this should be all nulls, but since
        # we are not specifying a dtype it is ambiguous
        s = Series(iNaT, index=lrange(5))
        assert not isna(s).all()

        s = Series(nan, dtype='M8[ns]', index=lrange(5))
        assert isna(s).all()

        s = Series([datetime(2001, 1, 2, 0, 0), iNaT], dtype='M8[ns]')
        assert isna(s[1])
        assert s.dtype == 'M8[ns]'

        s = Series([datetime(2001, 1, 2, 0, 0), nan], dtype='M8[ns]')
        assert isna(s[1])
        assert s.dtype == 'M8[ns]'

        # GH3416
        dates = [
            np.datetime64(datetime(2013, 1, 1)),
            np.datetime64(datetime(2013, 1, 2)),
            np.datetime64(datetime(2013, 1, 3)),
        ]

        s = Series(dates)
        assert s.dtype == 'M8[ns]'

        s.iloc[0] = np.nan
        assert s.dtype == 'M8[ns]'

        # invalid astypes
        for t in ['s', 'D', 'us', 'ms']:
            pytest.raises(TypeError, s.astype, 'M8[%s]' % t)

        # GH3414 related
        pytest.raises(TypeError, lambda x: Series(
            Series(dates).astype('int') / 1000000, dtype='M8[ms]'))
        pytest.raises(TypeError,
                      lambda x: Series(dates, dtype='datetime64'))

        # invalid dates can be held as object
        result = Series([datetime(2, 1, 1)])
        assert result[0] == datetime(2, 1, 1, 0, 0)

        result = Series([datetime(3000, 1, 1)])
        assert result[0] == datetime(3000, 1, 1, 0, 0)

        # don't mix types
        result = Series([Timestamp('20130101'), 1], index=['a', 'b'])
        assert result['a'] == Timestamp('20130101')
        assert result['b'] == 1

        # GH6529
        # coerce datetime64 non-ns properly
        dates = date_range('01-Jan-2015', '01-Dec-2015', freq='M')
        values2 = dates.view(np.ndarray).astype('datetime64[ns]')
        expected = Series(values2, index=dates)

        for dtype in ['s', 'D', 'ms', 'us', 'ns']:
            values1 = dates.view(np.ndarray).astype('M8[{0}]'.format(dtype))
            result = Series(values1, dates)
            assert_series_equal(result, expected)

        # GH 13876
        # coerce to non-ns to object properly
        expected = Series(values2, index=dates, dtype=object)
        for dtype in ['s', 'D', 'ms', 'us', 'ns']:
            values1 = dates.view(np.ndarray).astype('M8[{0}]'.format(dtype))
            result = Series(values1, index=dates, dtype=object)
            assert_series_equal(result, expected)

        # leave datetime.date alone
        dates2 = np.array([d.date() for d in dates.to_pydatetime()],
                          dtype=object)
        series1 = Series(dates2, dates)
        tm.assert_numpy_array_equal(series1.values, dates2)
        assert series1.dtype == object

        # these will correctly infer a datetime
        s = Series([None, pd.NaT, '2013-08-05 15:30:00.000001'])
        assert s.dtype == 'datetime64[ns]'
        s = Series([np.nan, pd.NaT, '2013-08-05 15:30:00.000001'])
        assert s.dtype == 'datetime64[ns]'
        s = Series([pd.NaT, None, '2013-08-05 15:30:00.000001'])
        assert s.dtype == 'datetime64[ns]'
        s = Series([pd.NaT, np.nan, '2013-08-05 15:30:00.000001'])
        assert s.dtype == 'datetime64[ns]'

        # tz-aware (UTC and other tz's)
        # GH 8411
        dr = date_range('20130101', periods=3)
        assert Series(dr).iloc[0].tz is None
        dr = date_range('20130101', periods=3, tz='UTC')
        assert str(Series(dr).iloc[0].tz) == 'UTC'
        dr = date_range('20130101', periods=3, tz='US/Eastern')
        assert str(Series(dr).iloc[0].tz) == 'US/Eastern'

        # non-convertible
        s = Series([1479596223000, -1479590, pd.NaT])
        assert s.dtype == 'object'
        assert s[2] is pd.NaT
        assert 'NaT' in str(s)

        # if we passed a NaT it remains
        s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), pd.NaT])
        assert s.dtype == 'object'
        assert s[2] is pd.NaT
        assert 'NaT' in str(s)

        # if we passed a nan it remains
        s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), np.nan])
        assert s.dtype == 'object'
        assert s[2] is np.nan
        assert 'NaN' in str(s)
Example No. 45
    def test_typ(self):

        s = Series(lrange(6), index=['a', 'b', 'c', 'd', 'e', 'f'],
                   dtype='int64')
        result = read_json(s.to_json(), typ=None)
        assert_series_equal(result, s)
Example No. 46
def randu(n):
    choices = u("").join(map(unichr, lrange(1488, 1488 + 26)))
    choices += string.digits
    return ''.join([random.choice(choices) for _ in range(n)])
Example No. 47
    def test_multi_assign(self):

        # GH 3626, an assignment of a sub-df to a df
        df = DataFrame({
            'FC': ['a', 'b', 'a', 'b', 'a', 'b'],
            'PF': [0, 0, 0, 0, 1, 1],
            'col1': lrange(6),
            'col2': lrange(6, 12)
        })
        df.iloc[1, 0] = np.nan
        df2 = df.copy()

        mask = ~df2.FC.isna()
        cols = ['col1', 'col2']

        dft = df2 * 2
        dft.iloc[3, 3] = np.nan

        expected = DataFrame({
            'FC': ['a', np.nan, 'a', 'b', 'a', 'b'],
            'PF': [0, 0, 0, 0, 1, 1],
            'col1': Series([0, 1, 4, 6, 8, 10]),
            'col2': [12, 7, 16, np.nan, 20, 22]
        })

        # frame on rhs
        df2.loc[mask, cols] = dft.loc[mask, cols]
        tm.assert_frame_equal(df2, expected)

        df2.loc[mask, cols] = dft.loc[mask, cols]
        tm.assert_frame_equal(df2, expected)

        # with an ndarray on rhs
        # coerces to float64 because values has float64 dtype
        # GH 14001
        expected = DataFrame({
            'FC': ['a', np.nan, 'a', 'b', 'a', 'b'],
            'PF': [0, 0, 0, 0, 1, 1],
            'col1': [0., 1., 4., 6., 8., 10.],
            'col2': [12, 7, 16, np.nan, 20, 22]
        })
        df2 = df.copy()
        df2.loc[mask, cols] = dft.loc[mask, cols].values
        tm.assert_frame_equal(df2, expected)
        df2.loc[mask, cols] = dft.loc[mask, cols].values
        tm.assert_frame_equal(df2, expected)

        # broadcasting on the rhs is required
        df = DataFrame(
            dict(A=[1, 2, 0, 0, 0],
                 B=[0, 0, 0, 10, 11],
                 C=[0, 0, 0, 10, 11],
                 D=[3, 4, 5, 6, 7]))

        expected = df.copy()
        mask = expected['A'] == 0
        for col in ['A', 'B']:
            expected.loc[mask, col] = df['D']

        df.loc[df['A'] == 0, ['A', 'B']] = df['D']
        tm.assert_frame_equal(df, expected)
Example No. 48
    def test_reindex_fill_value(self):
        df = DataFrame(np.random.randn(10, 4))

        # axis=0
        result = df.reindex(lrange(15))
        assert np.isnan(result.values[-5:]).all()

        result = df.reindex(lrange(15), fill_value=0)
        expected = df.reindex(lrange(15)).fillna(0)
        assert_frame_equal(result, expected)

        # axis=1
        result = df.reindex(columns=lrange(5), fill_value=0.)
        expected = df.copy()
        expected[4] = 0.
        assert_frame_equal(result, expected)

        result = df.reindex(columns=lrange(5), fill_value=0)
        expected = df.copy()
        expected[4] = 0
        assert_frame_equal(result, expected)

        result = df.reindex(columns=lrange(5), fill_value='foo')
        expected = df.copy()
        expected[4] = 'foo'
        assert_frame_equal(result, expected)

        # reindex_axis
        with tm.assert_produces_warning(FutureWarning):
            result = df.reindex_axis(lrange(15), fill_value=0., axis=0)
        expected = df.reindex(lrange(15)).fillna(0)
        assert_frame_equal(result, expected)

        with tm.assert_produces_warning(FutureWarning):
            result = df.reindex_axis(lrange(5), fill_value=0., axis=1)
        expected = df.reindex(columns=lrange(5)).fillna(0)
        assert_frame_equal(result, expected)

        # other dtypes
        df['foo'] = 'foo'
        result = df.reindex(lrange(15), fill_value=0)
        expected = df.reindex(lrange(15)).fillna(0)
        assert_frame_equal(result, expected)
Example No. 49
    def test_to_html_multiindex(self):
        columns = MultiIndex.from_tuples(list(
            zip(np.arange(2).repeat(2), np.mod(lrange(4), 2))),
                                         names=['CL0', 'CL1'])
        df = DataFrame([list('abcd'), list('efgh')], columns=columns)
        result = df.to_html(justify='left')
        expected = ('<table border="1" class="dataframe">\n'
                    '  <thead>\n'
                    '    <tr>\n'
                    '      <th>CL0</th>\n'
                    '      <th colspan="2" halign="left">0</th>\n'
                    '      <th colspan="2" halign="left">1</th>\n'
                    '    </tr>\n'
                    '    <tr>\n'
                    '      <th>CL1</th>\n'
                    '      <th>0</th>\n'
                    '      <th>1</th>\n'
                    '      <th>0</th>\n'
                    '      <th>1</th>\n'
                    '    </tr>\n'
                    '  </thead>\n'
                    '  <tbody>\n'
                    '    <tr>\n'
                    '      <th>0</th>\n'
                    '      <td>a</td>\n'
                    '      <td>b</td>\n'
                    '      <td>c</td>\n'
                    '      <td>d</td>\n'
                    '    </tr>\n'
                    '    <tr>\n'
                    '      <th>1</th>\n'
                    '      <td>e</td>\n'
                    '      <td>f</td>\n'
                    '      <td>g</td>\n'
                    '      <td>h</td>\n'
                    '    </tr>\n'
                    '  </tbody>\n'
                    '</table>')

        assert result == expected

        columns = MultiIndex.from_tuples(
            list(zip(range(4), np.mod(lrange(4), 2))))
        df = DataFrame([list('abcd'), list('efgh')], columns=columns)

        result = df.to_html(justify='right')
        expected = ('<table border="1" class="dataframe">\n'
                    '  <thead>\n'
                    '    <tr>\n'
                    '      <th></th>\n'
                    '      <th>0</th>\n'
                    '      <th>1</th>\n'
                    '      <th>2</th>\n'
                    '      <th>3</th>\n'
                    '    </tr>\n'
                    '    <tr>\n'
                    '      <th></th>\n'
                    '      <th>0</th>\n'
                    '      <th>1</th>\n'
                    '      <th>0</th>\n'
                    '      <th>1</th>\n'
                    '    </tr>\n'
                    '  </thead>\n'
                    '  <tbody>\n'
                    '    <tr>\n'
                    '      <th>0</th>\n'
                    '      <td>a</td>\n'
                    '      <td>b</td>\n'
                    '      <td>c</td>\n'
                    '      <td>d</td>\n'
                    '    </tr>\n'
                    '    <tr>\n'
                    '      <th>1</th>\n'
                    '      <td>e</td>\n'
                    '      <td>f</td>\n'
                    '      <td>g</td>\n'
                    '      <td>h</td>\n'
                    '    </tr>\n'
                    '  </tbody>\n'
                    '</table>')

        assert result == expected
Example No. 50
    def test_reindex_multi(self):
        df = DataFrame(np.random.randn(3, 3))

        result = df.reindex(index=lrange(4), columns=lrange(4))
        expected = df.reindex(lrange(4)).reindex(columns=lrange(4))

        assert_frame_equal(result, expected)

        df = DataFrame(np.random.randint(0, 10, (3, 3)))

        result = df.reindex(index=lrange(4), columns=lrange(4))
        expected = df.reindex(lrange(4)).reindex(columns=lrange(4))

        assert_frame_equal(result, expected)

        df = DataFrame(np.random.randint(0, 10, (3, 3)))

        result = df.reindex(index=lrange(2), columns=lrange(2))
        expected = df.reindex(lrange(2)).reindex(columns=lrange(2))

        assert_frame_equal(result, expected)

        df = DataFrame(np.random.randn(5, 3) + 1j, columns=['a', 'b', 'c'])

        result = df.reindex(index=[0, 1], columns=['a', 'b'])
        expected = df.reindex([0, 1]).reindex(columns=['a', 'b'])

        assert_frame_equal(result, expected)
Example No. 51
def _generate_marginal_results(table,
                               data,
                               values,
                               rows,
                               cols,
                               aggfunc,
                               observed,
                               grand_margin,
                               margins_name='All'):
    if len(cols) > 0:
        # need to "interleave" the margins
        table_pieces = []
        margin_keys = []

        def _all_key(key):
            return (key, margins_name) + ('', ) * (len(cols) - 1)

        if len(rows) > 0:
            margin = data[rows + values].groupby(
                rows, observed=observed).agg(aggfunc)
            cat_axis = 1

            for key, piece in table.groupby(level=0,
                                            axis=cat_axis,
                                            observed=observed):
                all_key = _all_key(key)

                # we are going to mutate this, so need to copy!
                piece = piece.copy()
                try:
                    piece[all_key] = margin[key]
                except TypeError:

                    # we cannot reshape, so coerce the axis
                    piece.set_axis(
                        piece._get_axis(cat_axis)._to_safe_for_reshape(),
                        axis=cat_axis,
                        inplace=True)
                    piece[all_key] = margin[key]

                table_pieces.append(piece)
                margin_keys.append(all_key)
        else:
            margin = grand_margin
            cat_axis = 0
            for key, piece in table.groupby(level=0,
                                            axis=cat_axis,
                                            observed=observed):
                all_key = _all_key(key)
                table_pieces.append(piece)
                table_pieces.append(Series(margin[key], index=[all_key]))
                margin_keys.append(all_key)

        result = concat(table_pieces, axis=cat_axis)

        if len(rows) == 0:
            return result
    else:
        result = table
        margin_keys = table.columns

    if len(cols) > 0:
        row_margin = data[cols + values].groupby(
            cols, observed=observed).agg(aggfunc)
        row_margin = row_margin.stack()

        # slight hack
        new_order = [len(cols)] + lrange(len(cols))
        row_margin.index = row_margin.index.reorder_levels(new_order)
    else:
        row_margin = Series(np.nan, index=result.columns)

    return result, margin_keys, row_margin
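
The margin handling above is part of what ``pivot_table`` performs when called
with ``margins=True``; a minimal usage sketch (the frame and values below are
illustrative, not taken from the source):

import pandas as pd

# Assumed toy frame with two value columns, so the column margins really are
# interleaved per top-level key, i.e. ('C', 'All') and ('D', 'All').
df = pd.DataFrame({'A': ['foo', 'foo', 'bar', 'bar'],
                   'B': ['one', 'two', 'one', 'two'],
                   'C': [1.0, 2.0, 3.0, 4.0],
                   'D': [10.0, 20.0, 30.0, 40.0]})

# margins=True adds an 'All' row and, per value key, an 'All' column computed
# with the same aggfunc as the body of the table.
table = pd.pivot_table(df, values=['C', 'D'], index='A', columns='B',
                       aggfunc='mean', margins=True)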
Exemplo n.º 52
0
    def test_repr_big(self):
        # big one
        biggie = DataFrame(np.zeros((200, 4)),
                           columns=lrange(4),
                           index=lrange(200))
        repr(biggie)
Exemplo n.º 53
0
    def test_append_dtypes(self):

        # GH 5754
        # row appends of different dtypes (so need to do by-item)
        # can sometimes infer the correct type

        df1 = DataFrame({'bar': Timestamp('20130101')}, index=lrange(5))
        df2 = DataFrame()
        result = df1.append(df2)
        expected = df1.copy()
        assert_frame_equal(result, expected)

        df1 = DataFrame({'bar': Timestamp('20130101')}, index=lrange(1))
        df2 = DataFrame({'bar': 'foo'}, index=lrange(1, 2))
        result = df1.append(df2)
        expected = DataFrame({'bar': [Timestamp('20130101'), 'foo']})
        assert_frame_equal(result, expected)

        df1 = DataFrame({'bar': Timestamp('20130101')}, index=lrange(1))
        df2 = DataFrame({'bar': np.nan}, index=lrange(1, 2))
        result = df1.append(df2)
        expected = DataFrame(
            {'bar': Series([Timestamp('20130101'), np.nan], dtype='M8[ns]')})
        assert_frame_equal(result, expected)

        df1 = DataFrame({'bar': Timestamp('20130101')}, index=lrange(1))
        df2 = DataFrame({'bar': np.nan}, index=lrange(1, 2), dtype=object)
        result = df1.append(df2)
        expected = DataFrame(
            {'bar': Series([Timestamp('20130101'), np.nan], dtype='M8[ns]')})
        assert_frame_equal(result, expected)

        df1 = DataFrame({'bar': np.nan}, index=lrange(1))
        df2 = DataFrame({'bar': Timestamp('20130101')}, index=lrange(1, 2))
        result = df1.append(df2)
        expected = DataFrame(
            {'bar': Series([np.nan, Timestamp('20130101')], dtype='M8[ns]')})
        assert_frame_equal(result, expected)

        df1 = DataFrame({'bar': Timestamp('20130101')}, index=lrange(1))
        df2 = DataFrame({'bar': 1}, index=lrange(1, 2), dtype=object)
        result = df1.append(df2)
        expected = DataFrame({'bar': Series([Timestamp('20130101'), 1])})
        assert_frame_equal(result, expected)
Exemplo n.º 54
0
    def test_values(self):
        self.assertRaises(Exception, Panel, np.random.randn(5, 5, 5),
                          lrange(5), lrange(5), lrange(4))
Exemplo n.º 55
0
def pivot_annual(series, freq=None):
    """
    Deprecated. Use ``pivot_table`` instead.

    Group a series by years, taking leap years into account.

    The output has as many rows as distinct years in the original series,
    and as many columns as the length of a leap year in the units corresponding
    to the original frequency (366 for daily frequency, 366*24 for hourly...).
    The first column of the output corresponds to Jan. 1st, 00:00:00,
    while the last column corresponds to Dec. 31st, 23:59:59.
    Entries corresponding to Feb. 29th are masked for non-leap years.

    For example, if the initial series has a daily frequency, the 59th column
    of the output always corresponds to Feb. 28th, the 61st column to Mar. 1st,
    and the 60th column is masked for non-leap years.
    With an hourly initial frequency, the (59*24)th column of the output
    always corresponds to Feb. 28th, 23:00, the (61*24)th column to Mar. 1st,
    00:00, and the 24 columns between (59*24) and (61*24) are masked.

    If the original frequency is less than daily, the output is equivalent to
    ``series.convert('A', func=None)``.

    Parameters
    ----------
    series : Series
    freq : string or None, default None

    Returns
    -------
    annual : DataFrame
    """

    msg = "pivot_annual is deprecated. Use pivot_table instead"
    warnings.warn(msg, FutureWarning)

    index = series.index
    year = index.year
    years = algorithms.unique1d(year)

    if freq is not None:
        freq = freq.upper()
    else:
        freq = series.index.freq

    if freq == 'D':
        width = 366
        offset = np.asarray(index.dayofyear) - 1

        # adjust for leap year
        offset[(~isleapyear(year)) & (offset >= 59)] += 1

        columns = lrange(1, 367)
        # todo: strings like 1/1, 1/25, etc.?
    elif freq in ('M', 'BM'):
        width = 12
        offset = np.asarray(index.month) - 1
        columns = lrange(1, 13)
    elif freq == 'H':
        width = 8784
        grouped = series.groupby(series.index.year)
        defaulted = grouped.apply(lambda x: x.reset_index(drop=True))
        defaulted.index = defaulted.index.droplevel(0)
        offset = np.asarray(defaulted.index)
        offset[~isleapyear(year) & (offset >= 1416)] += 24
        columns = lrange(1, 8785)
    else:
        raise NotImplementedError(freq)

    flat_index = (year - years.min()) * width + offset
    flat_index = _ensure_platform_int(flat_index)

    values = np.empty((len(years), width))
    values.fill(np.nan)
    values.put(flat_index, series.values)

    return DataFrame(values, index=years, columns=columns)
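
A minimal usage sketch of the deprecated helper (illustrative data, not from
the source): a daily series that spans 2015 (non-leap) and 2016 (leap) pivots
into a 2 x 366 frame, and the Feb. 29th slot stays NaN for the non-leap year.

import numpy as np
from pandas import Series, date_range

# 731 daily observations: all of 2015 (365 days) plus all of 2016 (366 days).
s = Series(np.arange(731, dtype=float),
           index=date_range('2015-01-01', periods=731, freq='D'))

annual = pivot_annual(s, freq='D')     # index: [2015, 2016]; columns: 1..366
assert annual.shape == (2, 366)
assert np.isnan(annual.loc[2015, 60])  # Feb. 29th slot is masked for 2015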
Exemplo n.º 56
0
    def test_dti_take_dont_lose_meta(self, tzstr):
        rng = date_range('1/1/2000', periods=20, tz=tzstr)

        result = rng.take(lrange(5))
        assert result.tz == rng.tz
        assert result.freq == rng.freq
Exemplo n.º 57
0
    def test_constructor_convert_index_once(self):
        arr = np.array([1.5, 2.5, 3.5])
        sdf = SparseDataFrame(columns=lrange(4), index=arr)
        assert sdf[0].index is sdf[1].index

@pytest.fixture
def raw_frame():
    index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                               ['one', 'two', 'three']],
                       labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                               [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                       names=['first', 'second'])
    raw_frame = DataFrame(np.random.randn(10, 3),
                          index=index,
                          columns=Index(['A', 'B', 'C'], name='exp'))
    raw_frame.iloc[1, [1, 2]] = np.nan
    raw_frame.iloc[7, [0, 1]] = np.nan
    return raw_frame


@pytest.mark.parametrize("op, level, axis, skipna, sort",
                         product(AGG_FUNCTIONS, lrange(2), lrange(2),
                                 [True, False], [True, False]))
def test_regression_whitelist_methods(raw_frame, op, level, axis, skipna,
                                      sort):
    # GH6944
    # GH 17537
    # explicitly test the whitelist methods

    if axis == 0:
        frame = raw_frame
    else:
        frame = raw_frame.T

    if op in AGG_FUNCTIONS_WITH_SKIPNA:
        grouped = frame.groupby(level=level, axis=axis, sort=sort)
        result = getattr(grouped, op)(skipna=skipna)

    def test_getitem_list_periods(self):
        # GH 7710
        rng = period_range(start='2012-01-01', periods=10, freq='D')
        ts = Series(lrange(len(rng)), index=rng)
        exp = ts.iloc[[1]]
        tm.assert_series_equal(ts[[Period('2012-01-02', freq='D')]], exp)

    def test_set_index_makes_timeseries(self):
        idx = tm.makeDateIndex(10)

        s = Series(lrange(10))
        s.index = idx
        assert s.index.is_all_dates