def test_constructor_empty(self, input_class):
    """An empty ``input_class()`` behaves like passing no data at all."""
    # The two sides carry Index() and RangeIndex(), which don't compare
    # type equal but are ``.equals``, hence check_index_type=False.
    assert_series_equal(Series(), Series(input_class()),
                        check_index_type=False)

    # With explicit dtype, then GH 18515 : with dtype=category
    for dtype in ('float64', 'category'):
        no_data = Series(dtype=dtype)
        from_input = Series(input_class(), dtype=dtype)
        assert_series_equal(no_data, from_input, check_index_type=False)

    if input_class is not list:
        # With index:
        no_data = Series(index=lrange(10))
        from_input = Series(input_class(), index=lrange(10))
        assert_series_equal(no_data, from_input)

        # With index and dtype float64:
        no_data = Series(np.nan, index=lrange(10))
        from_input = Series(input_class(), index=lrange(10),
                            dtype='float64')
        assert_series_equal(no_data, from_input)

        # GH 19853 : with empty string, index and dtype str
        typed = Series('', dtype=str, index=range(3))
        untyped = Series('', index=range(3))
        assert_series_equal(typed, untyped)
def test_setitem_ndarray_1d(self):
    """Assigning a 1d ndarray must match the length of the indexer (GH 5508)."""

    def make_frame():
        # 'bar' is complex so complex values can be assigned into it.
        # np.complex128 is spelled out because the ``np.complex`` alias
        # is no longer available in NumPy.
        frame = DataFrame(index=Index(lrange(1, 11)))
        frame['foo'] = np.zeros(10, dtype=np.float64)
        frame['bar'] = np.zeros(10, dtype=np.complex128)
        return frame

    df = make_frame()

    # invalid: three rows selected, four values supplied
    with pytest.raises(ValueError):
        df.loc[df.index[2:5], 'bar'] = np.array([2.33j, 1.23 + 0.1j,
                                                 2.2, 1.0])

    # valid
    df.loc[df.index[2:6], 'bar'] = np.array([2.33j, 1.23 + 0.1j,
                                             2.2, 1.0])

    result = df.loc[df.index[2:6], 'bar']
    expected = Series([2.33j, 1.23 + 0.1j, 2.2, 1.0], index=[3, 4, 5, 6],
                      name='bar')
    tm.assert_series_equal(result, expected)

    # dtype getting changed?
    df = make_frame()

    with pytest.raises(ValueError):
        df[2:5] = np.arange(1, 4) * 1j
def test_loc_getitem_setitem_integer_slice_keyerrors():
    """Integer slices: ``iloc`` is positional, ``loc`` needs a monotonic index."""
    ser = Series(np.random.randn(10), index=lrange(0, 20, 2))

    # this is OK
    zeroed = ser.copy()
    zeroed.iloc[4:10] = 0
    assert (zeroed.iloc[4:10] == 0).all()

    # so is this
    zeroed = ser.copy()
    zeroed.iloc[3:11] = 0
    assert (zeroed.iloc[3:11] == 0).values.all()

    positional = ser.iloc[2:6]
    by_label = ser.loc[3:11]
    expected = ser.reindex([4, 6, 8, 10])

    assert_series_equal(positional, expected)
    assert_series_equal(by_label, expected)

    # non-monotonic, raise KeyError
    shuffled = ser.iloc[lrange(5) + lrange(5, 10)[::-1]]
    with pytest.raises(KeyError, match=r"^3L?$"):
        shuffled.loc[3:11]
    with pytest.raises(KeyError, match=r"^3L?$"):
        shuffled.loc[3:11] = 0
def test_parse_dates_column_list(self):
    """``parse_dates`` accepts column positions or names, including index columns."""
    from pandas.core.datetools import to_datetime

    # One record per line: the parser relies on the row separators.
    data = ('date;destination;ventilationcode;unitcode;units;aux_date\n'
            '01/01/2010;P;P;50;1;12/1/2011\n'
            '01/01/2010;P;R;50;1;13/1/2011\n'
            '15/01/2010;P;P;50;1;14/1/2011\n'
            '01/05/2010;P;P;50;1;15/1/2011')

    # Build the expected frame by parsing without date handling and then
    # converting the first index level and 'aux_date' by hand.
    expected = self.read_csv(StringIO(data), sep=";", index_col=lrange(4))

    lev = expected.index.levels[0]
    levels = list(expected.index.levels)
    levels[0] = lev.to_datetime(dayfirst=True)
    # hack to get this to work - remove for final test
    levels[0].name = lev.name
    expected.index.set_levels(levels, inplace=True)
    expected['aux_date'] = to_datetime(expected['aux_date'],
                                       dayfirst=True)
    expected['aux_date'] = lmap(Timestamp, expected['aux_date'])
    tm.assertIsInstance(expected['aux_date'][0], datetime)

    # columns given by position
    df = self.read_csv(StringIO(data), sep=";", index_col=lrange(4),
                       parse_dates=[0, 5], dayfirst=True)
    tm.assert_frame_equal(df, expected)

    # columns given by name
    df = self.read_csv(StringIO(data), sep=";", index_col=lrange(4),
                       parse_dates=['date', 'aux_date'], dayfirst=True)
    tm.assert_frame_equal(df, expected)
def test_parse_ragged_csv(c_parser_only):
    """Rows with fewer fields than ``names`` are padded with missing values."""
    parser = c_parser_only

    # One record per line; rows deliberately differ in length.
    data = ("1,2,3\n"
            "1,2,3,4\n"
            "1,2,3,4,5\n"
            "1,2\n"
            "1,2,3,4")

    # Same records with every row explicitly padded to five fields.
    nice_data = ("1,2,3,,\n"
                 "1,2,3,4,\n"
                 "1,2,3,4,5\n"
                 "1,2,,,\n"
                 "1,2,3,4,")
    result = parser.read_csv(StringIO(data), header=None,
                             names=["a", "b", "c", "d", "e"])

    expected = parser.read_csv(StringIO(nice_data), header=None,
                               names=["a", "b", "c", "d", "e"])

    tm.assert_frame_equal(result, expected)

    # too many columns, cause segfault if not careful
    data = "1,2\n3,4,5"

    result = parser.read_csv(StringIO(data), header=None,
                             names=lrange(50))
    expected = parser.read_csv(StringIO(data), header=None,
                               names=lrange(3)).reindex(columns=lrange(50))

    tm.assert_frame_equal(result, expected)
def _get_skiprows_iter(skiprows): """Get an iterator given an integer, slice or container. Parameters ---------- skiprows : int, slice, container The iterator to use to skip rows; can also be a slice. Raises ------ TypeError * If `skiprows` is not a slice, integer, or Container Raises ------ TypeError * If `skiprows` is not a slice, integer, or Container Returns ------- it : iterable A proper iterator to use to skip rows of a DataFrame. """ if isinstance(skiprows, slice): return lrange(skiprows.start or 0, skiprows.stop, skiprows.step or 1) elif isinstance(skiprows, numbers.Integral): return lrange(skiprows) elif isinstance(skiprows, collections.Container): return skiprows else: raise TypeError('{0} is not a valid type for skipping' ' rows'.format(type(skiprows)))
def test_parse_ragged_csv(self):
    """Rows with fewer fields than ``names`` are padded with missing values."""
    # One record per line; rows deliberately differ in length.
    data = ("1,2,3\n"
            "1,2,3,4\n"
            "1,2,3,4,5\n"
            "1,2\n"
            "1,2,3,4")

    # Same records with every row explicitly padded to five fields.
    nice_data = ("1,2,3,,\n"
                 "1,2,3,4,\n"
                 "1,2,3,4,5\n"
                 "1,2,,,\n"
                 "1,2,3,4,")
    result = self.read_csv(StringIO(data), header=None,
                           names=['a', 'b', 'c', 'd', 'e'])

    expected = self.read_csv(StringIO(nice_data), header=None,
                             names=['a', 'b', 'c', 'd', 'e'])

    tm.assert_frame_equal(result, expected)

    # too many columns, cause segfault if not careful
    data = "1,2\n3,4,5"

    result = self.read_csv(StringIO(data), header=None,
                           names=lrange(50))
    expected = self.read_csv(StringIO(data), header=None,
                             names=lrange(3)).reindex(columns=lrange(50))

    tm.assert_frame_equal(result, expected)
def test_is_():
    """``MultiIndex.is_`` survives views and renames but not level changes."""
    base = MultiIndex.from_tuples(lzip(range(10), range(10)))
    assert base.is_(base)
    assert base.is_(base.view())
    assert base.is_(base.view().view().view().view())

    renamed = base.view()
    # names are metadata, they don't change id
    renamed.names = ["A", "B"]
    assert renamed.is_(base)
    assert base.is_(renamed)

    assert base.is_(base.set_names(["C", "D"]))

    renamed = base.view()
    renamed.set_names(["E", "F"], inplace=True)
    assert base.is_(renamed)

    # levels are inherent properties, they change identity
    relevelled = renamed.set_levels([lrange(10), lrange(10)])
    assert not relevelled.is_(renamed)
    # shouldn't change
    assert renamed.is_(base)

    relevelled_view = relevelled.view()
    # GH 17464 - Remove duplicate MultiIndex levels
    relevelled_view.set_levels([lrange(10), lrange(10)], inplace=True)
    assert not relevelled_view.is_(relevelled)

    same_levels = base.view()
    same_levels.set_levels(same_levels.levels, inplace=True)
    assert not same_levels.is_(base)
def test_to_csv_from_csv1(self):
    """Round-trip frames through ``to_csv`` / ``DataFrame.from_csv``."""
    with ensure_clean('__tmp_to_csv_from_csv1__') as path:
        self.frame['A'][:5] = nan

        # Writing must work for each of these option sets.
        for options in ({}, {'columns': ['A', 'B']},
                        {'header': False}, {'index': False}):
            self.frame.to_csv(path, **options)

        # test roundtrip
        self.tsframe.to_csv(path)
        roundtripped = DataFrame.from_csv(path)
        assert_frame_equal(self.tsframe, roundtripped)

        # a labelled index comes back as an extra column
        self.tsframe.to_csv(path, index_label='index')
        roundtripped = DataFrame.from_csv(path, index_col=None)
        n_expected = len(self.tsframe.columns) + 1
        assert len(roundtripped.columns) == n_expected

        # no index
        self.tsframe.to_csv(path, index=False)
        roundtripped = DataFrame.from_csv(path, index_col=None)
        assert_almost_equal(self.tsframe.values, roundtripped.values)

        # corner case
        uneven = DataFrame({'s1': Series(lrange(3), lrange(3)),
                            's2': Series(lrange(2), lrange(2))})
        uneven.to_csv(path)
        roundtripped = DataFrame.from_csv(path)
        assert_frame_equal(uneven, roundtripped)
def test_multi_assign(self):
    """Assign a masked sub-frame (or its values) into a frame, twice over."""
    # GH 3626, an assignment of a sub-df to a df
    df = DataFrame({'FC': ['a', 'b', 'a', 'b', 'a', 'b'],
                    'PF': [0, 0, 0, 0, 1, 1],
                    'col1': lrange(6),
                    'col2': lrange(6, 12)})
    df.ix[1, 0] = np.nan
    df2 = df.copy()

    mask = ~df2.FC.isnull()
    cols = ['col1', 'col2']

    dft = df2 * 2
    dft.ix[3, 3] = np.nan

    expected = DataFrame({'FC': ['a', np.nan, 'a', 'b', 'a', 'b'],
                          'PF': [0, 0, 0, 0, 1, 1],
                          'col1': Series([0, 1, 4, 6, 8, 10]),
                          'col2': [12, 7, 16, np.nan, 20, 22]})

    # frame on rhs; repeating the assignment must not change the outcome
    for _ in range(2):
        df2.ix[mask, cols] = dft.ix[mask, cols]
        assert_frame_equal(df2, expected)

    # with an ndarray on rhs
    df2 = df.copy()
    for _ in range(2):
        df2.ix[mask, cols] = dft.ix[mask, cols].values
        assert_frame_equal(df2, expected)
def _reindex_multi(self, axes, copy, fill_value):
    """
    Reindex items, major_axis and minor_axis in one pass.

    we are guaranteed non-Nones in the axes!

    Parameters
    ----------
    axes : mapping
        Must provide the keys 'items', 'major_axis' and 'minor_axis'.
    copy, fill_value
        Accepted but not referenced anywhere in this body.
        NOTE(review): presumably kept for signature parity with other
        reindex paths -- confirm that ignoring fill_value is intended.

    Returns
    -------
    Panel
        A new Panel built from the reindexed values and the three new axes.
    """
    items = axes['items']
    major = axes['major_axis']
    minor = axes['minor_axis']
    a0, a1, a2 = len(items), len(major), len(minor)

    values = self.values
    # Uninitialised output buffer; each item slice is filled in the loop
    # below through the ``out`` argument.
    new_values = np.empty((a0, a1, a2), dtype=values.dtype)

    new_items, indexer0 = self.items.reindex(items)
    new_major, indexer1 = self.major_axis.reindex(major)
    new_minor, indexer2 = self.minor_axis.reindex(minor)

    # A None indexer is replaced by the identity mapping 0..len-1.
    if indexer0 is None:
        indexer0 = lrange(len(new_items))

    if indexer1 is None:
        indexer1 = lrange(len(new_major))

    if indexer2 is None:
        indexer2 = lrange(len(new_minor))

    # NOTE(review): ``ind`` indexes ``values`` directly; if reindex marks
    # missing labels with -1 this would pick the last item -- TODO confirm
    # how com.take_2d_multi and the caller treat missing items.
    for i, ind in enumerate(indexer0):
        com.take_2d_multi(values[ind], (indexer1, indexer2),
                          out=new_values[i])

    return Panel(new_values, items=new_items, major_axis=new_major,
                 minor_axis=new_minor)
def test_fillna(self):
    """``fillna`` on a sparse frame/series matches the dense equivalent."""
    df = self.zframe.reindex(lrange(5))
    dense = self.zorig.reindex(lrange(5))

    def check_frame(filled):
        # Compare against the dense result, both sparse and densified.
        expected = dense.fillna(0)
        tm.assert_sp_frame_equal(filled, expected.to_sparse(fill_value=0),
                                 exact_indices=False)
        tm.assert_frame_equal(filled.to_dense(), expected)

    check_frame(df.fillna(0))

    in_place = df.copy()
    in_place.fillna(0, inplace=True)
    check_frame(in_place)

    # NOTE(review): the copy below is rebound straight away, so the
    # in-place fill runs on a column of ``df`` itself -- confirm intent.
    column = df.copy()
    column = df['A']
    column.fillna(0, inplace=True)

    expected = dense['A'].fillna(0)
    # this changes internal SparseArray repr
    # tm.assert_sp_series_equal(result, expected.to_sparse(fill_value=0))
    tm.assert_series_equal(column.to_dense(), expected)
def test_get_loc_level():
    """``MultiIndex.get_loc_level`` for partial, full and missing keys."""
    index = MultiIndex(
        levels=[Index(lrange(4)) for _ in range(3)],
        labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]),
                np.array([0, 1, 0, 0, 0, 1, 0, 1]),
                np.array([1, 0, 1, 1, 0, 0, 1, 0])])

    # partial key: a slice plus the remaining level
    loc, new_index = index.get_loc_level((0, 1))
    expected = slice(1, 2)
    exp_index = index[expected].droplevel(0).droplevel(0)
    assert loc == expected
    assert new_index.equals(exp_index)

    # full key: a scalar position and no remaining index
    loc, new_index = index.get_loc_level((0, 1, 0))
    expected = 1
    assert loc == expected
    assert new_index is None

    with pytest.raises(KeyError):
        index.get_loc_level((2, 2))

    # GH 22221: unused label
    without_2 = index.drop(2)
    with pytest.raises(KeyError):
        without_2.get_loc_level(2)

    # Unused label on unsorted level:
    without_1 = index.drop(1, level=2)
    with pytest.raises(KeyError):
        without_1.get_loc_level(2, 2)

    index = MultiIndex(levels=[[2000], lrange(4)],
                       labels=[np.array([0, 0, 0, 0]),
                               np.array([0, 1, 2, 3])])
    result, new_index = index.get_loc_level((2000, slice(None, None)))
    expected = slice(None, None)
    assert result == expected
    assert new_index.equals(index.droplevel(0))
def test_first_last_nth_dtypes(df_mixed_floats):
    """first / last / nth on mixed dtypes; first() keeps int64 (GH 2763)."""
    df = df_mixed_floats.copy()
    df['E'] = True
    df['F'] = 1

    # tests for first / last / nth
    grouped = df.groupby('A')
    value_cols = ['B', 'C', 'D', 'E', 'F']
    cases = [
        (grouped.first, [1, 0]),
        (grouped.last, [5, 7]),
        (lambda: grouped.nth(1), [3, 2]),
    ]
    for compute, rows in cases:
        result = compute()
        expected = df.loc[rows, value_cols]
        expected.index = Index(['bar', 'foo'], name='A')
        expected = expected.sort_index()
        assert_frame_equal(result, expected)

    # GH 2763, first/last shifting dtypes
    idx = lrange(10) + [9]
    ser = Series(data=lrange(11), index=idx, name='IntCol')
    assert ser.dtype == 'int64'
    firsts = ser.groupby(level=0).first()
    assert firsts.dtype == 'int64'
def test_dropna(self):
    """``DataFrame.dropna`` across axis, thresh, subset and how options."""
    df = DataFrame(np.random.randn(6, 4))
    df[2][:2] = np.nan

    def check(expected, **options):
        # The returned frame and the in-place variant must both match.
        dropped = df.dropna(**options)
        inp = df.copy()
        inp.dropna(inplace=True, **options)
        assert_frame_equal(dropped, expected)
        assert_frame_equal(inp, expected)

    check(df.loc[:, [0, 1, 3]], axis=1)
    check(df.loc[lrange(2, 6)], axis=0)

    # threshold
    check(df.loc[:, [0, 1, 3]], axis=1, thresh=5)
    check(df.loc[lrange(2, 6)], axis=0, thresh=4)

    for thresh in (4, 3):
        dropped = df.dropna(axis=1, thresh=thresh)
        assert_frame_equal(dropped, df)

    # subset
    check(df, axis=0, subset=[0, 1, 3])

    # all
    dropped = df.dropna(axis=1, how='all')
    assert_frame_equal(dropped, df)

    df[2] = np.nan
    dropped = df.dropna(axis=1, how='all')
    expected = df.loc[:, [0, 1, 3]]
    assert_frame_equal(dropped, expected)

    # bad input
    msg = ("No axis named 3 for object type"
           " <class 'pandas.core.frame.DataFrame'>")
    with pytest.raises(ValueError, match=msg):
        df.dropna(axis=3)
def test_fillna_dtype_conversion(self):
    """``fillna`` converts dtypes on empty frames and mirrors ``replace``."""
    # make sure that fillna on an empty frame works
    rows, columns = ["A", "B", "C"], [1, 2, 3, 4, 5]
    df = DataFrame(index=rows, columns=columns)
    counts = df.get_dtype_counts().sort_values()
    assert_series_equal(counts, Series({'object': 5}))

    filled = df.fillna(1)
    # Built as in the original; only the dtype counts are compared below.
    expected = DataFrame(1, index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
    counts = filled.get_dtype_counts().sort_values()
    expected = Series({'int64': 5})
    assert_series_equal(counts, expected)

    # empty block
    df = DataFrame(index=lrange(3), columns=['A', 'B'], dtype='float64')
    filled = df.fillna('nan')
    expected = DataFrame('nan', index=lrange(3), columns=['A', 'B'])
    assert_frame_equal(filled, expected)

    # equiv of replace
    df = DataFrame(dict(A=[1, np.nan], B=[1., 2.]))
    for fill in ['', 1, np.nan, 1.0]:
        expected = df.replace(np.nan, fill)
        filled = df.fillna(fill)
        assert_frame_equal(filled, expected)
def test_where_unsafe_float(float_dtype):
    """Masked assignment of ints into a float Series keeps the float dtype."""
    ser = Series(np.arange(10), dtype=float_dtype)
    below_five = ser < 5

    ser[below_five] = lrange(2, 7)
    replaced = lrange(2, 7) + lrange(5, 10)
    expected = Series(replaced, dtype=float_dtype)

    assert_series_equal(ser, expected)
def test_dropna(self):
    """Exercise ``dropna`` by axis, threshold, subset and ``how='all'``."""
    df = DataFrame(np.random.randn(6, 4))
    df[2][:2] = nan

    def both_ways(**kwargs):
        # Return the out-of-place result and an in-place modified copy.
        dropped = df.dropna(**kwargs)
        inp = df.copy()
        inp.dropna(inplace=True, **kwargs)
        return dropped, inp

    dropped, inp = both_ways(axis=1)
    expected = df.loc[:, [0, 1, 3]]
    assert_frame_equal(dropped, expected)
    assert_frame_equal(inp, expected)

    dropped, inp = both_ways(axis=0)
    expected = df.loc[lrange(2, 6)]
    assert_frame_equal(dropped, expected)
    assert_frame_equal(inp, expected)

    # threshold
    dropped, inp = both_ways(axis=1, thresh=5)
    expected = df.loc[:, [0, 1, 3]]
    assert_frame_equal(dropped, expected)
    assert_frame_equal(inp, expected)

    dropped, inp = both_ways(axis=0, thresh=4)
    expected = df.loc[lrange(2, 6)]
    assert_frame_equal(dropped, expected)
    assert_frame_equal(inp, expected)

    assert_frame_equal(df.dropna(axis=1, thresh=4), df)
    assert_frame_equal(df.dropna(axis=1, thresh=3), df)

    # subset
    dropped, inp = both_ways(axis=0, subset=[0, 1, 3])
    assert_frame_equal(dropped, df)
    assert_frame_equal(inp, df)

    # all
    assert_frame_equal(df.dropna(axis=1, how='all'), df)

    df[2] = nan
    dropped = df.dropna(axis=1, how='all')
    expected = df.loc[:, [0, 1, 3]]
    assert_frame_equal(dropped, expected)

    # bad input
    with pytest.raises(ValueError):
        df.dropna(axis=3)
def test_lrange(self):
    """``lrange`` returns the list form of the builtin ``range``."""
    # Parallel tuples: one entry per case.
    results = (lrange(10), lrange(1, 10, 2))
    expecteds = (list(builtins.range(10)),
                 list(builtins.range(1, 10, 2)))
    lengths = (10, 5)
    self.check_results(results, expecteds, lengths)
def test_indexing():
    """Partial-string indexing on datetime-indexed Series and DataFrames."""
    idx = date_range("2001-1-1", periods=20, freq='M')
    ts = Series(np.random.rand(len(idx)), index=idx)

    # getting

    # GH 3070, make sure semantics work on Series/Frame
    expected = ts['2001']
    expected.name = 'A'

    df = DataFrame({'A': ts})
    result = df['2001']['A']
    assert_series_equal(expected, result)

    # setting
    ts['2001'] = 1
    expected = ts['2001']
    expected.name = 'A'

    df.loc['2001', 'A'] = 1

    result = df['2001']['A']
    assert_series_equal(expected, result)

    # GH3546 (not including times on the last day)
    cases = [
        (date_range(start='2013-05-31 00:00', end='2013-05-31 23:00',
                    freq='H'), '2013-05'),
        (date_range(start='2013-05-31 00:00', end='2013-05-31 23:59',
                    freq='S'), '2013-05'),
        ([Timestamp('2013-05-31 00:00'),
          Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999))], '2013'),
    ]
    for idx, key in cases:
        ts = Series(lrange(len(idx)), index=idx)
        expected = ts[key]
        assert_series_equal(expected, ts)

    # GH14826, indexing with a seconds resolution string / datetime object
    df = DataFrame(np.random.rand(5, 5),
                   columns=['open', 'high', 'low', 'close', 'volume'],
                   index=date_range('2012-01-02 18:01:00',
                                    periods=5, tz='US/Central', freq='s'))
    expected = df.loc[[df.index[2]]]

    # this is a single date, so will raise
    with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"):
        df['2012-01-02 18:01:02']

    msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central', freq='S'\)"
    with pytest.raises(KeyError, match=msg):
        df[df.index[2]]
def test_where_unsafe():
    """Masked setitem / ``where``: upcasting, length checks and None handling."""
    # see gh-9731
    ser = Series(np.arange(10), dtype="int64")
    values = [2.5, 3.5, 4.5, 5.5]

    mask = ser > 5
    expected = Series(lrange(6) + values, dtype="float64")

    ser[mask] = values
    assert_series_equal(ser, expected)

    # see gh-3235
    ser = Series(np.arange(10), dtype='int64')
    mask = ser < 5
    ser[mask] = lrange(2, 7)
    expected = Series(lrange(2, 7) + lrange(5, 10), dtype='int64')
    assert_series_equal(ser, expected)
    assert ser.dtype == expected.dtype

    ser = Series(np.arange(10), dtype='int64')
    mask = ser > 5
    ser[mask] = [0] * 4
    expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype='int64')
    assert_series_equal(ser, expected)

    # four positions are selected, so five values cannot be assigned
    ser = Series(np.arange(10))
    mask = ser > 5
    for wrong_length in ([5, 4, 3, 2, 1], [0] * 5):
        with pytest.raises(ValueError):
            ser[mask] = wrong_length

    # dtype changes
    ser = Series([1, 2, 3, 4])
    result = ser.where(ser > 2, np.nan)
    expected = Series([np.nan, np.nan, 3, 4])
    assert_series_equal(result, expected)

    # GH 4667
    # setting with None changes dtype
    ser = Series(range(10)).astype(float)
    ser[8] = None
    result = ser[8]
    assert isna(result)

    ser = Series(range(10)).astype(float)
    ser[ser > 8] = None
    result = ser[isna(ser)]
    expected = Series(np.nan, index=[9])
    assert_series_equal(result, expected)
def test_constructor_generator(self):
    """A generator is consumed like a list, with or without an index."""
    expected = Series(lrange(10))

    result = Series(i for i in range(10))
    assert_series_equal(result, expected)

    values = (i for i in range(10))
    result = Series(values, index=lrange(10, 20))
    expected.index = lrange(10, 20)
    assert_series_equal(result, expected)
def test_iloc_getitem_frame(self):
    """Positional ``iloc`` lookups agree with the label-based ``ix`` ones."""
    df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2),
                   columns=lrange(0, 8, 2))

    def by_label(key):
        # ``ix`` lookup with its warnings recorded rather than shown.
        with catch_warnings(record=True):
            return df.ix[key]

    result = df.iloc[2]
    tm.assert_series_equal(result, by_label(4))

    result = df.iloc[2, 2]
    assert result == by_label((4, 4))

    # slice
    result = df.iloc[4:8]
    tm.assert_frame_equal(result, by_label(slice(8, 14)))

    result = df.iloc[:, 2:3]
    tm.assert_frame_equal(result, by_label((slice(None), slice(4, 5))))

    # list of integers
    result = df.iloc[[0, 1, 3]]
    tm.assert_frame_equal(result, by_label([0, 2, 6]))

    result = df.iloc[[0, 1, 3], [0, 1]]
    tm.assert_frame_equal(result, by_label(([0, 2, 6], [0, 2])))

    # negative indices
    result = df.iloc[[-1, 1, 3], [-1, 1]]
    tm.assert_frame_equal(result, by_label(([18, 2, 6], [6, 2])))

    # duplicated indices
    result = df.iloc[[-1, -1, 1, 3], [-1, 1]]
    tm.assert_frame_equal(result, by_label(([18, 18, 2, 6], [6, 2])))

    # with index-like
    positions = Series(index=lrange(1, 5))
    result = df.iloc[positions.index]
    tm.assert_frame_equal(result, by_label([2, 4, 6, 8]))
def test_insert_benchmark(self):
    """Inserting the same column repeatedly builds the expected frame."""
    # from the vb_suite/frame_methods/frame_insert_columns
    n_rows, n_cols = 10, 5
    df = DataFrame(index=lrange(n_rows))
    new_col = np.random.randn(n_rows)
    for label in range(n_cols):
        df[label] = new_col

    # each row holds its value repeated once per inserted column
    tiled = np.repeat(new_col, n_cols).reshape(n_rows, n_cols)
    expected = DataFrame(tiled, index=lrange(n_rows))
    assert_frame_equal(df, expected)
def test_constructor_empty(self):
    """Empty Series compare equal however they are constructed."""
    # these are Index() and RangeIndex() which don't compare type equal
    # but are just .equals
    assert_series_equal(Series(), Series([]), check_index_type=False)

    index_only = Series(index=lrange(10))
    all_nan = Series(np.nan, index=lrange(10))
    assert_series_equal(index_only, all_nan)
def test_iloc_mask(self):
    """Boolean masks with ``[]``, ``.loc`` and ``.iloc`` (GH 3631)."""
    # GH 3631, iloc with a mask (of a series) should raise
    df = DataFrame(lrange(5), list('ABCDE'), columns=['a'])
    mask = (df.a % 2 == 0)
    self.assertRaises(ValueError, df.iloc.__getitem__, tuple([mask]))
    mask.index = lrange(len(mask))
    self.assertRaises(NotImplementedError, df.iloc.__getitem__,
                      tuple([mask]))

    # ndarray ok
    result = df.iloc[np.array([True] * len(mask), dtype=bool)]
    assert_frame_equal(result, df)

    # the possibilities
    locs = np.arange(4)
    nums = 2 ** locs
    reps = lmap(bin, nums)
    df = DataFrame({'locs': locs, 'nums': nums}, reps)

    # (mask index source, accessor) -> binary sum of the selected 'nums',
    # or the text of the exception raised by the lookup.
    expected = {
        (None, ''): '0b1100',
        (None, '.loc'): '0b1100',
        (None, '.iloc'): '0b1100',
        ('index', ''): '0b11',
        ('index', '.loc'): '0b11',
        ('index', '.iloc'): 'iLocation based boolean indexing cannot use an indexable as a mask',
        ('locs', ''): 'Unalignable boolean Series key provided',
        ('locs', '.loc'): 'Unalignable boolean Series key provided',
        ('locs', '.iloc'): 'iLocation based boolean indexing on an integer type is not available',
    }

    import warnings
    warnings.filterwarnings(action='ignore', category=UserWarning)
    try:
        for idx in [None, 'index', 'locs']:
            mask = (df.nums > 2).values
            if idx:
                mask = Series(mask, list(reversed(getattr(df, idx))))
            for method in ['', '.loc', '.iloc']:
                try:
                    if method:
                        accessor = getattr(df, method[1:])
                    else:
                        accessor = df
                    ans = str(bin(accessor[mask]['nums'].sum()))
                except Exception as e:
                    # The exception text is itself the value under test.
                    ans = str(e)

                key = tuple([idx, method])
                r = expected.get(key)
                if r != ans:
                    # report the expected value first, then what we got
                    raise AssertionError(
                        "[%s] does not match [%s], received [%s]"
                        % (key, r, ans))
    finally:
        # Always re-enable UserWarning, even when a case fails.
        warnings.filterwarnings(action='always', category=UserWarning)
def test_constructor_map(self):
    """A ``map`` object is consumed like a list (GH 8909)."""
    # GH8909
    def identity(x):
        return x

    expected = Series(lrange(10))

    result = Series(map(identity, range(10)))
    assert_series_equal(result, expected)

    result = Series(map(identity, range(10)), index=lrange(10, 20))
    expected.index = lrange(10, 20)
    assert_series_equal(result, expected)
def test_constructor_pass_none(self):
    """``None`` data gives float64 unless a dtype is requested."""
    default = Series(None, index=lrange(5))
    assert default.dtype == np.float64

    as_object = Series(None, index=lrange(5), dtype=object)
    assert as_object.dtype == np.object_

    # GH 7431
    # inference on the index
    from_array = Series(index=np.array([None]))
    from_index = Series(index=Index([None]))
    assert_series_equal(from_array, from_index)
def test_bar_log_no_subplots(self):
    """Log-scaled bar plot has the expected y tick locations."""
    # GH3254, GH3298 matplotlib/matplotlib#1882, #1892
    # regressions in 1.2.1
    ticks = np.array([1., 10.])
    if not self.mpl_le_1_2_1:
        ticks = np.hstack((.1, ticks, 100))

    # no subplots
    frame = DataFrame({'A': [3] * 5, 'B': lrange(1, 6)}, index=lrange(5))
    axes = frame.plot(kind='bar', grid=True, log=True)
    assert_array_equal(axes.yaxis.get_ticklocs(), ticks)
def test_sort_index_duplicates(self):
    """Sorting by a duplicated or ambiguous column label raises."""
    # with 9816, these are all translated to .sort_values

    df = DataFrame([lrange(5, 9), lrange(4)],
                   columns=['a', 'a', 'b', 'b'])

    # multi-column 'by' is separate codepath
    for by in ('a', ['a'], ['a', 'b']):
        with assertRaisesRegexp(ValueError, 'duplicate'):
            # use .sort_values #9816
            with tm.assert_produces_warning(FutureWarning):
                df.sort_index(by=by)
        with assertRaisesRegexp(ValueError, 'duplicate'):
            df.sort_values(by=by)

    # with multi-index
    # GH4370
    columns = MultiIndex.from_tuples([('a', 0), ('a', 1)])
    df = DataFrame(np.random.randn(4, 2), columns=columns)
    with assertRaisesRegexp(ValueError, 'levels'):
        # use .sort_values #9816
        with tm.assert_produces_warning(FutureWarning):
            df.sort_index(by='a')
    with assertRaisesRegexp(ValueError, 'levels'):
        df.sort_values(by='a')

    # convert tuples to a list of tuples
    # use .sort_values #9816
    with tm.assert_produces_warning(FutureWarning):
        df.sort_index(by=[('a', 1)])
    expected = df.sort_values(by=[('a', 1)])

    # use .sort_values #9816
    with tm.assert_produces_warning(FutureWarning):
        df.sort_index(by=('a', 1))
    result = df.sort_values(by=('a', 1))
    assert_frame_equal(result, expected)
def test_float_none_comparison(self):
    """Comparing a float frame with ``None`` raises TypeError."""
    values = np.random.randn(8, 3)
    frame = DataFrame(values, index=lrange(8), columns=['A', 'B', 'C'])

    with pytest.raises(TypeError):
        frame.__eq__(None)
def test_map_type_inference(self):
    """Mapping to ``np.where`` results still infers an integer dtype."""
    def zero_or_one(x):
        return np.where(x == 0, 0, 1)

    mapped = Series(lrange(3)).map(zero_or_one)
    is_integer = issubclass(mapped.dtype.type, np.integer)
    self.assertTrue(is_integer)
def test_valid_object_plot(self):
    """Every common plot kind works on an object-dtype Series."""
    ser = Series(lrange(10), dtype=object)
    for kind in plotting._core._common_kinds:
        # kinds rejected by _ok_for_gaussian_kde are skipped
        if _ok_for_gaussian_kde(kind):
            _check_plot_works(ser.plot, kind=kind)
def get_forward_data(self, months, call=True, put=False, near=False,
                     above_below=2):
    """
    Gets either call, put, or both data for months starting in the current
    month and going out in the future a specified amount of time.

    Parameters
    ----------
    months: number, int
        How many months to go out in the collection of the data. This is
        inclusive.

    call: bool, optional (default=True)
        Whether or not to collect data for call options

    put: bool, optional (default=False)
        Whether or not to collect data for put options.

    near: bool, optional (default=False)
        Whether this function should get only the data near the
        current stock price. Uses Options.get_near_stock_price

    above_below: number, int, optional (default=2)
        The number of strike prices above and below the stock price that
        should be taken if the near option is set to True

    Returns
    -------
    data : dict of str, DataFrame
        A single DataFrame when only one of `call` / `put` is requested,
        otherwise a list with one DataFrame per requested kind.

    Raises
    ------
    AssertionError
        If neither `call` nor `put` is requested.
    """
    warnings.warn("get_forward_data() is deprecated", FutureWarning)

    # Walk forward one month at a time from the current month, carrying
    # into the year each time the month number passes December.  Working
    # on a zero-based month keeps spans of any length correct.
    in_months = []
    in_years = []
    for offset in range(months + 1):
        years_ahead, month_index = divmod(CUR_MONTH - 1 + offset, 12)
        in_months.append(month_index + 1)
        in_years.append(CUR_YEAR + years_ahead)

    # Keep only the option kinds that were requested.
    to_ret = Series({'calls': call, 'puts': put})
    to_ret = to_ret[to_ret].index

    data = {}

    for name in to_ret:
        all_data = DataFrame()

        # NOTE(review): only the first ``months`` entries are fetched even
        # though the parameter is documented as inclusive -- confirm.
        for mon in range(months):
            m2 = in_months[mon]
            y2 = in_years[mon]

            if not near:
                m1 = _two_char_month(m2)
                nam = name + str(m1) + str(y2)[2:]

                try:  # Try to access on the instance
                    frame = getattr(self, nam)
                except AttributeError:
                    meth_name = 'get_{0}_data'.format(name[:-1])
                    frame = getattr(self, meth_name)(m2, y2)
            else:
                frame = self.get_near_stock_price(call=call, put=put,
                                                  above_below=above_below,
                                                  month=m2, year=y2)

            # The expiry is read from the characters that follow the
            # ticker symbol in the first option symbol (YYMMDD).
            tick = str(frame.Symbol[0])
            start = len(self.symbol)
            year = tick[start:start + 2]
            month = tick[start + 2:start + 4]
            day = tick[start + 4:start + 6]
            expiry = month + '-' + day + '-' + year
            frame['Expiry'] = expiry

            if not mon:
                all_data = all_data.join(frame, how='right')
            else:
                all_data = concat([all_data, frame])
        data[name] = all_data

    ret = [data[k] for k in to_ret]
    if len(ret) == 1:
        return ret.pop()
    if len(ret) != 2:
        raise AssertionError("should be len 2")
    return ret
def test_groupby_level_index_names(self):
    """Grouping by index name works; an unknown level name raises."""
    # GH4014 this used to raise ValueError since 'exp'>1 (in py2)
    frame = DataFrame({'exp': ['A'] * 3 + ['B'] * 3,
                       'var1': lrange(6)})
    frame = frame.set_index('exp')

    frame.groupby(level='exp')
    with pytest.raises(ValueError):
        frame.groupby(level='foo')
def tolist(self):
    """Return the values described by start, stop and step as a list."""
    bounds = (self._start, self._stop, self._step)
    return lrange(*bounds)
def test_fillna_col_reordering(self):
    """Forward-filling must not reorder the columns."""
    cols = ["COL.%d" % i for i in range(5, 0, -1)]
    data = np.random.rand(20, 5)
    frame = DataFrame(index=lrange(20), columns=cols, data=data)

    filled = frame.fillna(method='ffill')

    before = frame.columns.tolist()
    after = filled.columns.tolist()
    self.assertEqual(before, after)
def test_int_name_format(self):
    """``repr`` works when the index name is an integer."""
    index = Index(['a', 'b', 'c'], name=0)
    objects = (Series(lrange(3), index),
               DataFrame(lrange(3), index=index))
    # smoke test: only checks that repr does not raise
    for obj in objects:
        repr(obj)
def makeIntIndex(k=10):
    """Return an Index holding the integers ``0 .. k - 1``."""
    values = lrange(k)
    return Index(values)
def test_series_from_json_to_json(self):
    """Series survive a ``to_json`` / ``read_json`` round trip for every orient."""

    def _check_orient(series, orient, dtype=None, numpy=False,
                      check_index_type=True):
        # Round-trip through JSON and compare with the sorted input.
        series = series.sort_index()
        unser = read_json(series.to_json(orient=orient),
                          typ='series', orient=orient, numpy=numpy,
                          dtype=dtype)
        unser = unser.sort_index()
        if orient == "records" or orient == "values":
            # only the values are compared for these orients
            assert_almost_equal(series.values, unser.values)
        else:
            if orient == "split":
                assert_series_equal(series, unser,
                                    check_index_type=check_index_type)
            else:
                assert_series_equal(series, unser, check_names=False,
                                    check_index_type=check_index_type)

    def _check_all_orients(series, dtype=None, check_index_type=True):
        _check_orient(series, "columns", dtype=dtype,
                      check_index_type=check_index_type)
        _check_orient(series, "records", dtype=dtype,
                      check_index_type=check_index_type)
        _check_orient(series, "split", dtype=dtype,
                      check_index_type=check_index_type)
        _check_orient(series, "index", dtype=dtype,
                      check_index_type=check_index_type)
        _check_orient(series, "values", dtype=dtype)

        _check_orient(series, "columns", dtype=dtype, numpy=True,
                      check_index_type=check_index_type)
        _check_orient(series, "records", dtype=dtype, numpy=True,
                      check_index_type=check_index_type)
        _check_orient(series, "split", dtype=dtype, numpy=True,
                      check_index_type=check_index_type)
        _check_orient(series, "index", dtype=dtype, numpy=True,
                      check_index_type=check_index_type)
        _check_orient(series, "values", dtype=dtype, numpy=True,
                      check_index_type=check_index_type)

    # basic
    _check_all_orients(self.series)
    self.assertEqual(self.series.to_json(),
                     self.series.to_json(orient="index"))

    objSeries = Series([str(d) for d in self.objSeries],
                       index=self.objSeries.index,
                       name=self.objSeries.name)
    _check_all_orients(objSeries, dtype=False)

    # empty_series has an empty index with object dtype,
    # which cannot be reverted
    self.assertEqual(self.empty_series.index.dtype, np.object_)
    _check_all_orients(self.empty_series, check_index_type=False)

    _check_all_orients(self.ts)

    # dtype
    s = Series(lrange(6), index=['a', 'b', 'c', 'd', 'e', 'f'])
    _check_all_orients(Series(s, dtype=np.float64), dtype=np.float64)
    # builtin ``int`` is what the ``np.int`` alias referred to; the alias
    # itself is no longer available in NumPy.
    _check_all_orients(Series(s, dtype=int), dtype=int)
def test_bytestring_with_unicode(self):
    """Converting a large Index to a byte string must not raise."""
    index = Index(lrange(1000))

    # ``bytes`` on Python 3, ``str`` otherwise
    to_bytestring = bytes if PY3 else str
    to_bytestring(index)
def test_to_html_multiindex_odd_even_truncate(max_rows, expected, datapath):
    """Truncated HTML for a three-level MultiIndex matches the stored file."""
    # GH 14882 - Issue on truncation with odd length DataFrame
    index = MultiIndex.from_product([[100, 200, 300],
                                     [10, 20, 30],
                                     [1, 2, 3, 4, 5, 6, 7]],
                                    names=['a', 'b', 'c'])
    df = DataFrame({'n': range(len(index))}, index=index)
    result = df.to_html(max_rows=max_rows)
    expected = expected_html(datapath, expected)
    assert result == expected


@pytest.mark.parametrize('df,formatters,expected', [
    (DataFrame(
        [[0, 1], [2, 3], [4, 5], [6, 7]],
        columns=['foo', None], index=lrange(4)),
     {'__index__': lambda x: 'abcd'[x]},
     'index_formatter'),

    (DataFrame(
        {'months': [datetime(2016, 1, 1), datetime(2016, 2, 2)]}),
     {'months': lambda x: x.strftime('%Y-%m')},
     'datetime64_monthformatter'),

    (DataFrame({'hod': pd.to_datetime(['10:10:10.100', '12:12:12.120'],
                                      format='%H:%M:%S.%f')}),
     {'hod': lambda x: x.strftime('%H:%M')},
     'datetime64_hourformatter')
])
def test_to_html_formatters(df, formatters, expected, datapath):
    """HTML rendered with custom formatters matches the stored file."""
    expected = expected_html(datapath, expected)
    # Render with the parametrized formatters and compare with the
    # reference output, as the truncation test above does.
    result = df.to_html(formatters=formatters)
    assert result == expected
def test_constructor_nan(self, input_arg):
    """``Series(input_arg, index=...)`` equals an empty float64 Series.

    ``input_arg`` is supplied from outside this function (presumably a
    parametrization over NaN-like values -- confirm at the decorator).
    """
    reference = Series(dtype='float64', index=lrange(10))
    built = Series(input_arg, index=lrange(10))

    assert_series_equal(reference, built, check_index_type=False)
def test_constructor_dtype_datetime64(self):
    """Series construction for datetime64 data, NaT/NaN and look-alikes."""
    # scalar iNaT broadcast with an explicit datetime64[ns] dtype
    s = Series(iNaT, dtype='M8[ns]', index=lrange(5))
    assert isna(s).all()

    # in theory this should be all nulls, but since
    # we are not specifying a dtype it is ambiguous
    s = Series(iNaT, index=lrange(5))
    assert not isna(s).all()

    s = Series(nan, dtype='M8[ns]', index=lrange(5))
    assert isna(s).all()

    # a missing marker in second position, explicit dtype
    for missing in (iNaT, nan):
        s = Series([datetime(2001, 1, 2, 0, 0), missing], dtype='M8[ns]')
        assert isna(s[1])
        assert s.dtype == 'M8[ns]'

    # GH3416
    dates = [np.datetime64(datetime(2013, 1, day)) for day in (1, 2, 3)]
    s = Series(dates)
    assert s.dtype == 'M8[ns]'

    s.iloc[0] = np.nan
    assert s.dtype == 'M8[ns]'

    # invalid astypes
    for t in ['s', 'D', 'us', 'ms']:
        with pytest.raises(TypeError):
            s.astype('M8[%s]' % t)

    # GH3414 related
    # NOTE(review): both lambdas below require one positional argument but
    # pytest.raises calls them with none, so the TypeError comes from the
    # call itself and the Series constructors are never executed. Left
    # unchanged here -- confirm the intended behaviour before fixing.
    pytest.raises(TypeError, lambda x: Series(
        Series(dates).astype('int') / 1000000, dtype='M8[ms]'))
    pytest.raises(TypeError,
                  lambda x: Series(dates, dtype='datetime64'))

    # invalid dates can be held as object
    for stamp in (datetime(2, 1, 1), datetime(3000, 1, 1)):
        result = Series([stamp])
        assert result[0] == stamp

    # don't mix types
    result = Series([Timestamp('20130101'), 1], index=['a', 'b'])
    assert result['a'] == Timestamp('20130101')
    assert result['b'] == 1

    # GH6529
    # coerce datetime64 non-ns properly
    dates = date_range('01-Jan-2015', '01-Dec-2015', freq='M')
    values2 = dates.view(np.ndarray).astype('datetime64[ns]')
    expected = Series(values2, index=dates)

    for unit in ['s', 'D', 'ms', 'us', 'ns']:
        values1 = dates.view(np.ndarray).astype('M8[{0}]'.format(unit))
        result = Series(values1, dates)
        assert_series_equal(result, expected)

    # GH 13876
    # coerce to non-ns to object properly
    expected = Series(values2, index=dates, dtype=object)
    for unit in ['s', 'D', 'ms', 'us', 'ns']:
        values1 = dates.view(np.ndarray).astype('M8[{0}]'.format(unit))
        result = Series(values1, index=dates, dtype=object)
        assert_series_equal(result, expected)

    # leave datetime.date alone
    dates2 = np.array([d.date() for d in dates.to_pydatetime()],
                      dtype=object)
    series1 = Series(dates2, dates)
    tm.assert_numpy_array_equal(series1.values, dates2)
    assert series1.dtype == object

    # these will correctly infer a datetime
    text = '2013-08-05 15:30:00.000001'
    inferable = (
        [None, pd.NaT, text],
        [np.nan, pd.NaT, text],
        [pd.NaT, None, text],
        [pd.NaT, np.nan, text],
    )
    for values in inferable:
        s = Series(values)
        assert s.dtype == 'datetime64[ns]'

    # tz-aware (UTC and other tz's)
    # GH 8411
    dr = date_range('20130101', periods=3)
    assert Series(dr).iloc[0].tz is None
    for zone in ('UTC', 'US/Eastern'):
        dr = date_range('20130101', periods=3, tz=zone)
        assert str(Series(dr).iloc[0].tz) == zone

    # Mixed content that stays object dtype; whatever missing marker was
    # passed in (NaT or nan) must be kept as-is in the last position.
    object_cases = (
        # non-convertible
        ([1479596223000, -1479590, pd.NaT], pd.NaT, 'NaT'),
        # if we passed a NaT it remains
        ([datetime(2010, 1, 1), datetime(2, 1, 1), pd.NaT], pd.NaT, 'NaT'),
        # if we passed a nan it remains
        ([datetime(2010, 1, 1), datetime(2, 1, 1), np.nan], np.nan, 'NaN'),
    )
    for values, marker, shown in object_cases:
        s = Series(values)
        assert s.dtype == 'object'
        assert s[2] is marker
        assert shown in str(s)
def test_typ(self):
    """``read_json(..., typ=None)`` gives back the original int64 Series."""
    labels = ['a', 'b', 'c', 'd', 'e', 'f']
    s = Series(lrange(6), index=labels, dtype='int64')

    encoded = s.to_json()
    result = read_json(encoded, typ=None)

    assert_series_equal(result, s)
def randu(n):
    """Return a random string of ``n`` characters.

    Characters are drawn with ``random.choice`` from the 26 code points
    starting at 1488 followed by ``string.digits``.
    """
    letters = u("").join(map(unichr, lrange(1488, 1488 + 26)))
    alphabet = letters + string.digits
    picked = [random.choice(alphabet) for _ in range(n)]
    return ''.join(picked)
def test_multi_assign(self):
    """Assign a sub-frame / ndarray / Series into a masked ``.loc`` slice."""
    # GH 3626, an assignment of a sub-df to a df

    def _expected(col1):
        # The two expected frames differ only in how 'col1' is supplied.
        return DataFrame({
            'FC': ['a', np.nan, 'a', 'b', 'a', 'b'],
            'PF': [0, 0, 0, 0, 1, 1],
            'col1': col1,
            'col2': [12, 7, 16, np.nan, 20, 22]
        })

    df = DataFrame({
        'FC': ['a', 'b', 'a', 'b', 'a', 'b'],
        'PF': [0, 0, 0, 0, 1, 1],
        'col1': lrange(6),
        'col2': lrange(6, 12)
    })
    df.iloc[1, 0] = np.nan
    df2 = df.copy()

    mask = ~df2.FC.isna()
    cols = ['col1', 'col2']

    dft = df2 * 2
    dft.iloc[3, 3] = np.nan

    expected = _expected(Series([0, 1, 4, 6, 8, 10]))

    # frame on rhs; the assignment is done twice and checked each time
    for _ in range(2):
        df2.loc[mask, cols] = dft.loc[mask, cols]
        tm.assert_frame_equal(df2, expected)

    # with an ndarray on rhs
    # coerces to float64 because values has float64 dtype
    # GH 14001
    expected = _expected([0., 1., 4., 6., 8., 10.])
    df2 = df.copy()
    for _ in range(2):
        df2.loc[mask, cols] = dft.loc[mask, cols].values
        tm.assert_frame_equal(df2, expected)

    # broadcasting on the rhs is required
    df = DataFrame({'A': [1, 2, 0, 0, 0],
                    'B': [0, 0, 0, 10, 11],
                    'C': [0, 0, 0, 10, 11],
                    'D': [3, 4, 5, 6, 7]})

    expected = df.copy()
    mask = expected['A'] == 0
    for col in ['A', 'B']:
        expected.loc[mask, col] = df['D']

    df.loc[df['A'] == 0, ['A', 'B']] = df['D']
    tm.assert_frame_equal(df, expected)
def test_reindex_fill_value(self):
    """``fill_value`` in ``reindex`` / ``reindex_axis`` on rows and columns."""
    df = DataFrame(np.random.randn(10, 4))

    # axis=0: without fill_value the five added rows are NaN
    result = df.reindex(lrange(15))
    assert np.isnan(result.values[-5:]).all()

    result = df.reindex(lrange(15), fill_value=0)
    expected = df.reindex(lrange(15)).fillna(0)
    assert_frame_equal(result, expected)

    # axis=1: the added column 4 holds the fill value as given
    for fill in (0., 0, 'foo'):
        result = df.reindex(columns=lrange(5), fill_value=fill)
        expected = df.copy()
        expected[4] = fill
        assert_frame_equal(result, expected)

    # reindex_axis
    with tm.assert_produces_warning(FutureWarning):
        result = df.reindex_axis(lrange(15), fill_value=0., axis=0)
    expected = df.reindex(lrange(15)).fillna(0)
    assert_frame_equal(result, expected)

    with tm.assert_produces_warning(FutureWarning):
        result = df.reindex_axis(lrange(5), fill_value=0., axis=1)
    expected = df.reindex(columns=lrange(5)).fillna(0)
    assert_frame_equal(result, expected)

    # other dtypes
    df['foo'] = 'foo'
    result = df.reindex(lrange(15), fill_value=0)
    expected = df.reindex(lrange(15)).fillna(0)
    assert_frame_equal(result, expected)
def test_to_html_multiindex(self):
    """``to_html`` with two-level columns, with and without level names."""
    # Fragments shared by both expected documents.
    table_open = ('<table border="1" class="dataframe">\n'
                  ' <thead>\n')
    second_header_cells = (' <th>0</th>\n'
                           ' <th>1</th>\n'
                           ' <th>0</th>\n'
                           ' <th>1</th>\n'
                           ' </tr>\n'
                           ' </thead>\n')
    body_and_close = (' <tbody>\n'
                      ' <tr>\n'
                      ' <th>0</th>\n'
                      ' <td>a</td>\n'
                      ' <td>b</td>\n'
                      ' <td>c</td>\n'
                      ' <td>d</td>\n'
                      ' </tr>\n'
                      ' <tr>\n'
                      ' <th>1</th>\n'
                      ' <td>e</td>\n'
                      ' <td>f</td>\n'
                      ' <td>g</td>\n'
                      ' <td>h</td>\n'
                      ' </tr>\n'
                      ' </tbody>\n'
                      '</table>')
    data = [list('abcd'), list('efgh')]

    # named levels, repeated top-level labels
    pairs = list(zip(np.arange(2).repeat(2), np.mod(lrange(4), 2)))
    columns = MultiIndex.from_tuples(pairs, names=['CL0', 'CL1'])
    df = DataFrame(data, columns=columns)
    result = df.to_html(justify='left')
    expected = (table_open +
                ' <tr>\n'
                ' <th>CL0</th>\n'
                ' <th colspan="2" halign="left">0</th>\n'
                ' <th colspan="2" halign="left">1</th>\n'
                ' </tr>\n'
                ' <tr>\n'
                ' <th>CL1</th>\n' +
                second_header_cells +
                body_and_close)

    assert result == expected

    # unnamed levels, distinct top-level labels
    pairs = list(zip(range(4), np.mod(lrange(4), 2)))
    columns = MultiIndex.from_tuples(pairs)
    df = DataFrame(data, columns=columns)

    result = df.to_html(justify='right')
    expected = (table_open +
                ' <tr>\n'
                ' <th></th>\n'
                ' <th>0</th>\n'
                ' <th>1</th>\n'
                ' <th>2</th>\n'
                ' <th>3</th>\n'
                ' </tr>\n'
                ' <tr>\n'
                ' <th></th>\n' +
                second_header_cells +
                body_and_close)

    assert result == expected
def test_reindex_multi(self):
    """Reindexing both axes at once equals reindexing them one by one."""

    def _check(frame, index, columns):
        result = frame.reindex(index=index, columns=columns)
        expected = frame.reindex(index).reindex(columns=columns)
        assert_frame_equal(result, expected)

    # float frame, enlarged on both axes
    _check(DataFrame(np.random.randn(3, 3)), lrange(4), lrange(4))

    # integer frame, enlarged on both axes
    _check(DataFrame(np.random.randint(0, 10, (3, 3))),
           lrange(4), lrange(4))

    # integer frame, shrunk on both axes
    _check(DataFrame(np.random.randint(0, 10, (3, 3))),
           lrange(2), lrange(2))

    # complex frame with string column labels
    _check(DataFrame(np.random.randn(5, 3) + 1j, columns=['a', 'b', 'c']),
           [0, 1], ['a', 'b'])
def _generate_marginal_results(table, data, values, rows, cols, aggfunc,
                               observed,
                               grand_margin,
                               margins_name='All'):
    """
    Build the margin pieces for an already-computed pivot ``table``.

    Parameters
    ----------
    table : object supporting ``groupby(level=0, axis=...)`` and ``.columns``
        The pivoted result without margins.
    data : object indexable by a list of labels
        Source data; re-aggregated here by ``rows`` and by ``cols``.
    values, rows, cols : list
        Label lists; they are concatenated with ``+`` and measured with
        ``len``.
    aggfunc : passed to ``.agg``
    observed : passed through to every ``groupby`` call
    grand_margin : mapping-like
        Used as the margin source (``grand_margin[key]``) when ``rows`` is
        empty.
    margins_name : default 'All'
        Label placed in the second position of each margin key.

    Returns
    -------
    result
        Returned alone when ``cols`` is non-empty and ``rows`` is empty.
    (result, margin_keys, row_margin)
        Returned in every other case.
    """
    if len(cols) > 0:
        # need to "interleave" the margins
        table_pieces = []
        margin_keys = []

        def _all_key(key):
            # (key, margins_name) padded with '' up to len(cols) + 1 entries
            return (key, margins_name) + ('', ) * (len(cols) - 1)

        if len(rows) > 0:
            # per-row aggregate, attached to each piece as an extra column
            margin = data[rows + values].groupby(
                rows, observed=observed).agg(aggfunc)
            cat_axis = 1

            for key, piece in table.groupby(level=0,
                                            axis=cat_axis,
                                            observed=observed):
                all_key = _all_key(key)

                # we are going to mutate this, so need to copy!
                piece = piece.copy()
                try:
                    piece[all_key] = margin[key]
                except TypeError:

                    # we cannot reshape, so coerce the axis
                    piece.set_axis(piece._get_axis(
                                   cat_axis)._to_safe_for_reshape(),
                                   axis=cat_axis, inplace=True)
                    # retry the same assignment on the coerced axis
                    piece[all_key] = margin[key]

                table_pieces.append(piece)
                margin_keys.append(all_key)
        else:
            # no row keys: margins come from grand_margin and are appended
            # as one-element Series after each piece along axis 0
            margin = grand_margin
            cat_axis = 0
            for key, piece in table.groupby(level=0,
                                            axis=cat_axis,
                                            observed=observed):
                all_key = _all_key(key)
                table_pieces.append(piece)
                table_pieces.append(Series(margin[key], index=[all_key]))
                margin_keys.append(all_key)

        result = concat(table_pieces, axis=cat_axis)

        # NOTE: this early exit returns a single object, not the 3-tuple
        # returned at the end of the function.
        if len(rows) == 0:
            return result
    else:
        result = table
        margin_keys = table.columns

    if len(cols) > 0:
        row_margin = data[cols + values].groupby(
            cols, observed=observed).agg(aggfunc)
        row_margin = row_margin.stack()

        # slight hack
        # move the last index level (position len(cols)) to the front
        new_order = [len(cols)] + lrange(len(cols))
        row_margin.index = row_margin.index.reorder_levels(new_order)
    else:
        row_margin = Series(np.nan, index=result.columns)

    return result, margin_keys, row_margin
def test_repr_big(self):
    """Smoke test: ``repr`` of a 200 x 4 all-zero frame must not raise."""
    n_rows, n_cols = 200, 4
    # big one
    biggie = DataFrame(np.zeros((n_rows, n_cols)),
                       columns=lrange(n_cols),
                       index=lrange(n_rows))
    repr(biggie)
def test_append_dtypes(self):
    """Row-append frames whose single 'bar' column has differing dtypes."""
    # GH 5754
    # row appends of different dtypes (so need to do by-item)
    # can sometimes infer the correct type

    stamp = Timestamp('20130101')

    def _check(top, bottom, expected):
        result = top.append(bottom)
        assert_frame_equal(result, expected)

    # appending an empty frame leaves the original unchanged
    df1 = DataFrame({'bar': stamp}, index=lrange(5))
    _check(df1, DataFrame(), df1.copy())

    # timestamp + string
    _check(DataFrame({'bar': stamp}, index=lrange(1)),
           DataFrame({'bar': 'foo'}, index=lrange(1, 2)),
           DataFrame({'bar': [stamp, 'foo']}))

    # timestamp + nan
    _check(DataFrame({'bar': stamp}, index=lrange(1)),
           DataFrame({'bar': np.nan}, index=lrange(1, 2)),
           DataFrame({'bar': Series([stamp, np.nan], dtype='M8[ns]')}))

    # timestamp + nan held as object
    _check(DataFrame({'bar': stamp}, index=lrange(1)),
           DataFrame({'bar': np.nan}, index=lrange(1, 2), dtype=object),
           DataFrame({'bar': Series([stamp, np.nan], dtype='M8[ns]')}))

    # nan + timestamp
    _check(DataFrame({'bar': np.nan}, index=lrange(1)),
           DataFrame({'bar': stamp}, index=lrange(1, 2)),
           DataFrame({'bar': Series([np.nan, stamp], dtype='M8[ns]')}))

    # timestamp + integer held as object
    _check(DataFrame({'bar': stamp}, index=lrange(1)),
           DataFrame({'bar': 1}, index=lrange(1, 2), dtype=object),
           DataFrame({'bar': Series([stamp, 1])}))
def test_values(self):
    """Panel construction must raise for 5x5x5 data with a 4-label axis."""
    data = np.random.randn(5, 5, 5)
    # the last label list has 4 entries for an axis of length 5
    first, second, third = lrange(5), lrange(5), lrange(4)
    with self.assertRaises(Exception):
        Panel(data, first, second, third)
def pivot_annual(series, freq=None):
    """
    Deprecated. Use ``pivot_table`` instead.

    Group a series by years, taking leap years into account.

    The output has as many rows as distinct years in the original series,
    and as many columns as the length of a leap year in the units
    corresponding to the original frequency (366 for daily frequency,
    366*24 for hourly...). The first column of the output corresponds to
    Jan. 1st, 00:00:00, while the last column corresponds to Dec. 31st,
    23:59:59. Entries corresponding to Feb. 29th are masked for non-leap
    years.

    For example, if the initial series has a daily frequency, the 59th
    column of the output always corresponds to Feb. 28th, the 61st column
    to Mar. 1st, and the 60th column is masked for non-leap years.
    With an hourly initial frequency, the (59*24)th column of the output
    always corresponds to Feb. 28th 23:00, the (61*24)th column to
    Mar. 1st, 00:00, and the 24 columns between (59*24) and (61*24) are
    masked.

    If the original frequency is less than daily, the output is equivalent
    to ``series.convert('A', func=None)``.

    Parameters
    ----------
    series : Series
    freq : string or None, default None
        Upper-cased before use; when None, ``series.index.freq`` is used.

    Returns
    -------
    annual : DataFrame
        Indexed by the distinct years, with columns numbered from 1.

    Raises
    ------
    NotImplementedError
        If the frequency is not one of 'D', 'M', 'BM' or 'H'.
    """

    msg = "pivot_annual is deprecated. Use pivot_table instead"
    # stacklevel=2 attributes the warning to the caller's line rather than
    # to this function.
    warnings.warn(msg, FutureWarning, stacklevel=2)

    index = series.index
    year = index.year
    years = algorithms.unique1d(year)

    if freq is not None:
        freq = freq.upper()
    else:
        freq = series.index.freq

    if freq == 'D':
        width = 366
        offset = np.asarray(index.dayofyear) - 1

        # adjust for leap year: in non-leap years shift everything from
        # zero-based day 59 onwards by one so that slot stays empty
        offset[(~isleapyear(year)) & (offset >= 59)] += 1

        columns = lrange(1, 367)
        # todo: strings like 1/1, 1/25, etc.?
    elif freq in ('M', 'BM'):
        width = 12
        offset = np.asarray(index.month) - 1
        columns = lrange(1, 13)
    elif freq == 'H':
        width = 8784
        # position of each observation within its own year
        grouped = series.groupby(series.index.year)
        defaulted = grouped.apply(lambda x: x.reset_index(drop=True))
        defaulted.index = defaulted.index.droplevel(0)
        offset = np.asarray(defaulted.index)
        # 1416 == 59 * 24: skip the 24 Feb. 29th slots in non-leap years
        offset[~isleapyear(year) & (offset >= 1416)] += 24
        columns = lrange(1, 8785)
    else:
        raise NotImplementedError(freq)

    # Row-major position of every observation in the (years x width) grid.
    flat_index = (year - years.min()) * width + offset
    flat_index = _ensure_platform_int(flat_index)

    # Start from an all-NaN grid so slots without data stay NaN.
    values = np.empty((len(years), width))
    values.fill(np.nan)
    values.put(flat_index, series.values)

    return DataFrame(values, index=years, columns=columns)
def test_dti_take_dont_lose_meta(self, tzstr):
    """``take`` on a tz-aware range keeps the ``tz`` and ``freq`` values."""
    rng = date_range('1/1/2000', periods=20, tz=tzstr)

    taken = rng.take(lrange(5))

    for attr in ('tz', 'freq'):
        assert getattr(taken, attr) == getattr(rng, attr)
def test_constructor_convert_index_once(self):
    """Columns of a SparseDataFrame share the very same index object."""
    arr = np.array([1.5, 2.5, 3.5])
    sdf = SparseDataFrame(columns=lrange(4), index=arr)

    first_col, second_col = sdf[0], sdf[1]
    assert first_col.index is second_col.index
def raw_frame():
    """Return a 10 x 3 random frame on a two-level index with four NaNs.

    NOTE(review): the test below receives this by parameter name, so it is
    presumably registered as a pytest fixture; no decorator is visible
    here -- confirm.
    """
    levels = [['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']]
    labels = [[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
              [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]]
    index = MultiIndex(levels=levels, labels=labels,
                       names=['first', 'second'])
    frame = DataFrame(np.random.randn(10, 3), index=index,
                      columns=Index(['A', 'B', 'C'], name='exp'))
    # knock out two cells in row 1 and two in row 7
    frame.iloc[1, [1, 2]] = np.nan
    frame.iloc[7, [0, 1]] = np.nan
    return frame


@pytest.mark.parametrize(
    "op, level, axis, skipna, sort",
    product(AGG_FUNCTIONS,
            lrange(2), lrange(2),
            [True, False],
            [True, False]))
def test_regression_whitelist_methods(
        raw_frame, op, level, axis,
        skipna, sort):
    """Call each aggregation on a level-grouped frame along either axis."""
    # GH6944
    # GH 17537
    # explicitly test the whitelist methods

    # group the transposed frame when working along axis 1
    frame = raw_frame if axis == 0 else raw_frame.T

    if op in AGG_FUNCTIONS_WITH_SKIPNA:
        grouped = frame.groupby(level=level, axis=axis, sort=sort)
        # NOTE(review): the visible body stops here; ``result`` is not
        # compared against anything in this view.
        result = getattr(grouped, op)(skipna=skipna)
def test_getitem_list_periods(self):
    """Indexing with a one-element list of Period selects that row."""
    # GH 7710
    rng = period_range(start='2012-01-01', periods=10, freq='D')
    ts = Series(lrange(len(rng)), index=rng)

    key = [Period('2012-01-02', freq='D')]
    expected = ts.iloc[[1]]
    result = ts[key]

    tm.assert_series_equal(result, expected)
def test_set_index_makes_timeseries(self):
    """Assigning a date index makes ``index.is_all_dates`` truthy."""
    date_index = tm.makeDateIndex(10)

    s = Series(lrange(10))
    s.index = date_index

    assert s.index.is_all_dates