def test_categorical_warnings_and_errors(self):
    """Check the error and warning paths of categorical export to Stata.

    * Category labels of 10000 characters must make ``to_stata`` raise
      ``ValueError``.
    * A categorical column mixing string and integer labels must still be
      written, emitting exactly one warning.
    """
    # Error for labels too long
    too_long = pd.DataFrame.from_records(
        [['a' * 10000], ['b' * 10000], ['c' * 10000], ['d' * 10000]],
        columns=['Too_long'])
    too_long = pd.concat(
        [too_long[col].astype('category') for col in too_long], axis=1)
    with tm.ensure_clean() as path:
        tm.assertRaises(ValueError, too_long.to_stata, path)

    # Warning for non-string labels
    mixed = pd.DataFrame.from_records(
        [['a'], ['b'], ['c'], ['d'], [1]], columns=['Too_long'])
    mixed = pd.concat(
        [mixed[col].astype('category') for col in mixed], axis=1)

    # Write inside a fresh ``ensure_clean`` block: the path from the block
    # above is no longer managed, so writing to it again would leave a
    # stray file behind.
    with tm.ensure_clean() as path:
        with warnings.catch_warnings(record=True) as w:
            mixed.to_stata(path)
            # should get a warning for mixed content
            tm.assert_equal(len(w), 1)
def test_series_groupby_errors():
    """Dask ``Series.groupby`` must reject bad keys the same way pandas does."""
    pandas_series = pd.Series([1, 2, 2, 1, 1])
    dask_series = dd.from_pandas(pandas_series, npartitions=2)

    # (factory for the grouping keys, expected ValueError message)
    value_error_cases = [
        (lambda: [1, 2], "Grouper for '1' not 1-dimensional"),
        (lambda: [2], "Grouper for '2' not 1-dimensional"),
        (lambda: [], "No group keys passed!"),
    ]
    for make_keys, msg in value_error_cases:
        # pandas first, then dask should raise the same error
        for series in (pandas_series, dask_series):
            with tm.assertRaisesRegexp(ValueError, msg):
                series.groupby(make_keys())

    # grouping by a dask series built with another partition count
    other = dd.from_pandas(pandas_series, npartitions=3)
    assert raises(NotImplementedError, lambda: dask_series.groupby(other))

    # unknown key: pandas first, then dask should raise the same error
    for series in (pandas_series, dask_series):
        with tm.assertRaises(KeyError):
            series.groupby('x')
def test_loc2d():
    """Two-dimensional ``.loc`` on ``d`` must agree with ``.loc`` on ``full``."""
    # index indexer is always regarded as slice for duplicated values
    assert_eq(d.loc[5, 'a'], full.loc[5:5, 'a'])
    # assert_eq(d.loc[[5], 'a'], full.loc[[5], 'a'])
    assert_eq(d.loc[5, ['a']], full.loc[5:5, ['a']])
    # assert_eq(d.loc[[5], ['a']], full.loc[[5], ['a']])

    # selections written identically for both frames
    selections = [
        lambda frame: frame.loc[3:8, 'a'],
        lambda frame: frame.loc[:8, 'a'],
        lambda frame: frame.loc[3:, 'a'],
        lambda frame: frame.loc[[8], 'a'],
        lambda frame: frame.loc[3:8, ['a']],
        lambda frame: frame.loc[:8, ['a']],
        lambda frame: frame.loc[3:, ['a']],
        lambda frame: frame.loc[[3, 4, 3], ['a']],
    ]
    for select in selections:
        assert_eq(select(d), select(full))

    too_many_indexers = [
        # 3d
        lambda: d.loc[3, 3, 3],
        # Series should raise
        lambda: d.a.loc[3, 3],
        lambda: d.a.loc[3:, 3],
        lambda: d.a.loc[d.a % 2 == 0, 3],
    ]
    for lookup in too_many_indexers:
        with tm.assertRaises(pd.core.indexing.IndexingError):
            lookup()
def test_errorbar_plot(self):
    """Series plots accept error bars as Series, ndarray or list."""
    s = Series(np.arange(10))
    s_err = np.random.randn(10)

    # test line and bar plots
    for kind in ('line', 'bar'):
        for yerr in (Series(s_err), s_err, s_err.tolist()):
            _check_plot_works(s.plot, yerr=yerr, kind=kind)

    _check_plot_works(s.plot, xerr=s_err)

    # test time series plotting
    ix = date_range('1/1/2000', '1/1/2001', freq='M')
    ts = Series(np.arange(12), index=ix)
    ts_err = Series(np.random.randn(12), index=ix)
    _check_plot_works(ts.plot, yerr=ts_err)

    # check incorrect lengths and types
    tm.assertRaises(ValueError, s.plot, yerr=np.arange(11))
    tm.assertRaises(TypeError, s.plot, yerr=['zzz'] * 10)
def test_errorbar_plot(self):
    """DataFrame plots accept error bars in each supported form."""
    d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)}
    df = DataFrame(d)
    d_err = {'x': np.ones(12) * 0.2, 'y': np.ones(12) * 0.4}
    df_err = DataFrame(d_err)

    # check line plots
    _check_plot_works(df.plot, yerr=df_err, logy=True)
    _check_plot_works(df.plot, yerr=df_err, logx=True, logy=True)

    for kind in ('line', 'bar', 'barh'):
        # rebuilt for every kind so each call gets fresh selections
        error_specs = [
            [('yerr', df_err['x'])],
            [('yerr', d_err)],
            [('yerr', df_err), ('xerr', df_err)],
            [('yerr', df_err['x']), ('xerr', df_err['x'])],
            [('yerr', df_err), ('xerr', df_err), ('subplots', True)],
        ]
        for spec in error_specs:
            _check_plot_works(df.plot, **dict(spec, kind=kind))

    _check_plot_works((df + 1).plot, yerr=df_err, xerr=df_err,
                      kind='bar', log=True)

    # yerr is raw error values
    _check_plot_works(df['y'].plot, yerr=np.ones(12) * 0.4)
    _check_plot_works(df.plot, yerr=np.ones((2, 12)) * 0.4)

    # yerr is column name
    df['yerr'] = np.ones(12) * 0.2
    _check_plot_works(df.plot, y='y', x='x', yerr='yerr')

    # wrong length, then non-numeric errors
    tm.assertRaises(ValueError, df.plot, yerr=np.random.randn(11))

    df_err = DataFrame({'x': ['zzz'] * 12, 'y': ['zzz'] * 12})
    tm.assertRaises(TypeError, df.plot, yerr=df_err)
def test_drop_column(self):
    """``read_stata(columns=...)`` subsets and reorders; bad lists raise."""
    expected = self.read_csv(self.csv15)
    # cast the csv columns to the dtypes the comparison expects
    casts = [("byte_", np.int8), ("int_", np.int16), ("long_", np.int32),
             ("float_", np.float32), ("double_", np.float64)]
    for name, dtype in casts:
        expected[name] = expected[name].astype(dtype)
    expected["date_td"] = expected["date_td"].apply(
        datetime.strptime, args=("%Y-%m-%d",))

    columns = ["byte_", "int_", "long_"]
    expected = expected[columns]
    dropped = read_stata(self.dta15_117, convert_dates=True,
                         columns=columns)
    tm.assert_frame_equal(expected, dropped)

    # See PR 10757
    columns = ["int_", "long_", "byte_"]
    expected = expected[columns]
    reordered = read_stata(self.dta15_117, convert_dates=True,
                           columns=columns)
    tm.assert_frame_equal(expected, reordered)

    # duplicated column name
    columns = ["byte_", "byte_"]
    with tm.assertRaises(ValueError):
        read_stata(self.dta15_117, convert_dates=True, columns=columns)

    # column that is not in the file
    columns = ["byte_", "int_", "long_", "not_found"]
    with tm.assertRaises(ValueError):
        read_stata(self.dta15_117, convert_dates=True, columns=columns)
def test_invalid_encoding(self):
    """Clipboard writing and reading both reject ``encoding='ascii'``."""
    # test case for testing invalid encoding
    frame = self.data['string']
    tm.assertRaises(ValueError, frame.to_clipboard, encoding='ascii')
    tm.assertRaises(NotImplementedError, pd.read_clipboard,
                    encoding='ascii')
def test_drop_column(self):
    """Reading a column subset from the dta file matches the csv fixture."""
    expected = self.read_csv(self.csv15)
    expected['byte_'] = expected['byte_'].astype(np.int8)
    expected['int_'] = expected['int_'].astype(np.int16)
    expected['long_'] = expected['long_'].astype(np.int32)
    expected['float_'] = expected['float_'].astype(np.float32)
    expected['double_'] = expected['double_'].astype(np.float64)
    expected['date_td'] = expected['date_td'].apply(
        datetime.strptime, args=('%Y-%m-%d',))

    # first a plain subset, then the same columns reordered (see PR 10757)
    for columns in (['byte_', 'int_', 'long_'], ['int_', 'long_', 'byte_']):
        expected = expected[columns]
        result = read_stata(self.dta15_117, convert_dates=True,
                            columns=columns)
        tm.assert_frame_equal(expected, result)

    # a repeated name and an unknown name are both rejected
    for make_columns in (lambda: ['byte_', 'byte_'],
                         lambda: ['byte_', 'int_', 'long_', 'not_found']):
        with tm.assertRaises(ValueError):
            columns = make_columns()
            read_stata(self.dta15_117, convert_dates=True, columns=columns)
def test_nested_scope(self):
    """Evaluate expressions that refer to this function's local variables.

    The locals ``x``, ``df`` and ``df2`` are referenced by name inside the
    expression strings, so they must keep exactly these names.
    """
    from pandas.computation.ops import UndefinedVariableError
    eval_kwargs = dict(engine=self.engine, parser=self.parser)

    # smoke test
    x = 1  # noqa
    self.assertEqual(pd.eval('x + 1', **eval_kwargs), 2)

    df = DataFrame(np.random.randn(5, 3))
    df2 = DataFrame(np.random.randn(5, 3))

    # don't have the pandas parser
    with tm.assertRaises(SyntaxError):
        df.query('(@df>0) & (@df2>0)', **eval_kwargs)

    with tm.assertRaises(UndefinedVariableError):
        df.query('(df>0) & (df2>0)', **eval_kwargs)

    want = df[(df > 0) & (df2 > 0)]
    got = pd.eval('df[(df > 0) & (df2 > 0)]', **eval_kwargs)
    assert_frame_equal(want, got)

    want = df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)]
    got = pd.eval('df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)]',
                  **eval_kwargs)
    assert_frame_equal(want, got)
def test_getitem_day(self):
    """Partial-string indexing on daily DatetimeIndex and PeriodIndex."""
    # GH 6716
    # Confirm DatetimeIndex and PeriodIndex works identically
    didx = DatetimeIndex(start='2013/01/01', freq='D', periods=400)
    pidx = PeriodIndex(start='2013/01/01', freq='D', periods=400)

    string_keys = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H',
                   '2013/02/01 09:00']
    sub_daily_keys = ['2013/02/01 9H', '2013/02/01 09:00']

    for idx in (didx, pidx):
        # getitem against index should raise ValueError
        if _np_version_under1p9:
            for key in string_keys:
                with tm.assertRaises(ValueError):
                    idx[key]
        # else: GH7116
        # these show deprecations as we are trying
        # to slice with non-integer indexers, so nothing is checked
        # with tm.assertRaises(IndexError):
        #    idx[v]

        s = Series(np.random.rand(len(idx)), index=idx)
        tm.assert_series_equal(s['2013/01'], s[0:31])
        tm.assert_series_equal(s['2013/02'], s[31:59])
        tm.assert_series_equal(s['2014'], s[365:])

        for key in sub_daily_keys:
            with tm.assertRaises(KeyError):
                s[key]
def test_implementation_limits(self):
    """Timedelta limits line up with the int64 range (GH 12727)."""
    min_td = Timedelta(Timedelta.min)
    max_td = Timedelta(Timedelta.max)

    # GH 12727
    # timedelta limits correspond to int64 boundaries
    int64 = np.iinfo(np.int64)
    self.assertTrue(min_td.value == int64.min + 1)
    self.assertTrue(max_td.value == int64.max)

    # Beyond lower limit, a NAT before the Overflow
    below_min = min_td - Timedelta(1, 'ns')
    self.assertIsInstance(below_min, pd.tslib.NaTType)

    tm.assertRaises(OverflowError, lambda: min_td - Timedelta(2, 'ns'))
    tm.assertRaises(OverflowError, lambda: max_td + Timedelta(1, 'ns'))

    # Same tests using the internal nanosecond values
    self.assertIsInstance(Timedelta(min_td.value - 1, 'ns'),
                          pd.tslib.NaTType)

    tm.assertRaises(OverflowError, lambda: Timedelta(min_td.value - 2, 'ns'))
    tm.assertRaises(OverflowError, lambda: Timedelta(max_td.value + 1, 'ns'))
def test_hist_layout(self):
    """``DataFrame.hist`` lays out a three-column frame as requested."""
    df = DataFrame(randn(100, 3))

    # (requested layout, resulting grid); ``None`` is the 2x2 default
    layout_cases = (
        (None, (2, 2)),
        ((2, 2), (2, 2)),
        ((4, 1), (4, 1)),
        ((1, 4), (1, 4)),
        ((3, 3), (3, 3)),
        ((-1, 4), (1, 4)),
        ((4, -1), (4, 1)),
        ((-1, 2), (2, 2)),
        ((2, -1), (2, 2)),
    )
    for layout, expected in layout_cases:
        axes = df.hist(layout=layout)
        self._check_axes_shape(axes, axes_num=3, layout=expected)

    bad_layouts = [
        (1, 1),    # layout too small for all the plots
        (1,),      # invalid format for layout
        (-1, -1),
    ]
    for layout in bad_layouts:
        with tm.assertRaises(ValueError):
            df.hist(layout=layout)
def test_no_order(self):
    """Polynomial and spline interpolation without ``order`` raise."""
    _skip_if_no_scipy()
    s = Series([0, 1, np.nan, 3])
    for method in ('polynomial', 'spline'):
        with tm.assertRaises(ValueError):
            s.interpolate(method=method)
def test_hist_layout(self):
    """``Series.hist`` rejects a ``layout`` argument, tuple or list."""
    df = self.hist_df
    for make_layout in (lambda: (1, 1), lambda: [1, 1]):
        with tm.assertRaises(ValueError):
            df.height.hist(layout=make_layout())
def test_take_filling_fill_value(self):
    """``SparseArray.take`` on an array whose fill value is 0."""
    # same tests as GH 12631
    sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0)

    # (extra keyword arguments, values expected back)
    take_cases = [
        ({}, [0, np.nan, 4]),
        # fill_value
        ({'fill_value': True}, [0, np.nan, 0]),
        # allow_fill=False
        ({'allow_fill': False, 'fill_value': True}, [0, np.nan, 4]),
    ]
    for kwargs, values in take_cases:
        result = sparse.take(np.array([1, 0, -1]), **kwargs)
        expected = SparseArray(values, fill_value=0)
        tm.assert_sp_array_equal(result, expected)

    msg = ('When allow_fill=True and fill_value is not None, '
           'all indices must be >= -1')
    for bad_index in (-2, -5):
        with tm.assertRaisesRegexp(ValueError, msg):
            sparse.take(np.array([1, 0, bad_index]), fill_value=True)

    # out-of-bounds positions
    with tm.assertRaises(IndexError):
        sparse.take(np.array([1, -6]))
    with tm.assertRaises(IndexError):
        sparse.take(np.array([1, 5]))
    with tm.assertRaises(IndexError):
        sparse.take(np.array([1, 5]), fill_value=True)
def test_nonunique_raises(self):
    """A frame with duplicate column labels cannot be styled."""
    df = pd.DataFrame([[1, 2]], columns=['A', 'A'])
    # via the accessor ...
    tm.assertRaises(ValueError, getattr, df, 'style')
    # ... and via the constructor
    tm.assertRaises(ValueError, Styler, df)
def test_get_division():
    """``get_division`` returns each partition as its own collection."""
    pdf = pd.DataFrame(np.random.randn(10, 5), columns=list("abcde"))
    ddf = dd.from_pandas(pdf, 3)
    assert ddf.divisions == (0, 4, 8, 9)

    # inclusive index bounds of the three partitions
    bounds = [(0, 3), (4, 7), (8, 9)]

    # DataFrame first, then Series
    for dask_obj, pandas_obj, dask_type in [(ddf, pdf, dd.DataFrame),
                                            (ddf.a, pdf.a, dd.Series)]:
        total = 0
        for number, (start, stop) in enumerate(bounds):
            division = dask_obj.get_division(number)
            if number == 0:
                assert isinstance(division, dask_type)
            assert eq(division, pandas_obj.loc[start:stop])
            total += len(division)
        assert total == len(pandas_obj)

    # division numbers outside 0..2 are rejected
    for number in (-1, 3):
        with tm.assertRaises(ValueError):
            ddf.get_division(number)
def check_str_query_method(self, parser, engine):
    """Compare a string column against a literal through ``DataFrame.query``.

    With the ``'pandas'`` parser the queries must match boolean indexing;
    with any other parser each of them must raise ``NotImplementedError``.
    """
    tm.skip_if_no_ne(engine)
    df = DataFrame(randn(10, 1), columns=['b'])
    df['strings'] = Series(list('aabbccddee'))
    expect = df[df.strings == 'a']

    if parser != 'pandas':
        expressions = ['strings == "a"', 'strings != "a"',
                       '"a" == strings', '"a" != strings']
        for ex in expressions:
            assertRaises(NotImplementedError, df.query, ex, engine=engine,
                         parser=parser, local_dict={'strings': df.strings})
        return

    for ex in ('"a" == strings', 'strings == "a"'):
        res = df.query(ex, engine=engine, parser=parser)
        assert_frame_equal(res, expect)
    assert_frame_equal(res, df[df.strings.isin(['a'])])

    expect = df[df.strings != 'a']
    for ex in ('strings != "a"', '"a" != strings'):
        res = df.query(ex, engine=engine, parser=parser)
        assert_frame_equal(res, expect)
    assert_frame_equal(res, df[~df.strings.isin(['a'])])
def test_hist_layout(self):
    """``DataFrame.hist`` honours ``layout`` for a four-column frame."""
    import matplotlib.pyplot as plt  # noqa
    df = DataFrame(randn(100, 4))

    # (requested layout, resulting grid); ``None`` is the 2x2 default
    layout_cases = (
        (None, (2, 2)),
        ((2, 2), (2, 2)),
        ((4, 1), (4, 1)),
        ((1, 4), (1, 4)),
        ((3, 3), (3, 3)),
    )
    for layout, (n_rows, n_cols) in layout_cases:
        ax = df.hist(layout=layout)
        self.assertEqual(len(ax), n_rows)
        self.assertEqual(len(ax[0]), n_cols)

    # layout too small for all 4 plots
    tm.assertRaises(ValueError, df.hist, layout=(1, 1))

    # invalid format for layout
    tm.assertRaises(ValueError, df.hist, layout=(1,))
def test_nat_arithmetic(self):
    """Addition and subtraction involving ``NaT`` (GH 6873)."""
    # GH 6873
    nat = tslib.NaT
    t = Timestamp('2014-01-01')
    dt = datetime.datetime(2014, 1, 1)
    delta = datetime.timedelta(3600)

    # Timestamp / datetime
    for left, right in ((nat, nat), (nat, t), (dt, nat)):
        # NaT + Timestamp-like should raise TypeError
        tm.assertRaises(TypeError, lambda: left + right)
        tm.assertRaises(TypeError, lambda: right + left)

        # NaT - Timestamp-like (or inverse) returns NaT
        self.assertTrue((left - right) is tslib.NaT)
        self.assertTrue((right - left) is tslib.NaT)

    # timedelta-like
    # offsets are tested in test_offsets.py
    left, right = nat, delta

    # NaT + timedelta-like returns NaT
    self.assertTrue((left + right) is tslib.NaT)
    # timedelta-like + NaT should raise TypeError
    tm.assertRaises(TypeError, lambda: right + left)

    self.assertTrue((left - right) is tslib.NaT)
    tm.assertRaises(TypeError, lambda: right - left)
def test_unstack_non_unique_index_names(self):
    """Stacking or unstacking by a level name used twice raises."""
    duplicated_names = ["c1", "c1"]
    idx = MultiIndex.from_tuples([("a", "b"), ("c", "d")],
                                 names=duplicated_names)
    df = DataFrame([1, 2], index=idx)

    with tm.assertRaises(ValueError):
        df.unstack("c1")

    # same check on the transposed frame, where the names are on the columns
    with tm.assertRaises(ValueError):
        df.T.stack("c1")
def test_s3_fails(self):
    """Reading unreadable S3 locations surfaces ``IOError``."""
    unreadable_urls = (
        's3://nyqpug/asdf.csv',
        # Receive a permission error when trying to read a private bucket.
        # It's irrelevant here that this isn't actually a table.
        's3://cant_get_it/',
    )
    for url in unreadable_urls:
        with tm.assertRaises(IOError):
            read_csv(url)
def test_no_index(self):
    """With ``write_index=False`` the index name is not a column on re-read."""
    columns = ["x", "y"]
    values = np.reshape(np.arange(10.0), (5, 2))
    original = DataFrame(values, columns=columns)
    original.index.name = "index_not_written"
    with tm.ensure_clean() as path:
        original.to_stata(path, write_index=False)
        written_and_read_again = self.read_dta(path)
        with tm.assertRaises(KeyError):
            written_and_read_again["index_not_written"]
def test_catch_too_many_names(self):
    """Passing four names for three-column data raises ``ValueError``."""
    # see gh-5156
    data = """\
1,2,3
4,,6
7,8,9
10,11,12\n"""
    too_many_names = ["a", "b", "c", "d"]
    with tm.assertRaises(ValueError):
        self.read_csv(StringIO(data), header=0, names=too_many_names)
def test_construction_dti_with_mixed_timezones(self):
    """Build a ``DatetimeIndex`` from Timestamps with equal or mixed tz."""
    # GH 11488 (not changed, added explicit tests)

    def check(result, exp):
        # exact index equality plus the concrete index type
        self.assert_index_equal(result, exp, exact=True)
        self.assertTrue(isinstance(result, DatetimeIndex))

    # no tz results in DatetimeIndex
    check(
        DatetimeIndex(
            [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx'),
        DatetimeIndex(
            [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx'))

    # same tz results in DatetimeIndex
    check(
        DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                       Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')],
                      name='idx'),
        DatetimeIndex([Timestamp('2011-01-01 10:00'),
                       Timestamp('2011-01-02 10:00')],
                      tz='Asia/Tokyo', name='idx'))

    # same tz results in DatetimeIndex (DST)
    check(
        DatetimeIndex([Timestamp('2011-01-01 10:00', tz='US/Eastern'),
                       Timestamp('2011-08-01 10:00', tz='US/Eastern')],
                      name='idx'),
        DatetimeIndex([Timestamp('2011-01-01 10:00'),
                       Timestamp('2011-08-01 10:00')],
                      tz='US/Eastern', name='idx'))

    # a tz-naive value next to a tz-aware one is coerced to the aware tz
    check(
        DatetimeIndex([Timestamp('2011-01-01 10:00'),
                       Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                      name='idx'),
        DatetimeIndex([Timestamp('2011-01-01 05:00'),
                       Timestamp('2011-01-02 10:00')],
                      tz='US/Eastern', name='idx'))

    # tz mismatch affecting to tz-aware raises TypeError/ValueError
    with tm.assertRaises(ValueError):
        DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                       Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                      name='idx')

    with tm.assertRaises(TypeError):
        DatetimeIndex([Timestamp('2011-01-01 10:00'),
                       Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                      tz='Asia/Tokyo', name='idx')

    with tm.assertRaises(ValueError):
        DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
                       Timestamp('2011-01-02 10:00', tz='US/Eastern')],
                      tz='US/Eastern', name='idx')
def test_unsortable(self):
    """``safe_sort`` raises ``TypeError`` for an unorderable object array."""
    # GH 13714
    arr = np.array([1, 2, datetime.now(), 0, 3], dtype=object)

    expect_warning = compat.PY2 and not pd._np_version_under1p10
    if not expect_warning:
        with tm.assertRaises(TypeError):
            algos.safe_sort(arr)
        return

    # RuntimeWarning: tp_compare didn't return -1 or -2 for exception
    with tm.assert_produces_warning(RuntimeWarning):
        with tm.assertRaises(TypeError):
            algos.safe_sort(arr)
def test_unstack_non_unique_index_names(self):
    """A level name shared by two levels makes ``unstack``/``stack`` raise."""
    rows = [('a', 'b'), ('c', 'd')]
    idx = MultiIndex.from_tuples(rows, names=['c1', 'c1'])
    df = DataFrame([1, 2], index=idx)

    operations = (
        lambda: df.unstack('c1'),
        lambda: df.T.stack('c1'),
    )
    for operation in operations:
        with tm.assertRaises(ValueError):
            operation()
def test_data_fail(self):
    """Both HTML fixtures make ``read_html`` raise ``XMLSyntaxError``."""
    from lxml.etree import XMLSyntaxError
    spam_data = os.path.join(DATA_PATH, 'spam.html')
    banklist_data = os.path.join(DATA_PATH, 'banklist.html')

    for html_path in (spam_data, banklist_data):
        with tm.assertRaises(XMLSyntaxError):
            self.read_html(html_path)
def test_spline_error(self):
    """Spline interpolation needs an ``order``, and ``order=0`` is rejected."""
    tm._skip_if_no_scipy()

    s = pd.Series(np.arange(10) ** 2)
    # knock out a few random positions
    s[np.random.randint(0, 9, 3)] = np.nan

    for extra in ({}, {'order': 0}):
        with tm.assertRaises(ValueError):
            s.interpolate(method='spline', **extra)
def test_hist_layout(self):
    """``Series.hist`` rejects ``layout`` whether given as tuple or list."""
    n = 10
    gender = tm.choice(['Male', 'Female'], size=n)
    columns = {'gender': gender,
               'height': random.normal(66, 4, size=n),
               'weight': random.normal(161, 32, size=n)}
    df = DataFrame(columns)

    with tm.assertRaises(ValueError):
        df.height.hist(layout=(1, 1))

    with tm.assertRaises(ValueError):
        df.height.hist(layout=[1, 1])
def test_interp_nan_idx(self):
    """``method='values'`` is not implemented for an index containing NaN."""
    frame = DataFrame({'A': [1, 2, np.nan, 4], 'B': [np.nan, 2, 3, 4]})
    frame = frame.set_index('A')
    tm.assertRaises(NotImplementedError, frame.interpolate,
                    method='values')
def test_to_gbq_with_no_project_id_given_should_fail(self):
    """Calling ``to_gbq`` without a project id raises ``TypeError``."""
    destination = 'dataset.tablename'
    with tm.assertRaises(TypeError):
        gbq.to_gbq(DataFrame(), destination)
def test_describe_quantiles_both(self):
    """``percentile_width`` and ``percentiles`` cannot be combined."""
    conflicting = dict(percentile_width=50, percentiles=[25, 75])
    with tm.assertRaises(ValueError):
        tm.makeDataFrame().describe(**conflicting)
def test_describe_percentiles_percent_or_raw(self):
    """``describe`` raises for ``percentiles=[10, 50, 100]``."""
    frame = tm.makeDataFrame()
    tm.assertRaises(ValueError, frame.describe, percentiles=[10, 50, 100])
def test_describe_raises(self):
    """``describe`` on a Panel raises ``NotImplementedError``."""
    with tm.assertRaises(NotImplementedError):
        panel = tm.makePanel()
        panel.describe()
def test_interpolate_non_ts(self):
    """``method='time'`` raises for a Series built without a time index."""
    series = Series([1, 3, np.nan, np.nan, np.nan, 11])
    tm.assertRaises(ValueError, series.interpolate, method='time')
def test_display_format_raises(self):
    """``Styler.format`` rejects an int and a bool as formatter."""
    df = pd.DataFrame(np.random.randn(2, 2))
    for bad_formatter in (5, True):
        with tm.assertRaises(TypeError):
            df.style.format(bad_formatter)
def test_bad_project_id(self):
    """Querying with project id ``'001'`` raises ``GenericGBQException``."""
    query = "SELECT 1"
    with tm.assertRaises(gbq.GenericGBQException):
        gbq.read_gbq(query, project_id='001',
                     private_key=_get_private_key_path())
def test_bad_table_name(self):
    """Selecting from ``samples.nope`` raises ``GenericGBQException``."""
    query = "SELECT * FROM [publicdata:samples.nope]"
    with tm.assertRaises(gbq.GenericGBQException):
        gbq.read_gbq(query,
                     project_id=_get_project_id(),
                     private_key=_get_private_key_path())
def test_plot_fails_with_dupe_color_and_style(self):
    """A color given both in ``style`` and in ``color`` is rejected."""
    series = Series(randn(2))
    tm.assertRaises(ValueError, series.plot, style='k--', color='k')
def test_bad_url_protocol(self):
    """A ``git://`` address makes ``read_html`` raise ``URLError``."""
    address = 'git://github.com'
    with tm.assertRaises(URLError):
        self.read_html(address, match='.*Water.*')
def test_invalid_flavor():
    """An unrecognised ``flavor`` string makes ``read_html`` raise."""
    url = 'google.com'
    bogus_flavor = 'not a* valid**++ flaver'
    with tm.assertRaises(ValueError):
        read_html(url, 'google', flavor=bogus_flavor)
def test_fred_multi_bad_series(self):
    """Requesting series names that do not exist raises ``HTTPError``."""
    requested = ['NOTAREALSERIES', 'CPIAUCSL', "ALSO FAKE"]
    tm.assertRaises(HTTPError, DataReader, requested, data_source="fred")
def test_bool_header_arg(self):
    """A boolean ``header`` argument is rejected with ``TypeError``."""
    # GH 6114
    for flag in (True, False):
        tm.assertRaises(TypeError, read_html, self.spam_data, header=flag)
def test_interp_nonmono_raise(self):
    """Krogh interpolation raises when the index is ``[0, 2, 1]``."""
    tm._skip_if_no_scipy()
    unordered = Series([1, np.nan, 3], index=[0, 2, 1])
    tm.assertRaises(ValueError, unordered.interpolate, method='krogh')
def test_malformed_query(self):
    """A query with misspelled keywords raises ``GenericGBQException``."""
    broken_sql = "SELCET * FORM [publicdata:samples.shakespeare]"
    with tm.assertRaises(gbq.GenericGBQException):
        gbq.read_gbq(broken_sql,
                     project_id=_get_project_id(),
                     private_key=_get_private_key_path())
def test_init_non_pandas(self):
    """``Styler`` cannot be built from a plain list."""
    tm.assertRaises(TypeError, Styler, [1, 2, 3])
def test_malformed_query(self):
    """A query with misspelled keywords raises ``GenericGBQException``."""
    broken_sql = "SELCET * FORM [publicdata:samples.shakespeare]"
    tm.assertRaises(gbq.GenericGBQException, gbq.read_gbq, broken_sql,
                    project_id=PROJECT_ID)
def test_bad_table_name(self):
    """Selecting from ``samples.nope`` raises ``GenericGBQException``."""
    query = "SELECT * FROM [publicdata:samples.nope]"
    tm.assertRaises(gbq.GenericGBQException, gbq.read_gbq, query,
                    project_id=PROJECT_ID)
def test_read_gbq_with_empty_private_key_file_should_fail(self):
    """A private key file from ``ensure_clean`` is rejected as malformed."""
    with tm.ensure_clean() as empty_file_path, \
            tm.assertRaises(gbq.InvalidPrivateKeyFormat):
        gbq.read_gbq('SELECT 1', project_id='x',
                     private_key=empty_file_path)
def test_tz_convert_and_localize(self):
    """Exercise ``tz_localize``/``tz_convert`` on frames, with and without
    a ``level`` argument on MultiIndex-ed frames.

    Covers a flat DatetimeIndex, a two-level datetime MultiIndex (GH7846),
    a mixed integer/datetime MultiIndex, and the error cases: PeriodIndex
    (not implemented), a non-datetime index or level (``TypeError``) and an
    invalid level (``ValueError``).
    """
    l0 = date_range('20140701', periods=5, freq='D')

    # TODO: l1 should be a PeriodIndex for testing
    #       after GH2106 is addressed
    with tm.assertRaises(NotImplementedError):
        period_range('20140701', periods=1).tz_convert('UTC')
    with tm.assertRaises(NotImplementedError):
        period_range('20140701', periods=1).tz_localize('UTC')
    # l1 = period_range('20140701', periods=5, freq='D')
    l1 = date_range('20140701', periods=5, freq='D')

    int_idx = Index(range(5))

    for fn in ['tz_localize', 'tz_convert']:

        if fn == 'tz_convert':
            # localize first so that tz_convert operates on tz-aware indexes
            l0 = l0.tz_localize('UTC')
            l1 = l1.tz_localize('UTC')

        # Each expectation comes from its own level. The original derived
        # both from one loop variable ranging over [l0, l1]; the two
        # indexes are built identically, so the values are unchanged and
        # the duplicate pass is dropped.
        l0_expected = getattr(l0, fn)('US/Pacific')
        l1_expected = getattr(l1, fn)('US/Pacific')

        df1 = DataFrame(np.ones(5), index=l0)
        df1 = getattr(df1, fn)('US/Pacific')
        self.assertTrue(df1.index.equals(l0_expected))

        # MultiIndex
        # GH7846
        df2 = DataFrame(np.ones(5), MultiIndex.from_arrays([l0, l1]))

        df3 = getattr(df2, fn)('US/Pacific', level=0)
        self.assertFalse(df3.index.levels[0].equals(l0))
        self.assertTrue(df3.index.levels[0].equals(l0_expected))
        self.assertTrue(df3.index.levels[1].equals(l1))
        self.assertFalse(df3.index.levels[1].equals(l1_expected))

        df3 = getattr(df2, fn)('US/Pacific', level=1)
        self.assertTrue(df3.index.levels[0].equals(l0))
        self.assertFalse(df3.index.levels[0].equals(l0_expected))
        self.assertTrue(df3.index.levels[1].equals(l1_expected))
        self.assertFalse(df3.index.levels[1].equals(l1))

        df4 = DataFrame(np.ones(5),
                        MultiIndex.from_arrays([int_idx, l0]))

        # The original computed ``df5`` and then re-asserted on ``df3``,
        # so this case was never checked. Verify ``df5`` itself: the
        # integer level is untouched and only level 1 changes.
        df5 = getattr(df4, fn)('US/Pacific', level=1)
        self.assertTrue(df5.index.levels[0].equals(int_idx))
        self.assertFalse(df5.index.levels[1].equals(l0))
        self.assertTrue(df5.index.levels[1].equals(l0_expected))

    # Bad Inputs
    for fn in ['tz_localize', 'tz_convert']:
        # Not DatetimeIndex / PeriodIndex
        with tm.assertRaisesRegexp(TypeError, 'DatetimeIndex'):
            df = DataFrame(index=int_idx)
            df = getattr(df, fn)('US/Pacific')

        # Not DatetimeIndex / PeriodIndex
        with tm.assertRaisesRegexp(TypeError, 'DatetimeIndex'):
            df = DataFrame(np.ones(5),
                           MultiIndex.from_arrays([int_idx, l0]))
            df = getattr(df, fn)('US/Pacific', level=0)

        # Invalid level
        with tm.assertRaisesRegexp(ValueError, 'not valid'):
            df = DataFrame(index=l0)
            df = getattr(df, fn)('US/Pacific', level=1)
def test_read_gbq_with_private_key_json_wrong_types_should_fail(self):
    """A key string with non-string field values is rejected."""
    bad_key = '{ "client_email" : 1, "private_key" : True }'
    tm.assertRaises(gbq.InvalidPrivateKeyFormat, gbq.read_gbq,
                    'SELECT 1', project_id='x', private_key=bad_key)
def test_bad_project_id(self):
    """Querying with project id ``'001'`` raises ``GenericGBQException``."""
    tm.assertRaises(gbq.GenericGBQException, gbq.read_gbq, "SELECT 1",
                    project_id='001')
def test_read_gbq_with_empty_private_key_json_should_fail(self):
    """An empty JSON object is not accepted as a private key."""
    tm.assertRaises(gbq.InvalidPrivateKeyFormat, gbq.read_gbq,
                    'SELECT 1', project_id='x', private_key='{}')
def test_read_gbq_with_no_project_id_given_should_fail(self):
    """Calling ``read_gbq`` without a project id raises ``TypeError``."""
    query = 'SELECT "1" as NUMBER_1'
    tm.assertRaises(TypeError, gbq.read_gbq, query)
def test_invalid_kind(self):
    """An unknown plot ``kind`` raises ``ValueError``."""
    series = Series([1, 2])
    tm.assertRaises(ValueError, series.plot, kind='aasdf')
def test_to_csv_multiindex(self):
    """Round-trip frames with MultiIndex rows and/or columns through CSV.

    Covers a MultiIndex row index (plain and with dates), MultiIndex
    columns with and without ``tupleize_cols``, the parser errors for
    invalid ``header`` specifications, and an empty frame.
    """
    pname = '__tmp_to_csv_multiindex__'
    frame = self.frame
    old_index = frame.index
    arrays = np.arange(len(old_index) * 2).reshape(2, -1)
    new_index = MultiIndex.from_arrays(arrays, names=['first', 'second'])
    frame.index = new_index

    with ensure_clean(pname) as path:

        frame.to_csv(path, header=False)
        frame.to_csv(path, columns=['A', 'B'])

        # round trip
        frame.to_csv(path)
        df = DataFrame.from_csv(path, index_col=[0, 1], parse_dates=False)

        # TODO to_csv drops column name
        assert_frame_equal(frame, df, check_names=False)
        self.assertEqual(frame.index.names, df.index.names)

        # needed if setUP becomes a classmethod
        self.frame.index = old_index

        # try multiindex with dates
        tsframe = self.tsframe
        old_index = tsframe.index
        new_index = [old_index, np.arange(len(old_index))]
        tsframe.index = MultiIndex.from_arrays(new_index)

        tsframe.to_csv(path, index_label=['time', 'foo'])
        recons = DataFrame.from_csv(path, index_col=[0, 1])
        # TODO to_csv drops column name
        assert_frame_equal(tsframe, recons, check_names=False)

        # do not load index
        tsframe.to_csv(path)
        recons = DataFrame.from_csv(path, index_col=None)
        np.testing.assert_equal(
            len(recons.columns), len(tsframe.columns) + 2)

        # no index
        tsframe.to_csv(path, index=False)
        recons = DataFrame.from_csv(path, index_col=None)
        assert_almost_equal(recons.values, self.tsframe.values)

        # needed if setUP becomes classmethod
        self.tsframe.index = old_index

    with ensure_clean(pname) as path:
        # GH3571, GH1651, GH3141

        def _make_frame(names=None):
            # 3x3 random int64 frame with two-level columns; ``names=True``
            # names the column levels 'first' and 'second'
            if names is True:
                names = ['first', 'second']
            return DataFrame(np.random.randint(0, 10, size=(3, 3)),
                             columns=MultiIndex.from_tuples(
                                 [('bah', 'foo'),
                                  ('bah', 'bar'),
                                  ('ban', 'baz')], names=names),
                             dtype='int64')

        # column & index are multi-index
        df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
        df.to_csv(path, tupleize_cols=False)
        result = read_csv(path, header=[0, 1, 2, 3],
                          index_col=[0, 1], tupleize_cols=False)
        assert_frame_equal(df, result)

        # column is mi
        df = mkdf(5, 3, r_idx_nlevels=1, c_idx_nlevels=4)
        df.to_csv(path, tupleize_cols=False)
        result = read_csv(
            path, header=[0, 1, 2, 3], index_col=0, tupleize_cols=False)
        assert_frame_equal(df, result)

        # dup column names?
        df = mkdf(5, 3, r_idx_nlevels=3, c_idx_nlevels=4)
        df.to_csv(path, tupleize_cols=False)
        result = read_csv(path, header=[0, 1, 2, 3],
                          index_col=[0, 1, 2], tupleize_cols=False)
        assert_frame_equal(df, result)

        # writing with no index
        df = _make_frame()
        df.to_csv(path, tupleize_cols=False, index=False)
        result = read_csv(path, header=[0, 1], tupleize_cols=False)
        assert_frame_equal(df, result)

        # we lose the names here
        df = _make_frame(True)
        df.to_csv(path, tupleize_cols=False, index=False)
        result = read_csv(path, header=[0, 1], tupleize_cols=False)
        self.assertTrue(all(x is None for x in result.columns.names))
        result.columns.names = df.columns.names
        assert_frame_equal(df, result)

        # tupleize_cols=True and index=False
        df = _make_frame(True)
        df.to_csv(path, tupleize_cols=True, index=False)
        result = read_csv(
            path, header=0, tupleize_cols=True, index_col=None)
        result.columns = df.columns
        assert_frame_equal(df, result)

        # whatsnew example
        df = _make_frame()
        df.to_csv(path, tupleize_cols=False)
        result = read_csv(path, header=[0, 1],
                          index_col=[0], tupleize_cols=False)
        assert_frame_equal(df, result)

        df = _make_frame(True)
        df.to_csv(path, tupleize_cols=False)
        result = read_csv(path, header=[0, 1],
                          index_col=[0], tupleize_cols=False)
        assert_frame_equal(df, result)

        # column & index are multi-index (compatibility)
        df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
        df.to_csv(path, tupleize_cols=True)
        result = read_csv(path, header=0,
                          index_col=[0, 1], tupleize_cols=True)
        result.columns = df.columns
        assert_frame_equal(df, result)

        # invalid options
        df = _make_frame(True)
        df.to_csv(path, tupleize_cols=False)

        # catch invalid headers
        # (raw strings: the patterns contain regex escapes for '[' and ']')
        with assertRaisesRegexp(CParserError,
                                r'Passed header=\[0,1,2\] are too many '
                                r'rows for this multi_index of columns'):
            read_csv(path, tupleize_cols=False,
                     header=lrange(3), index_col=0)

        with assertRaisesRegexp(CParserError,
                                r'Passed header=\[0,1,2,3,4,5,6\], len of '
                                r'7, but only 6 lines in file'):
            read_csv(path, tupleize_cols=False,
                     header=lrange(7), index_col=0)

        for i in [4, 5, 6]:
            with tm.assertRaises(CParserError):
                read_csv(path, tupleize_cols=False,
                         header=lrange(i), index_col=0)

        # write with cols
        with assertRaisesRegexp(TypeError, 'cannot specify cols with a '
                                'MultiIndex'):
            df.to_csv(path, tupleize_cols=False, columns=['foo', 'bar'])

    with ensure_clean(pname) as path:
        # empty
        tsframe[:0].to_csv(path)
        recons = DataFrame.from_csv(path)
        exp = tsframe[:0]
        exp.index = []

        self.assertTrue(recons.columns.equals(exp.columns))
        self.assertEqual(len(recons), 0)
def test_bad_deprecate_kwarg(self):
    """Decorating with ``0`` as the third ``deprecate_kwarg`` argument fails."""
    def f4(new=None):
        pass

    with tm.assertRaises(TypeError):
        # same steps as applying ``@deprecate_kwarg('old', 'new', 0)``
        deprecate_kwarg('old', 'new', 0)(f4)
def test_to_gbq_should_fail_if_invalid_table_name_passed(self):
    """``to_gbq`` with ``'invalid_table_name'`` raises ``NotFoundException``."""
    destination = 'invalid_table_name'
    with tm.assertRaises(gbq.NotFoundException):
        gbq.to_gbq(DataFrame(), destination, project_id="1234")
def test_get_loc(self):
    """``DatetimeIndex.get_loc`` with methods, tolerances, strings and times."""
    idx = pd.date_range('2000-01-01', periods=3)

    for method in [None, 'pad', 'backfill', 'nearest']:
        # exact hits given as Timestamp, datetime and string
        for key in (idx[1], idx[1].to_pydatetime(), str(idx[1])):
            self.assertEqual(idx.get_loc(key, method), 1)
        if method is not None:
            self.assertEqual(
                idx.get_loc(idx[1], method,
                            tolerance=pd.Timedelta('0 days')), 1)

    self.assertEqual(idx.get_loc('2000-01-01', method='nearest'), 0)
    self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest'), 1)

    # the tolerance may be given in several equivalent forms
    for tolerance in ('1 day', pd.Timedelta('1D'),
                      np.timedelta64(1, 'D'), timedelta(1)):
        self.assertEqual(
            idx.get_loc('2000-01-01T12', method='nearest',
                        tolerance=tolerance), 1)

    with tm.assertRaisesRegexp(ValueError, 'must be convertible'):
        idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo')
    with tm.assertRaises(KeyError):
        idx.get_loc('2000-01-01T03', method='nearest',
                    tolerance='2 hours')

    # (key, expected location) with method='nearest'
    nearest_cases = [
        ('2000', slice(0, 3)),
        ('2000-01', slice(0, 3)),
        ('1999', 0),
        ('2001', 2),
    ]
    for key, expected in nearest_cases:
        self.assertEqual(idx.get_loc(key, method='nearest'), expected)

    for key, method in (('1999', 'pad'), ('2001', 'backfill')):
        with tm.assertRaises(KeyError):
            idx.get_loc(key, method=method)

    with tm.assertRaises(KeyError):
        idx.get_loc('foobar')
    with tm.assertRaises(TypeError):
        idx.get_loc(slice(2))

    idx = pd.to_datetime(['2000-01-01', '2000-01-04'])
    nearest_cases = [
        ('2000-01-02', 0),
        ('2000-01-03', 1),
        ('2000-01', slice(0, 2)),
    ]
    for key, expected in nearest_cases:
        self.assertEqual(idx.get_loc(key, method='nearest'), expected)

    # time indexing
    idx = pd.date_range('2000-01-01', periods=24, freq='H')
    tm.assert_numpy_array_equal(idx.get_loc(time(12)),
                                np.array([12]), check_dtype=False)
    tm.assert_numpy_array_equal(idx.get_loc(time(12, 30)),
                                np.array([]), check_dtype=False)
    with tm.assertRaises(NotImplementedError):
        idx.get_loc(time(12, 30), method='pad')