def setUp(self):
    # Build one Series/DataFrame/Panel fixture per index flavour and then
    # collect them into per-object dicts keyed by flavour name.

    # Install an 'ignore' filter for FutureWarning.
    import warnings
    warnings.filterwarnings(action='ignore', category=FutureWarning)

    rand = np.random.rand
    randn = np.random.randn

    # integer labels, a different stride on each axis
    step2 = list(range(0, 8, 2))
    step3 = list(range(0, 12, 3))
    step4 = list(range(0, 16, 4))
    self.series_ints = Series(rand(4), index=step2)
    self.frame_ints = DataFrame(randn(4, 4), index=step2, columns=step3)
    self.panel_ints = Panel(rand(4, 4, 4), items=step2,
                            major_axis=step3, minor_axis=step4)

    # string labels
    lower = list('abcd')
    upper = list('ABCD')
    self.series_labels = Series(randn(4), index=lower)
    self.frame_labels = DataFrame(randn(4, 4), index=lower, columns=upper)
    self.panel_labels = Panel(randn(4, 4, 4), items=lower,
                              major_axis=upper, minor_axis=list('ZYXW'))

    # integers and a string mixed in the same axis
    mixed = [2, 4, 'null', 8]
    self.series_mixed = Series(randn(4), index=mixed)
    self.frame_mixed = DataFrame(randn(4, 4), index=mixed)
    self.panel_mixed = Panel(randn(4, 4, 4), items=mixed)

    # daily timestamps
    self.series_ts = Series(randn(4),
                            index=date_range('20130101', periods=4))
    self.frame_ts = DataFrame(randn(4, 4),
                              index=date_range('20130101', periods=4))
    self.panel_ts = Panel(randn(4, 4, 4),
                          items=date_range('20130101', periods=4))

    # float-labelled fixtures are currently disabled:
    #self.series_floats = Series(np.random.randn(4), index=[1.00, 2.00, 3.00, 4.00])
    #self.frame_floats = DataFrame(np.random.randn(4, 4), columns=[1.00, 2.00, 3.00, 4.00])
    #self.panel_floats = Panel(np.random.rand(4,4,4), items = [1.00,2.00,3.00,4.00])

    # empty containers
    self.frame_empty = DataFrame({})
    self.series_empty = Series({})
    self.panel_empty = Panel({})

    # form agglomerates: self.<obj> maps each type name to the matching
    # self.<obj>_<typ> fixture, or None when no such fixture was built
    for obj_name in self._objs:
        by_type = dict(
            (typ, getattr(self, '%s_%s' % (obj_name, typ), None))
            for typ in self._typs)
        setattr(self, obj_name, by_type)
def rolling_corr_pairwise(df, window, min_periods=None):
    """
    Rolling correlation of every pair of columns in ``df``, returned as a
    Panel whose items are dates.

    Parameters
    ----------
    df : DataFrame
    window : int
        Passed through to ``rolling_corr``.
    min_periods : int, default None
        Passed through to ``rolling_corr``.

    Returns
    -------
    correls : Panel
    """
    from pandas import Panel
    from collections import defaultdict

    columns = df.columns
    by_pair = defaultdict(dict)
    for start, left in enumerate(columns):
        left_values = df[left]
        # Only visit the upper triangle (including the diagonal); each
        # result is stored under both orderings of the pair.
        for right in columns[start:]:
            rolled = rolling_corr(left_values, df[right], window,
                                  min_periods=min_periods)
            by_pair[left][right] = rolled
            by_pair[right][left] = rolled

    panel = Panel.from_dict(by_pair)
    return panel.swapaxes('items', 'major')
def rolling_corr_pairwise(df, window, min_periods=None):
    """
    Build the pairwise rolling correlation matrices of ``df``'s columns as
    a Panel whose items are dates.

    Parameters
    ----------
    df : DataFrame
    window : int
        Size of the moving window, i.e. the number of observations used
        for calculating the statistic.
    min_periods : int, default None
        Minimum number of observations in a window required to have a
        value (otherwise the result is NA).

    Returns
    -------
    correls : Panel
    """
    from pandas import Panel
    from collections import defaultdict

    names = df.columns
    correlations = defaultdict(dict)
    for offset, first in enumerate(names):
        first_col = df[first]
        # The second column starts at the first one, so each unordered
        # pair is computed once and mirrored into both slots.
        for second in names[offset:]:
            pair_corr = rolling_corr(first_col, df[second], window,
                                     min_periods=min_periods)
            correlations[first][second] = pair_corr
            correlations[second][first] = pair_corr

    stacked = Panel.from_dict(correlations)
    return stacked.swapaxes('items', 'major')
def _flex_binary_moment(arg1, arg2, f, pairwise=False): if not (isinstance(arg1, (np.ndarray, Series, DataFrame)) and isinstance(arg2, (np.ndarray, Series, DataFrame))): raise TypeError("arguments to moment function must be of type " "np.ndarray/Series/DataFrame") if isinstance(arg1, (np.ndarray, Series)) and isinstance(arg2, (np.ndarray, Series)): X, Y = _prep_binary(arg1, arg2) return f(X, Y) elif isinstance(arg1, DataFrame): def dataframe_from_int_dict(data, frame_template): result = DataFrame(data, index=frame_template.index) result.columns = frame_template.columns[result.columns] return result results = {} if isinstance(arg2, DataFrame): if pairwise is False: if arg1 is arg2: # special case in order to handle duplicate column names for i, col in enumerate(arg1.columns): results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i]) return dataframe_from_int_dict(results, arg1) else: if not arg1.columns.is_unique: raise ValueError("'arg1' columns are not unique") if not arg2.columns.is_unique: raise ValueError("'arg2' columns are not unique") X, Y = arg1.align(arg2, join="outer") X = X + 0 * Y Y = Y + 0 * X res_columns = arg1.columns.union(arg2.columns) for col in res_columns: if col in X and col in Y: results[col] = f(X[col], Y[col]) return DataFrame(results, index=X.index, columns=res_columns) elif pairwise is True: results = defaultdict(dict) for i, k1 in enumerate(arg1.columns): for j, k2 in enumerate(arg2.columns): if j < i and arg2 is arg1: # Symmetric case results[i][j] = results[j][i] else: results[i][j] = f(*_prep_binary(arg1.iloc[:, i], arg2.iloc[:, j])) p = Panel.from_dict(results).swapaxes("items", "major") p.major_axis = arg1.columns[p.major_axis] p.minor_axis = arg2.columns[p.minor_axis] return p else: raise ValueError("'pairwise' is not True/False") else: results = {} for i, col in enumerate(arg1.columns): results[i] = f(*_prep_binary(arg1.iloc[:, i], arg2)) return dataframe_from_int_dict(results, arg1) else: return _flex_binary_moment(arg2, arg1, f)
def test_iloc_panel_issue(self):
    # GH 3617
    # Positional indexing on a Panel: every axis addressed with a scalar
    # is dropped from the result, every sliced axis is kept.
    p = Panel(randn(4, 4, 4))

    # assertEqual is used instead of the deprecated ``assert_`` alias so
    # that a failure reports both the actual and the expected shape.

    # all three axes sliced
    self.assertEqual(p.iloc[:3, :3, :3].shape, (3, 3, 3))

    # one scalar indexer
    self.assertEqual(p.iloc[1, :3, :3].shape, (3, 3))
    self.assertEqual(p.iloc[:3, 1, :3].shape, (3, 3))
    self.assertEqual(p.iloc[:3, :3, 1].shape, (3, 3))

    # two scalar indexers
    self.assertEqual(p.iloc[1, 1, :3].shape, (3,))
    self.assertEqual(p.iloc[1, :3, 1].shape, (3,))
    self.assertEqual(p.iloc[:3, 1, 1].shape, (3,))
def _flex_binary_moment(arg1, arg2, f, pairwise=False): if not (isinstance(arg1, (np.ndarray, Series, DataFrame)) and isinstance(arg2, (np.ndarray, Series, DataFrame))): raise TypeError("arguments to moment function must be of type " "np.ndarray/Series/DataFrame") if isinstance(arg1, (np.ndarray, Series)) and \ isinstance(arg2, (np.ndarray,Series)): X, Y = _prep_binary(arg1, arg2) return f(X, Y) elif isinstance(arg1, DataFrame): results = {} if isinstance(arg2, DataFrame): X, Y = arg1.align(arg2, join='outer') if pairwise is False: X = X + 0 * Y Y = Y + 0 * X res_columns = arg1.columns.union(arg2.columns) for col in res_columns: if col in X and col in Y: results[col] = f(X[col], Y[col]) elif pairwise is True: results = defaultdict(dict) for i, k1 in enumerate(arg1.columns): for j, k2 in enumerate(arg2.columns): if j < i and arg2 is arg1: # Symmetric case results[k1][k2] = results[k2][k1] else: results[k1][k2] = f( *_prep_binary(arg1[k1], arg2[k2])) return Panel.from_dict(results).swapaxes('items', 'major') else: raise ValueError("'pairwise' is not True/False") else: res_columns = arg1.columns X, Y = arg1.align(arg2, axis=0, join='outer') results = {} for col in res_columns: results[col] = f(X[col], Y) return DataFrame(results, index=X.index, columns=res_columns) else: return _flex_binary_moment(arg2, arg1, f)
def _flex_binary_moment(arg1, arg2, f, pairwise=False): if not (isinstance(arg1,(np.ndarray, Series, DataFrame)) and isinstance(arg2,(np.ndarray, Series, DataFrame))): raise TypeError("arguments to moment function must be of type " "np.ndarray/Series/DataFrame") if isinstance(arg1, (np.ndarray, Series)) and \ isinstance(arg2, (np.ndarray,Series)): X, Y = _prep_binary(arg1, arg2) return f(X, Y) elif isinstance(arg1, DataFrame): results = {} if isinstance(arg2, DataFrame): X, Y = arg1.align(arg2, join='outer') if pairwise is False: X = X + 0 * Y Y = Y + 0 * X res_columns = arg1.columns.union(arg2.columns) for col in res_columns: if col in X and col in Y: results[col] = f(X[col], Y[col]) elif pairwise is True: results = defaultdict(dict) for i, k1 in enumerate(arg1.columns): for j, k2 in enumerate(arg2.columns): if j<i and arg2 is arg1: # Symmetric case results[k1][k2] = results[k2][k1] else: results[k1][k2] = f(*_prep_binary(arg1[k1], arg2[k2])) return Panel.from_dict(results).swapaxes('items', 'major') else: raise ValueError("'pairwise' is not True/False") else: res_columns = arg1.columns X, Y = arg1.align(arg2, axis=0, join='outer') results = {} for col in res_columns: results[col] = f(X[col], Y) return DataFrame(results, index=X.index, columns=res_columns) else: return _flex_binary_moment(arg2, arg1, f)
def _flex_binary_moment(arg1, arg2, f, pairwise=False): if not (isinstance(arg1, (np.ndarray, Series, DataFrame)) and isinstance(arg2, (np.ndarray, Series, DataFrame))): raise TypeError("arguments to moment function must be of type " "np.ndarray/Series/DataFrame") if isinstance(arg1, (np.ndarray, Series)) and \ isinstance(arg2, (np.ndarray,Series)): X, Y = _prep_binary(arg1, arg2) return f(X, Y) elif isinstance(arg1, DataFrame): def dataframe_from_int_dict(data, frame_template): result = DataFrame(data, index=frame_template.index) result.columns = frame_template.columns[result.columns] return result results = {} if isinstance(arg2, DataFrame): if pairwise is False: if arg1 is arg2: # special case in order to handle duplicate column names for i, col in enumerate(arg1.columns): results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i]) return dataframe_from_int_dict(results, arg1) else: if not arg1.columns.is_unique: raise ValueError("'arg1' columns are not unique") if not arg2.columns.is_unique: raise ValueError("'arg2' columns are not unique") X, Y = arg1.align(arg2, join='outer') X = X + 0 * Y Y = Y + 0 * X res_columns = arg1.columns.union(arg2.columns) for col in res_columns: if col in X and col in Y: results[col] = f(X[col], Y[col]) return DataFrame(results, index=X.index, columns=res_columns) elif pairwise is True: results = defaultdict(dict) for i, k1 in enumerate(arg1.columns): for j, k2 in enumerate(arg2.columns): if j < i and arg2 is arg1: # Symmetric case results[i][j] = results[j][i] else: results[i][j] = f( *_prep_binary(arg1.iloc[:, i], arg2.iloc[:, j])) p = Panel.from_dict(results).swapaxes('items', 'major') p.major_axis = arg1.columns[p.major_axis] p.minor_axis = arg2.columns[p.minor_axis] return p else: raise ValueError("'pairwise' is not True/False") else: results = {} for i, col in enumerate(arg1.columns): results[i] = f(*_prep_binary(arg1.iloc[:, i], arg2)) return dataframe_from_int_dict(results, arg1) else: return _flex_binary_moment(arg2, arg1, f)
_frame = DataFrame(randn(10000, 4), columns=list('ABCD'), dtype='float64') _frame2 = DataFrame(randn(100, 4), columns=list('ABCD'), dtype='float64') _mixed = DataFrame({'A': _frame['A'].copy(), 'B': _frame['B'].astype('float32'), 'C': _frame['C'].astype('int64'), 'D': _frame['D'].astype('int32')}) _mixed2 = DataFrame({'A': _frame2['A'].copy(), 'B': _frame2['B'].astype('float32'), 'C': _frame2['C'].astype('int64'), 'D': _frame2['D'].astype('int32')}) _integer = DataFrame( np.random.randint(1, 100, size=(10001, 4)), columns=list('ABCD'), dtype='int64') _integer2 = DataFrame(np.random.randint(1, 100, size=(101, 4)), columns=list('ABCD'), dtype='int64') _frame_panel = Panel(dict(ItemA=_frame.copy(), ItemB=( _frame.copy() + 3), ItemC=_frame.copy(), ItemD=_frame.copy())) _frame2_panel = Panel(dict(ItemA=_frame2.copy(), ItemB=(_frame2.copy() + 3), ItemC=_frame2.copy(), ItemD=_frame2.copy())) _integer_panel = Panel(dict(ItemA=_integer, ItemB=(_integer + 34).astype( 'int64'))) _integer2_panel = Panel(dict(ItemA=_integer2, ItemB=(_integer2 + 34).astype( 'int64'))) _mixed_panel = Panel(dict(ItemA=_mixed, ItemB=(_mixed + 3))) _mixed2_panel = Panel(dict(ItemA=_mixed2, ItemB=(_mixed2 + 3))) class TestExpressions(tm.TestCase): _multiprocess_can_split_ = False def setUp(self):
def test_partial_setting(self):
    # GH2578, allow ix and friends to partially set
    #
    # Walks through Series, DataFrame and Panel in turn. The sections are
    # order-dependent: some ``expected`` values are built from the ``df``
    # left behind by the preceding section.

    ### series ###
    s_orig = Series([1,2,3])

    # Assigning to label 5, which the expected index [0,1,2,5] shows is
    # appended, via plain [] and via .loc, with an int value ...
    s = s_orig.copy()
    s[5] = 5
    expected = Series([1,2,3,5],index=[0,1,2,5])
    assert_series_equal(s,expected)

    s = s_orig.copy()
    s.loc[5] = 5
    expected = Series([1,2,3,5],index=[0,1,2,5])
    assert_series_equal(s,expected)

    # ... and with a float value.
    s = s_orig.copy()
    s[5] = 5.
    expected = Series([1,2,3,5.],index=[0,1,2,5])
    assert_series_equal(s,expected)

    s = s_orig.copy()
    s.loc[5] = 5.
    expected = Series([1,2,3,5.],index=[0,1,2,5])
    assert_series_equal(s,expected)

    # iloc/iat raise
    # (position 3 is past the end of the 3-element series)
    s = s_orig.copy()
    def f():
        s.iloc[3] = 5.
    self.assertRaises(IndexError, f)
    def f():
        s.iat[3] = 5.
    self.assertRaises(IndexError, f)

    ### frame ###

    df_orig = DataFrame(np.arange(6).reshape(3,2),columns=['A','B'])

    # iloc/iat raise
    # (position [4,2] is outside the 3x2 frame)
    df = df_orig.copy()
    def f():
        df.iloc[4,2] = 5.
    self.assertRaises(IndexError, f)
    def f():
        df.iat[4,2] = 5.
    self.assertRaises(IndexError, f)

    # row setting where it exists
    expected = DataFrame(dict({ 'A' : [0,4,4], 'B' : [1,5,5] }))
    df = df_orig.copy()
    df.iloc[1] = df.iloc[2]
    assert_frame_equal(df,expected)

    expected = DataFrame(dict({ 'A' : [0,4,4], 'B' : [1,5,5] }))
    df = df_orig.copy()
    df.loc[1] = df.loc[2]
    assert_frame_equal(df,expected)

    # Row label 3 is not in the 3-row frame: the expected frame has four
    # rows and is float64.
    expected = DataFrame(dict({ 'A' : [0,2,4,4], 'B' : [1,3,5,5] }),dtype='float64')
    df = df_orig.copy()
    df.loc[3] = df.loc[2]
    assert_frame_equal(df,expected)

    # single dtype frame, overwrite
    expected = DataFrame(dict({ 'A' : [0,2,4], 'B' : [0,2,4] }))
    df = df_orig.copy()
    df.ix[:,'B'] = df.ix[:,'A']
    assert_frame_equal(df,expected)

    # mixed dtype frame, overwrite
    # ('B' is cast to float64 first, and is expected to remain float)
    expected = DataFrame(dict({ 'A' : [0,2,4], 'B' : Series([0.,2.,4.]) }))
    df = df_orig.copy()
    df['B'] = df['B'].astype(np.float64)
    df.ix[:,'B'] = df.ix[:,'A']
    assert_frame_equal(df,expected)

    # single dtype frame, partial setting
    # NOTE: ``expected['C']`` reads ``df`` from the section above, before
    # ``df`` is rebound to a fresh copy on the following line.
    expected = df_orig.copy()
    expected['C'] = df['A'].astype(np.float64)
    df = df_orig.copy()
    df.ix[:,'C'] = df.ix[:,'A']
    assert_frame_equal(df,expected)

    # mixed frame, partial setting
    # NOTE(review): these statements repeat the single-dtype section
    # verbatim and start from the same df_orig; no column is cast to a
    # different dtype here -- confirm whether a mixed frame was intended.
    expected = df_orig.copy()
    expected['C'] = df['A'].astype(np.float64)
    df = df_orig.copy()
    df.ix[:,'C'] = df.ix[:,'A']
    assert_frame_equal(df,expected)

    ### panel ###
    # NOTE(review): p_orig is assigned twice with identical arguments.
    p_orig = Panel(np.arange(16).reshape(2,4,2),items=['Item1','Item2'],major_axis=pd.date_range('2001/1/12',periods=4),minor_axis=['A','B'],dtype='float64')

    # panel setting via item
    p_orig = Panel(np.arange(16).reshape(2,4,2),items=['Item1','Item2'],major_axis=pd.date_range('2001/1/12',periods=4),minor_axis=['A','B'],dtype='float64')
    expected = p_orig.copy()
    expected['Item3'] = expected['Item1']
    p = p_orig.copy()
    p.loc['Item3'] = p['Item1']
    assert_panel_equal(p,expected)

    # panel with aligned series
    # The expected panel is built by transposing so the minor axis becomes
    # the items axis, adding 'C' as a new item there, and transposing back.
    expected = p_orig.copy()
    expected = expected.transpose(2,1,0)
    expected['C'] = DataFrame({ 'Item1' : [30,30,30,30], 'Item2' : [32,32,32,32] },index=p_orig.major_axis)
    expected = expected.transpose(2,1,0)
    p = p_orig.copy()
    p.loc[:,:,'C'] = Series([30,32],index=p_orig.items)
    assert_panel_equal(p,expected)