def test_extra_kwargs_2d(self):
    """Check that a square ``sigma`` keyword is subset on both axes by 'drop'."""
    raw = np.random.random((25, 25))
    # Symmetrise: raw + raw.T doubles the diagonal, so subtract it once.
    full_sigma = raw + raw.T - np.diag(np.diag(raw))
    data = sm_data.handle_data(self.y, self.X, 'drop', sigma=full_sigma)

    stacked = np.c_[self.y, self.X]
    keep = ~np.isnan(stacked).any(axis=1)
    expected = full_sigma[keep][:, keep]
    np.testing.assert_array_equal(data.sigma, expected)
def setupClass(cls):
    """Create a pandas fixture with a four-column endog and a constant-first exog.

    Each raw ``*_input`` array is paired with a labelled pandas ``*_result``
    built from the same values.
    """
    nrows, nvars, neqs = 10, 3, 4

    endog = pandas.DataFrame(np.random.random((10, 4)),
                             columns=['y_1', 'y_2', 'y_3', 'y_4'])
    cls.endog = endog
    exog = pandas.DataFrame(np.random.random((10, 2)),
                            columns=['x_1', 'x_2'])
    exog.insert(0, 'const', 1)
    cls.exog = exog
    cls.data = sm_data.handle_data(cls.endog, cls.exog)

    x_labels = exog.columns
    y_labels = endog.columns

    # One value per exog column.
    cls.col_input = np.random.random(nvars)
    cls.col_result = pandas.Series(cls.col_input, index=x_labels)

    # One value per observation.
    cls.row_input = np.random.random(nrows)
    cls.row_result = pandas.Series(cls.row_input, index=exog.index)

    # exog-by-exog square matrix.
    cls.cov_input = np.random.random((nvars, nvars))
    cls.cov_result = pandas.DataFrame(cls.cov_input,
                                      index=x_labels,
                                      columns=x_labels)

    # endog-by-endog square matrix.
    cls.cov_eq_input = np.random.random((neqs, neqs))
    cls.cov_eq_result = pandas.DataFrame(cls.cov_eq_input,
                                         index=y_labels,
                                         columns=y_labels)

    # exog-by-endog matrix.
    cls.col_eq_input = np.random.random((nvars, neqs))
    cls.col_eq_result = pandas.DataFrame(cls.col_eq_input,
                                         index=x_labels,
                                         columns=y_labels)

    cls.xnames = ['const', 'x_1', 'x_2']
    cls.ynames = ['y_1', 'y_2', 'y_3', 'y_4']
    cls.row_labels = cls.exog.index
def setupClass(cls):
    """Extend the parent fixture with a one-dimensional exog array."""
    super(TestArrays1dExog, cls).setupClass()
    cls.xnames = ['x1']
    cls.ynames = 'y'

    cls.endog = np.random.random(10)
    flat_exog = np.random.random(10)
    # handle_data receives the 1-d array; the class keeps a column-vector view.
    cls.data = sm_data.handle_data(cls.endog, flat_exog)
    cls.exog = flat_exog[:, None]
def setupClass(cls):
    """Extend the parent fixture with record-array endog and exog."""
    super(TestStructarrays, cls).setupClass()

    endog_dtype = [("y_1", "f8")]
    exog_dtype = [("const", "f8"), ("x_1", "f8"), ("x_2", "f8")]

    # Reinterpret flat float arrays as structured records, then as recarrays.
    cls.endog = np.random.random(9).view(endog_dtype).view(np.recarray)
    records = np.random.random(9 * 3).view(exog_dtype).view(np.recarray)
    records["const"] = 1
    cls.exog = records

    cls.data = sm_data.handle_data(cls.endog, cls.exog)
    cls.xnames = ["const", "x_1", "x_2"]
    cls.ynames = "y_1"
def test_drop(self):
    """Check that ``missing='drop'`` removes every row containing a NaN."""
    keep = ~np.isnan(np.c_[self.y, self.X]).any(axis=1)
    expected_endog = self.y[keep]
    expected_exog = self.X[keep]

    data = sm_data.handle_data(self.y, self.X, 'drop')

    np.testing.assert_array_equal(data.endog, expected_endog)
    np.testing.assert_array_equal(data.exog, expected_exog)
def setupClass(cls):
    """Extend the parent fixture with record-array endog and exog."""
    super(TestStructarrays, cls).setupClass()

    endog_dtype = [('y_1', 'f8')]
    exog_dtype = [('const', 'f8'), ('x_1', 'f8'), ('x_2', 'f8')]

    # Reinterpret flat float arrays as structured records, then as recarrays.
    cls.endog = np.random.random(9).view(endog_dtype).view(np.recarray)
    records = np.random.random(9 * 3).view(exog_dtype).view(np.recarray)
    records['const'] = 1
    cls.exog = records

    cls.data = sm_data.handle_data(cls.endog, cls.exog)
    cls.xnames = ['const', 'x_1', 'x_2']
    cls.ynames = 'y_1'
def setupClass(cls):
    """Create a plain-ndarray fixture: 10 observations, constant plus two regressors."""
    nrows, nvars = 10, 3

    cls.endog = np.random.random(10)
    regressors = np.random.random((10, 2))
    cls.exog = np.c_[np.ones(10), regressors]
    cls.data = sm_data.handle_data(cls.endog, cls.exog)

    # For ndarray data each expected result is the very same object as its input.
    cls.col_input = np.random.random(nvars)
    cls.col_result = cls.col_input
    cls.row_input = np.random.random(nrows)
    cls.row_result = cls.row_input
    cls.cov_input = np.random.random((nvars, nvars))
    cls.cov_result = cls.cov_input

    cls.xnames = ['const', 'x1', 'x2']
    cls.ynames = 'y'
    cls.row_labels = None
def setup_class(cls):
    """Create a plain-ndarray fixture: 10 observations, constant plus two regressors."""
    nrows, nvars = 10, 3

    cls.endog = np.random.random(10)
    regressors = np.random.random((10, 2))
    cls.exog = np.c_[np.ones(10), regressors]
    cls.data = sm_data.handle_data(cls.endog, cls.exog)

    # For ndarray data each expected result is the very same object as its input.
    cls.col_input = np.random.random(nvars)
    cls.col_result = cls.col_input
    cls.row_input = np.random.random(nrows)
    cls.row_result = cls.row_input
    cls.cov_input = np.random.random((nvars, nvars))
    cls.cov_result = cls.cov_input

    cls.xnames = ['const', 'x1', 'x2']
    cls.ynames = 'y'
    cls.row_labels = None
def test_drop(self):
    """Check that ``missing='drop'`` removes NaN rows from arrays and pandas originals.

    The expected values are built by masking ``self.y`` / ``self.X`` with the
    rows that contain no NaN in either object.
    """
    y = self.y
    X = self.X
    combined = np.c_[y, X]
    idx = ~np.isnan(combined).any(axis=1)
    # ``.ix`` was removed in pandas 1.0; a boolean mask selects the same rows
    # through ``.loc``.
    y = y.loc[idx]
    X = X.loc[idx]
    data = sm_data.handle_data(self.y, self.X, 'drop')
    np.testing.assert_array_equal(data.endog, y.values)
    ptesting.assert_series_equal(data._orig_endog, self.y.loc[idx])
    np.testing.assert_array_equal(data.exog, X.values)
    ptesting.assert_frame_equal(data._orig_exog, self.X.loc[idx])
def test_drop(self):
    """Check that ``missing='drop'`` removes NaN rows from arrays and pandas originals.

    The expected values are built by masking ``self.y`` / ``self.X`` with the
    rows that contain no NaN in either object.
    """
    y = self.y
    X = self.X
    combined = np.c_[y, X]
    idx = ~np.isnan(combined).any(axis=1)
    # ``.ix`` was removed in pandas 1.0; a boolean mask selects the same rows
    # through ``.loc``.
    y = y.loc[idx]
    X = X.loc[idx]
    data = sm_data.handle_data(self.y, self.X, 'drop')
    np.testing.assert_array_equal(data.endog, y.values)
    ptesting.assert_series_equal(data.orig_endog, self.y.loc[idx])
    np.testing.assert_array_equal(data.exog, X.values)
    ptesting.assert_frame_equal(data.orig_exog, self.X.loc[idx])
def setupClass(cls):
    """Create a plain-ndarray fixture with a four-column endog.

    Every ``*_result`` attribute is the same object as its ``*_input``.
    """
    cls.endog = np.random.random((10,4))
    cls.exog = np.c_[np.ones(10), np.random.random((10,2))]
    cls.data = sm_data.handle_data(cls.endog, cls.exog)
    nrows = 10
    nvars = 3
    neqs = 4
    cls.col_result = cls.col_input = np.random.random(nvars)
    cls.row_result = cls.row_input = np.random.random(nrows)
    cls.cov_result = cls.cov_input = np.random.random((nvars, nvars))
    cls.cov_eq_result = cls.cov_eq_input = np.random.random((neqs,neqs))
    # An (nvars, neqs) matrix.  ``np.array((neqs, nvars))`` would only build
    # the two-element vector [4, 3], not a matrix of that shape.
    cls.col_eq_result = cls.col_eq_input = np.random.random((nvars, neqs))
    cls.xnames = ['const', 'x1', 'x2']
    cls.ynames = ['y1', 'y2', 'y3', 'y4']
    cls.row_labels = None
def setupClass(cls):
    """Create a plain-ndarray fixture with a four-column endog.

    Every ``*_result`` attribute is the same object as its ``*_input``.
    """
    cls.endog = np.random.random((10, 4))
    cls.exog = np.c_[np.ones(10), np.random.random((10, 2))]
    cls.data = sm_data.handle_data(cls.endog, cls.exog)
    nrows = 10
    nvars = 3
    neqs = 4
    cls.col_result = cls.col_input = np.random.random(nvars)
    cls.row_result = cls.row_input = np.random.random(nrows)
    cls.cov_result = cls.cov_input = np.random.random((nvars, nvars))
    cls.cov_eq_result = cls.cov_eq_input = np.random.random((neqs, neqs))
    # An (nvars, neqs) matrix.  ``np.array((neqs, nvars))`` would only build
    # the two-element vector [4, 3], not a matrix of that shape.
    cls.col_eq_result = cls.col_eq_input = np.random.random((nvars, neqs))
    cls.xnames = ["const", "x1", "x2"]
    cls.ynames = ["y1", "y2", "y3", "y4"]
    cls.row_labels = None
def setupClass(cls):
    """Create a plain-ndarray fixture with a four-column endog.

    Every ``*_result`` attribute is the same object as its ``*_input``.
    """
    cls.endog = np.random.random((10, 4))
    cls.exog = np.c_[np.ones(10), np.random.random((10, 2))]
    cls.data = sm_data.handle_data(cls.endog, cls.exog)
    nrows = 10
    nvars = 3
    neqs = 4
    cls.col_result = cls.col_input = np.random.random(nvars)
    cls.row_result = cls.row_input = np.random.random(nrows)
    cls.cov_result = cls.cov_input = np.random.random((nvars, nvars))
    cls.cov_eq_result = cls.cov_eq_input = np.random.random((neqs, neqs))
    # An (nvars, neqs) matrix.  ``np.array((neqs, nvars))`` would only build
    # the two-element vector [4, 3], not a matrix of that shape.
    cls.col_eq_result = cls.col_eq_input = np.random.random((nvars, neqs))
    cls.xnames = ['const', 'x1', 'x2']
    cls.ynames = ['y1', 'y2', 'y3', 'y4']
    cls.row_labels = None
def setupClass(cls):
    """Create a pandas fixture: one-column endog frame and a constant-first exog frame."""
    nrows, nvars = 10, 3

    cls.endog = pandas.DataFrame(np.random.random(10), columns=["y_1"])
    design = pandas.DataFrame(np.random.random((10, 2)),
                              columns=["x_1", "x_2"])
    design.insert(0, "const", 1)
    cls.exog = design
    cls.data = sm_data.handle_data(cls.endog, cls.exog)

    x_labels = design.columns

    # Each raw input is paired with a pandas object carrying the exog labels.
    cls.col_input = np.random.random(nvars)
    cls.col_result = pandas.Series(cls.col_input, index=x_labels)
    cls.row_input = np.random.random(nrows)
    cls.row_result = pandas.Series(cls.row_input, index=design.index)
    cls.cov_input = np.random.random((nvars, nvars))
    cls.cov_result = pandas.DataFrame(cls.cov_input,
                                      index=x_labels,
                                      columns=x_labels)

    cls.xnames = ["const", "x_1", "x_2"]
    cls.ynames = "y_1"
    cls.row_labels = cls.exog.index
def setupClass(cls):
    """Create a pandas fixture in which both endog and exog are named Series."""
    nrows, nvars = 10, 1

    cls.endog = pandas.Series(np.random.random(10), name="y_1")
    regressor = pandas.Series(np.random.random(10), name="x_1")
    cls.exog = regressor
    cls.data = sm_data.handle_data(cls.endog, cls.exog)

    # A Series has no columns, so its name is used as the single label.
    x_labels = [regressor.name]

    cls.col_input = np.random.random(nvars)
    cls.col_result = pandas.Series(cls.col_input, index=x_labels)
    cls.row_input = np.random.random(nrows)
    cls.row_result = pandas.Series(cls.row_input, index=regressor.index)
    cls.cov_input = np.random.random((nvars, nvars))
    cls.cov_result = pandas.DataFrame(cls.cov_input,
                                      index=[regressor.name],
                                      columns=[regressor.name])

    cls.xnames = ["x_1"]
    cls.ynames = "y_1"
    cls.row_labels = cls.exog.index
def setup_class(cls):
    """Create a pandas fixture: one-column endog frame and a constant-first exog frame."""
    nrows, nvars = 10, 3

    cls.endog = pd.DataFrame(np.random.random(10), columns=['y_1'])
    design = pd.DataFrame(np.random.random((10, 2)),
                          columns=['x_1', 'x_2'])
    design.insert(0, 'const', 1)
    cls.exog = design
    cls.data = sm_data.handle_data(cls.endog, cls.exog)

    x_labels = design.columns

    # Each raw input is paired with a pandas object carrying the exog labels.
    cls.col_input = np.random.random(nvars)
    cls.col_result = pd.Series(cls.col_input, index=x_labels)
    cls.row_input = np.random.random(nrows)
    cls.row_result = pd.Series(cls.row_input, index=design.index)
    cls.cov_input = np.random.random((nvars, nvars))
    cls.cov_result = pd.DataFrame(cls.cov_input,
                                  index=x_labels,
                                  columns=x_labels)

    cls.xnames = ['const', 'x_1', 'x_2']
    cls.ynames = 'y_1'
    cls.row_labels = cls.exog.index
def setupClass(cls):
    """Create a pandas fixture in which both endog and exog are named Series."""
    nrows, nvars = 10, 1

    cls.endog = pandas.Series(np.random.random(10), name='y_1')
    regressor = pandas.Series(np.random.random(10), name='x_1')
    cls.exog = regressor
    cls.data = sm_data.handle_data(cls.endog, cls.exog)

    # A Series has no columns, so its name is used as the single label.
    x_labels = [regressor.name]

    cls.col_input = np.random.random(nvars)
    cls.col_result = pandas.Series(cls.col_input, index=x_labels)
    cls.row_input = np.random.random(nrows)
    cls.row_result = pandas.Series(cls.row_input, index=regressor.index)
    cls.cov_input = np.random.random((nvars, nvars))
    cls.cov_result = pandas.DataFrame(cls.cov_input,
                                      index=[regressor.name],
                                      columns=[regressor.name])

    cls.xnames = ['x_1']
    cls.ynames = 'y_1'
    cls.row_labels = cls.exog.index
def setup_class(cls):
    """Create a pandas fixture whose exog frame has MultiIndex columns.

    The expected results are labelled with a flat index
    (``'const'``, ``'x_1'``, ``'x_2'``) rather than the MultiIndex itself.
    """
    nrows, nvars = 10, 3

    cls.endog = pd.DataFrame(np.random.random(10), columns=['y_1'])
    column_levels = pd.MultiIndex.from_product([['x'], ['1', '2']])
    design = pd.DataFrame(np.random.random((10, 2)), columns=column_levels)
    flat_labels = pd.Index(['const', 'x_1', 'x_2'])
    design.insert(0, 'const', 1)
    cls.exog = design
    cls.data = sm_data.handle_data(cls.endog, cls.exog)

    cls.col_input = np.random.random(nvars)
    cls.col_result = pd.Series(cls.col_input, index=flat_labels)
    cls.row_input = np.random.random(nrows)
    cls.row_result = pd.Series(cls.row_input, index=design.index)
    cls.cov_input = np.random.random((nvars, nvars))
    cls.cov_result = pd.DataFrame(cls.cov_input,
                                  index=flat_labels,
                                  columns=flat_labels)

    cls.xnames = ['const', 'x_1', 'x_2']
    cls.ynames = 'y_1'
    cls.row_labels = cls.exog.index
def setup_class(cls):
    """Create a pandas fixture whose exog frame has MultiIndex columns.

    The expected results are labelled with a flat index
    (``'const'``, ``'x_1'``, ``'x_2'``) rather than the MultiIndex itself.
    """
    nrows, nvars = 10, 3

    cls.endog = pandas.DataFrame(np.random.random(10), columns=['y_1'])
    column_levels = pandas.MultiIndex.from_product([['x'], ['1', '2']])
    design = pandas.DataFrame(np.random.random((10, 2)),
                              columns=column_levels)
    flat_labels = pandas.Index(['const', 'x_1', 'x_2'])
    design.insert(0, 'const', 1)
    cls.exog = design
    cls.data = sm_data.handle_data(cls.endog, cls.exog)

    cls.col_input = np.random.random(nvars)
    cls.col_result = pandas.Series(cls.col_input, index=flat_labels)
    cls.row_input = np.random.random(nrows)
    cls.row_result = pandas.Series(cls.row_input, index=design.index)
    cls.cov_input = np.random.random((nvars, nvars))
    cls.cov_result = pandas.DataFrame(cls.cov_input,
                                      index=flat_labels,
                                      columns=flat_labels)

    cls.xnames = ['const', 'x_1', 'x_2']
    cls.ynames = 'y_1'
    cls.row_labels = cls.exog.index
def setup_class(cls):
    """Create a mixed fixture: endog is a plain list, exog is a pandas frame."""
    nrows, nvars = 10, 3

    cls.endog = np.random.random(10).tolist()
    design = pandas.DataFrame(np.random.random((10, 2)),
                              columns=['x_1', 'x_2'])
    design.insert(0, 'const', 1)
    cls.exog = design
    cls.data = sm_data.handle_data(cls.endog, cls.exog)

    x_labels = design.columns

    # Expected results carry the labels of the pandas exog.
    cls.col_input = np.random.random(nvars)
    cls.col_result = pandas.Series(cls.col_input, index=x_labels)
    cls.row_input = np.random.random(nrows)
    cls.row_result = pandas.Series(cls.row_input, index=design.index)
    cls.cov_input = np.random.random((nvars, nvars))
    cls.cov_result = pandas.DataFrame(cls.cov_input,
                                      index=x_labels,
                                      columns=x_labels)

    cls.xnames = ['const', 'x_1', 'x_2']
    cls.ynames = 'y'
    cls.row_labels = cls.exog.index
def setupClass(cls):
    """Create a mixed fixture: endog is a pandas frame, exog is a bare ndarray.

    A labelled frame is built first so the expected results can reuse its
    labels; only its ``.values`` are handed to ``handle_data``.
    """
    nrows, nvars = 10, 3

    cls.endog = pandas.DataFrame(np.random.random(10), columns=['y_1'])
    # names mimic defaults
    labelled = pandas.DataFrame(np.random.random((10, 2)),
                                columns=['x1', 'x2'])
    labelled.insert(0, 'const', 1)
    cls.exog = labelled.values
    cls.data = sm_data.handle_data(cls.endog, cls.exog)

    x_labels = labelled.columns

    cls.col_input = np.random.random(nvars)
    cls.col_result = pandas.Series(cls.col_input, index=x_labels)
    cls.row_input = np.random.random(nrows)
    cls.row_result = pandas.Series(cls.row_input, index=labelled.index)
    cls.cov_input = np.random.random((nvars, nvars))
    cls.cov_result = pandas.DataFrame(cls.cov_input,
                                      index=x_labels,
                                      columns=x_labels)

    cls.xnames = ['const', 'x1', 'x2']
    cls.ynames = 'y_1'
    cls.row_labels = cls.endog.index
def test_endog_only_drop(self):
    """Check 'drop' with no exog: endog must equal ``self.y`` without its NaNs."""
    expected = self.y.dropna()
    data = sm_data.handle_data(self.y, None, 'drop')
    np.testing.assert_array_equal(data.endog, expected.values)
def test_none(self):
    """Check that ``missing='none'`` leaves the pandas values untouched."""
    data = sm_data.handle_data(self.y, self.X, 'none')
    comparisons = (
        (data.endog, self.y.values),
        (data.exog, self.X.values),
    )
    for actual, expected in comparisons:
        np.testing.assert_array_equal(actual, expected)
def test_none(self):
    """Check 'none' with ``hasconst=False``: arrays unchanged, no constant counted."""
    result = sm_data.handle_data(self.y, self.X, 'none', hasconst=False)

    np.testing.assert_array_equal(result.endog, self.y)
    np.testing.assert_array_equal(result.exog, self.X)
    assert result.k_constant == 0
def test_mv_endog(self):
    """Check 'drop' when the multi-column ``self.X`` is used as the endog."""
    frame = self.X
    complete_rows = ~np.isnan(frame.values).any(axis=1)
    expected = frame.loc[complete_rows]

    data = sm_data.handle_data(self.X, None, 'drop')
    np.testing.assert_array_equal(data.endog, expected.values)
def test_none(self):
    """Check 'none' with ``hasconst=False`` leaves the pandas values untouched."""
    result = sm_data.handle_data(self.y, self.X, 'none', hasconst=False)
    expected_endog = self.y.values
    np.testing.assert_array_equal(result.endog, expected_endog)
    expected_exog = self.X.values
    np.testing.assert_array_equal(result.exog, expected_exog)
def test_endog_only_raise(self):
    """Expect ``handle_data`` to raise for endog-only input with ``missing='raise'``."""
    # TODO: be more specific about exception
    expect_failure = pytest.raises(Exception)
    with expect_failure:
        sm_data.handle_data(self.y, None, 'raise')
def setupClass(cls):
    """Extend the parent fixture with a single-column, two-dimensional endog."""
    super(TestArrays2dEndog, cls).setupClass()
    cls.endog = np.random.random((10, 1))
    constant = np.ones(10)
    regressors = np.random.random((10, 2))
    cls.exog = np.c_[constant, regressors]
    cls.data = sm_data.handle_data(cls.endog, cls.exog)
def test_raise_no_missing(self):
    """Smoke test: 'raise' with pandas inputs drawn from ``np.random.random``."""
    # GH#1700
    endog = pd.Series(np.random.random(20))
    exog = pd.DataFrame(np.random.random((20, 2)))
    sm_data.handle_data(endog, exog, 'raise')
def __init__(self, endog, exog, constraints=None, **kwargs):
    """Set up the model's state space representation.

    Parameters
    ----------
    endog : array_like or pandas object
        Converted with ``np.asanyarray`` unless ``_is_using_pandas`` reports
        a pandas object.
    exog : array_like or pandas object
        Converted with ``np.asarray`` unless it is a pandas object. A
        1-dimensional input is promoted to a single column; the number of
        columns becomes ``k_exog`` and is used as ``k_states``.
    constraints : optional
        When not ``None``, passed to patsy's
        ``DesignInfo(names).linear_constraint`` with the parameter names
        reported by ``handle_data``; the resulting ``coefs`` / ``constants``
        are stored as ``_r_matrix`` / ``_q_matrix``.
    **kwargs
        Forwarded to ``handle_data`` (only when constraints are given) and to
        the parent constructor. ``'initialization'`` defaults to
        ``'diffuse'`` when not supplied.
    """
    # Standardize data
    endog_using_pandas = _is_using_pandas(endog, None)
    if not endog_using_pandas:
        endog = np.asanyarray(endog)

    exog_is_using_pandas = _is_using_pandas(exog, None)
    if not exog_is_using_pandas:
        exog = np.asarray(exog)

    # Make sure we have 2-dimensional array
    if exog.ndim == 1:
        if not exog_is_using_pandas:
            exog = exog[:, None]
        else:
            exog = pd.DataFrame(exog)

    self.k_exog = exog.shape[1]

    # Handle constraints
    self.k_constraints = 0
    self._r_matrix = self._q_matrix = None
    if constraints is not None:
        # Imported locally, only when constraints are actually used.
        from patsy import DesignInfo
        from statsmodels.base.data import handle_data
        data = handle_data(endog, exog, **kwargs)
        names = data.param_names
        LC = DesignInfo(names).linear_constraint(constraints)
        self._r_matrix, self._q_matrix = LC.coefs, LC.constants
        self.k_constraints = self._r_matrix.shape[0]

        # One extra endog column per constraint row, filled with the
        # corresponding constraint constant.
        constraint_endog = np.zeros((len(endog), len(self._r_matrix)))
        if endog_using_pandas:
            constraint_endog = pd.DataFrame(constraint_endog,
                                            index=endog.index)
            endog = concat([endog, constraint_endog], axis=1)
            endog.values[:, 1:] = self._q_matrix[:, 0]
        else:
            # NOTE(review): unlike the pandas branch, ``constraint_endog`` is
            # not appended to ``endog`` before this slice assignment --
            # confirm that ndarray input with constraints works as intended.
            endog[:, 1:] = self._q_matrix[:, 0]

    # Handle coefficient initialization
    kwargs.setdefault('initialization', 'diffuse')

    # Initialize the state space representation
    super(RecursiveLS, self).__init__(
        endog, k_states=self.k_exog, exog=exog, **kwargs)

    # Use univariate filtering by default
    self.ssm.filter_univariate = True

    # Concentrate the scale out of the likelihood function
    self.ssm.filter_concentrated = True

    # Setup the state space representation
    self['design'] = np.zeros((self.k_endog, self.k_states, self.nobs))
    self['design', 0] = self.exog[:, :, None].T
    if self._r_matrix is not None:
        self['design', 1:, :] = self._r_matrix[:, :, None]
    self['transition'] = np.eye(self.k_states)

    # Notice that the filter output does not depend on the measurement
    # variance, so we set it here to 1
    self['obs_cov', 0, 0] = 1.
    # NOTE(review): repeats the identical 'transition' assignment made a few
    # lines above.
    self['transition'] = np.eye(self.k_states)

    # Linear constraints are technically imposed by adding "fake" endog
    # variables that are used during filtering, but for all model- and
    # results-based purposes we want k_endog = 1.
    if self._r_matrix is not None:
        self.k_endog = 1
def test_raise_no_missing(self):
    """Smoke test: 'raise' with pandas inputs drawn from ``np.random.random``."""
    # smoke test for #1700
    endog = pandas.Series(np.random.random(20))
    exog = pandas.DataFrame(np.random.random((20, 2)))
    sm_data.handle_data(endog, exog, 'raise')
def test_pandas_constant(self):
    """Check that an added ``'const'`` column is detected as the constant."""
    exog_with_const = self.data.exog.copy()
    exog_with_const['const'] = 1
    handled = sm_data.handle_data(self.data.endog, exog_with_const)

    np.testing.assert_equal(handled.k_constant, 1)
    np.testing.assert_equal(handled.const_idx, 6)
def test_pandas_noconstant(self):
    """Check that no constant is reported for the unmodified pandas exog."""
    exog_copy = self.data.exog.copy()
    handled = sm_data.handle_data(self.data.endog, exog_copy)

    np.testing.assert_equal(handled.k_constant, 0)
    np.testing.assert_equal(handled.const_idx, None)
def setupClass(cls):
    """Extend the parent fixture with endog and exog given as nested lists."""
    super(TestLists, cls).setupClass()
    cls.endog = np.random.random(10).tolist()
    exog_array = np.c_[np.ones(10), np.random.random((10, 2))]
    cls.exog = exog_array.tolist()
    cls.data = sm_data.handle_data(cls.endog, cls.exog)
def test_mv_endog(self):
    """Check 'drop' when the multi-column ``self.X`` is used as the endog.

    The expected endog is ``self.X`` restricted to the rows without any NaN.
    """
    y = self.X
    # ``.ix`` was removed in pandas 1.0; a boolean mask selects the same rows
    # through ``.loc``.
    y = y.loc[~np.isnan(y.values).any(axis=1)]
    data = sm_data.handle_data(self.X, None, 'drop')
    np.testing.assert_array_equal(data.endog, y.values)
def test_raise_no_missing(self):
    """Smoke test: 'raise' with ndarray inputs drawn from ``np.random.random``."""
    # smoke test for #1700
    endog = np.random.random(20)
    exog = np.random.random((20, 2))
    sm_data.handle_data(endog, exog, 'raise')
def test_labels(self):
    """Check ``row_labels`` of the data returned for ``missing='drop'``."""
    # Expected index: 0..24 with the labels 2, 10 and 14 left out.
    labels = pandas.Index([0, 1, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15,
                           16, 17, 18, 19, 20, 21, 22, 23, 24])
    data = sm_data.handle_data(self.y, self.X, 'drop')
    np.testing.assert_(data.row_labels.equals(labels))
def test_extra_kwargs_1d(self):
    """Check that a 1-d ``weights`` keyword is subset to the rows kept by 'drop'."""
    all_weights = np.random.random(25)
    data = sm_data.handle_data(self.y, self.X, 'drop', weights=all_weights)

    stacked = np.c_[self.y, self.X]
    keep = ~np.isnan(stacked).any(axis=1)
    np.testing.assert_array_equal(data.weights, all_weights[keep])
def setupClass(cls):
    """Extend the parent fixture with a single-column, two-dimensional endog."""
    super(TestArrays2dEndog, cls).setupClass()
    cls.endog = np.random.random((10, 1))
    constant = np.ones(10)
    regressors = np.random.random((10, 2))
    cls.exog = np.c_[constant, regressors]
    cls.data = sm_data.handle_data(cls.endog, cls.exog)
def test_raise(self):
    """Expect ``handle_data`` to raise for ``self.y`` / ``self.X`` with ``missing='raise'``."""
    # TODO: be more specific about exception
    expect_failure = pytest.raises(Exception)
    with expect_failure:
        sm_data.handle_data(self.y, self.X, 'raise')
def setupClass(cls):
    """Extend the parent fixture with endog and exog given as nested lists."""
    super(TestLists, cls).setupClass()
    cls.endog = np.random.random(10).tolist()
    exog_array = np.c_[np.ones(10), np.random.random((10, 2))]
    cls.exog = exog_array.tolist()
    cls.data = sm_data.handle_data(cls.endog, cls.exog)
def test_endog_only_drop(self):
    """Check 'drop' with no exog: endog must equal ``self.y`` without its NaNs."""
    not_missing = ~np.isnan(self.y)
    expected = self.y[not_missing]
    data = sm_data.handle_data(self.y, None, 'drop')
    np.testing.assert_array_equal(data.endog, expected)
def test_formula_missing_extra_arrays():
    """Check missing-value handling of extra arrays on the formula path.

    Runs ``handle_formula_data`` followed by ``handle_data`` three times:
    with 1-d weights that contain a NaN, with 2-d weights that contain
    symmetric NaNs, and with weights of the wrong length (which must raise
    ``ValueError``).
    """
    # Fixed seed: the draws below happen in a fixed order.
    np.random.seed(1)
    # because patsy can't turn off missing data-handling as of 0.3.0, we need
    # separate tests to make sure that missing values are handled correctly
    # when going through formulas

    # there is a handle_formula_data step
    # then there is the regular handle_data step
    # see 2083

    # the untested cases are endog/exog have missing. extra has missing.
    # endog/exog are fine. extra has missing.
    # endog/exog do or do not have missing and extra has wrong dimension
    y = np.random.randn(10)
    y_missing = y.copy()
    y_missing[[2, 5]] = np.nan
    X = np.random.randn(10)
    X_missing = X.copy()
    X_missing[[1, 3]] = np.nan

    weights = np.random.uniform(size=10)
    weights_missing = weights.copy()
    weights_missing[[6]] = np.nan

    weights_wrong_size = np.random.randn(12)

    data = {'y': y,
            'X': X,
            'y_missing': y_missing,
            'X_missing': X_missing,
            'weights': weights,
            'weights_missing': weights_missing}
    data = pandas.DataFrame.from_dict(data)
    data['constant'] = 1

    formula = 'y_missing ~ X_missing'

    # Case 1: 1-d weights with a NaN in row 6, on top of the NaN rows of the
    # formula variables (1, 2, 3, 5).
    ((endog, exog),
     missing_idx, design_info) = handle_formula_data(data, None, formula,
                                                     depth=2,
                                                     missing='drop')

    kwargs = {'missing_idx': missing_idx, 'missing': 'drop',
              'weights': data['weights_missing']}

    model_data = sm_data.handle_data(endog, exog, **kwargs)
    # dropna() removes every row with a NaN in any column of ``data``.
    data_nona = data.dropna()
    assert_equal(data_nona['y'].values, model_data.endog)
    assert_equal(data_nona[['constant', 'X']].values, model_data.exog)
    assert_equal(data_nona['weights'].values, model_data.weights)

    # Case 2: 2-d weights with NaNs at (8, 7) and (7, 8).
    tmp = handle_formula_data(data, None, formula, depth=2, missing='drop')
    (endog, exog), missing_idx, design_info = tmp
    weights_2d = np.random.randn(10, 10)
    weights_2d[[8, 7], [7, 8]] = np.nan  #symmetric missing values
    kwargs.update({'weights': weights_2d,
                   'missing_idx': missing_idx})

    model_data2 = sm_data.handle_data(endog, exog, **kwargs)

    # Rows 1, 2, 3, 5 have NaN in the formula variables and rows 7, 8 have
    # NaN in weights_2d, leaving these four.
    good_idx = [0, 4, 6, 9]
    assert_equal(data.loc[good_idx, 'y'], model_data2.endog)
    assert_equal(data.loc[good_idx, ['constant', 'X']], model_data2.exog)
    assert_equal(weights_2d[good_idx][:, good_idx], model_data2.weights)

    # Case 3: weights of length 12 against 10 observations must raise.
    tmp = handle_formula_data(data, None, formula, depth=2, missing='drop')
    (endog, exog), missing_idx, design_info = tmp

    kwargs.update({'weights': weights_wrong_size,
                   'missing_idx': missing_idx})
    assert_raises(ValueError, sm_data.handle_data, endog, exog, **kwargs)
def test_none(self):
    """Check 'none' with ``hasconst=False`` leaves the arrays untouched."""
    result = sm_data.handle_data(self.y, self.X, 'none', hasconst=False)
    for actual, expected in ((result.endog, self.y), (result.exog, self.X)):
        np.testing.assert_array_equal(actual, expected)
def test_array_noconstant(self):
    """Check that no constant is reported when endog and exog are passed as ndarrays."""
    exog_copy = self.data.exog.copy()
    endog_values = self.data.endog.values
    handled = sm_data.handle_data(endog_values, exog_copy.values)

    np.testing.assert_equal(handled.k_constant, 0)
    np.testing.assert_equal(handled.const_idx, None)
def test_none(self):
    """Check that ``missing='none'`` leaves the arrays untouched."""
    result = sm_data.handle_data(self.y, self.X, 'none')
    for actual, expected in ((result.endog, self.y), (result.exog, self.X)):
        np.testing.assert_array_equal(actual, expected)
def test_formula_missing_extra_arrays():
    """Check missing-value handling of extra arrays on the formula path.

    Runs ``handle_formula_data`` followed by ``handle_data`` three times:
    with 1-d weights that contain a NaN, with 2-d weights that contain
    symmetric NaNs, and with weights of the wrong length (which must raise
    ``ValueError``).
    """
    # Fixed seed: the draws below happen in a fixed order.
    np.random.seed(1)
    # because patsy can't turn off missing data-handling as of 0.3.0, we need
    # separate tests to make sure that missing values are handled correctly
    # when going through formulas

    # there is a handle_formula_data step
    # then there is the regular handle_data step
    # see 2083

    # the untested cases are endog/exog have missing. extra has missing.
    # endog/exog are fine. extra has missing.
    # endog/exog do or do not have missing and extra has wrong dimension
    y = np.random.randn(10)
    y_missing = y.copy()
    y_missing[[2, 5]] = np.nan
    X = np.random.randn(10)
    X_missing = X.copy()
    X_missing[[1, 3]] = np.nan

    weights = np.random.uniform(size=10)
    weights_missing = weights.copy()
    weights_missing[[6]] = np.nan

    weights_wrong_size = np.random.randn(12)

    data = {
        'y': y,
        'X': X,
        'y_missing': y_missing,
        'X_missing': X_missing,
        'weights': weights,
        'weights_missing': weights_missing
    }
    data = pandas.DataFrame.from_dict(data)
    data['constant'] = 1

    formula = 'y_missing ~ X_missing'

    # Case 1: 1-d weights with a NaN in row 6, on top of the NaN rows of the
    # formula variables (1, 2, 3, 5).
    ((endog, exog),
     missing_idx, design_info) = handle_formula_data(data, None, formula,
                                                     depth=2,
                                                     missing='drop')

    kwargs = {
        'missing_idx': missing_idx,
        'missing': 'drop',
        'weights': data['weights_missing']
    }

    model_data = sm_data.handle_data(endog, exog, **kwargs)
    # dropna() removes every row with a NaN in any column of ``data``.
    data_nona = data.dropna()
    assert_equal(data_nona['y'].values, model_data.endog)
    assert_equal(data_nona[['constant', 'X']].values, model_data.exog)
    assert_equal(data_nona['weights'].values, model_data.weights)

    # Case 2: 2-d weights with NaNs at (8, 7) and (7, 8).
    tmp = handle_formula_data(data, None, formula, depth=2, missing='drop')
    (endog, exog), missing_idx, design_info = tmp
    weights_2d = np.random.randn(10, 10)
    weights_2d[[8, 7], [7, 8]] = np.nan  #symmetric missing values
    kwargs.update({'weights': weights_2d, 'missing_idx': missing_idx})

    model_data2 = sm_data.handle_data(endog, exog, **kwargs)

    # Rows 1, 2, 3, 5 have NaN in the formula variables and rows 7, 8 have
    # NaN in weights_2d, leaving these four.
    good_idx = [0, 4, 6, 9]
    assert_equal(data.loc[good_idx, 'y'], model_data2.endog)
    assert_equal(data.loc[good_idx, ['constant', 'X']], model_data2.exog)
    assert_equal(weights_2d[good_idx][:, good_idx], model_data2.weights)

    # Case 3: weights of length 12 against 10 observations must raise.
    tmp = handle_formula_data(data, None, formula, depth=2, missing='drop')
    (endog, exog), missing_idx, design_info = tmp

    kwargs.update({'weights': weights_wrong_size, 'missing_idx': missing_idx})
    assert_raises(ValueError, sm_data.handle_data, endog, exog, **kwargs)
def _handle_data(self, X, missing='none'):
    """Pass ``X`` to ``handle_data`` with no exog and return the resulting object.

    Also sets ``self.endog`` from the result and ``self.exog`` to ``None``.
    """
    wrapped = handle_data(X, None, missing, 0)
    # TODO: remove both assignments when we don't inherit from LLM
    self.endog = wrapped.endog
    self.exog = None
    return wrapped