def test_MICE1(self):
    """Each draw from a MICE/OLS analysis should be a wrapped regression result."""
    from statsmodels.regression.linear_model import RegressionResultsWrapper

    imputed = mice.MICEData(gendat())
    analysis = mice.MICE("y ~ x1 + x2 + x1:x2", sm.OLS, imputed)

    # Draw three successive fitted models and check the type of each one.
    for _ in range(3):
        fitted = analysis.next_sample()
        assert isinstance(fitted, RegressionResultsWrapper)
def test_MICE(self):
    """Fitting a MICE/OLS analysis should yield a ``MICEResults`` object."""
    imputed = mice.MICEData(gendat())
    analysis = mice.MICE("y ~ x1 + x2 + x1:x2", sm.OLS, imputed)

    pooled = analysis.fit(1, 3)
    assert isinstance(pooled, mice.MICEResults)

    # Smoke test: building the summary must not raise.
    pooled.summary()
def test_MICE2(self):
    """MICE with a binomial GLM should return GLM results carrying that family."""
    from statsmodels.genmod.generalized_linear_model import GLMResultsWrapper

    imputed = mice.MICEData(gendat())
    glm_options = {"family": sm.families.Binomial()}
    analysis = mice.MICE("x3 ~ x1 + x2", sm.GLM, imputed,
                         init_kwds=glm_options)

    # Three successive draws; each must be a GLM fit using the Binomial family.
    for _ in range(3):
        fitted = analysis.next_sample()
        assert isinstance(fitted, GLMResultsWrapper)
        assert isinstance(fitted.family, sm.families.Binomial)
def imputation_mice(self, var_name, reddy_mice, reddy_info):
    """Impute ``var_name`` with MICE and store the imputed column in ``reddy_info``.

    Parameters
    ----------
    var_name : str
        Column to impute; it is the response of the OLS formula.
    reddy_mice : pandas-style frame (``.copy``, ``.loc`` and ``.columns`` are used)
        Data given to ``mice.MICEData`` as a copy. Every column other than
        ``var_name`` becomes a predictor in the formula.
    reddy_info : pandas-style frame
        Receives the imputed values of ``var_name``.

    Returns
    -------
    The same ``reddy_info`` object, with ``var_name`` overwritten.

    Raises
    ------
    AssertionError
        If ``reddy_info[var_name]`` still contains nulls after the assignment.
    """
    # MICEData is handed a copy rather than the caller's object.
    working_copy = reddy_mice.copy()
    print(
        "How many null values in " + var_name + " to change? "
        + str(reddy_info[var_name].isnull().sum())
    )
    imputer = mice.MICEData(working_copy)

    # Formula has the shape "var_name ~ other1 + other2 + ...".
    is_predictor = reddy_mice.columns != var_name
    predictors = reddy_mice.loc[:, is_predictor].columns
    formula = var_name + ' ~' + ' +'.join(f' {name}' for name in predictors)

    # Run the imputation; the two arguments to fit are, per the original note,
    # the number of cycles to skip and the number of datasets to impute.
    analysis = mice.MICE(formula, lm.OLS, imputer)
    analysis.fit(10, 10)

    # Copy the imputed column out of the MICE data object.
    reddy_info[var_name] = analysis.data.data[var_name].values

    if reddy_info[var_name].isnull().sum() != 0:
        raise AssertionError("All values could not be imputed.")

    print(var_name + " successfully imputed")
    return reddy_info
def test_combine(self):
    """Compare pooled MICE/OLS output against stored reference values.

    The data are simulated from a fixed seed, with missing values placed in
    the first 100 entries of ``x1`` and in entries 250 onward of ``x2``.
    """
    n_obs = 300
    np.random.seed(3897)

    # The order of the three draws matters for reproducing the references.
    x1 = np.random.normal(size=n_obs)
    x2 = np.random.normal(size=n_obs)
    y = x1 + x2 + np.random.normal(size=n_obs)

    x1[:100] = np.nan
    x2[250:] = np.nan

    frame = pd.DataFrame({"x1": x1, "x2": x2, "y": y})
    imputed = mice.MICEData(frame)
    analysis = mice.MICE("y ~ x1 + x2", sm.OLS, imputed, n_skip=20)
    pooled = analysis.fit(10, 20)

    # (result attribute, reference values), checked in this order.
    references = (
        ("frac_miss_info", [0.1920533, 0.1587287, 0.33174032]),
        ("params", [-0.05397474, 0.97273307, 1.01652293]),
        ("tvalues", [-0.84781698, 15.10491582, 13.59998039]),
    )
    for attribute, expected in references:
        assert_allclose(getattr(pooled, attribute), np.asarray(expected),
                        atol=1e-5)
def test_combine(self):
    """Compare pooled MICE/OLS output against stored reference values.

    The data are simulated from a fixed seed, with missing values placed in
    the first 100 entries of ``x1`` and in entries 250 onward of ``x2``.
    """
    n_obs = 300
    np.random.seed(3897)

    # The order of the three draws matters for reproducing the references.
    x1 = np.random.normal(size=n_obs)
    x2 = np.random.normal(size=n_obs)
    y = x1 + x2 + np.random.normal(size=n_obs)

    x1[:100] = np.nan
    x2[250:] = np.nan

    frame = pd.DataFrame({"x1": x1, "x2": x2, "y": y})
    imputed = mice.MICEData(frame)
    analysis = mice.MICE("y ~ x1 + x2", sm.OLS, imputed, n_skip=20)
    pooled = analysis.fit(10, 20)

    # (result attribute, reference values), checked in this order.
    references = (
        ("frac_miss_info", [0.1778143, 0.11057262, 0.29626521]),
        ("params", [-0.03486102, 0.96236808, 0.9970371]),
        ("tvalues", [-0.54674776, 15.28091069, 13.61359403]),
    )
    for attribute, expected in references:
        assert_allclose(getattr(pooled, attribute), np.asarray(expected),
                        atol=1e-5)
def Interpolation_mice(df: pd.DataFrame) -> pd.DataFrame:
    """Run a MICE/OLS fit on ``df`` and return the next sample from the imputer.

    The first column of ``df`` is the response and every remaining column is
    a predictor, e.g. ``'y ~ x1 + x2 + x3 + x4'``. ``df`` must have at least
    two columns, otherwise indexing the second column raises ``IndexError``.

    Parameters
    ----------
    df : pd.DataFrame
        Data passed to ``mice.MICEData``.

    Returns
    -------
    pd.DataFrame
        The value of ``next_sample()`` called on the ``MICEData`` object
        after the fit has run.
    """
    imputer = mice.MICEData(df)

    columns = df.columns
    formula = columns[0] + " ~ " + columns[1]
    formula += "".join(" + " + name for name in columns[2:])

    # The fit result is not used here; only the imputer is read afterwards.
    # TODO: clarify what exactly the pooled fit results contain.
    mice.MICE(formula, sm.OLS, imputer).fit(10, 10)

    return imputer.next_sample()
chmiss).fit() (ilogit(lmodr.predict(chmiss))*100)[mv] # chredlin.race.iloc[np.where(chmiss.race.isna())] # ## Multiple Imputation # import statsmodels.imputation.mice as smi imp = smi.MICEData(chmiss) fm = 'involact ~ race + fire + theft + age + np.log(income)' mmod = smi.MICE(fm, sm.OLS, imp) results = mmod.fit(10, 50) print(results.summary()) # ## Discussion # ## Exercises # ## Packages Used import sys import matplotlib import statsmodels as sm import seaborn as sns print("Python version:{}".format(sys.version)) print("matplotlib version: {}".format(matplotlib.__version__))
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 1 23:39:09 2019

@author: 92156

Example script: fit an OLS model with MICE and print the pooled summary.
"""
import statsmodels.api as sm
from statsmodels.imputation import mice

# NOTE(review): `data` is not defined anywhere in this script. Presumably it
# is a pandas DataFrame with columns y, x1, x2, x3, x4 (the names used in the
# formula below) -- confirm and load it before this line.
imp = mice.MICEData(data)

fml = 'y ~ x1 + x2 + x3 + x4'

# The model gets its own name: assigning it to `mice` would overwrite the
# imported module. The pasted interactive-prompt markers (">>>") were removed
# because they are not valid Python in a script.
mice_model = mice.MICE(fml, sm.OLS, imp)
results = mice_model.fit(10, 10)
print(results.summary())