示例#1
0
    def test_MICE1(self):
        """Each sample drawn from a MICE chain over OLS is a wrapped
        regression result."""
        frame = gendat()
        imputer = mice.MICEData(frame)
        chain = mice.MICE("y ~ x1 + x2 + x1:x2", sm.OLS, imputer)

        from statsmodels.regression.linear_model import RegressionResultsWrapper

        # Draw three successive fits and check the type of every one.
        for _ in range(3):
            fitted = chain.next_sample()
            assert isinstance(fitted, RegressionResultsWrapper)
示例#2
0
    def test_MICE(self):
        """Run a full MICE fit and check the type of the combined result."""
        imputer = mice.MICEData(gendat())
        model = mice.MICE("y ~ x1 + x2 + x1:x2", sm.OLS, imputer)
        pooled = model.fit(1, 3)

        assert isinstance(pooled, mice.MICEResults)

        # Smoke test: producing the summary must not raise.
        pooled.summary()
示例#3
0
    def test_MICE2(self):
        """MICE over a binomial GLM returns GLM results that keep the family."""
        from statsmodels.genmod.generalized_linear_model import GLMResultsWrapper

        frame = gendat()
        imputer = mice.MICEData(frame)
        glm_kwds = {"family": sm.families.Binomial()}
        chain = mice.MICE("x3 ~ x1 + x2", sm.GLM, imputer, init_kwds=glm_kwds)

        # Every one of the three draws must be a GLM fit with a Binomial family.
        for _ in range(3):
            fitted = chain.next_sample()
            assert isinstance(fitted, GLMResultsWrapper)
            assert isinstance(fitted.family, sm.families.Binomial)
 def imputation_mice(self, var_name, reddy_mice, reddy_info):
     """Impute one column with MICE and write the result into ``reddy_info``.

     Parameters
     ----------
     var_name : str
         Column to impute; used as the response of the analysis formula.
     reddy_mice : DataFrame
         Data handed (as a copy) to ``mice.MICEData``. Every column other
         than ``var_name`` becomes a predictor in the formula.
     reddy_info : DataFrame
         Frame whose ``var_name`` column is overwritten with the values
         taken from the MICE data after fitting.

     Returns
     -------
     DataFrame
         The same ``reddy_info`` object that was passed in.

     Raises
     ------
     AssertionError
         If ``reddy_info[var_name]`` still contains nulls afterwards.
     """
     # MICEData is given a copy instead of the caller's own frame.
     working = reddy_mice.copy()
     n_missing = reddy_info[var_name].isnull().sum()
     print("How many null values in " + var_name + " to change? " + str(n_missing))
     imputer = mice.MICEData(working)

     # Formula shape: "<var_name> ~ a + b + ...", one term per other column.
     predictors = reddy_mice.columns[reddy_mice.columns != var_name]
     rhs = ' +'.join(' {0}'.format(name) for name in predictors)
     formula = var_name + ' ~' + rhs

     chain = mice.MICE(formula, lm.OLS, imputer)
     chain.fit(10, 10)  # fit(#cycles to skip, #datasets to impute)

     reddy_info[var_name] = chain.data.data[var_name].values
     if reddy_info[var_name].isnull().sum() != 0:
         raise AssertionError("All values could not be imputed.")

     print(var_name + " successfully imputed")
     return reddy_info
示例#5
0
    def test_combine(self):
        """Combined MICE/OLS estimates reproduce stored reference values."""
        np.random.seed(3897)
        n_obs = 300
        # Draw order is part of the fixture: x1, x2, then the noise term of y.
        x1 = np.random.normal(size=n_obs)
        x2 = np.random.normal(size=n_obs)
        y = x1 + x2 + np.random.normal(size=n_obs)

        # Missingness is introduced only after y has been computed.
        x1[:100] = np.nan
        x2[250:] = np.nan

        frame = pd.DataFrame({"x1": x1, "x2": x2, "y": y})
        imputer = mice.MICEData(frame)
        model = mice.MICE("y ~ x1 + x2", sm.OLS, imputer, n_skip=20)
        pooled = model.fit(10, 20)

        # Checked in this order: missing-information fractions, coefficients,
        # t statistics.
        references = (
            ("frac_miss_info", [0.1920533, 0.1587287, 0.33174032]),
            ("params", [-0.05397474, 0.97273307, 1.01652293]),
            ("tvalues", [-0.84781698, 15.10491582, 13.59998039]),
        )
        for attr, reference in references:
            assert_allclose(getattr(pooled, attr), np.asarray(reference),
                            atol=1e-5)
示例#6
0
    def test_combine(self):
        """Combined MICE/OLS estimates reproduce stored reference values."""
        np.random.seed(3897)
        n_obs = 300
        # Draw order is part of the fixture: x1, x2, then the noise term of y.
        x1 = np.random.normal(size=n_obs)
        x2 = np.random.normal(size=n_obs)
        y = x1 + x2 + np.random.normal(size=n_obs)

        # Missingness is introduced only after y has been computed.
        x1[:100] = np.nan
        x2[250:] = np.nan

        frame = pd.DataFrame({"x1": x1, "x2": x2, "y": y})
        imputer = mice.MICEData(frame)
        model = mice.MICE("y ~ x1 + x2", sm.OLS, imputer, n_skip=20)
        pooled = model.fit(10, 20)

        # Checked in this order: missing-information fractions, coefficients,
        # t statistics.
        references = (
            ("frac_miss_info", [0.1778143, 0.11057262, 0.29626521]),
            ("params", [-0.03486102, 0.96236808, 0.9970371]),
            ("tvalues", [-0.54674776, 15.28091069, 13.61359403]),
        )
        for attr, reference in references:
            assert_allclose(getattr(pooled, attr), np.asarray(reference),
                            atol=1e-5)
示例#7
0
def Interpolation_mice(df: pd.DataFrame) -> pd.DataFrame:
    """Impute the missing values of ``df`` with MICE and return the data.

    The first column is the response and every remaining column an additive
    predictor of the OLS analysis model, i.e. the formula is
    ``"<col0> ~ <col1> + <col2> + ..."``.

    Parameters
    ----------
    df : pd.DataFrame
        Data containing missing values. It needs at least two columns, and
        the column labels must be strings because they are spliced into the
        formula.

    Returns
    -------
    pd.DataFrame
        The data set returned by ``MICEData.next_sample()`` once the MICE
        fit has run.

    Raises
    ------
    IndexError
        If ``df`` has fewer than two columns.
    """
    columns = list(df.columns)
    if len(columns) < 2:
        # This used to surface as a bare positional-index error after the
        # imputer was already built. The exception type is unchanged so
        # existing handlers still match; only the message is clearer.
        raise IndexError(
            "Interpolation_mice needs at least two columns "
            "(a response and one predictor), got {}".format(len(columns))
        )

    imp = mice.MICEData(df)
    fml = columns[0] + " ~ " + " + ".join(columns[1:])
    mi = mice.MICE(fml, sm.OLS, imp)
    # The pooled regression results are not used here; the fit is run for
    # the imputation cycles it performs on `imp`.
    mi.fit(10, 10)
    return imp.next_sample()
示例#8
0
    chmiss).fit()
# Predictions of `lmodr` for the rows of `chmiss`, passed through `ilogit`,
# scaled by 100 and indexed by `mv`.
# NOTE(review): `lmodr`, `ilogit` and `mv` are defined above this excerpt --
# presumably a fitted model, the inverse logit and a missing-row selector;
# confirm against the earlier cells.
(ilogit(lmodr.predict(chmiss))*100)[mv]


#

# `race` values from `chredlin` at the positions where `chmiss.race` is NaN.
chredlin.race.iloc[np.where(chmiss.race.isna())]


# ## Multiple Imputation
#

import statsmodels.imputation.mice as smi
# Wrap the incomplete data for chained-equation imputation.
imp = smi.MICEData(chmiss)
# Analysis model: involact regressed on race, fire, theft, age and log(income).
fm = 'involact ~ race + fire + theft + age + np.log(income)'
mmod = smi.MICE(fm, sm.OLS, imp)
# fit(10, 50): per the statsmodels signature fit(n_burnin, n_imputations),
# 10 burn-in cycles and 50 imputed data sets.
results = mmod.fit(10, 50)
print(results.summary())


# ## Discussion
# ## Exercises

# ## Packages Used

import sys
import matplotlib
# NOTE(review): this rebinds `sm` to the top-level statsmodels package. The
# `sm.OLS` call above relies on an earlier binding of `sm` (presumably
# statsmodels.api -- confirm), so code added below must not expect `sm.OLS`.
import statsmodels as sm
import seaborn as sns
# Report the interpreter and matplotlib versions.
print("Python version:{}".format(sys.version))
print("matplotlib version: {}".format(matplotlib.__version__))
示例#9
0
# -*- coding: utf-8 -*-
"""
Created on Sun Sep  1 23:39:09 2019

@author: 92156
"""

import statsmodels.api as sm
from statsmodels.imputation import mice

# NOTE(review): `data` is not defined anywhere in this file. It has to be
# supplied before this point -- presumably a pandas DataFrame holding the
# columns y, x1, x2, x3 and x4 named in the formula below.
imp = mice.MICEData(data)
fml = 'y ~ x1 + x2 + x3 + x4'
# Bound to its own name so the imported `mice` module is not shadowed.
mice_model = mice.MICE(fml, sm.OLS, imp)
results = mice_model.fit(10, 10)
print(results.summary())