def rlm_date(x, y):
    """Fit a robust linear model of ``y`` on the dates ``x`` plus a constant.

    ``x`` is converted with ``date2num`` and extended with a constant column
    by ``sm.add_constant`` before being passed to ``RLM`` as the regressors.

    Returns a 3-tuple: ``x`` unchanged, the fitted values of the model, and
    the arithmetic mean of the model's p-values.
    """
    from statsmodels.robust.robust_linear_model import RLM

    design = sm.add_constant(date2num(x))
    fit = RLM(y, design).fit()
    return x, fit.fittedvalues, N.array(fit.pvalues).mean()
示例#2
0
def test_rlm_start_values():
    """A fit seeded with explicit ``start_params`` must match the default fit."""
    stackloss = sm.datasets.stackloss.load_pandas()
    design = sm.add_constant(stackloss.exog, prepend=False)
    rlm = RLM(stackloss.endog, design, M=norms.HuberT())

    default_fit = rlm.fit()
    seeded_fit = rlm.fit(
        start_params=[0.7156402, 1.29528612, -0.15212252, -39.91967442]
    )

    assert_allclose(default_fit.params, seeded_fit.params)
示例#3
0
def test_bad_criterion():
    """An unrecognised ``conv`` argument to ``fit`` must raise ``ValueError``."""
    stackloss = load_stackloss()
    # Work on plain ndarrays, then append the constant column last.
    exog = np.asarray(stackloss.exog)
    stackloss.endog = np.asarray(stackloss.endog)
    stackloss.exog = sm.add_constant(exog, prepend=False)

    rlm = RLM(stackloss.endog, stackloss.exog, M=norms.HuberT())
    with pytest.raises(ValueError, match='Convergence argument unknown'):
        rlm.fit(conv='unknown')
示例#4
0
    def setup_class(cls):
        """Fit the AndrewWave RLM with ``HuberScale`` and store it as ``cls.res1``.

        The scaled covariance matrices of the "H2" and "H3" fits are attached
        to the stored result as ``h2`` and ``h3``.
        """
        super(TestRlmAndrewsHuber, cls).setup_class()

        rlm = RLM(cls.data.endog, cls.data.exog, M=norms.AndrewWave())
        fitted = rlm.fit(scale_est=HuberScale())
        fitted.h2 = rlm.fit(cov="H2", scale_est=HuberScale()).bcov_scaled
        fitted.h3 = rlm.fit(cov="H3", scale_est=HuberScale()).bcov_scaled
        cls.res1 = fitted
示例#5
0
    def setup_class(cls):
        """Fit the TukeyBiweight RLM with ``HuberScale`` and store it as ``cls.res1``.

        The scaled covariance matrices of the "H2" and "H3" fits are attached
        to the stored result as ``h2`` and ``h3``.
        """
        super(TestRlmBisquareHuber, cls).setup_class()

        rlm = RLM(cls.data.endog, cls.data.exog, M=norms.TukeyBiweight())
        fitted = rlm.fit(scale_est=HuberScale())
        for cov_name in ("H2", "H3"):
            bcov = rlm.fit(cov=cov_name, scale_est=HuberScale()).bcov_scaled
            setattr(fitted, cov_name.lower(), bcov)
        cls.res1 = fitted
示例#6
0
    def setup_class(cls):
        """Prepare ``cls.res1``: AndrewWave norm combined with ``HuberScale``.

        ``cls.res1`` is the default-covariance fit; its ``h2`` and ``h3``
        attributes hold ``bcov_scaled`` from the "H2" and "H3" fits.
        """
        super(TestRlmAndrewsHuber, cls).setup_class()

        rlm = RLM(cls.data.endog, cls.data.exog, M=norms.AndrewWave())
        fitted = rlm.fit(scale_est=HuberScale())
        for cov_name in ("H2", "H3"):
            bcov = rlm.fit(cov=cov_name, scale_est=HuberScale()).bcov_scaled
            setattr(fitted, cov_name.lower(), bcov)
        cls.res1 = fitted
示例#7
0
    def setup_class(cls):
        """Prepare ``cls.res1``: TukeyBiweight norm combined with ``HuberScale``.

        ``cls.res1`` is the default-covariance fit; its ``h2`` and ``h3``
        attributes hold ``bcov_scaled`` from the "H2" and "H3" fits.
        """
        super(TestRlmBisquareHuber, cls).setup_class()

        rlm = RLM(cls.data.endog, cls.data.exog, M=norms.TukeyBiweight())
        fitted = rlm.fit(scale_est=HuberScale())
        fitted.h2 = rlm.fit(cov="H2", scale_est=HuberScale()).bcov_scaled
        fitted.h3 = rlm.fit(cov="H3", scale_est=HuberScale()).bcov_scaled
        cls.res1 = fitted
示例#8
0
    def setup_class(cls):
        """Load the stackloss data and fit a HuberT RLM with ``HuberScale``.

        ``cls.data`` holds the dataset with a constant column appended to
        ``exog``; ``cls.res1`` holds the default fit, with ``h2``/``h3`` set
        to ``bcov_scaled`` of the "H2"/"H3" fits.
        """
        cls.data = stackloss = load_stackloss()
        stackloss.exog = sm.add_constant(stackloss.exog, prepend=False)

        rlm = RLM(stackloss.endog, stackloss.exog, M=norms.HuberT())
        fitted = rlm.fit(scale_est=HuberScale())
        fitted.h2 = rlm.fit(cov="H2", scale_est=HuberScale()).bcov_scaled
        fitted.h3 = rlm.fit(cov="H3", scale_est=HuberScale()).bcov_scaled
        cls.res1 = fitted
示例#9
0
    def setup_class(cls):
        """Load stackloss (non-pandas) and fit a HuberT RLM with ``HuberScale``.

        ``cls.data`` holds the dataset with a constant column appended to
        ``exog``; ``cls.res1`` holds the default fit, with ``h2``/``h3`` set
        to ``bcov_scaled`` of the "H2"/"H3" fits.
        """
        from statsmodels.datasets.stackloss import load

        cls.data = stackloss = load(as_pandas=False)
        stackloss.exog = sm.add_constant(stackloss.exog, prepend=False)

        rlm = RLM(stackloss.endog, stackloss.exog, M=norms.HuberT())
        fitted = rlm.fit(scale_est=HuberScale())
        for cov_name in ("H2", "H3"):
            bcov = rlm.fit(cov=cov_name, scale_est=HuberScale()).bcov_scaled
            setattr(fitted, cov_name.lower(), bcov)
        cls.res1 = fitted
示例#10
0
 def __init__(self):
     """Fit AndrewWave RLMs on ``self.data`` and store the result as ``self.res1``.

     ``self.res1.h2`` and ``self.res1.h3`` receive ``bcov_scaled`` from the
     fits run with ``cov="H2"`` and ``cov="H3"``.
     """
     def new_model():
         # Each fit runs on its own freshly constructed model.
         return RLM(self.data.endog, self.data.exog,
                    M=sm.robust.norms.AndrewWave())

     fitted = new_model().fit()
     fitted.h2 = new_model().fit(cov="H2").bcov_scaled
     fitted.h3 = new_model().fit(cov="H3").bcov_scaled
     self.res1 = fitted
示例#11
0
    def setup_class(cls):
        """Fit the TukeyBiweight RLM and store it as ``cls.res1``.

        Also sets ``cls.decimal_standarderrors`` to ``DECIMAL_1``.  The
        "H2"/"H3" scaled covariances are attached to the result as ``h2``/``h3``.
        """
        super(TestRlmBisquare, cls).setup_class()
        # Precision setting stored on the class.
        cls.decimal_standarderrors = DECIMAL_1

        rlm = RLM(cls.data.endog, cls.data.exog, M=norms.TukeyBiweight())
        fitted = rlm.fit()
        fitted.h2 = rlm.fit(cov="H2").bcov_scaled
        fitted.h3 = rlm.fit(cov="H3").bcov_scaled
        cls.res1 = fitted
示例#12
0
    def setup_class(cls):
        """Prepare ``cls.res1`` from a TukeyBiweight RLM fit.

        ``cls.decimal_standarderrors`` is set to ``DECIMAL_1``; ``h2`` and
        ``h3`` on the result are ``bcov_scaled`` of the "H2" and "H3" fits.
        """
        super(TestRlmBisquare, cls).setup_class()
        # Precision setting stored on the class.
        cls.decimal_standarderrors = DECIMAL_1

        rlm = RLM(cls.data.endog, cls.data.exog, M=norms.TukeyBiweight())
        fitted = rlm.fit()
        for cov_name in ("H2", "H3"):
            setattr(fitted, cov_name.lower(), rlm.fit(cov=cov_name).bcov_scaled)
        cls.res1 = fitted
示例#13
0
 def setup_class(cls):
     """Fit AndrewWave RLMs on ``cls.data`` and store the result as ``cls.res1``.

     ``cls.res1.h2`` and ``cls.res1.h3`` receive ``bcov_scaled`` from the
     fits run with ``cov="H2"`` and ``cov="H3"``.
     """
     super(TestRlmAndrews, cls).setup_class()

     def new_model():
         # Each fit runs on its own freshly constructed model.
         return RLM(cls.data.endog, cls.data.exog,
                    M=sm.robust.norms.AndrewWave())

     fitted = new_model().fit()
     fitted.h2 = new_model().fit(cov="H2").bcov_scaled
     fitted.h3 = new_model().fit(cov="H3").bcov_scaled
     cls.res1 = fitted
示例#14
0
 def __init__(self):
     """Fit HuberT RLMs with ``HuberScale`` and store the result as ``self.res1``.

     ``self.res1.h2`` and ``self.res1.h3`` receive ``bcov_scaled`` from the
     fits run with ``cov="H2"`` and ``cov="H3"``.
     """
     def fit_new_model(**fit_kwargs):
         # Every fit gets its own model instance and its own HuberScale().
         model = RLM(self.data.endog, self.data.exog,
                     M=sm.robust.norms.HuberT())
         return model.fit(scale_est=sm.robust.scale.HuberScale(),
                          **fit_kwargs)

     fitted = fit_new_model()
     fitted.h2 = fit_new_model(cov="H2").bcov_scaled
     fitted.h3 = fit_new_model(cov="H3").bcov_scaled
     self.res1 = fitted
示例#15
0
    def __init__(self):
        """Fit HuberT RLMs on ``self.data`` and store the result as ``self.res1``.

        The main fit uses ``conv='sresid'``; the "H2" and "H3" fits use the
        default convergence criterion and only contribute ``bcov_scaled``.
        """
        # Precision settings stored on the instance.
        self.decimal_standarderrors = DECIMAL_1
        self.decimal_scale = DECIMAL_3

        def new_model():
            # Each fit runs on its own freshly constructed model.
            return RLM(self.data.endog, self.data.exog,
                       M=sm.robust.norms.HuberT())

        fitted = new_model().fit(conv='sresid')
        fitted.h2 = new_model().fit(cov="H2").bcov_scaled
        fitted.h3 = new_model().fit(cov="H3").bcov_scaled
        self.res1 = fitted
示例#16
0
    def setup_class(cls):
        """Load stackloss, fit a HuberT RLM and store the result as ``cls.res1``.

        ``cls.data`` (with a constant appended to ``exog``) and the precision
        attributes are set on the class so subclasses can use them.  The main
        fit uses ``conv='sresid'``; ``h2``/``h3`` come from the "H2"/"H3" fits.
        """
        cls.data = stackloss = load_stackloss()
        stackloss.exog = sm.add_constant(stackloss.exog, prepend=False)
        # Precision settings stored on the class.
        cls.decimal_standarderrors = DECIMAL_1
        cls.decimal_scale = DECIMAL_3

        rlm = RLM(stackloss.endog, stackloss.exog, M=norms.HuberT())
        fitted = rlm.fit(conv='sresid')
        fitted.h2 = rlm.fit(cov="H2").bcov_scaled
        fitted.h3 = rlm.fit(cov="H3").bcov_scaled
        cls.res1 = fitted
示例#17
0
    def setup_class(cls):
        """Fit the Hampel RLM and store it as ``cls.res1``.

        Sets three precision attributes on the class, then attaches the
        "H2"/"H3" scaled covariances to the result as ``h2``/``h3``.
        """
        super(TestHampel, cls).setup_class()
        # Precision settings stored on the class.
        cls.decimal_standarderrors = DECIMAL_2
        cls.decimal_scale = DECIMAL_3
        cls.decimal_bcov_scaled = DECIMAL_3

        rlm = RLM(cls.data.endog, cls.data.exog, M=norms.Hampel())
        fitted = rlm.fit()
        fitted.h2 = rlm.fit(cov="H2").bcov_scaled
        fitted.h3 = rlm.fit(cov="H3").bcov_scaled
        cls.res1 = fitted
示例#18
0
    def setup_class(cls):
        """Prepare ``cls.res1`` from a Hampel RLM fit.

        Three precision attributes are set on the class first; ``h2`` and
        ``h3`` on the result are ``bcov_scaled`` of the "H2" and "H3" fits.
        """
        super(TestHampel, cls).setup_class()
        # Precision settings stored on the class.
        cls.decimal_standarderrors = DECIMAL_2
        cls.decimal_scale = DECIMAL_3
        cls.decimal_bcov_scaled = DECIMAL_3

        rlm = RLM(cls.data.endog, cls.data.exog, M=norms.Hampel())
        fitted = rlm.fit()
        for cov_name in ("H2", "H3"):
            setattr(fitted, cov_name.lower(), rlm.fit(cov=cov_name).bcov_scaled)
        cls.res1 = fitted
示例#19
0
    def setup_class(cls):
        """Load stackloss (non-pandas), fit a HuberT RLM, store it as ``cls.res1``.

        ``cls.data`` (with a constant appended to ``exog``) and the precision
        attributes are set on the class so subclasses can use them.  The main
        fit uses ``conv='sresid'``; ``h2``/``h3`` come from the "H2"/"H3" fits.
        """
        from statsmodels.datasets.stackloss import load

        cls.data = stackloss = load(as_pandas=False)
        stackloss.exog = sm.add_constant(stackloss.exog, prepend=False)
        # Precision settings stored on the class.
        cls.decimal_standarderrors = DECIMAL_1
        cls.decimal_scale = DECIMAL_3

        rlm = RLM(stackloss.endog, stackloss.exog, M=norms.HuberT())
        fitted = rlm.fit(conv='sresid')
        for cov_name in ("H2", "H3"):
            setattr(fitted, cov_name.lower(), rlm.fit(cov=cov_name).bcov_scaled)
        cls.res1 = fitted
示例#20
0
    def __init__(self):
        """Fit Hampel RLMs on ``self.data`` and store the result as ``self.res1``.

        ``self.res1.h2`` and ``self.res1.h3`` receive ``bcov_scaled`` from the
        fits run with ``cov="H2"`` and ``cov="H3"``.
        """
        # Precision settings stored on the instance.
        self.decimal_standarderrors = DECIMAL_2
        self.decimal_scale = DECIMAL_3
        self.decimal_bcov_scaled = DECIMAL_3

        def new_model():
            # Each fit runs on its own freshly constructed model.
            return RLM(self.data.endog, self.data.exog,
                       M=sm.robust.norms.Hampel())

        fitted = new_model().fit()
        fitted.h2 = new_model().fit(cov="H2").bcov_scaled
        fitted.h3 = new_model().fit(cov="H3").bcov_scaled
        self.res1 = fitted
示例#21
0
    def __init__(self):
        """Fit TukeyBiweight RLMs on ``self.data`` and store the result as ``self.res1``.

        ``self.res1.h2`` and ``self.res1.h3`` receive ``bcov_scaled`` from the
        fits run with ``cov="H2"`` and ``cov="H3"``.
        """
        # Precision setting stored on the instance.
        self.decimal_standarderrors = DECIMAL_1

        def new_model():
            # Each fit runs on its own freshly constructed model.
            return RLM(self.data.endog, self.data.exog,
                       M=sm.robust.norms.TukeyBiweight())

        fitted = new_model().fit()
        fitted.h2 = new_model().fit(cov="H2").bcov_scaled
        fitted.h3 = new_model().fit(cov="H3").bcov_scaled
        self.res1 = fitted
示例#22
0
 def setup_class(cls):
     """Fit Hampel RLMs with ``HuberScale`` and store the result as ``cls.res1``.

     ``cls.res1.h2`` and ``cls.res1.h3`` receive ``bcov_scaled`` from the
     fits run with ``cov="H2"`` and ``cov="H3"``.
     """
     super(TestHampelHuber, cls).setup_class()

     def fit_new_model(**fit_kwargs):
         # Every fit gets its own model instance and its own HuberScale().
         model = RLM(cls.data.endog, cls.data.exog,
                     M=sm.robust.norms.Hampel())
         return model.fit(scale_est=sm.robust.scale.HuberScale(),
                          **fit_kwargs)

     fitted = fit_new_model()
     fitted.h2 = fit_new_model(cov="H2").bcov_scaled
     fitted.h3 = fit_new_model(cov="H3").bcov_scaled
     cls.res1 = fitted
示例#23
0
 def setup_class(cls):
     """Load stackloss (non-pandas) and fit HuberT RLMs with ``HuberScale``.

     ``cls.data`` holds the dataset with a constant column appended to
     ``exog``; ``cls.res1`` holds the default fit, with ``h2``/``h3`` set to
     ``bcov_scaled`` of the "H2"/"H3" fits.
     """
     from statsmodels.datasets.stackloss import load

     cls.data = stackloss = load(as_pandas=False)
     stackloss.exog = sm.add_constant(stackloss.exog, prepend=False)

     def fit_new_model(**fit_kwargs):
         # Every fit gets its own model instance and its own HuberScale().
         model = RLM(cls.data.endog, cls.data.exog,
                     M=sm.robust.norms.HuberT())
         return model.fit(scale_est=sm.robust.scale.HuberScale(),
                          **fit_kwargs)

     fitted = fit_new_model()
     fitted.h2 = fit_new_model(cov="H2").bcov_scaled
     fitted.h3 = fit_new_model(cov="H3").bcov_scaled
     cls.res1 = fitted
示例#24
0
    def setup_class(cls):
        """Load stackloss (non-pandas), fit a HuberT RLM, keep model and result.

        Sets ``cls.data`` (constant appended to ``exog``), the precision
        attributes, ``cls.model`` and ``cls.res1``.  ``cls.res1.h2``/``h3``
        are ``bcov_scaled`` of the "H2"/"H3" fits of the same model.
        """
        from statsmodels.datasets.stackloss import load

        cls.data = stackloss = load(as_pandas=False)
        stackloss.exog = sm.add_constant(stackloss.exog, prepend=False)
        # Precision settings stored on the class.
        cls.decimal_standarderrors = DECIMAL_1
        cls.decimal_scale = DECIMAL_3

        cls.model = rlm = RLM(stackloss.endog, stackloss.exog, M=norms.HuberT())
        fitted = rlm.fit()
        fitted.h2 = rlm.fit(cov="H2").bcov_scaled
        fitted.h3 = rlm.fit(cov="H3").bcov_scaled
        cls.res1 = fitted
示例#25
0
 def setup_class(cls):
     """Fit TukeyBiweight RLMs with ``HuberScale`` and store the result as ``cls.res1``.

     ``cls.res1.h2`` and ``cls.res1.h3`` receive ``bcov_scaled`` from the
     fits run with ``cov="H2"`` and ``cov="H3"``.
     """
     super(TestRlmBisquareHuber, cls).setup_class()

     def fit_new_model(**fit_kwargs):
         # Every fit gets its own model instance and its own HuberScale().
         model = RLM(cls.data.endog, cls.data.exog,
                     M=sm.robust.norms.TukeyBiweight())
         return model.fit(scale_est=sm.robust.scale.HuberScale(),
                          **fit_kwargs)

     fitted = fit_new_model()
     fitted.h2 = fit_new_model(cov="H2").bcov_scaled
     fitted.h3 = fit_new_model(cov="H3").bcov_scaled
     cls.res1 = fitted
示例#26
0
    def setup_class(cls):
        """Load stackloss, fit HuberT RLMs and store the result as ``cls.res1``.

        ``cls.data`` (with a constant appended to ``exog``) and the precision
        attributes are set on the class so subclasses can use them.  The main
        fit uses ``conv='sresid'``; ``h2``/``h3`` come from separate models
        fitted with ``cov="H2"``/``cov="H3"``.
        """
        from statsmodels.datasets.stackloss import load

        cls.data = stackloss = load()
        stackloss.exog = sm.add_constant(stackloss.exog, prepend=False)
        # Precision settings stored on the class.
        cls.decimal_standarderrors = DECIMAL_1
        cls.decimal_scale = DECIMAL_3

        def new_model():
            # Each fit runs on its own freshly constructed model.
            return RLM(cls.data.endog, cls.data.exog,
                       M=sm.robust.norms.HuberT())

        fitted = new_model().fit(conv='sresid')
        fitted.h2 = new_model().fit(cov="H2").bcov_scaled
        fitted.h3 = new_model().fit(cov="H3").bcov_scaled
        cls.res1 = fitted
示例#27
0
def fit_velocity(dist, tmax1, plot=True):
    """Robustly estimate a single velocity from distances and lag times.

    The ratio ``(dist / 1000) / |tmax1|`` is fitted with an intercept-only
    ``RLM`` using a ``Hampel(1, 2, 4)`` norm (at most 100 iterations).

    Returns ``(v, w, scale)``: the fitted parameter, the weights of the fit,
    and ``mad`` of the ratios centred on ``v`` with ``c=1``.

    When ``plot`` is true a new two-panel figure is created: a scatter of
    ``dist`` against ``tmax1`` with the lines implied by ``v``, and a
    histogram of the ratios on 21 bin edges between 3 and 5.
    """
    from statsmodels.robust.scale import mad

    scaled_dist = dist / 1000
    abs_lag = np.abs(tmax1)
    intercept = np.ones(len(abs_lag))
    ratios = scaled_dist / abs_lag

    result = RLM(ratios, intercept, M=sm.robust.norms.Hampel(1, 2, 4)).fit(
        maxiter=100)
    v = result.params[0]
    w = result.weights
    scale = mad(ratios, center=v, c=1)

    # Lag at which the largest distance is reached when moving at speed v.
    t_end = np.max(scaled_dist) / v
    if plot:
        fig = plt.figure()
        left = fig.add_subplot(121)
        left.scatter(tmax1, dist)
        far = np.max(dist)
        left.plot((-t_end, 0, t_end), (far, 0, far))
        right = fig.add_subplot(122)
        right.hist(ratios, bins=np.linspace(3, 5, 21))
    return v, w, scale
示例#28
0
def test_rlm_start_values_errors():
    """``fit`` must reject ``start_params`` of the wrong length or dimension."""
    stackloss = sm.datasets.stackloss.load_pandas()
    design = sm.add_constant(stackloss.exog, prepend=False)
    rlm = RLM(stackloss.endog, design, M=norms.HuberT())

    # Three values for a four-column design.
    too_short = [0.7156402, 1.29528612, -0.15212252]
    with pytest.raises(ValueError):
        rlm.fit(start_params=too_short)

    # A two-dimensional array instead of a vector.
    two_dim = np.array([too_short, too_short]).T
    with pytest.raises(ValueError):
        rlm.fit(start_params=two_dim)
示例#29
0
文件: util.py 项目: mlaaten/qopen
def robust_stat(data, axis=None, fall_back=5):
    """Robust location and scale estimate of ``data``.

    Non-finite entries are masked out first.  If at least ``fall_back`` valid
    samples remain, an intercept-only ``RLM`` is fitted and its single
    parameter and its ``scale`` attribute are returned (with statsmodels'
    default settings the scale is a median-absolute-deviation estimate).
    With fewer samples the result of ``weighted_stat(data)`` is returned.

    data:      1-D array-like when ``axis`` is None, otherwise at most 2-D.
    axis:      None to reduce the whole 1-D input.  With ``axis=0`` the data
               is transposed first; for any non-None value the (possibly
               transposed) array is then processed row by row.
    fall_back: minimum number of valid samples required for the robust fit;
               it is applied to every row when ``axis`` is given.

    Returns ``(mean, err)``: scalars for 1-D input, arrays with one entry
    per processed row when ``axis`` is given.

    See also the statsmodels documentation on robust linear models (RLM).
    """
    data = np.ma.masked_invalid(data)
    assert len(data.shape) < 3
    if axis is not None:
        if axis == 0:
            data = data.T
        mean = np.empty(data.shape[0])
        err = np.empty(data.shape[0])
        for i, d in enumerate(data):
            # Forward fall_back so each row honours the caller's threshold
            # instead of silently reverting to the default.
            mean[i], err[i] = robust_stat(d, fall_back=fall_back)
        return mean, err
    assert len(data.shape) < 2
    # Keep only the valid (unmasked) samples.
    data = data[~data.mask]
    if len(data) < fall_back:
        return weighted_stat(data)
    res = RLM(data, np.ones(len(data))).fit()
    return res.params[0], res.scale
示例#30
0
def test_bad_criterion():
    """An unrecognised ``conv`` argument to ``fit`` must raise ``ValueError``."""
    stackloss = sm.datasets.stackloss.load(as_pandas=True)
    stackloss.exog = sm.add_constant(stackloss.exog, prepend=False)

    rlm = RLM(stackloss.endog, stackloss.exog, M=norms.HuberT())
    with pytest.raises(ValueError, match='Convergence argument unknown'):
        rlm.fit(conv='unknown')
#se_gau_x,se_gau_y = gaussian_point_smooth(N.array(middate[wse]),N.array(d.mb[wse]))
#sc_gau_x,sc_gau_y = gaussian_point_smooth(N.array(middate[wsc]),N.array(d.mb[wsc]))
#in_gau_x,in_gau_y = gaussian_point_smooth(N.array(middate[win]),N.array(d.mb[win]))

# Smooth d.mb against middate with loess_point_smooth (frac=1), once for each
# of the three index selections wse, wsc and win.
se_gau_x,se_gau_y = loess_point_smooth(N.array(middate[wse]),N.array(d.mb[wse]),frac=1)
sc_gau_x,sc_gau_y = loess_point_smooth(N.array(middate[wsc]),N.array(d.mb[wsc]),frac=1)
in_gau_x,in_gau_y = loess_point_smooth(N.array(middate[win]),N.array(d.mb[win]),frac=1)

#se_gau_x,se_gau_y,se_p = rlm_date(N.array(middate[wse]),N.array(d.mb[wse]))#loess_point_smooth(N.array(middate[wse]),N.array(d.mb[wse]),frac=0.7)
#sc_gau_x,sc_gau_y,sc_p = rlm_date(N.array(middate[wsc]),N.array(d.mb[wsc]))#loess_point_smooth(N.array(middate[wsc]),N.array(d.mb[wsc]),frac=0.7)
#in_gau_x,in_gau_y,in_p = rlm_date(N.array(middate[win]),N.array(d.mb[win]))#loess_point_smooth(N.array(middate[win]),N.array(d.mb[win]),frac=0.7)


# NOTE(review): the value of the next expression is discarded; it looks like
# leftover interactive inspection -- confirm before removing.
N.array(date2num(middate[wse]))
from statsmodels.robust.robust_linear_model import RLM
# Robust linear fit of d.mb[wse] on the numeric dates.  The regressor is passed
# as-is: no constant column is added to it here.
rlmmodel = RLM(N.array(d.mb[wse]),N.array(date2num(middate[wse])))
rlmresults = rlmmodel.fit()


#FIGURE SETUP
fig = plt.figure(figsize=[4,7])
#ax1 = fig.add_subplot(414)
#ax2 = fig.add_subplot(413)
#ax3 = fig.add_subplot(412)
#ax4 = fig.add_subplot(411)

# Four stacked axes on an 8x1 grid, two rows each; ax1 is the bottom panel
# (rows 6-7) and ax4 the top panel (rows 0-1).
ax1 = plt.subplot2grid((8,1), (6, 0), rowspan=2)
ax2 = plt.subplot2grid((8,1), (4, 0), rowspan=2)
ax3 = plt.subplot2grid((8,1), (2, 0), rowspan=2)
ax4 = plt.subplot2grid((8,1), (0, 0), rowspan=2)
#ax5 = plt.subplot2grid((9,1), (2, 0))
示例#32
0
def test_perfect_fit(perfect_fit_data, norm):
    """With ``endog`` as response the fitted parameters must be ``[0, 1, 1]``."""
    expected = np.array([0, 1, 1])
    model = RLM(perfect_fit_data.endog, perfect_fit_data.exog, M=norm)
    fitted = model.fit()
    assert_allclose(fitted.params, expected, atol=1e-8)
示例#33
0
def test_perfect_const(perfect_fit_data, norm):
    """With ``const`` as response the fitted parameters must be ``[3.2, 0, 0]``."""
    expected = np.array([3.2, 0, 0])
    model = RLM(perfect_fit_data.const, perfect_fit_data.exog, M=norm)
    fitted = model.fit()
    assert_allclose(fitted.params, expected, atol=1e-8)
示例#34
0
def test_perfect_const(perfect_fit_data, norm):
    """With ``const`` as response the fitted parameters must be ``[3.2, 0, 0]``.

    Warnings raised while constructing and fitting the model are ignored.
    """
    expected = np.array([3.2, 0, 0])
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        model = RLM(perfect_fit_data.const, perfect_fit_data.exog, M=norm)
        fitted = model.fit()
    assert_allclose(fitted.params, expected, atol=1e-8)
示例#35
0
def test_alt_criterion(conv):
    """A fit with convergence criterion ``conv`` must match the default fit."""
    stackloss = sm.datasets.stackloss.load(as_pandas=True)
    stackloss.exog = sm.add_constant(stackloss.exog, prepend=False)
    endog, exog = stackloss.endog, stackloss.exog

    reference = RLM(endog, exog, M=norms.HuberT()).fit()
    alternative = RLM(endog, exog, M=norms.HuberT()).fit(conv=conv)

    assert_allclose(reference.params, alternative.params)
示例#36
0
import matplotlib.pyplot as plt 
import statsmodels.api as sm

fig, ax = plt.subplots(figsize=(12,8))
# Decorations drawn before the main plot so they sit underneath it.
# Use the leverage values themselves (diagonal of the hat matrix): this is the
# quantity influence_plot puts on the x-axis.  hat_diag_factor is h / (1 - h),
# a different scale, so a guide computed from it would be misplaced.
hats=LinRegEPI.get_influence().hat_matrix_diag
# Shaded band for residuals between -2 and 2; points outside it deserve a look.
ax.axhspan(-2,2,edgecolor='r',facecolor='g',fill=True,ls='--',alpha=0.1)
# Vertical guide at twice the mean leverage (high-leverage warning).
ax.axvline(2*hats.mean(),ls='dashed',c='r')

# Main plot: influence plot with point sizes based on Cook's distance.
fig = sm.graphics.influence_plot(LinRegEPI,  ax = ax, criterion="cooks")
#%%
from statsmodels.robust.robust_linear_model import RLM

# Robust linear model (RLM) of environment on three numeric predictors,
# fitted from a formula against the `indexes` data.
formula='environment ~ corruptionIndex + scoreEconomy + scorepressOK'
LinRegEPI_R = RLM.from_formula(formula, indexes).fit()
#print(LinRegEPI_R.summary())
#%%
# Side-by-side table: coefficient estimates and whether each p-value is < 0.05.
pd.concat({'Coefficients':LinRegEPI_R.params,
           'Significant?':LinRegEPI_R.pvalues<0.05},axis=1)
#%%
# OLS with presscat entered as a categorical predictor, using treatment
# coding with "High" as the reference level.
formula='environment ~ corruptionIndex + scoreEconomy + C(presscat, Treatment("High"))'
LinRegEPI_catX = smf.ols(formula, indexes).fit()
#print(LinRegEPI_catX.summary())
#%%
# Same coefficient / significance table for the categorical model.
pd.concat({'Coefficients':LinRegEPI_catX.params,
           'Significant?':LinRegEPI_catX.pvalues<0.05},axis=1)
#%%
# Adjusted R-squared of the categorical model (displayed as the cell output).
LinRegEPI_catX.rsquared_adj
#%%
# Bar chart of how often each environmentCat value occurs.
indexes.environmentCat.value_counts().plot.bar()