def setup_class(cls):
    """Fit an unpenalized cyclic-cubic-spline GAM on the mcycle data.

    Uses alpha=0, i.e. no smoothing penalty, and a BFGS fit.
    """
    # NOTE: the original assigned s_scale = 0.0263073404164214 here but
    # never used it (this variant is unpenalized) — dead local removed.
    cc = CyclicCubicSplines(data_mcycle['times'].values, df=[6])
    gam_cc = GLMGam(data_mcycle['accel'], smoother=cc, alpha=0)
    cls.res1 = gam_cc.fit(method='bfgs')
def setup_class(cls):
    """Fit a centered cyclic-cubic-spline GAM with explicit intercept via PIRLS."""
    s_scale = 0.0263073404164214
    nobs = data_mcycle['times'].shape[0]
    smoother = CyclicCubicSplines(data_mcycle['times'].values, df=[6],
                                  constraints='center')
    # explicit intercept column as exog; penalty weight derived from the
    # reference scale used for comparison with R results
    intercept = np.ones((nobs, 1))
    model = GLMGam(data_mcycle['accel'], intercept, smoother=smoother,
                   alpha=1 / s_scale / 2)
    cls.res1 = model.fit(method='pirls')
def _init(cls):
    """Build the centered spline basis, its penalty matrix, and the
    matrix square root of the penalty (used as a restriction matrix)."""
    # TODO: CyclicCubicSplines raises when using pandas, so convert first
    spline = CyclicCubicSplines(np.asarray(data_mcycle['times']), df=[6])
    # centering constraint: basis column means must be zero
    center_constraint = np.atleast_2d(spline.basis.mean(0))
    transf = transf_constraints(center_constraint)
    exog = spline.basis.dot(transf)
    penalty_matrix = transf.T.dot(spline.penalty_matrices[0]).dot(transf)
    restriction = matrix_sqrt(penalty_matrix)
    return exog, penalty_matrix, restriction
def setup_class(cls):
    """Penalized cyclic-cubic GAM on mcycle via BFGS, compared to R pls5."""
    s_scale = 0.0263073404164214
    times = data_mcycle['times'].values
    accel = data_mcycle['accel']
    smoother = CyclicCubicSplines(times, df=[6], constraints='center')
    model = GLMGam(accel, smoother=smoother, alpha=1 / s_scale / 2)
    cls.res1 = model.fit(method='bfgs')
    cls.res2 = results_pls.pls5
    cls.rtol_fitted = 1e-5
    # cls.covp_corrfact = 1.0025464444310588  # without edf
    # edf is implemented, so no correction factor is needed
    cls.covp_corrfact = 1
def setup_class(cls):
    """Fit the same two-smooth GAM on the autos data twice: once with the
    default optimizer (res1a) and once with Newton (res1b)."""
    sp = np.array([6.46225497484073, 0.81532465890585])
    s_scale = np.array([2.95973613706629e-07, 0.000126203730141359])
    x_spline = df_autos[['weight', 'hp']].values
    exog = patsy.dmatrix('fuel + drive', data=df_autos)
    smoother = CyclicCubicSplines(x_spline, df=[6, 5], constraints='center')
    # TODO alpha needs to be list
    alphas = (1 / s_scale * sp / 2).tolist()
    # a fresh model instance is built for each fit, as in the original
    model_a = GLMGam(df_autos['city_mpg'], exog=exog, smoother=smoother,
                     alpha=alphas)
    cls.res1a = model_a.fit()
    model_b = GLMGam(df_autos['city_mpg'], exog=exog, smoother=smoother,
                     alpha=alphas)
    cls.res1b = model_b.fit(method='newton')
def test_cyclic_cubic_splines():
    """Compare fitted cyclic-cubic-spline smooth components against the
    reference values produced by R's mgcv."""
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    file_path = os.path.join(cur_dir, "results",
                             "cubic_cyclic_splines_from_mgcv.csv")
    data_from_r = pd.read_csv(file_path)

    x = data_from_r[['x0', 'x2']].values
    y = data_from_r['y'].values
    y_est_mgcv = data_from_r[['y_est']].values  # noqa: F841
    s_mgcv = data_from_r[['s(x0)', 's(x2)']].values

    ccs = CyclicCubicSplines(x, df=[10, 10])
    # TODO: if alpha changes in pirls this should be updated
    alpha = [0.05 / 2, 0.0005 / 2]
    gam_res = GLMGam(y, smoother=ccs, alpha=alpha).fit(method='pirls')

    # Reconstruct each smooth component from its basis columns and demean
    # it. TODO: mean has to be removed — removing the mean could be
    # replaced by options for intercept handling.
    smooths = []
    for mask in ccs.mask:
        component = np.dot(ccs.basis[:, mask], gam_res.params[mask])
        smooths.append(component - component.mean())
    s0, s1 = smooths

    assert_allclose(s0, s_mgcv[:, 0], atol=0.02)
    assert_allclose(s1, s_mgcv[:, 1], atol=0.33)
def decompose(x, transform=True):
    """Decompose a time series into trend, seasonality and remainder.

    Parameters
    ----------
    x : pandas.Series
        Series with a datetime index.
    transform : bool, default True
        If True and all non-missing values are non-negative, apply a
        Box-Cox transform (lambda chosen by maximum log-likelihood)
        before decomposing.

    Returns
    -------
    dict
        Keys: 'x' (possibly transformed series), 'trend', 'seasonality',
        'remainder', 'transform' (whether Box-Cox was applied) and
        'lambda' (the Box-Cox lambda; NaN when not transformed).
    """
    if transform and min(x.dropna()) >= 0:
        # Box-Cox with lambda chosen by maximum likelihood.
        # NOTE: the R version additionally offers a method that minimizes
        # the coefficient of variation ("guerrero").
        x_transformed, var_lambda = boxcox(na_contiguous(x), lmbda=None)
        x_transformed = pd.Series(x_transformed, index=na_contiguous(x).index)
    else:
        x_transformed = x
        var_lambda = np.nan
        transform = False

    # Frequency (samples per unit time) is taken from R's stats::frequency.
    # NOTE(review): a pure-pandas frequency detection from datetime-index
    # differences was abandoned in the original (left commented out there).
    # The original also notes this condition is "supposed to be > 1" but
    # that all data here yields frequency 1 — preserved as-is; confirm
    # against the R reference behavior.
    if int(rstats.frequency(x_transformed).item()) == 1:
        # Seasonal branch: classical decomposition.
        # rstats.stl(..., s_window='periodic') fails here with
        # "series is not periodic or has less than two periods",
        # so statsmodels' seasonal_decompose is used instead.
        stl = sm.tsa.seasonal_decompose(na_contiguous(x_transformed))
        trend = stl.trend
        seasonality = stl.seasonal
        remainder = x_transformed - trend - seasonality
    else:
        # Nonseasonal branch: smooth trend only, no seasonal component.
        trend = pd.Series(np.nan, index=x_transformed.index)
        time_index = pd.Index(range(1, len(x_transformed) + 1))
        # NOTE(review): the original built an unused BSplines basis and fit
        # a GLMGam whose result was never used (its fittedvalues assignment
        # was commented out); the trend actually comes from the R mgcv fit
        # below, so that dead computation has been removed.
        fmla = Formula('x ~ s(tt)')
        env = fmla.environment
        env['tt'] = time_index
        env['x'] = x_transformed
        trend.loc[~x_transformed.isnull()] = rstats.fitted(rmgcv.gam(fmla))
        seasonality = pd.Series(np.nan, index=x_transformed.index)
        remainder = x_transformed - trend

    return {
        'x': x_transformed,
        'trend': trend,
        'seasonality': seasonality,
        'remainder': remainder,
        'transform': transform,
        'lambda': var_lambda,
    }