def test_calculate_f_tilde(normal_input):
    """Check `_calculate_f_tilde` with r=2 against a stored reference matrix.

    `normal_input` is supplied by the caller (presumably a pytest fixture;
    confirm in the test configuration) and is passed to `FactorEstimator`.
    """
    factor_estimator = FactorEstimator(normal_input)
    f_hat = factor_estimator._calculate_f_tilde(r=2)
    expect = np.array(
        [
            [0.5681053, -1.9665053],
            [-1.1560569, -0.3777842],
            [-1.8189881, -0.8677836],
            [-0.7013869, 0.1826577],
            [-0.7135161, 0.9389870],
            [0.1761313, -0.5674743],
        ]
    )
    # Go through the `np.testing` namespace so the check does not depend on a
    # bare `assert_almost_equal` name being imported into this module.
    np.testing.assert_almost_equal(f_hat, expect)
def task_factor_estimate_interactive_fixed_effects_model(produces):
    """Simulate factor-number estimation for the interactive fixed effects model.

    For every (N, T) design the task runs `nsims` simulations. Each simulation
    draws data from
    `dgp_interactive_fixed_effects_model_with_common_and_time_invariant`,
    takes start values from a pooled OLS fit on the regressors `x1`..`x5`,
    fits `InteractiveFixedEffect`, and applies `FactorEstimator.r_hat` to the
    residual `Y - (X.T.dot(beta_hat)).T` for the criteria PC and IC, each
    combined with the penalty functions g1, g2 and g3. The averages over all
    simulations are stored as one row per design.

    Args:
        produces: Destination handed to `DataFrame.to_csv` for the result
            table (written without the index).
    """
    rmax = 8
    nsims = 1000
    all_N = [100, 100, 100, 100, 10, 20, 50]
    all_T = [10, 20, 50, 100, 100, 100, 100]
    dgp_func = dgp_interactive_fixed_effects_model_with_common_and_time_invariant
    tolerance = 0.0001
    beta_true = {"beta1": 1, "beta2": 3, "mu": 5, "gamma": 2, "delta": 4}
    # First positional argument of `InteractiveFixedEffect.fit`; presumably
    # the number of factors used during estimation (confirm in the estimator).
    r0 = 8
    regressor_columns = ["x" + str(k) for k in range(1, 6)]
    result_columns = ["T", "N", "PC1", "PC2", "PC3", "IC1", "IC2", "IC3"]

    # Seed once so the whole sequence of simulated designs is reproducible.
    np.random.seed(123)

    # Collect the per-design averages and concatenate once at the end:
    # `DataFrame.append` was removed in pandas 2.0 and copied the frame on
    # every call.
    case_means = []
    for N, T in zip(all_N, all_T):
        df_sim = pd.DataFrame(index=range(nsims), columns=result_columns)
        df_sim["T"] = [T] * nsims
        df_sim["N"] = [N] * nsims
        for i in range(nsims):
            X, Y, panel_df = dgp_func(T, N, **beta_true)

            # Pooled OLS coefficients serve as start values for the
            # interactive fixed effects estimator.
            start_value_estimator = PooledOLS(
                panel_df.y, panel_df[regressor_columns]
            )
            start_value_result = start_value_estimator.fit()
            interactive_start_value = start_value_result.params.tolist()

            interactive_estimator = InteractiveFixedEffect(Y, X)
            # Only the coefficient estimate is needed; the remaining three
            # outputs of `fit` are unpacked but not used.
            beta_hat, _beta_hat_list, _f_hat, _lambda_hat = (
                interactive_estimator.fit(r0, interactive_start_value, tolerance)
            )

            residual = Y - (X.T.dot(beta_hat)).T
            factor_estimator = FactorEstimator(residual)
            # Same call order as the result columns: PC1..PC3, then IC1..IC3.
            for criterion in ("PC", "IC"):
                for g_id in (1, 2, 3):
                    df_sim.loc[i, criterion + str(g_id)] = factor_estimator.r_hat(
                        rmax, criterion, g_id
                    )
        case_means.append(pd.DataFrame(df_sim.mean(axis=0)).T)

    df_factor_estimate = pd.concat(case_means, ignore_index=True)
    df_factor_estimate.to_csv(produces, index=False)
def test_calculate_lambda_tilde(normal_input):
    """Check `_calculate_lambda_tilde` with r=2 against a stored reference.

    A fixed factor matrix `f_hat` is passed in directly, so the test does not
    depend on the output of `_calculate_f_tilde`. `normal_input` is supplied
    by the caller (presumably a pytest fixture; confirm in the test
    configuration).
    """
    factor_estimator = FactorEstimator(normal_input)
    f_hat = np.array(
        [
            [0.5681053, 1.9665053],
            [-1.1560569, 0.3777842],
            [-1.8189881, 0.8677836],
            [-0.7013869, -0.1826577],
            [-0.7135161, -0.9389870],
            [0.1761313, 0.5674743],
        ]
    )
    lambda_hat = factor_estimator._calculate_lambda_tilde(f_hat, r=2)
    expect = np.array(
        [
            [-2.0951825, -0.05400627],
            [-1.1985173, -0.95462779],
            [-1.5878389, 0.71872749],
            [1.2482359, -2.00280008],
            [1.1363406, 2.05542389],
            [1.5535728, 0.03597738],
            [0.3335476, -0.02295224],
        ]
    )
    # Go through the `np.testing` namespace so the check does not depend on a
    # bare `assert_almost_equal` name being imported into this module.
    np.testing.assert_almost_equal(lambda_hat, expect)
def task_factor_estimate_random_iid_residual(produces):
    """Simulate factor-number estimation on data from `dgp_random_iid_residual`.

    For every (N, T) design the task runs `nsims` simulations. Each simulation
    draws a residual matrix via `dgp_random_iid_residual(N, T, r)` and applies
    `FactorEstimator.r_hat` for the criteria PC and IC, each combined with the
    penalty functions g1, g2 and g3. The averages over all simulations are
    stored as one row per design.

    It replicates the result of Table 2 in Bai and Ng (2002), page 205.

    Args:
        produces: Destination handed to `DataFrame.to_csv` for the result
            table (written without the index).
    """
    r = 3
    rmax = 8
    nsims = 1000
    all_N = [100, 100, 200, 500, 1000]
    all_T = [40, 60, 60, 60, 60]
    result_columns = ["T", "N", "PC1", "PC2", "PC3", "IC1", "IC2", "IC3"]

    # Seed once so the whole sequence of simulated designs is reproducible.
    np.random.seed(123)

    # Collect the per-design averages and concatenate once at the end:
    # `DataFrame.append` was removed in pandas 2.0 and copied the frame on
    # every call.
    case_means = []
    for N, T in zip(all_N, all_T):
        df_sim = pd.DataFrame(index=range(nsims), columns=result_columns)
        df_sim["T"] = [T] * nsims
        df_sim["N"] = [N] * nsims
        for i in range(nsims):
            residual = dgp_random_iid_residual(N, T, r)
            factor_estimator = FactorEstimator(residual)
            # Same call order as the result columns: PC1..PC3, then IC1..IC3.
            for criterion in ("PC", "IC"):
                for g_id in (1, 2, 3):
                    df_sim.loc[i, criterion + str(g_id)] = factor_estimator.r_hat(
                        rmax, criterion, g_id
                    )
        case_means.append(pd.DataFrame(df_sim.mean(axis=0)).T)

    df_factor_estimate = pd.concat(case_means, ignore_index=True)
    df_factor_estimate.to_csv(produces, index=False)
def test_calculate_g(normal_input):
    """Check `_calculate_g` for the ids 1 to 4 against stored reference values.

    `normal_input` is supplied by the caller (presumably a pytest fixture;
    confirm in the test configuration) and is passed to `FactorEstimator`.
    """
    factor_estimator = FactorEstimator(normal_input)
    expected_by_id = (
        (1, 0.3629848),
        (2, 0.5545922),
        (3, 0.2986266),
        (4, 0.3333333),
    )
    for g_id, expected in expected_by_id:
        actual = factor_estimator._calculate_g(g_id)
        # Go through the `np.testing` namespace so the check does not depend
        # on a bare `assert_almost_equal` name being imported into this module.
        np.testing.assert_almost_equal(actual, expected)
def test_calculate_pc(normal_input):
    """Check `_calculate_pc(r=2, rmax=5, id=2)` against a stored reference value.

    `normal_input` is supplied by the caller (presumably a pytest fixture;
    confirm in the test configuration) and is passed to `FactorEstimator`.
    """
    factor_estimator = FactorEstimator(normal_input)
    pc = factor_estimator._calculate_pc(r=2, rmax=5, id=2)
    expect = 1.351501
    # The reference value is only given to six decimals, hence `decimal=6`.
    np.testing.assert_almost_equal(pc, expect, decimal=6)
def test_calculate_ic(normal_input):
    """Check `_calculate_ic(r=2, id=2)` against a stored reference value.

    `normal_input` is supplied by the caller (presumably a pytest fixture;
    confirm in the test configuration) and is passed to `FactorEstimator`.
    """
    factor_estimator = FactorEstimator(normal_input)
    ic = factor_estimator._calculate_ic(r=2, id=2)
    expect = 1.34978
    # Compared to six decimals, matching the other criterion test.
    np.testing.assert_almost_equal(ic, expect, decimal=6)
def test_r_hat(normal_input):
    """Check that `r_hat(rmax=6, panelty="PC", id=3)` returns 6.

    `normal_input` is supplied by the caller (presumably a pytest fixture;
    confirm in the test configuration) and is passed to `FactorEstimator`.
    """
    factor_estimator = FactorEstimator(normal_input)
    # `panelty` (sic) is the keyword this test has always used to call
    # `r_hat`; it is kept unchanged here.
    actual = factor_estimator.r_hat(rmax=6, panelty="PC", id=3)
    expect = 6
    # Go through the `np.testing` namespace so the check does not depend on a
    # bare `assert_almost_equal` name being imported into this module.
    np.testing.assert_almost_equal(actual, expect)