def test_calculate_f_tilde(normal_input):
    """Check `_calculate_f_tilde` against hard-coded reference factors.

    A `FactorEstimator` is built from `normal_input` (presumably a pytest
    fixture defined elsewhere), `r=2` factors are requested, and the result
    is compared element-wise with a 6x2 reference array using numpy's
    default tolerance (7 decimals).
    """
    factor_estimator = FactorEstimator(normal_input)
    f_tilde = factor_estimator._calculate_f_tilde(r=2)
    expect = np.array([
        [0.5681053, -1.9665053],
        [-1.1560569, -0.3777842],
        [-1.8189881, -0.8677836],
        [-0.7013869, 0.1826577],
        [-0.7135161, 0.9389870],
        [0.1761313, -0.5674743],
    ])
    # Use the attribute access `np.testing.assert_almost_equal`; the previous
    # `np.testing, assert_almost_equal(...)` built a throwaway tuple and
    # relied on a bare `assert_almost_equal` name being in scope.
    np.testing.assert_almost_equal(f_tilde, expect)
def task_factor_estimate_interactive_fixed_effects_model(produces):
    """
    Task for estimating factor numbers in interactive fixed effects model.
    We choose different penalty functions g1,g2,g3 with criteria PC and IC.

    For every (N, T) design, `nsims` panels are drawn from
    `dgp_interactive_fixed_effects_model_with_common_and_time_invariant`.
    Each panel is fitted with `InteractiveFixedEffect`, started from pooled
    OLS coefficients, and the number of factors in the fitted residual is
    estimated with `FactorEstimator.r_hat` for each of the six
    criterion/penalty combinations. The per-design averages over all
    simulations are written as one CSV row per design.

    Parameters
    ----------
    produces
        Destination handed to `DataFrame.to_csv` (presumably a path supplied
        by the task runner).
    """
    rmax = 8
    nsims = 1000
    all_N = [100, 100, 100, 100, 10, 20, 50]
    all_T = [10, 20, 50, 100, 100, 100, 100]
    dgp_func = dgp_interactive_fixed_effects_model_with_common_and_time_invariant
    tolerance = 0.0001
    beta_true = {"beta1": 1, "beta2": 3, "mu": 5, "gamma": 2, "delta": 4}
    r0 = 8
    # Criterion / penalty-id pairs, in the order of the output columns.
    criteria = [
        ("PC", 1), ("PC", 2), ("PC", 3),
        ("IC", 1), ("IC", 2), ("IC", 3),
    ]
    # Regressor columns used for the pooled OLS start values (x1 .. x5).
    regressors = ["x" + str(k) for k in range(1, 6)]
    case_means = []
    np.random.seed(123)
    for N, T in zip(all_N, all_T):
        df_sim = pd.DataFrame(
            index=range(nsims),
            columns=["T", "N", "PC1", "PC2", "PC3", "IC1", "IC2", "IC3"],
        )
        df_sim["T"] = [T] * nsims
        df_sim["N"] = [N] * nsims
        for i in range(nsims):
            X, Y, panel_df = dgp_func(T, N, **beta_true)
            # Pooled OLS estimates serve as the starting point for the
            # iterative interactive fixed effects fit.
            start_value_estimator = PooledOLS(panel_df.y, panel_df[regressors])
            start_value_result = start_value_estimator.fit()
            interactive_start_value = start_value_result.params.tolist()
            interactive_estimator = InteractiveFixedEffect(Y, X)
            # Only the coefficient estimate is needed here; the remaining
            # three return values are unused.
            beta_hat, _beta_hat_list, _f_hat, _lambda_hat = (
                interactive_estimator.fit(
                    r0, interactive_start_value, tolerance)
            )
            # Remove the fitted regression part before estimating factors.
            residual = Y - (X.T.dot(beta_hat)).T
            factor_estimator = FactorEstimator(residual)
            for panelty, g_id in criteria:
                df_sim.loc[i, panelty + str(g_id)] = factor_estimator.r_hat(
                    rmax, panelty, g_id)
        # One summary row (column means over all simulations) per design.
        case_means.append(pd.DataFrame(df_sim.mean(axis=0)).T)
    # `DataFrame.append` was removed in pandas 2.0; concatenate once instead.
    df_factor_estimate = pd.concat(case_means, ignore_index=True)
    df_factor_estimate.to_csv(produces, index=False)
def test_calculate_lambda_tilde(normal_input):
    """Check `_calculate_lambda_tilde` against hard-coded reference loadings.

    A `FactorEstimator` is built from `normal_input` (presumably a pytest
    fixture defined elsewhere). A fixed 6x2 factor matrix is passed in with
    `r=2`, and the returned loadings are compared element-wise with a 7x2
    reference array using numpy's default tolerance (7 decimals).
    """
    factor_estimator = FactorEstimator(normal_input)
    f_hat = np.array([
        [0.5681053, 1.9665053],
        [-1.1560569, 0.3777842],
        [-1.8189881, 0.8677836],
        [-0.7013869, -0.1826577],
        [-0.7135161, -0.9389870],
        [0.1761313, 0.5674743],
    ])
    lambda_hat = factor_estimator._calculate_lambda_tilde(f_hat, r=2)
    expect = np.array([
        [-2.0951825, -0.05400627],
        [-1.1985173, -0.95462779],
        [-1.5878389, 0.71872749],
        [1.2482359, -2.00280008],
        [1.1363406, 2.05542389],
        [1.5535728, 0.03597738],
        [0.3335476, -0.02295224],
    ])
    # Use the attribute access `np.testing.assert_almost_equal`; the previous
    # `np.testing, assert_almost_equal(...)` built a throwaway tuple and
    # relied on a bare `assert_almost_equal` name being in scope.
    np.testing.assert_almost_equal(lambda_hat, expect)
def task_factor_estimate_random_iid_residual(produces):
    """
    Task for estimating factor numbers in the model defined by the function
    `dgp_random_iid_residual`.
    We choose different penalty functions g1,g2,g3 with criteria PC and IC.
    It replicates the result of Table 2 in Bai,Ng (2002), page 205.

    For every (N, T) design, `nsims` residual panels are drawn with `r = 3`,
    and the number of factors is estimated with `FactorEstimator.r_hat` for
    each of the six criterion/penalty combinations. The per-design averages
    over all simulations are written as one CSV row per design.

    Parameters
    ----------
    produces
        Destination handed to `DataFrame.to_csv` (presumably a path supplied
        by the task runner).
    """
    r = 3
    rmax = 8
    nsims = 1000
    all_N = [100, 100, 200, 500, 1000]
    all_T = [40, 60, 60, 60, 60]
    # Criterion / penalty-id pairs, in the order of the output columns.
    criteria = [
        ("PC", 1), ("PC", 2), ("PC", 3),
        ("IC", 1), ("IC", 2), ("IC", 3),
    ]
    case_means = []
    np.random.seed(123)
    for N, T in zip(all_N, all_T):
        df_sim = pd.DataFrame(
            index=range(nsims),
            columns=["T", "N", "PC1", "PC2", "PC3", "IC1", "IC2", "IC3"],
        )
        df_sim["T"] = [T] * nsims
        df_sim["N"] = [N] * nsims
        for i in range(nsims):
            residual = dgp_random_iid_residual(N, T, r)
            factor_estimator = FactorEstimator(residual)
            for panelty, g_id in criteria:
                df_sim.loc[i, panelty + str(g_id)] = factor_estimator.r_hat(
                    rmax, panelty, g_id)
        # One summary row (column means over all simulations) per design.
        case_means.append(pd.DataFrame(df_sim.mean(axis=0)).T)
    # `DataFrame.append` was removed in pandas 2.0; concatenate once instead.
    df_factor_estimate = pd.concat(case_means, ignore_index=True)
    df_factor_estimate.to_csv(produces, index=False)
def test_calculate_g(normal_input):
    """Check `_calculate_g` for ids 1 to 4 against hard-coded references.

    A `FactorEstimator` is built from `normal_input` (presumably a pytest
    fixture defined elsewhere). Each value is compared using numpy's default
    tolerance (7 decimals).
    """
    factor_estimator = FactorEstimator(normal_input)
    # Use the attribute access `np.testing.assert_almost_equal`; the previous
    # `np.testing, assert_almost_equal(...)` built a throwaway tuple and
    # relied on a bare `assert_almost_equal` name being in scope.
    id1 = factor_estimator._calculate_g(1)
    np.testing.assert_almost_equal(id1, 0.3629848)
    id2 = factor_estimator._calculate_g(2)
    np.testing.assert_almost_equal(id2, 0.5545922)
    id3 = factor_estimator._calculate_g(3)
    np.testing.assert_almost_equal(id3, 0.2986266)
    id4 = factor_estimator._calculate_g(4)
    np.testing.assert_almost_equal(id4, 0.3333333)
def test_calculate_pc(normal_input):
    """Check `_calculate_pc(r=2, rmax=5, id=2)` against a reference value.

    A `FactorEstimator` is built from `normal_input` (presumably a pytest
    fixture defined elsewhere). The result is compared to 6 decimals.
    """
    factor_estimator = FactorEstimator(normal_input)
    pc = factor_estimator._calculate_pc(r=2, rmax=5, id=2)
    expect = 1.351501
    # Use the attribute access `np.testing.assert_almost_equal`; the previous
    # `np.testing, assert_almost_equal(...)` built a throwaway tuple and
    # relied on a bare `assert_almost_equal` name being in scope.
    np.testing.assert_almost_equal(pc, expect, decimal=6)
def test_calculate_ic(normal_input):
    """Check `_calculate_ic(r=2, id=2)` against a reference value.

    A `FactorEstimator` is built from `normal_input` (presumably a pytest
    fixture defined elsewhere). The result is compared to 6 decimals.
    """
    factor_estimator = FactorEstimator(normal_input)
    ic = factor_estimator._calculate_ic(r=2, id=2)
    expect = 1.34978
    # Use the attribute access `np.testing.assert_almost_equal`; the previous
    # `np.testing, assert_almost_equal(...)` built a throwaway tuple and
    # relied on a bare `assert_almost_equal` name being in scope.
    np.testing.assert_almost_equal(ic, expect, decimal=6)
def test_r_hat(normal_input):
    """Check that `r_hat(rmax=6, panelty="PC", id=3)` returns 6.

    A `FactorEstimator` is built from `normal_input` (presumably a pytest
    fixture defined elsewhere).
    """
    factor_estimator = FactorEstimator(normal_input)
    actual = factor_estimator.r_hat(rmax=6, panelty="PC", id=3)
    expect = 6
    # Use the attribute access `np.testing.assert_almost_equal`; the previous
    # `np.testing, assert_almost_equal(...)` built a throwaway tuple and
    # relied on a bare `assert_almost_equal` name being in scope.
    np.testing.assert_almost_equal(actual, expect)