示例#1
0
    def test_diagonal_sample_tvma1(self, sample_size=8, mean=5, sigma=0.12,
                                   noise_type=("bernoulli", "gaussian")):
        """
        Run diagonal_sample_tvma1 once per noise type and print what it returns.

        Nothing is asserted; the method only prints the parameters, the type,
        shape and value of the result, and the duration of each call.

        :param sample_size: forwarded to diagonal_sample_tvma1.
        :param mean: forwarded to diagonal_sample_tvma1.
        :param sigma: forwarded to diagonal_sample_tvma1.
        :param noise_type: a single noise type name or an iterable of names;
            diagonal_sample_tvma1 is called once for each of them.
        """
        # A bare string would otherwise be iterated character by character.
        if isinstance(noise_type, str):
            noise_type = (noise_type,)

        print('\n\n===============================================================================')
        print('Testing "diagonal_sample_tvma1"')

        for each in noise_type:
            # Restart the clock for every noise type and stop it right after
            # the call, so the figure excludes printing and earlier runs.
            start_time = timer()
            returned = diagonal_sample_tvma1(sample_size=sample_size, mean=mean, sigma=sigma, noise_type=each)
            duration = timer() - start_time

            print('Test parameters:')
            print('sample_size = ', sample_size)
            print('mean = ', mean)
            print('sigma = ', sigma)
            print('noise_type = ', each)

            print('\nreturned = ', type(returned))
            if isinstance(returned, list):
                print('returned shape = ', len(returned))
            elif isinstance(returned, np.ndarray):
                print('returned shape = ', returned.shape)
            print('returned = ', returned)

            print("\nDuration: {:g} secs".format(duration))
            print('End of test {}'.format('diagonal_sample_tvma1'))
            print('===============================================================================\n')
示例#2
0
def compute_and_save_cov_hat_vs_sample_size(sample_size_from: int,
                                            sample_size_to: int,
                                            sample_size_by: int,
                                            replication_count: int,
                                            mean: float,
                                            sigma: float,
                                            noise_type: str,
                                            lag: int,
                                            sample_type: str = "ma1"):
    """
    Plot replicated t-free autocovariance estimates against the sample size.

    For every sample size in
    np.arange(sample_size_from, sample_size_to, sample_size_by)
    (sample_size_to itself is excluded), replication_count samples are
    generated and cov_hat_t_free at the given lag is computed for each.
    The estimates and the constant true autocovariance are handed to
    plot_double_array (presumably the function that writes the image; its
    code is not part of this excerpt).

    :param sample_size_from: first sample size.
    :param sample_size_to: exclusive upper bound of the sample sizes.
    :param sample_size_by: step between consecutive sample sizes.
    :param replication_count: number of samples generated per sample size.
    :param mean: forwarded to the sample generator.
    :param sigma: forwarded to the sample generator and the true covariance.
    :param noise_type: forwarded to the sample generator.
    :param lag: lag of the autocovariance.
    :param sample_type: "ma1" or "ma3".
    :raises ValueError: if sample_type is neither "ma1" nor "ma3".
    """
    # Resolve the process-specific helpers once; previously an unknown
    # sample_type surfaced later as an UnboundLocalError.
    if sample_type == "ma1":
        true_cov_t_free = true_cov_ma1_t_free
        generate_sample = diagonal_sample_tvma1
    elif sample_type == "ma3":
        true_cov_t_free = true_cov_ma3_t_free
        generate_sample = diagonal_sample_tvma3
    else:
        raise ValueError(
            'sample_type must be "ma1" or "ma3", got {!r}'.format(sample_type))

    par_list = {
        "sample_size_from": sample_size_from,
        "sample_size_to": sample_size_to,
        "sample_size_by": sample_size_by,
        "replication_count": replication_count,
        "mean": mean,
        "sigma": sigma,
        "noise_type": noise_type,
        "lag": lag,
        "sample_type": sample_type
    }

    sample_size_array = np.arange(start=sample_size_from,
                                  stop=sample_size_to,
                                  step=sample_size_by)
    # One row per replication, one column per sample size.
    cov_hat_t_free_array = np.full(shape=(replication_count,
                                          len(sample_size_array)),
                                   fill_value=np.nan)

    # The true value does not depend on the sample size, so it is repeated.
    true_cov_array = np.repeat(true_cov_t_free(lag=lag, sigma=sigma),
                               len(sample_size_array))

    for index_col, sample_size in enumerate(sample_size_array):
        for replication in range(replication_count):
            sample = generate_sample(sample_size=sample_size,
                                     mean=mean,
                                     sigma=sigma,
                                     noise_type=noise_type)
            cov_hat_t_free_array[replication,
                                 index_col] = cov_hat_t_free(sample=sample,
                                                             lag=lag)

    plot_double_array(x_array=sample_size_array,
                      hat_double_array=cov_hat_t_free_array,
                      true_array=true_cov_array,
                      title="Cov hat t free by sample size",
                      x_label="sample size",
                      par_list=par_list,
                      axis='row',
                      true_label="true autocovariance t free",
                      y_label='autocovariance')
示例#3
0
    def test_lrv_hat_threshold_real_t_free(self,
                                           sample_size=100,
                                           noise_type="gaussian",
                                           sd_type="block_est"):
        """
        Run lrv_hat_threshold_t_free on a freshly generated TVMA(1) sample.

        Nothing is asserted; the parameters, the returned value and the
        duration are printed. The timed span covers sample generation, the
        covariance column and the estimator call.

        :param sample_size: size of the generated sample.
        :param noise_type: noise type used both to generate the sample and
            by the estimator.
        :param sd_type: forwarded to lrv_hat_threshold_t_free.
        """
        print(
            '\n\n==============================================================================='
        )
        print('testing "lrv_hat_threshold_real_t_free"')

        start_time = timer()
        # The sample must be drawn with the same noise type that is reported
        # to the estimator; it used to be hard-coded to "gaussian".
        sample = diagonal_sample_tvma1(sample_size=sample_size,
                                       mean=0,
                                       sigma=2,
                                       noise_type=noise_type)
        max_lag = threshold_max_lag(sample_size=sample_size)
        cov_hat_column = cov_column_t_free(sample=sample, max_lag=max_lag)

        returned = lrv_hat_threshold_t_free(cov_hat_column=cov_hat_column,
                                            sample_size=sample_size,
                                            noise_type=noise_type,
                                            sd_type=sd_type)
        duration = timer() - start_time
        print('Test parameters:')
        print('sample_size =', sample_size)
        print('noise_type =', noise_type)
        print('sd_type =', sd_type)
        print('returned =', returned)
        print("\nDuration: {:g} secs".format(duration))
        print('End of test {}'.format('lrv_hat_threshold_real_t_free'))
        print(
            '===============================================================================\n'
        )
示例#4
0
    def test_run_cov_hat_of_t(self, t_par=0, lag=0):
        """
        Run cov_hat_of_t on a small Bernoulli TVMA(1) sample and print a report.

        Nothing is asserted; the method prints the inputs, the type, shape and
        value of the result, and how long the cov_hat_of_t call took.

        :param t_par: forwarded to cov_hat_of_t.
        :param lag: forwarded to cov_hat_of_t.
        """
        print('\n\n===============================================================================')
        print('RUN testing "cov_hat_of_t"')
        diagonal_sample = diagonal_sample_tvma1(sample_size=20,
                                                mean=0,
                                                sigma=2,
                                                noise_type='bernoulli')
        start_time = timer()
        returned = cov_hat_of_t(sample=diagonal_sample, t_par=t_par, lag=lag)
        # Stop the clock before printing so the report itself is not timed.
        duration = timer() - start_time
        print('Test parameters:')
        print('sample =', diagonal_sample)
        print('t_par =', t_par)
        print('lag =', lag)

        print('\nreturned =', type(returned))
        if isinstance(returned, list):
            print('returned shape =', len(returned))
        elif isinstance(returned, np.ndarray):
            print('returned shape =', returned.shape)
        print('returned =', returned)

        print("\nDuration: {:g} secs".format(duration))
        print('End of RUN test {}'.format('cov_hat_of_t'))
        print('===============================================================================\n')
示例#5
0
def compute_and_save_v_vs_nw(sample_size,
                             t_par_count,
                             mean,
                             sigma,
                             noise_type,
                             sample_type="ma1"):
    """
    Compare the double-summation Newey-West formula with the regular one.

    One sample is generated; lrv_hat_nw_of_t (from the covariance double
    array) and lrv_hat_nw_2_of_t (from the sample directly) are evaluated
    over the same t grid and passed to plot_arrays together with the true
    long-run variance. Per the original author's note, the two curves must
    coincide in the resulting image.

    :param sample_size: size of the generated sample.
    :param t_par_count: number of t values, passed to create_t_par_array.
    :param mean: forwarded to the sample generator.
    :param sigma: forwarded to the sample generator and the true lrv.
    :param noise_type: forwarded to the sample generator.
    :param sample_type: "ma1" or "ma3".
    :raises ValueError: if sample_type is neither "ma1" nor "ma3".
    """
    # Fail early with a clear message instead of an UnboundLocalError later.
    if sample_type == "ma1":
        true_lrv_of_t = true_lrv_ma1_of_t
        generate_sample = diagonal_sample_tvma1
    elif sample_type == "ma3":
        true_lrv_of_t = true_lrv_ma3_of_t
        generate_sample = diagonal_sample_tvma3
    else:
        raise ValueError(
            'sample_type must be "ma1" or "ma3", got {!r}'.format(sample_type))

    par_list = {
        "sample_size": sample_size,
        "t_par_count": t_par_count,
        "mean": mean,
        "sigma": sigma,
        "noise_type": noise_type,
        "sample_type": sample_type
    }

    t_par_array = create_t_par_array(t_par_count=t_par_count)

    true_lrv_array = true_lrv_of_t(sigma=sigma, t_par_array=t_par_array)
    sample = generate_sample(sample_size=sample_size,
                             mean=mean,
                             sigma=sigma,
                             noise_type=noise_type)

    max_lag = int(support_bound(sample_size=sample_size)) + 1

    cov_double_array = cov_double_array_of_t(sample=sample,
                                             t_par_count=t_par_count,
                                             max_lag=max_lag)
    original_nw_array = lrv_hat_nw_of_t(cov_double_array=cov_double_array,
                                        sample_size=sample_size)

    double_sum_nw_array = lrv_hat_nw_2_of_t(sample=sample,
                                            t_par_array=t_par_array)

    arrays_dict = {
        "Double sum Newey-West": double_sum_nw_array,
        "Original Newey-West": original_nw_array
    }

    plot_arrays(x_array=t_par_array,
                arrays_dict=arrays_dict,
                true_array=true_lrv_array,
                title='Double sum vs original Newey-West',
                x_label='t par',
                par_list=par_list)
示例#6
0
def compute_and_save_nw_single_n(sample_size: int,
                                 t_par_count: int,
                                 mean: float,
                                 sigma: float,
                                 noise_type: str,
                                 replication_count: int,
                                 sample_type: str = "ma1") -> np.ndarray:
    """
    Compute replicated t-dependent Newey-West estimates for one sample size.

    replication_count samples of size sample_size are generated; for each,
    lrv_hat_nw_of_t is evaluated on the covariance double array and stored
    as one column. The columns and the true long-run variance are passed to
    plot_double_array (per the original note, a single image is saved to
    the output directory).

    :param sample_size: size of every generated sample.
    :param t_par_count: number of t values, passed to create_t_par_array.
    :param mean: forwarded to the sample generator.
    :param sigma: forwarded to the sample generator and the true lrv.
    :param noise_type: forwarded to the sample generator.
    :param replication_count: number of samples to generate.
    :param sample_type: "ma1" or "ma3".
    :return: array of shape (t_par_count, replication_count) with the
        Newey-West estimates, one column per replication.
    :raises ValueError: if sample_type is neither "ma1" nor "ma3".
    """
    # Fail early with a clear message instead of an UnboundLocalError later.
    if sample_type == "ma1":
        true_lrv_of_t = true_lrv_ma1_of_t
        generate_sample = diagonal_sample_tvma1
    elif sample_type == "ma3":
        true_lrv_of_t = true_lrv_ma3_of_t
        generate_sample = diagonal_sample_tvma3
    else:
        raise ValueError(
            'sample_type must be "ma1" or "ma3", got {!r}'.format(sample_type))

    par_list = {
        "sample_size": sample_size,
        "t_par_count": t_par_count,
        "mean": mean,
        "sigma": sigma,
        "noise_type": noise_type,
        "replication_count": replication_count,
        "sample_type": sample_type
    }

    t_par_array = create_t_par_array(t_par_count=t_par_count)
    true_lrv_array = true_lrv_of_t(sigma=sigma, t_par_array=t_par_array)

    nw_hat_double_array = np.full(shape=(t_par_count, replication_count),
                                  fill_value=np.nan)
    max_lag = int(support_bound(sample_size=sample_size)) + 1

    for r in range(replication_count):
        sample = generate_sample(sample_size=sample_size, mean=mean,
                                 sigma=sigma, noise_type=noise_type)

        cov_double_array = cov_double_array_of_t(sample=sample,
                                                 t_par_count=t_par_count,
                                                 max_lag=max_lag)
        nw_hat_double_array[:, r] = lrv_hat_nw_of_t(
            cov_double_array=cov_double_array,
            sample_size=sample_size)

    plot_double_array(x_array=t_par_array,
                      hat_double_array=nw_hat_double_array,
                      true_array=true_lrv_array,
                      title=" Newey-West vs true lrv",
                      x_label="t par",
                      par_list=par_list)

    return nw_hat_double_array
示例#7
0
def compute_and_save_acf(sample_size: int,
                         mean: float,
                         sigma: float,
                         noise_type: str,
                         sd_type: str,
                         sample_type: str = "ma1"):
    """
    Plot the sample autocovariance function of one generated sample.

    A diagonal sample of the requested type is generated and
    cov_column_t_free is evaluated with max_lag = threshold_max_lag(sample_size).
    For every lag in range(max_lag), sd_cov_hat is multiplied by
    zhou_treshold(sample_size) to form the "cloud" that is passed to
    plot_acf together with the estimates (per the original note, the plot
    is saved to the output directory).
    If need arises, a max_lag argument may be introduced later.

    :param sample_size: size of the generated sample.
    :param mean: forwarded to the sample generator.
    :param sigma: forwarded to the sample generator.
    :param noise_type: forwarded to the sample generator and sd_cov_hat.
    :param sd_type: forwarded to sd_cov_hat.
    :param sample_type: "ma1" or "ma3".
    :raises ValueError: if sample_type is neither "ma1" nor "ma3".
    """
    # Fail early with a clear message instead of an UnboundLocalError later.
    if sample_type == "ma1":
        generate_sample = diagonal_sample_tvma1
    elif sample_type == "ma3":
        generate_sample = diagonal_sample_tvma3
    else:
        raise ValueError(
            'sample_type must be "ma1" or "ma3", got {!r}'.format(sample_type))

    par_list = {"sample_size": sample_size,
                "mean": mean,
                "sigma": sigma,
                "noise_type": noise_type,
                "sd_type": sd_type,
                "sample_type": sample_type}

    sample = generate_sample(sample_size=sample_size,
                             mean=mean,
                             sigma=sigma,
                             noise_type=noise_type)
    max_lag = threshold_max_lag(sample_size=sample_size)
    cov_hat = cov_column_t_free(sample=sample,
                                max_lag=max_lag)
    sd_cov_hat_array = np.full(shape=max_lag, fill_value=np.nan)
    for lag in range(max_lag):
        sd_cov_hat_array[lag] = sd_cov_hat(sample_size=sample_size,
                                           lag=lag,
                                           noise_type=noise_type,
                                           sd_type=sd_type,
                                           sample_type=sample_type)
    # Per-lag standard deviation scaled by the threshold constant.
    cloud = sd_cov_hat_array * zhou_treshold(sample_size=sample_size)

    plot_acf(cov_hat=cov_hat,
             cloud=cloud,
             par_list=par_list)
    def test_run_lrv_hat_nw_of_t(self,
                                 t_par_count=11,
                                 sample_size=20,
                                 mean=0,
                                 sigma=2,
                                 noise_type='bernoulli'):
        """
        Run lrv_hat_nw_of_t on a generated TVMA(1) sample and print a report.

        Nothing is asserted. Only the lrv_hat_nw_of_t call is timed; sample
        generation and the covariance double array are prepared beforehand.

        :param t_par_count: forwarded to cov_double_array_of_t.
        :param sample_size: size of the generated sample.
        :param mean: forwarded to diagonal_sample_tvma1.
        :param sigma: forwarded to diagonal_sample_tvma1.
        :param noise_type: forwarded to diagonal_sample_tvma1.
        """
        banner = '==============================================================================='

        print('\n\n' + banner)
        print('RUN testing "lrv_hat_nw_of_t"')

        sample = diagonal_sample_tvma1(sample_size=sample_size,
                                       mean=mean,
                                       sigma=sigma,
                                       noise_type=noise_type)
        lag_limit = int(support_bound(sample_size=sample_size)) + 1
        cov_by_t = cov_double_array_of_t(sample=sample,
                                         t_par_count=t_par_count,
                                         max_lag=lag_limit)

        tic = timer()
        returned = lrv_hat_nw_of_t(cov_double_array=cov_by_t,
                                   sample_size=sample_size)
        duration = timer() - tic

        print('Test parameters:')
        reported = (('t_par_count', t_par_count),
                    ('sample_size', sample_size),
                    ('mean', mean),
                    ('sigma', sigma),
                    ('noise_type', noise_type))
        for label, value in reported:
            print('{} = '.format(label), value)

        print('\ncov_double_aray_size = ', cov_by_t.shape)

        print('\nreturned = ', type(returned))
        # Lists report their length, arrays their shape, anything else nothing.
        if isinstance(returned, (list, np.ndarray)):
            extent = len(returned) if isinstance(returned, list) else returned.shape
            print('returned shape = ', extent)
        print('returned = ', returned)

        print("\nDuration: {:g} secs".format(duration))
        print('End of RUN test {}'.format('lrv_hat_nw_of_t'))
        print(banner + '\n')
def compute_and_save_cov_hat_hist(sample_size: int,
                                  replication_count: int,
                                  mean: float,
                                  sigma: float,
                                  noise_type: str,
                                  lag: int,
                                  sample_type: str = "ma1"):
    """
    Plot a histogram of replicated t-free autocovariance estimates.

    replication_count samples of size sample_size are generated,
    cov_hat_t_free at the given lag is computed for each, and the estimates
    are passed to plot_histograms together with the true autocovariance.

    :param sample_size: size of every generated sample.
    :param replication_count: number of samples to generate.
    :param mean: forwarded to the sample generator.
    :param sigma: forwarded to the sample generator and the true covariance.
    :param noise_type: forwarded to the sample generator.
    :param lag: lag of the autocovariance.
    :param sample_type: "ma1" or "ma3".
    :raises ValueError: if sample_type is neither "ma1" nor "ma3".
    """
    # Fail early with a clear message instead of an UnboundLocalError later.
    if sample_type == "ma1":
        true_cov_t_free = true_cov_ma1_t_free
        generate_sample = diagonal_sample_tvma1
    elif sample_type == "ma3":
        true_cov_t_free = true_cov_ma3_t_free
        generate_sample = diagonal_sample_tvma3
    else:
        raise ValueError(
            'sample_type must be "ma1" or "ma3", got {!r}'.format(sample_type))

    par_list = {
        "sample_size": sample_size,
        "replication_count": replication_count,
        "mean": mean,
        "sigma": sigma,
        "noise_type": noise_type,
        "lag": lag,
        "sample_type": sample_type
    }

    cov_hat_t_free_array = np.full(shape=replication_count, fill_value=np.nan)

    true_cov = true_cov_t_free(lag=lag, sigma=sigma)

    for replication in range(replication_count):
        sample = generate_sample(sample_size=sample_size,
                                 mean=mean,
                                 sigma=sigma,
                                 noise_type=noise_type)
        cov_hat_t_free_array[replication] = cov_hat_t_free(sample=sample,
                                                           lag=lag)

    arrays_dict = {"Autocovariance": cov_hat_t_free_array}

    plot_histograms(arrays_dict=arrays_dict,
                    true_value=true_cov,
                    title="Autocovariance",
                    par_list=par_list,
                    true_label="true autocovariance")
示例#10
0
def semi_bootstrap(sample_size: int,
                   lag: int,
                   mean: float,
                   sigma: float,
                   noise_type: str,
                   sample_type: str = "ma1"):
    """
    Generates a sample and computes a block estimate for var(covHat).

    The estimate is the sum, over all index pairs that are batch_size
    apart, of the squared difference of the block sums divided by
    sqrt(2 * batch_size), and finally divided by sample_size.

    :param sample_size: size of a sample to be generated.
    :param lag: lag of autocovariance, whose variance should be generated.
    :param mean: mean for the noise whose sample should be generated.
    :param sigma: sigma for the noise whose sample should be generated.
    :param noise_type: type for the noise whose sample should be generated.
    :param sample_type: type of the sample that should be generated,
        "ma1" or "ma3".
    :return: semi_bootstrap_value, a block estimate value.
    :raises ValueError: if sample_type is neither "ma1" nor "ma3".
    """
    # Fail early with a clear message instead of an UnboundLocalError later.
    if sample_type == "ma1":
        generate_sample = diagonal_sample_tvma1
    elif sample_type == "ma3":
        generate_sample = diagonal_sample_tvma3
    else:
        raise ValueError(
            'sample_type must be "ma1" or "ma3", got {!r}'.format(sample_type))

    batch_size_value = batch_size(sample_size=sample_size)
    sample = generate_sample(sample_size=sample_size,
                             mean=mean,
                             sigma=sigma,
                             noise_type=noise_type)
    paired_product_array = paired_products(sample=sample, lag=lag)
    block_sum_array = block_sums(paired_product_array=paired_product_array)

    # The normalizer is the same for every term, so compute it once.
    normalizer = np.sqrt(2 * batch_size_value)

    cum_sum = 0

    for index in range(len(block_sum_array) - batch_size_value):
        cum_sum += ((block_sum_array[index] -
                     block_sum_array[index + batch_size_value]) /
                    normalizer)**2

    semi_bootstrap_value = cum_sum / sample_size

    return semi_bootstrap_value
示例#11
0
def compute_and_save_cov_and_cov_hats(sample_size,
                                      t_par_count,
                                      gamma_count,
                                      mean,
                                      sigma,
                                      lag,
                                      noise_type,
                                      diag_or_horiz,
                                      sample_type: str = "ma1"):
    """
    Plot replicated autocovariance estimates against the true value over t.

    For all values of t, this function computes the true covariance and
    gamma_count replicated estimates of it, then passes both to
    plot_double_array (per the original note: saves an image file, no CSV).

    If diag_or_horiz is "diag", one diagonal sample is generated per
    replication and cov_hat_of_t is evaluated at every t.
    If diag_or_horiz is "horiz", a 2d horizontal sample is generated per
    replication and cov_hat_t_free is applied to its t-th line.

    For sample_type "scaled_noise" no true-covariance helper is called, so
    the true curve stays NaN.

    :param sample_size: size of the generated samples.
    :param t_par_count: number of t values, passed to create_t_par_array.
    :param gamma_count: number of replications.
    :param mean: forwarded to the sample generator.
    :param sigma: forwarded to the sample generator and the true covariance.
    :param lag: lag of the autocovariance.
    :param noise_type: forwarded to the sample generator.
    :param diag_or_horiz: "diag" or "horiz".
    :param sample_type: "scaled_noise", "ma1" or "ma3".
    :raises ValueError: if sample_type or diag_or_horiz is not supported.
    """
    # Reject typos up front: an unknown sample_type used to end in an
    # UnboundLocalError, an unknown diag_or_horiz in a silent all-NaN plot.
    if sample_type not in ("scaled_noise", "ma1", "ma3"):
        raise ValueError(
            'sample_type must be "scaled_noise", "ma1" or "ma3", got {!r}'
            .format(sample_type))
    if diag_or_horiz not in ("diag", "horiz"):
        raise ValueError(
            'diag_or_horiz must be "diag" or "horiz", got {!r}'
            .format(diag_or_horiz))
    use_diagonal = diag_or_horiz == "diag"

    par_list = {"sample_size": sample_size,
                "t_par_count": t_par_count,
                "gamma_count": gamma_count,
                "mean": mean,
                "sigma": sigma,
                "lag": lag,
                "sample_type": sample_type,
                "noise_type": noise_type,
                "diag_or_horiz": diag_or_horiz}

    t_par_array = create_t_par_array(t_par_count=t_par_count)

    true_gamma_array = np.full(shape=t_par_count, fill_value=np.nan)

    # One row per t, one column per replication.
    gamma_hat_double_array = np.full(shape=(t_par_count, gamma_count),
                                     fill_value=np.nan)

    if sample_type == "ma1":
        true_cov_of_t = true_cov_ma1_of_t
    elif sample_type == "ma3":
        true_cov_of_t = true_cov_ma3_of_t
    else:
        true_cov_of_t = None
    if true_cov_of_t is not None:
        for t_index in range(t_par_count):
            true_gamma_array[t_index] = true_cov_of_t(
                t_par=t_par_array[t_index],
                sigma=sigma,
                lag=lag)

    # Choose the generator once instead of re-deciding in every replication.
    if use_diagonal:
        if sample_type == "scaled_noise":
            generate = diagonal_sample_scaled_noise
        elif sample_type == "ma1":
            generate = diagonal_sample_tvma1
        else:
            generate = diagonal_sample_tvma3
    else:
        if sample_type == "scaled_noise":
            generate = horizontal_sample_scaled_noise
        elif sample_type == "ma1":
            generate = horizontal_sample_tvma1
        else:
            generate = horizontal_sample_tvma3

    # For each index, generate a sample (a replication) and compute gamma.
    for index in range(gamma_count):
        if use_diagonal:
            sample = generate(sample_size=sample_size,
                              mean=mean,
                              sigma=sigma,
                              noise_type=noise_type)
            for t_index in range(t_par_count):
                gamma_hat_double_array[t_index, index] = cov_hat_of_t(
                    sample=sample,
                    t_par=t_par_array[t_index],
                    lag=lag)
        else:
            horizontal = generate(sample_size=sample_size,
                                  t_par_count=t_par_count,
                                  mean=mean,
                                  sigma=sigma,
                                  noise_type=noise_type)
            for t_index in range(t_par_count):
                gamma_hat_double_array[t_index, index] = cov_hat_t_free(
                    sample=horizontal[t_index],
                    lag=lag)

        print("There are", gamma_count - (index + 1), "replications left")

    plot_double_array(x_array=t_par_array,
                      hat_double_array=gamma_hat_double_array,
                      true_array=true_gamma_array,
                      title='Autocovariance',
                      axis='column',
                      x_label='t par',
                      par_list=par_list)
def compute_and_save_threshold_nw_t_free(sample_size: int,
                                         replication_count: int,
                                         mean: float,
                                         sigma: float,
                                         noise_type: str,
                                         sd_type: str,
                                         lrv_est: str,
                                         sample_type: str = "ma1"):
    """
    Illustrated in
    402 LRV 3a / computing 2 / project 2 / Threshold / M: threshold t free
    Saves a single image file with
    histogram of replicated NW estimate threshold or both.
    True value is marked on all histograms.

    Both result arrays are always passed to plot_histograms; the one that
    was not requested through lrv_est stays filled with NaN.

    :param sample_size: size of every generated sample.
    :param replication_count: number of samples to generate.
    :param mean: forwarded to the sample generator.
    :param sigma: forwarded to the sample generator and the true lrv.
    :param noise_type: forwarded to the sample generator and the threshold
        estimator.
    :param sd_type: forwarded to the threshold estimator.
    :param lrv_est: "threshold", "nw" or "both".
    :param sample_type: "ma1" or "ma3".
    :raises ValueError: if sample_type or lrv_est is not supported.
    """
    # Fail early with a clear message instead of an UnboundLocalError later.
    if sample_type == "ma1":
        true_lrv_t_free = true_lrv_ma1_t_free
        generate_sample = diagonal_sample_tvma1
    elif sample_type == "ma3":
        true_lrv_t_free = true_lrv_ma3_t_free
        generate_sample = diagonal_sample_tvma3
    else:
        raise ValueError(
            'sample_type must be "ma1" or "ma3", got {!r}'.format(sample_type))
    # An unknown estimator name used to produce two all-NaN histograms.
    if lrv_est not in ("threshold", "nw", "both"):
        raise ValueError(
            'lrv_est must be "threshold", "nw" or "both", got {!r}'
            .format(lrv_est))
    want_threshold = lrv_est in ("threshold", "both")
    want_nw = lrv_est in ("nw", "both")

    par_list = {
        "sample_size": sample_size,
        "replication_count": replication_count,
        "mean": mean,
        "sigma": sigma,
        "sd_type": sd_type,
        "noise_type": noise_type,
        "sample_type": sample_type
    }

    threshold_t_free_array = np.full(shape=replication_count,
                                     fill_value=np.nan)
    nw_t_free_array = np.full(shape=replication_count, fill_value=np.nan)

    true_lrv = true_lrv_t_free(sigma=sigma)

    support_bound_value = int(support_bound(sample_size=sample_size)) + 1
    threshold_max_lag_value = threshold_max_lag(sample_size=sample_size)

    # One covariance column long enough for both estimators; each estimator
    # then receives its own prefix of it.
    max_lag = max(support_bound_value, threshold_max_lag_value)

    for replication in range(replication_count):
        sample = generate_sample(sample_size=sample_size,
                                 mean=mean,
                                 sigma=sigma,
                                 noise_type=noise_type)
        cov_column = cov_column_t_free(sample=sample, max_lag=max_lag)
        if want_threshold:
            threshold_t_free_array[replication] = lrv_hat_threshold_t_free(
                cov_hat_column=cov_column[:threshold_max_lag_value],
                sample_size=sample_size,
                noise_type=noise_type,
                sd_type=sd_type,
                sample_type=sample_type)
        if want_nw:
            nw_t_free_array[replication] = lrv_hat_nw_t_free(
                cov_column=cov_column[:support_bound_value],
                sample_size=sample_size)
        print("compute_and_save_threshold_nw_t_free",
              replication_count - (replication + 1), "left")

    arrays_dict = {
        "Newey-West": nw_t_free_array,
        "Threshold": threshold_t_free_array
    }

    plot_histograms(arrays_dict=arrays_dict,
                    true_value=true_lrv,
                    title="{} t free lrv".format(lrv_est),
                    par_list=par_list)
示例#13
0
def compute_and_save_threshold_single_n(sample_size: int,
                                        t_par_count: int,
                                        mean: float,
                                        sigma: float,
                                        noise_type: str,
                                        sd_type: str,
                                        replication_count: int,
                                        sample_type: str = "ma1"):
    """
    Saves a straw plot of several replicates of t-dependent threshold estimates
    for the given sample_size.
    Illustrated in
    385 LRV3a / computing 2 / project 2 / tests / ME: Test the function

    :param sample_size: size of every generated sample.
    :param t_par_count: number of t values, passed to create_t_par_array.
    :param mean: forwarded to the sample generator.
    :param sigma: forwarded to the sample generator and the true lrv.
    :param noise_type: forwarded to the sample generator and the estimator.
    :param sd_type: forwarded to lrv_hat_threshold_of_t.
    :param replication_count: number of samples to generate.
    :param sample_type: "ma1" or "ma3".
    :raises ValueError: if sample_type is neither "ma1" nor "ma3".
    """
    # Fail early with a clear message instead of an UnboundLocalError later.
    if sample_type == "ma1":
        true_lrv_of_t = true_lrv_ma1_of_t
        generate_sample = diagonal_sample_tvma1
    elif sample_type == "ma3":
        true_lrv_of_t = true_lrv_ma3_of_t
        generate_sample = diagonal_sample_tvma3
    else:
        raise ValueError(
            'sample_type must be "ma1" or "ma3", got {!r}'.format(sample_type))

    par_list = {"sample_size": sample_size,
                "t_par_count": t_par_count,
                "mean": mean,
                "sigma": sigma,
                "noise_type": noise_type,
                "sd_type": sd_type,
                "replication_count": replication_count,
                "sample_type": sample_type}

    # One row per t, one column per replication.
    threshold_hat_double_array = np.full(shape=(t_par_count, replication_count),
                                         fill_value=np.nan)

    t_par_array = create_t_par_array(t_par_count=t_par_count)
    true_lrv_array = true_lrv_of_t(sigma=sigma, t_par_array=t_par_array)

    max_lag = threshold_max_lag(sample_size=sample_size)

    for replication in range(replication_count):
        sample = generate_sample(sample_size=sample_size,
                                 mean=mean,
                                 sigma=sigma,
                                 noise_type=noise_type)
        cov_double_array = cov_double_array_of_t(sample=sample,
                                                 t_par_count=t_par_count,
                                                 max_lag=max_lag)
        threshold_hat_double_array[:, replication] = lrv_hat_threshold_of_t(
            cov_double_array=cov_double_array,
            sample_size=sample_size,
            noise_type=noise_type,
            sd_type=sd_type,
            sample_type=sample_type)

    plot_double_array(x_array=t_par_array,
                      hat_double_array=threshold_hat_double_array,
                      true_array=true_lrv_array,
                      title="Thresholds vs true lrv",
                      x_label="t par",
                      par_list=par_list)
示例#14
0
def compute_and_save_nw_threshold_single_t(sample_size_from: int,
                                           sample_size_to: int,
                                           sample_size_by: int,
                                           replication_count: int,
                                           mean: int,
                                           sigma: int,
                                           noise_type: str,
                                           sd_type: str,
                                           t_par="free",
                                           sample_type="ma1"):
    """
    Compare the threshold and Newey-West LRV estimates across sample sizes.

    For a series of sample sizes,
    this function generates r samples for each sample size,
    Illustrated in
    412 LRV3a / computing 2 / project 2 / vs_sample_size / M: NW and T vs sample size
    Saves 6 image files.
    For each of the two estimates, it computes and depicts the base estimates,
    and then all 4 precision indicators.
    It may be either for a given t or for t-free.

    Parameters:
        sample_size_from, sample_size_to, sample_size_by: passed to
            ``np.arange`` as start, stop and step, so ``sample_size_to``
            itself is excluded from the grid of sample sizes.
        replication_count: number of samples (r) drawn per sample size.
        mean, sigma, noise_type: forwarded to the sample generator.
        sd_type: forwarded to the threshold estimator.
        t_par: either the string "free" or a number (a fixed t).
        sample_type: "ma1" or "ma3"; selects the sample generator and the
            matching true-LRV function.

    Raises:
        ValueError: if ``t_par`` is neither "free" nor a number, or if
            ``sample_type`` is neither "ma1" nor "ma3".
    """
    # Validate up front so a bad argument fails with a clear message before
    # any simulation work starts (an unknown sample_type used to surface
    # later as an UnboundLocalError).
    if sample_type not in ("ma1", "ma3"):
        raise ValueError(
            'sample_type parameter should be "ma1" or "ma3", not {!r}'.format(
                sample_type))
    t_par_is_number = isinstance(t_par, numbers.Number)
    if not (t_par_is_number or t_par == 'free'):
        # format/!r instead of string concatenation: concatenating a
        # non-string t_par (e.g. None) raised TypeError instead of ValueError.
        raise ValueError(
            't_par parameter should be "free" or float number, not {!r}'.format(
                t_par))

    par_list = {
        "replication_count": replication_count,
        "mean": mean,
        "sigma": sigma,
        "noise_type": noise_type,
        "sd_type": sd_type,
        "t_par": t_par,
        "sample_type": sample_type
    }

    sample_size_array = np.arange(start=sample_size_from,
                                  stop=sample_size_to,
                                  step=sample_size_by)
    size_count = len(sample_size_array)

    # compute max lags for threshold and Newey-West
    support_bound_array = [
        int(support_bound(sample_size=sample_size)) + 1
        for sample_size in sample_size_array
    ]
    threshold_max_lag_array = [
        threshold_max_lag(sample_size=sample_size)
        for sample_size in sample_size_array
    ]
    # Elementwise maximum: the covariance column for each sample size must be
    # long enough for BOTH estimators. (max() applied to the two lists would
    # compare them lexicographically and return one whole list.)
    max_lag_array = [
        max(threshold_lag, nw_lag)
        for threshold_lag, nw_lag in zip(threshold_max_lag_array,
                                         support_bound_array)
    ]

    # empty double arrays for estimates: rows = replications,
    # columns = sample sizes
    threshold_double_array = np.full(shape=(replication_count, size_count),
                                     fill_value=np.nan)
    nw_double_array = np.full(shape=(replication_count, size_count),
                              fill_value=np.nan)

    # compute one of two true values (fixed t or t-free); it does not depend
    # on the sample size, so it is simply repeated along the x axis
    if t_par_is_number:
        if sample_type == "ma1":
            true_lrv_value = true_lrv_ma1_of_single_t(sigma=sigma,
                                                      t_par=t_par)
        else:
            true_lrv_value = true_lrv_ma3_of_single_t(sigma=sigma,
                                                      t_par=t_par)
    else:
        if sample_type == "ma1":
            true_lrv_value = true_lrv_ma1_t_free(sigma=sigma)
        else:
            true_lrv_value = true_lrv_ma3_t_free(sigma=sigma)
    true_LRV_array = np.repeat(true_lrv_value, size_count)

    # the generator choice is loop-invariant, so pick it once
    if sample_type == "ma1":
        generate_sample = diagonal_sample_tvma1
    else:
        generate_sample = diagonal_sample_tvma3

    for col_index, sample_size in enumerate(sample_size_array):
        max_lag = max_lag_array[col_index]
        threshold_max_lag_value = threshold_max_lag_array[col_index]
        nw_max_lag_value = support_bound_array[col_index]
        for replication in range(replication_count):
            sample = generate_sample(sample_size=sample_size,
                                     mean=mean,
                                     sigma=sigma,
                                     noise_type=noise_type)
            if t_par_is_number:
                cov_hat_column = cov_column_of_t(sample=sample,
                                                 t_par=t_par,
                                                 max_lag=max_lag)
            else:
                cov_hat_column = cov_column_t_free(sample=sample,
                                                   max_lag=max_lag)
            # each estimator only sees the leading lags it is entitled to
            threshold_double_array[replication, col_index] = \
                lrv_hat_threshold_t_free(
                    cov_hat_column=cov_hat_column[:threshold_max_lag_value],
                    sample_size=sample_size,
                    noise_type=noise_type,
                    sd_type=sd_type,
                    sample_type=sample_type)
            nw_double_array[replication, col_index] = lrv_hat_nw_t_free(
                cov_column=cov_hat_column[:nw_max_lag_value],
                sample_size=sample_size)

    col_names = [
        "sample size " + str(sample_size) for sample_size in sample_size_array
    ]
    threshold_double_array_df = pd.DataFrame(threshold_double_array,
                                             columns=col_names)
    nw_double_array_df = pd.DataFrame(nw_double_array, columns=col_names)

    plot_ridgline(hat_double_array=threshold_double_array_df,
                  title="Threshold ridgline",
                  x_label="value",
                  par_list=par_list)

    plot_ridgline(hat_double_array=nw_double_array_df,
                  title="Newey-West ridgline",
                  x_label="value",
                  par_list=par_list)

    plot_double_array(x_array=sample_size_array,
                      hat_double_array=threshold_double_array,
                      true_array=true_LRV_array,
                      title="Threshold LRV t = {0}".format(t_par),
                      x_label="sample size",
                      par_list=par_list,
                      axis='row',
                      true_label='True lrv',
                      y_label='LRV')

    plot_double_array(x_array=sample_size_array,
                      hat_double_array=nw_double_array,
                      true_array=true_LRV_array,
                      title="Newey-West LRV t = {0}".format(t_par),
                      x_label="sample size",
                      par_list=par_list,
                      axis='row',
                      true_label='True lrv',
                      y_label='LRV')

    arrays_dict = {
        "Newey-West": nw_double_array,
        "Threshold": threshold_double_array
    }

    compute_and_save_multi_precision_of_t(true_array=true_LRV_array,
                                          est_dict=arrays_dict,
                                          par_list=par_list,
                                          x_label="sample size",
                                          x_array=sample_size_array)
示例#15
0
def compute_and_save_nw_vs_threshold(sample_size: int,
                                     t_par_count: int,
                                     mean: int,
                                     sigma: int,
                                     noise_type: str,
                                     sd_type: str,
                                     sample_type: str = "ma1"):
    """
    Plot the Newey-West and threshold LRV estimates of one sample against t.

    Illustrated in
    394 LRV 3a / computing 2 / project 2 / Threshold / N: compute_and_save_nw_vs_threshold
    It saves one image file of 2 t-dependent plots. For two estimates.
    No precision is computed here so far.

    Parameters:
        sample_size: size of the single generated sample.
        t_par_count: number of t values, passed to ``create_t_par_array``
            and ``cov_double_array_of_t``.
        mean, sigma, noise_type: forwarded to the sample generator.
        sd_type: forwarded to the threshold estimator.
        sample_type: "ma1" or "ma3"; selects the sample generator and the
            matching true-LRV function.

    Raises:
        ValueError: if ``sample_type`` is neither "ma1" nor "ma3".
    """
    # Fail early with a clear message; an unknown sample_type previously left
    # `sample` and `true_lrv_array` unbound and crashed further down.
    if sample_type not in ("ma1", "ma3"):
        raise ValueError(
            'sample_type parameter should be "ma1" or "ma3", not {!r}'.format(
                sample_type))

    par_list = {
        "sample_size": sample_size,
        "t_par_count": t_par_count,
        "mean": mean,
        "sigma": sigma,
        "noise_type": noise_type,
        "sd_type": sd_type,
        "sample_type": sample_type
    }

    t_par_array = create_t_par_array(t_par_count=t_par_count)

    # true curve first, then the sample: same call order as before
    if sample_type == "ma1":
        true_lrv_array = true_lrv_ma1_of_t(sigma=sigma,
                                           t_par_array=t_par_array)
        sample = diagonal_sample_tvma1(sample_size=sample_size,
                                       mean=mean,
                                       sigma=sigma,
                                       noise_type=noise_type)
    else:
        true_lrv_array = true_lrv_ma3_of_t(sigma=sigma,
                                           t_par_array=t_par_array)
        sample = diagonal_sample_tvma3(sample_size=sample_size,
                                       mean=mean,
                                       sigma=sigma,
                                       noise_type=noise_type)

    # lag limits of the two estimators
    support_bound_value = int(support_bound(sample_size=sample_size)) + 1
    threshold_max_lag_value = threshold_max_lag(sample_size=sample_size)

    # covariances are computed once, up to the larger of the two limits;
    # each estimator then receives only its own leading rows
    max_lag = max(support_bound_value, threshold_max_lag_value)
    cov_double_array = cov_double_array_of_t(sample=sample,
                                             t_par_count=t_par_count,
                                             max_lag=max_lag)

    nw_lrv_array = lrv_hat_nw_of_t(
        cov_double_array=cov_double_array[:support_bound_value, :],
        sample_size=sample_size)
    threshold_lrv_array = lrv_hat_threshold_of_t(
        cov_double_array=cov_double_array[:threshold_max_lag_value, :],
        sample_size=sample_size,
        noise_type=noise_type,
        sd_type=sd_type,
        sample_type=sample_type)

    arrays_dict = {
        "Newey-West LRV": nw_lrv_array,
        "Threshold LRV": threshold_lrv_array
    }

    plot_arrays(x_array=t_par_array,
                arrays_dict=arrays_dict,
                title="Threshold vs Newey-West",
                x_label="t par",
                par_list=par_list,
                true_array=true_lrv_array,
                y_label="LRV")