Пример #1
0
def circular_block_bootstrap_method(X, Y, block_size=80, n_samples=50):
    """Draw paired circular block bootstrap replicates of ``X`` and ``Y``.

    ``X`` is given to ``CircularBlockBootstrap`` positionally and ``Y`` under
    the keyword ``y``; each replicate is read back as the first positional
    item and the ``'y'`` keyword item respectively.

    Parameters
    ----------
    X, Y
        Data sets to resample together.
    block_size : int
        Block length handed to ``CircularBlockBootstrap``.
    n_samples : int
        Number of bootstrap replicates to draw.

    Returns
    -------
    list of tuple
        One ``(resampled_X, resampled_Y)`` pair per replicate.
    """
    sampler = CircularBlockBootstrap(block_size, X, y=Y)
    return [
        (draw[0][0], draw[1]['y'])
        for draw in sampler.bootstrap(n_samples)
    ]
 def circular_block_bootstrap_function(self):
     """Store circular block bootstrap replicates of ``self.time_series``.

     Draws ``self.bootstrap_sampling_times`` replicates with block length
     ``self.block_size`` and writes them to
     ``self.bootstrapped_time_series_arrays`` as a 2-D array with one row
     per replicate.
     """
     sampler = CircularBlockBootstrap(self.block_size, self.time_series)
     n_draws = self.bootstrap_sampling_times
     self.bootstrapped_time_series_arrays = np.array(
         [draw[0][0] for draw in sampler.bootstrap(n_draws)])
     # Flatten any trailing axes so the result is (replicates, observations).
     stacked = self.bootstrapped_time_series_arrays
     self.bootstrapped_time_series_arrays = np.reshape(
         stacked, (n_draws, len(stacked[0])))
Пример #3
0
def generateDatasets(data, n, block_size):
    """Create ``n`` circular block bootstrap replicates of ``data``.

    A new ``CircularBlockBootstrap`` is built for every replicate and asked
    for a single draw, whose first positional item is kept.

    Parameters
    ----------
    data : array-like
        Data to resample; converted with ``np.array`` before use.
    n : int
        Number of replicates to generate.
    block_size : int
        Block length handed to ``CircularBlockBootstrap``.

    Returns
    -------
    list of numpy.ndarray
        The ``n`` resampled data sets (empty when ``n`` is not positive).
    """
    replicates = []
    for _ in range(n):
        data = np.array(data)
        sampler = CircularBlockBootstrap(block_size, data)
        # Only one draw is requested, so the last item is that draw.
        draws = [draw[0][0] for draw in sampler.bootstrap(1)]
        replicates.append(np.array(draws[-1]))
    return replicates
Пример #4
0
    def test_str(self):
        """Check ``str``, ``repr`` and the HTML repr of the bootstrap classes."""
        # IID bootstrap built from one positional input.
        bs = IIDBootstrap(self.y_series)
        ident = hex(id(bs))
        plain = 'IID Bootstrap(no. pos. inputs: 1, no. keyword inputs: 0)'
        assert_equal(str(bs), plain)
        # The repr is the str with the object ID appended inside the parens.
        assert_equal(bs.__repr__(), plain[:-1] + ', ID: ' + ident + ')')
        html = ('<strong>IID Bootstrap</strong>('
                '<strong>no. pos. inputs</strong>: 1, '
                '<strong>no. keyword inputs</strong>: 0, '
                '<strong>ID</strong>: ' + ident + ')')
        assert_equal(bs._repr_html(), html)

        # Stationary bootstrap built from two positional inputs.
        bs = StationaryBootstrap(10, self.y_series, self.x_df)
        ident = hex(id(bs))
        plain = ('Stationary Bootstrap(block size: 10, '
                 'no. pos. inputs: 2, no. keyword inputs: 0)')
        assert_equal(str(bs), plain)
        assert_equal(bs.__repr__(), plain[:-1] + ', ID: ' + ident + ')')

        # Circular block bootstrap built from keyword inputs only.
        bs = CircularBlockBootstrap(block_size=20,
                                    y=self.y_series,
                                    x=self.x_df)
        ident = hex(id(bs))
        plain = ('Circular Block Bootstrap(block size: 20, '
                 'no. pos. inputs: 0, no. keyword inputs: 2)')
        assert_equal(str(bs), plain)
        assert_equal(bs.__repr__(), plain[:-1] + ', ID: ' + ident + ')')
        html = ('<strong>Circular Block Bootstrap</strong>'
                '(<strong>block size</strong>: 20, '
                '<strong>no. pos. inputs</strong>: 0, '
                '<strong>no. keyword inputs</strong>: 2,'
                ' <strong>ID</strong>: ' + ident + ')')
        assert_equal(bs._repr_html(), html)
Пример #5
0
    def test_smoke(self):
        """Smoke-test ``cov`` on the block bootstraps; results are not checked."""
        num_bootstrap = 20

        def func(y):
            return y.mean(axis=0)

        # (bootstrap class, block size) pairs, in the order they are run.
        cases = (
            (StationaryBootstrap, 13),
            (MovingBlockBootstrap, 13),
            (CircularBlockBootstrap, 13),
            (MovingBlockBootstrap, 10),
            (CircularBlockBootstrap, 10),
        )
        for bootstrap_cls, block_size in cases:
            bs = bootstrap_cls(block_size, self.y)
            bs.cov(func, reps=num_bootstrap)
Пример #6
0
def block_bootstrap(series, n_samples, bs_type='Stationary', block_size=10):
    '''
    Computes bootstrapped samples of series.

    Inputs:
        series: pandas Series indexed by time
        n_samples: # bootstrapped samples to output
        bs_type ('Stationary'): type of bootstrapping to perform.
            Options include ['Stationary', 'Circular']
        block_size: # size of resampling blocks. Should be big enough to
            capture important frequencies in the series

    Output:
        DataFrame indexed by sample number (starting at 1) and time, with
        the resampled values in column 'x'

    Raises:
        ValueError: if bs_type is not one of the supported options

    '''

    # Reject unsupported types up front; otherwise no samples are produced
    # and the failure surfaces later as an unrelated pd.concat error.
    if bs_type not in ('Stationary', 'Circular'):
        raise ValueError(
            "bs_type must be 'Stationary' or 'Circular', got %r" % (bs_type,))

    if bs_type == 'Stationary':
        bs = StationaryBootstrap(block_size, series)
    else:
        bs = CircularBlockBootstrap(block_size, series)

    # Every sample is reported against the original time stamps
    times = series.index.values

    # One frame per bootstrap sample, numbered from 1
    list_samples = [
        pd.DataFrame({'sample': count, 'time': times, 'x': data[0][0]})
        for count, data in enumerate(bs.bootstrap(n_samples), start=1)
    ]

    # Concatenate list of samples
    df_samples = pd.concat(list_samples)
    df_samples.set_index(['sample', 'time'], inplace=True)

    # Output DataFrame of samples
    return df_samples
Пример #7
0
def test_uneven_sampling(bs_setup):
    """Resampled keyword inputs keep their original shapes.

    Uses a block size of 31 for both the moving and the circular block
    bootstrap (presumably chosen so the blocks do not tile the sample
    evenly - confirm against the fixture).
    """
    y_shape = bs_setup.y_series.shape
    x_shape = bs_setup.x_df.shape
    for bootstrap_cls in (MovingBlockBootstrap, CircularBlockBootstrap):
        bs = bootstrap_cls(block_size=31,
                           y=bs_setup.y_series,
                           x=bs_setup.x_df)
        for _, kw in bs.bootstrap(10):
            assert kw["y"].shape == y_shape
            assert kw["x"].shape == x_shape
Пример #8
0
 def test_uneven_sampling(self):
     """Resampled keyword inputs keep their original shapes.

     Uses a block size of 31 for both the moving and the circular block
     bootstrap (presumably chosen so the blocks do not tile the sample
     evenly - confirm against the fixture).
     """
     y_shape = self.y_series.shape
     x_shape = self.x_df.shape
     for bootstrap_cls in (MovingBlockBootstrap, CircularBlockBootstrap):
         bs = bootstrap_cls(block_size=31, y=self.y_series, x=self.x_df)
         for _, kw in bs.bootstrap(10):
             assert kw['y'].shape == y_shape
             assert kw['x'].shape == x_shape
Пример #9
0
def test_str(bs_setup):
    """Check ``str``, ``repr`` and the HTML repr of the bootstrap classes."""
    # IID bootstrap built from one positional input.
    bs = IIDBootstrap(bs_setup.y_series)
    ident = hex(id(bs))
    plain = "IID Bootstrap(no. pos. inputs: 1, no. keyword inputs: 0)"
    assert_equal(str(bs), plain)
    # The repr is the str with the object ID appended inside the parens.
    assert_equal(bs.__repr__(), plain[:-1] + ", ID: " + ident + ")")
    html = ("<strong>IID Bootstrap</strong>("
            "<strong>no. pos. inputs</strong>: 1, "
            "<strong>no. keyword inputs</strong>: 0, "
            "<strong>ID</strong>: " + ident + ")")
    assert_equal(bs._repr_html(), html)

    # Stationary bootstrap built from two positional inputs.
    bs = StationaryBootstrap(10, bs_setup.y_series, bs_setup.x_df)
    ident = hex(id(bs))
    plain = ("Stationary Bootstrap(block size: 10, no. pos. "
             "inputs: 2, no. keyword inputs: 0)")
    assert_equal(str(bs), plain)
    assert_equal(bs.__repr__(), plain[:-1] + ", ID: " + ident + ")")

    # The two fixed-block bootstraps share one message layout and differ
    # only in the leading class label; both are built from keyword inputs.
    labelled_classes = (
        (CircularBlockBootstrap, "Circular Block Bootstrap"),
        (MovingBlockBootstrap, "Moving Block Bootstrap"),
    )
    for bootstrap_cls, label in labelled_classes:
        bs = bootstrap_cls(block_size=20,
                           y=bs_setup.y_series,
                           x=bs_setup.x_df)
        ident = hex(id(bs))
        plain = (label + "(block size: 20, no. pos. "
                 "inputs: 0, no. keyword inputs: 2)")
        assert_equal(str(bs), plain)
        assert_equal(bs.__repr__(), plain[:-1] + ", ID: " + ident + ")")
        html = ("<strong>" + label + "</strong>"
                "(<strong>block size</strong>: 20, "
                "<strong>no. pos. inputs</strong>: 0, "
                "<strong>no. keyword inputs</strong>: 2,"
                " <strong>ID</strong>: " + ident + ")")
        assert_equal(bs._repr_html(), html)
Пример #10
0
    def cbb_bootstrap(self):
        """
        simulate paths with the circular block bootstrap and evaluate them

        Builds a ``CircularBlockBootstrap`` from ``self.blocksize`` and
        ``self.data``, simulates ``self.n_paths`` paths through ``boot`` and
        passes them to ``investment_horizons``; that call's result replaces
        ``self.store_output``. When ``self.store_sim`` is truthy, at most the
        first 100 simulated columns are kept in
        ``self.simulated_paths['CBB']``.

        params:
        -------

            - self: the instance holding the configuration read above

        return:
        -------

            - none
        """

        print("\nCIRCULAR BOOTSTRAP \n")
        sampler = CircularBlockBootstrap(self.blocksize, self.data)
        simulated = boot(
            N_paths=self.n_paths,
            method=sampler,
            obs_path=self.data,
            add_noise=self.add_noise,
        )

        if self.store_sim:
            # Cap the stored simulations at 100 columns.
            n_keep = min(simulated.shape[1], 100)
            self.simulated_paths['CBB'] = simulated.iloc[:, :n_keep]

        self.store_output = investment_horizons(
            observed_path=self.data,
            sims=simulated,
            investment_horizons=self.ih,
            sum_stats=self.stats,
            freq=self.frequency,
            perf_functions=self.perf_functions,
            store_output_dic=self.store_output,
            simulation_tech='CBB',
            plotting=self.plotting,
        )

        return None
Пример #11
0
def block_bootstrap(series,
              n_samples,
              bs_type = 'Stationary',
              block_size = 10
              ):

    '''
    Computes block-bootstrap samples of series.

    Args
    ----
    series: pd.Series
        Time-series data in the form of a Pandas Series indexed by time
    n_samples: int
        Number of bootstrapped samples to output.
    bs_type: {'Stationary', 'Circular'}
        Type of block-bootstrapping to perform.
    block_size: int
        Size of resampling blocks. Should be big enough to
        capture important frequencies in the series.

    Returns
    -------
    pd.DataFrame:
        DataFrame containing the block-bootstrapped samples of series
        in column 'x'. Indexed by sample number (starting at 1), then time.

    Raises
    ------
    ValueError:
        If bs_type is not one of the supported options.

    '''

    # Reject unsupported types up front; otherwise no samples are produced
    # and the failure surfaces later as an unrelated pd.concat error.
    if bs_type not in ('Stationary', 'Circular'):
        raise ValueError(
            "bs_type must be 'Stationary' or 'Circular', got %r" % (bs_type,))

    if bs_type == 'Stationary':
        bs = StationaryBootstrap(block_size, series)
    else:
        bs = CircularBlockBootstrap(block_size, series)

    # Every sample is reported against the original time stamps
    times = series.index.values

    # One frame per bootstrap sample, numbered from 1
    list_samples = [
        pd.DataFrame({'sample': count, 'time': times, 'x': data[0][0]})
        for count, data in enumerate(bs.bootstrap(n_samples), start=1)
    ]

    # Concatenate list of samples
    df_samples = pd.concat(list_samples)
    df_samples.set_index(['sample','time'], inplace=True)

    # Output DataFrame of samples
    return df_samples
Пример #12
0
def block_bootstrap(df):
    """Print the first circular block bootstrap replicate of ``df`` and exit.

    The block length comes from ``opt_block_length`` applied to the
    ``'target'`` column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to resample; must contain a ``'target'`` column.

    Raises
    ------
    SystemExit
        Raised via ``sys.exit()`` right after the first replicate is printed.
    """
    bstar = opt_block_length(df[['target']], bootstrap_type='Circular', rnd=True)
    bs = CircularBlockBootstrap(bstar, df)
    for data in bs.bootstrap(100):
        # Call form of print: valid on Python 3 and prints the same on Python 2.
        print(data[0][0])
        # NOTE(review): exits on the first replicate, so the other 99 draws
        # are never produced - looks like debugging code; confirm intent.
        sys.exit()
Пример #13
0
# Sorted list of the distinct sector labels.
sectors = list(sorted(set(company_sectors)))
# Drop the first row, coerce every column to numeric and take
# log-differences between consecutive rows.
df_2 = df.iloc[1:, :]
df_2 = df_2.apply(pd.to_numeric)
df_2 = np.log(df_2) - np.log(df_2.shift(1))
# The first differenced row has nothing to difference against, so skip it.
X = df_2.values[1:, :]

# Number of bootstrap replicates to draw below.
num_removal_runs = 1000
no_samples = X.shape[0]
p = X.shape[1]

# Keep the first `window_size` rows and the first 70 columns, and trim the
# per-column metadata to match.
X_new = X[0:window_size, 0:70]
company_names = company_names[0:70]
company_sectors = company_sectors[0:70]

# `p` is rebound to the column count of the trimmed matrix.
p = X_new.shape[1]
# `bootstrap_size` is passed in the block-size position.
bs = CircularBlockBootstrap(bootstrap_size, X_new)
# p-by-p accumulators, one per correlation measure
# (presumably MST edge-presence counts - confirm against the code that fills them).
total_mst_prescence_spearman = np.zeros((p, p))
total_mst_prescence_pearson = np.zeros((p, p))
total_mst_prescence_tau = np.zeros((p, p))
pearson_msts = []
spearman_msts = []
tau_msts = []

pearson_full = []
spearman_full = []
tau_full = []
# Run counter for the progress message.
# NOTE(review): no increment is visible in this part of the loop - confirm.
i = 0
for data in bs.bootstrap(num_removal_runs):
    print("Run %s" % i)
    # First positional item of the replicate: the resampled X_new.
    X_bs = data[0][0]
def sim_returns(data, block_size=20, total_sim=10, random_seed=1):
    """Simulate price, return and dividend paths by circular block bootstrap.

    The ``retx`` values on rows where ``retd == 0`` are resampled. When every
    row qualifies, each simulated path is built from the resampled returns
    alone. Otherwise, for each dividend interval a dividend is predicted
    from the product and standard deviation of the interval's resampled
    returns, the closest observed dividend is looked up, and its observed
    ``retx`` is inserted at the dividend date.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``retd``, ``retx`` and ``prc``.
    block_size : int, default 20
        Block length handed to ``CircularBlockBootstrap``.
    total_sim : int, default 10
        Number of simulated paths.
    random_seed : int, default 1
        Seed for the ``numpy.random.RandomState`` driving the bootstrap.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(total_sim, len(data), 3)``; the last axis holds
        price, return and dividend payment, in that order.
    """
    retx_sim = data[data['retd'] == 0]['retx']
    rs = np.random.RandomState(random_seed)
    # Use the caller-supplied block_size rather than a fixed literal.
    retx_sim_mod = CircularBlockBootstrap(block_size, retx_sim,
                                          random_state=rs)
    sim = np.zeros((total_sim, len(data), 3))

    if len(retx_sim) == len(data):
        # No dividend rows: paths come straight from the resampled returns.
        for count, y in enumerate(retx_sim_mod.bootstrap(total_sim)):
            ls_retx = y[0][0]
            ls_retx.index = retx_sim.index
            ls_retx = ls_retx.sort_index()
            # First return is pinned to 1 so the path starts at the
            # observed initial price.
            ls_retx[0] = 1
            prc = ls_retx * 0
            prc[0] = data.prc[0]
            cum_retx_all = ls_retx[1:].cumprod()
            prc[1:] = cum_retx_all * prc[0]
            div_pay = ls_retx * 0
            sim[count, :, :] = np.array([prc, ls_retx, div_pay]).T
    else:
        ls_div_days_between = days_between_div(data['retd'])
        div_day_cumsum = np.cumsum(ls_div_days_between)
        ls_div_dt = div_dates(data['retd'])
        ls_div_to_div_ret, ls_div_to_div_std, ls_div_ret = div_to_div_metrics(
            ls_div_days_between, data)
        reg = div_predict_linreg(ls_div_to_div_ret, ls_div_to_div_std,
                                 ls_div_ret)
        # Observed dividends and their returns do not change between
        # simulations, so extract them once.
        ls_of_div = np.array(data[data['retd'] != 0]['retd'])
        ls_of_div_retx = np.array(data[data['retd'] != 0]['retx'])

        for count, y in enumerate(retx_sim_mod.bootstrap(total_sim)):
            ls_retx = y[0][0]
            ls_sim_div = []
            ls_retx.index = retx_sim.index
            start = 1
            for x in range(len(div_day_cumsum)):
                cum_retx = np.array(ls_retx[start - 1:div_day_cumsum[x] -
                                            1]).prod()
                cum_std = np.array(ls_retx[start - 1:div_day_cumsum[x] -
                                           1]).std()
                pred_div = reg.predict(np.array([[cum_retx], [cum_std]]).T)
                # Closest observed dividend to the prediction; its observed
                # return is used on the dividend date.
                sim_div_idx = min(ls_of_div,
                                  key=lambda i: abs(i - pred_div[0]))
                sim_rx = ls_of_div_retx[np.where(
                    ls_of_div == sim_div_idx)[0][0]]
                ls_retx.loc[ls_div_dt[x]] = sim_rx
                ls_sim_div.append(pred_div[0])

            ls_retx = ls_retx.sort_index()
            # First return is pinned to 1 so the path starts at the
            # observed initial price.
            ls_retx[0] = 1
            prc = ls_retx * 0
            prc[0] = data.prc[0]
            cum_retx_all = ls_retx[1:].cumprod()
            prc[1:] = cum_retx_all * prc[0]
            div_pay = ls_retx * 0
            div_dic = dict(zip(ls_div_dt, ls_sim_div))
            for i in ls_div_dt:
                div_pay[i] = div_dic[i]

            sim[count, :, :] = np.array([prc, ls_retx, div_pay]).T
    return sim
Пример #15
0
def circular_block_bootstrap(block_size, dataset, bootstrap_resampling_times,
                             p, ar_parameters_original,
                             parameters_resampling_times):
    """Score AR fits on circular block bootstrap replicates of ``dataset``.

    Steps:
        1. draw ``bootstrap_resampling_times`` replicates with the given
           block size and stack them one per row
        2. fit each replicate with ``ar_model_fit(replicate, p)``; the first
           element of its result is stored as the fitted parameters, the
           second as the Model Right (residual test) outcome
        3. run ``model_parameters_comparison`` against
           ``ar_parameters_original`` and derive the pass rates below

    Returns a tuple, in this order:
        pass_2_tests_rate: rows passing every parameter comparison, divided
            by the number of Model Right results
        half_test1_half_test2: 0.7 * model_right_test_pass_rate
            + 0.3 * parameters_same_test_pass_rate
        model_right_test_pass_rate: mean of the Model Right results
        parameters_same_test_pass_rate: share of comparison rows in which
            all parameters pass
        parameters_similarity_test_rate: mean of the per-row comparison means
        parameters_accuracy_vs_model_right: sum of the per-row comparison
            means divided by the number of Model Right results
        block_size, parameters_resampling_times, p: echoed back unchanged
    """

    # 1. re-sample the time series, one replicate per row
    sampler = CircularBlockBootstrap(block_size, dataset)
    draws = np.array(
        [draw[0][0] for draw in sampler.bootstrap(bootstrap_resampling_times)])
    draws = np.reshape(draws, (bootstrap_resampling_times, len(draws[0])))
    sleep(0.05)

    # 2. fit every replicate; results are labelled 1..N
    residual_tests = pd.Series()
    fitted_params = pd.DataFrame()
    labels = np.arange(len(draws)) + 1
    for label, draw in zip(labels, draws):
        fit = ar_model_fit(draw, p)
        residual_tests.loc[label] = fit[1]
        fitted_params[label] = fit[0]
    fitted_params = fitted_params.transpose()
    sleep(0.05)

    # 3. compare the fitted parameters with the original ones
    similarity = model_parameters_comparison(
        fitted_params, ar_parameters_original, residual_tests)
    per_row_rate = np.mean(similarity, axis=1)
    parameters_similarity_test_rate = np.mean(per_row_rate)

    model_right_test_pass_rate = np.mean(residual_tests)
    sleep(0.05)

    # Count the rows in which every parameter passed the comparison.
    n_rows = len(similarity)
    n_cols = len(similarity.columns)
    counts = sum(
        1 for row in range(n_rows)
        if np.sum(similarity.iloc[row, :]) == n_cols)

    n_fits = len(residual_tests)
    parameters_same_test_pass_rate = counts / n_rows
    parameters_accuracy_vs_model_right = np.sum(per_row_rate) / n_fits
    pass_2_tests_rate = counts / n_fits
    half_test1_half_test2 = (0.7 * model_right_test_pass_rate
                             + 0.3 * parameters_same_test_pass_rate)
    sleep(0.05)
    return (pass_2_tests_rate, half_test1_half_test2,
            model_right_test_pass_rate, parameters_same_test_pass_rate,
            parameters_similarity_test_rate,
            parameters_accuracy_vs_model_right, block_size,
            parameters_resampling_times, p)
Пример #16
0
    if well == "170":
        cols = [0, 1, 2, 3]
        storm_avg = 120
    if well == "175":
        cols = [0, 1, 2, 3]
        storm_avg = 132

    # set base path to store results
    path = "C:/Users/Ben Bowes/PycharmProjects/Tensorflow/mmps" + well + "_bootstraps/"

    # load dataset
    dataset_raw = pd.read_csv("C:/Users/Ben Bowes/Documents/HRSD GIS/Site Data/Data_2010_2018/MMPS_" + well +
                              "_no_blanks_SI.csv", index_col=None, parse_dates=True, infer_datetime_format=True)

    dataset_raw_np = np.array(dataset_raw)
    dataset_np = dataset_raw_np[:, cols]

    # set up bootstrap parameters
    bootstrap = CircularBlockBootstrap(storm_avg, dataset_np)

    bs_df_list = []
    results = bootstrap.apply(bs_to_df, 1000)

    count = 0
    for bs in bs_df_list:
        if count % 25 == 0:
            print("well", well, "bootstrap:", count)
        f = path + "bs" + str(count) + ".csv"
        bs.to_csv(f)
        count += 1