def circular_block_bootstrap_method(X, Y, block_size=80, n_samples=50):
    """Draw circular-block-bootstrap resamples of the pair (X, Y).

    X is passed positionally and Y as the keyword ``y``, so both are
    resampled with the same block indices on every replication.

    :param X: positional data to resample jointly with Y
    :param Y: keyword data, resampled alongside X
    :param block_size: length of the circular blocks
    :param n_samples: number of bootstrap replications to draw
    :return: list of (X_resample, Y_resample) tuples, one per replication
    """
    sampler = CircularBlockBootstrap(block_size, X, y=Y)
    return [(draw[0][0], draw[1]['y']) for draw in sampler.bootstrap(n_samples)]
def circular_block_bootstrap_function(self):
    """Resample self.time_series with the circular block bootstrap.

    Stores the draws on ``self.bootstrapped_time_series_arrays`` as a
    2-D array of shape (bootstrap_sampling_times, series_length).
    """
    sampler = CircularBlockBootstrap(self.block_size, self.time_series)
    draws = [draw[0][0] for draw in sampler.bootstrap(self.bootstrap_sampling_times)]
    stacked = np.array(draws)
    # Force the (n_replications, series_length) layout explicitly.
    self.bootstrapped_time_series_arrays = np.reshape(
        stacked, (self.bootstrap_sampling_times, len(stacked[0])))
def generateDatasets(data, n, block_size):
    """Generate ``n`` circular-block-bootstrap resamples of ``data``.

    Fix: the original converted ``data`` to an ndarray inside the loop on
    every iteration; that conversion is loop-invariant and is now hoisted.

    :param data: array-like series to resample
    :param n: number of bootstrap datasets to generate
    :param block_size: length of the circular blocks
    :return: list of ``n`` ndarray resamples, each the length of ``data``
    """
    data = np.array(data)  # loop-invariant conversion, done once
    newDatasets = []
    for _ in range(n):
        # A fresh sampler per draw preserves the original's behaviour
        # (each sampler carries its own freshly seeded RNG).
        bs = CircularBlockBootstrap(block_size, data)
        for draw in bs.bootstrap(1):
            newDatasets.append(np.array(draw[0][0]))
    return newDatasets
def test_str(self):
    """Smoke-check str/repr/_repr_html formatting for the bootstraps."""

    def with_id(text, obj):
        # repr == str with ", ID: 0x..." inserted before the closing paren.
        return text[:-1] + ', ID: ' + hex(id(obj)) + ')'

    bs = IIDBootstrap(self.y_series)
    expected = 'IID Bootstrap(no. pos. inputs: 1, no. keyword inputs: 0)'
    assert_equal(str(bs), expected)
    assert_equal(bs.__repr__(), with_id(expected, bs))
    html = ('<strong>IID Bootstrap</strong>('
            '<strong>no. pos. inputs</strong>: 1, '
            '<strong>no. keyword inputs</strong>: 0, '
            '<strong>ID</strong>: ' + hex(id(bs)) + ')')
    assert_equal(bs._repr_html(), html)

    bs = StationaryBootstrap(10, self.y_series, self.x_df)
    expected = ('Stationary Bootstrap(block size: 10, '
                'no. pos. inputs: 2, no. keyword inputs: 0)')
    assert_equal(str(bs), expected)
    assert_equal(bs.__repr__(), with_id(expected, bs))

    bs = CircularBlockBootstrap(block_size=20, y=self.y_series, x=self.x_df)
    expected = ('Circular Block Bootstrap(block size: 20, '
                'no. pos. inputs: 0, no. keyword inputs: 2)')
    assert_equal(str(bs), expected)
    assert_equal(bs.__repr__(), with_id(expected, bs))
    html = ('<strong>Circular Block Bootstrap</strong>'
            '(<strong>block size</strong>: 20, '
            '<strong>no. pos. inputs</strong>: 0, '
            '<strong>no. keyword inputs</strong>: 2,'
            ' <strong>ID</strong>: ' + hex(id(bs)) + ')')
    assert_equal(bs._repr_html(), html)
def test_smoke(self):
    """Ensure cov() runs without error for each block-bootstrap flavour."""
    num_bootstrap = 20

    def mean_stat(y):
        return y.mean(axis=0)

    # Same class/block-size pairs as before, exercised in the same order.
    configurations = [
        (StationaryBootstrap, 13),
        (MovingBlockBootstrap, 13),
        (CircularBlockBootstrap, 13),
        (MovingBlockBootstrap, 10),
        (CircularBlockBootstrap, 10),
    ]
    for bootstrap_cls, block_size in configurations:
        bs = bootstrap_cls(block_size, self.y)
        bs.cov(mean_stat, reps=num_bootstrap)
def block_bootstrap(series, n_samples, bs_type='Stationary', block_size=10):
    '''
    Computes bootstrapped samples of series.

    Inputs:
        series: pandas Series indexed by time
        n_samples: # bootstrapped samples to output
        bs_type ('Stationary'): type of bootstrapping to perform.
            Options include ['Stationary', 'Circular']
        block_size: # size of resampling blocks. Should be big enough to
            capture important frequencies in the series

    Output:
        DataFrame indexed by sample number and time

    Raises:
        ValueError: if bs_type is not one of the supported options.
    '''
    # Map option name -> bootstrap class; removes the duplicated sampling
    # loop the 'Stationary'/'Circular' branches previously contained.
    bootstrappers = {
        'Stationary': StationaryBootstrap,
        'Circular': CircularBlockBootstrap,
    }
    if bs_type not in bootstrappers:
        # Fail loudly instead of reaching pd.concat([]) below with an
        # unhelpful "No objects to concatenate" error.
        raise ValueError(
            "bs_type must be one of {}, got {!r}".format(
                sorted(bootstrappers), bs_type))
    bs = bootstrappers[bs_type](block_size, series)

    # One DataFrame per bootstrap replication, numbered from 1.
    list_samples = []
    for count, data in enumerate(bs.bootstrap(n_samples), start=1):
        list_samples.append(pd.DataFrame({
            'sample': count,
            'time': series.index.values,
            'x': data[0][0]
        }))

    # Concatenate list of samples
    df_samples = pd.concat(list_samples)
    df_samples.set_index(['sample', 'time'], inplace=True)

    # Output DataFrame of samples
    return df_samples
def test_uneven_sampling(bs_setup):
    """Keyword inputs of uneven length keep their shapes after resampling."""
    for bootstrap_cls in (MovingBlockBootstrap, CircularBlockBootstrap):
        bs = bootstrap_cls(block_size=31, y=bs_setup.y_series, x=bs_setup.x_df)
        for _, resampled in bs.bootstrap(10):
            assert resampled["y"].shape == bs_setup.y_series.shape
            assert resampled["x"].shape == bs_setup.x_df.shape
def test_uneven_sampling(self):
    """Keyword inputs of uneven length keep their shapes after resampling."""
    for bootstrap_cls in (MovingBlockBootstrap, CircularBlockBootstrap):
        bs = bootstrap_cls(block_size=31, y=self.y_series, x=self.x_df)
        for _, resampled in bs.bootstrap(10):
            assert resampled['y'].shape == self.y_series.shape
            assert resampled['x'].shape == self.x_df.shape
def test_str(bs_setup):
    """Check str, repr and _repr_html output for each bootstrap class.

    For every class: str() gives the plain summary, __repr__() is the
    same text with ", ID: 0x..." inserted before the closing paren, and
    (where checked) _repr_html() wraps field names in <strong> tags.
    """
    bs = IIDBootstrap(bs_setup.y_series)
    expected = "IID Bootstrap(no. pos. inputs: 1, no. keyword inputs: 0)"
    assert_equal(str(bs), expected)
    # repr appends the object id before the closing parenthesis.
    expected = expected[:-1] + ", ID: " + hex(id(bs)) + ")"
    assert_equal(bs.__repr__(), expected)
    expected = ("<strong>IID Bootstrap</strong>(" +
                "<strong>no. pos. inputs</strong>: 1, " +
                "<strong>no. keyword inputs</strong>: 0, " +
                "<strong>ID</strong>: " + hex(id(bs)) + ")")
    assert_equal(bs._repr_html(), expected)

    bs = StationaryBootstrap(10, bs_setup.y_series, bs_setup.x_df)
    expected = ("Stationary Bootstrap(block size: 10, no. pos. "
                "inputs: 2, no. keyword inputs: 0)")
    assert_equal(str(bs), expected)
    expected = expected[:-1] + ", ID: " + hex(id(bs)) + ")"
    assert_equal(bs.__repr__(), expected)

    # Keyword-only construction: pos. inputs 0, keyword inputs 2.
    bs = CircularBlockBootstrap(block_size=20, y=bs_setup.y_series,
                                x=bs_setup.x_df)
    expected = ("Circular Block Bootstrap(block size: 20, no. pos. "
                "inputs: 0, no. keyword inputs: 2)")
    assert_equal(str(bs), expected)
    expected = expected[:-1] + ", ID: " + hex(id(bs)) + ")"
    assert_equal(bs.__repr__(), expected)
    expected = ("<strong>Circular Block Bootstrap</strong>" +
                "(<strong>block size</strong>: 20, " +
                "<strong>no. pos. inputs</strong>: 0, " +
                "<strong>no. keyword inputs</strong>: 2," +
                " <strong>ID</strong>: " + hex(id(bs)) + ")")
    assert_equal(bs._repr_html(), expected)

    bs = MovingBlockBootstrap(block_size=20, y=bs_setup.y_series,
                              x=bs_setup.x_df)
    expected = ("Moving Block Bootstrap(block size: 20, no. pos. "
                "inputs: 0, no. keyword inputs: 2)")
    assert_equal(str(bs), expected)
    expected = expected[:-1] + ", ID: " + hex(id(bs)) + ")"
    assert_equal(bs.__repr__(), expected)
    expected = ("<strong>Moving Block Bootstrap</strong>" +
                "(<strong>block size</strong>: 20, " +
                "<strong>no. pos. inputs</strong>: 0, " +
                "<strong>no. keyword inputs</strong>: 2," +
                " <strong>ID</strong>: " + hex(id(bs)) + ")")
    assert_equal(bs._repr_html(), expected)
def cbb_bootstrap(self):
    """
    Simulate paths with the circular block bootstrap and evaluate them.

    Runs ``boot`` with a CircularBlockBootstrap sampler over self.data,
    optionally caches up to the first 100 simulated paths on
    ``self.simulated_paths['CBB']``, then updates ``self.store_output``
    via ``investment_horizons``.

    params:
    -------
    - self: see above

    return:
    -------
    - none
    """
    print("\nCIRCULAR BOOTSTRAP \n")
    sampler = CircularBlockBootstrap(self.blocksize, self.data)
    out_cbb = boot(N_paths=self.n_paths, method=sampler,
                   obs_path=self.data, add_noise=self.add_noise)
    if self.store_sim:
        # Cache at most the first 100 simulated paths.
        n_keep = min(out_cbb.shape[1], 100)
        self.simulated_paths['CBB'] = out_cbb.iloc[:, :n_keep]
    self.store_output = investment_horizons(
        observed_path=self.data,
        sims=out_cbb,
        investment_horizons=self.ih,
        sum_stats=self.stats,
        freq=self.frequency,
        perf_functions=self.perf_functions,
        store_output_dic=self.store_output,
        simulation_tech='CBB',
        plotting=self.plotting)
    return None
def block_bootstrap(series, n_samples, bs_type='Stationary', block_size=10):
    '''
    Computes block-bootstrap samples of series.

    Args
    ----
    series: pd.Series
        Time-series data in the form of a Pandas Series indexed by time
    n_samples: int
        Number of bootstrapped samples to output.
    bs_type: {'Stationary', 'Circular'}
        Type of block-bootstrapping to perform.
    block_size: int
        Size of resampling blocks. Should be big enough to capture
        important frequencies in the series.

    Returns
    -------
    pd.DataFrame:
        DataFrame containing the block-bootstrapped samples of series.
        Indexed by sample number, then time.

    Raises
    ------
    ValueError
        If bs_type is not one of the supported options.
    '''
    # Dispatch on the bootstrap type; removes the duplicated sampling
    # loops the two branches previously contained.
    bootstrappers = {
        'Stationary': StationaryBootstrap,
        'Circular': CircularBlockBootstrap,
    }
    if bs_type not in bootstrappers:
        # Previously an unknown bs_type fell through to pd.concat([])
        # and crashed with "No objects to concatenate".
        raise ValueError(
            "bs_type must be one of {}, got {!r}".format(
                sorted(bootstrappers), bs_type))
    bs = bootstrappers[bs_type](block_size, series)

    # Build one DataFrame per replication, numbered from 1.
    list_samples = []
    for count, data in enumerate(bs.bootstrap(n_samples), start=1):
        list_samples.append(pd.DataFrame({'sample': count,
                                          'time': series.index.values,
                                          'x': data[0][0]}))

    # Concatenate list of samples
    df_samples = pd.concat(list_samples)
    df_samples.set_index(['sample', 'time'], inplace=True)

    # Output DataFrame of samples
    return df_samples
def block_bootstrap(df):
    """Debug helper: print the first circular-block-bootstrap resample of df.

    Fix: ``print data[0][0]`` is Python 2 print-statement syntax and is a
    SyntaxError under Python 3; it is now the ``print()`` function.

    NOTE: the ``sys.exit()`` inside the loop is preserved — the process
    deliberately stops after inspecting the first of the 100 requested
    replications.
    """
    # Optimal block length for the 'target' column (project helper).
    bstar = opt_block_length(df[['target']], bootstrap_type='Circular', rnd=True)
    bs = CircularBlockBootstrap(bstar, df)
    for data in bs.bootstrap(100):
        print(data[0][0])
        sys.exit()
# Script fragment: log-return preparation and circular-block-bootstrap
# runs for minimum-spanning-tree (MST) stability analysis.
# NOTE(review): the loop body at the end appears truncated in this view
# (the *_msts / *_full accumulators are set up but not yet filled here,
# and `i` is never incremented in the visible code) — confirm against the
# full file before editing.
sectors = list(sorted(set(company_sectors)))
# Drop the first row, coerce to numeric and take log returns.
df_2 = df.iloc[1:, :]
df_2 = df_2.apply(pd.to_numeric)
df_2 = np.log(df_2) - np.log(df_2.shift(1))
X = df_2.values[1:, :]
num_removal_runs = 1000
no_samples = X.shape[0]
p = X.shape[1]
# Restrict to the first window_size observations of the first 70 companies.
X_new = X[0:window_size, 0:70]
company_names = company_names[0:70]
company_sectors = company_sectors[0:70]
p = X_new.shape[1]  # p is rebound to the reduced company count
bs = CircularBlockBootstrap(bootstrap_size, X_new)
# Accumulators for edge-presence counts and per-run trees/correlations,
# one set per correlation measure (Spearman / Pearson / Kendall tau).
total_mst_prescence_spearman = np.zeros((p, p))
total_mst_prescence_pearson = np.zeros((p, p))
total_mst_prescence_tau = np.zeros((p, p))
pearson_msts = []
spearman_msts = []
tau_msts = []
pearson_full = []
spearman_full = []
tau_full = []
i = 0
for data in bs.bootstrap(num_removal_runs):
    print("Run %s" % i)
    # Positional bootstrap data: the resampled X_new matrix.
    X_bs = data[0][0]
def sim_returns(data, block_size=20, total_sim=10, random_seed=1):
    """Simulate price/return/dividend paths via circular block bootstrap.

    Fix: the bootstrap was constructed with a hard-coded block length of
    20, silently ignoring the ``block_size`` parameter; it now uses
    ``block_size`` (default 20, so existing callers are unaffected).

    Parameters
    ----------
    data : pd.DataFrame
        Expected to contain 'retx' (ex-dividend returns), 'retd'
        (dividend returns, 0 on non-dividend days) and 'prc' (prices).
    block_size : int
        Circular-block length used to resample the non-dividend returns.
    total_sim : int
        Number of simulated paths.
    random_seed : int
        Seed for the bootstrap's RandomState (reproducible draws).

    Returns
    -------
    np.ndarray
        Shape (total_sim, len(data), 3); per path the columns are
        [price, return, dividend payment].
    """
    # Resample only the returns of non-dividend days.
    retx_sim = data[data['retd'] == 0]['retx']
    rs = np.random.RandomState(random_seed)
    # FIX: use the block_size argument (was hard-coded to 20).
    retx_sim_mod = CircularBlockBootstrap(block_size, retx_sim,
                                          random_state=rs)
    sim = np.zeros((total_sim, len(data), 3))
    count = 0
    if len(retx_sim) == len(data):
        # No dividend days: every day is resampled directly.
        for y in retx_sim_mod.bootstrap(total_sim):
            ls_retx = y[0][0]
            ls_retx.index = retx_sim.index
            ls_retx = ls_retx.sort_index()
            ls_retx[0] = 1  # anchor the first return so prices chain from prc[0]
            prc = ls_retx * 0
            prc[0] = data.prc[0]
            cum_retx_all = ls_retx[1:].cumprod()
            prc[1:] = cum_retx_all * prc[0]
            div_pay = ls_retx * 0  # no dividends in this branch
            sim[count, :, :] = np.array([prc, ls_retx, div_pay]).T
            count += 1
    else:
        # Dividend days exist: predict a dividend for each inter-dividend
        # window and splice a matching historical return back in.
        ls_div_days_between = days_between_div(data['retd'])
        div_day_cumsum = np.cumsum(ls_div_days_between)
        ls_div_dt = div_dates(data['retd'])
        ls_div_to_div_ret, ls_div_to_div_std, ls_div_ret = div_to_div_metrics(
            ls_div_days_between, data)
        reg = div_predict_linreg(ls_div_to_div_ret, ls_div_to_div_std,
                                 ls_div_ret)
        for y in retx_sim_mod.bootstrap(total_sim):
            ls_retx = y[0][0]
            ls_of_div = np.array(data[data['retd'] != 0]['retd'])
            ls_of_div_retx = np.array(data[data['retd'] != 0]['retx'])
            ls_sim_div = []
            ls_retx.index = retx_sim.index
            start = 1
            # NOTE(review): `start` is never advanced, so every window
            # below is measured from the beginning of the path — confirm
            # this is intended before changing it.
            for x in range(len(div_day_cumsum)):
                cum_retx = np.array(
                    ls_retx[start - 1:div_day_cumsum[x] - 1]).prod()
                cum_std = np.array(
                    ls_retx[start - 1:div_day_cumsum[x] - 1]).std()
                pred_div = reg.predict(np.array([[cum_retx], [cum_std]]).T)
                # Pick the historical dividend closest to the prediction
                # and reuse the return observed on that dividend day.
                sim_div_idx = min(ls_of_div,
                                  key=lambda i: abs(i - pred_div[0]))
                sim_rx = ls_of_div_retx[np.where(
                    ls_of_div == sim_div_idx)[0][0]]
                ls_retx.loc[ls_div_dt[x]] = sim_rx
                ls_sim_div.append(pred_div[0])
            ls_retx = ls_retx.sort_index()
            ls_retx[0] = 1
            prc = ls_retx * 0
            prc[0] = data.prc[0]
            cum_retx_all = ls_retx[1:].cumprod()
            prc[1:] = cum_retx_all * prc[0]
            div_pay = ls_retx * 0
            div_dic = dict(zip(ls_div_dt, ls_sim_div))
            for i in ls_div_dt:
                div_pay[i] = div_dic[i]
            sim[count, :, :] = np.array([prc, ls_retx, div_pay]).T
            count += 1
    return sim
def circular_block_bootstrap(block_size, dataset, bootstrap_resampling_times,
                             p, ar_parameters_original,
                             parameters_resampling_times):
    """Circular Block Bootstrap is adapted re-sampling 100 times for given block size
    1. store the parameters in the DataFrame form
    2. store the Model Right result in the pd.Series form
    3. recall the Model Parameter Comparison test
    4. store the results of Model Parameter
    # parameters_similarity_test_rate: mean of all 1-or-0 matrix
    # parameters_accuracy_vs_model_right: take the Model Right Test into consideration
    # parameters_same_test_pass_rate: all parameters pass the Parameters Same Test
    # pass_2_tests_rate: Pass Model Right Test & Model Same Test

    Returns a 9-tuple: (pass_2_tests_rate, half_test1_half_test2,
    model_right_test_pass_rate, parameters_same_test_pass_rate,
    parameters_similarity_test_rate, parameters_accuracy_vs_model_right,
    block_size, parameters_resampling_times, p) — the last three echo the
    inputs so callers can tabulate results per configuration.
    """
    # 1. re-sample time series
    bootstrap = CircularBlockBootstrap(block_size, dataset)
    re_sample = np.array(
        [k[0][0] for k in bootstrap.bootstrap(bootstrap_resampling_times)])
    # re_sample = np.reshape(re_sample, -1)
    # print('first change for re_sample:\n', re_sample)
    len_simulation = len(re_sample[0])
    # Force the (n_replications, series_length) layout.
    re_sample = np.reshape(re_sample,
                           (bootstrap_resampling_times, len_simulation))
    sleep(0.05)  # NOTE(review): presumably paces a progress display — confirm
    # 2. store fitted parameters & Model Right results
    model_residual_test_results_series = pd.Series()
    model_parameters = pd.DataFrame()
    for l in np.arange(len(re_sample)):
        # ar_model_fit (project helper) returns (parameters, residual-test
        # result) for an AR(p) fit of one resampled series.
        results = ar_model_fit(re_sample[l], p)
        model_residual_test_results_series.loc[l + 1] = results[1]
        model_parameters[l + 1] = results[0]
    # One row per replication after the transpose.
    model_parameters = model_parameters.transpose()
    # print("\nbootstrapped model parameters | re-sampling times: \n", model_parameters.head())
    # print('\nar model residuals test result: \n', model_residual_test_results_series.head())
    sleep(0.05)
    # 3. recall the Parameters Comparison Test & store the results
    parameters_similarity_test = \
        model_parameters_comparison(model_parameters, ar_parameters_original,
                                    model_residual_test_results_series)
    # Per-replication mean of the 1-or-0 similarity indicators.
    parameters_similarity_test_individual_rate_list = np.mean(
        parameters_similarity_test, axis=1)
    parameters_similarity_test_rate = np.mean(
        parameters_similarity_test_individual_rate_list)
    model_right_test_pass_rate = np.mean(model_residual_test_results_series)
    # print('\nModel Right Test Pass Rate: ', model_right_test_pass_rate)
    sleep(0.05)
    # Count replications where every parameter passed the similarity test.
    counts = 0
    for i in np.arange(len(parameters_similarity_test)):
        if np.sum(parameters_similarity_test.iloc[i, :]) == len(
                parameters_similarity_test.columns):
            counts = counts + 1
    parameters_same_test_pass_rate = counts / len(parameters_similarity_test)
    parameters_accuracy_vs_model_right = \
        np.sum(parameters_similarity_test_individual_rate_list) / len(model_residual_test_results_series)
    pass_2_tests_rate = counts / len(model_residual_test_results_series)
    # Weighted blend of the two pass rates (0.7 / 0.3).
    half_test1_half_test2 = 0.7 * model_right_test_pass_rate + 0.3 * parameters_same_test_pass_rate
    sleep(0.05)
    return pass_2_tests_rate, half_test1_half_test2, model_right_test_pass_rate, parameters_same_test_pass_rate, \
        parameters_similarity_test_rate, parameters_accuracy_vs_model_right, block_size, parameters_resampling_times, p
# Script fragment: per-well circular-block bootstrap of groundwater data,
# with each replication saved to its own CSV file.
# Select the data columns and the block length (average storm duration?)
# for the chosen well — NOTE(review): meaning of storm_avg inferred from
# the name only; confirm units against the full script.
if well == "170":
    cols = [0, 1, 2, 3]
    storm_avg = 120
if well == "175":
    cols = [0, 1, 2, 3]
    storm_avg = 132
# set base path to store results
path = "C:/Users/Ben Bowes/PycharmProjects/Tensorflow/mmps" + well + "_bootstraps/"
# load dataset
dataset_raw = pd.read_csv("C:/Users/Ben Bowes/Documents/HRSD GIS/Site Data/Data_2010_2018/MMPS_" + well + "_no_blanks_SI.csv",
                          index_col=None, parse_dates=True,
                          infer_datetime_format=True)
dataset_raw_np = np.array(dataset_raw)
dataset_np = dataset_raw_np[:, cols]
# set up bootstrap parameters
bootstrap = CircularBlockBootstrap(storm_avg, dataset_np)
bs_df_list = []
# NOTE(review): nothing in this view appends to bs_df_list and `results`
# is unused — presumably bs_to_df populates bs_df_list as a side effect
# when invoked via bootstrap.apply; verify against bs_to_df's definition.
results = bootstrap.apply(bs_to_df, 1000)
count = 0
for bs in bs_df_list:
    if count % 25 == 0:
        print("well", well, "bootstrap:", count)
    # One CSV per bootstrap replication: bs0.csv, bs1.csv, ...
    f = path + "bs" + str(count) + ".csv"
    bs.to_csv(f)
    count += 1