def convergence_sensitivity_analysis():
    """Fit CRMP from every initial guess in ``p0s`` and log the outcome.

    For each producer the production and injection series are trimmed to
    start at that producer's entry in ``producer_starting_indicies``, turned
    into a dataset, and split in half without shuffling. A fresh CRMP model
    is fitted once per initial guess; the initial and fitted parameters
    (``tau_`` plus four gains) together with the r2 / MSE reported by
    ``fit_statistics`` are appended to the module-level ``fit_data``
    (first half) and ``predict_data`` (second half) column dictionaries.
    Both dictionaries are finally written to CSV at ``fit_ouput_file`` and
    ``predict_output_file``.
    """

    def _log_run(table, well_number, guess, model, r2, mse):
        # One value per column; every column of `table` is a list.
        row = {
            'Producer': well_number,
            'Model': model_namer(model),
            'tau_initial': guess[0],
            'tau_final': model.tau_,
            'f1_initial': guess[1],
            'f1_final': model.gains_[0],
            'f2_initial': guess[2],
            'f2_final': model.gains_[1],
            'f3_initial': guess[3],
            'f3_final': model.gains_[2],
            'f4_initial': guess[4],
            'f4_final': model.gains_[3],
            'r2': r2,
            'MSE': mse,
        }
        for column, value in row.items():
            table[column].append(value)

    for well in range(len(producer_names)):
        first = producer_starting_indicies[well]
        production = producers[well][first:]
        trimmed_injectors = [series[first:] for series in injectors]
        X, y = production_rate_dataset(production, *trimmed_injectors)
        X_fit, X_holdout, y_fit, y_holdout = train_test_split(
            X, y, test_size=0.5, shuffle=False
        )

        for guess in p0s:
            # The guess is deep-copied before it is handed to the model.
            model = CRMP(p0=deepcopy(guess)).fit(X_fit, y_fit)

            # First the in-sample fit, then the held-out prediction.
            for table, features, target in (
                (fit_data, X_fit, y_fit),
                (predict_data, X_holdout, y_holdout),
            ):
                r2, mse = fit_statistics(model.predict(features), target)
                _log_run(table, well + 1, guess, model, r2, mse)

    pd.DataFrame(fit_data).to_csv(fit_ouput_file)
    pd.DataFrame(predict_data).to_csv(predict_output_file)
def objective_function():
    """Score CRMP at every point of ``param_grid['p0']`` for each producer.

    A CRMP model is fitted once on the first half of producer 0's data; the
    fitted object is then reused, with ``tau_`` and ``gains_`` overwritten by
    each grid point, to predict the second half of every producer's data.
    The grid point and the r2 / MSE from ``fit_statistics`` are appended to
    the module-level ``objective_function_data`` dictionary, which is written
    to CSV at ``objective_function_file`` at the end.
    """

    def _halves(producer):
        # Unshuffled 50/50 split of the dataset built for one producer.
        X, y = production_rate_dataset(producer, *injectors)
        return train_test_split(X, y, test_size=0.5, shuffle=False)

    X_fit, _, y_fit, _ = _halves(producers[0])
    model = CRMP().fit(X_fit, y_fit)

    for well in range(number_of_producers):
        _, X_holdout, _, y_holdout = _halves(producers[well])
        for point in param_grid['p0']:
            # Bypass fitting: impose the grid point on the fitted model.
            model.tau_ = point[0]
            model.gains_ = point[1:]
            r2, mse = fit_statistics(model.predict(X_holdout), y_holdout)
            row = {
                'Producer': well + 1,
                'tau': point[0],
                'f1': point[1],
                'f2': point[2],
                'r2': r2,
                'MSE': mse,
            }
            for column, value in row.items():
                objective_function_data[column].append(value)

    pd.DataFrame(objective_function_data).to_csv(objective_function_file)
def fit_individual_initial_guesses():
    """Fit CRMP to ``producers[2]`` from one fixed guess and print the result.

    The model is fitted on the first half of the (unshuffled) dataset and
    evaluated on that same half. Prints, in order, the fitted ``tau_``, the
    fitted ``gains_`` and the MSE returned by ``fit_statistics``.
    """
    X, y = production_rate_dataset(producers[2], *injectors)
    X_fit, _, y_fit, _ = train_test_split(
        X, y, train_size=0.5, shuffle=False
    )
    model = CRMP(p0=[1e-03, 0.4, 0.6]).fit(X_fit, y_fit)
    _, mse = fit_statistics(model.predict(X_fit), y_fit)
    for value in (model.tau_, model.gains_, mse):
        print(value)
def minimum_train_size():
    """Print the smallest training size whose held-out log-MSE beats a cutoff.

    Training sizes 1..148 (absolute sample counts) are tried in increasing
    order on the dataset built from ``producers[3]``. For each size a CRMP
    model is fitted on the leading samples and scored on the remainder; as
    soon as ``log(mse)`` drops below the cutoff, the size and the MSE are
    printed and the function returns. Nothing is printed when no size
    qualifies. Always returns ``None``.
    """
    log_mse_cutoff = 11.0581424463

    # The dataset does not depend on the training size, so it is built once
    # rather than on every iteration.
    X, y = production_rate_dataset(producers[3], *injectors)

    for data_size in range(1, 149):
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, train_size=data_size, shuffle=False
        )
        crmp = CRMP(p0=[5, 0.5, 0.5]).fit(X_train, y_train)
        _, mse = fit_statistics(crmp.predict(X_test), y_test)
        if np.log(mse) < log_mse_cutoff:
            print(data_size)
            print(mse)
            return
def test_model(X, y, model, test_split):
    """Refit ``model`` on every fold of ``test_split`` and average its scores.

    Parameters
    ----------
    X, y : indexable
        Features and targets; each is indexed with the ``train`` / ``test``
        entries yielded by ``test_split``.
    model : object
        Anything exposing ``fit(X, y)`` and ``predict(X)``. It is refitted
        in place on each fold.
    test_split : iterable of (train, test) pairs
        May be a list or a one-shot generator; folds are counted while
        iterating, so ``len()`` support is not required.

    Returns
    -------
    tuple
        ``(r2, mse, y_hat, time_step)`` where ``r2`` and ``mse`` are the
        per-fold means of what ``fit_statistics`` returns, ``y_hat`` is the
        list of per-fold predictions and ``time_step`` the list of per-fold
        ``test`` indices.

    Raises
    ------
    ZeroDivisionError
        If ``test_split`` yields no folds.
    """
    r2_sum, mse_sum = 0, 0
    n_folds = 0
    y_hat = []
    time_step = []

    for train, test in test_split:
        model.fit(X[train], y[train])
        y_hat_i = model.predict(X[test])
        y_hat.append(y_hat_i)
        time_step.append(test)

        r2_i, mse_i = fit_statistics(y_hat_i, y[test])
        r2_sum += r2_i
        mse_sum += mse_i
        n_folds += 1

    return (r2_sum / n_folds, mse_sum / n_folds, y_hat, time_step)
history = model.fit(X_train_scaled, y_train_scaled, epochs=epoch, batch_size=batch, validation_split=0.1, verbose=0) y_hat_lstm = [] for j in range(30): y_hat_j = model.predict(X_test_scaled[j:(j + 1)])[0][0] X_test_scaled[j + 1] = y_hat_j y_hat_lstm.append(y_hat_j) y_hat_lstm = np.array(y_hat_lstm).reshape(-1, 1) y_hat_lstm = scaler.inverse_transform(y_hat_lstm) r2, mse = fit_statistics(y_hat_lstm, y_test[:30]) print(mse) crmp = CRMP().fit(X_train, y_train) y_hat_crmp = crmp.predict(X_test[:30, 1:]) r2, mse = fit_statistics(y_hat_crmp, y_test[:30]) print(mse) t = np.linspace(76, 105, 30) plt.plot(t, y_test[:30], color='k', label='True Value', linewidth=2) plt.plot(t, y_hat_crmp, alpha=0.5, label='CRMP', linewidth=2) plt.plot(t, y_hat_lstm, alpha=0.5, label='LSTM Neural Network', linewidth=2) plt.tight_layout() plot_helper( FIG_DIR, title='{}: 30 Days Prediction for CRMP and LSTM Neural Network'.format(
return np.log(column + 1) std = 25 for estimator in estimators: print(estimator) for i in range(number_of_producers): producer = producers[i] producer += np.random.normal(loc=0.0, scale=std, size=len(producer)) X, y = production_rate_dataset(producer, *injectors) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.5, shuffle=False) model = estimator.fit(X_train, y_train) y_hat = [] # y_hat = model.predict(X_test[:30, 1:]) y_hat_i = y_train[-1] for i in range(30): X_test_i = X_test[i, :] X_test_i[0] = y_hat_i X_test_i = X_test_i.reshape(1, -1) y_hat_i = model.predict(X_test_i) y_hat.append(y_hat_i) r2, mse = fit_statistics(y_hat, y_test[:30]) print(mse) print(min(y_hat)) print() print() print()
def test_not_enough_data(self):
    """fit_statistics on a single observation must report r2 as NaN."""
    y_hat = [1]
    y = [1]
    r2, _ = fit_statistics(y_hat, y)
    # NaN never compares equal to itself, so a membership test such as
    # `r2 in [np.nan]` only succeeds when r2 is the very same object as
    # np.nan. np.isnan checks the value, so a computed NaN passes as well.
    assert np.isnan(r2)
def test_fit_statistics(self):
    """fit_statistics returns exactly two values, neither of them None."""
    predicted, observed = [1, 2, 3], [1, 3, 4]
    result = fit_statistics(predicted, observed)
    assert len(result) == 2
    assert None not in result
def convergence_sensitivity_analysis():
    """Fit CRMP from every guess in ``param_grid['p0']`` and log the outcome.

    Each producer's dataset is split in half without shuffling. A fresh CRMP
    model is fitted once per initial guess; the initial and fitted parameters
    (``tau_`` plus two gains) together with the r2 / MSE reported by
    ``fit_statistics`` are appended to the module-level ``fit_data`` (first
    half) and ``predict_data`` (second half) column dictionaries. A running
    count of completed fits is printed after each one. Both dictionaries are
    finally written to CSV at ``fit_ouput_file`` and ``predict_output_file``.
    """

    def _log_run(table, well_number, guess, model, r2, mse):
        # One value per column; every column of `table` is a list.
        row = {
            'Producer': well_number,
            'Model': model_namer(model),
            'tau_initial': guess[0],
            'tau_final': model.tau_,
            'f1_initial': guess[1],
            'f1_final': model.gains_[0],
            'f2_initial': guess[2],
            'f2_final': model.gains_[1],
            'r2': r2,
            'MSE': mse,
        }
        for column, value in row.items():
            table[column].append(value)

    iterations = 0
    for i in range(number_of_producers):
        X, y = production_rate_dataset(producers[i], *injectors)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, train_size=0.5, shuffle=False
        )

        for p0 in param_grid['p0']:
            # The guess is deep-copied before it is handed to the model.
            crmp = CRMP(p0=deepcopy(p0)).fit(X_train, y_train)

            # First the in-sample fit, then the held-out prediction.
            for table, features, target in (
                (fit_data, X_train, y_train),
                (predict_data, X_test, y_test),
            ):
                r2, mse = fit_statistics(crmp.predict(features), target)
                _log_run(table, i + 1, p0, crmp, r2, mse)

            # Progress indicator: number of fits completed so far.
            iterations += 1
            print(iterations)

    pd.DataFrame(fit_data).to_csv(fit_ouput_file)
    pd.DataFrame(predict_data).to_csv(predict_output_file)
def train_bagging_regressor_with_crmp():
    """Bag CrmpBHP models per producer and plot a 30-step forecast band.

    For each selected producer the real production/injection dataset is
    built, split chronologically with a producer-specific training fraction,
    and used to fit an ``MBBaggingRegressor`` of 100 ``CrmpBHP`` estimators
    (``block_size=7``, ``random_state=0``). The function then

    * computes the ensemble-average in-sample fit and its r2,
    * predicts the first 30 test rows with every estimator and derives the
      per-step P10, average and P90 across estimators,
    * plots the last 100 training points, the fit, the true test values, the
      individual realizations and the P10/average/P90 curves, saving the
      figure through ``plot_helper``.

    Returns ``None``.
    """
    train_sizes = [0.33, 0.735, 0.49, 0.45, 0.52, 0.66, 0.54]
    n_estimators = 100
    delta_t = 1
    horizon = 30

    # Producer index 5 is not part of the run.
    for i in [0, 1, 2, 3, 4, 6]:
        # Constructing dataset
        name = producer_names[i]
        print(name)
        producer = get_real_producer_data(producers_df, name, bhp=True)
        injectors = injectors_df[['Name', 'Date', 'Water Vol']]
        X, y = construct_real_production_rate_dataset(
            producer, injectors, delta_t=delta_t
        )
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, train_size=train_sizes[i], shuffle=False
        )
        X_train = X_train.to_numpy()
        X_test = X_test.to_numpy()
        y_train = y_train.to_numpy()
        y_test = y_test.to_numpy()

        train_length = len(X_train)
        t_fit = np.linspace(0, train_length - 1, train_length)
        t_test = np.linspace(
            train_length, train_length + horizon - 1, horizon
        )

        # Setting up estimator
        bgr = MBBaggingRegressor(
            base_estimator=CrmpBHP(delta_t=delta_t),
            n_estimators=n_estimators,
            block_size=7,
            bootstrap=True,
            n_jobs=-1,
            random_state=0,
        )
        bgr.fit(X_train, y_train)

        # In-sample fit: every estimator predicts one training row at a
        # time, with q0 reset to column 0 of that row before each call
        # (presumably the previous rate -- confirm against CrmpBHP).
        # The step index has its own name so that it no longer overwrites
        # the producer index `i`.
        y_fits = []
        for e in bgr.estimators_:
            y_hat_e = []
            for step in range(len(y_train)):
                e.q0 = X_train[step, 0]
                y_hat_e.append(e.predict(np.array([X_train[step, 1:]])))
            y_fits.append(y_hat_e)
        # Rearranged to one row per time step, one column per estimator.
        y_fits_by_time = np.asarray(y_fits).T.reshape(-1, n_estimators)
        y_fits_average = [np.average(row) for row in y_fits_by_time]
        r2, mse = fit_statistics(y_fits_average, y_train)

        # Getting all bootstrapped predictions
        y_hats = []
        for e in bgr.estimators_:
            e.q0 = y_train[-1]
            y_hats.append(e.predict(X_test[:horizon, 1:]))
        y_hats_by_time = np.asarray(y_hats).T
        p10s = [np.percentile(step_hats, 10) for step_hats in y_hats_by_time]
        averages = [np.average(step_hats) for step_hats in y_hats_by_time]
        p90s = [np.percentile(step_hats, 90) for step_hats in y_hats_by_time]
        # NOTE(review): arguments are in the opposite order to the other
        # fit_statistics calls, and the result is not used further on.
        mse = fit_statistics(y_test[:horizon], averages)[1]

        max_train = np.amax(y_train[-100:])
        max_fit = np.amax(y_fits_average[-100:])
        max_realization = np.amax(y_hats)
        height = max(max_train, max_fit, max_realization)

        # Plotting
        plt.plot(t_fit[-100:], y_train[-100:], color='k')
        plt.plot(t_fit[-100:], y_fits_average[-100:], color='g',
                 label='Fitting')
        plt.plot(t_test, y_test[:horizon], color='k', label='True Value')
        plt.plot(t_test, averages, color='b', label='Average')
        plt.plot(t_test, p10s, color='r', alpha=0.5, label='P10 & P90')
        plt.plot(t_test, p90s, color='r', alpha=0.5)
        for hat in y_hats:
            plt.plot(t_test, hat, color='k', alpha=0.02)
        # The annotated r-squared is that of the in-sample fit.
        plt.annotate('r-squared = {:.4f}'.format(r2),
                     xy=(train_length - 60, height))
        plt.vlines(train_length - 1, 0, height, linewidth=2, colors='k',
                   linestyles='dashed', alpha=0.8)
        plot_helper(FIG_DIR,
                    title='{}: 30 Days Prediction'.format(name),
                    xlabel='Days',
                    ylabel='Production Rate [bbls/day]',
                    legend=True,
                    save=True)
def evaluate_crmp_bhp_model():
    """Fit CrmpBHP from every guess in ``p0s`` and log 30-step predictions.

    For each producer name the real dataset is built (production, injection
    and the producer's ``delta_p`` column), split 40/60 without shuffling and
    converted to NumPy. One CrmpBHP model is fitted per initial guess and
    used to predict the first 30 test rows (feature columns 1 onward); the
    parameters and the r2 / MSE from ``fit_statistics(..., shutin=True)`` are
    appended to the module-level ``predict_data`` dictionary. Progress is
    printed per producer and per fit.

    In-sample statistics are not collected here, so ``fit_data`` is written
    to ``fit_output_file`` with whatever it already holds; ``predict_data``
    is written to ``predict_output_file``.
    """

    def _log_prediction(well_name, guess, model, r2, mse):
        # One value per column of the module-level predict_data dict.
        row = {
            'Producer': well_name,
            'Model': model_namer(model),
            'tau_initial': guess[0],
            'tau_final': model.tau_,
            'f1_initial': guess[1],
            'f1_final': model.gains_[0],
            'f2_initial': guess[2],
            'f2_final': model.gains_[1],
            'f3_initial': guess[3],
            'f3_final': model.gains_[2],
            'f4_initial': guess[4],
            'f4_final': model.gains_[3],
            'r2': r2,
            'MSE': mse,
        }
        for column, value in row.items():
            predict_data[column].append(value)

    iteration = 0
    for name in producer_names:
        print('Producer Name: ', name)
        producer = get_real_producer_data(producers_df, name, bhp=True)
        injectors = injectors_df[['Name', 'Date', 'Water Vol']]
        X, y = construct_real_production_rate_dataset(
            producer[['Date', name]], injectors, producer['delta_p']
        )
        splits = train_test_split(X, y, train_size=0.40, shuffle=False)
        X_train, X_test, y_train, y_test = (
            part.to_numpy() for part in splits
        )

        for guess in p0s:
            iteration += 1
            print('Iteration: {}'.format(iteration))
            model = CrmpBHP(p0=deepcopy(guess)).fit(X_train, y_train)

            # Prediction over the first 30 held-out rows.
            forecast = model.predict(X_test[:30, 1:])
            r2, mse = fit_statistics(forecast, y_test[:30], shutin=True)
            _log_prediction(name, guess, model, r2, mse)

    pd.DataFrame(fit_data).to_csv(fit_output_file)
    pd.DataFrame(predict_data).to_csv(predict_output_file)