def storeOLSPrediction():
    """Predict player counts from the newest ``IgnitionRow`` and store them.

    Fetches the most recent row (ordered by ``pub_date``), derives
    hour-of-day and day-of-week dummy columns from its timestamp, runs the
    five pickled OLS models on the resulting frame, prints the predictions
    and saves them as one ``IgnitionRowPredictionOLS`` record stamped with
    ``timezone.now()``.
    """
    latest_rows = list(
        IgnitionRow.objects.all().order_by('-pub_date')[:1].values())
    # The query is newest-first; reverse it before building the frame.
    data = pd.DataFrame(latest_rows[::-1])

    data['pub_date'] = data.apply(lambda x: str(x['pub_date']), axis=1)
    data['pub_date_struct'] = data.apply(
        lambda x: time.strptime(x['pub_date'], "%Y-%m-%d %H:%M:%S.%f%z"),
        axis=1)
    data.index = data.apply(
        lambda x: datetime.fromtimestamp(mktime(x['pub_date_struct'])),
        axis=1)

    # Keep the hour as an int.  The categories below are the ints 0..23, so
    # a string hour (as produced previously via ``str(...)``) matches none of
    # them: pd.Categorical turned every value into NaN and all hour dummies
    # came out as zero.
    data['hour'] = data.apply(
        lambda x: time.strptime(x['pub_date'], "%Y-%m-%d %H:%M:%S.%f%z")[3],
        axis=1)
    data['day_of_week'] = data.index.map(lambda x: x.weekday())

    # Fixed category lists guarantee the full set of dummy columns even
    # though the frame only holds a single row.
    data['hour'] = pd.Categorical(data['hour'], categories=list(range(24)))
    data['day_of_week'] = pd.Categorical(data['day_of_week'],
                                         categories=list(range(7)))

    hour_dummies = pd.get_dummies(data['hour'], drop_first=True)
    hour_dummies.columns = [
        'h' + str(elem) for elem in hour_dummies.columns
    ]
    day_of_week_dummies = pd.get_dummies(data['day_of_week'],
                                         drop_first=True)
    day_of_week_dummies.columns = [
        'dow' + str(elem) for elem in day_of_week_dummies.columns
    ]
    data = pd.concat((data, hour_dummies, day_of_week_dummies), axis=1)

    # One pickled model per ``num_players_*`` field, in field order.
    model_paths = [
        "regression_models/ols_9_21_data_5.pickle",
        "regression_models/ols_9_21_data_25.pickle",
        "regression_models/ols_9_21_data_50.pickle",
        "regression_models/ols_9_21_data_200.pickle",
        "regression_models/ols_9_21_data_500.pickle",
    ]
    fitted_models = [OLSResults.load(model_path) for model_path in model_paths]
    preds = [fitted.predict(data) for fitted in fitted_models]
    print("OLS PREDICTIONS: {}".format(preds))

    d = IgnitionRowPredictionOLS(num_players_5=preds[0],
                                 num_players_25=preds[1],
                                 num_players_50=preds[2],
                                 num_players_200=preds[3],
                                 num_players_500=preds[4],
                                 pub_date=timezone.now())
    d.save()
def main(models):
    """Summarise saved OLS fits and write the summary to a CSV file.

    ``models`` is the list of model file names, each loaded from
    ``regression_models/return_predictions/{coin}/``.  ``coin`` is not a
    parameter of this function; it is resolved from the enclosing scope.

    For every model the R-squared, coefficients, standard errors and
    ``df_resid`` are collected.  The resulting table is indexed by model
    name, sorted by R-squared (best first), printed, and written to
    ``results/return_predictions/{coin}_model_results.csv``.
    """
    r_squareds = []
    coefficients = []
    standerd_errors = []
    residuals = []
    for model in models:
        results = OLSResults.load(
            f'regression_models/return_predictions/{coin}/{model}')
        r_squareds.append(results.rsquared)
        # Tuples keep each coefficient / standard-error vector in one cell.
        coefficients.append(tuple(results.params))
        standerd_errors.append(tuple(results.bse))
        # NOTE: this column holds ``df_resid``, not the residual values.
        residuals.append(results.df_resid)

    # The keys below become the CSV column names, so their spelling is kept.
    model_dict = {
        'r_squareds': r_squareds,
        'coefficients': coefficients,
        'standerd_errors': standerd_errors,
        'residuals': residuals
    }
    print(model_dict)
    model_df = pd.DataFrame(model_dict,
                            index=models).sort_values('r_squareds',
                                                      ascending=False)
    print(model_df)
    output_path = f'results/return_predictions/{coin}_model_results.csv'
    model_df.to_csv(output_path)
    # Report the file that was actually written.
    print(f'model saved to "{output_path}"')
def unpickle(self):
    """Load the UUID/key lookup tables and every saved OLS model.

    Reads ``uuid_to_key.pickle`` and ``key_to_uuid.pickle`` from the current
    working directory into ``self.uuid_to_key`` and ``self.key_to_uuid``.
    Then, for each file name in ``self.allModelName``, loads
    ``model/<name>`` and stores it in ``self.models`` under the key that
    ``self.uuid_to_key`` maps the trimmed file name to.

    Raises:
        KeyError: if a trimmed file name is missing from ``uuid_to_key``.
    """
    # ``with`` closes each file even when pickle.load raises.
    with open('uuid_to_key.pickle', 'rb') as pkl_file:
        self.uuid_to_key = pickle.load(pkl_file)
    with open('key_to_uuid.pickle', 'rb') as pkl_file:
        self.key_to_uuid = pickle.load(pkl_file)

    for name in self.allModelName:
        # Drop the last seven characters of the file name (presumably the
        # ".pickle" extension) to recover the UUID -- TODO confirm.
        key = self.uuid_to_key[name[:-7]]
        self.models[key] = OLSResults.load('model/' + name)
def main(models, coin, dependent_variable='return'):
    """Collect summary statistics of saved OLS fits into a pickled table.

    ``models`` is the list of model file names; each one is loaded from
    ``regression_models/{dependent_variable}_predictions/{coin}/``.  For
    every model the R-squared, coefficients, standard errors, ``df_resid``
    and the values returned by ``get_time`` / ``get_formula`` (helpers
    defined outside this function) are gathered.  The table is indexed by
    model name, sorted by R-squared in descending order and pickled to
    ``results/{dependent_variable}_predictions/{coin}_model_results.pickle``.
    """
    column_names = ('r_squareds', 'coefficients', 'standerd_errors',
                    'residuals', 'times', 'formulas')

    # One tuple per model, with fields in the same order as column_names.
    rows = []
    for model_name in models:
        fitted = OLSResults.load(
            f'regression_models/{dependent_variable}_predictions/{coin}/{model_name}'
        )
        rows.append((
            fitted.rsquared,
            fitted.params,
            fitted.bse,
            fitted.df_resid,
            get_time(model_name),
            get_formula(model_name),
        ))

    # Transpose the per-model rows into one list per column.
    model_dict = {
        column: [row[position] for row in rows]
        for position, column in enumerate(column_names)
    }

    summary = pd.DataFrame(model_dict, index=models)
    model_df = summary.sort_values('r_squareds', ascending=False)
    model_df.to_pickle(
        f'results/{dependent_variable}_predictions/{coin}_model_results.pickle'
    )
    print(f'model saved to {coin}_model_results.pickle')
def get_prediction(centered_filtered, col, side=None):
    """Return the mean corrected prediction of target ``col`` for one video.

    Loads ``models/{col}_best.pb`` via ``load_model`` and the OLS correction
    model ``models/{col}_correction.pb``.  The input is cut into windows of
    124 rows taken every 31 rows, restricted to the feature columns chosen
    by ``side`` (``"L"``, ``"R"`` or anything else for the default set).
    The first output column of the model is rescaled with the constants in
    ``maps``, passed through the correction model together with a column of
    ones, and the mean over all windows is returned.
    """
    network = load_model("models/{}_best.pb".format(col))
    corrector = OLSResults.load("models/{}_correction.pb".format(col))

    # Per-target (offset, scale) pairs used to undo the output scaling.
    maps = {
        "KneeFlex_maxExtension": (-29.4408212510502, 114.8431545843835),
        "GDI": (36.314492983907, 77.03271217530302),  # singlesided
        "gmfcs": (1, 3),
        "speed": (0.0718863507111867, 1.5259117583433834),
        "cadence": (0.222, 1.71556665023985),
        "SEMLS_dev_residual": (-0.8205001909638112, 3.309054961371647)
    }

    video_len = centered_filtered.shape[0]

    cols = x_columns
    if side == "L":
        cols = x_columns_left
    elif side == "R":
        cols = x_columns_right

    window, stride = 124, 31
    samples = [
        centered_filtered[start:(start + window), cols]
        for start in range(0, video_len - window, stride)
    ]
    X = np.stack(samples)

    scaled = network.predict(X)[:, 0]
    target_min, target_range = maps[col]
    unscaled = scaled * target_range + target_min

    # The correction model takes the prediction plus a column of ones.
    design = np.vstack([unscaled, np.ones(unscaled.shape[0])]).T
    corrected = corrector.predict(pd.DataFrame(design))
    return np.mean(corrected)
def __init__(self, model_file_name, test_file_name):
    """Store both file paths and eagerly load the model and the test data.

    Args:
        model_file_name: path handed to ``OLSResults.load``.
        test_file_name: path of the CSV file read into ``testing_set``.
    """
    self.model_file_name, self.test_file_name = (model_file_name,
                                                 test_file_name)
    self.model = OLSResults.load(model_file_name)
    self.testing_set = pd.read_csv(test_file_name)
    # Starts out empty.
    self.prediction = []
from statsmodels.regression.linear_model import OLSResults

# ``sklearn.externals.joblib`` was deprecated in scikit-learn 0.21 and removed
# in 0.23.  Keep using it where it still exists, otherwise fall back to the
# standalone ``joblib`` package that scikit-learn itself depends on.
try:
    from sklearn.externals import joblib
except ImportError:
    import joblib

# Load data --------------------------------------------------------------------------------
# Load batter and player information
df_player_id = pd.read_csv("df_player_id.csv", index_col=0)
df_batting_fromsc_250pa_prop_events = pd.read_csv(
    "df_batting_fromsc_250pa_prop_events.csv", index_col=0)
df_pitching_fromsc_500pa_prop_events = pd.read_csv(
    "df_pitching_fromsc_500pa_prop_events.csv", index_col=0)

# Load models: one saved statsmodels OLS result per event type.
sm_est_model_onbase_loaded = OLSResults.load(
    "sm_est_model_onbase_saved.pickle")
sm_est_model_walk_loaded = OLSResults.load("sm_est_model_walk_saved.pickle")
sm_est_model_single_loaded = OLSResults.load(
    "sm_est_model_single_saved.pickle")
sm_est_model_double_loaded = OLSResults.load(
    "sm_est_model_double_saved.pickle")
sm_est_model_homerun_loaded = OLSResults.load(
    "sm_est_model_homerun_saved.pickle")
sm_est_model_strikeout_loaded = OLSResults.load(
    "sm_est_model_strikeout_saved.pickle")

# Load scalers saved with joblib.
scale_onbase_loaded = joblib.load("scale_onbase_saved.pickle")
scale_walk_loaded = joblib.load("scale_walk_saved.pickle")
scale_single_loaded = joblib.load("scale_single_saved.pickle")
scale_double_loaded = joblib.load("scale_double_saved.pickle")
def unpickle(self):
    """Load every saved OLS model listed in ``self.allModelName``.

    Each file name is loaded from the ``model/`` directory and stored in
    ``self.models`` under the file name with its last seven characters
    removed.
    """
    suffix_len = 7  # number of trailing characters cut from each file name
    for file_name in self.allModelName:
        stem = file_name[:-suffix_len]
        self.models[stem] = OLSResults.load('model/' + file_name)
import statsmodels.api as sm
from statsmodels.regression.linear_model import OLSResults
import numpy as np

# Example: fit an OLS model on simulated data, save the fitted results to
# disk and load them back.

# Simulated data: y = 1 + 0.1*x + 10*x^2 + standard normal noise.
nsample = 100
x = np.linspace(0, 10, 100)
beta = np.array([1, 0.1, 10])
e = np.random.normal(size=nsample)

# Design matrix with columns [const, x, x^2].
X = np.column_stack((x, x**2))
X = sm.add_constant(X)
y = X @ beta + e

# Fit, then round-trip the results object through a pickle file.
model = sm.OLS(y, X)
results = model.fit()
results.save("example.pickle")

new_results = OLSResults.load("example.pickle")
print(new_results.summary())
# Build the feature frame and add the constant column.
sd = setup_data(gf,
                instrument=instrument,
                pricediff=True,
                log=True,
                trading=True)
sd.head()  # result is discarded here
sd['intercept'] = 1

# Names of the saved models, in the order they are loaded.  Each name is both
# the key in ``models`` and the stem of its file under ./src/models/.
model_names = (
    'HHLL_LogDiff USD_JPY_highMid-1',
    'HHLL_LogDiff USD_JPY_highMid-2',
    'HHLL_LogDiff USD_JPY_highMid-3',
    'HHLL_LogDiff USD_JPY_highMid-4',
    'HHLL_LogDiff USD_JPY_highMid-5',
    'HHLL_LogDiff USD_JPY_highMid-6',
    'HHLL_LogDiff USD_JPY_lowMid-1',
    'HHLL_LogDiff USD_JPY_lowMid-2',
    'HHLL_LogDiff USD_JPY_lowMid-3',
    'HHLL_LogDiff USD_JPY_lowMid-4',
    'HHLL_LogDiff USD_JPY_lowMid-5',
    'HHLL_LogDiff USD_JPY_lowMid-6',
    'HHLL_LogDiff USD_JPY_highMid0',
    'HHLL_LogDiff USD_JPY_highMid1',
)

models = {}
for model_name in model_names:
    models[model_name] = OLSResults.load('./src/models/' + model_name + '.h5')
def load_model(self, filename):
    """Return the OLS results stored in ``filename`` under ``PICKLES_PATH``.

    The file is opened in binary mode and the open handle is passed to
    ``OLSResults.load``.
    """
    with open(path.join(self.PICKLES_PATH, filename), 'rb') as handle:
        return OLSResults.load(handle)
# Ratio of the RPC1 rate to the RPC2 rate.
plt.plot(df_rates["RPC1"] / df_rates["RPC2"])
# Save before showing: once plt.show() returns the figure may already have
# been released, in which case savefig would write an empty page.
plt.savefig('Rates_ratio_' + str(aRun) + '_' + args.do_fit + '.pdf')
plt.show()
plt.close()

# Predict RPC1 from the other three RPC rates.
# (Alternative predictor set: df_rates[['DT1', 'DT2', 'DT3', 'DT4']].)
X = df_rates[['RPC2', 'RPC3', 'RPC4']]
y = df_rates['RPC1']

if fit is True:
    # Fit a fresh model and persist it for later runs.
    results = smf.ols('RPC1 ~ RPC2 + RPC3 + RPC4', df_rates).fit()
    results.save("model.pickle")
else:
    # Reuse the model saved by an earlier fitting run.
    from statsmodels.regression.linear_model import OLSResults
    results = OLSResults.load("model.pickle")
print(results.summary())

res = results.predict(X)

# Colour each (measured, predicted) point by its estimated local density.
xy = np.vstack([y, res])
z = gaussian_kde(xy)(xy)

fig, ax = plt.subplots()
# 'none' draws markers without an edge; the empty string is not a valid
# colour in current matplotlib.
sc = ax.scatter(y, res, c=z, s=100, edgecolor='none')
plt.title('Predicted vs measured RPC rate')
plt.ylabel('Predicted rate')
plt.xlabel('Measured rate')
def _initialize_log_linear_risk_model(self, referenceName, modelName):
    """Register a ``LogLinearRiskFactorModel`` built from a saved OLS fit.

    Loads ``microsim/data/<modelName>.pickle`` and stores a model built from
    its parameters, standard errors and residuals in ``self._repository``
    under ``referenceName``.
    """
    fitted = OLSResults.load("microsim/data/" + modelName + ".pickle")
    risk_model = LogLinearRiskFactorModel(referenceName, fitted.params,
                                          fitted.bse, fitted.resid)
    self._repository[referenceName] = risk_model
def OLS_realtime(X_test, OLS_name):
    """Predict ``X_test`` with the saved OLS model named ``OLS_name``.

    The model file is looked up in ``../datasets.nosync/``; the return value
    is whatever the loaded results object's ``predict`` returns.
    """
    model_path = '../datasets.nosync/' + OLS_name
    return OLSResults.load(model_path).predict(X_test)
# Correlation between the model-5 predictions and the observed AT values
# (the result is not stored).
pred5.corr(wcat["AT"])

# Residual diagnostics for model 5: residuals, squared residuals, MSE, RMSE.
res5 = wcat.AT - pred5
sqres5 = res5 * res5
mse5 = np.mean(sqres5)
rmse5 = np.sqrt(mse5)

# Persist the fitted model, then load it back from disk.
model5.save("slr_wcat.pkl")
from statsmodels.regression.linear_model import OLSResults

model = OLSResults.load("slr_wcat.pkl")

# Predict for Waist = 36 (with its square and cube).  The prediction is
# exponentiated -- presumably the model was fit on log(AT); TODO confirm.
new_obs = pd.DataFrame([[36, 1296, 46656]],
                       columns=["Waist", "Waist_sq", "Waist_cb"])
x = np.exp(model.predict(new_obs))
print(float(round(x, 2)))