def _load_resampled(month, year, period):
    """Load one contract's 1-minute data and resample it to *period*.

    ``load_data`` returns three values; only the first (the data frame) is
    used here, the other two are discarded.
    """
    frame, _, _ = load_data(month, year, interval='1min')
    return resamplePeriod(frame, period=period)


def prepare_fly(Fmonth, Dmonth1, Dmonth2, year, period, instType='spread'):
    """Build spread or fly series from the Close prices of three contracts.

    Each of ``Fmonth``, ``Dmonth1`` and ``Dmonth2`` is loaded for ``year`` at
    1-minute interval, resampled to ``period`` and then restricted to rows
    whose index labels are shared between the legs.

    Args:
        Fmonth: Identifier of the first leg, passed to ``load_data``.
        Dmonth1: Identifier of the second leg, passed to ``load_data``.
        Dmonth2: Identifier of the third leg, passed to ``load_data``.
        year: Year passed to ``load_data`` for all three legs.
        period: Resampling period passed to ``resamplePeriod``.
        instType: ``'spread'`` (default) or ``'fly'``.

    Returns:
        For ``'spread'``: a tuple ``(s1, s2)`` where ``s1`` is the Close of
        ``Fmonth - Dmonth1`` and ``s2`` is the Close of ``Dmonth1 - Dmonth2``.
        No final ``dropna`` is applied to these two series.
        For ``'fly'``: ``s1 - s2`` with NaN rows dropped.

    Raises:
        ValueError: If ``instType`` is neither ``'spread'`` nor ``'fly'``.
    """
    # Fail fast: previously an unknown instType did all the loading work and
    # then fell off the end of the function, silently returning None.
    if instType not in ('spread', 'fly'):
        raise ValueError(
            "instType must be 'spread' or 'fly', got %r" % (instType,)
        )

    first = _load_resampled(Fmonth, year, period)
    second = _load_resampled(Dmonth1, year, period)
    third = _load_resampled(Dmonth2, year, period)

    # Align the legs on common index labels. The order of these steps (and
    # which of them call dropna) is kept exactly as it was.
    x = first.loc[first.index.intersection(second.index.values), :].dropna()
    y = second.loc[second.index.intersection(x.index.values), :]
    z = third.loc[third.index.intersection(x.index.values), :].dropna()
    x = x.loc[x.index.intersection(z.index.values), :].dropna()
    y = y.loc[y.index.intersection(z.index.values), :].dropna()

    s1 = (x - y).Close
    s2 = (y - z).Close

    if instType == 'spread':
        return s1, s2
    # instType == 'fly' (validated above).
    return (s1 - s2).dropna()
# Grid-search the smoothing parameter of a multinomial naive Bayes classifier
# on features extracted from text only, then report the results.
from DataProcessing import load_data
from DataProcessing import split_data
from DataProcessing import encode_class_labels
from DataProcessing import report_results
from DataProcessing import extract_feats_from_text
from DataProcessing import extract_feats_from_text_and_desc
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import MultinomialNB

# Number of parallel jobs handed to the grid search.
JOBS = 4

# Candidate values for the `alpha` parameter of MultinomialNB.
PARAMS = [
    {
        'alpha': [8, 4, 2, 1, 0.5, 0.25, 0.1, 0.07, 0.05, 0.03, 0.01, 0.001],
    },
]

# Load the data and obtain the train/test partitions.
df = load_data()
x_train, x_test, index_train1, index_test1 = split_data()

# Encode the class labels of each partition.
y_train, class_names = encode_class_labels(x_train)
y_test, class_names1 = encode_class_labels(x_test)

print("Features only from Text")
X_train, X_test = extract_feats_from_text()

# 4-fold cross-validated search scored by F1.
grid_search = GridSearchCV(
    estimator=MultinomialNB(),
    param_grid=PARAMS,
    scoring="f1",
    cv=4,
    n_jobs=JOBS,
    verbose=5,
)
grid_search.fit(X_train, y_train)

report_results(grid_search, y_train, X_train, y_test, X_test, class_names)
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
import seaborn as sns

# Global plot styling: matplotlib font size first, then the seaborn theme.
plt.rc("font", size=14)
sns.set(style="white")
# NOTE(review): this second call follows the "white" call directly, so
# presumably "whitegrid" is the style that ends up in effect - confirm.
sns.set(style="whitegrid", color_codes=True)

# Hyper-parameter grid: every combination of `penalty` and `C` listed here.
# NOTE(review): the estimator it is used with is not visible in this part of
# the file - presumably passed to GridSearchCV further down; confirm.
PARAMS = [{
    'penalty': ["l1", "l2"],
    'C': [4, 2, 1.5, 1, 0.5, 0.1, 0.05, 0.01, 0.001, 0.0001]
}]

# NOTE(review): not used in the visible lines - presumably the n_jobs value
# for the grid search; confirm against the code below.
JOBS = 4

# load_data / split_data / encode_class_labels are not imported in the
# visible lines; they are assumed to be brought into scope elsewhere.
data = load_data()
# print(list(data.columns))
x_train, x_test, index_train1, index_test1 = split_data()
# print(data.shape)
# print(x_test.shape)
# print(x_train.shape)
# array = ['male', 'female']
# data1 = x_test.loc[:, 'gender'].values
# data2 = x_train.loc[:, 'gender'].values

# Encode the labels of each partition separately; each call returns the
# encoded labels together with its own class-name list.
y_train, class_names_train = encode_class_labels(x_train)
y_test, class_names_test = encode_class_labels(x_test)
# print(len(data1))
# NOTE(review): the statements down to the `return` are the tail of a function
# whose header (and enclosing loop, if any) is outside this view. They are
# reproduced unchanged; the indentation shown is an assumption - confirm.
        cross_validator = CrossVal(X, y, 10, model)
        cross_validator.fit()
        test_mse = np.mean(cross_validator.test_mses)
        train_mse = np.mean(cross_validator.train_mses)
        results.append([p, train_mse, test_mse])
    return pd.DataFrame(results, columns=['polynomials', 'train_mse', 'test_mse'])


# Script entry point: run the polynomial trials, save them, then summarise
# and plot using the best-scoring polynomial value.
if __name__ == "__main__":
    print("\n### OLS ### \n")
    X, y = load_data()
    model = OLS()
    # Running trials for different polynomials
    results = test_diffent_polynomials(X, y)
    # Persist the per-polynomial train/test MSE table under the result folder.
    results.to_csv(cfg.RESULT_FOLDER.joinpath('OLS_polynomial_results.csv'))
    # Polynomial value of the first row whose test_mse equals the minimum.
    best_poly = results[results.test_mse == results.test_mse.min()].polynomials.tolist()[0]
    model_summary(model, add_polynomials(X, best_poly), y)
    # Plot train vs. test MSE against the polynomial values; save=True is
    # passed through to the plotting helper.
    bias_variance_plot(results.polynomials, results.train_mse, results.test_mse, save=True)