def fit_evaluate(self, X_train, y_train, X_test, y_test):
    """Fit the pixel regression and store train/test scores in ``self.score``.

    Coefficients are fitted with ``fit_pixel`` on the training split and used
    to predict both splits. When ``self.config['sigmoid']`` is truthy the
    predictions are passed through ``inverse_sigmoid`` before scoring. For
    each split the first element of the MSE, accumulated squared error and R2
    results is stored under ``'<metric>_<split>'``.

    Returns:
        A one-element tuple holding the fitted coefficients.
    """
    # Do the regression -- change this if it is too slow.
    coeffs = fit_pixel(X_train, y_train)
    fit_elapsed = timer() - self.timer_start
    print('Finished fitting pixel test data in load_transform_fit after {} seconds'
          .format(fit_elapsed))

    predictions = {
        'test': predict_pixel(X_test, coeffs),
        'train': predict_pixel(X_train, coeffs),
    }
    if self.config['sigmoid']:
        for split in ('test', 'train'):
            predictions[split] = inverse_sigmoid(predictions[split])

    targets = {'test': y_test, 'train': y_train}
    metrics = (
        ('mse', mean_squared_error),
        ('ase', accumulated_squared_error),
        ('r2', r2_score),
    )
    # Test scores are written before train scores, metric by metric.
    for split in ('test', 'train'):
        for metric_name, metric in metrics:
            key = '{}_{}'.format(metric_name, split)
            self.score[key] = metric(targets[split], predictions[split])[0]

    score_elapsed = timer() - self.timer_start
    print('Finished computing mse, ase, r2 data in load_transform_fit after {} seconds'
          .format(score_elapsed))
    return (coeffs,)
def fit(self, X, z, split_size):
    """Searches for the optimal hyperparameter combination.

    Splits ``(X, z)`` into train and test parts, then fits one
    ``self.model(lmd=param)`` per entry of ``self.params`` and records the
    train/test MSE and R2, the test predictions and the coefficients.

    Returns:
        self
    """
    # Model and params are now lists --> pass them along by name instead.

    # Setup
    self.results = {self.name: []}
    self.train_scores_mse, self.test_scores_mse = [], []
    self.train_scores_r2, self.test_scores_r2 = [], []

    # Splitting our original dataset into test and train.
    X_train, X_test, z_train, z_test = train_test_split(
        X, z, split_size=split_size, random_state=105)

    # Per-parameter results, kept so that mse can be plotted against the model.
    self.mse_test, self.mse_train = [], []
    self.r2_test, self.r2_train = [], []
    self.z_pred, self.coef_ = [], []

    # For one model we try every parameter and return the model.
    for lmd in self.params:
        # Train a model for this lambda.
        estimator = self.model(lmd=lmd)
        estimator.fit(X_train, z_train)

        pred_test = estimator.predict(X_test)
        pred_train = estimator.predict(X_train)

        self.mse_test.append(mean_squared_error(z_test, pred_test))
        self.mse_train.append(mean_squared_error(z_train, pred_train))
        self.r2_test.append(r2_score(z_test, pred_test))
        self.r2_train.append(r2_score(z_train, pred_train))
        self.z_pred.append(pred_test)
        self.coef_.append(estimator.coef_)

    return self
def fit(self, X_train, y_train, X_test=None, y_test=None):
    """
    Learn weights from training data.

    Parameters
    -----------
    X_train : array, shape = [n_samples, n_features]
        Input layer with original features.
    y_train : array, shape = [n_samples]
        Target class labels or data we want to fit.
    X_test : array, shape = [n_samples, n_features], optional
        Sample features for validation during training. When this or
        ``y_test`` is None, no validation score is recorded.
    y_test : array, shape = [n_samples], optional
        Sample labels/data for validation during training.

    Returns:
    ----------
    self
        After the call, ``self.eval_['train_preform']`` (and
        ``'valid_preform'`` when validation data is given) holds one entry
        per epoch: MSE for ``tpe == "regression"``, accuracy for
        ``tpe == "logistic"``.
    """
    self.initialize_weights_and_bias(X_train)
    self.eval_ = {'cost': [], 'train_preform': [], 'valid_preform': []}

    # The defaults advertise validation data as optional, so honour that
    # instead of crashing on predict(None).
    has_validation = X_test is not None and y_test is not None

    # Iterate over training epochs.
    for _ in range(self.epochs):
        # Includes forward + backward prop.
        self._minibatch_sgd(X_train, y_train)

        # Evaluation after each epoch during training.
        # NOTE(review): the forward-pass outputs are unused here; the call is
        # kept in case _forwardprop has side effects -- confirm and drop.
        self._forwardprop(X_train)
        y_train_pred = self.predict(X_train)
        if has_validation:
            y_test_pred = self.predict(X_test)
            y_test = y_test.reshape((len(y_test), 1))
        # NOTE: y_train is rebound to a column vector here, so from the second
        # epoch on _minibatch_sgd receives the reshaped targets (as before).
        y_train = y_train.reshape((len(y_train), 1))

        if self.tpe == "regression":
            self.eval_['train_preform'].append(
                mean_squared_error(y_train, y_train_pred))
            if has_validation:
                self.eval_['valid_preform'].append(
                    mean_squared_error(y_test, y_test_pred))
        elif self.tpe == "logistic":
            # Calculate accuracy.
            self.eval_['train_preform'].append(
                np.sum(y_train == y_train_pred) / len(y_train))
            if has_validation:
                self.eval_['valid_preform'].append(
                    np.sum(y_test == y_test_pred) / len(y_test))

    return self
def fit(self, X, z, split_size):
    """Searches for the optimal hyperparameter combination.

    Splits ``(X, z)`` into train and test parts, then fits one
    ``self.model(lmd=param)`` per entry of ``self.params`` on the training
    targets. Predictions are mapped back into the predictor space,
    standardized, and scored (MSE, R2) against the standardized targets.

    The targets are standardized once, before the loop, and kept apart from
    the raw targets: rebinding ``z_train`` inside the loop would train every
    estimator after the first on already-standardized targets.

    Returns:
        self
    """
    # Model and params are now lists --> pass them along by name instead.

    # Setup
    self.results = {self.name: []}
    self.train_scores_mse, self.test_scores_mse = [], []
    self.train_scores_r2, self.test_scores_r2 = [], []

    # Splitting our original dataset into test and train.
    X_train, X_test, z_train, z_test = train_test_split(
        X, z, test_size=split_size, feature_scale=self.feature_scale)

    # Per-parameter results, kept so that the (standardized response)
    # mse and r2 can be plotted against the model.
    self.mse_test, self.mse_train = [], []
    self.r2_test, self.r2_train = [], []
    self.z_pred, self.coef_ = [], []

    # Standardizing the response in order to make the performance metrics
    # comparable. Loop-invariant, so done once on copies of the targets.
    z_test_std = standardicing_responce(z_test)
    z_train_std = standardicing_responce(z_train)

    # For one model we try every parameter and return the model.
    for lmd in self.params:
        # Train a model for this lambda on the raw training targets.
        estimator = self.model(lmd=lmd)
        estimator.fit(X_train, z_train)

        pred_test = estimator.predict(X_test)
        pred_train = estimator.predict(X_train)

        # Transforming values which left the predictor space back into the
        # predictor space. Special case for the cloud cover predictions.
        pred_test = transforming_predictorspace(pred_test)
        pred_train = transforming_predictorspace(pred_train)

        # Put the predictions on the same standardized scale as the targets.
        pred_test = standardicing_responce(pred_test)
        pred_train = standardicing_responce(pred_train)

        self.mse_test.append(mean_squared_error(z_test_std, pred_test))
        self.mse_train.append(mean_squared_error(z_train_std, pred_train))
        self.r2_test.append(r2_score(z_test_std, pred_test))
        self.r2_train.append(r2_score(z_train_std, pred_train))
        self.z_pred.append(pred_test)
        self.coef_.append(estimator.coef_)

    return self
def test_mse():
    """Run ``utils.mean_squared_error`` on a fixed and a random input pair.

    The first value compares x^2 with x^2 - 2x on 20 points in [-1, 1]; the
    second compares two independent draws of 10 standard-normal samples.

    Returns:
        A one-element list with the formatted result line.
    """
    from utils import mean_squared_error

    grid = np.linspace(-1, 1, num=20).reshape(-1, 1)
    y_true = grid * grid
    y_pred = grid * grid - 2 * grid
    fixed_mse = mean_squared_error(y_true.flatten().tolist(),
                                   y_pred.flatten().tolist())

    # Draw order matters for reproducibility under a seeded RNG.
    random_true = np.random.normal(size=(10, )).flatten().tolist()
    random_pred = np.random.normal(size=(10, )).flatten().tolist()
    random_mse = mean_squared_error(random_true, random_pred)

    result = [fixed_mse, random_mse]
    return ['[TEST mean_squared_error],' + weights_to_string(result)]
def trimmed_model(data_name, label, lower_name, upper_name, model_name):
    """Train a model on the bounded data set and evaluate it on the full one.

    The bounds are loaded with ``np.load`` from ``lower_name``/``upper_name``
    and handed to ``load_data``. The trained model and its predictions on the
    unbounded data are saved under names built from all five arguments, and
    the MSE and trimmed MSE against the unbounded targets are echoed.
    """
    lower = np.load(lower_name)
    upper = np.load(upper_name)

    data_path = "data/" + data_name
    x1, y1 = load_data(data_path)
    x, y = load_data(data_path, lower, upper)

    # The model module is expected to expose a ``model`` attribute.
    model = import_module(model_name).model
    trained_model = fit_n(5, model, losses.mean_squared_error, x, y)
    ym1 = trained_model.predict(x1)

    name_parts = dict(model=model_name, data=data_name, label=label,
                      lower=lower_name, upper=upper_name)
    trained_model.save(
        "{model}_{data}_{label}_trimmed_{lower}_{upper}.h5".format(**name_parts))
    np.save(
        "{model}_{data}_{label}_y_trimmed_{lower}_{upper}".format(**name_parts),
        ym1)

    # Calculate final loss.
    ym1 = ym1.squeeze()
    click.echo("MSE: {}".format(mean_squared_error(y1, ym1)))
    click.echo("TMSE: {}".format(trimmed_mean_squared_error(y1, ym1)))
def main():
    """Fit a linear regression on synthetic data and plot error and fit."""
    X, y = make_regression(n_samples=100, n_features=1, noise=20)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
    n_samples, n_features = np.shape(X)

    model = LinearRegression(n_iterations=100)
    model.fit(X_train, y_train)

    # Training error plot
    iterations = range(len(model.training_errors))
    training, = plt.plot(iterations, model.training_errors,
                         label="training error")
    plt.legend(handles=[training])
    plt.title("Error Plot")
    plt.ylabel("Mean Squared Error")
    plt.xlabel("Iterations")
    plt.show()

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s" % mse)

    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap("viridis")

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X, y_pred_line, color="black", linewidth=2,
             label="Prediction")
    plt.suptitle("Linear Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel("Day")
    plt.ylabel("Temperature in Celcius")
    plt.legend((m1, m2), ("Training data", "Test data"), loc="lower right")
    plt.show()
def main():
    """Fit a gradient-boosted regressor on the temperature data and plot it.

    Reads ``TempLinkoping2016.txt`` (tab separated, columns ``time`` and
    ``temp``), adds a bias column, trains ``GBDTRegressor`` on half of the
    data, prints the test MSE and scatter-plots training data, test data and
    predictions.
    """
    # print() calls keep this block importable on Python 3.
    print('-- Grandient Boosting Regression --')

    data = pd.read_csv('TempLinkoping2016.txt', sep='\t')

    # Series.as_matrix() was removed from pandas; .values is its replacement.
    time = np.atleast_2d(data['time'].values).T
    temp = np.atleast_2d(data['temp'].values).T

    X = time.reshape((-1, 1))
    X = np.insert(X, 0, values=1, axis=1)  # Insert bias term as column 0
    y = temp[:, 0]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    model = GBDTRegressor()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    cmap = plt.get_cmap('viridis')

    mse = mean_squared_error(y_test, y_pred)
    print('Mean Squared Error:', mse)

    # Plot the results (column 1 is the time feature; column 0 is the bias).
    m1 = plt.scatter(366 * X_train[:, 1], y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test[:, 1], y_test, color=cmap(0.5), s=10)
    m3 = plt.scatter(366 * X_test[:, 1], y_pred, color='black', s=10)
    plt.suptitle("Regression Tree")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"),
               loc='lower right')
    plt.show()
def on_train_begin(self, logs=None):
    """Accumulate the average per-batch loss over the training generator.

    For every batch index the generator's ``(features, y_true)`` pair is
    scored with ``mean_squared_error`` and its share is added to
    ``self.bicubic_loss``. The resulting loss and its PSNR are printed.
    """
    generator = self.train_generator
    for batch_number in range(generator.get_batch_amount_per_epoch()):
        features, y_true = generator[batch_number]
        batch_loss = mean_squared_error(features, y_true)
        # Each batch contributes an equal fraction of the epoch average.
        self.bicubic_loss += batch_loss / generator.get_batch_amount_per_epoch()

    print("Bicubic LOSS: {0}".format(self.bicubic_loss))
    print("Bicubic PSNR: {0}".format(psnr_for_loss(self.bicubic_loss)))
def test_lr_integration_l2(self):
    """Train the L2-loss linear regression on part-1 data and plot the fit."""
    features, values = generate_data_part_1()

    model = LinearRegressionWithL2Loss(nb_features=1, alpha=0.0)
    model.train(features, values)

    mse = mean_squared_error(values, model.predict(features))
    # self.assertAlmostEqual(0.00175, mse, places=5)

    first_feature = [row[0] for row in features]
    plt.scatter(first_feature, values, label='origin')
    plt.plot([row[0] for row in features], model.predict(features),
             label='predicted')
    plt.legend()
def main():
    """Fit a lasso regression on the temperature data and plot error and fit.

    Downloads the tab-separated data set (columns ``time`` and ``temp``),
    trains ``LassoRegression`` on 60% of it, shows the training-error curve,
    prints the test MSE and plots the fitted curve over the data.
    """
    # Load temperature data
    data = pd.read_csv(
        'https://raw.githubusercontent.com/eriklindernoren/ML-From-Scratch/master/mlfromscratch/data/TempLinkoping2016.txt',
        sep="\t")

    time = np.atleast_2d(data["time"].values).T
    temp = data["temp"].values

    X = time  # fraction of the year [0, 1]
    y = temp

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # Single source for the regularisation factor so the printed value can
    # never drift from the one the model was trained with.
    reg_factor = 0.05

    model = LassoRegression(degree=15,
                            reg_factor=reg_factor,
                            learning_rate=0.001,
                            n_iterations=4000)
    model.fit(X_train, y_train)

    # Training error plot
    n = len(model.training_errors)
    training, = plt.plot(range(n), model.training_errors,
                         label="Training Error")
    plt.legend(handles=[training])
    plt.title("Error Plot")
    plt.ylabel('Mean Squared Error')
    plt.xlabel('Iterations')
    plt.show()

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s (given by reg. factor: %s)" % (mse, reg_factor))

    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X, y_pred_line, color='black', linewidth=2,
             label="Prediction")
    plt.suptitle("Lasso Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()
def test_feature_select(self):
    """Train a one-feature linear regression on part-2 data and plot it."""
    features, values = generate_data_part_2()

    model = LinearRegression(nb_features=1)
    model.train(features, values)

    mse = mean_squared_error(values, model.predict(features))
    print(f'[part 1.3.1]\tmse: {mse:.5f}')

    first_feature = [row[0] for row in features]
    plt.scatter(first_feature, values, label='origin')
    plt.plot([row[0] for row in features], model.predict(features),
             label='pre dicted')
    plt.legend()
    plt.show()
def test_data_procession(self):
    """Compare polynomial degrees 1, 3 and 10 on a train/validation split.

    Part-3 data is cut into 100 training, 20 validation and 30 test samples.
    For each degree a model is trained on the expanded training features, its
    train and validation MSE are printed, and the degree with the lowest
    validation MSE is tracked.
    """
    features, values = generate_data_part_3()

    train_features, train_values = features[:100], values[:100]
    valid_features, valid_values = features[100:120], values[100:120]
    test_features, test_values = features[120:], values[120:]

    assert len(train_features) == len(train_values) == 100
    assert len(valid_features) == len(valid_values) == 20
    assert len(test_features) == len(test_values) == 30

    best_mse, best_k = 1e10, -1
    for k in [1, 3, 10]:
        train_extended = polynomial_features(train_features, k)
        model = LinearRegression(nb_features=k)
        model.train(train_extended, train_values)
        train_mse = mean_squared_error(train_values,
                                       model.predict(train_extended))

        valid_extended = polynomial_features(valid_features, k)
        valid_mse = mean_squared_error(valid_values,
                                       model.predict(valid_extended))

        print(f'[part 1.4.1]\tk: {k:d}\t'
              f'train mse: {train_mse:.5f}\tvalid mse: {valid_mse:.5f}')

        # Keep the degree that generalises best to the validation split.
        if valid_mse < best_mse:
            best_mse, best_k = valid_mse, k
def test_lr_integration(self):
    """Check the part-1 linear regression MSE and plot the fitted line."""
    features, values = generate_data_part_1()

    model = LinearRegression(nb_features=1)
    model.train(features, values)

    mse = mean_squared_error(values, model.predict(features))
    self.assertAlmostEqual(0.00175, mse, places=5)

    first_feature = [row[0] for row in features]
    plt.scatter(first_feature, values, label='origin')
    plt.plot([row[0] for row in features], model.predict(features),
             label='predicted')
    plt.title("Holy shit")
    plt.legend()
    plt.show()
def lambdaError(lam, folds):
    """Return the mean squared error averaged over five cross-validation folds.

    One ``LeastSquareRegression(lam)`` instance is re-fitted for each of the
    fold indices 0..4: it is trained on the fold's training partition and
    scored on the partition that was left out.
    """
    linreg = LeastSquareRegression(lam)
    total_error = 0
    for fold_index in range(5):
        held_out, training = utils.partition_cross_validation_fold(
            folds, fold_index)
        linreg.fit(training[0], training[1])
        predictions = linreg.predict(held_out[0])
        total_error = total_error + utils.mean_squared_error(predictions,
                                                             held_out[1])
    return total_error / 5
def get_mean_squared_error(self, X, y):
    r'''
    Gets the mean squared error of the model evaluated on the dataset (X, y).

    The predictions are the matrix product of X with the model parameters
    returned by ``self.get_params()``.

    - Inputs:
        - X: An ndarray of regressor variable values; i.e., features; i.e., inputs.
        - y: An ndarray of dependent variable values; i.e., targets; i.e., labels, i.e., outputs.
    - Returns:
        - MSE = \frac{1}{m} \sum_i (\mathbf{predictions} - \mathbf{targets})_i^2.
    '''
    # The docstring is raw so the LaTeX backslashes are not read as string
    # escapes ("\f" would otherwise become a form feed).
    return mean_squared_error(np.dot(X, self.get_params()), y)
def test_add_polynomial_feature(self):
    """Fit polynomial models of degree 2, 4 and 10 on part-2 data and plot them."""
    features, values = generate_data_part_2()

    plt.scatter([row[0] for row in features], values, label='origin')

    for k in [2, 4, 10]:
        # TODO: confirm polynomial feature and k = Xk
        features_extended = polynomial_features(features, k)

        model = LinearRegression(nb_features=k)
        model.train(features_extended, values)

        mse = mean_squared_error(values, model.predict(features_extended))
        print(f'[part 1.3.2]\tk: {k:d}\tmse: {mse:.5f}')

        first_feature = [row[0] for row in features]
        plt.plot(first_feature, model.predict(features_extended),
                 label=f'k={k}')

    plt.legend()
    plt.show()
def compute_errors(self, model, train_X, train_y, val_X, val_y):
    """ Fit a single model and return its training and validation errors.

    NOTE: For the following:
    - T: Number of training samples.
    - V: Number of validation samples

    Args:
    - model (object (PolynomialRegression)): The model to train and evaluate on.
    - train_X (ndarray (shape: (T, D))): T training inputs, each D-dimensional.
    - train_y (ndarray (shape: (T, 1))): T scalar training outputs as a column vector.
    - val_X (ndarray (shape: (V, D))): V validation inputs, each D-dimensional.
    - val_y (ndarray (shape: (V, 1))): V scalar validation outputs as a column vector.

    Output:
    - training_error (float): The training error of the trained model.
    - validation_error (float): The validation error for the trained model.
    """
    # MSE is used as the cross-validation error measure.
    model.fit(train_X, train_y)

    train_predictions = model.predict(train_X)
    val_predictions = model.predict(val_X)

    return (mean_squared_error(train_predictions, train_y),
            mean_squared_error(val_predictions, val_y))
def main():
    """Fit an L2-regularised linear regression on synthetic data and plot it."""
    X, y = make_regression(n_samples=100, n_features=1, noise=20)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
    n_samples, n_features = np.shape(X)

    # Model parameters can be changed freely here, e.g. the regularisation,
    # the number of gradient-descent iterations or the learning rate.
    model = LinearRegression(n_iterations=3000,
                             regularization=L2Regularization(alpha=0.5))
    model.fit(X_train, y_train)

    # Training error plot (the loss curve)
    iterations = range(len(model.training_errors))
    training, = plt.plot(iterations, model.training_errors,
                         label="Training Error")
    plt.legend(handles=[training])
    plt.title("Error Plot")
    plt.ylabel('Mean Squared Error')
    plt.xlabel('Iterations')
    plt.show()

    # Predictions are reshaped to the target's shape before scoring.
    y_pred = np.reshape(model.predict(X_test), y_test.shape)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s" % (mse))

    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results (how well the model fits)
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X, y_pred_line, color='black', linewidth=2,
             label="Prediction")
    plt.suptitle("Linear Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()
def main():
    """Train XGBoost on the temperature data and plot predictions (large figure)."""
    print("-- XGBoost --")

    # Load the temperature data
    data = pd.read_csv('TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].values).T
    temp = np.atleast_2d(data["temp"].values).T

    # Each x lies in [0, 1]: the fraction of the year.
    X = time.reshape((-1, 1))
    # Bias term, treated as the first feature.
    X = np.insert(X, 0, values=1, axis=1)

    # (A 16x data-augmentation experiment that stacked X and shifted copies
    # of temp used to live here; it is disabled.)

    y = temp[:, 0]  # Temperature, reduced to one dimension

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    model = XGBoost()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error: {:.2f}".format(mse))

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results (column 1 is the time feature; column 0 is the bias).
    plt.figure(figsize=(12, 12))
    day_train = 366 * X_train[:, 1]
    m1 = plt.scatter(day_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test[:, 1], y_test, color=cmap(0.5), s=10)
    m3 = plt.scatter(366 * X_test[:, 1], y_pred, color='black', s=10)
    plt.suptitle("XGBoost Regression Tree", fontsize=28)
    plt.title("MSE: {:.2f}".format(mse), fontsize=20)
    plt.xlabel('Day', fontsize=18)
    plt.ylabel('Temperature in Celcius', fontsize=16)
    plt.tick_params(labelsize=15)  # Tick label font size
    plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"),
               loc='lower right', fontsize=15)
    plt.show()
def main():
    """Train XGBoost on ``temperature.txt`` and plot its test predictions."""
    print("-- XGBoost --")

    # Load temperature data
    data = pd.read_csv('temperature.txt', sep="\t")

    time = np.atleast_2d(data["time"].values).T
    temp = np.atleast_2d(data["temp"].values).T

    X = time.reshape((-1, 1))  # Time. Fraction of the year [0, 1]
    X = np.insert(X, 0, values=1, axis=1)  # Insert bias term
    y = temp[:, 0]  # Temperature. Reduce to one-dim

    # Debug dump of the target vector.
    print('=' * 100)
    print(type(y), y.shape, y)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, shuffle=True)

    model = XGBoost()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Color map
    cmap = plt.get_cmap('viridis')

    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error:", mse)

    # Plot the results (column 1 is the time feature; column 0 is the bias).
    day_train = 366 * X_train[:, 1]
    m1 = plt.scatter(day_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test[:, 1], y_test, color=cmap(0.5), s=10)
    m3 = plt.scatter(366 * X_test[:, 1], y_pred, color='black', s=10)
    plt.suptitle("Regression Tree")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"),
               loc='lower right')
    plt.show()
def main():
    """Train a regression tree on the temperature data, print it and plot it."""
    # Load temperature data
    data = pd.read_csv('./TempLinkoping2016.txt', sep="\t")

    # Both columns become column vectors of shape (n, 1).
    time = np.atleast_2d(data["time"].values).T
    temp = np.atleast_2d(data["temp"].values).T

    X = standardize(time)  # Time (fraction of the year), standardized
    y = temp[:, 0]  # Temperature. Reduce to one-dim

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=True)

    model = RegressionTree()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    model.print_tree(indent=' ')

    # Color map
    cmap = plt.get_cmap('viridis')

    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error:", mse)

    # Plot the results
    day_test = 366 * X_test
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(day_test, y_test, color=cmap(0.5), s=10)
    m3 = plt.scatter(366 * X_test, y_pred, color='black', s=10)
    plt.suptitle("Regression Tree")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"),
               loc='lower right')
    plt.show()
def test_regression(model):
    """Train the regressor registered under ``model`` and plot its predictions.

    ``model`` is a key into the ``models`` mapping; the class found there is
    instantiated without arguments and trained on the temperature data.
    """
    Regression = models[model]

    print ("-- Regression Tree --")

    # Load temperature data
    data = pd.read_csv('data/TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].values).T
    temp = np.atleast_2d(data["temp"].values).T

    X = standardize(time)  # Time. Fraction of the year [0, 1]
    y = temp[:, 0]  # Temperature. Reduce to one-dim
    print (X.shape, y.shape)

    X_train, y_train, X_test, y_test = split_train_test(X, y)

    # From here on ``model`` names the estimator instance, not the key.
    model = Regression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    mse = mean_squared_error(y_test, y_pred)
    print ("Mean Squared Error:", mse)

    # Plot the results
    day_test = 366 * X_test
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(day_test, y_test, color=cmap(0.5), s=10)
    m3 = plt.scatter(366 * X_test, y_pred, color='black', s=10)
    plt.suptitle("Regression Tree")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"),
               loc='lower right')
    plt.show()
def simple_model(label, data_name, model_name):
    """Train a model on the full data set, save it and print its losses.

    The model comes from the ``model`` attribute of the module named
    ``model_name``. The trained model and its predictions are saved under
    names built from the three arguments, then the MSE and trimmed MSE
    against the targets are printed.
    """
    x, y = load_data("data/" + data_name)

    model = import_module(model_name).model
    trained_model = fit_n(5, model, losses.mean_squared_error, x, y)
    ym1 = trained_model.predict(x)

    name_parts = dict(model=model_name, data=data_name, label=label)
    trained_model.save("{model}_{data}_{label}_plain.h5".format(**name_parts))
    np.save("{model}_{data}_{label}_y_plain".format(**name_parts), ym1)

    # Calculate final loss.
    ym1 = ym1.squeeze()
    print("MSE: ", mean_squared_error(y, ym1))
    print("TMSE: ", trimmed_mean_squared_error(y, ym1))
def main():
    """Train a gradient-boosted regressor on the temperature data and plot it."""
    print("-- Gradient Boosting Regression --")

    # Load temperature data
    data = pd.read_csv('../TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].values).T
    temp = np.atleast_2d(data["temp"].values).T

    X = time.reshape((-1, 1))  # Time. Fraction of the year [0, 1]
    X = np.insert(X, 0, values=1, axis=1)  # Insert bias term
    y = temp[:, 0]  # Temperature. Reduce to one-dim

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    model = GBDTRegressor()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error:", mse)

    # Plot the results (column 1 is the time feature; column 0 is the bias).
    day_train = 366 * X_train[:, 1]
    m1 = plt.scatter(day_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test[:, 1], y_test, color=cmap(0.5), s=10)
    m3 = plt.scatter(366 * X_test[:, 1], y_pred, color='black', s=10)
    plt.suptitle("Regression Tree")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"),
               loc='lower right')
    plt.show()
def main():
    """Train a regression tree on the temperature data and plot predictions.

    Reads ``../TempLinkoping2016.txt`` (tab separated, columns ``time`` and
    ``temp``), standardizes the time feature, trains ``RegressionTree`` on 70%
    of the data, prints the test MSE and scatter-plots training data, test
    data and predictions.
    """
    print ("-- Regression Tree --")

    # Load temperature data
    data = pd.read_csv('../TempLinkoping2016.txt', sep="\t")

    # Series.as_matrix() was removed from pandas; .values is its replacement
    # and is what the other entry points in this file already use.
    time = np.atleast_2d(data["time"].values).T
    temp = np.atleast_2d(data["temp"].values).T

    X = standardize(time)  # Time. Fraction of the year [0, 1]
    y = temp[:, 0]  # Temperature. Reduce to one-dim

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    model = RegressionTree()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    mse = mean_squared_error(y_test, y_pred)
    print ("Mean Squared Error:", mse)

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    m3 = plt.scatter(366 * X_test, y_pred, color='black', s=10)
    plt.suptitle("Regression Tree")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"),
               loc='lower right')
    plt.show()
def sarsa_driver(player_constructor, num_episodes, lambdas, true_q, plot_root):
    """Run one player per lambda and plot its error against ``true_q``.

    ``num_episodes`` is either an episode count, in which case checkpoints
    are the powers of ten from 1000 upwards, or a list of checkpoints whose
    last entry is the episode count. After every episode the MSE between
    ``true_q`` and the player's Q values is recorded; at each checkpoint the
    mean of the errors so far is stored per lambda. Full error curves are
    kept for lambda 0 and 1. Both plots are written below ``plot_root``.
    """
    if isinstance(num_episodes, list):
        checkpoints = num_episodes
        num_episodes = num_episodes[-1]
    else:
        decades = int(math.log10(num_episodes / 1e3)) + 1
        checkpoints = [int(10**(exponent + 3)) for exponent in range(decades)]

    # One (initially empty) list of per-lambda errors for every checkpoint.
    lambda_errors = [[] for _ in checkpoints]
    learning_curves = []

    for lambada in tqdm(lambdas):
        player = player_constructor(Table, lambada)
        episode_errors = []
        for episode_num in range(1, num_episodes + 1):
            player()
            episode_errors.append(mean_squared_error(true_q, player.get_q()))
            if episode_num in checkpoints:
                slot = checkpoints.index(episode_num)
                lambda_errors[slot].append(np.mean(episode_errors))
        if lambada in (0, 1):
            learning_curves.append(episode_errors)

    os.makedirs(plot_root, exist_ok=True)
    plot_error_by_episode(learning_curves,
                          path=os.path.join(plot_root, "learning_curves.png"))
    plot_error_by_lambda(lambda_errors, lambdas, checkpoints,
                         path=os.path.join(plot_root, "mse.png"))
def __init__(self, num_pads, num_frogs, num_iterations, wpath):
    """Set up the pads and run ``num_iterations`` jumps, reporting each one.

    After every jump the new distribution is compared with the previous one
    (net flow and mean squared error), printed, and written out as a
    histogram image and a table under ``wpath``. Finally the sorted
    eigenvalues of the transition matrix are written with the collected
    results.
    """
    self.pads_dict = self.initialize_pads_dict(num_pads, num_frogs)
    self.transition_matrix = self.initialize_transition_matrix(num_pads)
    self.write_path = wpath

    # Print initial stage of frogs/lilypads, pre-jumps.
    self.print_initial_stage(num_frogs, wpath)

    # Makes the wpath folder if not existing and deletes latex_tables.txt if
    # existing, since we append to the bottom of the file instead of
    # overwriting.
    utils.clean_dir(wpath)

    net_flow, mse = [], []
    current_distribution = self.get_pads_distribution_dict(self.pads_dict)

    for jump in range(1, num_iterations + 1):
        print('After {} jumps:'.format(jump))

        prev_distribution = current_distribution
        self.increment_time()
        current_distribution = self.get_pads_distribution_dict(self.pads_dict)

        # get_net_flow receives the zero-based iteration index.
        net_flow.append(
            self.get_net_flow(current_distribution, prev_distribution,
                              jump - 1, num_pads, num_frogs))
        mse.append(
            utils.mean_squared_error(current_distribution, prev_distribution,
                                     num_pads, num_frogs))

        utils.pretty_print_dict(current_distribution)
        utils.save_histogram_image(current_distribution, jump, num_frogs,
                                   wpath)
        utils.save_distribution_table(current_distribution, jump, num_frogs,
                                      wpath, jump)
        print('\n')

    e_vals = sorted(utils.get_evals(self.transition_matrix), reverse=True)
    utils.print_and_write_results(net_flow, mse, e_vals, wpath)
def show_errors(time, Y_true, Y_predict, with_graphs=False):
    """Print the mean MSE, MAE and MAPE and optionally plot the series.

    With ``with_graphs`` set, the first three columns of the predictions
    (wide, translucent lines) and of the true values (thin lines) are plotted
    against ``time``, followed by a second figure with the MAPE.
    """
    mae = utils.mean_absolute_error(Y_true, Y_predict)
    mape = utils.mean_absolute_percentage_error(Y_true, Y_predict, epsilon=1.0)
    mse = utils.mean_squared_error(Y_true, Y_predict)

    print('MSE %f ' % mse.mean())
    print('MAE %f ' % mae.mean())
    print('MAPE %7.3f%% ' % mape.mean())

    if not with_graphs:
        return

    column_colors = ('blue', 'green', 'red')
    # Predictions first, so the true values are drawn on top of them.
    for column, color in enumerate(column_colors):
        pyplot.plot(time, Y_predict[:, column], color=color, lw=7, alpha=0.2)
    for column, color in enumerate(column_colors):
        pyplot.plot(time, Y_true[:, column], color=color, lw=2)
    pyplot.grid()
    pyplot.show()

    pyplot.plot(time, mape, color='red', lw=1)
    pyplot.grid()
    pyplot.show()
def LearnModelFromDataUsingSGD(data, mfmodel, parameters, extra_data_set=None):
    """Train ``mfmodel`` with stochastic gradient descent on ``data``.

    Error logs left by a previous run are removed first. Then, for each of
    ``parameters.steps`` steps, the current training error is printed and
    logged (and the error on ``extra_data_set`` too, when one is given),
    followed by one SGD update per non-zero entry of ``data``.

    Args:
        data: Matrix supporting ``nonzero()`` and ``data[x, y]`` indexing.
        mfmodel: Model exposing ``calc_matrix()``; updated in place by
            ``gradient_decent_update``.
        parameters: Object with a ``steps`` attribute; also forwarded to
            ``gradient_decent_update``.
        extra_data_set: Optional second data set whose error is logged to the
            test-error file.
    """
    print("Training model using SGD")

    # Remove each stale log on its own: with both removals in one try block,
    # a missing train log would leave the old test log in place.
    for stale_log in ("output/SGD_train_error.txt",
                      "output/SGD_test_error.txt"):
        try:
            os.remove(stale_log)
        except OSError:
            pass

    for step in range(parameters.steps):
        predicted = mfmodel.calc_matrix()
        print("Step: %s, error: %f" %
              (step, mean_squared_error(mfmodel, predicted, data)))
        write_error_to_file(mfmodel, predicted, data, "SGD_train_error.txt")
        if extra_data_set is not None:
            write_error_to_file(mfmodel, predicted, extra_data_set,
                                "SGD_test_error.txt")

        # One update per observed (non-zero) entry.
        xs, ys = data.nonzero()
        for x, y in zip(xs, ys):
            sample = (x, y, data[x, y])
            gradient_decent_update(sample, mfmodel, parameters)
def generate_plot_from_csv(name,
                           dataset,
                           ds_type,
                           cropsize=224,
                           dirname="predictions/debug-448/"):
    """
    Generate plots from CSV

    Reads ``{dirname}/{name}_{dataset}_{ds_type}_predictions_{cropsize}.csv``
    and returns a chart layering a scatter of predicted against true labels
    over a black true-vs-true reference line, titled with the MSE and MAE.

    Parameters
    ---------
    name: Model name (vgg16baseline or vgg16decoder)
    dataset: Dataset name (SHHA or SHHB)
    ds_type: Set type (train or test)
    cropsize: Input image crop size
    dirname: Directory holding the prediction CSV files
    """
    fname = f"{dirname}/{name}_{dataset}_{ds_type}_predictions_{cropsize}.csv"
    df = pd.read_csv(fname)
    df['diff'] = df.true_labels - df.predicted_labels

    scatter = alt.Chart(df).mark_circle().encode(
        alt.X("true_labels"),
        alt.Y("predicted_labels"),
        alt.Tooltip(["true_labels", "predicted_labels"]))

    # Reference diagonal: true labels plotted against themselves.
    line = alt.Chart(df).mark_line().encode(
        alt.X('true_labels', title="True"),
        alt.Y('true_labels', title="Predicted"),
        color=alt.value('rgb(0,0,0)'))

    true_values = df.true_labels.values
    predicted_values = df.predicted_labels.values
    mse = mean_squared_error(true_values, predicted_values)
    mae = mean_absolute_error(true_values, predicted_values)

    title = f"INPUT {cropsize}, {dataset}:{ds_type.upper()}, MSE: {mse} | MAE: {mae}"
    return (scatter + line).properties(title=title)
def converged_gradient(self, num_iter, X, V, W, iter_check=50000, threshold=0.005,
                       gradient_v=None, gradient_w=None, error=True,
                       gradient_check=False, epsilon=10.**-5, x_j=None, y_j=None):
    """Decide whether gradient descent should stop at iteration ``num_iter``.

    There are two ways to determine if the gradient has converged:

    (1) ``error=True``: every ``iter_check`` iterations the training error
        is computed; training stops once it drops below ``threshold``.
    (2) ``error=False``: training stops once the norms of both
        ``gradient_v`` and ``gradient_w`` are below ``threshold``. Every
        ``iter_check`` iterations the metrics are also computed and printed.

    Independently of the mode, training stops once ``num_iter`` exceeds
    1,000,000.

    Parameters
    ----------
    num_iter : current iteration number.
    X : training inputs passed to ``self.predict``.
    V, W : the two weight matrices of the network.
    iter_check : evaluate and report every this many iterations.
    threshold : convergence threshold; in ``error`` mode it is also the
        tolerance of the numerical gradient check.
    gradient_v, gradient_w : analytic gradients w.r.t. ``V`` and ``W``.
    error : selects criterion (1) when true, criterion (2) otherwise.
    gradient_check : in ``error`` mode, numerically verify five randomly
        chosen entries of each gradient on the sample ``(x_j, y_j)``.
    epsilon : perturbation used for the central finite difference.
    x_j, y_j : sample and label index used by the gradient check.

    Returns
    -------
    tuple ``(converged, training_error, training_loss)``. The last two are
    attached for plotting and are ``None`` on iterations where no
    evaluation took place.

    Raises
    ------
    AssertionError
        If the gradient check finds an analytic gradient entry that differs
        from its numerical approximation by more than ``threshold``.
    """
    training_error = None
    training_loss = None

    # Hard cap on the number of iterations.
    if num_iter > 1000000:
        return (True, training_error, training_loss)

    use_mse = self.loss_function == "mean-squared-error" if self is not None else False

    def sample_cost(Z, target):
        # Loss of a single forward pass, using the configured loss function.
        if use_mse:
            return mean_squared_error(Z, target)
        return cross_entropy_loss(Z.T, target)

    def perturbed(matrix, i, j, delta):
        # Copy of `matrix` with entry (i, j) shifted by `delta`.
        shifted = matrix.copy()
        shifted[i][j] = shifted[i][j] + delta
        return shifted

    def evaluate():
        # Training error and loss, plus validation error when available.
        classifications_training, training_Z = self.predict(X, V, W, return_Z=True)
        train_err, _ = benchmark(classifications_training, self.labels)
        has_validation = (self.validation_data is not None
                          and self.validation_labels is not None)
        val_err = None
        if has_validation:
            classifications_validation = self.predict(self.validation_data, V, W)
            val_err, _ = benchmark(classifications_validation, self.validation_labels)
        if use_mse:
            loss = mean_squared_error(training_Z.T, self.Y)
        else:
            loss = cross_entropy_loss(training_Z.T, self.Y)
        return train_err, loss, has_validation, val_err

    if error:
        # Criterion (1): only evaluated on every iter_check-th iteration.
        if num_iter % iter_check != 0:
            return (False, training_error, training_loss)

        if gradient_check:
            # Randomly check five weights of each matrix.
            for _ in range(5):
                random_wi = np.random.randint(W.shape[0])
                random_wj = np.random.randint(W.shape[1])
                random_vi = np.random.randint(V.shape[0])
                random_vj = np.random.randint(V.shape[1])

                Z_W_plus = self.perform_forward_pass(
                    x_j, V, perturbed(W, random_wi, random_wj, epsilon))[1]
                Z_W_minus = self.perform_forward_pass(
                    x_j, V, perturbed(W, random_wi, random_wj, -epsilon))[1]
                Z_V_plus = self.perform_forward_pass(
                    x_j, perturbed(V, random_vi, random_vj, epsilon), W)[1]
                Z_V_minus = self.perform_forward_pass(
                    x_j, perturbed(V, random_vi, random_vj, -epsilon), W)[1]

                # One-hot target for the checked sample.
                # NOTE(review): the number of classes (10) is hard-coded.
                y = np.zeros(10)
                y[y_j] = 1

                W_plus_cost = sample_cost(Z_W_plus, y)
                W_minus_cost = sample_cost(Z_W_minus, y)
                V_plus_cost = sample_cost(Z_V_plus, y)
                V_minus_cost = sample_cost(Z_V_minus, y)

                # Central finite differences.
                gradient_approx_wij = (W_plus_cost - W_minus_cost) / (2. * epsilon)
                gradient_approx_vij = (V_plus_cost - V_minus_cost) / (2. * epsilon)

                w_off = abs(gradient_approx_wij - gradient_w[random_wi][random_wj]) > threshold
                v_off = abs(gradient_approx_vij - gradient_v[random_vi][random_vj]) > threshold
                if w_off or v_off:
                    raise AssertionError("The gradient was incorrectly computed.")

        training_error, training_loss, has_validation, validation_error = evaluate()
        print("Completed %d iterations.\nThe training error is %.2f.\n The training loss is %.2f."
              % (num_iter, training_error, training_loss))
        if has_validation:
            print("The error on the validation set is %.2f." % validation_error)

        if training_error < threshold:
            return (True, training_error, training_loss)
        return (False, training_error, training_loss)

    # Criterion (2): gradient magnitude, with periodic progress reporting.
    if num_iter % iter_check == 0:
        training_error, training_loss, has_validation, validation_error = evaluate()
        # The format string has three placeholders, so the loss must be
        # supplied too (it was missing and raised TypeError).
        print("Completed %d iterations. The training error is %.2f. Training loss is %.2f"
              % (num_iter, training_error, training_loss))
        if has_validation:
            print("The error on the validation set is %.2f." % validation_error)

    if np.linalg.norm(gradient_v) < threshold and np.linalg.norm(gradient_w) < threshold:
        return (True, training_error, training_loss)
    return (False, training_error, training_loss)
def evaluate_dcign(learning_rate=0.1, weight_decay=4e-06, batch_size=50, n_epochs=5,
                   dataset='../data/minecraft-2d.pkl'):
    """Runs and evaluates the deep convolutional inverse graphics network.

    The network is three convolution + pooling layers, two fully-connected
    layers and three deconvolution + unpooling layers, trained with plain
    SGD on the reconstruction error plus an L2 penalty, using early
    stopping on the validation set.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type weight_decay: float
    :param weight_decay: weight decay factor. NOTE(review): currently
                         unused; the regularizer below hard-codes 0.005.

    :type batch_size: int
    :param batch_size: number of samples per minibatch

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing
    """
    # Declare custom variables, hyperparameters, etc.
    rng = numpy.random.RandomState(23455)  # a random seed for replicability
    train_percentage = 0.7  # percentage of data to use for training

    # Custom variables
    n_filters = [96, 64, 32, 34, 66, 98]  # number of filters per conv /deconv layer
    # filter_size = [5, 5, 5, 6, 7, 6, 7]  # size of filter in conv /deconv layer
    filter_size = [(5, 5), (5, 5), (5, 5), (7, 7), (6, 6), (7, 7)]
    # The same pooling and unpooling size of (2, 2) is used across the network
    pool_size = [(2, 2)]
    # Note: in case of color images, use tripe tuples for filter and pooling sizes
    n_latent_vars = 23  # 3

    # Dimensionality of the input data; since it's one long vector per image
    # and the number of channels unknown, this has to be specified
    n_channels = 1
    n_rows = 128
    n_cols = 128

    # Load pickled data, shuffle and reshape them, split them into training,
    # validation and test data sets
    # NOTE(review): numpy.load on a pickle file unpickles arbitrary objects
    # (and recent numpy versions require allow_pickle=True); only use
    # trusted datasets here.
    data = numpy.load(dataset).astype(numpy.float64)  # convert ints to floats, too

    # Ensure that all batches are of the same size by throwing away out-of-batch samples
    num_batches = data.shape[0] // batch_size  # integer division
    last_idx = num_batches * batch_size
    data = data[:last_idx]  # throw away out-of-batch samples
    print("data shape: {0}".format(data.shape))

    # Generate a data index and shuffle it; later use shuffled index to
    # determine which samples go in which data set
    # NOTE(review): this uses the global numpy RNG, not the seeded `rng`,
    # so the split is not reproducible between runs.
    data_index = numpy.arange(data.shape[0])
    numpy.random.shuffle(data_index)

    ######################
    #   PREPROCESSING    #
    ######################

    # Normalize data to [0,1]
    data /= 255  # use 255 rather than data.max() because we know we're dealing w/ color values

    # Scale the data in accordance with the activation function(s) used
    # cf. https://cs231n.github.io/neural-networks-2/#datapre
    # Here: tanh which has a range of [-1; +1]
    data -= data.mean()  # subtract the mean
    print("min: {0:.3f}, max: {1:.3f}".format(data.min(), data.max()))

    # Split data into train, validation and test sets
    split_idx = int(numpy.floor(train_percentage * num_batches)) * batch_size
    train_set = data[data_index[:split_idx]]
    valid_set = data[data_index[split_idx:split_idx + 4 * batch_size]]
    test_set = data[data_index[split_idx + 4 * batch_size:]]

    # Compute the number of minibatches for training, validation and testing
    n_train_batches = int(numpy.ceil(train_set.shape[0] / batch_size))
    n_valid_batches = int(numpy.ceil(valid_set.shape[0] / batch_size))
    n_test_batches = int(numpy.ceil(test_set.shape[0] / batch_size))

    # Create shared theano variables for the training sets
    t_train_set = theano.shared(value=train_set.astype(T.config.floatX),
                                name="t_train_set", borrow=True)
    t_valid_set = theano.shared(value=valid_set.astype(T.config.floatX),
                                name="t_valid_set", borrow=True)
    t_test_set = theano.shared(value=test_set.astype(T.config.floatX),
                               name="t_test_set", borrow=True)

    # Allocate symbolic variables for the input x and the batch index
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data as rasterized images

    ######################
    # BUILDING THE MODEL #
    ######################
    print('... building the model ...')

    # CONSTRUCT CONVOLUTION + POOLING LAYERS
    # Reshape matrix, which is a 2D tensor, to a 4D tensor; cf. ConvPoolLayer()'s requirements
    conv_input = x.reshape((batch_size, n_channels, n_rows, n_cols))
    print("({0}, {1})".format(n_rows, n_cols))

    # Filtering reduces the image size to n_rows - filter_size + 1;
    # pooling reduces it further by division by pool_size
    convlayer1 = ConvPoolLayer(
        rng,
        input=conv_input,
        image_shape=(batch_size, n_channels, n_rows, n_cols),
        filter_shape=(n_filters[0], n_channels, filter_size[0][0], filter_size[0][1]),
        poolsize=pool_size[0]
    )

    # Compute the new image size
    n_im_rows = int(numpy.ceil((n_rows - filter_size[0][0] + 1) / pool_size[0][0]))
    n_im_cols = int(numpy.ceil((n_cols - filter_size[0][1] + 1) / pool_size[0][1]))
    print("({0}, {1})".format(n_im_rows, n_im_cols))

    convlayer2 = ConvPoolLayer(
        rng,
        input=convlayer1.output,
        image_shape=(batch_size, n_filters[0], n_im_rows, n_im_cols),
        filter_shape=(n_filters[1], n_filters[0], filter_size[1][0], filter_size[1][1]),
        poolsize=pool_size[0]
    )

    n_im_rows = int(numpy.ceil((n_im_rows - filter_size[1][0] + 1) / pool_size[0][0]))
    n_im_cols = int(numpy.ceil((n_im_cols - filter_size[1][1] + 1) / pool_size[0][1]))
    print("({0}, {1})".format(n_im_rows, n_im_cols))

    convlayer3 = ConvPoolLayer(
        rng,
        input=convlayer2.output,
        image_shape=(batch_size, n_filters[1], n_im_rows, n_im_cols),
        filter_shape=(n_filters[2], n_filters[1], filter_size[2][0], filter_size[2][1]),
        poolsize=pool_size[0]
    )

    n_im_rows = int(numpy.ceil((n_im_rows - filter_size[2][0] + 1) / pool_size[0][0]))
    n_im_cols = int(numpy.ceil((n_im_cols - filter_size[2][1] + 1) / pool_size[0][1]))
    print("({0}, {1})".format(n_im_rows, n_im_cols))

    # CONSTRUCT FULLY-CONNECTED HIDDEN LAYERS
    # HiddenLayer operates on 2D matrices of shape (batch_size, num_pixels);
    # therefore reshape the output of the convolutional layers accordingly
    hidden_input = convlayer3.output.flatten(2)

    hiddenlayer1 = HiddenLayer(
        rng,
        input=hidden_input,
        n_in=n_filters[2] * n_im_rows * n_im_cols,
        n_out=n_latent_vars,
        activation=T.tanh
    )

    # The decoder starts from a hard-coded single-channel 21x21 image.
    n_im_chans = 1
    n_im_rows = 21
    n_im_cols = 21

    hiddenlayer2 = HiddenLayer(
        rng,
        input=hiddenlayer1.output,
        n_in=n_latent_vars,
        # n_out=n_filters[2] * n_im_rows * n_im_cols,
        n_out=n_im_chans * n_im_rows * n_im_cols,
        activation=T.tanh
    )
    # TODO: Imo tied weights would make a lot of sense here; implement them.

    # CONSTRUCT DECONVOLUTION + UNPOOLING LAYERS
    # deconv_input = hiddenlayer2.output.reshape((batch_size, n_filters[2], n_im_rows, n_im_cols))
    deconv_input = hiddenlayer2.output.reshape((batch_size, n_im_chans, n_im_rows, n_im_cols))

    # Note: here input and image shape differ in that image_shape has the shape of the
    # unpooled input, ie. input.shape[3] * pool_size[x][0] and input.shape[4] * pool_size[x][1]
    deconvlayer1 = DeconvUnpoolLayer(
        rng,
        input=deconv_input,
        # filter_shape=(n_filters[2], n_filters[3], filter_size[3][0], filter_size[3][1]),
        # image_shape=(batch_size, n_filters[3], n_im_rows * pool_size[0][0], n_im_cols * pool_size[0][1]),
        filter_shape=(n_filters[3], n_filters[3], filter_size[3][0], filter_size[3][1]),  # (34, 1, 7, 7)
        image_shape=(batch_size, n_im_chans, n_im_rows * pool_size[0][0], n_im_cols * pool_size[0][1]),  # (50, 1, 26, 26)
        unpoolsize=pool_size[0]
    )  # W = (32, 1, 7, 7)

    # Calculate the new image shape
    n_im_rows = int(numpy.ceil(n_im_rows * pool_size[0][0] - filter_size[3][0] + 1))
    n_im_cols = int(numpy.ceil(n_im_cols * pool_size[0][1] - filter_size[3][1] + 1))
    print("({0}, {1})".format(n_im_rows, n_im_cols))

    deconvlayer2 = DeconvUnpoolLayer(
        rng,
        input=deconvlayer1.output,
        # filter_shape=(n_filters[4], n_im_chans, filter_size[4][0], filter_size[4][1]),
        filter_shape=(n_filters[4], n_filters[4], filter_size[4][0], filter_size[4][1]),
        # image_shape=(batch_size, n_im_chans, n_im_rows * pool_size[0][0], n_im_cols * pool_size[0][1]),
        image_shape=(batch_size, n_filters[3], n_im_rows * pool_size[0][0], n_im_cols * pool_size[0][1]),
        unpoolsize=pool_size[0]
    )  # W = (64, 1, 6, 6)

    n_im_rows = int(numpy.ceil(n_im_rows * pool_size[0][0] - filter_size[4][0] + 1))
    n_im_cols = int(numpy.ceil(n_im_cols * pool_size[0][1] - filter_size[4][1] + 1))
    print("({0}, {1})".format(n_im_rows, n_im_cols))

    deconvlayer3 = DeconvUnpoolLayer(
        rng,
        input=deconvlayer2.output,
        # filter_shape=(n_filters[5], n_im_chans, filter_size[5][0], filter_size[5][1]),
        filter_shape=(n_filters[5], n_filters[5], filter_size[5][0], filter_size[5][1]),
        # image_shape=(batch_size, n_im_chans, n_im_rows * pool_size[0][0], n_im_cols * pool_size[0][1]),
        image_shape=(batch_size, n_filters[4], n_im_rows * pool_size[0][0], n_im_cols * pool_size[0][1]),
        unpoolsize=pool_size[0]
    )  # W = (96, 1, 7, 7)

    n_im_rows = int(numpy.ceil(n_im_rows * pool_size[0][0] - filter_size[5][0] + 1))
    n_im_cols = int(numpy.ceil(n_im_cols * pool_size[0][1] - filter_size[5][1] + 1))
    print("({0}, {1})".format(n_im_rows, n_im_cols))

    # Define the objective function aka cost function to minimize
    y_pred = deconvlayer3.output.flatten(2)
    reconstruction_error = mean_squared_error(x, y_pred)
    weights = [
        deconvlayer3.W, deconvlayer2.W, deconvlayer1.W,
        hiddenlayer2.W, hiddenlayer1.W,
        convlayer3.W, convlayer2.W, convlayer1.W
    ]
    # regularizer = weight_decay * sum((W**2).sum() for W in weights)
    regularizer = 0.005 * sum((W**2).sum() for W in weights)
    cost = reconstruction_error + regularizer

    # Create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        errors(y_pred, x),
        givens={x: t_test_set[index * batch_size: (index + 1) * batch_size]}
    )

    validate_model = theano.function(
        [index],
        errors(y_pred, x),
        givens={x: t_valid_set[index * batch_size: (index + 1) * batch_size]}
    )

    # Create a list of all model parameters to be fit by gradient descent
    params = deconvlayer3.params + deconvlayer2.params + deconvlayer1.params \
        + hiddenlayer2.params + hiddenlayer1.params \
        + convlayer3.params + convlayer2.params + convlayer1.params

    # Create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # Train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={x: t_train_set[index * batch_size: (index + 1) * batch_size]}
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training ...')

    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch. Integer division
    # keeps the frequency an int.
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.

    # time.clock() was removed in Python 3.8; only fall back to it on
    # interpreters that predate time.perf_counter().
    timer = getattr(time, 'perf_counter', None) or time.clock
    start_time = timer()

    epoch = 0
    done_looping = False

    # TODO: use fuel specific tools for iterating
    # for epoch in train_stream.iterate_epochs():
    #     for batch in epoch:
    #         pass
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            # Global minibatch counter across epochs.
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('training @ iter = ', iteration)
            train_model(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print(('epoch %i, minibatch %i/%i, validation error %f %%' %
                       (epoch, minibatch_index + 1, n_train_batches,
                        this_validation_loss * 100.)))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(((' epoch %i, minibatch %i/%i, test error of '
                            'best model %f %%') %
                           (epoch, minibatch_index + 1, n_train_batches,
                            test_score * 100.)))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timer()
    print('Optimization complete.')
    print(('Best validation score of %f %% obtained at iteration %i, '
           'with test performance %f %%' %
           (best_validation_loss * 100., best_iter + 1, test_score * 100.)))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)
def __construct_graph(self):
    """Build the chained root-finding / factorisation RNN graph.

    For every sequence length in ``self.seq_lens`` three stages are
    unrolled. In each stage a "root" LSTM reads the current coefficient
    sequence and a linear layer on its last output yields a root estimate
    (``yhat_root_k``). A "fact" LSTM then reads the coefficients
    concatenated with that root, and a second linear layer maps its
    outputs to the (one shorter) coefficient sequence for the next stage.

    For each sequence length the method appends the concatenated root
    predictions to ``self.yhat``, the weighted loss to ``self.losses``,
    the minimum R2 to ``self.r2``, the global gradient norm to
    ``self.grad_norms`` and the Adam update op to ``self.updates``.
    Finally it creates ``self.saver``.
    """
    # Parameter for the first LSTM and find one root
    root_w = tf.get_variable("root_w", [self.hidden_dim, self.output_dim])
    root_b = tf.get_variable("root_b", [self.output_dim])

    # Parameter for the second LSTM that factorize
    fact_w = tf.get_variable("fact_w", [self.hidden_dim, self.output_dim])
    fact_b = tf.get_variable("fact_b", [self.output_dim])

    for l in self.seq_lens:
        Print("Creating RNN model for sequence length %d" % l)
        # Variables are created for the first length and shared afterwards.
        if l > self.seq_lens[0]:
            tf.get_variable_scope().reuse_variables()
        lstm = rnn_cell.BasicLSTMCell(self.hidden_dim)
        cell = rnn_cell.MultiRNNCell([lstm] * self.num_layers)

        # 0.0 LSTM1 from Coef0 to Root0
        output_root_0, _ = rnn.rnn(cell, self.input_coef_0[:l],
                                   dtype=tf.float32, scope="root")
        yhat_root_0 = tf.matmul(output_root_0[-1], root_w) + root_b

        # 0.1. LSTM2 from Root0 to Coef1
        input_root_0 = [tf.concat(1, [self.input_coef_0[i], yhat_root_0])
                        for i in xrange(l)]
        output_coef_0, _ = rnn.rnn(cell, input_root_0,
                                   dtype=tf.float32, scope="fact")

        # 1.0 LSTM1 from Coef1 to Root1
        input_coef_1 = [tf.matmul(output_coef_0[i], fact_w) + fact_b
                        for i in xrange(l - 1)]
        # The "root" and "fact" scopes are entered a second time from here
        # on, so their variables must be reused.
        tf.get_variable_scope().reuse_variables()
        output_root_1, _ = rnn.rnn(cell, input_coef_1,
                                   dtype=tf.float32, scope="root")
        yhat_root_1 = tf.matmul(output_root_1[-1], root_w) + root_b

        # 1.1. LSTM2 from Root1 to Coef2
        # Condition on the root found in THIS stage (yhat_root_1); the
        # stage-0 root was used here by mistake.
        input_root_1 = [tf.concat(1, [input_coef_1[i], yhat_root_1])
                        for i in xrange(l - 1)]
        output_coef_1, _ = rnn.rnn(cell, input_root_1,
                                   dtype=tf.float32, scope="fact")

        # 2.0. LSTM1 from Coef2 to Root 2
        input_coef_2 = [tf.matmul(output_coef_1[i], fact_w) + fact_b
                        for i in xrange(l - 2)]
        output_root_2, _ = rnn.rnn(cell, input_coef_2,
                                   dtype=tf.float32, scope="root")
        # Read the third root from the stage-2 output (output_root_1 was
        # used here by mistake, leaving output_root_2 unused).
        yhat_root_2 = tf.matmul(output_root_2[-1], root_w) + root_b

        self.yhat.append(tf.concat(1, [yhat_root_0, yhat_root_1, yhat_root_2]))

        # Earlier roots are weighted more heavily than later ones.
        loss = .55 * mean_squared_error(self.target[0], yhat_root_0) + \
            .27 * mean_squared_error(self.target[1], yhat_root_1) + \
            .18 * mean_squared_error(self.target[2], yhat_root_2)
        # NOTE(review): np.min over the r2_score results assumes they are
        # orderable by numpy; confirm this works if r2_score returns
        # symbolic tensors.
        r2 = np.min([r2_score(self.target[0], yhat_root_0),
                     r2_score(self.target[1], yhat_root_1),
                     r2_score(self.target[2], yhat_root_2)])
        self.losses.append(loss)
        self.r2.append(r2)

        # Clipped-gradient Adam update for this sequence length.
        self.params = tf.trainable_variables()
        grads = tf.gradients(loss, self.params)
        grads, norm = tf.clip_by_global_norm(grads, self.max_grad_norm)
        self.grad_norms.append(norm)
        self.updates.append(
            tf.train.AdamOptimizer(self.learning_rate, epsilon=1e-4).apply_gradients(
                zip(grads, self.params), global_step=self.global_step))

    self.saver = tf.train.Saver(tf.all_variables())
def __init__(self, learning_rate, input_dim, hidden_dim, output_dim, num_layers,
             max_grad_norm, seq_lens, is_seq_output=False):
    """Build an LSTM regression graph for each supported sequence length.

    One input placeholder is created per time step up to ``seq_lens[-1]``.
    With ``is_seq_output`` there is also one target placeholder per step
    and the loss / R2 are averaged over the steps; otherwise a single
    target placeholder is compared with the projection of the last RNN
    output. For every length in ``seq_lens`` the loss, R2, global gradient
    norm and Adam update op are appended to ``self.losses``, ``self.r2``,
    ``self.grad_norms`` and ``self.updates``.

    Args:
        learning_rate: learning rate passed to the Adam optimizer.
        input_dim: width of each per-step input placeholder.
        hidden_dim: number of units of each LSTM cell.
        output_dim: width of the target / prediction.
        num_layers: number of stacked LSTM cells.
        max_grad_norm: global norm used to clip the gradients.
        seq_lens: sequence lengths to build graphs for; the last entry
            determines how many placeholders are created.
        is_seq_output: whether a target is provided for every time step.
    """
    self.learning_rate = learning_rate
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.output_dim = output_dim
    self.num_layers = num_layers
    self.max_grad_norm = max_grad_norm
    self.seq_lens = seq_lens
    self.is_seq_output = is_seq_output

    # Seed both numpy and TensorFlow from the command-line flag.
    np.random.seed(FLAGS.seed)
    tf.set_random_seed(FLAGS.seed)

    self.inputs = []
    # Per-step target list for sequence output, otherwise one placeholder.
    self.target = [] if self.is_seq_output else tf.placeholder(
        tf.float32, [None, self.output_dim], name="target")

    for step in xrange(self.seq_lens[-1]):
        step_input = tf.placeholder(
            tf.float32, [None, self.input_dim], name="inp{0}".format(step))
        self.inputs.append(step_input)
        if not self.is_seq_output:
            continue
        step_target = tf.placeholder(
            tf.float32, [None, self.output_dim], name="tar{0}".format(step))
        self.target.append(step_target)

    self.updates = []
    self.losses = []
    self.grad_norms = []
    self.r2 = []

    # Linear read-out applied to the RNN outputs.
    softmax_w = tf.get_variable("softmax_w", [self.hidden_dim, self.output_dim])
    softmax_b = tf.get_variable("softmax_b", [self.output_dim])

    shortest = self.seq_lens[0]
    for seq_len in self.seq_lens:
        logger.info("Creating RNN model for sequence length %d", seq_len)
        # Variables are created for the first length and shared afterwards.
        if seq_len > shortest:
            tf.get_variable_scope().reuse_variables()

        lstm_cell = rnn_cell.BasicLSTMCell(self.hidden_dim)
        # lstm_cell = rnn_cell.BasicRNNCell(self.hidden_dim)
        cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * self.num_layers)
        output, _ = rnn.rnn(cell, self.inputs[:seq_len], dtype=tf.float32)

        if not self.is_seq_output:
            # Single prediction from the last time step.
            yhat = tf.matmul(output[-1], softmax_w) + softmax_b
            loss = mean_squared_error(self.target, yhat)
            score_fn = r2_score if self.output_dim == 1 else r2_scores
            r2 = score_fn(self.target, yhat)
        else:
            # One prediction per time step; average loss and R2 over steps.
            step_losses = []
            step_scores = []
            for step_output, step_target in zip(output, self.target[:seq_len]):
                yhat = tf.matmul(step_output, softmax_w) + softmax_b
                step_losses.append(mean_squared_error(step_target, yhat))
                step_scores.append(r2_score(step_target, yhat))
            loss = tf.python.math_ops.add_n(step_losses) / seq_len
            r2 = tf.python.math_ops.add_n(step_scores) / seq_len

        self.losses.append(loss)
        self.r2.append(r2)

        # Clipped-gradient Adam update for this sequence length.
        params = tf.trainable_variables()
        clipped, norm = tf.clip_by_global_norm(
            tf.gradients(loss, params), self.max_grad_norm)
        self.grad_norms.append(norm)
        optimizer = tf.train.AdamOptimizer(self.learning_rate, epsilon=1e-4)
        self.updates.append(optimizer.apply_gradients(zip(clipped, params)))