def main(_):
    """High level pipeline.

    This script performs the training, evaluation and testing stages of the
    model selected by FLAGS.model_type.

    Args:
        _: Unused.

    Raises:
        ValueError: If FLAGS.model_type is not one of 'linear', 'logistic'
            or 'svm'.
    """
    learning_rate = FLAGS.learning_rate
    feature_type = FLAGS.feature_type
    model_type = FLAGS.model_type

    # Load dataset.
    # NOTE(review): the training, evaluation and test stages below all read
    # 'data/test_lab.txt'. Confirm whether separate train/val label files
    # were intended here.
    data = read_dataset('data/test_lab.txt', 'data/image_data')

    # Data processing.
    data = preprocess_data(data, feature_type)

    # Initialize model.
    ndim = data['image'].shape[1]
    if model_type == 'linear':
        model = LinearRegressionTf(ndim, 'ones')
    elif model_type == 'logistic':
        model = LogisticRegression(ndim, 'zeros')
    elif model_type == 'svm':
        model = SupportVectorMachine(ndim, 'zeros')
    else:
        # Without this branch `model` would be unbound and the failure would
        # surface later as an UnboundLocalError inside the training call.
        raise ValueError(
            "Unknown model_type %r; expected 'linear', 'logistic' or 'svm'."
            % (model_type,))

    # Train model.
    model = train_model(data, model, learning_rate, num_steps=20000)

    # Eval model.
    data_test = read_dataset('data/test_lab.txt', 'data/image_data')
    data_test = preprocess_data(data_test, feature_type)
    # NOTE(review): the result is unpacked as (acc, loss) and not used; other
    # callers of eval_model in this file unpack it as (loss, acc) -- confirm
    # the order before relying on these names.
    acc, loss = eval_model(data_test, model)

    # Test model.
    data_test = read_dataset('data/test_lab.txt', 'data/image_data')
    data_test = preprocess_data(data_test, feature_type)
def main(_):
    """High level pipeline.

    Trains a linear regression model on the training split, then reports
    the loss on the training, validation and test splits. All settings are
    read from FLAGS.
    """
    step_size = FLAGS.learning_rate
    w_decay_factor = FLAGS.w_decay_factor  # Read from FLAGS; not used below.
    max_steps = FLAGS.num_steps
    solver = FLAGS.opt_method
    columns = FLAGS.feature_columns.split(',')

    # Training split: load, then build the feature/target pair.
    raw_train = read_dataset("data/train.csv")
    train_set = preprocess_data(
        raw_train,
        feature_columns=columns,
        squared_features=True,
    )

    # Model sized to the number of feature columns.
    n_features = train_set[0].shape[1]
    model = LinearRegression(n_features, 'zeros')

    # Fit the model with the requested solver.
    if solver != 'iter':
        # Anything other than 'iter' selects the analytic solution.
        train_model_analytic(train_set, model)
        print('Closed form solution.')
    else:
        # Iterative optimisation by gradient descent.
        train_model(
            train_set,
            model,
            step_size,
            num_steps=max_steps,
            shuffle=True,
        )
        print('Performed gradient descent.')

    print("Train loss: %s" % eval_model(train_set, model))

    # A single feature column can be visualised directly.
    if train_set[0].shape[1] == 1:
        plot_x_vs_y(train_set, model)

    # Validation split.
    eval_set = preprocess_data(
        read_dataset("data/val.csv"),
        feature_columns=columns,
        squared_features=True,
    )
    print("Eval loss: %s" % eval_model(eval_set, model))

    # Test split.
    test_set = preprocess_data(
        read_dataset("data/test.csv"),
        feature_columns=columns,
        squared_features=True,
    )
    print("Test loss: %s" % eval_model(test_set, model))
def test_preprocess_dataset_one_hot_encoding(self):
    """Check the feature-matrix width for 'BldgType' alone and with 'Id'."""
    # Each case pairs the requested columns with the expected column count.
    cases = (
        (['BldgType'], 5),
        (['BldgType', 'Id'], 6),
    )
    for columns, expected_width in cases:
        processed = data_tools.preprocess_data(self.dataset,
                                               feature_columns=columns)
        features = processed[0]
        self.assertEqual(features.shape, (self.N, expected_width))
def main(_):
    """High level pipeline.

    This script performs the training, evaluation and testing stages of a
    support vector machine. All settings are read from FLAGS.

    Args:
        _: Unused.
    """
    learning_rate = FLAGS.learning_rate
    w_decay_factor = FLAGS.w_decay_factor
    num_steps = FLAGS.num_steps
    opt_method = FLAGS.opt_method
    feature_type = FLAGS.feature_type

    # Load dataset and data processing.
    train_set = read_dataset("data/train.txt", "data/image_data/")
    train_set = preprocess_data(train_set, feature_type)

    # Initialize model.
    ndim = train_set['image'][0].shape[0]
    model = SupportVectorMachine(ndim, 'zeros',
                                 w_decay_factor=w_decay_factor)

    # Train model.
    if opt_method == 'iter':
        # Perform gradient descent.
        train_model(train_set, model, learning_rate, num_steps=num_steps,
                    batch_size=100)
        print('Performed gradient descent.')
    else:
        # Any other opt_method value selects the QP solver.
        train_model_qp(train_set, model)
        print('Finished QP Solver')

    train_loss, train_acc = eval_model(train_set, model)
    print("Train loss: %s" % train_loss)
    print("Train acc: %s" % train_acc)

    # Eval model.
    eval_set = read_dataset("data/val.txt", "data/image_data/")
    eval_set = preprocess_data(eval_set, feature_type)
    eval_loss, eval_acc = eval_model(eval_set, model)
    print("Eval loss: %s" % eval_loss)
    print("Eval acc: %s" % eval_acc)

    # Test model.
    test_set = read_dataset("data/test.txt", "data/image_data/")
    test_set = preprocess_data(test_set, feature_type)
    test_loss, test_acc = eval_model(test_set, model)
    print("Test loss: %s" % test_loss)
    # Label matches the "Train acc" / "Eval acc" lines above.
    print("Test acc: %s" % test_acc)
def main(_):
    """High level pipeline.

    Trains a model on the training labels, prints the validation loss and
    accuracy, then predicts labels for the test set and writes them out.
    """
    # Settings are fixed here; reading them from FLAGS is currently disabled.
    model_type = 'svm'
    feature_type = 'default'

    # Training data: load, then preprocess.
    train_data = read_dataset('data/train_lab.txt', 'data/image_data')
    train_data = preprocess_data(train_data, 'default')
    print("Finish preprocessing...")

    # Build the model for the chosen type.
    n_features = train_data['image'].shape[1]
    if model_type == 'svm':
        model = SupportVectorMachine(n_features, 'uniform')
    elif model_type == 'logistic':
        model = LogisticRegression(n_features, 'uniform')
    elif model_type == 'linear':
        model = LinearRegression(n_features, 'uniform')

    # Training.
    print("Start to train the model...")
    model = train_model(train_data, model)

    # Validation.
    print("Start to evaluate the model...")
    val_data = preprocess_data(
        read_dataset('data/val_lab.txt', 'data/image_data'),
        feature_type,
    )
    loss, acc = eval_model(val_data, model)
    print(loss, acc)

    # Test-set predictions.
    print("Start doing the test")
    test_data = read_dataset('data/test_lab.txt', 'data/image_data')
    print("Start preprocess testing data")
    test_data = preprocess_data(test_data, feature_type)
    print("Making predictions")
    scores = model.forward(test_data['image'])
    test_data['label'] = model.predict(scores)
    print("Output the results to csv file")
    # The predictions are written to the same path the test labels came from.
    write_dataset('data/test_lab.txt', test_data)
    print("Finished!")
def test_preprocess_dataset_squared(self):
    """Run preprocessing with squared features and check a raw value.

    NOTE(review): the preprocessed result is not inspected; the assertion
    only squares a value taken from the raw dataset.
    """
    _ = data_tools.preprocess_data(self.dataset,
                                   feature_columns=['OverallQual'],
                                   squared_features=True)
    # Entry with the smallest key; field index 2 of that entry is squared.
    first_key = sorted(self.dataset.keys())[0]
    raw_value = float(self.dataset[first_key][2])
    self.assertEqual(49, raw_value ** 2)
def setUp(self):
    """Load the training CSV, preprocess it and build a zero-initialised model."""
    self.dataset = io_tools.read_dataset("data/train.csv")
    self.processed_data = data_tools.preprocess_data(
        self.dataset,
        feature_columns=['GarageArea', 'OverallQual', 'BldgType'],
    )
    # Shape of the first element of the preprocessed data.
    feature_shape = self.processed_data[0].shape
    self.N = feature_shape[0]
    self.ndims = feature_shape[1]
    self.model = linear_regression.LinearRegression(self.ndims, "zeros")
def test_preprocess_dataset_shape(self):
    """Preprocessing returns two arrays with the expected shapes."""
    processed = data_tools.preprocess_data(
        self.dataset, feature_columns=['Id', 'GarageArea'])
    self.assertEqual(len(processed), 2)
    # Index 0 is x (two requested columns); index 1 is y (one column).
    expected_shapes = ((self.N, 2), (self.N, 1))
    for position, expected in enumerate(expected_shapes):
        self.assertEqual(processed[position].shape, expected)
def test_default_shape(self):
    """'default' processing flattens each image to a single axis."""
    # Capture the shape before preprocessing is invoked.
    original_shape = self.dataset['image'].shape
    processed = data_tools.preprocess_data(self.dataset,
                                           process_method='default')
    images = processed['image']
    self.assertEqual(len(images.shape), 2)
    self.assertEqual(images.shape[0], original_shape[0])
    # Axes 1..3 of the original collapse into one.
    flattened_size = (original_shape[1]
                      * original_shape[2]
                      * original_shape[3])
    self.assertEqual(images.shape[1], flattened_size)
def main(_):
    """High level pipeline.

    Trains a model on the training labels and prints the loss and accuracy
    measured on the validation labels.
    """
    # Settings are fixed here; reading them from FLAGS is currently disabled.
    model_type = 'linear'
    feature_type = 'default'

    # Training data: load, then preprocess.
    train_data = read_dataset('data/train_lab.txt', 'data/image_data')
    train_data = preprocess_data(train_data, 'default')
    print("Finish preprocessing...")

    # Build the model for the chosen type.
    n_features = train_data['image'].shape[1]
    if model_type == 'linear':
        model = LinearRegressionTf(n_features, 'gaussian')
    elif model_type == 'svm':
        model = SupportVectorMachineTf(n_features, 'uniform')
    elif model_type == 'logistic':
        model = LogisticRegressionTf(n_features, 'uniform')

    # Training.
    print("Start to train the model...")
    model = train_model(train_data, model)

    # Validation.
    print("Start to evaluate the model...")
    val_data = preprocess_data(
        read_dataset('data/val_lab.txt', 'data/image_data'),
        feature_type,
    )
    loss, acc = eval_model(val_data, model)
    print(loss, acc)
def setUp(self):
    """Load and 'raw'-preprocess the training data; build a zero-initialised SVM."""
    raw_dataset = io_tools.read_dataset("data/train.txt", "data/image_data/")
    self.dataset = data_tools.preprocess_data(raw_dataset, 'raw')
    # Presumably 8x8 images with 3 channels, flattened -- TODO confirm.
    input_dims = 8 * 8 * 3
    self.model = support_vector_machine.SupportVectorMachine(input_dims,
                                                             'zeros')
def update_step(x_batch, y_batch, model, learning_rate):
    """Performs a single update step (i.e. forward, then backward).

    Args:
        x_batch(numpy.ndarray): input data of dimension (N, ndims).
        y_batch(numpy.ndarray): label data of dimension (N, 1).
        model(LinearModel): Initialized linear model.
        learning_rate(float): Step size applied to the gradient.
    """
    # Dispatch through the instance so the model's own forward/backward run.
    f = model.forward(x_batch)
    grad = model.backward(f, y_batch)
    # Apply the learning rate exactly once; scaling the gradient before this
    # line as well would make the effective step learning_rate ** 2.
    model.w = model.w - learning_rate * grad


# NOTE(review): the statements below run at import time and start a training
# run on 'train.csv'. They look like leftover debugging code -- confirm
# whether they should be removed or moved under a __main__ guard.
dataset = io_tools.read_dataset('train.csv')
data = data_tools.preprocess_data(dataset)
ndim = data[0].shape[1]
print('data[0]', data[0])
print('ndim', ndim)
train_model(data, LinearRegression(ndim))


def train_model_analytic(processed_dataset, model):
    """Computes and sets the optimal model weights (model.w).

    Args:
        processed_dataset(list): List of [x,y] processed
            from utils.data_tools.preprocess_data.
        model(LinearRegression): LinearRegression model.
    """
def test_preprocess_dataset_y_number(self):
    """The first entry of the second returned array has type np.float32."""
    columns = ['Id', 'GarageArea', 'SalePrice']
    processed = data_tools.preprocess_data(self.dataset,
                                           feature_columns=columns)
    first_target = processed[1][0][0]
    self.assertEqual(type(first_target), np.float32)
def test_preprocess_dataset_x_not_price(self):
    """Requesting 'SalePrice' as a feature still yields two feature columns."""
    columns = ['Id', 'GarageArea', 'SalePrice']
    processed = data_tools.preprocess_data(self.dataset,
                                           feature_columns=columns)
    features = processed[0]
    self.assertEqual(features.shape, (self.N, 2))