Exemplos de preprocess_data em Python, exemplos de utils.data_tools.preprocess_data em Python

Exemplo n.º 1

0

Exibir arquivo

def main(_):
    """Run the high-level pipeline.

    Loads and preprocesses the dataset, builds the model selected by
    ``FLAGS.model_type``, trains it, evaluates it, and finally reloads and
    preprocesses the data for the test stage.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If ``FLAGS.model_type`` is not one of ``'linear'``,
            ``'logistic'`` or ``'svm'``.
    """
    learning_rate = FLAGS.learning_rate
    feature_type = FLAGS.feature_type
    model_type = FLAGS.model_type

    # Load dataset.
    # NOTE(review): training reads 'data/test_lab.txt', the same file used by
    # the eval and test stages below -- confirm this is intended.
    data = read_dataset('data/test_lab.txt', 'data/image_data')

    # Data Processing.
    data = preprocess_data(data, feature_type)

    # Initialize model.
    ndim = data['image'].shape[1]
    if model_type == 'linear':
        model = LinearRegressionTf(ndim, 'ones')
    elif model_type == 'logistic':
        model = LogisticRegression(ndim, 'zeros')
    elif model_type == 'svm':
        model = SupportVectorMachine(ndim, 'zeros')
    else:
        # Fail with a clear message instead of leaving `model` unbound and
        # crashing later with an UnboundLocalError.
        raise ValueError('Unknown model_type: %r' % (model_type,))

    # Train Model.
    model = train_model(data, model, learning_rate, num_steps=20000)

    # Eval Model.
    data_test = read_dataset('data/test_lab.txt', 'data/image_data')
    data_test = preprocess_data(data_test, feature_type)
    # NOTE(review): the unpack order (acc, loss) should be checked against
    # what eval_model actually returns; neither value is used afterwards.
    acc, loss = eval_model(data_test, model)

    # Test Model.
    data_test = read_dataset('data/test_lab.txt', 'data/image_data')
    data_test = preprocess_data(data_test, feature_type)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: main.py Projeto: nbermudezs/UIUC_CS446

def main(_):
    """Fit a linear regression model and report train/eval/test losses.

    Hyper-parameters and the comma-separated feature column list are read
    from FLAGS. When FLAGS.opt_method is 'iter' the model is fitted by
    gradient descent; any other value selects the closed-form solver.
    """
    step_size = FLAGS.learning_rate
    w_decay_factor = FLAGS.w_decay_factor  # Read from FLAGS; unused below.
    step_count = FLAGS.num_steps
    method = FLAGS.opt_method
    columns = FLAGS.feature_columns.split(',')

    def load_split(csv_path):
        """Read one CSV split and preprocess it with squared features."""
        return preprocess_data(read_dataset(csv_path),
                               feature_columns=columns,
                               squared_features=True)

    # Training split and model; the model width is the feature matrix's
    # second dimension.
    train_set = load_split("data/train.csv")
    model = LinearRegression(train_set[0].shape[1], 'zeros')

    if method != 'iter':
        # Closed-form solution.
        train_model_analytic(train_set, model)
        print('Closed form solution.')
    else:
        # Gradient descent.
        train_model(train_set, model, step_size,
                    num_steps=step_count, shuffle=True)
        print('Performed gradient descent.')

    train_loss = eval_model(train_set, model)
    print("Train loss: %s" % train_loss)

    # A single feature can be visualised directly as x vs. y.
    if train_set[0].shape[1] == 1:
        plot_x_vs_y(train_set, model)

    # Validation split.
    eval_set = load_split("data/val.csv")
    eval_loss = eval_model(eval_set, model)
    print("Eval loss: %s" % eval_loss)

    # Test split.
    test_set = load_split("data/test.csv")
    test_loss = eval_model(test_set, model)
    print("Test loss: %s" % test_loss)

Exemplo n.º 3

0

Exibir arquivo

    def test_preprocess_dataset_one_hot_encoding(self):
        """Check the feature-matrix shape when 'BldgType' is requested."""
        # (requested feature columns, expected second dimension of data[0])
        cases = (
            (['BldgType'], 5),
            (['BldgType', 'Id'], 6),
        )
        for columns, expected_width in cases:
            processed = data_tools.preprocess_data(self.dataset,
                                                   feature_columns=columns)
            self.assertEqual(processed[0].shape, (self.N, expected_width))

Exemplo n.º 4

0

Exibir arquivo

Arquivo: main.py Projeto: nbermudezs/UIUC_CS446

def main(_):
    """Run the high-level pipeline.

    Trains a support vector machine on the training split and prints loss
    and accuracy for the train, validation and test splits. When
    ``FLAGS.opt_method`` is ``'iter'`` the model is trained by gradient
    descent; otherwise the QP solver is used.

    Args:
        _: Unused positional argument.
    """
    learning_rate = FLAGS.learning_rate
    w_decay_factor = FLAGS.w_decay_factor
    num_steps = FLAGS.num_steps
    opt_method = FLAGS.opt_method
    feature_type = FLAGS.feature_type

    # Load dataset and data processing.
    train_set = read_dataset("data/train.txt", "data/image_data/")
    train_set = preprocess_data(train_set, feature_type)

    # Initialize model; the input size is the length of the first
    # preprocessed image vector.
    ndim = train_set['image'][0].shape[0]
    model = SupportVectorMachine(ndim,
                                 'zeros',
                                 w_decay_factor=w_decay_factor)

    # Train model.
    if opt_method == 'iter':
        # Perform gradient descent.
        train_model(train_set,
                    model,
                    learning_rate,
                    num_steps=num_steps,
                    batch_size=100)
        print('Performed gradient descent.')
    else:
        # Solve with the QP solver.
        train_model_qp(train_set, model)
        print('Finished QP Solver')

    train_loss, train_acc = eval_model(train_set, model)
    print("Train loss: %s" % train_loss)
    print("Train acc: %s" % train_acc)

    # Eval model.
    eval_set = read_dataset("data/val.txt", "data/image_data/")
    eval_set = preprocess_data(eval_set, feature_type)
    eval_loss, eval_acc = eval_model(eval_set, model)
    print("Eval loss: %s" % eval_loss)
    print("Eval acc: %s" % eval_acc)

    # Test model.
    test_set = read_dataset("data/test.txt", "data/image_data/")
    test_set = preprocess_data(test_set, feature_type)
    test_loss, test_acc = eval_model(test_set, model)
    print("Test loss: %s" % test_loss)
    # Label matches the "Train acc" / "Eval acc" lines above.
    print("Test acc: %s" % test_acc)

Exemplo n.º 5

0

Exibir arquivo

Arquivo: main.py Projeto: sashimiwithwasabi/Pattern_Recognition

def main(_):
    """Train, validate and run test-set prediction with fixed settings.

    The FLAGS-based configuration is disabled in this version; the feature
    type and model type are set directly in the body.
    """
    # Settings are hard-coded here rather than read from FLAGS
    # (learning_rate, feature_type, model_type, num_steps).
    feature_type = 'default'
    model_type = 'svm'

    image_dir = 'data/image_data'
    test_list_path = 'data/test_lab.txt'

    # Load and preprocess the training data.
    data = preprocess_data(read_dataset('data/train_lab.txt', image_dir),
                           feature_type)
    print("Finish preprocessing...")

    # Build the model; its input size is the second dimension of the images.
    ndim = data['image'].shape[1]
    if model_type == 'linear':
        model = LinearRegression(ndim, 'uniform')
    elif model_type == 'logistic':
        model = LogisticRegression(ndim, 'uniform')
    elif model_type == 'svm':
        model = SupportVectorMachine(ndim, 'uniform')

    # Training.
    print("Start to train the model...")
    model = train_model(data, model)

    # Validation.
    print("Start to evaluate the model...")
    data_val = preprocess_data(read_dataset('data/val_lab.txt', image_dir),
                               feature_type)
    loss, acc = eval_model(data_val, model)
    print(loss, acc)

    # Test-set prediction.
    print("Start doing the test")
    data_test = read_dataset(test_list_path, image_dir)
    print("Start preprocess testing data")
    data_test = preprocess_data(data_test, feature_type)
    print("Making predictions")
    scores = model.forward(data_test['image'])
    data_test['label'] = model.predict(scores)
    print("Output the results to csv file")
    # Generate Kaggle output (written to the same path the test list was
    # read from).
    write_dataset(test_list_path, data_test)
    print("Finished!")

Exemplo n.º 6

0

Exibir arquivo

 def test_preprocess_dataset_squared(self):
     """Run squared-feature preprocessing and check a raw value's square.

     NOTE(review): the assertion reads self.dataset, not the value returned
     by preprocess_data, so the preprocessed output itself is not verified.
     """
     data_tools.preprocess_data(self.dataset,
                                feature_columns=['OverallQual'],
                                squared_features=True)
     # Square field 2 of the record with the smallest key.
     first_key = sorted(self.dataset.keys())[0]
     squared = float(self.dataset[first_key][2]) ** 2
     self.assertEqual(49, squared)

Exemplo n.º 7

0

Exibir arquivo

 def setUp(self):
     """Load the training CSV, preprocess three columns, build a model."""
     self.dataset = io_tools.read_dataset("data/train.csv")
     self.processed_data = data_tools.preprocess_data(
         self.dataset,
         feature_columns=['GarageArea', 'OverallQual', 'BldgType'])
     # The first preprocessed array gives the sample count and model width.
     features = self.processed_data[0]
     self.N = features.shape[0]
     self.ndims = features.shape[1]
     self.model = linear_regression.LinearRegression(self.ndims, "zeros")

Exemplo n.º 8

0

Exibir arquivo

 def test_preprocess_dataset_shape(self):
     """Check preprocess_data returns two arrays with the expected shapes."""
     processed = data_tools.preprocess_data(
         self.dataset, feature_columns=['Id', 'GarageArea'])
     self.assertEqual(len(processed), 2)
     # Index 0 is x (two columns), index 1 is y (one column).
     for index, width in ((0, 2), (1, 1)):
         self.assertEqual(processed[index].shape, (self.N, width))

Exemplo n.º 9

0

Exibir arquivo

 def test_default_shape(self):
     """Check 'default' preprocessing flattens each image to one row."""
     raw_shape = self.dataset['image'].shape
     processed = data_tools.preprocess_data(self.dataset,
                                            process_method='default')
     images = processed['image']
     # Result must be 2-D with the sample count unchanged.
     self.assertEqual(len(images.shape), 2)
     self.assertEqual(images.shape[0], raw_shape[0])
     # Second dimension is the product of the three trailing raw dimensions.
     flattened_size = raw_shape[1] * raw_shape[2] * raw_shape[3]
     self.assertEqual(images.shape[1], flattened_size)

Exemplo n.º 10

0

Exibir arquivo

Arquivo: main_tf.py Projeto: sashimiwithwasabi/Pattern_Recognition

def main(_):
    """Train and validate a TensorFlow-backed model with fixed settings.

    The FLAGS-based configuration is disabled in this version; the feature
    type and model type are set directly in the body.
    """
    # Settings are hard-coded here rather than read from FLAGS
    # (learning_rate, feature_type, model_type).
    feature_type = 'default'
    model_type = 'linear'

    image_dir = 'data/image_data'

    # Load and preprocess the training data.
    data = preprocess_data(read_dataset('data/train_lab.txt', image_dir),
                           feature_type)
    print("Finish preprocessing...")

    # Build the model; its input size is the second dimension of the images.
    ndim = data['image'].shape[1]
    if model_type == 'linear':
        model = LinearRegressionTf(ndim, 'gaussian')
    elif model_type == 'logistic':
        model = LogisticRegressionTf(ndim, 'uniform')
    elif model_type == 'svm':
        model = SupportVectorMachineTf(ndim, 'uniform')

    # Training.
    print("Start to train the model...")
    model = train_model(data, model)

    # Validation.
    print("Start to evaluate the model...")
    data_val = preprocess_data(read_dataset('data/val_lab.txt', image_dir),
                               feature_type)
    loss, acc = eval_model(data_val, model)
    print(loss, acc)

Exemplo n.º 11

0

Exibir arquivo

Arquivo: test.py Projeto: hkim150/CS446-Machine_Learning

 def setUp(self):
     """Load and 'raw'-preprocess the training data, then build an SVM."""
     raw = io_tools.read_dataset("data/train.txt", "data/image_data/")
     self.dataset = data_tools.preprocess_data(raw, 'raw')
     # presumably 8x8 images with 3 channels -- confirm against the data
     input_dim = 8 * 8 * 3
     self.model = support_vector_machine.SupportVectorMachine(
         input_dim, 'zeros')

Exemplo n.º 12

0

Exibir arquivo

def update_step(x_batch, y_batch, model, learning_rate):
    """Performs one single update step, (i.e. forward then backward).

    Updates ``model.w`` in place with one gradient-descent step:
    ``w <- w - learning_rate * grad``.

    Args:
        x_batch(numpy.ndarray): input data of dimension (N, ndims).
        y_batch(numpy.ndarray): label data of dimension (N, 1).
        model(LinearModel): Initialized linear model.
        learning_rate(float): Step size applied to the gradient.
    """
    # Dispatch through the model instance so any model exposing
    # forward/backward works, not only LinearRegression.
    f = model.forward(x_batch)
    grad = model.backward(f, y_batch)
    # Apply the learning rate exactly once; scaling the gradient beforehand
    # as well would make the effective step learning_rate ** 2.
    model.w = model.w - learning_rate * grad


# Ad-hoc driver that runs at import time: load 'train.csv', preprocess it
# with preprocess_data's default arguments, print the first preprocessed
# array and its column count, then train a LinearRegression on the result.
dataset = io_tools.read_dataset('train.csv')
# print(dataset)
data = data_tools.preprocess_data(dataset)
# The second dimension of the first preprocessed array sizes the model.
ndim = data[0].shape[1]
print('data[0]', data[0])
print('ndim', ndim)
# print(data)
# No learning rate or step count is passed here, so train_model's own
# defaults (if any) apply.
train_model(data, LinearRegression(ndim))


def train_model_analytic(processed_dataset, model):
    """Computes and sets the optimal model weights (model.w).

    NOTE(review): only this docstring is visible in this excerpt; no
    implementation follows it here.

    Args:
        processed_dataset(list): List of [x,y] processed
            from utils.data_tools.preprocess_data.
        model(LinearRegression): LinearRegression model.
    """

Exemplo n.º 13

0

Exibir arquivo

 def test_preprocess_dataset_y_number(self):
     """Check the first target value is exactly of type np.float32."""
     processed = data_tools.preprocess_data(
         self.dataset,
         feature_columns=['Id', 'GarageArea', 'SalePrice'])
     first_target = processed[1][0][0]
     self.assertEqual(type(first_target), np.float32)

Exemplo n.º 14

0

Exibir arquivo

 def test_preprocess_dataset_x_not_price(self):
     """Check x has two columns even when 'SalePrice' is requested."""
     requested = ['Id', 'GarageArea', 'SalePrice']
     processed = data_tools.preprocess_data(self.dataset,
                                            feature_columns=requested)
     features = processed[0]
     self.assertEqual(features.shape, (self.N, 2))