Example #1
def main(_):
    """High level pipeline.

    This script performs the training, evaluation, and testing stages of the model.
    """
    learning_rate = FLAGS.learning_rate
    feature_type = FLAGS.feature_type
    model_type = FLAGS.model_type

    # Load dataset.
    data = read_dataset('data/test_lab.txt', 'data/image_data')

    # Data Processing.
    data = preprocess_data(data, feature_type)

    # Initialize model.
    ndim = data['image'].shape[1]
    if model_type == 'linear':
        model = LinearRegressionTf(ndim, 'ones')
    elif model_type == 'logistic':
        model = LogisticRegression(ndim, 'zeros')
    elif model_type == 'svm':
        model = SupportVectorMachine(ndim, 'zeros')

    # Train Model.
    model = train_model(data, model, learning_rate, num_steps=20000)

    # Eval Model.
    data_test = read_dataset('data/test_lab.txt', 'data/image_data')
    data_test = preprocess_data(data_test, feature_type)
    acc, loss = eval_model(data_test, model)

    # Test Model.
    data_test = read_dataset('data/test_lab.txt', 'data/image_data')
    data_test = preprocess_data(data_test, feature_type)
Example #2
def main(_):
    """High level pipeline.

    This script performs the training, evaluation, and testing stages for
    semi-supervised learning using a Gaussian mixture model.
    """
    # Load dataset.
    # _, unlabeled_data = io_tools.read_dataset('data/simple_test.csv')
    _, unlabeled_data = io_tools.read_dataset('data/mnist_train.csv')
    n_dims = unlabeled_data.shape[1]

    # Initialize model.
    model = GaussianMixtureModel(n_dims,
                                 n_components=FLAGS.n_components,
                                 max_iter=FLAGS.max_iter)

    # Unsupervised training.
    model.fit(unlabeled_data)
    # Supervised training.
    # train_label, train_data = io_tools.read_dataset('data/simple_test.csv')
    train_label, train_data = io_tools.read_dataset('data/mnist_train.csv')
    model.supervised_fit(train_data, train_label)

    # Eval model.
    # eval_label, eval_data = io_tools.read_dataset('data/simple_test.csv')
    eval_label, eval_data = io_tools.read_dataset('data/mnist_test.csv')
    y_hat_eval = model.supervised_predict(eval_data)
    print(eval_label)
    acc = np.sum(y_hat_eval == eval_label) / (1. * eval_data.shape[0])
    print("Accuracy: %s" % acc)
Example #3
def main(_):
    """High level pipeline.

    This script performs the training, evaluation, and testing stages for
    semi-supervised learning using the k-means or Gaussian mixture model
    algorithm.
    """
    # Load dataset.
    unlabeled_data, _ = io_tools.read_dataset('data/train_no_label.csv')
    n_dims = unlabeled_data.shape[1]

    # Initialize model.
    if FLAGS.model_type == 'kmeans':
        model = KMeans(n_dims, n_components=FLAGS.n_components,
                       max_iter=FLAGS.max_iter)
    else:
        model = GaussianMixtureModel(n_dims, n_components=FLAGS.n_components,
                                     max_iter=FLAGS.max_iter)

    # Unsupervised training.
    model.fit(unlabeled_data)

    # Supervised training.
    train_data, train_label = io_tools.read_dataset(
        'data/train_with_label.csv')
    model.supervised_fit(train_data, train_label)

    # Eval model.
    eval_data, eval_label = io_tools.read_dataset('data/val.csv')
    y_hat_eval = model.supervised_predict(eval_data)

    acc = np.sum(y_hat_eval == eval_label) / (1.*eval_data.shape[0])
    print("Accuracy: %s" % acc)
Example #4
def main(_):
    """High level pipeline.
    This script performs the training, evaluation, and testing stages of the model.
    """
    learning_rate = FLAGS.learning_rate
    w_decay_factor = FLAGS.w_decay_factor
    num_steps = FLAGS.num_steps
    opt_method = FLAGS.opt_method
    feature_columns = FLAGS.feature_columns.split(',')

    # Load dataset.
    dataset = read_dataset("data/train.csv")

    # Data processing.
    train_set = preprocess_data(dataset,
                                feature_columns=feature_columns,
                                squared_features=True)

    # Initialize model.
    ndim = train_set[0].shape[1]
    model = LinearRegression(ndim, 'zeros')

    # Train model.
    if opt_method == 'iter':
        # Perform gradient descent.
        train_model(train_set,
                    model,
                    learning_rate,
                    num_steps=num_steps,
                    shuffle=True)
        print('Performed gradient descent.')
    else:
        # Compute closed form solution.
        train_model_analytic(train_set, model)
        print('Closed form solution.')

    train_loss = eval_model(train_set, model)
    print("Train loss: %s" % train_loss)

    # Plot the x vs. y if one dimension.
    if train_set[0].shape[1] == 1:
        plot_x_vs_y(train_set, model)

    # Eval model.
    raw_eval = read_dataset("data/val.csv")
    eval_set = preprocess_data(raw_eval,
                               feature_columns=feature_columns,
                               squared_features=True)
    eval_loss = eval_model(eval_set, model)
    print("Eval loss: %s" % eval_loss)

    # Test model.
    raw_test = read_dataset("data/test.csv")
    test_set = preprocess_data(raw_test,
                               feature_columns=feature_columns,
                               squared_features=True)
    test_loss = eval_model(test_set, model)
    print("Test loss: %s" % test_loss)
Example #5
def main(_):
    """High level pipeline.
    This script performs the training, evaluation, and testing stages of the model.
    """
    learning_rate = FLAGS.learning_rate
    w_decay_factor = FLAGS.w_decay_factor
    num_steps = FLAGS.num_steps
    opt_method = FLAGS.opt_method
    feature_type = FLAGS.feature_type

    # Load dataset and data processing.
    train_set = read_dataset("data/train.txt", "data/image_data/")
    train_set = preprocess_data(train_set, feature_type)

    # Initialize model.
    ndim = train_set['image'][0].shape[0]
    model = SupportVectorMachine(ndim,
                                 'zeros',
                                 w_decay_factor=FLAGS.w_decay_factor)

    # Train model.
    if opt_method == 'iter':
        # Perform gradient descent.
        train_model(train_set,
                    model,
                    learning_rate,
                    num_steps=num_steps,
                    batch_size=100)
        print('Performed gradient descent.')
    else:
        # Compute closed form solution.
        train_model_qp(train_set, model)
        print('Finished QP Solver')

    train_loss, train_acc = eval_model(train_set, model)
    print("Train loss: %s" % train_loss)
    print("Train acc: %s" % train_acc)

    # Eval model.
    eval_set = read_dataset("data/val.txt", "data/image_data/")
    eval_set = preprocess_data(eval_set, feature_type)
    eval_loss, eval_acc = eval_model(eval_set, model)
    print("Eval loss: %s" % eval_loss)
    print("Eval acc: %s" % eval_acc)

    # Test model.
    test_set = read_dataset("data/test.txt", "data/image_data/")
    test_set = preprocess_data(test_set, feature_type)
    test_loss, test_acc = eval_model(test_set, model)
    print("Test loss: %s" % test_loss)
    print("Test ac: %s" % test_acc)
Example #6
def main(_):
    """High level pipeline.
    This script performs the training, evaluation, and testing stages of the model.
    """
    #    learning_rate = FLAGS.learning_rate
    #    feature_type = FLAGS.feature_type
    #    model_type = FLAGS.model_type
    #    num_steps = FLAGS.num_steps

    feature_type = 'default'
    model_type = 'svm'
    # Load dataset.
    data = read_dataset('data/train_lab.txt', 'data/image_data')

    # Data Processing.
    data = preprocess_data(data, 'default')
    print("Finish preprocessing...")

    # Initialize model.
    ndim = data['image'].shape[1]
    if model_type == 'linear':
        model = LinearRegression(ndim, 'uniform')
    elif model_type == 'logistic':
        model = LogisticRegression(ndim, 'uniform')
    elif model_type == 'svm':
        model = SupportVectorMachine(ndim, 'uniform')

    # Train Model.
    print("Start to train the model...")
    model = train_model(data, model)

    # Eval Model.
    print("Start to evaluate the model...")
    data_val = read_dataset('data/val_lab.txt', 'data/image_data')
    data_val = preprocess_data(data_val, feature_type)
    loss, acc = eval_model(data_val, model)
    print(loss, acc)

    # Test Model.
    print("Start doing the test")
    data_test = read_dataset('data/test_lab.txt', 'data/image_data')
    print("Start preprocess testing data")
    data_test = preprocess_data(data_test, feature_type)
    print("Making predictions")
    data_test['label'] = model.predict(model.forward(data_test['image']))
    print("Output the results to csv file")
    write_dataset('data/test_lab.txt', data_test)
    # Generate Kaggle output.
    print("Finished!")
Example #7
 def setUp(self):
     cols = ['GarageArea', 'OverallQual', 'BldgType']
     self.dataset = io_tools.read_dataset("data/train.csv")
     self.processed_data = data_tools.preprocess_data(self.dataset,
                                                      feature_columns=cols)
     self.N = self.processed_data[0].shape[0]
     self.ndims = self.processed_data[0].shape[1]
     self.model = linear_regression.LinearRegression(self.ndims, "zeros")
def main(_):
    """High level pipeline.

    This script performs the training, evaluation, and testing stages of the model.
    """
    #    learning_rate = FLAGS.learning_rate
    #    feature_type = FLAGS.feature_type
    #    model_type = FLAGS.model_type

    feature_type = 'default'
    model_type = 'linear'
    # Load dataset.
    data = read_dataset('data/train_lab.txt', 'data/image_data')

    # Data Processing.
    data = preprocess_data(data, 'default')
    print("Finish preprocessing...")

    # Initialize model.
    ndim = data['image'].shape[1]
    if model_type == 'linear':
        model = LinearRegressionTf(ndim, 'gaussian')
    elif model_type == 'logistic':
        model = LogisticRegressionTf(ndim, 'uniform')
    elif model_type == 'svm':
        model = SupportVectorMachineTf(ndim, 'uniform')

    # Train Model.
    print("Start to train the model...")
    model = train_model(data, model)

    # Eval Model.
    print("Start to evaluate the model...")
    data_val = read_dataset('data/val_lab.txt', 'data/image_data')
    data_val = preprocess_data(data_val, feature_type)
    loss, acc = eval_model(data_val, model)
    print(loss, acc)
Example #9
 def setUp(self):
     self.dataset = io_tools.read_dataset("data/train.txt",
                                          "data/image_data/")
     self.dataset = data_tools.preprocess_data(self.dataset, 'raw')
     self.model = support_vector_machine.SupportVectorMachine(
         8 * 8 * 3, 'zeros')
Example #10
 def setUp(self):
     self.dataset = io_tools.read_dataset("data/train.txt",
                                          "data/image_data/")
Example #11
 def setUp(self):
     self.dataset = io_tools.read_dataset("data/train.csv")
Example #12

def update_step(x_batch, y_batch, model, learning_rate):
    """Performs on single update step, (i.e. forward then backward).

    Args:
        x_batch(numpy.ndarray): input data of dimension (N, ndims).
        y_batch(numpy.ndarray): label data of dimension (N, 1).
        model(LinearModel): Initialized linear model.
    """
    f = LinearRegression.forward(model, x_batch)
    grad = learning_rate * LinearRegression.backward(model, f, y_batch)
    model.w = model.w - learning_rate * grad


dataset = io_tools.read_dataset('train.csv')
# print(dataset)
data = data_tools.preprocess_data(dataset)
ndim = data[0].shape[1]
print('data[0]', data[0])
print('ndim', ndim)
# print(data)
train_model(data, LinearRegression(ndim))


def train_model_analytic(processed_dataset, model):
    """Computes and sets the optimal model weights (model.w).

    Args:
        processed_dataset(list): List of [x,y] processed
            from utils.data_tools.preprocess_data.
Example #13
 def test_io(self):
     train_label, train_data = io_tools.read_dataset('data/simple_test.csv')
     np.testing.assert_array_equal(train_data.shape, np.asarray([200, 2]))
Example #14
File: main.py  Project: handsomeboy/Piface
def main(_):
    """High level pipeline."""
    # pp.pprint(flags.FLAGS.__flags)

    # Preprocess method supports ['default', 'rgb', 'hsv']
    preprocess_method = FLAGS.preprocess_method
    feature_type = FLAGS.feature_type

    # Training/Validation/Testing image txt dir
    traintxtdir = FLAGS.traintxtdir
    # valtxtdir = FLAGS.valtxtdir
    testtxtdir = FLAGS.testtxtdir

    # All the image together
    totaltxtdir = FLAGS.totaltxtdir

    # Training image dataset dir
    imgdir = FLAGS.imgdir
    preprocesed_imgdir = FLAGS.preprocesed_imgdir
    rescaled_imgdir = FLAGS.rescaled_imgdir

    # Read all the images
    # -------------------------------------------------------------- #
    # print("[*] Reading and preprocessing dataset...")
    # raw_dataset, filename = read_dataset(totaltxtdir, imgdir)

    # Resize all the images -> save to new folder, all image in same size
    # rescale_data(raw_dataset, filename, feature_type)

    # Preprocess images
    # preprocess_data(rescaled_imgdir, preprocesed_imgdir, preprocess_method)

    # Load train/val/test set
    # -------------------------------------------------------------- #
    print("[*] Loading training set...")
    try:
        train_set, _ = read_dataset(traintxtdir, preprocesed_imgdir)
        train_image = train_set['image']
        train_label = train_set['label']
        positive_train_img_num = np.sum(train_label == 1)
        negative_train_img_num = np.sum(train_label == 0)
    except:
        print("[*] Oops! Please try loading training set again...")

    print("[*] Loading training set successfully!")
    print("[*] " + str(positive_train_img_num) + " faces loaded! " +
          str(negative_train_img_num) + " non-faces loaded!")

    # Compute integral image of training set
    for idx in range(train_image.shape[0]):
        tmp = integral_image(train_image[idx])
        train_image[idx] = tmp

    # Adaboost and Cascade classifiers
    classifiers = AdaBoost(train_image,
                           train_label,
                           positive_train_img_num,
                           negative_train_img_num,
                           feature_size=0)

    # -------------------------------------------------------------- #
    print("[*] Loading test set...")
    try:
        test_set, _ = read_dataset(testtxtdir, preprocesed_imgdir)
        test_image = test_set['image']
        test_label = test_set['label']
        positive_test_img_num = np.sum(test_label == 1)
        negative_test_img_num = np.sum(test_label == 0)
    except:
        print("[*] Oops! Please try loading test set again...")

    print("[*] Loading test set successfully!")
    print("[*] " + str(positive_test_img_num) + " faces loaded! " +
          str(negative_test_img_num) + " non-faces loaded!")

    # Compute integral image of test set
    for idx in range(test_image.shape[0]):
        tmp = integral_image(test_image[idx])
        test_image[idx] = tmp

    # Start test
    # -------------------------------------------------------------- #
    print("[*] Start testing...")

    positive_test_images = []
    negative_test_images = []

    for idx in range(positive_test_img_num + negative_test_img_num):
        if test_label[idx]:
            positive_test_images.append(test_image[idx])
        else:
            negative_test_images.append(test_image[idx])

    pred_pos = np.sum(
        ensemble_vote_all(np.array(positive_test_images), classifiers))
    acc_pos = float(pred_pos / positive_test_img_num)

    pred_neg = np.sum(
        ensemble_vote_all(np.array(negative_test_images), classifiers))
    acc_neg = float(pred_neg / negative_test_img_num)

    print("[*] Test done!")
    print("Faces [" + str(pred_pos) + " / " + str(positive_test_img_num) +
          "] accuracy: " + str(acc_pos))
    print("Objects [" + str(pred_neg) + " / " + str(negative_test_img_num) +
          "] accuracy: " + str(acc_neg))