def optimize_hyperparams(X, y, num_classes, hidden_layer_sizes,
                         regularization_terms, max_iterations):
    ## === Divide training data. ===
    X_train, y_train, X_val, y_val = partition_data(X, y, split=.9)
    trainer = make_trainer(X_train, y_train, num_classes, silent=True)
    all_combos = itertools.product(
        hidden_layer_sizes, regularization_terms, max_iterations)

    best_classifier = None
    best_opts = None
    best_accuracy = 0

    # Find hyperparameter combination that maximizes validation set accuracy.
    # TODO: Use multiprocessing to map across these combinations!
    # TODO: Rewrite this as some kind of argmax.
    for combo in all_combos:
        print('> Attempting:', combo)
        classifier = trainer(*combo)
        accuracy = classifier_accuracy(classifier, X_val, y_val)
        print('< Validation set accuracy:', accuracy)
        if accuracy > best_accuracy:
            best_opts = combo
            best_classifier = classifier
            best_accuracy = accuracy

    return best_classifier, best_opts, best_accuracy
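# A minimal sketch of the "argmax" TODO above (an illustrative addition, not
# part of the original module): the selection loop can be expressed as max()
# over (accuracy, combo, classifier) tuples. It assumes the same trainer
# closure and classifier_accuracy helper used in optimize_hyperparams.
def optimize_hyperparams_argmax(trainer, all_combos, X_val, y_val):
    def evaluate(combo):
        classifier = trainer(*combo)
        return classifier_accuracy(classifier, X_val, y_val), combo, classifier

    # max() scans every combination and keeps the one with the best accuracy.
    best_accuracy, best_opts, best_classifier = max(
        (evaluate(combo) for combo in all_combos), key=lambda t: t[0])
    return best_classifier, best_opts, best_accuracy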
def run_optimized_training(input, output, num_classes, source):
    X, y = load_data(input, source)
    X, y = shuffle_data(X, y)
    ## === Divide training data. ===
    X, y, X_test, y_test = partition_data(X, y, split=.9)
    classifier, opts, val_accuracy = optimize_hyperparams(
        X, y, num_classes,
        HIDDEN_LAYER_OPTS, REGULARIZATION_OPTS, MAX_ITERATION_OPTS)
    test_accuracy = classifier_accuracy(classifier, X_test, y_test)

    print()
    print('===================')
    print('OPTIMAL PARAMETERS:')
    print('Hidden layer size: ', opts[0])
    print('Regularization value: ', opts[1])
    print('Max training iterations: ', opts[2])
    print('Accuracy on validation set:', val_accuracy)
    print('Accuracy on test set:', test_accuracy)
    print('===================')
    print()
    print('Saving out Neural Network weights.')
    save_classifier(classifier, output)
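# A hypothetical invocation sketch (not part of the original module): the
# output filename and num_classes value are assumptions, while the data path
# and 'numpy' source string mirror the baseline script later in this section.
# HIDDEN_LAYER_OPTS, REGULARIZATION_OPTS, and MAX_ITERATION_OPTS are assumed
# to be defined elsewhere in the module.
if __name__ == '__main__':
    run_optimized_training('data/ex4data1_conv.mat', 'optimized_weights.pkl',
                           num_classes=10, source='numpy')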
    print(X.shape, A.shape)
    print(y.shape, b.shape)
    return np.vstack((X, A)), np.vstack((y, b))


def accuracy(predictions, labels):
    # Percentage of rows whose argmax prediction matches the one-hot label.
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
            / predictions.shape[0])


font_dataset, font_labels = load_data(data_file, 'numpy')
font_dataset = font_dataset.astype(np.float32)
font_labels = make_one_hot(font_labels)
font_dataset, font_labels = shuffle_data(font_dataset, font_labels)

train_dataset, train_labels, X, y = partition_data(font_dataset, font_labels, split=.9)
valid_dataset, valid_labels, test_dataset, test_labels = partition_data(X, y, split=.5)

print('Original set', font_dataset.shape, font_labels.shape)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

graph = tf.Graph()
with graph.as_default():
    # Input data. For the training data, we use a placeholder that will be fed
    # at run time with a training minibatch.
    tf_train_dataset = tf.placeholder(tf.float32,
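# A small standalone check of the accuracy() helper above (an illustrative
# addition, not part of the original script): two of the three toy argmax
# predictions match the labels, so the helper reports roughly 66.7.
_toy_predictions = np.array([[.9, .1], [.2, .8], [.6, .4]])
_toy_labels = np.array([[1, 0], [1, 0], [1, 0]])
print('Toy accuracy check:', accuracy(_toy_predictions, _toy_labels))  # ~66.67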
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.preprocessing import StandardScaler

from matlab_port.utils import partition_data, shuffle_data
from train import load_data

X, y = load_data('data/ex4data1_conv.mat', 'numpy')
X, y = shuffle_data(X, y)
X, y, X_test, y_test = partition_data(X, y, split=.8)

classifier = LogisticRegression(C=.1)
classifier.fit(X, y)

z = classifier.predict(X_test)
print("Test accuracy: {acc:.1%}".format(acc=(sum(y_test == z) / y_test.size)))
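# The baseline script above imports StandardScaler but never applies it; a
# minimal sketch of how the features could be standardized before fitting
# (an added assumption, not part of the original baseline):
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)        # fit the scaling on training features only
X_test_scaled = scaler.transform(X_test)  # reuse the same scaling for the test split

scaled_classifier = LogisticRegression(C=.1).fit(X_scaled, y)
print("Scaled test accuracy: {acc:.1%}".format(
    acc=(sum(y_test == scaled_classifier.predict(X_test_scaled)) / y_test.size)))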
def gen_feedforwarder(weights_1, biases_1, weights_2, biases_2):
    # Returns a closure that runs a single-hidden-layer (ReLU) forward pass
    # over `data` using the captured weights and biases.
    def feedforwarder(data):
        logits_1 = tf.matmul(data, weights_1) + biases_1
        output_1 = tf.nn.relu(logits_1)
        logits_2 = tf.matmul(output_1, weights_2) + biases_2
        return logits_2
    return feedforwarder


font_dataset, font_labels = load_data(data_file, 'numpy')
font_dataset = font_dataset.astype(np.float32)
font_labels = make_one_hot(font_labels)
font_dataset, font_labels = shuffle_data(font_dataset, font_labels)

train_dataset, train_labels, X, y = partition_data(font_dataset, font_labels, split=.9)
valid_dataset, valid_labels, test_dataset, test_labels = partition_data(X, y, split=.5)

print('Original set', font_dataset.shape, font_labels.shape)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

graph = tf.Graph()
with graph.as_default():
    # Input data. For the training data, we use a placeholder that will be fed
    # at run time with a training minibatch.
    tf_train_dataset = tf.placeholder(tf.float32,
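# A hedged sketch of how gen_feedforwarder is typically wired into a TF 1.x
# training graph (the author's graph code above is truncated, so the batch
# size, hidden width, weight initialization, and optimizer settings below are
# assumptions, not the original values):
batch_size = 128   # assumed minibatch size
hidden_size = 50   # assumed hidden layer width
num_features = train_dataset.shape[1]
num_labels = train_labels.shape[1]

sketch_graph = tf.Graph()
with sketch_graph.as_default():
    # Minibatch placeholders fed at run time.
    data_ph = tf.placeholder(tf.float32, shape=(batch_size, num_features))
    labels_ph = tf.placeholder(tf.float32, shape=(batch_size, num_labels))

    weights_1 = tf.Variable(tf.truncated_normal([num_features, hidden_size]))
    biases_1 = tf.Variable(tf.zeros([hidden_size]))
    weights_2 = tf.Variable(tf.truncated_normal([hidden_size, num_labels]))
    biases_2 = tf.Variable(tf.zeros([num_labels]))

    # The closure lets the same weights score training and validation data.
    feedforward = gen_feedforwarder(weights_1, biases_1, weights_2, biases_2)

    logits = feedforward(data_ph)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=labels_ph, logits=logits))
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
    valid_prediction = tf.nn.softmax(feedforward(tf.constant(valid_dataset)))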