Example #1
 def __init__(self, X: csr_matrix, Y: np.ndarray, tune_parameters=False):
     super().__init__(X, Y, tune_parameters=tune_parameters)
     input_layer, output_layer = self.X.shape[1], len(np.unique(Y))
     inp = tn.layers.base.Input(size=input_layer, sparse='csr')
     self.clf = tn.Classifier(layers=[
         inp, (100, 'linear'), (50, 'norm:mean+relu'), output_layer
     ])
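Tuples such as (100, 'linear') and (50, 'norm:mean+relu') are theanets shorthand for (layer size, activation); an activation string joined with '+' composes several activations, here mean-centering followed by a ReLU.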
Example #2
 def build(self, *hiddens):
     input = dict(form='input',
                  size=self.NUM_INPUTS,
                  sparse='csr',
                  name='in')
     return theanets.Classifier([input] + list(hiddens) +
                                [self.NUM_CLASSES])
Example #3
def rcnn_train(imdb):
    use_gpu = False
    opts = Opts()
    opts.net_def_file = './model-defs/rcnn_batch_256_output_fc7.prototxt'
    conf = [imdb.name, 1, 'cachedir', 'cachedir/' + imdb.name]

    rcnn_model = RcnnModel(opts.net_def_file, opts.net_file, opts.cache_name)
    # rcnn_load_model(rcnn_model, use_gpu)
    rcnn_model.detectors.crop_mode = opts.crop_mode
    rcnn_model.detectors.crop_padding = opts.crop_padding
    rcnn_model.classes = imdb.classes
    rcnn_model.opts = opts

    X, y = rcnn_get_all_feature(imdb, rcnn_model)
    np.savez('feat', X=X, y=y)
    classifier = LogisticRegression(class_weight='balanced',
                                    solver='lbfgs',
                                    multi_class='multinomial',
                                    verbose=1,
                                    n_jobs=-1,
                                    max_iter=1000)
    classifier.fit(X, y)

    # climate.enable_default_logging()
    net = theanets.Classifier(layers=[4096, 21])
    net.train((X, y),
              algo='sgd',
              learning_rate=0.1,
              momentum=0.9,
              save_every=60.0,
              save_progress='net.{}.netsave',
              validate_every=100)
Example #4
 def _build_ann(self):
     hidden_neurons = 2 * (len(self.inputs) + len(self.languages)) // 3
     self._net = theanets.Classifier([
         len(self.inputs), {
             'size': hidden_neurons,
             'activation': 'tanh'
         },
         len(self.languages)
     ])
Example #5
def main(input_file, model_path):
    batch_size = 128
    nb_classes = 62  # A-Z, a-z and 0-9
    nb_epoch = 2

    # Input image dimensions
    img_rows, img_cols = 32, 32

    # Path of data files
    path = input_file

    # Load the preprocessed data and labels
    X_train_all = np.load(path + "/trainPreproc_" + str(img_rows) + "_" +
                          str(img_cols) + ".npy")
    Y_train_all = np.load(path + "/labelsPreproc.npy")

    X_train, X_val, Y_train, Y_val = \
        train_test_split(X_train_all, Y_train_all, test_size=0.25, stratify=np.argmax(Y_train_all, axis=1))

    print X_train.shape

    labels = convert_(Y_train)
    validation = convert_(Y_val)

    X_train = X_train.reshape(
        (X_train.shape[0], X_train.shape[2] * X_train.shape[3]))
    X_val = X_val.reshape((X_val.shape[0], X_val.shape[2] * X_val.shape[3]))

    print 'Training...'
    class_input = 62
    climate.enable_default_logging()
    # Build a classifier model mapping the flattened image inputs to the 62 output classes.
    net = theanets.Classifier(layers=[X_train.shape[1], class_input])

    X_train = X_train.astype('f')
    labels = labels.astype('i')

    X_val = X_val.astype('f')
    validation = validation.astype('i')

    train = X_train, labels
    valid = X_val, validation

    arg = 'adadelta'
    net.train(train,
              valid,
              algo=arg,
              learning_rate=1e-10,
              momentum=0.00001,
              input_noise=0.3,
              hidden_l1=0.1)

    print 'saving model parameters to {}'.format(model_path)
    with open(model_path, 'wb') as fid:
        pickle.dump(net, fid)
    print 'Done.'
Example #6
def get_model(train_data, train_labels, validation_data, validation_labels, model_fname, output_size, hidden_layer_size):
    minimal_accuracy = 0.95
    # 1. create a model -- here, a regression model.
    print "fname", model_fname
    if os.path.exists(model_fname + "..."):
        print "Loading an existing model..."
        net = theanets.Classifier.load(model_fname)
        net._rng = 13
        valid_acc = np.sum(net.predict(validation_data) == np.int32(validation_labels)) / float(len(validation_labels))
        train_acc = np.sum(net.predict(train_data) == np.int32(train_labels)) / float(len(train_labels))
        print "train_acc, valid_acc", train_acc, valid_acc
        if valid_acc > minimal_accuracy:
            return net
    else:
        input_size = train_data.shape[1]
        print "Creating a new model..."

        if hidden_layer_size > 0:
            hidden_layer = dict(name='hidden1', size=hidden_layer_size, std=1. / hidden_layer_size ** 0.5)
            layers = [theanets.layers.base.Input(size=input_size, sparse='csr'), hidden_layer, dict(size=output_size, diagonal=1)]
        else:
            layers = [theanets.layers.base.Input(size=input_size, sparse='csr'), output_size]
        net = theanets.Classifier(layers, loss='xe')

    # 2. train the model.
    print "Training..."
    alpha = 1.
    for eta in [0.01, 0.05]:
        print '(eta, alpha)', (eta, alpha)
        count = 0
        for train, valid in net.itertrain([train_data, np.int32(train_labels)],
                                          valid=[validation_data, np.int32(validation_labels)],
                                          algo='sgd',
                                          learning_rate=eta,
                                          hidden_l1=alpha):
            net.save(model_fname)
            valid_acc = np.sum(net.predict(validation_data) == np.int32(validation_labels)) / float(len(validation_labels))
            print 'valid acc', valid_acc
            count += 1
            if valid_acc > minimal_accuracy or count == 2:
                break

    return net
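A hypothetical call to the function above; the variable names and sizes here are illustrative assumptions, and the feature matrices must be scipy.sparse CSR since the input layer is declared with sparse='csr':

# Illustrative only: CSR feature matrices, integer labels, a 64-unit hidden
# layer, and 10 output classes (all names/values assumed).
net = get_model(train_X_csr, train_y, valid_X_csr, valid_y,
                'langid_model.tn', output_size=10, hidden_layer_size=64)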
Example #7
def split_validation(percent, data, labels):
    '''Split data and labels into a training and a validation portion.

    percent should be an int between 1 and 99
    '''
    s = int(percent * len(data) / 100)
    tdata = data[0:s]
    vdata = data[s:]
    tlabels = labels[0:s]
    vlabels = labels[s:]
    return ((tdata, tlabels), (vdata, vlabels))


# make a validation set from the train set
train, valid = split_validation(90, data, labels)

# build our classifier
print "We're building a RBM of 1 input layer node, 4 hidden layer nodes, and an output layer of 4 nodes. The output layer has 4 nodes because we have 4 classes that the neural network will output."
cnet = theanets.Classifier([1, 4, 4])
cnet.train(train, valid, algo='layerwise', patience=1, max_updates=mupdates)
cnet.train(train, valid, algo='rprop', patience=1, max_updates=mupdates)

print "%s / %s " % (sum(cnet.classify(data) == labels), tsize)
print "%s / %s " % (sum(cnet.classify(test_data) == test_labels), tsize)

# so what does that output layer look like?
print "The output layer looks something like:"
print "For %s we get %s which is interpreted as class: %s -- but it was %s" % (
    data[0:1], cnet.predict_proba(data[0:1]), cnet.classify(
        data[0:1]), labels[0])

# now that's kind of interesting, an accuracy of .3 to .5 max
# still pretty inaccurate, but 1 sample might never be enough.
Example #8
 def _build(self, *hiddens):
     return theanets.Classifier(
         [self.NUM_INPUTS] + list(hiddens) + [self.NUM_CLASSES])
Example #9
 def _build(self, *hiddens):
     return theanets.Classifier([self.NUM_INPUTS] + list(hiddens) +
                                [self.NUM_CLASSES],
                                sparse_input='csr')
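A minimal sketch of feeding CSR data to a model built this way; the shapes, sizes, and random data are assumptions for illustration:

import numpy as np
import scipy.sparse as sp
import theanets

# random sparse features and integer labels (assumed shapes)
X = sp.random(500, 1000, density=0.01, format='csr', dtype='float32')
y = np.random.randint(0, 5, size=500).astype('int32')
# a 1000-input, 5-class model that accepts CSR matrices directly
net = theanets.Classifier([1000, 50, 5], sparse_input='csr')
net.train([X, y], algo='sgd', learning_rate=1e-3)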
Example #10
weights = np.ones_like(labels)
weights[labels.nonzero()] *= 10


def split(a, b):
    return [
        samples[a:b].astype('float32'), labels[a:b].astype('int32'),
        weights[a:b].astype('float32')
    ]


train = split(0, 9000)
valid = split(9000, 10000)

net = theanets.Classifier(
    layers=(100, 10, 2),
    weighted=True,
)

net.train(train, valid)

truth = valid[1]
print('# of true 1s:', truth.sum())

guess = net.predict(valid[0])
print('# of predicted 1s:', guess.sum())

cm = sklearn.metrics.confusion_matrix(truth, guess)
print('confusion matrix (true class = rows, predicted class = cols):')
print(cm)
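Note that because the model is built with weighted=True, every batch must carry a third array of per-example weights, which is why split() returns (samples, labels, weights) triples; here the nonzero class gets 10x weight to offset the class imbalance.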
Example #11
def main(data_file, test_file):
    try:
        # Load the data.
        _data = np.loadtxt(data_file,
                           delimiter=',',
                           usecols=range(3),
                           dtype=DTYPE)
        _test = np.loadtxt(test_file,
                           delimiter=',',
                           usecols=range(3),
                           dtype=DTYPE)
    except Exception:
        print "Could not load the data in " + data_file
        return 1

    # Separate the training dataset from its labels.
    data = np.array([(_data[i][0], _data[i][1])
                     for i in xrange(_data.shape[0])])
    labels_t = np.array([_data[i][2] for i in xrange(_data.shape[0])])
    labels_n = np.array([
        CLASS2NUM(labels_t[i]) for i in xrange(labels_t.shape[0])
    ]).astype(np.int32)

    # Separate the test dataset from its labels. The labels are stored for checking later.
    test = np.array([(_test[i][0], _test[i][1])
                     for i in xrange(_test.shape[0])])
    test_real = np.array([_test[i][2] for i in xrange(_test.shape[0])])

    # Plot the data if needed and able to.
    if PLOT and plt is not None:
        if XKCD:
            with plt.xkcd():
                plot_dataset(data, labels_t)
                plot_test_input(test, test_real)
        else:
            plot_dataset(data, labels_t)
            plot_test_input(test, test_real)

    # Create the classifiers.
    cnet_1 = tn.Classifier([2, 4, 3])
    cnet_2 = tn.Classifier([2, 8, 4, 3])
    cnet_3 = tn.Classifier([2, 8, 6, 4, 3])

    # Shuffle and split the training data and labels.
    tu.joint_shuffle(data, labels_n)
    train, valid = tu.split_validation(90, data, labels_n)

    print "*******************************************************************"
    print "* TESTING CLASSIFIER: 2, 4, 3                                     *"
    print "*******************************************************************"

    test_net(cnet_1, train, valid, test, test_real, "[2,4,3]")

    print "*******************************************************************"
    print "* TESTING CLASSIFIER: 2, 8, 4, 3                                  *"
    print "*******************************************************************"

    test_net(cnet_2, train, valid, test, test_real, "[2,8,4,3]")

    print "*******************************************************************"
    print "* TESTING CLASSIFIER: 2, 8, 6, 4, 3                               *"
    print "*******************************************************************"

    test_net(cnet_3, train, valid, test, test_real, "[2,8,6,4,3]")

    return 0
Example #12
 def _build(self, *hiddens, **kwargs):
     return theanets.Classifier(
         layers=(784, ) + hiddens + (10, ),
         activation='logistic',
         **kwargs)
Example #13
 def _build(self, *hiddens):
     return theanets.Classifier((self.DIGIT_SIZE, ) + hiddens + (10, ))
Example #14
def g(x, y, z):
    return round(x) or round(y) or round(z)


def npg(vec):
    return g(vec[0], vec[1], vec[2])


possible_points = rnd.uniform(0.0, 0.2, (100, 3))
unlikely_points = rnd.uniform(0.0, 1, (100, 3))
points = np.concatenate((possible_points, unlikely_points))
labels = np.apply_along_axis(npf, 1, points)
glabels = np.apply_along_axis(npg, 1, points)

points = points.astype(np.float32)
labels = labels.astype(np.float32)
ilabels = labels.astype(np.int32)
iglabels = glabels.astype(np.int32)

#res = list()
#for i in range(2,18):
cnet = theanets.Classifier([3, 3, 3, ('softmax', 2)])
cnet.train([points, ilabels], algo='layerwise', patience=1)
cnet.train([points, ilabels], algo='rprop', patience=1)

cgnet = theanets.Classifier([3, ('tanh', 3), ('softmax', 2)])
cgnet.train([points, iglabels], algo='rprop', patience=1)

rnet = theanets.Regressor([3, 3, 1])
rnet.train([points, labels], algo='rprop', patience=10, batch_size=4)
Example #15

import climate
import theanets
from sklearn.datasets import make_classification
from sklearn.metrics import confusion_matrix

climate.enable_default_logging()

# Create a classification dataset.
X, y = make_classification(n_samples=3000,
                           n_features=100,
                           n_classes=10,
                           n_informative=10)
X = X.astype('f')
y = y.astype('i')
cut = int(len(X) * 0.8)  # training / validation split
train = X[:cut], y[:cut]
valid = X[cut:], y[cut:]

# Build a classifier model with 100 inputs and 10 outputs.
net = theanets.Classifier([100, 10])

# Train the model using SGD with momentum.
net.train(train, valid, algo='sgd', learning_rate=1e-4, momentum=0.9)

# Show confusion matrices on the training/validation splits.
for label, (X, y) in (('training:', train), ('validation:', valid)):
    print(label)
    print(confusion_matrix(y, net.predict(X)))
Example #16
#!/usr/bin/env python

import matplotlib.pyplot as plt
import theanets

from utils import load_mnist, plot_layers

train, valid, _ = load_mnist(labels=True)

N = 10

net = theanets.Classifier([784, N * N, ('softmax', 10)])
net.train(train, valid, min_improvement=0.001)

plot_layers([net.find('hid1', 'w'), net.find('out', 'w')])
plt.tight_layout()
plt.show()
Example #17
#!/usr/bin/env python

import matplotlib.pyplot as plt
import theanets

from utils import load_mnist, plot_layers, plot_images


net = theanets.Classifier(
    layers=(784, 1024, 256, 64, ('softmax', 10)),
)

# first, run an unsupervised layerwise pretrainer.
train, valid, _ = load_mnist()
net.train(train, valid,
          algo='pretrain',
          patience=1,
          min_improvement=0.1,
          train_batches=100)

# second, run a supervised trainer on the classifier model.
train, valid, _ = load_mnist(labels=True)
net.train(train, valid, min_improvement=0.01, train_batches=100)

plot_layers([net.find(i, 'w') for i in (1, 2, 3)])
plt.tight_layout()
plt.show()
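Here the 'pretrain' algorithm runs a greedy unsupervised layerwise pass over the hidden layers first; the second, supervised net.train() call then fine-tunes the whole network, including the softmax output, on the labeled data.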
Example #18
def create_algorithm(train, valid, config, n_features, n_targets, plots=False):
    '''Configure and train a theanets neural network

    Args
    ----
    train: tuple (ndarray, ndarray)
        Tuple containing feature and target values for training
    valid: tuple (ndarray, ndarray)
        Tuple containing feature and target values for training validation
    config: dict
        Dictionary of network configuration parameters
    n_features: int
        Number of features (inputs) for configuring input layer of network
    n_targets: int
        Number of targets (outputs) for configuring output layer of network
    plots: bool
        Switch for generating diagnostic plots after each network training

    Returns
    -------
    net: theanets.Classifier object
        Neural network object
    accuracy: float
        Accuracy value of the network configuration from the validation dataset
    monitors: dict
        Dictionary of "monitor" objects produced during network training
        Contains two labels 'train' and 'valid' with the following attributes:
        - 'loss': value of the loss function (default: cross-entropy)
        - 'err': error percentage
        - 'acc': accuracy percentage (default: fraction of true classifications)
    '''
    from collections import OrderedDict
    import theanets

    def plot_monitors(attrs, monitors_train, monitors_valid):
        import matplotlib.pyplot as plt
        import seaborn

        seaborn.set_style('whitegrid')

        labels = {'loss': 'Loss', 'err': 'Error', 'acc': 'Accuracy'}

        fig, axes = plt.subplots(1, len(attrs), sharex=True)
        legend_on = True
        for ax, attr in zip(axes, attrs):
            ax.yaxis.label.set_text(labels[attr])
            ax.xaxis.label.set_text('Epoch')
            ax.plot(monitors_train[attr], label='Training')
            ax.plot(monitors_valid[attr], label='Validation')
            if legend_on:
                ax.legend(loc='upper right')
                legend_on = False
        plt.show()

        # Keep seaborn from messing up confusion matrix plots
        seaborn.reset_orig()

        return None

    # Build neural net with defined configuration
    hidden_layers = [
        config['hidden_nodes'],
    ] * config['hidden_layers']
    net = theanets.Classifier([
        n_features,
    ] + hidden_layers + [
        n_targets,
    ])

    # SGD converges to minima/maxima faster with momentum
    # NAG, ADADELTA, RMSProp have equivalents of parameter specific momentum
    if config['algorithm'] == 'sgd':
        config['momentum'] = 0.9

    # Create dictionary for storing monitor lists
    attrs = ['loss', 'err', 'acc']
    monitors = OrderedDict()
    for mtype in ['train', 'valid']:
        monitors[mtype] = dict()
        for attr in attrs:
            monitors[mtype][attr] = list()

    print('')
    print('Train samples:       {:8d}'.format(len(train[0])))
    print('Validation samples:  {:8d}'.format(len(valid[0])))
    print('Hidden layers:       {:8d}'.format(config['hidden_layers']))
    print('Hidden nodes/layer:  {:8d}'.format(config['hidden_nodes']))
    print('')

    kwargs = {
        'train': train,
        'valid': valid,
        'algo': config['algorithm'],
        'learning_rate': config['learning_rate'],
        'momentum': config['momentum'],
        'hidden_l1': config['hidden_l1'],
        'weight_l2': config['weight_l2'],
    }

    # Collect monitor values from each training epoch
    for t_monitors, v_monitors in net.itertrain(**kwargs):
        for key in attrs:
            monitors['train'][key].append(t_monitors[key])
            monitors['valid'][key].append(v_monitors[key])

    if plots:
        plot_monitors(attrs, monitors['train'], monitors['valid'])

    # Classify features against label/target value to get accuracy
    # where `valid` is a tuple with validation (features, label)
    accuracy = net.score(valid[0], valid[1])

    return net, accuracy, monitors
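For reference, a hypothetical config dictionary covering the keys this function reads; the values are illustrative assumptions, not recommendations:

config = {
    'algorithm': 'nag',      # any theanets trainer name; 'sgd' also gets momentum=0.9 above
    'learning_rate': 1e-3,
    'momentum': 0.5,
    'hidden_l1': 0.001,      # sparsity penalty on hidden activations
    'weight_l2': 0.0001,     # weight decay
    'hidden_layers': 2,
    'hidden_nodes': 50,
}
net, accuracy, monitors = create_algorithm(train, valid, config,
                                           n_features=train[0].shape[1],
                                           n_targets=10)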
Example #19
trainingSetLabels = trainingSet[:, 12]  #putting labels in separate array

trainingSetLabels[trainingSetLabels == 0] = -1  # replacing all 0 with -1 to match sklearn format

trainingSet = trainingSet[:, 1:11]  #removing label cols from actual inputs

trainingSet, testingSet, trainingSetLabels, testingSetLabels = train_test_split(
    trainingSet, trainingSetLabels, test_size=0.6,
    random_state=0)  #fixes random_state so results reproducible

startTime = time.time()
print "Time before training = ", startTime

# dummy layer sizes for now: 10 inputs, three hidden layers of 5, mapped to a
# binary classification output (2 choices)
clf = theanets.Classifier((10, 5, 5, 5, 2))
clf.itertrain(
    [trainingSet, trainingSetLabels],
    [testingSet, testingSetLabels],
    algo='sgd',  # theanets uses training/validation split to mean training/testing split, methinks
    learning_rate=0.1,
    momentum=0.9,
    hidden_l1=0.000001,  # sparse regularizer
    input_noise=0.1,
    hidden_noise=0.1,
    input_dropout=0.3,
    hidden_dropout=0.3,  # dropout and noise regularizers to prevent overfitting
    save_progress="TN1_model_save.txt",
    save_every=1000,
)
Example #20
 def _build(self, *hiddens, **kwargs):
     return theanets.Classifier(
         layers=(self.DIGIT_SIZE, ) + hiddens + (10, ),
         hidden_activation='logistic',
         **kwargs)
Example #21
 def test_feed_forward(self):
     net = theanets.Classifier((self.NUM_INPUTS, self.NUM_CLASSES))
     out = net.predict_proba(self.INPUTS)
     assert out.shape == (self.NUM_EXAMPLES, self.NUM_CLASSES)
Example #22
 def test_classification(self, loss):
     net = theanets.Classifier([
         self.NUM_INPUTS, 10, self.NUM_CLASSES], loss=loss)
     self.assert_progress(net, 'sgd', [self.INPUTS, self.CLASSES])
Example #23
 def net(self):
     return theanets.Classifier(u.CLF_LAYERS)
Example #24
 def build(self, *hiddens):
     return theanets.Classifier(
         [self.NUM_INPUTS] + list(hiddens) + [self.NUM_CLASSES],
         weighted=True)
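As in Example #10, weighted=True means each training batch must supply a per-example weight array in addition to the inputs and labels.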
Example #25
learner = oneNN(train[0], train[1])

oneclasses = np.apply_along_axis(learner, 1, test[0])
print "1-NN classifier!"
print "%s / %s " % (sum(oneclasses == test[1]), len(test[1]))
print theautil.classifications(oneclasses, test[1])

print '''
########################################################################
# Part 3. Let's start using neural networks!
########################################################################
'''

# try different combos here
net = theanets.Classifier([2, 3, 2])
net.train(train, valid, algo='layerwise', max_updates=mupdates, patience=1)
net.train(train, valid, algo='rprop', max_updates=mupdates, patience=1)

print "Learner on the test set"
classify = net.classify(test[0])
print "%s / %s " % (sum(classify == test[1]), len(test[1]))
print collections.Counter(classify)
print theautil.classifications(classify, test[1])

print net.layers[2].params[0].get_value()


def real_function(pt):
    rad = 0.1643167672515498
Example #26
def test_classification(loss):
    net = theanets.Classifier([
        u.NUM_INPUTS, u.NUM_HID1, u.NUM_CLASSES], loss=loss)
    u.assert_progress(net, u.CLF_DATA)