def run_research():
    # research to examine which feature is best to ignore for the highest accuracy

    knn_fac = classifier.knn_factory(1)
    folds = [
        hw3_utils.load_data('ecg_fold_' + str(i + 1) + '.pickle')
        for i in range(2)
    ]
    features_num = len(folds[0][0][0])

    max_accuracy_feature = None
    for run_num in range(1, 8):

        if max_accuracy_feature is not None:
            folds = [(np.delete(data, max_accuracy_feature, 1), labels, test)
                     for data, labels, test in folds]
            features_num = len(folds[0][0][0])

        results = [
            evaluate_comp(knn_fac, folds, feature)
            for feature in range(features_num)
        ]
        max_accuracy_feature = max(results, key=lambda item: item[1])[0]

        with open('my_experiments' + str(run_num) + '.csv',
                  'w+') as result_file:
            for feature, accuracy, error in results:
                line = str(feature) + ',' + str(accuracy) + ',' + str(
                    error) + '\n'
                result_file.write(line)
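run_research relies on an evaluate_comp helper that is not included in this snippet. A minimal sketch of what it might look like, assuming each fold is a (data, labels, test) tuple as loaded above and that accuracy comes from leave-one-fold-out cross-validation in the style of the evaluate function in Example #5 below; the signature is taken from the call site, the body is an assumption:

import numpy as np

def evaluate_comp(factory, folds, feature):
    # Temporarily drop the candidate feature column, cross-validate, and
    # return (feature, mean accuracy, mean error) as run_research expects.
    accuracies = []
    for i in range(len(folds)):
        test_data = np.delete(folds[i][0], feature, 1)
        test_labels = folds[i][1]
        train_data = np.vstack([np.delete(folds[j][0], feature, 1)
                                for j in range(len(folds)) if j != i])
        train_labels = np.concatenate([folds[j][1]
                                       for j in range(len(folds)) if j != i])
        clf = factory.train(train_data, train_labels)
        predictions = [clf.classify(sample) for sample in test_data]
        accuracies.append(np.mean([p == t
                                   for p, t in zip(predictions, test_labels)]))
    accuracy = float(np.mean(accuracies))
    return feature, accuracy, 1 - accuracy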
Example #2
def main():
    # import data
    train_set_full, train_tags, test_set_full = load_data('data/Data.pickle')

    # ### Pre-processing ###
    # Trim data
    train_set_full[train_set_full < 0] = 0
    train_set_full[train_set_full > 1] = 1
    test_set_full[test_set_full < 0] = 0
    test_set_full[test_set_full > 1] = 1
    # Select 70 best features
    feature_sel_1 = SelectKBest(f_classif, k=70)
    feature_sel_1.fit(train_set_full, train_tags)
    train_set_1 = feature_sel_1.transform(train_set_full)
    test_set_1 = feature_sel_1.transform(test_set_full)

    # ### Train classifiers ###
    clf_1 = neighbors.KNeighborsClassifier(n_neighbors=1, weights='uniform', p=2).fit(train_set_1, train_tags)
    clf_2 = neighbors.KNeighborsClassifier(n_neighbors=3, weights='distance', p=2).fit(train_set_1, train_tags)
    clf_3 = neighbors.KNeighborsClassifier(n_neighbors=5, weights='distance', p=1).fit(train_set_1, train_tags)
    clf_4 = svm.SVC(kernel='poly', C=0.78, degree=11, coef0=2, gamma='auto').fit(train_set_1, train_tags)
    clf_5 = RandomForestClassifier(n_estimators=200, criterion='entropy', max_depth=None).fit(train_set_1, train_tags)

    # create voting classifier
    final_clf = VotingClassifier(estimators=[('knn1', clf_1), ('knn3', clf_2), ('knn5', clf_3),
                                             ('svm', clf_4), ('rf', clf_5)], voting='hard')
    final_clf.fit(train_set_1, train_tags)
    write_prediction(final_clf.predict(test_set_1).astype(int))
Example #3
File: main.py Project: ShirTech/HW3
def main():
    train_features, train_labels, test_features = load_data()
    x = (train_features, train_labels)
    # split_crosscheck_groups(x, 2)

    # KNN_test()
    # Additional_tests()
    compete()
Example #4
def train_model_and_classify_test():
    training_set, labels, test_set = utils.load_data(
        r'Shuffled_scaled_data.data')
    training_set_pca, labels_pca, test_set_pca = utils.load_data(
        r'Shuffled_scaled_PCA_data.data')
    # r'Shuffled_scaled_PCA_data.data'
    # Create classifier and train them
    svm = Svm_factory()
    svm = svm.train(training_set, labels)

    tree_classifier = DecisionTree_factory()
    tree_classifier = tree_classifier.train(training_set_pca, labels_pca)

    knn_7 = knn_factory(7)
    knn_7 = knn_7.train(training_set_pca, labels_pca)

    knn_9 = knn_factory(9)
    knn_9 = knn_9.train(training_set_pca, labels_pca)

    knn_11 = knn_factory(11)
    knn_11 = knn_11.train(training_set_pca, labels_pca)

    # Predictions for test set
    predictions = []
    #[svm.classify(sample) for sample in test_set]

    for sample, sample_with_pca in zip(test_set, test_set_pca):
        #sample, sample_with_pca in zip(training_set, training_set_pca):
        counter = 0
        counter += 1 if svm.classify(sample) else 0
        counter += 1 if tree_classifier.classify(sample_with_pca) else 0
        counter += 1 if knn_7.classify(sample_with_pca) else 0
        counter += 1 if knn_9.classify(sample_with_pca) else 0
        counter += 1 if knn_11.classify(sample_with_pca) else 0
        if counter > 3:
            predictions.append(True)
        else:
            predictions.append(False)

    print(np.where(np.array(predictions) == False)[0].shape)
Example #5
def evaluate(classifier_factory, k):
    # load all folds
    folds = [load_data('ecg_fold_' + str(i + 1) + '.pickle') for i in range(k)]

    accuracies = []
    errors = []
    for i in range(k):
        # choose one fold as the test group; all the others are used for training
        test_data = folds[i][0]
        test_labels = folds[i][1]

        train_folds = [folds[j][0] for j in range(k) if j != i]
        train_data = []
        for train_fold in train_folds:
            for features in train_fold:
                train_data.append(features)
        train_data = np.array(train_data)  # conversion to np array
        train_labels = []
        for j in range(k):
            if j != i:
                for train_label in folds[j][1]:
                    train_labels.append(train_label)

        # run groups with classifier
        classifier = classifier_factory.train(train_data, train_labels)
        res_list = [classifier.classify(features) for features in test_data]

        '''
        Tally confusion-matrix counts: test_labels[j] is the true label
        (True means the subject is actually sick) and res_list[j] is the
        classifier output, where 1 is counted as a positive prediction.
        '''
        test_false_positive = 0
        test_false_negative = 0
        test_true_positive = 0
        test_true_negative = 0
        N = len(res_list)
        for j in range(N):
            if res_list[j] == 1 and test_labels[j] == True:
                test_true_positive += 1
            elif res_list[j] == 1 and test_labels[j] == False:
                test_false_positive += 1
            elif res_list[j] == 0 and test_labels[j] == True:
                test_false_negative += 1
            elif res_list[j] == 0 and test_labels[j] == False:
                test_true_negative += 1

        accuracies.append((test_true_positive + test_true_negative) / N)
        errors.append((test_false_positive + test_false_negative) / N)

    return np.average(accuracies), np.average(errors)
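The per-sample counting loop above can also be written with numpy boolean masks. A small self-contained sketch (confusion_counts is a hypothetical helper; a prediction of 1 is treated as positive, exactly as in the loop):

import numpy as np

def confusion_counts(res_list, test_labels):
    # Boolean-mask version of the counting loop above.
    preds = np.asarray(res_list) == 1
    truth = np.asarray(test_labels, dtype=bool)
    tp = int(np.sum(preds & truth))
    tn = int(np.sum(~preds & ~truth))
    fp = int(np.sum(preds & ~truth))
    fn = int(np.sum(~preds & truth))
    return tp, tn, fp, fn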
Example #6
def run_my_classify():
    # predicts the test data with specific features

    data, labels, tests = hw3_utils.load_data()

    # features list to ignore that came from the research before
    features_to_ignore = [90, 23, 90, 103, 36]

    for feature in features_to_ignore:
        data = np.delete(data, feature, 1)
        tests = np.delete(tests, feature, 1)

    clf = classifier.knn_factory(1).train(data, labels)
    results = [clf.classify(test) for test in tests]

    hw3_utils.write_prediction(results)
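The loop above removes one column per call, so every later index refers to the already-shrunken array, which is presumably why 90 appears twice in the list. For comparison, np.delete also accepts a sequence of indices and removes them in one call against the original column positions; a small sketch of that variant (drop_columns is a hypothetical helper, not part of the original code):

import numpy as np

def drop_columns(matrix, columns):
    # Indices refer to the original columns; they are not shifted by earlier
    # removals as they would be when deleting one column per call.
    return np.delete(matrix, columns, axis=1)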
Example #7
def compete():
    train_features, train_labels, test_features = load_data()

    # TODO: execute feature selection only once, to save time
    path = "updated_features.data"
    my_file = Path(path)
    if not my_file.is_file():
        X_train_subset, X_test_subset = feature_selection(
            train_features, train_labels, test_features)
        with open(path, 'wb') as f:
            tuple_to_store = (X_train_subset, X_test_subset)
            pickle.dump(tuple_to_store, f, protocol=pickle.HIGHEST_PROTOCOL)
    else:
        with open(path, 'rb') as f:
            X_train_subset, X_test_subset = pickle.load(f)

    competition_test(X_train_subset, train_labels, X_test_subset)
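The feature_selection function called above is not shown in this snippet. A minimal sketch of what such a helper could look like, assuming scikit-learn's SelectKBest as used in Example #2 (the name is taken from the call site; the scoring function and the value of k are assumptions):

from sklearn.feature_selection import SelectKBest, f_classif

def feature_selection(train_features, train_labels, test_features, k=50):
    # Fit the selector on the training data only, then apply the same column
    # subset to both the training and the test matrices.
    selector = SelectKBest(f_classif, k=k).fit(train_features, train_labels)
    return selector.transform(train_features), selector.transform(test_features)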
Example #8
def main():
    train_set, train_tags, test_set = load_data()
    split_crosscheck_groups((train_set, train_tags), 2)

    with open('experiment6.csv', 'w') as f:
        k_vals, acc_list, err_list = [1, 3, 5, 7, 13], [], []
        for k in k_vals:
            knn = knn_factory(k)
            acc, err = evaluate(knn, 2)
            f.write(', '.join([str(k), str(acc), str(err)]) + '\n')
            acc_list.append(acc)
            err_list.append(err)

    tf = tree_factory()
    tree_acc, tree_err = evaluate(tf, 2)

    pf = perceptron_factory()
    percp_acc, percp_err = evaluate(pf, 2)

    with open('experiment12.csv', 'w') as f:
        f.write(', '.join([str(1), str(tree_acc), str(tree_err)]) + '\n')
        f.write(', '.join([str(2), str(percp_acc), str(percp_err)]) + '\n')
Example #9
def evaluate(classifier_factory, k):
    accuracy = 0
    # create training and test sets for the i'th fold
    full_train_data, full_train_tags, _ = load_data()
    full_train_set = [list(l) for l in full_train_data]
    for fold in range(k):
        fold_test_set = load_k_fold_data(fold + 1)
        fold_train_set = [
            smpl for smpl in full_train_set if smpl not in fold_test_set[0]
        ]
        fold_train_tags = [
            full_train_tags[full_train_set.index(smpl)]
            for smpl in fold_train_set
        ]
        classifier = classifier_factory.train(fold_train_set, fold_train_tags)

        # compute accuracy and error
        accuracy += classifier.score(fold_test_set[0], fold_test_set[1])

    accuracy /= k
    error = 1 - accuracy
    return accuracy, error
Example #10
def test_CDNN(learning_rate=0.1,
              n_epochs=1000,
              nkerns=[16, 512],
              batch_size=200,
              verbose=False,
              filter_size=5):
    """
    Wrapper function for testing CNN in cascade with DNN
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # TODO: Construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 3, 32, 32),
                                filter_shape=(nkerns[0], 3, filter_size,
                                              filter_size),
                                poolsize=(2, 2))

    # TODO: Construct the second convolutional pooling layer
    new_shape = (32 - filter_size + 1) // 2
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], new_shape,
                                             new_shape),
                                filter_shape=(nkerns[1], nkerns[0],
                                              filter_size, filter_size),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    layer2_input = layer1.output.flatten(2)

    # TODO: construct a fully-connected sigmoidal layer
    new_factors = (new_shape - filter_size + 1) // 2
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * new_factors * new_factors,
                         n_out=500,
                         activation=T.tanh)

    layer3 = HiddenLayer(rng,
                         input=layer2.output,
                         n_in=500,
                         n_out=500,
                         activation=T.tanh)

    # TODO: classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # TODO: create a list of all model parameters to be fit by gradient descent
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    return train_nn(train_model, validate_model, test_model, n_train_batches,
                    n_valid_batches, n_test_batches, n_epochs, verbose)
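The Theano examples in this collection delegate the optimization loop to a train_nn helper that is not shown. A minimal sketch of such a loop, matching only how it is called above (three compiled functions that each take a minibatch index, the batch counts, an epoch limit and a verbosity flag); the body is an assumption, not the original helper:

import timeit
import numpy

def train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose=True):
    # Plain minibatch SGD loop: train on every batch, validate once per epoch
    # and remember the test error of the model with the best validation error.
    best_validation_loss = numpy.inf
    test_score = 0.0
    start_time = timeit.default_timer()
    for epoch in range(1, n_epochs + 1):
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)
        validation_loss = numpy.mean([validate_model(i)
                                      for i in range(n_valid_batches)])
        if verbose:
            print('epoch %i, validation error %f %%' %
                  (epoch, validation_loss * 100.))
        if validation_loss < best_validation_loss:
            best_validation_loss = validation_loss
            test_score = numpy.mean([test_model(i)
                                     for i in range(n_test_batches)])
    if verbose:
        print('best validation error %f %%, test error %f %%, ran for %.1fs' %
              (best_validation_loss * 100., test_score * 100.,
               timeit.default_timer() - start_time))
    return best_validation_loss, test_score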
Example #11
def test_noise_injection_at_weight(learning_rate=0.1,
                                   L1_reg=0.00, L2_reg=0.0001, n_epochs=100,
                                   batch_size=128, n_hidden=500, n_hiddenLayers=3,
                                   verbose=True, noise_level=0.001, noise_dist='uniform'):
    """
    Wrapper function for experiment of noise injection at weights

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layers, and its length should equal to
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type noise_level: float
    :param noise_level: scale of the noise injected into the weight updates.

    :type noise_dist: string
    :param noise_dist: distribution of the injected noise (e.g. 'uniform').

    """
    rng = numpy.random.RandomState(23455)

    # Load down-sampled dataset in raw format (numpy.ndarray, not Theano.shared)
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix), where each row
    # corresponds to an example. target is a numpy.ndarray of 1 dimension
    # (vector) that has the same length as the number of rows in the input.

    # Load the smaller dataset
    datasets = load_data(ds_rate=5)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32*32*3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]

    # TODO: modify updates to inject noise to the weight
    # classifier.params is ordered [W, b, W, b, ...] (one weight matrix and one
    # bias per layer), so even-indexed entries are the weights and odd-indexed
    # entries are the biases. Weights get an extra noise term from
    # noise_injection; biases get a plain SGD update.
    updates = [
        (param, param - learning_rate * gparam
         + noise_injection(param.get_value(), noise_level, noise_dist))
        for param, gparam in zip(classifier.params[0::2], gparams[0::2])
    ] + [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params[1::2], gparams[1::2])
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
        n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
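The noise_injection function used in the weight updates above is likewise not part of this snippet. A minimal sketch of one possible implementation, inferred only from the call site noise_injection(param.get_value(), noise_level, noise_dist); the body is an assumption:

import numpy

def noise_injection(param_value, noise_level, noise_dist='uniform'):
    # Return a noise array with the same shape as the parameter value, scaled
    # by noise_level; when added inside the symbolic update expression, Theano
    # treats the returned numpy array as a constant.
    if noise_dist == 'uniform':
        return noise_level * numpy.random.uniform(-1.0, 1.0,
                                                  size=param_value.shape)
    return noise_level * numpy.random.normal(0.0, 1.0, size=param_value.shape)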
Example #12
def test_data_augmentation(learning_rate=0.01,
                           L1_reg=0.00,
                           L2_reg=0.0001,
                           n_epochs=100,
                           batch_size=128,
                           n_hidden=500,
                           n_hiddenLayers=3,
                           verbose=False):
    """
    Wrapper function for experiment of data augmentation

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layers, and its length should equal to
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    """
    rng = numpy.random.RandomState(23455)

    # Load down-sampled dataset in raw format (numpy.ndarray, not Theano.shared)
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix), where each row
    # corresponds to an example. target is a numpy.ndarray of 1 dimension
    # (vector) that has the same length as the number of rows in the input.

    # Load the smaller dataset in raw Format, since we need to preprocess it
    train_set, valid_set, test_set = load_data(ds_rate=5, theano_shared=False)

    # Repeat the training labels 5 times, to match the original images plus
    # the four translated copies stacked below
    train_set[1] = numpy.tile(train_set[1], 5)

    # TODO: translate the dataset
    train_set_x_u = translate_image(train_set[0], "w")
    train_set_x_d = translate_image(train_set[0], "s")
    train_set_x_r = translate_image(train_set[0], "d")
    train_set_x_l = translate_image(train_set[0], "a")

    # Stack the original dataset and the synthesized datasets
    train_set[0] = numpy.vstack((train_set[0], train_set_x_u, train_set_x_d,
                                 train_set_x_r, train_set_x_l))

    # Convert raw dataset to Theano shared variables.
    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(rng=rng,
                       input=x,
                       n_in=32 * 32 * 3,
                       n_hidden=n_hidden,
                       n_hiddenLayers=n_hiddenLayers,
                       n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    output = train_nn(train_model, validate_model, test_model, n_train_batches,
                      n_valid_batches, n_test_batches, n_epochs, verbose)
    return output
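translate_image is not defined in this snippet. Judging from the call sites and the variable names (_u, _d, _r, _l for 'w', 's', 'd', 'a'), it shifts each rasterized 3x32x32 image by a small amount in one direction; a sketch of one possible implementation, with the one-pixel wrap-around roll being an assumption:

import numpy

def translate_image(flat_images, direction, shift=1):
    # flat_images has shape (n_samples, 3*32*32); returns a shifted copy.
    images = flat_images.reshape(-1, 3, 32, 32)
    axis, step = {'w': (2, -shift),   # up
                  's': (2, shift),    # down
                  'd': (3, shift),    # right
                  'a': (3, -shift)}[direction]
    return numpy.roll(images, step, axis=axis).reshape(flat_images.shape)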
Example #13
from id3 import Id3Estimator
from hw3_utils import load_data
from id3 import export_graphviz
import classifier
examples, labels, test = load_data()
data = list()
data.append(examples)
data.append(labels)

examples_len = len(examples)

training_set_examples = list(examples[0:int(examples_len * (8 / 10))])
training_set_examples.extend(examples[int(examples_len * (9 / 10)):])
training_set_labels = list(labels[0:int(examples_len * (8 / 10))])
training_set_labels.extend(labels[int(examples_len * (9 / 10)):])

test_set_examples = examples[int(examples_len * (8 / 10)):int(examples_len *
                                                              (9 / 10))]
test_set_labels = labels[int(examples_len * (8 / 10)):int(examples_len *
                                                          (9 / 10))]

classifier.split_crosscheck_groups(data, 2)

estimator = Id3Estimator()
estimator.fit(training_set_examples, training_set_labels)
# predicted_labels = estimator.predict(test_set_examples)
print(estimator.predict_proba(test_set_examples))


def get_predictions(test_set):
    return
Example #14
import sys

import hw3_utils
import cross_validation

folds = 2
if len(sys.argv) > 1:
    folds = int(sys.argv[1])

dataset = hw3_utils.load_data()
print("Splitting dataset to {} folds".format(folds))
cross_validation.split_crosscheck_groups((dataset[0], dataset[1]), folds)
Example #15
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size]
            #y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    test_model = theano.function(
        #layer5.errors(y),
        [index],output,
        givens={
            x: test_set_x[0+index: batch_size+index]
        }
    )
    train_nn(train_model,validate_model,test_model,
             n_train_batches, n_valid_batches, n_test_batches, n_epochs,
             verbose=True)
from hw3_utils import load_data

# downsample the training and validation dataset if needed, ds_rate should be larger than 1.
ds_rate = None
datasets = []
gc.collect()
datasets = load_data(ds_rate=ds_rate, theano_shared=True)
train_set_x, train_set_y = datasets[0]
print(train_set_x.get_value().shape)
import sys
#sys.exit()
valid_set_x, valid_set_y = datasets[1]
test_set_x, test_set_y = datasets[2]
evaluate_lenet5(train_set_x, train_set_y, valid_set_x, valid_set_y, test_set_x, test_set_y)
Example #16
def test_dropout(learning_rate=0.1,
                 n_epochs=1000,
                 nkerns=[64, 128],
                 batch_size=120,
                 verbose=False):
    """
    Wrapper function for testing LeNet on SVHN dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels
    testing = T.iscalar('testing')
    testValue = testing
    getTestValue = theano.function([testing], testValue)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # TODO: Construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 3, 32, 32),
                                filter_shape=(nkerns[0], 3, 5, 5),
                                poolsize=(2, 2))

    # TODO: Construct the second convolutional pooling layer
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 14, 14),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    layer2_input = layer1.output.flatten(2)

    # TODO: construct a fully-connected sigmoidal layer
    layer2 = DropOut(rng,
                     input=layer2_input,
                     n_in=nkerns[1] * 5 * 5,
                     n_out=batch_size,
                     testing=testing)

    # TODO: classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=batch_size, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            testing: getTestValue(1)
        },
        on_unused_input='ignore')

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            testing: getTestValue(1)
        },
        on_unused_input='ignore')
    # TODO: create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    grads = T.grad(cost, params)
    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.

    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            testing: getTestValue(1)
        },
        on_unused_input='ignore',
        allow_input_downcast=True)

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model, n_train_batches,
             n_valid_batches, n_test_batches, n_epochs, verbose)
Example #17
def test_adversarial_example(learning_rate=0.03, L1_reg=0.0001, L2_reg=0.0001, n_epochs=1,
             batch_size=128, n_hidden=400, n_hiddenLayers=12, verbose=False,
             noise_mean=0.0, noise_var=1.0):
    """
    Wrapper function for testing adversarial examples
    """
    rng = numpy.random.RandomState(23455)
    # Load down-sampled dataset in raw format (numpy.ndarray, not Theano.shared)
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix), where each row
    # corresponds to an example. target is a numpy.ndarray of 1 dimension
    # (vector) that has the same length as the number of rows in the input.

    # Load the smaller dataset
    datasets = load_data(ds_rate=5)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]



    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)
    srng = RandomStreams(seed=234)

    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32*32*3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    get_preds = theano.function(
        inputs=[index],
        outputs=[classifier.y_pred, classifier.p_y_given_x],
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )
    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams

    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]

    # TODO: modify updates to inject noise to the weight
    updates = [
        (param, param - learning_rate * gparam + srng.normal(size=gparam.shape, avg=noise_mean, std=noise_var)) 
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # This function takes the gradient with respect to the input
    gparamx = T.grad(cost, classifier.input)

    calc_gradx = theano.function( [index], gparamx, 
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        })
    # Intermediate step to get the original data
    get_x = theano.function( [index], test_set_x[index * batch_size: (index + 1) * batch_size])
    get_y = theano.function( [index], test_set_y[index * batch_size: (index + 1) * batch_size])

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    train_nn(train_model, validate_model, test_model,
        n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)    
    # Get the gradient for a batch of inputs
    x_adv = get_x(1)
    gx_adv = numpy.sign(calc_gradx(1)[0])
    ad_example = x_adv + gx_adv * numpy.random.random(gx_adv.shape)*0.0000000001
    shared_adv_x = theano.shared(numpy.asarray(ad_example, dtype=theano.config.floatX), borrow=True)
    get_predsadv = theano.function(
        inputs=[index],
        outputs=[classifier.y_pred, classifier.p_y_given_x],        
        givens = {
            x:  shared_adv_x[(index*0):]
        }
    )
    ap = get_predsadv(1)
    op = get_preds(1)
    ys = get_y(1)
    indexes = [i for i in range(128) if ys[i]==op[0][i]] 
    # Select the element at index 3 among the correctly classified examples from the original predictions
    indx = indexes[3]
    return x_adv, op, ap, ad_example, ys, indx
def test_adversarial_example(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100,
                         batch_size=128, n_hidden=500, n_hiddenLayers=3,
                         verbose=False, smaller_set=True):
    """
    Wrapper function for testing adversarial examples
    
    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layers, and its length should equal to
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type smaller_set: boolean
    :param smaller_set: to use the smaller dataset or not to.
    """
    
    # load the dataset; download the dataset if it is not present
    if smaller_set:
        datasets = load_data(ds_rate=5)
    else:
        datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)

    # TODO: construct a neural network, either MLP or CNN.
    classifier = myMLP(rng=rng, input=x, n_in=32*32*3, n_hidden=n_hidden, n_out=10, n_hiddenLayers=n_hiddenLayers)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
        n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
    
    y_pred_model = theano.function(
        inputs=[index],
        outputs=classifier.y_pred,
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )
    
    p_y_given_x_model = theano.function(
        inputs=[index],
        outputs=classifier.p_y_given_x,
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
        }
    )
    
    y_pred=numpy.array([])
    y_actual=numpy.array([])
    for i in range(n_test_batches):
        y_pred=numpy.append(y_pred, y_pred_model(i)) 
        y_actual=numpy.append(y_actual, test_set_y.eval()[i*batch_size:(i + 1) * batch_size])
    
    print('y_pred', y_pred)
    print('y_actual', y_actual)
    
    
    grad_input=T.grad(cost, classifier.input)
    f1=theano.function(
        inputs=[x,y], 
        outputs=T.add(x, T.sgn(grad_input)))
    
    new_x = f1(test_set_x.eval(), test_set_y.eval())    
    new_x = theano.shared(numpy.asarray(new_x, dtype=theano.config.floatX), borrow=True)
    
    y_pred_model_adverse = theano.function(
        inputs=[index],
        outputs=classifier.y_pred,
        givens={
            x: new_x[index * batch_size:(index + 1) * batch_size],
        }
    )
    
    
    p_y_given_x_model_adverse = theano.function(
        inputs=[index],
        outputs=classifier.p_y_given_x,
        givens={
            x: new_x[index * batch_size:(index + 1) * batch_size],
        }
    )
    p_y_given_x_adverse=numpy.array([])
    p_y_given_x_original=numpy.array([])
    y_pred_adverse=numpy.array([])
    for i in range(n_test_batches):
        y_pred_adverse=numpy.append(y_pred_adverse, y_pred_model_adverse(i)) 
        if i==0:
            p_y_given_x_adverse=p_y_given_x_model_adverse(i)
            p_y_given_x_original=p_y_given_x_model(i)
        elif i>0:
            p_y_given_x_adverse=numpy.vstack((p_y_given_x_adverse, p_y_given_x_model_adverse(i)))
            p_y_given_x_original=numpy.vstack((p_y_given_x_original, p_y_given_x_model(i)))
            
    f, ax = plt.subplots(5,4, figsize=(15,15))
    for i in range(5): 
        pred=y_pred[y_actual==y_pred][i]
        pred_adv=y_pred_adverse[y_actual==y_pred][i]
        pyx=p_y_given_x_original[y_actual==y_pred][i]
        pyx_adverse=p_y_given_x_adverse[y_actual==y_pred][i]
        img=numpy.array(test_set_x.eval())[y_actual==y_pred,:][i,:].reshape(3,32,32)
        img_adverse=numpy.array(new_x.eval())[y_actual==y_pred,:][i,:].reshape(3,32,32)
        ax[i,0].imshow(numpy.transpose(img,(1,2,0)))
        ax[i,0].axis('off')
        ax[i,0].set_title('Example %s:\nCorrectly predicted value: %s' % (i+1,int(pred)))
        ax[i,1].imshow(numpy.transpose(img_adverse,(1,2,0)))
        ax[i,1].axis('off')
        ax[i,1].set_title('Example %s:\nAdversarial example\nPredicted value: %s' % (i+1, int(pred_adv)))
        ax[i,2].bar(numpy.arange(0,10)-0.5, pyx)
        ax[i,2].set_xticks(numpy.arange(0,10))
        ax[i,2].set_title('Example %s: Class specific\nprobabilities for original data' % (i+1))
        ax[i,2].set_ylabel('p(y|x)')
        ax[i,3].bar(numpy.arange(0,10)-0.5, pyx_adverse)
        ax[i,3].set_xticks(numpy.arange(0,10))
        ax[i,3].set_title('Example %s: Class specific\nprobabilities for adversarial data' % (i+1))
        ax[i,3].set_ylabel('p(y|x)')
    plt.tight_layout()
    
    return p_y_given_x_adverse
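Both adversarial-example snippets perturb the test inputs with the sign of the gradient of the cost with respect to the input. The usual fast-gradient-sign formulation additionally scales that sign by a small epsilon and clips back to the valid input range; a short sketch of that single step on numpy arrays (the helper name and the epsilon value are arbitrary):

import numpy

def fgsm_perturb(x, grad_x, epsilon=0.007):
    # x_adv = x + epsilon * sign(dJ/dx), clipped to the [0, 1] pixel range.
    return numpy.clip(x + epsilon * numpy.sign(grad_x), 0.0, 1.0)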
Example #19
def test_mlp_bonus(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100,
             batch_size=128, n_hidden=500, n_hiddenLayers=3,
             verbose=False, smaller_set=True):
    """
    Wrapper function for training and testing MLP

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layers, and its length should equal to
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type smaller_set: boolean
    :param smaller_set: to use the smaller dataset or not to.

    """

    # load the dataset; download the dataset if it is not present
    if smaller_set:
        datasets = load_data(ds_rate=5)
    else:
        datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)

    # TODO: construct a neural network, either MLP or CNN.
    classifier = myMLP(rng=rng, input=x, n_in=32*32*3, n_hidden=n_hidden, n_out=10, n_hiddenLayers=n_hiddenLayers)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
        n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
    
    return [x.params[0].get_value() for x in classifier.hiddenLayers]+[classifier.logRegressionLayer.params[0].get_value()]
Example #20
def main():
    # question b.3
    train_features, train_labels, test_features = hw3_utils.load_data()
    classifier.split_crosscheck_groups((train_features, train_labels), 8)
Example #21
def main():
    train_set_full, train_tags, _ = load_data('data/Data.pickle')
    clf_type = ['SVM', 'Tree', 'KNN', 'Perceptron', 'Bayes', 'RF', 'Voting']
    kernel_type = ['Linear', 'Polynomial', 'Gaussian', 'Sigmoid']
    selectors = ['f_classif', 'mutual_info_classif', 'chi2']
    path = 'Classifiers_comparison.csv'

    with open(path, 'w', newline='') as csv_f:
        writer = csv.writer(csv_f)
        writer.writerow(['Selection', 'Features', 'Classifier', 'Param1', 'Param2', 'Param3', 'Param4', 'Param5',
                         'Accuracy', 'Var'])

        for num_of_features in [50, 70, 85, 98, 115, 140, 187]:
            best_svm_score = 0
            for selection_method in selectors:
                if selection_method == selectors[0]:
                    train_set = SelectKBest(f_classif, k=num_of_features).fit_transform(train_set_full, train_tags)
                elif selection_method == selectors[1]:
                    train_set = SelectKBest(mutual_info_classif, k=num_of_features).fit_transform(train_set_full, train_tags)
                else:
                    train_set = SelectKBest(chi2, k=num_of_features).fit_transform(abs(train_set_full), train_tags)

                # Testing SVM classifier
                C_param = np.linspace(0.78, 0.9, 12)
                kernel_param = ['linear', 'poly', 'rbf', 'sigmoid']
                degree_param = np.linspace(7, 11, 5, dtype=int)
                coef0_param = np.linspace(-3, 3, 10)

                # Testing linear kernel
                for penal in C_param:
                    clf = svm.SVC(kernel=kernel_param[0], C=penal, gamma='auto')
                    scores = cross_val_score(clf, train_set, train_tags, cv=3)
                    avg_score = np.mean(scores)
                    var = np.var(scores)
                    writer.writerow([selection_method, num_of_features, clf_type[0], kernel_type[0], str(penal), '', '', '',
                                     avg_score, var])
                    if avg_score > best_svm_score:
                        best_svm_score = avg_score
                        svm_clf = svm.SVC(kernel=kernel_param[0], C=penal, gamma='auto')

                # Testing Polynomial kernel
                for penal in C_param:
                    for deg in degree_param:
                        for C0 in coef0_param:
                            clf = svm.SVC(kernel=kernel_param[1], C=penal, degree=deg, coef0=C0, gamma='auto')
                            scores = cross_val_score(clf, train_set, train_tags, cv=3)
                            avg_score = np.mean(scores)
                            var = np.var(scores)
                            writer.writerow([selection_method, num_of_features, clf_type[0], kernel_type[1], str(penal),
                                             str(deg), str(C0), '', avg_score, var])
                            if avg_score > best_svm_score:
                                best_svm_score = avg_score
                                svm_clf = svm.SVC(kernel=kernel_param[1], C=penal, degree=deg, coef0=C0, gamma='auto')

                # Testing Gaussian kernel
                for penal in C_param:
                    clf = svm.SVC(kernel=kernel_param[2], C=penal, gamma='auto')
                    scores = cross_val_score(clf, train_set, train_tags, cv=3)
                    avg_score = np.mean(scores)
                    var = np.var(scores)
                    writer.writerow([selection_method, num_of_features, clf_type[0], kernel_type[2], str(penal), '', '', '',
                                     avg_score, var])
                    if avg_score > best_svm_score:
                        best_svm_score = avg_score
                        svm_clf = svm.SVC(kernel=kernel_param[2], C=penal, gamma='auto')

                # Testing Sigmoid kernel
                for penal in C_param:
                    for C0 in coef0_param:
                        clf = svm.SVC(kernel=kernel_param[3], C=penal, coef0=C0, gamma='auto')
                        scores = cross_val_score(clf, train_set, train_tags, cv=3)
                        avg_score = np.mean(scores)
                        var = np.var(scores)
                        writer.writerow([selection_method, num_of_features, clf_type[0], kernel_type[3], str(penal), '',
                                         str(C0), '', avg_score, var])
                        if avg_score > best_svm_score:
                            best_svm_score = avg_score
                            svm_clf = svm.SVC(kernel=kernel_param[3], C=penal, coef0=C0, gamma='auto')

                print("SVM Done")

                # Testing Decision-tree classifier
                criterion_param = ['gini', 'entropy']
                splitter_param = ['best', 'random']
                weight_param = [None, 'balanced']

                best_tree_score = 0
                for crit in criterion_param:
                    for split in splitter_param:
                        for weight in weight_param:
                            clf = tree.DecisionTreeClassifier(criterion=crit, splitter=split, class_weight=weight)
                            scores = cross_val_score(clf, train_set, train_tags, cv=3)
                            avg_score = np.mean(scores)
                            var = np.var(scores)
                            writer.writerow([selection_method, num_of_features, clf_type[1], crit, split, str(weight), '',
                                             '', avg_score, var])
                            if avg_score > best_tree_score:
                                best_tree_score = avg_score
                                tree_clf = tree.DecisionTreeClassifier(criterion=crit, splitter=split, class_weight=weight)

                print("Decision Tree Done")

                # Testing KNN classifier
                neighbors_param = [1, 3, 5, 9]
                weight_param = ['uniform', 'distance']
                dist_metric_param = [1, 2, 3]
                dst = ['manhattan', 'euclidean', 'minkowski']

                best_knn1_score, best_knn3_score = 0, 0
                for n in neighbors_param:
                    for weight in weight_param:
                        for dm in dist_metric_param:
                            clf = neighbors.KNeighborsClassifier(n_neighbors=n, weights=weight, p=dm)
                            scores = cross_val_score(clf, train_set, train_tags, cv=3)
                            avg_score = np.mean(scores)
                            var = np.var(scores)
                            writer.writerow([selection_method, num_of_features, clf_type[2], str(n), weight, '', '',
                                             dst[dm-1], avg_score, var])
                            if n == 1:
                                if avg_score > best_knn1_score:
                                    best_knn1_score = avg_score
                                    knn1_clf = neighbors.KNeighborsClassifier(n_neighbors=n, weights=weight, p=dm)
                            elif n == 3:
                                if avg_score > best_knn3_score:
                                    best_knn3_score = avg_score
                                    knn3_clf = neighbors.KNeighborsClassifier(n_neighbors=n, weights=weight, p=dm)
                print("KNN Done")

                # Testing Perceptron classifier
                penalty_param = [None, 'l1', 'l2']
                alpha_param = np.logspace(-7, -2, 6, dtype=float)
                intercept_param = [True, False]
                tol_param = np.logspace(-7, -2, 6, dtype=float)
                weight_param = [None, 'balanced']

                for penal in penalty_param:
                    for a in alpha_param:
                        for fip in intercept_param:
                            for tl in tol_param:
                                for weight in weight_param:
                                    clf = linear_model.Perceptron(penalty=penal, alpha=a, fit_intercept=fip, tol=tl,
                                                                  class_weight=weight)
                                    scores = cross_val_score(clf, train_set, train_tags, cv=3)
                                    avg_score = np.mean(scores)
                                    var = np.var(scores)
                                    writer.writerow([selection_method, num_of_features, clf_type[3], str(penal), str(a),
                                                     str(fip), str(tl), weight, avg_score, var])
                print("Perceptron Done")

                # Testing Naive Bayes classifier
                NB_type = ['Gaussian', 'Multinomial', 'Bernoulli']
                alpha_param = np.linspace(1e-5, 1, 6, dtype=float)
                prio_param = [True, False]

                clf = naive_bayes.GaussianNB()
                scores = cross_val_score(clf, train_set, train_tags, cv=3)
                avg_score = np.mean(scores)
                var = np.var(scores)
                writer.writerow([selection_method, num_of_features, clf_type[4], NB_type[0], '', '', '', '', avg_score,
                                 var])
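                # MultinomialNB requires non-negative features, hence abs(train_set) in the calls below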
                for a in alpha_param:
                    for prio in prio_param:
                        clf = naive_bayes.MultinomialNB(alpha=a, fit_prior=prio)
                        scores = cross_val_score(clf, abs(train_set), train_tags, cv=3)
                        avg_score = np.mean(scores)
                        var = np.var(scores)
                        writer.writerow([selection_method, num_of_features, clf_type[4], NB_type[1], str(a), str(prio), '',
                                         '', avg_score, var])

                for a in alpha_param:
                    for prio in prio_param:
                        clf = naive_bayes.BernoulliNB(alpha=a, fit_prior=prio)
                        scores = cross_val_score(clf, abs(train_set), train_tags, cv=3)
                        avg_score = np.mean(scores)
                        var = np.var(scores)
                        writer.writerow([selection_method, num_of_features, clf_type[4], NB_type[2], str(a), str(prio), '',
                                         '', avg_score, var])
                print("Naive Bayes Done")

                # Testing Random Forest classifier
                n_param = [100, 200, 300]
                criterion_param = ['gini', 'entropy']
                depth_param = [5, 50, None]

                best_rf_score = 0
                for n in n_param:
                    for crit in criterion_param:
                        for d in depth_param:
                            clf = RandomForestClassifier(n_estimators=n, criterion=crit, max_depth=d)
                            scores = cross_val_score(clf, train_set, train_tags, cv=3)
                            avg_score = np.mean(scores)
                            var = np.var(scores)
                            writer.writerow([selection_method, num_of_features, clf_type[5], n, crit, str(d), '', '',
                                             avg_score, var])
                            if avg_score > best_rf_score:
                                best_rf_score = avg_score
                                rf_clf = RandomForestClassifier(n_estimators=n, criterion=crit, max_depth=d)
                print("Random forest Done")

                # Testing Voting classifier
                vote_clf = VotingClassifier(estimators=[('svm', svm_clf), ('tree', tree_clf), ('knn1', knn1_clf),
                                                        ('knn3', knn3_clf), ('rf', rf_clf)], voting='hard')
                scores = cross_val_score(vote_clf, train_set, train_tags, cv=3)
                avg_score = np.mean(scores)
                var = np.var(scores)
                writer.writerow([selection_method, num_of_features, clf_type[6], 'Hard', '', '', '', '', avg_score, var])
                print("Voting Done")

    print("\n ***** ALL Done *****")
def test_adversarial_example(learning_rate=0.01,
             L1_reg=0.00, L2_reg=0.0001, n_epochs=100,
             batch_size=128, n_hidden=500, n_hiddenLayers=3,
             verbose=False):
    """
    Wrapper function for testing adversarial examples
    """

    # First, train a network using the small dataset.
    rng = numpy.random.RandomState(23455)

    # Load the smaller dataset
    train_set, valid_set, test_set = load_data(ds_rate=5)

    test_set_x, test_set_y = test_set
    valid_set_x, valid_set_y = valid_set
    train_set_x, train_set_y = train_set

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32*32*3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )
    
    probability = theano.function(
        inputs=[],
        outputs=[classifier.logRegressionLayer.p_y_given_x, y],
        givens={
            x: test_set_x,
            y: test_set_y
        }
    )
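    # The `gradient` function below returns fast-gradient-sign (FGSM) perturbations of the
    # test images: x_adv = x + 0.007 * sign(d cost / d x), the same epsilon used in
    # Goodfellow et al., "Explaining and Harnessing Adversarial Examples".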

    gradient = theano.function(
        inputs=[],
        outputs=classifier.input + 0.007 * T.sgn(T.grad(cost, classifier.input)),
        givens={
            x: test_set_x,
            y: test_set_y
        }
    )

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost and, at the
    # same time, updates the parameters of the model according to the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
        n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)

    ori_prob, ori_y = probability()
    
    # MATLAB was used to compare the predicted classification against y in
    # test_32x32.mat; the 14th test example is classified correctly, so idx = 13 is used
    idx = 13

    new_test_x = gradient()
    adversarial = theano.function(
        inputs=[],
        outputs=[classifier.logRegressionLayer.p_y_given_x, classifier.logRegressionLayer.y_pred, y],
        givens={
            x: new_test_x,
            y: test_set_y
        }
    )
    adver_prob, adver_y, _ = adversarial()

    return ori_prob[idx], ori_y[idx], adver_prob[idx], adver_y[idx], test_set_x.get_value(borrow=True), new_test_x
def test_data_augmentation(learning_rate=0.01,
             L1_reg=0.00, L2_reg=0.0001, n_epochs=100,
             batch_size=128, n_hidden=500, n_hiddenLayers=3,
             verbose=False):
    """
    Wrapper function for experiment of data augmentation

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layers, and its length should equal to
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    """
    rng = numpy.random.RandomState(23455)

    # Load down-sampled dataset in raw format (numpy.darray, not Theano.shared)
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix), where each row
    # corresponds to an example. target is a numpy.ndarray of 1 dimension
    # (vector) that has the same length as the number of rows in the input.

    # Load the smaller dataset in raw Format, since we need to preprocess it
    train_set, valid_set, test_set = load_data(ds_rate=5, theano_shared=False)

    # The raw (input, target) pair may be returned as a tuple; make it mutable first
    train_set = list(train_set)

    # Repeat the training labels 5 times to match the augmented inputs stacked below
    train_set[1] = numpy.tile(train_set[1], 5)

    # TODO: translate the dataset
    train_set_x_u = translate_image(train_set[0], 1)
    train_set_x_d = translate_image(train_set[0], 2)
    train_set_x_r = translate_image(train_set[0], 3)
    train_set_x_l = translate_image(train_set[0], 4)

    # Stack the original dataset and the synthesized datasets
    train_set[0] = numpy.vstack((train_set[0],
                       train_set_x_u,
                       train_set_x_d,
                       train_set_x_r,
                       train_set_x_l))
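    # translate_image is defined elsewhere in this repo; the second argument presumably
    # selects the shift direction (up/down/right/left), so the stacked inputs above are
    # five copies of the training set and line up with the labels tiled five times earlier.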

    # Convert raw dataset to Theano shared variables.
    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32*32*3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost and, at the
    # same time, updates the parameters of the model according to the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
        n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
Example #24
def my_lenet(batch_size=250,
             n_epochs=2000,
             learning_rate=0.01,
             L2_reg=0.0001,
             activation=T.tanh):

    # load data
    ds_rate = None
    datasets = load_data(ds_rate=ds_rate, theano_shared=True)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    rng = np.random.RandomState(23455)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    training_enabled = T.iscalar(
        'training_enabled'
    )  # pseudo boolean for switching between training and prediction

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # 4D output tensor is thus of shape (batch_size, 32, 16, 16)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(
            32, 3, 3, 3
        ),  # (number of output feature maps, number of input feature maps, height, width)
        poolsize=(2, 2),
        activation=activation)

    # 4D output tensor is thus of shape (batch_size, 64, 8, 8)
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, 32, 16, 16),
                                filter_shape=(64, 32, 3, 3),
                                poolsize=(2, 2),
                                activation=activation)

    layer2_input = layer1.output.flatten(2)

    layer2 = DropoutHiddenLayer(rng=rng,
                                is_train=training_enabled,
                                input=layer2_input,
                                n_in=64 * 8 * 8,
                                n_out=4096,
                                W=None,
                                b=None,
                                activation=activation,
                                p=0.7)

    layer3 = DropoutHiddenLayer(rng=rng,
                                is_train=training_enabled,
                                input=layer2.output,
                                n_in=4096,
                                n_out=512,
                                W=None,
                                b=None,
                                activation=activation,
                                p=0.7)

    layer4 = LogisticRegression(input=layer3.output, n_in=512, n_out=10)

    # the cost we minimize during training is the NLL of the model
    L2_sqr = (layer4.W**2).sum() + (layer3.W**2).sum() + (
        layer2.W**2).sum() + (layer1.W**2).sum() + (layer0.W**2).sum()

    cost = (layer4.negative_log_likelihood(y) + L2_reg * L2_sqr)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        })

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    momentum = theano.shared(numpy.cast[theano.config.floatX](0.5),
                             name='momentum')
    updates = []
    for param in params:
        param_update = theano.shared(param.get_value() *
                                     numpy.cast[theano.config.floatX](0.))
        updates.append((param, param - learning_rate * param_update))
        updates.append((param_update, momentum * param_update +
                        (numpy.cast[theano.config.floatX](1.) - momentum) *
                        T.grad(cost, param)))
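    # The two updates above implement SGD with momentum: `param_update` is a running
    # (exponentially averaged) gradient, and each step moves the parameter by
    # -learning_rate times that running value. theano.function applies all updates with
    # the old values, so the parameter step uses the previous velocity.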

    ######################
    # TRAIN ACTUAL MODEL #
    ######################
    # early-stopping parameters
    patience = 20000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.85  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many
    # minibatches before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    verbose = True

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1

        ##### implement flip
        train_set_x, train_set_y = datasets[0]
        flip_image(train_set_x, 1)

        ##### redefine train_model
        train_model_FLIP = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size],
                training_enabled: numpy.cast['int32'](1)
            })

        # train with augmentation data_set
        print('-----Training with augmented data (flip)-----')
        for minibatch_index in range(n_train_batches):
            cost_ij = train_model_FLIP(minibatch_index)
        print('-----Training over-----')

        # compute zero-one loss on validation set
        validation_losses = [validate_model(i) for i in range(n_valid_batches)]
        this_validation_loss = numpy.mean(validation_losses)

        if verbose:
            print(
                'epoch %i, train augmented data (flip), validation error %f %%'
                % (epoch, this_validation_loss * 100.))
        '''  
        ##### add noise
        train_set_x, train_set_y = datasets[0]
        ran = int(random.uniform(0,2))
        noise_injection(train_set_x, ran)
        
        ##### redefine train_model
        train_model_NOISE = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size: (index + 1) * batch_size],
                y: train_set_y[index * batch_size: (index + 1) * batch_size],
                training_enabled: numpy.cast['int32'](1)
            }
        ) 
           
          
        # train with augmentation data_set
        print('-----Training with augmented data (noise)-----')
        for minibatch_index in range(n_train_batches):
            cost_ij = train_model_NOISE(minibatch_index)   
        print('-----Training over-----')
        
        # compute zero-one loss on validation set
        validation_losses = [validate_model(i) for i
                             in range(n_valid_batches)]
        this_validation_loss = numpy.mean(validation_losses)

        if verbose:
            print('epoch %i, train augmented data (noise), validation error %f %%' %
                (epoch,
                 this_validation_loss * 100.))
        '''

        ##### get original data
        train_set_x, train_set_y = datasets[0]

        ##### redefine train_model
        train_model_1 = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size],
                training_enabled: numpy.cast['int32'](1)
            })

        # train with original data_set
        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter % 100 == 0) and verbose:
                print('training @ iter = ', iter)
            cost_ij = train_model_1(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)

                if verbose:
                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)

                    if verbose:
                        print(('     epoch %i, minibatch %i/%i, test error of '
                               'best model %f %%') %
                              (epoch, minibatch_index + 1, n_train_batches,
                               test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()

    # Retrieve the name of the function that invoked this one (the caller's name)
    curframe = inspect.currentframe()
    calframe = inspect.getouterframes(curframe, 2)

    # Print out summary
    print('Optimization complete.')
    print('Best validation error of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print('Best validation accuracy: %f%%.' %
          ((1.0 - best_validation_loss) * 100.))
    print('Best test accuracy: %f%%.' % ((1.0 - test_score) * 100.))
    print(('The training process for function ' + calframe[1][3] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)
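
# flip_image (called inside my_lenet above) is defined elsewhere in this repo. The sketch
# below is only an assumption of what such a helper might do: mirror rasterized 32x32x3
# images held in a Theano shared variable along the width axis, in place.
def flip_image_sketch(shared_x):
    data = shared_x.get_value(borrow=True)              # shape (n_examples, 3 * 32 * 32)
    imgs = data.reshape(-1, 3, 32, 32)[:, :, :, ::-1]   # mirror the width axis
    shared_x.set_value(imgs.reshape(data.shape[0], -1).copy(), borrow=True)
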
def test_lenet(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512],
            batch_size=200, filter_size=5, dnn_layers=1, n_hidden=500, gabor=False, lmbda=None, verbose=False):
    """
    Wrapper function for testing LeNet on SVHN dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    """
    print(test_lenet.__name__, nkerns, filter_size, gabor, lmbda)

    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    if gabor is True:
        # Generate Gabor filters
        filters = build_gabor(filter_size, nkerns[0], lmbda)
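        # build_gabor is defined elsewhere in this repo. Presumably (an assumption, not the
        # original implementation) it evaluates the usual Gabor kernel
        #   g(x, y) = exp(-(x'^2 + gamma^2 * y'^2) / (2 * sigma^2)) * cos(2*pi*x'/lmbda + psi)
        # on a filter_size x filter_size grid for nkerns[0] different orientations.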
        # filters = numpy.array([filters[i][0] for i in range(len(filters))])
        filters = numpy.array([filters[i] for i in range(len(filters))])
        # print filters.shape
        filter_weights = numpy.tile(filters, (1, 3, 1)).reshape(nkerns[0], 3, filter_size, filter_size)
        layer0 = LeNetConvPoolLayer(
            rng,
            input=layer0_input,
            image_shape=(batch_size, 3, 32, 32),
            filter_shape=(nkerns[0], 3, filter_size, filter_size),
            poolsize=(2,2),
            weights = filter_weights
        )
        print('gabor filter weights are working')
    else:
        # TODO: Construct the first convolutional pooling layer
        layer0 = LeNetConvPoolLayer(
            rng,
            input=layer0_input,
            image_shape=(batch_size, 3, 32, 32),
            filter_shape=(nkerns[0], 3, filter_size, filter_size),
            poolsize=(2,2)
        )

    # TODO: Construct the second convolutional pooling layer
    i_s_1 = (32 - filter_size + 1) // 2  # integer division so image_shape stays int

    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], i_s_1, i_s_1),
        filter_shape=(nkerns[1], nkerns[0], filter_size, filter_size),
        poolsize=(2,2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    layer2_input = layer1.output.flatten(2)

    # TODO: construct a fully-connected sigmoidal layer
    i_s_2 = (i_s_1 - filter_size + 1) // 2

    if hasattr(n_hidden, '__iter__'):
        assert(len(n_hidden) == dnn_layers)
    else:
        n_hidden = (n_hidden,)*dnn_layers

    DNN_Layers = []
    for i in range(dnn_layers):
        h_input = layer2_input if i == 0 else DNN_Layers[i-1].output
        h_in = nkerns[1] * i_s_2 * i_s_2 if i == 0 else n_hidden[i-1]
        DNN_Layers.append(
            HiddenLayer(
                rng=rng,
                input=h_input,
                n_in=h_in,
                n_out=n_hidden[i],
                activation=T.tanh
        ))

    # layer2 = HiddenLayer(
    #     rng,
    #     input=layer2_input,
    #     n_in=nkerns[1] * i_s_2 * i_s_2,
    #     n_out=500,
    #     activation=T.tanh
    # )

    # TODO: classify the values of the fully-connected sigmoidal layer
    LR_Layer = LogisticRegression(
        input=DNN_Layers[-1].output,
        n_in=n_hidden[-1],
        n_out=10
    )

    # the cost we minimize during training is the NLL of the model
    cost = LR_Layer.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        LR_Layer.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        LR_Layer.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # TODO: create a list of all model parameters to be fit by gradient descent
    params = list(LR_Layer.params)  # copy so the layer's own param list is not mutated below
    for layer in DNN_Layers:
        params += layer.params
    if gabor is True:
        print('gabor params are working')
        params += layer1.params
    else:
        params += layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
        n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
Example #26
def test_adversarial_example(learning_rate=0.01,
                             L1_reg=0.00,
                             L2_reg=0.0001,
                             n_epochs=100,
                             batch_size=128,
                             n_hidden=500,
                             n_hiddenLayers=3,
                             verbose=False,
                             smaller_set=False):
    """
    Wrapper function for testing adversarial examples
    """
    # load the dataset; download the dataset if it is not present
    if smaller_set:
        datasets = load_data(ds_rate=5)
    else:
        datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    rng = numpy.random.RandomState(1234)

    # TODO: construct a neural network, either MLP or CNN.
    classifier = myMLP(rng=rng,
                       input=x,
                       n_in=32 * 32 * 3,
                       n_hidden=n_hidden,
                       n_out=10,
                       n_hiddenLayers=n_hiddenLayers)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost and, at the
    # same time, updates the parameters of the model according to the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model, n_train_batches,
             n_valid_batches, n_test_batches, n_epochs, verbose)

    filter_model = theano.function(
        inputs=[index],
        outputs=[
            x, classifier.logRegressionLayer.y_pred, y,
            classifier.logRegressionLayer.p_y_given_x
        ],
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    filter_output = [filter_model(i) for i in range(n_test_batches)]

    sample_x = None
    sample_y = None
    test_output = None
    expected_distribution = None
    for i in filter_output:
        if numpy.array_equal(i[1], i[2]):
            sample_x = i[0]
            sample_y = i[1]
            expected_distribution = i[3]
            print("successfully classified sample ", sample_y)
            t_sample_x, t_sample_y = shared_dataset((sample_x, sample_y))
            grad_input = classifier.input + 0.1 * T.sgn(
                T.grad(cost, classifier.input))
            grad_input_fn = theano.function(inputs=[],
                                            outputs=grad_input,
                                            givens={
                                                x: t_sample_x,
                                                y: t_sample_y
                                            })
            gradient = grad_input_fn()
            new_t_sample_x, t_sample_y = shared_dataset((gradient, sample_y))
            testing_gradient = theano.function(
                inputs=[],
                outputs=[
                    y, classifier.logRegressionLayer.y_pred,
                    classifier.logRegressionLayer.p_y_given_x
                ],
                givens={
                    x: new_t_sample_x,
                    y: t_sample_y
                })
            test_output = testing_gradient()
            if not numpy.array_equal(test_output[0], test_output[1]):
                break

    return test_output, expected_distribution
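
# Hedged usage sketch (an assumption, not part of the original script): the returned
# values can be inspected like this after a short run on the down-sampled set:
#
#   out, clean_dist = test_adversarial_example(n_epochs=5, smaller_set=True)
#   if out is not None:
#       true_y, adv_pred, adv_dist = out
#       print('true labels             :', true_y)
#       print('adversarial predictions :', adv_pred)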
Example #27
def main():
    # Load the data
    train_set, train_tags, test_set = load_data('data/Data.pickle')

    # Range of the data
    print("max(train_set) = " + '{:.4f}'.format(np.max(train_set)))
    print("min(train_set) = " + '{:.4f}'.format(np.min(train_set)))
    print("mean(train_set) = " + '{:.4f}'.format(np.mean(train_set)))
    print("var(train_set) = " + '{:.4f}'.format(np.var(train_set)))
    print("")
    print("max(test_set) = " + '{:.4f}'.format(np.max(test_set)))
    print("min(test_set) = " + '{:.4f}'.format(np.min(test_set)))
    print("mean(test_set) = " + '{:.4f}'.format(np.mean(test_set)))
    print("var(test_set) = " + '{:.4f}'.format(np.var(test_set)))

    plt.figure(figsize=(10, 6))
    plt.subplot(211)
    plt.title('Dynamic range vs feature (Train set)')
    plt.plot(range(train_set.shape[1]), np.min(train_set, axis=0), 'r')
    plt.plot(range(train_set.shape[1]), np.max(train_set, axis=0), 'g')
    plt.plot(range(train_set.shape[1]), np.mean(train_set, axis=0), 'b')
    plt.plot(range(train_set.shape[1]),
             np.mean(train_set, axis=0) + 2 * np.std(train_set, axis=0),
             linestyle=':',
             color='k')
    plt.plot(range(train_set.shape[1]),
             np.mean(train_set, axis=0) - 2 * np.std(train_set, axis=0),
             linestyle=':',
             color='k')
    plt.legend(['max', 'min', 'avg', '+2 Sigma', '-2 Sigma'])
    plt.grid()
    plt.subplot(212)
    plt.title('Dynamic range vs feature (Test set)')
    plt.plot(range(test_set.shape[1]), np.min(test_set, axis=0), 'r')
    plt.plot(range(test_set.shape[1]), np.max(test_set, axis=0), 'g')
    plt.plot(range(test_set.shape[1]), np.mean(test_set, axis=0), 'b')
    plt.plot(range(test_set.shape[1]),
             np.mean(test_set, axis=0) + 2 * np.std(test_set, axis=0),
             linestyle=':',
             color='k')
    plt.plot(range(test_set.shape[1]),
             np.mean(test_set, axis=0) - 2 * np.std(test_set, axis=0),
             linestyle=':',
             color='k')
    plt.legend(['max', 'min', 'avg', '+2 Sigma', '-2 Sigma'])
    plt.grid()
    plt.show()

    # Divide the examples into true/false (train_tags is used as a boolean mask)
    train_tags = np.asarray(train_tags, dtype=bool)
    good = train_set[train_tags]
    bad = train_set[~train_tags]

    plt.figure(figsize=(10, 6))
    plt.subplot(211)
    plt.title('Dynamic range vs feature (True examples)')
    plt.plot(range(good.shape[1]), np.min(good, axis=0), 'r')
    plt.plot(range(good.shape[1]), np.max(good, axis=0), 'g')
    plt.plot(range(good.shape[1]), np.mean(good, axis=0), 'b')
    plt.plot(range(good.shape[1]),
             np.mean(good, axis=0) + 2 * np.std(good, axis=0),
             linestyle=':',
             color='k')
    plt.plot(range(good.shape[1]),
             np.mean(good, axis=0) - 2 * np.std(good, axis=0),
             linestyle=':',
             color='k')
    plt.legend(['max', 'min', 'avg', '+2 Sigma', '-2 Sigma'])
    plt.grid()
    plt.subplot(212)
    plt.title('Dynamic range vs feature (False examples)')
    plt.plot(range(bad.shape[1]), np.min(bad, axis=0), 'r')
    plt.plot(range(bad.shape[1]), np.max(bad, axis=0), 'g')
    plt.plot(range(bad.shape[1]), np.mean(bad, axis=0), 'b')
    plt.plot(range(bad.shape[1]),
             np.mean(bad, axis=0) + 2 * np.std(bad, axis=0),
             linestyle=':',
             color='k')
    plt.plot(range(bad.shape[1]),
             np.mean(bad, axis=0) - 2 * np.std(bad, axis=0),
             linestyle=':',
             color='k')
    plt.legend(['max', 'min', 'avg', '+2 Sigma', '-2 Sigma'])
    plt.grid()
    plt.show()
def test_filter(learning_rate=0.1, n_epochs=1000, nkerns=[3, 512],
            batch_size=200, verbose=True):
    """
    Wrapper function for testing LeNet on SVHN dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # TODO: Construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        # (batch size, num input feature maps, image height, image width)
        image_shape=(batch_size,3,32,32),
        # (number of filters, num input feature maps, filter height, filter width)
        filter_shape=(nkerns[0],3,5,5),
        poolsize=(2,2)
    )

    # TODO: Construct the second convolutional pooling layer
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        # (32-5+1)/2
        image_shape=(batch_size,nkerns[0],14,14),
        filter_shape=(nkerns[1],nkerns[0],5,5),
        poolsize=(2,2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    layer2_input = layer1.output.flatten(2)

    # TODO: construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        # (14-5+1)/2
        n_in=nkerns[1] * 5 * 5,
        n_out=500,
        activation=T.nnet.sigmoid
    )

    # TODO: classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(
         input=layer2.output,
         n_in=500,
         n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # TODO: create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
        n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
    
    mean_w_0 = layer0.W.get_value().mean()

    plt.figure()
    for knkerns0 in range(nkerns[0]):
        for kch in range(3):
            plt.subplot(3,3,knkerns0*3+kch+1)
            plt.imshow(layer0.W.get_value()[knkerns0,kch,:,:])
    plt.suptitle('trained filters')
    
    
    ###########################################################################
    ###########################################################################
    ###########################################################################
    
    filter_shape_input = (nkerns[0],3,5,5)

    pt_input = numpy.zeros((filter_shape_input[2],filter_shape_input[3]))
    pt_input[(filter_shape_input[2] - 1) // 2, (filter_shape_input[3] - 1) // 2] = 1.0  # unit impulse at the centre
    
    W = numpy.zeros(filter_shape_input)
    
    from scipy.ndimage.filters import gaussian_filter as gf    
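    # Each pre-defined kernel below is a Gaussian point-spread function: the centred unit
    # impulse is blurred with sigma = knkerns0 + 1, then rescaled so its mean matches the
    # mean of the trained layer0 weights (mean_w_0) computed above.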
    
    for knkerns0 in range(nkerns[0]):
        for kch in range(3):
            W[knkerns0,kch,:,:]=gf(pt_input,(knkerns0+1.0))
            W[knkerns0,kch,:,:] = W[knkerns0,kch,:,:]/W[knkerns0,kch,:,:].mean()*mean_w_0
    
    W = numpy.asarray(W, dtype=theano.config.floatX)
    # TODO: Construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        # (batch size, num input feature maps, image height, image width)
        image_shape=(batch_size,3,32,32),
        # (number of filters, num input feature maps, filter height, filter width)
        filter_shape=filter_shape_input,
        poolsize=(2,2)
    )
    # Rebinding `layer0.W = W` would not change the graph that was already built above,
    # so the pre-defined filters are copied into the existing shared variable instead.
    layer0.W.set_value(W, borrow=True)

    # TODO: Construct the second convolutional pooling layer
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        # (32-5+1)/2
        image_shape=(batch_size,nkerns[0],14,14),
        filter_shape=(nkerns[1],nkerns[0],5,5),
        poolsize=(2,2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    layer2_input = layer1.output.flatten(2)

    # TODO: construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        # (14-5+1)/2
        n_in=nkerns[1] * 5 * 5,
        n_out=500,
        activation=T.nnet.sigmoid
    )

    # TODO: classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(
         input=layer2.output,
         n_in=500,
         n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # TODO: create a list of all model parameters to be fit by gradient descent
    # the param of layer0 is excluded
    params = layer3.params + layer2.params + layer1.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]
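    # Plain SGD: each parameter theta is updated as theta <- theta - learning_rate * d(cost)/d(theta).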

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
        n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)

    plt.figure()
    for knkerns0 in range(nkerns[0]):
        for kch in range(3):
            plt.subplot(3,3,knkerns0*3+kch+1)
            plt.imshow(layer0.W.get_value()[knkerns0,kch,:,:])
    plt.suptitle('pre-defined filters')  # figure-level title; plt.title here would only label the last subplot
Example #29
def my_cnn(batch_size, n_epochs, learning_rate=0.01, patience=12000):

    # load data
    ds_rate = None
    datasets = load_data(ds_rate=ds_rate, theano_shared=True)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    rng = np.random.RandomState(23455)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    layerX_input = x.reshape((batch_size, 3, 32, 32))

    layerX = DropLayer(input=layerX_input)

    layer0 = LeNetConvPoolLayer(
        rng,
        input=layerX.output,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(
            64, 3, 3, 3
        ),  # (number of output feature maps, number of input feature maps, height, width)
        poolsize=(1, 1))
    # 4D output tensor is thus of shape (batch_size, 64, 32, 32)

    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, 64, 32, 32),
                                filter_shape=(64, 64, 3, 3),
                                poolsize=(2, 2))
    # 4D output tensor is thus of shape (batch_size, 64, 16, 16)

    layer2 = LeNetConvPoolLayer(rng,
                                input=layer1.output,
                                image_shape=(batch_size, 64, 16, 16),
                                filter_shape=(128, 64, 3, 3),
                                poolsize=(1, 1))
    # 4D output tensor is thus of shape (batch_size, 128, 16, 16)

    layer3 = LeNetConvPoolLayer(rng,
                                input=layer2.output,
                                image_shape=(batch_size, 128, 16, 16),
                                filter_shape=(128, 128, 3, 3),
                                poolsize=(2, 2))
    # 4D output tensor is thus of shape (batch_size, 128, 8, 8)

    layer4 = LeNetConvPoolLayer(rng,
                                input=layer3.output,
                                image_shape=(batch_size, 128, 8, 8),
                                filter_shape=(256, 128, 3, 3),
                                poolsize=(1, 1))
    # 4D output tensor is thus of shape (batch_size, 256, 8, 8)

    layer5 = UpSampleLayer(input=layer4.output)
    # 4D output tensor is thus of shape (batch_size, 256, 16, 16)

    layer6 = LeNetConvPoolLayer(rng,
                                input=layer5.output,
                                image_shape=(batch_size, 256, 16, 16),
                                filter_shape=(128, 256, 3, 3),
                                poolsize=(1, 1))
    # 4D output tensor is thus of shape (batch_size, 128, 16, 16)

    layer7 = LeNetConvPoolLayer(rng,
                                input=layer6.output,
                                image_shape=(batch_size, 128, 16, 16),
                                filter_shape=(128, 128, 3, 3),
                                poolsize=(1, 1))
    # 4D output tensor is thus of shape (batch_size, 128, 16, 16)

    layer8 = UpSampleLayer(input=layer7.output + layer3.output_x)
    # 4D output tensor is thus of shape (batch_size, 128, 32, 32)
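    # Additive skip connection: layer3.output_x is presumably layer3's pre-pooling
    # activation at 16x16 resolution, so it can be summed with layer7's 16x16 maps
    # before upsampling back to 32x32 (encoder-decoder style).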

    layer9 = LeNetConvPoolLayer(rng,
                                input=layer8.output,
                                image_shape=(batch_size, 128, 32, 32),
                                filter_shape=(64, 128, 3, 3),
                                poolsize=(1, 1))
    # 4D output tensor is thus of shape (batch_size, 64, 32, 32)

    layer10 = LeNetConvPoolLayer(rng,
                                 input=layer9.output,
                                 image_shape=(batch_size, 64, 32, 32),
                                 filter_shape=(64, 64, 3, 3),
                                 poolsize=(1, 1))
    # 4D output tensor is thus of shape (batch_size, 64, 32, 32)

    layer11 = LeNetConvPoolLayer(rng,
                                 input=layer10.output + layer1.output_x,
                                 image_shape=(batch_size, 64, 32, 32),
                                 filter_shape=(3, 64, 3, 3),
                                 poolsize=(1, 1))
    # 4D output tensor is thus of shape (batch_size, 3, 32, 32)

    cost = layer11.ob_func(layerX_input)
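    # ob_func presumably returns a reconstruction error between the network output
    # (layer11, shape (batch_size, 3, 32, 32)) and the clean input layerX_input,
    # i.e. a denoising-autoencoder objective over the dropped-out images.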

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [], [layerX_input, layerX.output, layer11.output, cost],
        givens={x: test_set_x[0:100]})
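    # Note: slicing the first 100 test images only matches the x.reshape(...) above
    # when batch_size == 100; other batch sizes would make this function fail.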

    validate_model = theano.function(
        [index],
        cost,
        givens={x: valid_set_x[index * batch_size:(index + 1) * batch_size]})

    # create a list of all model parameters to be fit by gradient descent
    params = layer11.params + layer10.params + layer9.params + layer7.params + layer6.params + layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

    print('... training the model')

    # early-stopping parameters
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.85  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many
    # minibatches before checking the network
    # on the validation set; in this case we
    # check every epoch
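    # Note: in this simplified loop patience stays fixed; patience_increase and
    # improvement_threshold are declared but never used to extend patience below.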

    best_validation_cost = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    verbose = True

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter % 100 == 0) and verbose:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_cost = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_cost = numpy.mean(validation_cost)

                if verbose:
                    print('epoch %i, minibatch %i/%i, validation cost %f' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_cost))

                # if we got the best validation score until now
                if this_validation_cost < best_validation_cost:

                    # save best validation score and iteration number
                    best_validation_cost = this_validation_cost
                    best_iter = iter

            if patience <= iter:
                done_looping = True
                break

    TEST_MODEL_RESULT = test_model()
    GT_Images_T = TEST_MODEL_RESULT[0]
    Drop_Images_T = TEST_MODEL_RESULT[1]
    Reconstructed_Images_T = TEST_MODEL_RESULT[2]
    cost_list = TEST_MODEL_RESULT[3]

    # plot 8*3 images
    print("Ground Truth, Corrupted Images, and Recontructed Images:")
    f, axarr = plt.subplots(8, 3, figsize=(20, 20))
    for i in range(8):
        plt.axes(axarr[i, 0])
        plt.imshow(np.transpose(GT_Images_T[i], (1, 2, 0)))

        plt.axes(axarr[i, 1])
        plt.imshow(np.transpose(Drop_Images_T[i], (1, 2, 0)))

        plt.axes(axarr[i, 2])
        plt.imshow(np.transpose(Reconstructed_Images_T[i], (1, 2, 0)))

    end_time = timeit.default_timer()

    # Retrieve the name of the function that invoked this one (the caller's name)
    curframe = inspect.currentframe()
    calframe = inspect.getouterframes(curframe, 2)

    # Print out summary
    print('Optimization complete.')
    print('Best validation cost %f obtained at iteration %i, ' %
          (best_validation_cost, best_iter + 1))
    print(('The training process for function ' + calframe[1][3] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)
def test_gaussian(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512],
            batch_size=200, verbose=False):
    """
    Wrapper function for testing LeNet with fixed Gaussian first-layer filters on the SVHN dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # TODO: Construct the first convolutional pooling layer
    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (32-5+1 , 32-5+1) = (28, 28)
    # maxpooling reduces this further to (28/2, 28/2) = (14, 14)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 14, 14)

    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(nkerns[0], 3, 5, 5),
        poolsize=(2, 2)
    )


    # TODO: Construct the second convolutional pooling layer
    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (14-5+1, 14-5+1) = (10, 10)
    # maxpooling reduces this further to (10/2, 10/2) = (5, 5)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 5, 5)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 14, 14),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    layer2_input = layer1.output.flatten(2)

    # TODO: construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 5 * 5,
        n_out=500,
        activation=T.tanh
    )

    # TODO: classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(
         input=layer2.output,
         n_in=500,
         n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # TODO: create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    layer0.W = [make_Gaussian(size = 5), make_Gaussian(size = 5), make_Gaussian(size = 5)]
    layer0.b = numpy.zeros((nkerns[0],), dtype=theano.config.floatX)
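    # layer0 is deliberately excluded from `params` above, so these hand-built
    # Gaussian filters are not updated by SGD. Note that reassigning layer0.W and
    # layer0.b here happens after the symbolic graph was built; updating the
    # original shared variables in place (e.g. via set_value) may be needed for
    # the new values to actually take effect.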

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
        n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
Example #31
def test_mlp(learning_rate=0.01,
             L1_reg=0.00,
             L2_reg=0.0001,
             n_epochs=1000,
             batch_size=20,
             n_hidden=500,
             verbose=True,
             fileName='predictionsMLP'):
    """
    Wrapper function for testing an MLP with DropConnect on the SVHN dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels
    learning_rate = theano.shared(learning_rate)
    testing = T.lscalar('testing')
    testValue = testing
    getTestValue = theano.function([testing], testValue)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))
    layer0_input = layer0_input.flatten(2)
    # TODO: Construct the first convolutional pooling layer
    layer0 = HiddenLayer(rng,
                         input=layer0_input,
                         n_in=32 * 32 * 3,
                         n_out=n_hidden,
                         activation=T.tanh)

    layer1 = HiddenLayer(rng,
                         input=layer0.output,
                         n_in=n_hidden,
                         n_out=n_hidden,
                         activation=T.tanh)
    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).

    # TODO: construct a fully-connected sigmoidal layer
    layer2 = DropConnect(rng,
                         input=layer1.output,
                         n_in=n_hidden,
                         n_out=batch_size,
                         testing=testing)

    # TODO: classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=batch_size, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    print("Model building complete")

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            testing: getTestValue(1)
        },
        on_unused_input='ignore')

    getPredictedValue = theano.function(
        [index],
        layer3.predictedValue(),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            testing: getTestValue(1)
        },
        on_unused_input='ignore')

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            testing: getTestValue(1)
        },
        on_unused_input='ignore')

    # TODO: create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params
    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)
    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.

    #updates = [
    #    (param_i, param_i - learning_rate * layer2.maskW.get_value() * grad_i) if (param_i.name == 'WDrop') else (param_i, param_i - learning_rate * layer2.maskb.get_value() * grad_i) if(param_i.name == 'bDrop') else (param_i, param_i - learning_rate * grad_i)
    #    for param_i, grad_i in zip(params, grads)
    #]

    updates = []
    momentum = 0.9
    for param in params:
        param_update = theano.shared(param.get_value() * 0.,
                                     broadcastable=param.broadcastable)
        if (param.name == 'WDrop'):
            updates.append((param, param - learning_rate.get_value().item() *
                            layer2.maskW.get_value() * param_update))
        elif (param.name == 'bDrop'):
            updates.append((param, param - learning_rate.get_value().item() *
                            layer2.maskb.get_value() * param_update))
        else:
            updates.append(
                (param,
                 param - learning_rate.get_value().item() * param_update))
        updates.append(
            (param_update,
             momentum * param_update + (1. - momentum) * T.grad(cost, param)))
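    # Momentum-style SGD: param_update holds an exponential moving average of the
    # gradient (a velocity term); each parameter moves by -learning_rate * velocity,
    # with the DropConnect masks applied to the masked weight/bias parameters.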
    '''
    updates = [
        (param_i, param_i - learning_rate * grad_i) if ((param_i.name == 'WDrop') or (param_i.name == 'bDrop')) else (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]
    '''
    print("Commpiling the train model function")

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            testing: getTestValue(0)
        },
        on_unused_input='ignore',
        allow_input_downcast=True)
    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    predictions = train_nn(train_model, validate_model, test_model,
                           getPredictedValue, n_train_batches, n_valid_batches,
                           n_test_batches, n_epochs, learning_rate, verbose)

    f = open(fileName, 'wb')
    cPickle.dump(predictions, f, protocol=cPickle.HIGHEST_PROTOCOL)
    f.close()
def test_convnet(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512, 20],filter_shape=[9,5],
        batch_size=200, verbose=True):
    """
    Wrapper function for testing Multi-Stage ConvNet on SVHN dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # TODO: Construct the first convolutional pooling layer:
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        # (batch size, num input feature maps,image height, image width)
        image_shape=(batch_size,3,32,32),
        # (number of filters, num input feature maps, filter height, filter width)
        filter_shape=(nkerns[0],3,filter_shape[0],filter_shape[0]),
        poolsize=(2,2)
    )

    # TODO: Construct the second convolutional pooling layer
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        # (32-9+1)/2 = 12 
        image_shape=(batch_size,nkerns[0],(33-filter_shape[0])//2,(33-filter_shape[0])//2),
        filter_shape=(nkerns[1],nkerns[0],filter_shape[1],filter_shape[1]),
        poolsize=(2,2)
    )

    # Combine Layer 0 output and Layer 1 output
    # TODO: downsample the first layer output to match the size of the second
    # layer output.
    layer0_output_ds = downsample.max_pool_2d(
            # nkerns[0] 12 x 12
            # nkerns[1] 4 x 4
            input=layer0.output,
            ds=(3,3), # TODO: change ds
            ignore_border=False
    )
    # concatenate layer
    layer2_input = T.concatenate([layer1.output, layer0_output_ds], axis=1)
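    # With the default filter_shape=[9, 5]: layer0's maps are 12x12 ((32-9+1)/2),
    # layer1's are 4x4 ((12-5+1)/2), and the (3,3) max-pooling above brings the
    # 12x12 maps down to 4x4 so both can be concatenated along the channel axis.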

    filter_shape_2 = ((33-filter_shape[0])//2 - filter_shape[1]+1)//2
    # TODO: Construct the third convolutional pooling layer
    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer2_input,
        # (12-5+1)/2 = 4        
        image_shape=(batch_size,nkerns[1]+nkerns[0],filter_shape_2,filter_shape_2), #TODO
        filter_shape=(nkerns[2],nkerns[1]+nkerns[0],filter_shape_2,filter_shape_2), #TODO
        poolsize= (1,1)#TODO
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[2] * 1 * 1).
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        n_in=nkerns[2] * 1 * 1,
        n_out= 10,#TODO,
        activation=T.nnet.sigmoid
    )

    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer3.output,
        n_in= 10,#TODO
        n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # TODO: create a list of all model parameters to be fit by gradient descent
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
        n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
Example #33
import hw3_utils
import our_utils
import random
#from classifier import *
import pickle
from sklearn.decomposition import PCA
import sklearn.preprocessing

data, labels, test = hw3_utils.load_data()

#Shuffle
combined = list(zip(data, labels))
random.shuffle(combined)
data[:], labels[:] = zip(*combined)

#Scale
scaler = sklearn.preprocessing.StandardScaler().fit(data)
data = scaler.transform(data)
test = scaler.transform(test)

# Use PCA
pca = PCA(n_components=5)
pca.fit(data)
data = pca.transform(data)
# use the PCA fitted on the training data to transform the test set (do not refit on test)
test = pca.transform(test)

new_dataset = (data, labels, test)

with open("Shuffled_scaled_PCA_data.data","wb") as f:
    pickle.dump(new_dataset, f)
Example #34
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             batch_size=20, n_hidden=500, 
             verbose=True, fileName='predictionsMLP'):
    """
    Wrapper function for testing an MLP with DropConnect on the SVHN dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels
    learning_rate = theano.shared(learning_rate)
    testing = T.lscalar('testing')
    testValue = testing
    getTestValue = theano.function([testing],testValue) 
    
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))
    layer0_input = layer0_input.flatten(2)
    # TODO: Construct the first convolutional pooling layer
    layer0 = HiddenLayer(
        rng,
        input=layer0_input,
        n_in=32*32*3,
        n_out=n_hidden,
        activation=T.tanh
    )
    
    layer1 = HiddenLayer(
        rng,
        input=layer0.output,
        n_in=n_hidden,
        n_out=n_hidden,
        activation=T.tanh
    )
    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    
    
    # TODO: construct a fully-connected sigmoidal layer
    layer2 = DropConnect(
        rng,
        input=layer1.output,
        n_in=n_hidden,
        n_out=batch_size,
        testing=testing
    )
    
    # TODO: classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(
         input=layer2.output,
         n_in=batch_size,
         n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    print("Model building complete")

   
    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size],
            testing: getTestValue(1)
        },
        on_unused_input='ignore'
    )
    
    getPredictedValue = theano.function(        
        [index],
        layer3.predictedValue(),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size],
            testing: getTestValue(1)
        },
        on_unused_input='ignore'
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size],
            testing: getTestValue(1)
        },
        on_unused_input='ignore'
    )

    # TODO: create a list of all model parameters to be fit by gradient descent
    params = layer3.params  + layer2.params + layer1.params + layer0.params
    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)
    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    
    #updates = [
    #    (param_i, param_i - learning_rate * layer2.maskW.get_value() * grad_i) if (param_i.name == 'WDrop') else (param_i, param_i - learning_rate * layer2.maskb.get_value() * grad_i) if(param_i.name == 'bDrop') else (param_i, param_i - learning_rate * grad_i)
    #    for param_i, grad_i in zip(params, grads)
    #]
    
    updates = []
    momentum = 0.9
    for param in params:
        param_update = theano.shared(param.get_value()*0., broadcastable=param.broadcastable)
        if (param.name == 'WDrop'):
            updates.append((param,param - learning_rate.get_value().item() * layer2.maskW.get_value() * param_update))
        elif(param.name == 'bDrop'):
            updates.append((param,param - learning_rate.get_value().item() * layer2.maskb.get_value() * param_update))
        else:
            updates.append((param,param - learning_rate.get_value().item() * param_update))
        updates.append((param_update, momentum*param_update + (1. - momentum)*T.grad(cost, param)))
    '''
    updates = [
        (param_i, param_i - learning_rate * grad_i) if ((param_i.name == 'WDrop') or (param_i.name == 'bDrop')) else (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]
    '''
    print("Commpiling the train model function")

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size],
            testing : getTestValue(0)
        },
        on_unused_input='ignore',
        allow_input_downcast=True
    )
    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    predictions = train_nn(train_model, validate_model, test_model, getPredictedValue,
        n_train_batches, n_valid_batches, n_test_batches, n_epochs, learning_rate, verbose)

    f = open(fileName, 'wb')
    cPickle.dump(predictions, f, protocol=cPickle.HIGHEST_PROTOCOL)
    f.close()
Example #35
def test_mlp(learning_rate=0.01,
             L1_reg=0.00,
             L2_reg=0.0001,
             n_epochs=100,
             batch_size=128,
             n_hidden=500,
             n_hiddenLayers=3,
             verbose=False,
             smaller_set=True):
    """
    Wrapper function for training and testing MLP

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layers, and its length should equal to
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type smaller_set: boolean
    :param smaller_set: to use the smaller dataset or not to.

    """

    # load the dataset; download the dataset if it is not present
    if smaller_set:
        datasets = load_data(ds_rate=5)
    else:
        datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    rng = numpy.random.RandomState(1234)

    # TODO: construct a neural network, either MLP or CNN.
    classifier = myMLP(rng=rng,
                       input=x,
                       n_in=32 * 32 * 3,
                       n_hidden=n_hidden,
                       n_out=10,
                       n_hiddenLayers=n_hiddenLayers)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    return train_nn(train_model, validate_model, test_model, n_train_batches,
                    n_valid_batches, n_test_batches, n_epochs, verbose)
def test_adversarial_example(learning_rate=0.1, L1_reg=0.00, L2_reg=0.0001, n_epochs=100,
             batch_size=128, n_hidden=500, n_hiddenLayers=3,
             verbose=True, smaller_set=True):
    """
    Wrapper function for testing adversarial examples
    """
    # load the dataset; download the dataset if it is not present
    if smaller_set:
        datasets = load_data(ds_rate=5)
    else:
        datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

#    test_set_x = test_set_x[0:1]
#    test_set_y = test_set_y[0:1]
    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)

    # TODO: construct a neural network, either MLP or CNN.
    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32*32*3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )


    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )
    
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    test_model_single = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index:index+1],
            y: test_set_y[index:index+1]
        }
    )


    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    
    gx = T.grad(cost, x)

    train_nn(train_model, validate_model, test_model,
       n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
    
    f = theano.function(
        inputs=[index],
        outputs=gx,
        givens={
            x: test_set_x[index : (index + 1)],
            y: test_set_y[index : (index + 1)]
        }
    )
    ind_oi = 3

    from matplotlib import pyplot as plt
    plt.figure()
    plt.imshow(test_set_x.get_value()[ind_oi,:].reshape(3,32,32).transpose((1,2,0)))

    h = theano.function(
        inputs=[index],
        outputs=classifier.logRegressionLayer.y_pred,
        givens={
            x: test_set_x[index : (index + 1)]
        }
    )

    print('predicted number original: %i' % h(ind_oi))	
    
    Y = T.matrix()
    X_update = (test_set_x, T.inc_subtensor(test_set_x[ind_oi:(ind_oi+1)], Y))
    g = theano.function([Y], updates=[X_update])
    g(0.01*numpy.sign(f(ind_oi)))
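    # Fast-gradient-sign-style perturbation: f(ind_oi) is the gradient of the loss
    # w.r.t. the input image, and inc_subtensor adds 0.01 * sign(gradient) to that
    # test image in place, often enough to change the predicted class.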

    print('predicted number adversarial: %i' % h(ind_oi))
    
    plt.figure()
    plt.imshow(test_set_x.get_value()[ind_oi,:].reshape(3,32,32).transpose((1,2,0)))
Example #37
import matplotlib.pyplot as plt
import hw3_utils as utils
import part_c_classifiers
from classifier import id3_factory, perceptron_factory
from classifier import split_crosscheck_groups, knn_factory, evaluate
from sklearn.feature_selection import SelectKBest, f_classif

# question 3.2

patients, labels, test = utils.load_data()
split_crosscheck_groups([patients, labels], 2)

# question 5.1

k_list = [1, 3, 5, 7, 13]
accuracy_list = []

file_name = 'experiments6.csv'
with open(file_name, 'wb') as file:
    for k in k_list:
        knn_f = knn_factory(k)
        accuracy, error = evaluate(knn_f, 2)
        line = str(k) + "," + str(accuracy) + "," + str(error) + "\n"
        accuracy_list.append(accuracy)
        file.write(line.encode())

# question 5.2

plt.plot(k_list, accuracy_list)
plt.xlabel('K value')
plt.ylabel('Average accuracy')
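plt.show()  # display the accuracy-vs-k curve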
Example #38
import hw3_utils as utils
import classifier

# TEST for question 1
list1 = [1, 2, 3, 4, 5, 6, 7]
list2 = [7, 6, 5, 4, 3, 2, 1]

print(classifier.euclidean_distance(list1, list2))

# TEST for question 3.2

data = utils.load_data()
classifier.split_crosscheck_groups(data, 2)
print(classifier.load_k_fold_data(1)[1][0])
def test_para_num(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512],L1_reg=0.00, L2_reg=0.0001,
             batch_size=128, n_hiddenLayers=2,verbose=True):
    """
    Wrapper function for comparing a ConvNet and an MLP (hidden sizes matched to the ConvNet's feature maps) on the SVHN dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')
    
    ###########################################################################
    ################################## CNN ####################################
    ###########################################################################
    
    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # TODO: Construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        # (batch size, num input feature maps,image height, image width)
        image_shape=(batch_size,3,32,32),
        # (number of filters, num input feature maps, filter height, filter width)
        filter_shape=(nkerns[0],3,5,5),
        poolsize=(2,2)
    )

    # TODO: Construct the second convolutional pooling layer
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        # (32-5+1)/2
        image_shape=(batch_size,nkerns[0],14,14),
        filter_shape=(nkerns[1],nkerns[0],5,5),
        poolsize=(2,2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    layer2_input = layer1.output.flatten(2)

    # TODO: construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        # (14-5+1)/2
        n_in=nkerns[1] * 5 * 5,
        n_out=500,
        activation=T.nnet.sigmoid
    )

    # TODO: classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(
         input=layer2.output,
         n_in=500,
         n_out=10)
    
    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # TODO: create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
        n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
        
    ###########################################################################
    ################################## MLP ####################################
    ###########################################################################
    
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    n_hidden = [0, 0]
    n_hidden[0] = nkerns[0] * 14 * 14
    n_hidden[1] = nkerns[1] * 5 * 5
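    # The MLP's hidden-layer widths mirror the CNN's feature-map sizes
    # (nkerns[0]*14*14 and nkerns[1]*5*5), presumably so the two models are
    # comparable in size for this parameter-count experiment.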
    # TODO: construct a neural network, either MLP or CNN.
    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32*32*3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
        n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
Example #40
def main():
    # Variables used for debug
    skip_knn = True
    skip_tree = True
    skip_perc = True

    train_features, train_labels, test_features = load_data('data/Data.pickle')

    # Split once the dataset to two folds.
    folds = 2
    #split_crosscheck_groups(train_features, train_labels, folds)

    if skip_knn != True:
        # Evaluating KNN with different k value:
        k_list = [1, 3, 5, 7, 13]
        acc_list = []
        err_list = []
        with open('experiments6.csv', mode='w', newline='') as csv_file:
            exp_writer = csv.writer(csv_file)
            for k in k_list:
                knn_fac = knn_factory(k)
                err, acc = evaluate(knn_fac, folds)
                print("k=", k, " acc=", acc, " err=", err)
                exp_writer.writerow([k, acc, err])
                acc_list.append(acc)
                err_list.append(err)

        # Plot KNN Results
        plt.subplot(2, 1, 1)
        plt.plot(k_list, acc_list, '--', color='g')
        plt.plot(k_list, acc_list, 'bo')
        plt.ylabel("Accuracy")
        plt.xlabel("k")
        plt.xticks(k_list)
        plt.subplot(2, 1, 2)
        plt.plot(k_list, err_list, '--', color='r')
        plt.plot(k_list, err_list, 'bo')
        plt.ylabel("Error")
        plt.xlabel("k")
        plt.xticks(k_list)
        plt.tight_layout()
        plt.show()

    # Perform classification for Perceptron and Tree and write to files.
    with open('experiments12.csv', mode='w', newline='') as csv_file:
        exp_writer = csv.writer(csv_file)
        if not skip_tree:
            # Decision Tree experiment
            myTree = tree.DecisionTreeClassifier(criterion="entropy")
            err, acc = evaluate(myTree, folds)
            print("tree acc=", acc, " tree err=", err)
            exp_writer.writerow([1, acc, err])

        if not skip_perc:
            # Perceptron experiment
            myPerc = Perceptron(tol=1e-3, random_state=0)
            err, acc = evaluate(myPerc, folds)
            print("perceptron acc=", acc, " perceptron err=", err)
            exp_writer.writerow([2, acc, err])

    # Competition: Classify test_features
    print("Triple model")
    my_model = triple_model()
    my_model.fit(train_features, train_labels)
    res = my_model.final_predict(preprocessing.scale(test_features))
    write_prediction(res)
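    # Note (editor sketch): `evaluate`, `split_crosscheck_groups`, `knn_factory`,
    # `triple_model` and `write_prediction` are defined in the accompanying
    # homework modules. `evaluate(clf, k)` is assumed to run k-fold
    # cross-validation over the folds saved by split_crosscheck_groups and to
    # return (error, accuracy); roughly (load_k_fold_data is a hypothetical
    # helper, and clf may be a factory with .train() or an sklearn estimator):
    #
    #   def evaluate(clf, k):
    #       accs = []
    #       for i in range(k):
    #           train_x, train_y, test_x, test_y = load_k_fold_data(i)
    #           fitted = clf.train(train_x, train_y)
    #           accs.append(numpy.mean(fitted.predict(test_x) == test_y))
    #       return 1 - numpy.mean(accs), numpy.mean(accs)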
Example #41
def MY_CNN(learning_rate=0.1, n_epochs=0, batch_size=100):
    
    rng = numpy.random.RandomState(23455)
    
    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    test_im = test_set_x.get_value(borrow=True)

    #train_set_x_drop = drop(train_set_x, p=0.7)
    #valid_set_x_drop = drop(valid_set_x, p=0.7)
    #test_set_x_drop = drop(test_set_x, p=0.7)
    
    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.matrix('y')   # placeholder for targets; the autoencoder below
                        # reconstructs x, so y is never actually used

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')
    
    Input = x.reshape((batch_size, 3, 32, 32))
    
    ConvLayer1_input = drop(Input, p=0.7)
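    # Note (editor sketch): `drop` is defined elsewhere in this project; it is
    # used here to corrupt the input before reconstruction. A helper with the
    # same call signature (an assumption, not the project's implementation)
    # could look like:
    #
    #   from theano.tensor.shared_randomstreams import RandomStreams
    #   _srng = RandomStreams(seed=1234)
    #
    #   def drop(input, p=0.5):
    #       # keep each value with probability p, zero it otherwise
    #       mask = _srng.binomial(n=1, p=p, size=input.shape,
    #                             dtype=theano.config.floatX)
    #       return input * mask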

    ConvLayer1 = ConvLayer(
        rng, 
        input=ConvLayer1_input, 
        filter_shape=(64, 3, 3, 3), 
        image_shape=(batch_size, 3, 32, 32),
        padding='half'
    )
    
    ConvLayer2 = ConvLayer(
        rng, 
        input=ConvLayer1.output, 
        filter_shape=(64, 64, 3, 3), 
        image_shape=(batch_size, 64, 32, 32),
        padding='half'
    )
    
    MaxPoolLayer1 = MaxPooling(
        input=ConvLayer2.output, 
        poolsize=(2, 2), 
        ignore_border=False
    )
    
    ConvLayer3 = ConvLayer(
        rng, 
        input=MaxPoolLayer1.output, 
        filter_shape=(128, 64, 3, 3), 
        image_shape=(batch_size, 64, 16, 16),
        padding='half'
    )
    
    ConvLayer4 = ConvLayer(
        rng, 
        input=ConvLayer3.output, 
        filter_shape=(128, 128, 3, 3), 
        image_shape=(batch_size, 128, 16, 16),
        padding='half'
    )
    
    MaxPoolLayer2 = MaxPooling(
        input=ConvLayer4.output, 
        poolsize=(2, 2), 
        ignore_border=False
    )

    ConvLayer5 = ConvLayer(
        rng, 
        input=MaxPoolLayer2.output, 
        filter_shape=(256, 128, 3, 3), 
        image_shape=(batch_size, 128, 8, 8),
        padding='half'
    )
    
    UpPoolLayer2 = Unpooling2D(
        input=ConvLayer5.output, 
        poolsize=(2, 2)
    )
    
    DeconvLayer5 = ConvLayer(
        rng, 
        input=UpPoolLayer2.output, 
        filter_shape=(128, 256, 3, 3), 
        image_shape=(batch_size, 256, 16, 16),
        padding='half'
    )
    
    DeconvLayer4 = ConvLayer(
        rng, 
        input=DeconvLayer5.output, 
        filter_shape=(128, 128, 3, 3), 
        image_shape=(batch_size, 128, 16, 16),
        padding='half'
    )
    
    # ADD INPUTS
    UpPoolLayer1_input = ConvLayer4.output + DeconvLayer4.output
    
    UpPoolLayer1 = Unpooling2D(
        input=UpPoolLayer1_input, 
        poolsize=(2, 2)
    )
    
    DeconvLayer3 = ConvLayer(
        rng, 
        input=UpPoolLayer1.output, 
        filter_shape=(64, 128, 3, 3), 
        image_shape=(batch_size, 128, 32, 32),
        padding='half'
    )
    
    DeconvLayer2 = ConvLayer(
        rng, 
        input=DeconvLayer3.output, 
        filter_shape=(64, 64, 3, 3), 
        image_shape=(batch_size, 64, 32, 32),
        padding='half'
    )
    
    # ADD INPUTS
    DeconvLayer1_input = ConvLayer2.output + DeconvLayer2.output
    
    DeconvLayer1 = ConvLayer(
        rng, 
        input=DeconvLayer1_input, 
        filter_shape=(3, 64, 3, 3), 
        image_shape=(batch_size, 64, 32, 32),
        padding='half'
    )
    
    Output = DeconvLayer1.output
    
    # create a list of all model parameters to be fit by gradient descent
    params = (
        ConvLayer1.params 
        + ConvLayer2.params  
        + ConvLayer3.params
        + ConvLayer4.params
        + ConvLayer5.params
        + DeconvLayer1.params
        + DeconvLayer2.params
        + DeconvLayer3.params
        + DeconvLayer4.params
        + DeconvLayer5.params
    )
    
    # mean squared reconstruction error between the network output and the
    # uncorrupted input (denoising-autoencoder-style objective)
    # cost = T.mean((Output - y) ** 2)
    cost = T.mean(T.sqr(Output - Input))

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        Output,
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size]#,
            #y: test_set_x[index * batch_size: (index + 1) * batch_size]       
        }
    )

    validate_model = theano.function(
        [index],
        cost,
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size]#,
            #y: valid_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )
    
    # compute the gradient of cost with respect to theta (stored in params);
    # here the gradients are taken inside the momentum update loop below, so
    # a plain list of gradients is not needed
    # gparams = [T.grad(cost, param) for param in params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    momentum = theano.shared(numpy.cast[theano.config.floatX](0.5), name='momentum')
    updates = []
    for param in params:
        # per-parameter velocity, initialised to zero
        param_update = theano.shared(param.get_value() * numpy.cast[theano.config.floatX](0.))
        updates.append((param, param - learning_rate * param_update))
        updates.append(
            (param_update, 
             momentum*param_update + (numpy.cast[theano.config.floatX](1.) - momentum)*T.grad(cost, param))
        )
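        # Written out, each loop iteration adds the classic momentum-SGD pair:
        #   velocity <- momentum * velocity + (1 - momentum) * grad(cost, param)
        #   param    <- param - learning_rate * velocity
        # (Theano applies all updates of one function call simultaneously, so
        # the parameter step uses the velocity from the previous iteration.)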
            
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size]#,
            #y: train_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    train_nn_restore(
        train_model, 
        validate_model, 
        test_model,
        n_train_batches, 
        n_valid_batches,
        n_test_batches, 
        n_epochs,
        verbose = True
    )
    
    
    plt.figure(figsize=(16,6))
    # drop_input = T.dtensor4('drop_input')
    imdrop = theano.function([x], drop(x, p=0.7))
    drop_image = imdrop(test_im[0:8])
    restored = test_model(0)[0:8,:,:,:]
    # print restored.shape
    # print test_im[0]
    for i in range(8):
        plt.subplot(3,8,i+1)
        img_original = (np.reshape(test_im[i],(3,32,32))).transpose(1,2,0)
        plt.imshow(img_original)
        plt.xticks([])
        plt.yticks([])
        plt.xlabel('Original Image')

        plt.subplot(3,8,i+9)
        img_drop = (np.reshape(drop_image[i],(3,32,32))).transpose(1,2,0)
        plt.imshow(img_drop)
        plt.xticks([])
        plt.yticks([])
        plt.xlabel('Corrupted Image')

        plt.subplot(3,8,i+17)
        img_restored = (restored[i,:,:,:]).transpose(1,2,0)
        plt.imshow(img_restored)
        plt.xticks([])
        plt.yticks([])
        plt.xlabel('Restored Image')
Example #42
def test_lenet(learning_rate=0.1,
               n_epochs=1000,
               nkerns=[16, 512],
               batch_size=200,
               filter_size=5,
               dnn_layers=1,
               n_hidden=500,
               gabor=False,
               lmbda=None,
               verbose=False):
    """
    Wrapper function for testing LeNet on SVHN dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    """
    print(test_lenet.__name__, nkerns, filter_size, gabor, lmbda)

    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    if gabor is True:
        # Generate Gabor filters
        filters = build_gabor(filter_size, nkerns[0], lmbda)
        # filters = numpy.array([filters[i][0] for i in range(len(filters))])
        filters = numpy.array([filters[i] for i in range(len(filters))])
        # print filters.shape
        filter_weights = numpy.tile(filters,
                                    (1, 3, 1)).reshape(nkerns[0], 3,
                                                       filter_size,
                                                       filter_size)
        layer0 = LeNetConvPoolLayer(rng,
                                    input=layer0_input,
                                    image_shape=(batch_size, 3, 32, 32),
                                    filter_shape=(nkerns[0], 3, filter_size,
                                                  filter_size),
                                    poolsize=(2, 2),
                                    weights=filter_weights)
        print('using fixed Gabor filter weights in layer 0')
    else:
        # TODO: Construct the first convolutional pooling layer
        layer0 = LeNetConvPoolLayer(rng,
                                    input=layer0_input,
                                    image_shape=(batch_size, 3, 32, 32),
                                    filter_shape=(nkerns[0], 3, filter_size,
                                                  filter_size),
                                    poolsize=(2, 2))
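    # Note (editor sketch): `build_gabor(filter_size, n_filters, lmbda)` is
    # defined elsewhere in this project and returns one fixed Gabor kernel per
    # first-layer filter. A rough numpy version for odd filter sizes (an
    # assumption, not the project's actual implementation) could be:
    #
    #   def build_gabor(size, n_filters, lmbda, sigma=2.0, gamma=0.5, psi=0.0):
    #       half = size // 2
    #       ys, xs = numpy.mgrid[-half:half + 1, -half:half + 1]
    #       filters = []
    #       for k in range(n_filters):
    #           theta = numpy.pi * k / n_filters            # evenly spaced orientations
    #           x_t = xs * numpy.cos(theta) + ys * numpy.sin(theta)
    #           y_t = -xs * numpy.sin(theta) + ys * numpy.cos(theta)
    #           g = (numpy.exp(-(x_t ** 2 + (gamma * y_t) ** 2) / (2 * sigma ** 2))
    #                * numpy.cos(2 * numpy.pi * x_t / lmbda + psi))
    #           filters.append(g.astype(theano.config.floatX))
    #       return filters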

    # TODO: Construct the second convolutional pooling layer
    # spatial size after the first conv+pool stage, e.g. (32 - 5 + 1) // 2 = 14
    i_s_1 = (32 - filter_size + 1) // 2

    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], i_s_1,
                                             i_s_1),
                                filter_shape=(nkerns[1], nkerns[0],
                                              filter_size, filter_size),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    layer2_input = layer1.output.flatten(2)

    # TODO: construct a fully-connected sigmoidal layer
    # spatial size after the second conv+pool stage, e.g. (14 - 5 + 1) // 2 = 5
    i_s_2 = (i_s_1 - filter_size + 1) // 2

    if hasattr(n_hidden, '__iter__'):
        assert (len(n_hidden) == dnn_layers)
    else:
        n_hidden = (n_hidden, ) * dnn_layers
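        # e.g. n_hidden=500 with dnn_layers=3 expands to n_hidden = (500, 500, 500)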

    DNN_Layers = []
    for i in range(dnn_layers):
        h_input = layer2_input if i == 0 else DNN_Layers[i - 1].output
        h_in = nkerns[1] * i_s_2 * i_s_2 if i == 0 else n_hidden[i - 1]
        DNN_Layers.append(
            HiddenLayer(rng=rng,
                        input=h_input,
                        n_in=h_in,
                        n_out=n_hidden[i],
                        activation=T.tanh))

    # layer2 = HiddenLayer(
    #     rng,
    #     input=layer2_input,
    #     n_in=nkerns[1] * i_s_2 * i_s_2,
    #     n_out=500,
    #     activation=T.tanh
    # )

    # TODO: classify the values of the fully-connected sigmoidal layer
    LR_Layer = LogisticRegression(input=DNN_Layers[-1].output,
                                  n_in=n_hidden[-1],
                                  n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = LR_Layer.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        LR_Layer.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        LR_Layer.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # TODO: create a list of all model parameters to be fit by gradient descent
    params = list(LR_Layer.params)  # copy so the += below doesn't mutate LR_Layer.params
    for layer in DNN_Layers:
        params += layer.params
    if gabor is True:
        print('Gabor filters are fixed; excluding layer0 params from training')
        params += layer1.params
    else:
        params += layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model, n_train_batches,
             n_valid_batches, n_test_batches, n_epochs, verbose)
def test_CDNN(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512],
        batch_size=200, n_hidden=[200,200,200], verbose=True):
    """
    Wrapper function for testing CNN in cascade with DNN
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # TODO: Construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        # (batch size, num input feature maps,image height, image width)
        image_shape=(batch_size,3,32,32),
        # number of filters, num input feature maps,filter height, filter width)
        filter_shape=(nkerns[0],3,5,5),
        poolsize=(2,2)
    )

    # TODO: Construct the second convolutional pooling layer
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        # spatial size after the first stage: (32 - 5 + 1) / 2 = 14
        image_shape=(batch_size, nkerns[0], 14, 14),
        filter_shape=(nkerns[1],nkerns[0],5,5),
        poolsize=(2,2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    layer2_input = layer1.output.flatten(2)
    
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 5 * 5,
        n_out=n_hidden[0],
        activation=T.nnet.sigmoid
    )
    
    layer3 = HiddenLayer(
        rng,
        input=layer2.output,
        n_in=n_hidden[0],
        n_out=n_hidden[1],
        activation=T.nnet.sigmoid
    )

    layer4 = HiddenLayer(
        rng,
        input=layer3.output,
        n_in=n_hidden[1],
        n_out=n_hidden[2],
        activation=T.nnet.sigmoid
    )
    
    layer5 = LogisticRegression(
            input=layer4.output,
            n_in=n_hidden[2],
            n_out=10
    )

    # the cost we minimize during training is the NLL of the model
    cost = layer5.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer5.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer5.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # TODO: create a list of all model parameters to be fit by gradient descent
    params = layer5.params + layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
        n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
Example #44
def test_convnet(learning_rate=0.1,
                 n_epochs=1000,
                 nkerns=[16, 512, 20],
                 batch_size=200,
                 verbose=False,
                 filter_size=2):
    """
    Wrapper function for testing Multi-Stage ConvNet on SVHN dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # TODO: Construct the first convolutional pooling layer:
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 3, 32, 32),
                                filter_shape=(nkerns[0], 3, filter_size,
                                              filter_size),
                                poolsize=(2, 2))

    # TODO: Construct the second convolutional pooling layer
    new_shape = (32 - filter_size + 1) // 2
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], new_shape,
                                             new_shape),
                                filter_shape=(nkerns[1], nkerns[0],
                                              filter_size, filter_size),
                                poolsize=(2, 2))

    # Combine Layer 0 output and Layer 1 output
    # TODO: downsample the first layer output to match the size of the second
    # layer output.
    # TODO: change ds
    layer0_output_ds = downsample.max_pool_2d(input=layer0.output,
                                              ds=(2, 2),
                                              ignore_border=True)
    # concatenate layer
    layer2_input = T.concatenate([layer1.output, layer0_output_ds], axis=1)

    # TODO: Construct the third convolutional pooling layer
    new_shape = (new_shape - filter_size + 1) // 2
    layer2 = LeNetConvPoolLayer(rng,
                                input=layer2_input,
                                image_shape=(batch_size, nkerns[0] + nkerns[1],
                                             new_shape, new_shape),
                                filter_shape=(nkerns[2], nkerns[0] + nkerns[1],
                                              filter_size, filter_size),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[2] * new_shape * new_shape).
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    new_shape = (new_shape - filter_size + 1) // 2
    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=nkerns[2] * new_shape * new_shape,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # TODO: create a list of all model parameters to be fit by gradient descent
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    return train_nn(train_model, validate_model, test_model, n_train_batches,
                    n_valid_batches, n_test_batches, n_epochs, verbose)
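# Example invocation (editor sketch; the argument values are illustrative, not
# taken from the original experiments):
#
#   if __name__ == '__main__':
#       test_convnet(learning_rate=0.1, n_epochs=128, nkerns=[16, 512, 20],
#                    batch_size=200, verbose=True, filter_size=2)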