Example #1
    def __init__(self, words, pos, rels, cpos, langs, w2i, ch, options):

        global dy
        import dynet as dy  # import here so we don't load Dynet if just running parser.py --help for example

        self.model = dy.ParameterCollection()
        self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)

        self.activations = {
            'tanh': dy.tanh,
            'sigmoid': dy.logistic,
            'relu': dy.rectify,
            'tanh3': lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)),
        }
        self.activation = self.activations[options.activation]

        self.oracle = options.oracle


        self.headFlag = options.headFlag
        self.rlMostFlag = options.rlMostFlag
        self.rlFlag = options.rlFlag
        self.k = options.k

        #dimensions depending on extended features
        self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag or self.rlMostFlag else 0)
        self.feature_extractor = FeatureExtractor(self.model, options, words,
                                                  rels, langs, w2i, ch, self.nnvecs)
        self.irels = self.feature_extractor.irels


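        # MLP input size: (k+1) stack/buffer tokens, nnvecs vectors per token,
        # 2*lstm_output_size dims per vector (the biLSTM concatenates both directions)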
        mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (self.k + 1)
        self.unlabeled_MLP = MLP(self.model, 'unlabeled', mlp_in_dims, options.mlp_hidden_dims,
                                 options.mlp_hidden2_dims, 4, self.activation)
        self.labeled_MLP = MLP(self.model, 'labeled', mlp_in_dims, options.mlp_hidden_dims,
                               options.mlp_hidden2_dims, 2 * len(self.irels) + 2, self.activation)
Example #2
    def train(self, train_x, train_y):
        x_ids, x_train, y_train = self.get_xys(train_x, train_y)
        self.get_train_info(x_train, y_train)
        if self.task_undefined:
            return

        if self.classifier_type == 'perceptron':
            #averaged perceptron
            from sklearn.linear_model import SGDClassifier
            self.model = SGDClassifier(loss="perceptron",
                                       eta0=1,
                                       learning_rate="constant",
                                       penalty=None,
                                       average=10)
            self.model.fit(x_train, y_train)

        elif self.classifier_type == 'mlp':
            from multilayer_perceptron import MLP
            data = list(zip(x_train, y_train))  # materialize: zip is a one-shot iterator in Python 3
            labels = set(y_train)
            input_size = len(x_train[0])
            out_size = len(labels)
            hidden_size = input_size  # same as Adi et al.
            self.model = MLP(input_size,
                             hidden_size,
                             out_size,
                             labels,
                             epochs=100)
            self.model.train(data)
Example #3
def main():
    import math
    import random
    from pylab import plot, show

    def func(x):
        return math.pow(x, 2.0) - 10.0 * x + 21

    # Sample around the roots x'=3, x''=7: here from 1.0 to 9.0 in 40 steps
    train_set = tuple(
        ([i], [func(i)]) for i in util.divide_arange(1.0, 9.0, 40))

    mlp = MLP(1, [10, 30, 1], ACTIVATIONS_FUNCTIONS['sigmoid'],
              ACTIVATIONS_FUNCTIONS['custom'])

    mlp.randomise_weights(lambda: random.uniform(-0.05, 0.05))

    sup = Supervisor(mlp, 0.001)

    sup.train_set(train_set, 0.0005, 10000)

    validation = tuple(
        ([x], [func(x)]) for x in util.divide_arange(-1.0, 11.0, 200))

    plot([i[0][0] for i in validation], [i[1][0] for i in validation], 'b',
         [i[0][0] for i in validation],
         [mlp.predict(i[0]) for i in validation], 'r')
    show()
Example #4
def main():
    # https://www.mathworks.com/help/deeplearning/ug/improve-neural-network-generalization-and-avoid-overfitting.html
    import math
    def func(x):
        return 2*math.pow(x, 3) - math.pow(x, 2) + 10*x - 4

    train_set = tuple(
        ([i], [func(i)])
        for i in util.divide_arange(-3.0, 3.0, 40)
    )

    import random
    from pylab import plot, show

    mlp = MLP(1, [10, 30, 1],
              ACTIVATIONS_FUNCTIONS['sigmoid'],
              ACTIVATIONS_FUNCTIONS['linear'])

    mlp.randomise_weights(lambda: random.uniform(-1.0, 1.0))

    sup = Supervisor(mlp, 0.01)

    sup.train_set(train_set, 0.005, 3000)

    validation = tuple(
        ([x], [func(x)])
        for x in util.divide_arange(-4.0, 4.0, 200)
    )

    plot(
        [i[0][0] for i in validation], [i[1][0] for i in validation], 'b',
        [i[0][0] for i in validation], [mlp.predict(i[0]) for i in validation], 'r'
    )
    show()
Example #5
    def __init__(self, vocab, options):

        # import here so we don't load Dynet if just running parser.py --help for example
        from multilayer_perceptron import MLP
        from feature_extractor import FeatureExtractor
        global dy
        import dynet as dy

        global LEFT_ARC, RIGHT_ARC, SHIFT, SWAP
        LEFT_ARC, RIGHT_ARC, SHIFT, SWAP = 0, 1, 2, 3

        self.model = dy.ParameterCollection()
        self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)

        self.activations = {
            'tanh': dy.tanh,
            'sigmoid': dy.logistic,
            'relu': dy.rectify,
            'tanh3': lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)),
        }
        self.activation = self.activations[options.activation]

        self.oracle = options.oracle

        self.headFlag = options.headFlag
        self.rlMostFlag = options.rlMostFlag
        self.rlFlag = options.rlFlag
        self.k = options.k
        self.recursive_composition = options.use_recursive_composition
        #ugly hack

        #dimensions depending on extended features
        self.nnvecs = ((1 if self.headFlag else 0)
                       + (2 if self.rlFlag or self.rlMostFlag else 0)
                       + (1 if self.recursive_composition else 0))
        self.feature_extractor = FeatureExtractor(self.model, options, vocab,
                                                  self.nnvecs)
        self.irels = self.feature_extractor.irels

        if options.no_bilstms > 0:
            mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (
                self.k + 1)
        else:
            mlp_in_dims = options.lstm_input_size * self.nnvecs * (self.k + 1)

        self.unlabeled_MLP = MLP(self.model, 'unlabeled', mlp_in_dims,
                                 options.mlp_hidden_dims,
                                 options.mlp_hidden2_dims, 4, self.activation)
        self.labeled_MLP = MLP(self.model, 'labeled', mlp_in_dims,
                               options.mlp_hidden_dims,
                               options.mlp_hidden2_dims,
                               2 * len(self.irels) + 2, self.activation)
Example #6
def main():
    data = load_images_data()
    data = convert_data(data)
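    # each item of data is a (pixel values, target vector, image name) triple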

    input_layer = len(data[0][0])
    output_layer = len(data[0][1])

    train_set = tuple((pixels, answer) for pixels, answer, name in data)

    mlp = MLP(input_layer, [input_layer, 8, output_layer],
              ACTIVATIONS_FUNCTIONS['sigmoid'])

    sup = Supervisor(mlp)

    sup.train_set(train_set, 0.001, 100)

    for input_array, target_array, name in data:
        output = [round(result) for result in mlp.predict(input_array)]
        print(f"{name} - Expected={target_array} :: Predicted={output}")
Example #7
def main():
    mlp = MLP(2, [2, 1], ACTIVATIONS_FUNCTIONS['sigmoid'])
    sup = Supervisor(mlp)

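    # XOR truth table: the target is 1 iff the inputs differ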
    train_set = (
        ([0, 0], [0]),
        ([0, 1], [1]),
        ([1, 0], [1]),
        ([1, 1], [0]),
    )

    start_time = time.time()
    sup.train_set(train_set, 0.0005, 10000)
    end_time = time.time()
    print(f"Spent time {end_time-start_time}")

    buffer = [''] * len(train_set)
    for idx, (input_array, target_array) in enumerate(train_set):
        output = mlp.predict(input_array)
        buffer[idx] = (f"{input_array[0]} ^ {input_array[1]} = "
                       f"{output[0]} :: {target_array[0]}")
    print('\n'.join(buffer))
Example #8
    def finetuning(self, x, labels):
        # assign weights
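        # layer widths: input dim, one hidden layer per pretrained RBM, then the label layer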
        layers = [x.shape[1]] + [rbm.b.shape[1]
                                 for rbm in self.rbms] + [self.n_labels]
        mlp = MLP(act_type='Sigmoid',
                  opt_type='Adam',
                  layers=layers,
                  epochs=20,
                  learning_rate=0.01,
                  lmbda=1e-2)

        mlp.w = [rbm.w for rbm in self.rbms] + \
            [np.random.randn(self.rbms[-1].w.shape[1], self.n_labels)]
        mlp.b = [rbm.b for rbm in self.rbms] + \
            [np.random.randn(1, self.n_labels)]
        mlp.fit(x, labels)
        # copy the fine-tuned weights back into the RBMs
        for rbm, w, b in zip(self.rbms, mlp.w[:-1], mlp.b[:-1]):
            rbm.w = w
            rbm.b = b
        # keep the final feed-forward (classification) layer separately
        self.dense = {'w': mlp.w[-1], 'b': mlp.b[-1]}
Example #9
def multilayer_perceptron(tweet_features, train_labels):
    clf_mlp = MLP(n_hidden=100)
    clf_mlp.fit(tweet_features, train_labels)
    return clf_mlp
Example #10
    def __init__(self, words, pos, rels, w2i, options):
        self.model = Model()
        self.trainer = AdamTrainer(self.model)
        random.seed(1)

        # available activation functions
        self.activations = {
            'tanh': tanh,
            'sigmoid': logistic,
            'relu': rectify,
            'cube': cube
        }
        self.activation = self.activations[options.activation]

        self.oracle = options.oracle
        self.ldims = options.lstm_dims * 2
        self.wdims = options.wembedding_dims
        self.pdims = options.pembedding_dims
        self.rdims = options.rembedding_dims
        self.layers = options.lstm_layers
        self.wordsCount = words
        self.vocab = {word: ind + 3 for word, ind in w2i.iteritems()}
        self.pos = {word: ind + 3 for ind, word in enumerate(pos)}
        self.rels = {word: ind for ind, word in enumerate(rels)}
        self.irels = rels

        self.headFlag = options.headFlag
        self.rlMostFlag = options.rlMostFlag
        self.rlFlag = options.rlFlag
        self.k = options.window

        self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag
                                                     or self.rlMostFlag else 0)

        self.external_embedding = None
        if options.external_embedding is not None:
            external_embedding_fp = open(options.external_embedding, 'r')
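            # skip the first line (typically a count/dimension header in word2vec text format)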
            external_embedding_fp.readline()
            self.external_embedding = {
                line.split(' ')[0]:
                [float(f) for f in line.strip().split(' ')[1:]]
                for line in external_embedding_fp
            }
            external_embedding_fp.close()

            self.edim = len(self.external_embedding.values()[0])
            self.noextrn = [0.0 for _ in xrange(self.edim)]
            self.extrnd = {
                word: i + 3
                for i, word in enumerate(self.external_embedding)
            }
            self.elookup = self.model.add_lookup_parameters(
                (len(self.external_embedding) + 3, self.edim))
            for word, i in self.extrnd.iteritems():
                self.elookup.init_row(i, self.external_embedding[word])
            self.extrnd['*PAD*'] = 1
            self.extrnd['*INITIAL*'] = 2

            print 'Loaded external word embeddings. Vector dimension:', self.edim
            # logger.info('Loaded external word embeddings. Vector dimension: %s', self.edim)

        dims = self.wdims + self.pdims + (self.edim if self.external_embedding
                                          is not None else 0)
        self.blstmFlag = options.blstmFlag
        self.bibiFlag = options.bibiFlag
        self.attentionFlag = options.attentionFlag

        if self.bibiFlag:
            self.surfaceBuilders = [
                VanillaLSTMBuilder(1, dims, self.ldims * 0.5, self.model),
                VanillaLSTMBuilder(1, dims, self.ldims * 0.5, self.model)
            ]
            self.bsurfaceBuilders = [
                VanillaLSTMBuilder(1, self.ldims, self.ldims * 0.5,
                                   self.model),
                VanillaLSTMBuilder(1, self.ldims, self.ldims * 0.5, self.model)
            ]
        elif self.blstmFlag:
            if self.layers > 0:
                self.surfaceBuilders = [
                    VanillaLSTMBuilder(self.layers, dims, self.ldims * 0.5,
                                       self.model),
                    LSTMBuilder(self.layers, dims, self.ldims * 0.5,
                                self.model)
                ]
            else:
                self.surfaceBuilders = [
                    SimpleRNNBuilder(1, dims, self.ldims * 0.5, self.model),
                    LSTMBuilder(1, dims, self.ldims * 0.5, self.model)
                ]

        self.hidden_units = options.hidden_units
        self.hidden2_units = options.hidden2_units
        self.vocab['*PAD*'] = 1
        self.pos['*PAD*'] = 1

        self.vocab['*INITIAL*'] = 2
        self.pos['*INITIAL*'] = 2

        self.wlookup = self.model.add_lookup_parameters(
            (len(words) + 3, self.wdims))
        self.plookup = self.model.add_lookup_parameters(
            (len(pos) + 3, self.pdims))
        self.rlookup = self.model.add_lookup_parameters(
            (len(rels), self.rdims))

        self.word2lstm = self.model.add_parameters(
            (self.ldims, self.wdims + self.pdims +
             (self.edim if self.external_embedding is not None else 0)))
        self.word2lstmbias = self.model.add_parameters((self.ldims))
        self.lstm2lstm = self.model.add_parameters(
            (self.ldims, self.ldims * self.nnvecs + self.rdims))
        self.lstm2lstmbias = self.model.add_parameters((self.ldims))

        # forward LSTM
        # first hidden layer
        self.hidLayer = self.model.add_parameters(
            (self.hidden_units, self.ldims * self.nnvecs * (self.k + 1)))
        self.hidBias = self.model.add_parameters((self.hidden_units))

        # second hidden layer
        self.hid2Layer = self.model.add_parameters(
            (self.hidden2_units, self.hidden_units))
        self.hid2Bias = self.model.add_parameters((self.hidden2_units))

        # output layer
        self.outLayer = self.model.add_parameters(
            (3, self.hidden2_units
             if self.hidden2_units > 0 else self.hidden_units))
        self.outBias = self.model.add_parameters((3))

        # backward LSTM
        self.rhidLayer = self.model.add_parameters(
            (self.hidden_units, self.ldims * self.nnvecs * (self.k + 1)))
        self.rhidBias = self.model.add_parameters((self.hidden_units))

        self.rhid2Layer = self.model.add_parameters(
            (self.hidden2_units, self.hidden_units))
        self.rhid2Bias = self.model.add_parameters((self.hidden2_units))

        self.routLayer = self.model.add_parameters(
            (2 * len(self.irels) + 1, self.hidden2_units
             if self.hidden2_units > 0 else self.hidden_units))
        self.routBias = self.model.add_parameters((2 * len(self.irels) + 1))

        # attention layer
        encoder_input_dim = options.encoder_output_size
        self.stack_encoder = AttentionNetwork(
            model=self.model,
            input_dim=encoder_input_dim,
            output_dim=options.encoder_output_size,
            rnn_dropout_rate=0.33)

        self.buffer_encoder = AttentionNetwork(
            model=self.model,
            input_dim=encoder_input_dim,
            output_dim=options.encoder_output_size,
            rnn_dropout_rate=0.33)

        self.unlabeled_MLP = MLP(self.model, 'unlabeled',
                                 encoder_input_dim * 2, options.hidden_units,
                                 options.hidden2_units, 4, self.activation)

        self.labeled_MLP = MLP(self.model, 'labeled', encoder_input_dim * 2,
                               options.hidden_units, options.hidden2_units,
                               2 * len(self.irels) + 2, self.activation)
Example #11
from mnist import MNIST
from multilayer_perceptron import MLP

mnist = MNIST(one_hot_encoding=True, z_score=True)
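# one_hot_encoding / z_score presumably select one-hot label vectors and z-score feature normalization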

X_train = mnist.train_images
y_train = mnist.train_labels
X_test = mnist.test_images
y_test = mnist.test_labels

clf = MLP(hidden_layer_sizes=(500, 300), activation='swish', verbose=True)

clf.fit(X_train, y_train)

test_loss = clf._compute_loss(X_test, y_test)
test_acc = clf.score(X_test, y_test)
print('\nTest loss: {:.3}\tTest accuracy: {:.3}'.format(test_loss, test_acc))
Example #12
def test_MLP_model_mnist(dataset_name='mnist.pkl.gz',
                         learning_rate=0.01,
                         L1_reg=0.00,
                         L2_reg=0.0001,
                         n_epochs=1000,
                         batch_size=20,
                         n_hidden=500):
    # Set up the dataset
    dataset = load_data(dataset_name)
    # Split the data into a training, validation and test set
    train_data, train_labels = dataset[0]
    test_data, test_labels = dataset[1]
    validation_data, validation_labels = dataset[2]
    # Compute number of minibatches for each set
    n_train_batches = train_data.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = validation_data.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_data.get_value(borrow=True).shape[0] / batch_size
    data_dim = (28, 28) # The dimension of each image in the dataset
    data_classes = 10 # The number of classes within the data
    
    # Build the model
    # ---------------

    # Allocate symbolic variables for data
    index = T.lscalar() # This is the index to a minibatch
    x = T.matrix('x') # Data (rasterized images)
    y = T.ivector('y') # Labels (1d vector of ints)

    rng = np.random.RandomState(1234)

    # Construct MLP class
    classifier = MLP(rng=rng,
                     input=x,
                     n_in=data_dim[0]*data_dim[1],
                     n_hidden=n_hidden,
                     n_out=data_classes)

    # Cost to minimize during training
    # Add regularization terms
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr)

    # Compile a Theano function that computes mistakes made by the model on a minibatch
    test_model = th.function(inputs=[index], # This function is for the test data   
                             outputs=classifier.errors(y),
                             givens={x: test_data[index * batch_size: (index + 1) * batch_size],
                                     y: test_labels[index * batch_size: (index + 1) * batch_size]})
    validate_model = th.function(inputs=[index], # This function is for the validation data    
                                 outputs=classifier.errors(y),
                                 givens={x: validation_data[index * batch_size: (index + 1) * batch_size],
                                         y: validation_labels[index * batch_size: (index + 1) * batch_size]})
    # Compute the gradient of cost with respect to theta
    grad_params = [T.grad(cost,param) for param in classifier.params]

    # Specify how to update model parameters as a list of (variable, update expression) pairs
    updates = [(param, param - learning_rate * grad_param) for param, grad_param in zip(classifier.params, grad_params)]

    # Compile Theano function that returns the cost and updates parameters of model based on update rules
    train_model = th.function(inputs=[index], # Index in minibatch that defines x with label y   
                             outputs=cost, # Cost/loss associated with x,y
                             updates=updates,
                             givens={x: train_data[index * batch_size: (index + 1) * batch_size],
                                     y: train_labels[index * batch_size: (index + 1) * batch_size]})

    # Train the model
    # ---------------

    # Setup the early-stopping parameters
    patience = 10000 # Minimum number of examples to examine
    patience_increase = 2 # How much longer to wait once a new best is found
    improvement_threshold = 0.995 # Value of a significant relative improvement
    validation_frequency = min(n_train_batches, patience / 2) # Number of minibatches before validating
    best_validation_loss = np.inf
    test_score = 0
    start_time = time.clock()

    # Setup the training loop
    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            # Set the iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                # Compute the zero-one loss on the validation set
                validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch,
                                                                             minibatch_index + 1,
                                                                             n_train_batches,
                                                                             this_validation_loss * 100.))
                # Check if current validation score is the best
                if this_validation_loss < best_validation_loss:
                    # Increase patience if the loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss
                    # Test on test set
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = np.mean(test_losses)
                    print('epoch %i, minibatch %i/%i, test error of best model %f %%' % (epoch,
                                                                                         minibatch_index + 1,
                                                                                         n_train_batches,
                                                                                         test_score * 100.))
            # Stop the loop if we have exhausted our patience
            if patience <= iter:
                done_looping = True
                break
    # The loop has ended so record the time it took
    end_time = time.clock()
    # Print out results and timing information
    print('Optimization complete with best validation score of %f %%, with test performance %f %%' % (best_validation_loss * 100.,
                                                                                                      test_score * 100.)) 
    print 'The code ran for %d epochs with %f epochs/sec' % (epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % ((end_time - start_time)))
Example #13
    def __init__(self, vocab, options):

        # import here so we don't load Dynet if just running parser.py --help for example
        from multilayer_perceptron import MLP
        from feature_extractor import FeatureExtractor
        global dy
        import dynet as dy

        global LEFT_ARC, RIGHT_ARC, SHIFT, SWAP
        LEFT_ARC, RIGHT_ARC, SHIFT, SWAP = 0, 1, 2, 3

        global NO_COMP, SOFT_COMP, HARD_COMP, GEN_COMP
        NO_COMP, HARD_COMP, SOFT_COMP, GEN_COMP = 0, 1, 2, 3
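        # composition modes: NO_COMP disables composition; HARD_COMP/SOFT_COMP
        # compose over the functional relations below; GEN_COMP uses all relations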

        self.composition = options.nucleus_composition

        all_rels = vocab[5]
        functional_rels = ['det', 'case', 'clf', 'cop', 'mark', 'aux', 'cc']
        if self.composition in [HARD_COMP, SOFT_COMP]:
            self.compositional_relations = functional_rels
        elif self.composition in [GEN_COMP]:
            self.compositional_relations = all_rels
        else:
            self.compositional_relations = []

        self.compositional_relations_dict = {
            rel: idx
            for idx, rel in enumerate(self.compositional_relations)
        }

        self.model = dy.ParameterCollection()
        self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)

        self.activations = {
            'tanh': dy.tanh,
            'sigmoid': dy.logistic,
            'relu': dy.rectify,
            'tanh3': lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)),
        }
        self.activation = self.activations[options.activation]

        self.oracle = options.oracle

        self.headFlag = options.headFlag
        self.rlMostFlag = options.rlMostFlag
        self.rlFlag = options.rlFlag
        self.k = options.k

        #dimensions depending on extended features
        self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag
                                                     or self.rlMostFlag else 0)
        self.feature_extractor = FeatureExtractor(self.model, options, vocab,
                                                  self.nnvecs)
        self.irels = self.feature_extractor.irels

        if options.no_bilstms > 0:
            mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (
                self.k + 1)
        else:
            mlp_in_dims = self.feature_extractor.lstm_input_size * self.nnvecs * (
                self.k + 1)
        print("The size of the MLP input layer is {0}".format(mlp_in_dims))

        if self.composition in [SOFT_COMP, GEN_COMP]:
            rel_emb_sz = 10
            self.cmp_rel_lookup = self.model.add_lookup_parameters(
                (len(self.compositional_relations), rel_emb_sz))
            cmb_sz = 2 * 2 * options.lstm_output_size + rel_emb_sz
            out_sz = 2 * options.lstm_output_size
            self.combiner_W1 = self.model.add_parameters((out_sz, cmb_sz),
                                                         name='cmbW1')
            self.combiner_b1 = self.model.add_parameters(out_sz, name='cmbb1')

        self.unlabeled_MLP = MLP(self.model, 'unlabeled', mlp_in_dims,
                                 options.mlp_hidden_dims,
                                 options.mlp_hidden2_dims, 4, self.activation)
        self.labeled_MLP = MLP(self.model, 'labeled', mlp_in_dims,
                               options.mlp_hidden_dims,
                               options.mlp_hidden2_dims,
                               2 * len(self.irels) + 2, self.activation)
Example #14
    def __init__(self, words, pos, rels, cpos, langs, w2i, ch, options):
        """
        0 = LA, 1 = RA, 2 = SH, 3 = SW
        """

        global dy
        import dynet as dy  # import here so we don't load Dynet if just running parser.py --help for example

        self.model = dy.ParameterCollection()
        self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)

        self.activations = {
            'tanh': dy.tanh,
            'sigmoid': dy.logistic,
            'relu': dy.rectify,
            'tanh3': lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)),
        }
        self.activation = self.activations[options.activation]

        self.oracle = options.oracle
        self.shareMLP = options.shareMLP
        self.config_lembed = options.lembed_config

        #vectors used
        self.headFlag = options.headFlag
        self.rlMostFlag = options.rlMostFlag
        self.rlFlag = options.rlFlag
        self.k = options.k

        #dimensions depending on extended features
        self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag
                                                     or self.rlMostFlag else 0)
        self.feature_extractor = FeatureExtractor(self.model, words, rels,
                                                  langs, w2i, ch, self.nnvecs,
                                                  options)
        self.irels = self.feature_extractor.irels

        #mlps
        mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (self.k + 1)
        if self.config_lembed:
            mlp_in_dims += options.lang_emb_size

        h1 = options.mlp_hidden_dims
        h2 = options.mlp_hidden2_dims
        if not options.multiling or self.shareMLP:
            self.unlabeled_MLP = MLP(self.model, mlp_in_dims, h1, h2, 4,
                                     self.activation)
            self.labeled_MLP = MLP(self.model, mlp_in_dims, h1, h2,
                                   2 * len(rels) + 2, self.activation)
        else:
            self.labeled_mlpdict = {}
            for lang in self.feature_extractor.langs:
                self.labeled_mlpdict[lang] = MLP(self.model, mlp_in_dims, h1,
                                                 h2, 2 * len(rels) + 2,
                                                 self.activation)

            self.unlabeled_mlpdict = {}
            for lang in self.feature_extractor.langs:
                self.unlabeled_mlpdict[lang] = MLP(self.model, mlp_in_dims, h1,
                                                   h2, 4, self.activation)
Example #15
class Classifier(object):
    def __init__(self, task_type='transitivity', classifier_type='mlp'):
        self.task_type = task_type
        self.task_undefined = False
        self.classifier_type = classifier_type

    def get_xys(self, x_dict, y_dict):
        x_list = []
        y_list = []
        x_ids = []
        for key in x_dict:
            y_avc = y_dict[key[:2]]
            yItem = self.get_task_y(y_avc, self.task_type)
            if yItem is not None:
                y_list.append(yItem)
                x_list.append(x_dict[key])
                x_ids.append(key)
        return x_ids, x_list, y_list

    def get_y_gold(self, x_ids, y_gold):
        return [
            self.get_task_y(y_gold[x_id[:2]], self.task_type) for x_id in x_ids
        ]

    def get_task_y(self, avc, task_type):
        # TODO: this logic may belong elsewhere, or the class is misnamed
        if task_type == "transitivity":
            return sum([avc.has_dobj, avc.has_iobj])
        if task_type == "intransitive":
            return sum([avc.has_dobj, avc.has_iobj]) == 0
        elif task_type == "dobj":
            return int(avc.has_dobj)
        elif task_type == "iobj":
            return int(avc.has_iobj)
        elif task_type == "subj":
            return int(avc.has_subj)
        elif task_type == "subj_num":
            return avc.subj_num
        elif task_type == "subj_pers":
            return avc.subj_pers
        elif task_type == 'subj_n_pers':
            if avc.subj_num is None and avc.subj_pers is None:
                return None
            else:
                return "".join([
                    str(i) for i in filter(None, [avc.subj_num, avc.subj_pers])
                ])
        else:
            raise Exception("Task unknown: %s" % task_type)

    def get_train_info(self, x_train, y_train):
        #check that we have at least 2 classes and 10 training examples
        if len(set(y_train)) < 2 or len(x_train) < 10:
            self.task_undefined = True
            self.trainsize = 0
        else:
            self.trainsize = len(x_train)

    def train(self, train_x, train_y):
        x_ids, x_train, y_train = self.get_xys(train_x, train_y)
        self.get_train_info(x_train, y_train)
        if self.task_undefined:
            return

        if self.classifier_type == 'perceptron':
            #averaged perceptron
            from sklearn.linear_model import SGDClassifier
            self.model = SGDClassifier(loss="perceptron",
                                       eta0=1,
                                       learning_rate="constant",
                                       penalty=None,
                                       average=10)
            self.model.fit(x_train, y_train)

        elif self.classifier_type == 'mlp':
            from multilayer_perceptron import MLP
            data = list(zip(x_train, y_train))  # materialize: zip is a one-shot iterator in Python 3
            labels = set(y_train)
            input_size = len(x_train[0])
            out_size = len(labels)
            hidden_size = input_size  # same as Adi et al.
            self.model = MLP(input_size,
                             hidden_size,
                             out_size,
                             labels,
                             epochs=100)
            self.model.train(data)

    def predict(self, test_x, test_y):
        if not self.task_undefined:
            x_ids, x_test, y_test = self.get_xys(test_x, test_y)
            y_pred = self.model.predict(x_test)
            self.testsize = len(x_test)
            # TODO: write predictions to file
            return x_ids, y_pred
        else:
            self.testsize = 0
            return None, None

    def evaluate(self, x_ids, pred, all_y_gold):
        if x_ids is not None and pred is not None:
            y_gold = self.get_y_gold(x_ids, all_y_gold)
            return accuracy_score(y_gold, pred)
        else:
            return np.nan

    def majority_baseline(self, train_x, train_y, test_x, test_y):
        x_ids, x_train, y_train = self.get_xys(train_x, train_y)
        x_ids, x_test, y_test = self.get_xys(test_x, test_y)
        y_maj = max(y_train, key=y_train.count)
        y_maj_pred = [y_maj for i in range(len(y_test))]
        return accuracy_score(y_test, y_maj_pred)
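
A minimal usage sketch for the Classifier above (hypothetical: the train_x/train_y/test_x/test_y names and their dict layout, features keyed by id tuples and labels keyed by the first two id fields, are assumptions read off get_xys):

clf = Classifier(task_type='dobj', classifier_type='perceptron')
clf.train(train_x, train_y)                  # no-op if the task is undefined
x_ids, y_pred = clf.predict(test_x, test_y)  # (None, None) when undefined
print(clf.evaluate(x_ids, y_pred, test_y))   # accuracy, or np.nan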
Example #16
    def __init__(self, vocab, options):

        # import here so we don't load Dynet if just running parser.py --help for example
        from multilayer_perceptron import MLP
        from feature_extractor import FeatureExtractor
        global dy
        import dynet as dy

        global LEFT_ARC, RIGHT_ARC, SHIFT, SWAP
        LEFT_ARC, RIGHT_ARC, SHIFT, SWAP = 0, 1, 2, 3

        self.model = dy.ParameterCollection()
        self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)

        self.activations = {
            'tanh': dy.tanh,
            'sigmoid': dy.logistic,
            'relu': dy.rectify,
            'tanh3': lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)),
        }
        self.activation = self.activations[options.activation]

        self.oracle = options.oracle

        self.headFlag = options.headFlag
        self.rlMostFlag = options.rlMostFlag
        self.rlFlag = options.rlFlag
        self.k = options.k
        self.distances = 4  # the probe looks at distances between upcoming tokens,
        # normalized by the smallest, among:
        #   s0 - b0
        #   s0 - b1
        #   b0 - closest bi: if < s0 - b0, do a Shift
        #   closest si - b0: if ~= s0 - b0, do a Reduce

        #dimensions depending on extended features
        self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag
                                                     or self.rlMostFlag else 0)
        self.feature_extractor = FeatureExtractor(self.model, options, vocab,
                                                  self.nnvecs)
        self.irels = self.feature_extractor.irels

        if options.no_bilstms > 0:  # number of bilstms
            mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (
                self.k + 1)
        else:
            mlp_in_dims = self.feature_extractor.lstm_input_size * self.nnvecs * (
                self.k + 1)

        # use attention
        if options.bert and options.attention:
            # add attention vectors for stack to top buf and viceversa
            attention_size = self.k * 2
            # all layers
            #layers = self.feature_extractor.bert.model.config.num_hidden_layers
            #attention_size = layers * layers * self.k # * 2
            mlp_in_dims += attention_size

        # Sartiano
        if options.distance_probe_conf:
            print('Distance Probe enabled', file=sys.stderr)
            from distance_probe import DistanceProbe
            self.distance_probe = DistanceProbe(options.distance_probe_conf,
                                                options.dynet_seed)
            mlp_in_dims += self.distances
        else:
            self.distance_probe = None

        self.attention_indices = [
            int(x) for x in options.attention.split(',')
        ] if options.attention else []

        self.unlabeled_MLP = MLP(self.model, 'unlabeled', mlp_in_dims,
                                 options.mlp_hidden_dims,
                                 options.mlp_hidden2_dims, SWAP + 1,
                                 self.activation)
        self.labeled_MLP = MLP(self.model, 'labeled', mlp_in_dims,
                               options.mlp_hidden_dims,
                               options.mlp_hidden2_dims,
                               2 * len(self.irels) + 2, self.activation)
        print('MLP size: (%d, %d)' % (mlp_in_dims, options.mlp_hidden_dims),
              file=sys.stderr)
Example #17
def test_MLP_model_mnist(dataset_name='mnist.pkl.gz',
                         learning_rate=0.01,
                         L1_reg=0.00,
                         L2_reg=0.0001,
                         n_epochs=1000,
                         batch_size=20,
                         n_hidden=500):
    # Set up the dataset
    dataset = load_data(dataset_name)
    # Split the data into a training, validation and test set
    train_data, train_labels = dataset[0]
    test_data, test_labels = dataset[1]
    validation_data, validation_labels = dataset[2]
    # Compute number of minibatches for each set
    n_train_batches = train_data.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = validation_data.get_value(
        borrow=True).shape[0] / batch_size
    n_test_batches = test_data.get_value(borrow=True).shape[0] / batch_size
    data_dim = (28, 28)  # The dimension of each image in the dataset
    data_classes = 10  # The number of classes within the data

    # Build the model
    # ---------------

    # Allocate symbolic variables for data
    index = T.lscalar()  # This is the index to a minibatch
    x = T.matrix('x')  # Data (rasterized images)
    y = T.ivector('y')  # Labels (1d vector of ints)

    rng = np.random.RandomState(1234)

    # Construct MLP class
    classifier = MLP(rng=rng,
                     input=x,
                     n_in=data_dim[0] * data_dim[1],
                     n_hidden=n_hidden,
                     n_out=data_classes)

    # Cost to minimize during training
    # Add regularization terms
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)

    # Compile a Theano function that computes mistakes made by the model on a minibatch
    test_model = th.function(
        inputs=[index],  # This function is for the test data   
        outputs=classifier.errors(y),
        givens={
            x: test_data[index * batch_size:(index + 1) * batch_size],
            y: test_labels[index * batch_size:(index + 1) * batch_size]
        })
    validate_model = th.function(
        inputs=[index],  # This function is for the validation data    
        outputs=classifier.errors(y),
        givens={
            x: validation_data[index * batch_size:(index + 1) * batch_size],
            y: validation_labels[index * batch_size:(index + 1) * batch_size]
        })
    # Compute the gradient of cost with respect to theta
    grad_params = [T.grad(cost, param) for param in classifier.params]

    # Specify how to update model parameters as a list of (variable, update expression) pairs
    updates = [(param, param - learning_rate * grad_param)
               for param, grad_param in zip(classifier.params, grad_params)]

    # Compile Theano function that returns the cost and updates parameters of model based on update rules
    train_model = th.function(
        inputs=[index],  # Index in minibatch that defines x with label y   
        outputs=cost,  # Cost/loss associated with x,y
        updates=updates,
        givens={
            x: train_data[index * batch_size:(index + 1) * batch_size],
            y: train_labels[index * batch_size:(index + 1) * batch_size]
        })

    # Train the model
    # ---------------

    # Setup the early-stopping parameters
    patience = 10000  # Minimum number of examples to examine
    patience_increase = 2  # How much longer to wait once a new best is found
    improvement_threshold = 0.995  # Value of a significant relative improvement
    validation_frequency = min(n_train_batches, patience /
                               2)  # Number of minibatches before validating
    best_validation_loss = np.inf
    test_score = 0
    start_time = time.clock()

    # Setup the training loop
    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            # Set the iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                # Compute the zero-one loss on the validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                # Check if current validation score is the best
                if this_validation_loss < best_validation_loss:
                    # Increase patience if the loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss
                    # Test on test set
                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)
                    print(
                        'epoch %i, minibatch %i/%i, test error of best model %f %%'
                        % (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
            # Stop the loop if we have exhausted our patience
            if patience <= iter:
                done_looping = True
                break
    # The loop has ended so record the time it took
    end_time = time.clock()
    # Print out results and timing information
    print(
        'Optimization complete with best validation score of %f %%, with test performance %f %%'
        % (best_validation_loss * 100., test_score * 100.))
    print 'The code ran for %d epochs with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
Example #18
def run(num,
        epochs=33,
        layer_sizes=None,
        activation=nn.ELU,
        use_main_effect_nets=True,
        num_samples=30000,
        num_features=10,
        valid_size=5,
        test_size=5,
        std_scale=True,
        my_data_norm=False,
        lr=1e-3,
        l1_const=5e-5,
        dropout_p=0,
        early_stopping=False,
        patience=5,
        abs_val=True,
        verbose=True,
        order=2,
        o=2,
        greedy_heuristic=False,
        gelu_final_layer=False,
        gelu_last_layer=False,
        gelu_alt_layer=False,
        gelu_main_effects=False):
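    # Pipeline: synthesize data for f_{num}, train an MLP (optionally with
    # main-effect nets), then score detected feature interactions
    # (NID vs. n-way input probing) by AUC.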
    # Params
    device = torch.device("cuda" if args.cuda else "cpu")
    if layer_sizes is None:
        layer_sizes = [140, 100, 60, 20]

    # Data
    # set_seed(42)
    X = generate_X(num_samples, num)
    Y, ground_truth = globals()["f_{}".format(num)](X.transpose())
    if my_data_norm:
        X = np.array(X)
        X = (X - X.min(0)) / X.ptp(0)
    data_loaders = preprocess_data(X,
                                   Y,
                                   valid_size=valid_size,
                                   test_size=test_size,
                                   std_scale=std_scale,
                                   get_torch_loaders=True)
    X_train = np.concatenate([data[0] for data in data_loaders["train"]], 0)

    # Model and training
    model = MLP(num_features,
                layer_sizes,
                use_main_effect_nets=use_main_effect_nets,
                activation=activation,
                dropout_p=dropout_p,
                gelu_final_layer=gelu_final_layer,
                gelu_last_layer=gelu_last_layer,
                gelu_alt_layer=gelu_alt_layer,
                gelu_main_effects=gelu_main_effects).to(device)
    model, mlp_loss = train(model,
                            data_loaders,
                            nepochs=epochs,
                            device=device,
                            learning_rate=lr,
                            l1_const=l1_const,
                            verbose=verbose,
                            early_stopping=early_stopping,
                            patience=patience)

    # NID AUC
    model_weights = get_weights(model)
    pairwise_interactions, _ = get_interactions(model_weights,
                                                pairwise=True,
                                                one_indexed=True)
    # Automatically selects the top 100 excluding redundant subsets, and unpruned -- can use internal func to prune mine
    anyorder_interactions_pruned, anyorder_interactions_unpruned = get_interactions(
        model_weights, one_indexed=True)
    anyorder_interactions_unpruned = [
        inter for inter in anyorder_interactions_unpruned
        if len(inter[0]) <= order
    ]
    # auc_nid = get_auc(pairwise_interactions, [{i + 1 for i in inter} for inter in ground_truth], verbose=verbose)
    if order == 2:
        anyorder_interactions_unpruned = pairwise_interactions
    # My AUC
    n_way_NID = set([
        tuple([inr - 1 for inr in inter[0]])
        for inter in anyorder_interactions_unpruned if len(inter[0]) <= order
    ])
    auc_mine, interactions = test_inputs_n_way(
        X_train,
        model,
        ground_truth,
        device,
        abs_val,
        order,
        o,
        n_way_NID,
        verbose,
        greedy_heuristic=greedy_heuristic)
    # auc_mine = aucs1

    aucs_nid = []
    for nth in interactions:
        new = copy.deepcopy(anyorder_interactions_unpruned)
        for interaction in [inr[0] for inr in nth]:
            if interaction not in n_way_NID:
                new.append((tuple(inr + 1 for inr in interaction), 0))  # tuple, not a one-shot generator

        # two_and_three_way = [inter for inter in anyorder_interactions_unpruned if len(inter[0]) <= order]
        # print(set([tuple([inr - 1 for inr in inter[0]]) for inter in new]) == set([inr[0] for inr in nth]))
        # print([tuple([inr - 1 for inr in inter[0]]) for inter in new])
        # print([inr[0] for inr in nth])
        auc_nid = [
            get_auc([
                item for item in list(new)
                if len(tuple(tuple(item)[0])) == oth + 1
            ] if oth + 1 > 2 else pairwise_interactions,
                    [{i + 1
                      for i in inter} for inter in ground_truth],
                    verbose=verbose)
            if max([len(g) for g in ground_truth]) >= oth + 1 else 0
            for oth in range(1, order)
        ]
        aucs_nid.append(auc_nid)

    # Requires a subset of "detected" higher-order interactions and computes precision (% of those are real)
    r_prec = get_anyorder_R_precision(anyorder_interactions_pruned,
                                      [{i + 1
                                        for i in inter}
                                       for inter in ground_truth])

    return auc_mine, aucs_nid
Example #19
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):

    """
    Run MLP SGD on MNIST

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    """

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)

    classifier = MLP(
        rng=rng,
        input=x,
        n_in=28 * 28,  # MNIST specific
        n_hidden=n_hidden,
        n_out=10
    )

    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compute the model's misclassification rate on a test minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute gradient of cost with respect to all params
    gparams = [T.grad(cost, param) for param in classifier.params]

    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much
                                   # is considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) & validation_frequency == 0:
                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                if this_validation_loss < best_validation_loss:
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test on test set
                    test_losses = [test_model(i) for i
                                   in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()

    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)
Example #20
def main():
    """
    Task Main
    """

    # download and parse Fashion MNIST training set
    train_set = datasets.FashionMNIST(
        root='./data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor()
        ])
    )

    # download and parse Fashion MNIST testing set
    test_set = datasets.FashionMNIST(
        root='./data',
        train=False,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor()
        ])
    )

    # instantiate train data loader
    train_data_loader = torch.utils.data.DataLoader(train_set, batch_size=1000)

    # instantiate test data loader
    test_data_loader = torch.utils.data.DataLoader(test_set, batch_size=1000)

    # instantiate multilayer perceptron neural network
    mlp = MLP()

    # train the multilayer perceptron neural network
    mlp_performance = mlp.train(
        train_data_loader, learning_rate=0.01, n_epochs=20)

    # test the multilayer perceptron neural network
    mlp.test(test_data_loader)

    # instantiate base architecture convolutional neural network
    base_cnn = BaseCNN()

    # train the base architecture convolutional neural network
    base_cnn_performance = base_cnn.train(
        train_data_loader, learning_rate=0.01, n_epochs=20)

    # test the base architecture convolutional neural network
    base_cnn.test(test_data_loader)

    # instantiate the variant 1 architecture convolutional neural network
    cnn1 = CNN1()

    # train the variant 1 architecture convolutional neural network
    cnn1_performance = cnn1.train(
        train_data_loader, learning_rate=0.01, n_epochs=20)

    # test the variant 1 architecture convolutional neural network
    cnn1.test(test_data_loader)

    # instantiate the variant 2 architecture convolutional neural network
    cnn2 = CNN2()

    # train the variant 2 architecture convolutional neural network
    cnn2_performance = cnn2.train(
        train_data_loader, learning_rate=0.01, n_epochs=20)

    # test the variant 2 architecture convolutional neural network
    cnn2.test(test_data_loader)

    # plot the test results
    plt.plot(range(20), mlp_performance, color='black', label='MLP')
    plt.plot(range(20), base_cnn_performance, color='red', label='Base CNN')
    plt.plot(range(20), cnn1_performance, color='green', label='CNN 1')
    plt.plot(range(20), cnn2_performance, color='blue', label='CNN 2')
    plt.title('Neural Network Image Classification Accuracy')
    plt.xlabel('epochs')
    plt.ylabel('accuracy')
    plt.legend()
    plt.show()