Example #1
# NOTE: imports assumed by this snippet; build_custom_mlp, iterate_minibatches,
# fplog and save_to_results_file are project-local helpers defined elsewhere.
import time

import numpy as np
import theano
import theano.tensor as T
import lasagne


def train_simple_model(data=None,
                       n_values=None,
                       num_epochs=5,
                       depth=10,
                       width=256,
                       drop_in=0.,
                       drop_hid=0.,
                       batch_size=32,
                       learning_rate=0.01,
                       valid_freq=100,
                       save_path='../results/',
                       options_dict=None,
                       reload_model=None,
                       num_targets=3):

    #TODO: eliminate data from this function; instead refer to a filename for data.
    #TODO: rewrite iterate_minibatches to iterate through a file,
    #      using either numpy's memmap or the csv module
    #      (see the file-based iterator sketch after this example).
    #TODO: read the first line of that file to get layer_shape.
    train, valid, test = data

    #X width, so the model knows how wide to make the first layer
    layer_shape = train[0].shape[1]

    # Prepare Theano variables for inputs and target
    input_var = T.matrix('inputs', dtype='float32')

    #CG: num_targets is 1 or 3.  int32 is fine because the targets are class indices.
    target_var = []
    for i in range(num_targets):
        target_var.append(T.vector('target_%s' % i, dtype='int32'))

    # Create the neural network model.
    #CG: ignore mlp, maybe remove this whole switch.
    #CG 1: build network
    start_time = time.time()
    fplog("Building model and compiling functions...")
    # `network` is a list of output layers that appear to share one hidden body,
    # judging by the parameter-sharing loop below (see the hedged sketch of such
    # a build_custom_mlp after this example).
    network = build_custom_mlp(
        input_var, depth, width, drop_in, drop_hid, layer_shape,
        [[n_values['y_1']], [n_values['y_2']], [n_values['y_3']]])

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    # CG 2: Make prediction
    prediction = []
    for n in network:
        prediction.append(lasagne.layers.get_output(n))

    # Sum the cross-entropy over all targets (generalizes to any num_targets).
    loss = 0
    for p, t in zip(prediction, target_var):
        loss += lasagne.objectives.categorical_crossentropy(p, t)
    loss = loss.mean()

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use adadelta,
    # network[0] contributes all trainable params (the shared body plus its own
    # output layer); the other heads contribute only their own output W and b.
    params = []
    for i, n in enumerate(network):
        if i == 0:
            p = lasagne.layers.get_all_params(n, trainable=True)
        else:
            p = lasagne.layers.get_all_params(n, trainable=True)[-2:]
        params += p
    updates = lasagne.updates.adadelta(loss, params, learning_rate=learning_rate)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = []
    test_loss = []
    test_acc = []
    preds = []
    for n, t in zip(network, target_var):
        p = lasagne.layers.get_output(n, deterministic=True)
        test_prediction.append(p)
        l = lasagne.objectives.categorical_crossentropy(p, t)
        test_loss.append(l.mean())

        # As a bonus, also create an expression for the classification accuracy:
        acc = T.mean(T.eq(T.argmax(p, axis=1), t), dtype=theano.config.floatX)
        test_acc.append(acc)
        preds.append(theano.function([input_var], p))

    val_fn = []
    train_fn = theano.function([input_var] + target_var, loss, updates=updates)
    for t, l, a in zip(target_var, test_loss, test_acc):
        val_fn.append(theano.function([input_var, t], [l, a]))

    history_train_errs = []
    history_valid_errs = []
    # Finally, launch the training loop.
    fplog("Starting training...")
    # We iterate over epochs:
    training_start_time = time.time()  # overall timer; start_time is reset per epoch
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()

        for batch in iterate_minibatches(train, batch_size, shuffle=False):
            inputs, targets = batch
            train_err += train_fn(inputs, *targets)
            train_batches += 1

            if train_batches % valid_freq == 0:
                err = []
                acc = []
                for i in range(num_targets):
                    e, a = val_fn[i](inputs, targets[i])
                    err.append(e)
                    acc.append(a)
                history_train_errs.append([err, acc])
                # var_string and results_path are assumed to be module-level globals.
                save_to_results_file(var_string, results_path)
                np.savez(save_path,
                         history_train_errs=history_train_errs,
                         history_valid_errs=history_valid_errs,
                         options_dict=options_dict,
                         *params)

        # And a full pass over the validation data:
        val_err = np.zeros(num_targets)
        val_acc = np.zeros(num_targets)
        val_batches = 0
        for batch in iterate_minibatches(valid, batch_size, shuffle=False):
            inputs, targets = batch

            #calculate error and accuracy separately for each target
            for i in range(num_targets):
                e, a = val_fn[i](inputs, targets[i])
                val_err[i] += e
                val_acc[i] += a
            val_batches += 1

            params = lasagne.layers.get_all_params(network)

            # Periodically record validation errors and checkpoint.
            if val_batches % valid_freq == 0:
                err = []
                acc = []
                for i in range(num_targets):
                    e, a = val_fn[i](inputs, targets[i])
                    err.append(e)
                    acc.append(a)
                history_valid_errs.append([err, acc])
                fplog('saving...')
                np.savez(save_path,
                         history_train_errs=history_train_errs,
                         history_valid_errs=history_valid_errs,
                         options_dict=options_dict,
                         *params)

        # Then we fplog the results for this epoch:
        fplog("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time))
        max_train = np.max(train_err / train_batches)
        min_train = np.min(train_err / train_batches)
        max_val = np.max(val_err / val_batches)
        min_val = np.min(val_err / val_batches)
        avg_val_acc = np.mean(val_acc / val_batches)
        fplog(" train_batches: %i" % train_batches)
        fplog(" val_batches: %i" % val_batches)
        fplog("  max training loss:\t\t{:.6f}".format(max_train))
        fplog("  min training loss:\t\t{:.6f}".format(min_train))
        fplog("  max validation loss:\t\t{:.6f}".format(max_val))
        fplog("  min validation loss:\t\t{:.6f}".format(min_val))
        fplog("  avg validation accuracy:\t\t{:.2f} %".format(avg_val_acc *
                                                              100))

    end_time = time.time()
    fplog("The code ran for %d epochs, with %f sec/epoch" %
          (num_epochs, (end_time - training_start_time) / (1. * num_epochs)))

    # After training, we compute and fplog the test error:
    test_err = np.zeros(num_targets)
    test_acc = np.zeros(num_targets)
    test_batches = 0
    test_preds = []
    y_test_list = test[1:]  # omit X_test; only want y1, y2, y3
    for i in range(num_targets):
        # start each prediction array empty; batch predictions are appended below
        test_preds.append(np.zeros(0, dtype='int64'))
    for batch in iterate_minibatches(test, batch_size, shuffle=False):
        inputs, targets = batch

        for i in range(num_targets):
            e, a = val_fn[i](inputs, targets[i])
            pred_prob = preds[i](inputs)
            pred = pred_prob.argmax(axis=1)
            test_preds[i] = np.append(test_preds[i], pred)
            test_err[i] += e
            test_acc[i] += a
        test_batches += 1

    test_acc_pct = []

    max_err = np.max(test_err / test_batches)
    min_err = np.min(test_err / test_batches)
    avg_acc = np.mean(test_acc / test_batches)
    max_acc = np.max(test_acc / test_batches)
    min_acc = np.min(test_acc / test_batches)
    fplog("Final results:")
    fplog("  max test loss:\t\t\t{:.6f}".format(max_err))
    fplog("  min test loss:\t\t\t{:.6f}".format(min_err))

    #calculate test accuracy
    for i in range(len(test_acc)):
        test_acc_pct.append(test_acc[i] / test_batches)
        fplog("  test accuracy " + str(i) +
              ":\t\t{:.2f} %".format(test_acc_pct[i] * 100))
    fplog(" mean test accuracy:\t\t{:.2f} %".format(avg_acc * 100))

    params = lasagne.layers.get_all_params(network)

    # Optionally, you could now dump the network weights to a file like this
    # (np.savez wants array values; lasagne.layers.get_all_param_values(network)
    # returns them, whereas get_all_params returns the shared variables):
    np.savez(save_path,
             train_err=train_err / train_batches,
             valid_err=val_err / val_batches,
             test_err=test_err / test_batches,
             test_acc=test_acc_pct,
             history_train_errs=history_train_errs,
             history_valid_errs=history_valid_errs,
             predictions=test_preds,
             options_dict=options_dict,
             *params)

    #
    # And load them again later on like this:
    # with np.load('model.npz') as f:
    #     param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    # lasagne.layers.set_all_param_values(network, param_values)

    return params, test_preds
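
The example assumes a helper build_custom_mlp that returns a list of output
layers sharing a single stack of hidden layers (the parameter loop above only
takes the last W and b from the second and third heads). The following is a
minimal, hypothetical sketch of such a helper, written against the Lasagne
layer API; the layer layout and argument handling are assumptions, not the
project's actual implementation.

def build_custom_mlp(input_var, depth, width, drop_input, drop_hidden,
                     layer_shape, output_sizes):
    # Shared body: input layer, optional input dropout, `depth` dense layers.
    body = lasagne.layers.InputLayer(shape=(None, layer_shape),
                                     input_var=input_var)
    if drop_input:
        body = lasagne.layers.dropout(body, p=drop_input)
    nonlin = lasagne.nonlinearities.rectify
    for _ in range(depth):
        body = lasagne.layers.DenseLayer(body, width, nonlinearity=nonlin)
        if drop_hidden:
            body = lasagne.layers.dropout(body, p=drop_hidden)

    # One softmax head per target, all attached to the same shared body, so
    # get_all_params(head)[-2:] yields exactly that head's W and b.
    softmax = lasagne.nonlinearities.softmax
    heads = []
    for sizes in output_sizes:  # e.g. [[n_y1], [n_y2], [n_y3]]
        heads.append(lasagne.layers.DenseLayer(body, sizes[0],
                                               nonlinearity=softmax))
    return heads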
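
The TODO notes at the top of the example plan to stream minibatches from a
file instead of keeping the whole dataset in memory. Below is a minimal sketch
of a file-based iterator using numpy's memmap; the on-disk layout (a flat
float32 array of shape (n_rows, n_features + num_targets) with the integer
targets in the last columns) is an assumption for illustration, not the
project's actual file format.

def iterate_minibatches_from_file(path, n_rows, n_features, num_targets,
                                  batch_size=32):
    # Memory-map the file so only the rows of the current batch are read.
    data = np.memmap(path, dtype='float32', mode='r',
                     shape=(n_rows, n_features + num_targets))
    for start in range(0, n_rows - batch_size + 1, batch_size):
        batch = np.asarray(data[start:start + batch_size])
        inputs = batch[:, :n_features]
        targets = [batch[:, n_features + i].astype('int32')
                   for i in range(num_targets)]
        yield inputs, targets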
Example #3
    params, preds = train_simple_model(data=data,
                                       n_values=n_values,
                                       num_epochs=5,
                                       depth=10,
                                       width=256,
                                       batch_size=32,
                                       learning_rate=0.01,
                                       valid_freq=100,
                                       save_path='../results/simple_mlp/',
                                       options_dict=None,
                                       reload_model=None,
                                       num_targets=3)


if __name__ == '__main__':
    if ('--help' in sys.argv) or ('-h' in sys.argv):
        fplog("Trains a neural network on MNIST using Lasagne.")
        fplog("Usage: %s [MODEL [EPOCHS]]" % sys.argv[0])
        fplog()
        fplog("MODEL: 'mlp' for a simple Multi-Layer Perceptron (MLP),")
        fplog("       'custom_mlp:DEPTH,WIDTH,DROP_IN,DROP_HID' for an MLP")
        fplog("       with DEPTH hidden layers of WIDTH units, DROP_IN")
        fplog("       input dropout and DROP_HID hidden dropout,")
        fplog("       'cnn' for a simple Convolutional Neural Network (CNN).")
        fplog("EPOCHS: number of training epochs to perform (default: 500)")
    else:
        kwargs = {}
        if len(sys.argv) > 1:
            kwargs['model'] = sys.argv[1]
        if len(sys.argv) > 2:
            kwargs['num_epochs'] = int(sys.argv[2])
        main(**kwargs)
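
The --help text describes a 'custom_mlp:DEPTH,WIDTH,DROP_IN,DROP_HID' model
specification, but main() is not shown in this snippet. The fragment below is
a hedged sketch of how such a spec string could be turned into keyword
arguments (following the pattern of the Lasagne MNIST example this help text
is based on); the function name and the requirement that all four fields be
present are assumptions for illustration.

def parse_model_spec(model='mlp'):
    # 'custom_mlp:DEPTH,WIDTH,DROP_IN,DROP_HID' -> kwargs for build_custom_mlp;
    # this sketch expects all four fields to be given.
    if model == 'mlp':
        return {}
    if model.startswith('custom_mlp:'):
        depth, width, drop_in, drop_hid = model.split(':', 1)[1].split(',')
        return dict(depth=int(depth), width=int(width),
                    drop_in=float(drop_in), drop_hid=float(drop_hid))
    raise ValueError("Unrecognized model spec: %r" % model)

For example, parse_model_spec('custom_mlp:10,256,0.2,0.5') gives depth=10,
width=256, drop_in=0.2 and drop_hid=0.5, which could then be forwarded to
train_simple_model.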