Example No. 1
    def output(self, use_dropout=False, depth=0):
        """ Provides data to next layer and applies dropout """
        ret = self.input_layer
        if use_dropout:
            num_str = NNl.get_num_streams(np.prod(self.out_shape))
            mask = NNl.gen_mask(self.srng, self.out_shape, self.p_retain,
                    num_str)
            ret *= mask / self.p_retain
        return ret
Example No. 2
    def output(self, use_dropout=False, depth=0):
        """ Apply the activation and dropout to the signal, producing
        output that will be used by subsequent layers
        """
        out = self.activation(self.signal(use_dropout=use_dropout, depth=depth))
        c_shape = self.out_shape
        if use_dropout:
            num_str = NNl.get_num_streams(np.prod(c_shape))
            mask = NNl.gen_mask(self.srng, c_shape, self.p_retain, num_str)
            out = out * mask / self.p_retain
        return out
Example No. 3
def class_probs(model, data, train_params):
    """ Creates Theano function to return the probabilities of class
        membership.

    Args:
        model: Model instance
        data: Dataset instance; its validation samples (data.V[0]) are used
        train_params: dict of training parameters; 'v_batches' gives how many
            batches are needed to span the samples

    Returns:
        An nparray of class membership probabilities. Useful for performing
        meta-analysis/transfer learning.
    """

    index = T.lscalar('index')  # index to a [mini]batch
    aug = T.ivector('aug')  # data augmentation (jitter, flip)
    in_dims = model.x_shape[2:]

    p_func = theano.function([index, aug],
                             model.out_layer.p_y_given_x(False),
                             givens={
                                 model.x:
                                 NNl.get_batch(data.V[0], index, data.batch_n,
                                               in_dims, aug)
                             })

    y = [
        p_func(i, model.ref_aug) for i in range(train_params['v_batches'] + 1)
    ]

    return np.asarray(np.concatenate(y, axis=0))
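class_probs evaluates p_func one validation batch at a time and concatenates the results, so the returned array has one row of class membership probabilities per sample. A small self-contained sketch of the kind of meta-analysis the docstring mentions, with a made-up probability matrix standing in for the real output:

import numpy as np

# Stand-in for the array returned by class_probs: 4 samples, 3 classes
probs = np.array([[0.70, 0.20, 0.10],
                  [0.10, 0.80, 0.10],
                  [0.30, 0.30, 0.40],
                  [0.25, 0.50, 0.25]])

predicted = probs.argmax(axis=1)    # hard class decisions
confidence = probs.max(axis=1)      # per-sample certainty, handy for filtering
print predicted, confidence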
Example No. 4
    def __init__(self, rngs, input_layer, Lshape, traits, activation):
        super(ConvLayer, self).__init__(input_layer, traits, "Conv")

        self.rng = rngs[0]
        self.l2decay = traits['l2decay']
        filter_shape = Lshape[1]
        # The number of input channels must match number of filter channels
        assert Lshape[0][1] == filter_shape[1]
        self.pad = traits['padding']

        self.W = NNl.gen_weights(self.rng, filter_shape, 0, traits['initW'])

        # convolve input feature maps with filters
        # Using Alex K.'s fast CUDA conv, courtesy of S. Dieleman
        self.x = self.input_layer.output(False)
        conv_op = FilterActs(pad=self.pad, partial_sum=1)
        input_shuffled = (self.x).dimshuffle(1, 2, 3, 0) # bc01 to c01b
        filters_shuffled = (self.W).dimshuffle(1, 2, 3, 0) # bc01 to c01b
        contiguous_input = gpu_contiguous(input_shuffled)
        contiguous_filters = gpu_contiguous(filters_shuffled)
        out_shuffled = conv_op(contiguous_input, contiguous_filters)
        self.conv_out = out_shuffled.dimshuffle(3, 0, 1, 2) # c01b to bc01

        # store parameters of this layer
        self.params = [self.W]
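The layout juggling around FilterActs is the easiest part of this constructor to get wrong: Theano's default convolution layout is bc01 (batch, channel, row, column) while the cuda-convnet kernels expect c01b. A NumPy sketch of the same axis bookkeeping, with transpose standing in for dimshuffle and made-up shapes:

import numpy as np

batch, channels, rows, cols = 128, 3, 32, 32
x_bc01 = np.zeros((batch, channels, rows, cols))

# bc01 -> c01b, the layout FilterActs expects (same axes as dimshuffle(1, 2, 3, 0))
x_c01b = x_bc01.transpose(1, 2, 3, 0)
assert x_c01b.shape == (channels, rows, cols, batch)

# c01b -> bc01, back to the usual Theano layout (same axes as dimshuffle(3, 0, 1, 2))
back = x_c01b.transpose(3, 0, 1, 2)
assert back.shape == (batch, channels, rows, cols)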
Example No. 5
def class_probs(model, data, train_params):
    """ Creates Theano function to return the probabilities of class
        membership.

    Args:
        model: Model instance
        data: Dataset instance; its validation samples (data.V[0]) are used
        train_params: dict of training parameters; 'v_batches' gives how many
            batches are needed to span the samples

    Returns:
        An nparray of class membership probabilities. Useful for performing
        meta-analysis/transfer learning.
    """

    index = T.lscalar('index')  # index to a [mini]batch
    aug = T.ivector('aug')      # data augmentation (jitter, flip)
    in_dims = model.x_shape[2:]

    p_func = theano.function([index, aug], model.out_layer.p_y_given_x(False),
            givens={model.x: NNl.get_batch(data.V[0], index, data.batch_n,
                in_dims, aug)})

    y = [p_func(i, model.ref_aug) for i in range(train_params['v_batches']+1)]

    return np.asarray(np.concatenate(y, axis=0))
Example No. 6
    def __init__(self, rngs, input_layer, Lshape, traits, activation):
        super(OutputLayer, self).__init__(input_layer, traits, "Output")
        self.out_shape = (Lshape[0], Lshape[1])
        self.W_shape = Lshape[1:]
        self.activation = activation
        self.l2decay = traits['l2decay']

        if len(Lshape) != 3:
            print("Logistic regression shape must be (2,), it is,", Lshape)

        # Initialize weights and biases (can load values later)
        self.W = NNl.gen_weights(rngs[0], self.W_shape, 0, traits['initW'])
        self.b = Tsh(np.zeros((Lshape[2],), dtype=Tfloat))
        self.params = [self.W, self.b]
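OutputLayer only stores W and b here; the p_y_given_x used by class_probs is not shown in these examples. A plausible reading, sketched in NumPy under that assumption, is a row-wise softmax over the affine map:

import numpy as np

def softmax_probs(x, W, b):
    """Assumed stand-in for p_y_given_x: softmax of x.dot(W) + b, row by row."""
    scores = x.dot(W) + b
    scores -= scores.max(axis=1, keepdims=True)   # for numerical stability
    e = np.exp(scores)
    return e / e.sum(axis=1, keepdims=True)

x = np.random.randn(5, 10)   # 5 samples, 10 features
W = np.zeros((10, 3))        # 3 classes; zero weights give uniform probabilities
b = np.zeros(3)
print softmax_probs(x, W, b)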
Example No. 7
def create_functions(model, data, rho, profmode):
    """Creates Theano functions for SGD backprop.

    Args:
        model: Model instance providing the cost, symbolic variables, and layers.
        data: Dataset instance on which to train.
        rho: momentum parameter
        profmode: used only for profiling
    """

    print 'Compiling functions...',

    # allocate symbolic variables for the data
    index = T.lscalar('index')  # index to a [mini]batch
    lrate = T.fscalar('lrate')  # learning rate
    aug = T.ivector('aug')      # data augmentation (jitter, flip)
    in_dims = model.x_shape[2:]
    x = model.x
    y = model.y

    # Functions to calculate our training and validation error while we run
    functions = {}
    f_input = [index, aug]

    functions['train_E'] = theano.function(f_input, model.val_error, givens={
            x: NNl.get_batch(data.T[0], index, data.batch_n, in_dims, aug),
            y: NNl.get_batch_0(data.T[1], index, data.batch_n)}, mode=profmode)

    if not data.test:
        functions['val_E'] = theano.function(f_input, model.val_error, givens={
            x: NNl.get_batch(data.V[0], index, data.batch_n, in_dims, aug),
            y: NNl.get_batch_0(data.V[1], index, data.batch_n)}, mode=profmode)

    train_input = [index, lrate, aug]

    # Functions to update the model, via momentum-based SGD
    mom_updates = []
    p_updates = []
    for layer in model.layers[-1:0:-1]:
        grads = T.grad(model.cost, layer.params)

        for grad_i, param_i in zip(grads, layer.params):
            delta_i = theano.shared(param_i.get_value() * 0.)
            c_update = (delta_i, rho * delta_i - lrate * grad_i -
                        lrate * layer.l2decay * param_i)
            mom_updates.append(c_update)
            p_updates.append((param_i, param_i + delta_i))
    functions['momentum'] = theano.function(train_input, model.cost,
            updates=mom_updates, givens={
            x: NNl.get_batch(data.T[0], index, data.batch_n, in_dims, aug),
            y: NNl.get_batch_0(data.T[1], index, data.batch_n)}, mode=profmode)
    functions['update'] = theano.function([], updates=p_updates, mode=profmode)

    print 'done'

    return functions
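The 'momentum' / 'update' pair built above is plain momentum SGD with L2 weight decay: each parameter keeps a velocity delta_i that is decayed by rho and pushed along the negative gradient plus the decay term, and a second function then adds the velocity to the parameter. The same two steps, sketched in NumPy with made-up numbers:

import numpy as np

rho, lrate, l2decay = 0.9, 0.01, 1e-4

param = np.array([0.5, -0.3])
delta = np.zeros_like(param)
grad = np.array([0.2, -0.1])     # pretend gradient of the cost w.r.t. param

# 'momentum' step: update the velocity
delta = rho * delta - lrate * grad - lrate * l2decay * param
# 'update' step: apply the velocity to the parameter
param = param + delta
print param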
Example No. 8
    def istrazuj(self):

        SB = self.SBSpinBox.value()
        UZ = self.UZSpinBox.value()

        # - determine the concept from the values the NN returns
        if NNlib.neuro(SB, UZ) == 1:
            koncept = 'zid'
        else:
            koncept = 'prazan prostor'

        # - load the chosen concept into a session variable
        sql = "SET @koncept:='%s';" % koncept
        self.cursor.execute(sql)
        self.conn.commit()

        # Add the values and coordinates to the database
        # - load the current direction to compute the coordinates of the cell ahead
        self.cursor.execute("SELECT smjer, poljeID FROM stanje;")
        smjer, t_polje = self.cursor.fetchone()
        # - compute and store the values of the cell ahead
        self.cursor.execute("CALL spremi_polje_ispred(%s)", smjer)

        # - add the cell's values to the surroundings map
        self.cursor.execute("SELECT poljeID, tezina, d_tezina FROM polje WHERE xkoord = @pi_x AND ykoord = @pi_y")
        poljeID, tezina, d_tezina = self.cursor.fetchone()
        self.okruzenje[int(poljeID)] = int(tezina)

        self.cursor.execute("CALL dodajVezu('%s','veza', '%s', %s);", (t_polje, poljeID, d_tezina))
        self.conn.commit()

        # - load the coordinates of the cell ahead
        self.cursor.execute("SELECT @pi_x, @pi_y;")
        pi_x, pi_y = self.cursor.fetchone()
        # - store the t_smjer value for the cell ahead
        self.cursor.execute("UPDATE polje SET t_smjer = (SELECT smjer FROM stanje) WHERE xkoord = %s AND ykoord = %s;", (pi_x, pi_y))
        self.conn.commit()
        self.unesenaVrijednost.setText(("<font color='green'>%s spremljeno na polju (%s, %s)</font>") % (koncept, pi_x, pi_y))
        self.promjenaPozicije.setText("<font color='green'>Okrecem se desno 90 stupnjeva</font>")
        # Turn 90 degrees to the right; 90 is added to the direction, and if it exceeds 250 it wraps back to 0
        self.cursor.execute("UPDATE stanje SET smjer = IF(smjer < 250, smjer + 90, 0);")
        self.conn.commit()

        self.brojac += 1
        if self.brojac == 4:
            self.brojac = 0
            self.master_istrazuj()
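The heading update at the end of istrazuj keeps the direction in {0, 90, 180, 270}: 90 is added each time, and once the value passes 250 (i.e. it was 270) it wraps back to 0. The SQL IF expressed as a one-line Python sketch:

def turn_right(smjer):
    """Mirror of UPDATE stanje SET smjer = IF(smjer < 250, smjer + 90, 0)."""
    return smjer + 90 if smjer < 250 else 0

assert [turn_right(d) for d in (0, 90, 180, 270)] == [90, 180, 270, 0]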
Example No. 9
    def __init__(self, rngs, input_layer, Lshape, traits, activation):
        super(FCLayer, self).__init__(input_layer, traits, "FC")

        self.p_retain = (1. - traits['dropout'])
        self.rng = rngs[0]
        self.srng = rngs[1]
        self.out_shape = (Lshape[0], Lshape[2])
        self.W_shape = Lshape[1:]
        self.activation = activation
        self.l2decay = traits['l2decay']
        self.d_rec = input_layer.output(False)
        self.best_error = np.inf

        if len(Lshape) != 3:
            print "FC layer shape must be (2,), it is,", Lshape

        self.W = NNl.gen_weights(self.rng, self.W_shape, 0, traits['initW'])
        self.b = Tsh(np.zeros(Lshape[2], dtype=Tfloat))
        self.ib = Tsh(np.zeros(Lshape[1], dtype=Tfloat))
        self.params = [self.W, self.b,]
        self.pt_params = [self.W, self.b, self.ib]
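NNl.gen_weights itself is not shown in these examples; judging by the call sites (a NumPy RandomState, a weight shape, 0, and traits['initW']), a plausible implementation is a Gaussian draw with that mean and spread. A hedged stand-in, under that assumption only:

import numpy as np

def gen_weights_sketch(rng, shape, mean, std):
    """Assumed behaviour only: W ~ N(mean, std) with the given shape.
    The real NNl.gen_weights presumably also wraps this in a Theano shared variable."""
    return np.asarray(rng.normal(mean, std, size=shape), dtype='float32')

rng = np.random.RandomState(1000)
W = gen_weights_sketch(rng, (784, 256), 0, 0.01)   # e.g. Lshape[1:] for an FC layer
print W.shape, W.std()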
Example No. 10
def train_NN(CPFile="",
             datafile="rawdata",
             dataset=None,
             SFile="structure",
             train_ae=False,
             profiling=False,
             rho=0.9,
             LR=0.010,
             n_epochs=500,
             batch_size=128,
             cut=-1,
             cv_k=10,
             seed=1000,
             predict=False,
             verbose=True):
    """The core routine for neural net training.

    Args:
        CPFile: Checkpoint file from which to resume a training run.
            Checkpoints are saved automatically as progress is made on a
            validation set, with standard filename "model_cp"
        datafile: File from which to retrieve raw data. This is subsequently
            loaded into a Dataset instance.
        dataset: Specifies a dataset to load directly. For use with training
            meta-algorithms that modify the dataset over multiple runs.
        SFile: The structure file that specifies the neural net architecture.
        train_ae: Flag for training as an autoencoder.
        profiling: Flag for turning on profiling for examining performance.
        rho: Momentum parameter. Standard momentum is used by default for
            both autoencoder and backprop training.
        LR: Learning rate.
        n_epochs: Number of epochs for training.
        batch_size: SGD mini-batch size. Processing speed increases for
            larger sizes, but fewer updates are made as a tradeoff.
        cut: Number of training examples to use from the raw data, with
            the rest as validation. '-1' indicates look at cv_k.
        cv_k: 'k' in K-fold validation. 1/k of the data used as a validation
            set, with the rest as training.
        seed: specifies random seed. For a given seed, dataset, and neural
            net architecture, the run will be repeatable.
        predict: Flag to compute class membership probabilities for the
            validation data (dumped to "class_p") instead of SGD training.
        verbose: Flag determining whether to send continual updates to stdout.

    """

    sched_dict = {20: 0.005, 100: 0.001, 200: 0.0001}

    # This is passed to the theano functions for profiling
    profmode = NNl.get_profiler(profiling)

    # A dictionary collecting the necessary training parameters
    train_params = {
        'LR': LR,
        'n_epochs': n_epochs,
        'rho': rho,
        'verb': verbose,
        'LRsched': sched_dict
    }

    # Create RNGs, one normal one Theano, which are passed to the Builder
    rng = np.random.RandomState(seed)
    theano_rng = MRG_RStreams(rng.randint(999999))
    rngs = [rng, theano_rng]

    # Load the dataset, then split for validation
    if dataset:
        data = dataset
        if not data.T:
            train_params.update(
                data.prep_validation(batch=batch_size, cut=cut, k=cv_k))
        else:
            train_params.update(data.V_params)
    else:
        data = Dataset(datafile, rng)
        if predict:
            cv_k = 1
        train_params.update(
            data.prep_validation(batch=batch_size, cut=cut, k=cv_k))

    #*** CREATE A MODEL CLASS INSTANCE ***#

    in_shape = (batch_size, ) + data.sample_dim

    # Load the checkpoint if there, otherwise use 'structure' to define network
    if os.path.isfile(CPFile):
        mymodel = Model(rngs, in_shape, data.label_dim, CPFile, struc_file="")
    else:
        mymodel = Model(rngs, in_shape, data.label_dim, struc_file=SFile)

    if mymodel.zeropad > 0:
        data.zeropad(mymodel.zeropad)

    #*** AUTOENCODER ***#
    #___________________#

    layers_to_train = []
    if train_ae:
        for layer in mymodel.layers:
            if layer.tag == "FC":
                layers_to_train.append(layer)

    for layer in layers_to_train:
        if layer.input_layer.tag == "Input":
            print "@ Autoencoding layer", layer.number, "with RSTanh"
            activ = NNl.RSTanh
        else:
            print "@ Autoencoding layer", layer.number, "with SoftReLU"
            activ = NNl.SoftReLU

        functions = create_functions_ae(layer, data.T, activ, batch_size, rho,
                                        mymodel.x, mymodel.x_shape[2:],
                                        profmode)

        train_params['logfile'] = NNl.prepare_log(mymodel, data.description)
        train_params['error'] = layer

        train(mymodel, data, functions, train_params)

    #*** SGD BACKPROP ***#
    #____________________#

    if predict:
        print '@ Predicting'
        #       predict_label(mymodel, data, train_params)
        cp.dump(class_probs(mymodel, data, train_params),
                open("class_p", 'wb'), 2)

    else:
        print '@ Training with SGD backprop'
        T_functions = create_functions(mymodel, data, rho, profmode)

        # Logfile made for analysis of training
        train_params['logfile'] = NNl.prepare_log(mymodel, data.description)
        train_params['error'] = mymodel

        train(mymodel, data, T_functions, train_params)

        print "\nBest validation: ", mymodel.best_error

    if profiling:
        profmode.print_summary()

#   mymodel.update_model("model_cp")
    return mymodel
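sched_dict = {20: 0.005, 100: 0.001, 200: 0.0001} is consumed inside train(): whenever the current epoch number appears as a key, the learning rate is replaced by the corresponding value; between those epochs it stays constant. A tiny sketch of that lookup:

LR = 0.010
sched = {20: 0.005, 100: 0.001, 200: 0.0001}

for epoch in range(500):
    if epoch in sched:      # same check as in train()
        LR = sched[epoch]
print LR                    # 0.0001 after the last scheduled drop at epoch 200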
Example No. 11
def create_functions_ae(layer, training, activation, batch_size, rho, x,
                        in_dims, profmode):
    """Creates the Theano functions necessary to train as an autoencoder.

    Args:
        layer: The layer (FC) on which training will occur.
        training: The set of training data (no labels, just features).
        activation: The function to use for 'reconstruction' activation. This
            will generally align with the distribution found in the input you
            are trying to reconstruct. For example, for mean-std normalized
            input data, you might use a Tanh function in your 1st layer. If
            that first layer has a ReLU neuronal activation, then your 2nd
            hidden layer would probably use the SoftReLU reconstruction
            activation.
        batch_size: size of the batches used for autoencoder training.
        rho: momentum parameter
        x: Theano variable input to the current layer.
        in_dims: shape of the input piped through 'x'.
        profmode: Flag for profiling

    Returns:
        Dictionary of functions for autoencoder training: cost, and 2 updates
    """

    print 'Compiling autoencoder functions...',

    # allocate symbolic variables for the data
    index = T.lscalar('index')  # index to a [mini]batch
    lrate = T.fscalar('lrate')  # learning rate
    aug = T.ivector('aug')  # data augmentation (jitter, flip)

    # Functions to calculate our training and validation error while we run
    functions = {}

    cost = layer.reconstruct_mse(activation)

    f_input = [index, aug]

    functions['train_E'] = theano.function(f_input,
                                           cost,
                                           givens={
                                               x:
                                               NNl.get_batch(
                                                   training[0], index,
                                                   batch_size, in_dims, aug)
                                           },
                                           mode=profmode)

    # create a list of gradients for all model parameters
    grads = T.grad(cost, layer.pt_params)

    train_input = [index, lrate, aug]

    # Functions to update the model, via momentum-based SGD
    mom_updates = []
    p_updates = []
    for grad_i, param_i in zip(grads, layer.pt_params):
        delta_i = theano.shared(param_i.get_value() * 0.)
        c_update = (delta_i, rho * delta_i - lrate * grad_i)
        mom_updates.append(c_update)
        p_updates.append((param_i, param_i + delta_i))
    functions['momentum'] = theano.function(train_input,
                                            cost,
                                            updates=mom_updates,
                                            givens={
                                                x:
                                                NNl.get_batch(
                                                    training[0], index,
                                                    batch_size, in_dims, aug)
                                            },
                                            mode=profmode)
    functions['update'] = theano.function([], updates=p_updates, mode=profmode)

    print 'done'

    return functions
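layer.reconstruct_mse(activation) is not shown in these examples. Given that pt_params holds [W, b, ib], a reasonable guess is a tied-weight autoencoder: encode with W and b, decode with W transposed and the extra bias ib, and take the mean squared reconstruction error. A NumPy sketch under that assumption only:

import numpy as np

def reconstruct_mse_sketch(x, W, b, ib, encode_act, decode_act):
    """Assumed tied-weight autoencoder cost: decode with W.T and the ib bias."""
    hidden = encode_act(x.dot(W) + b)
    recon = decode_act(hidden.dot(W.T) + ib)
    return np.mean((recon - x) ** 2)

x = np.random.randn(128, 784)
W = np.random.randn(784, 256) * 0.01
b = np.zeros(256)
ib = np.zeros(784)
print reconstruct_mse_sketch(x, W, b, ib, np.tanh, np.tanh)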
Example No. 12
    print "\t-A\ttrain autoencoder"
    for key, value in std_opts.iteritems():
        print "\t", key, "\t<", value, ">"
    sys.exit()


if __name__ == '__main__':
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hVPpAl:s:n:f:d:S:b:c:C:")
    except getopt.GetoptError as err:
        print str(err)
        usage()
    pass_in = {}
    for opt, val in opts:
        if opt in std_opts:
            pass_in[std_opts[opt]] = NNl.find_type(val)
        elif opt == "-V":
            pass_in['verbose'] = False
        elif opt == "-P":
            pass_in['profiling'] = True
        elif opt == "-p":
            pass_in['predict'] = True
        elif opt == "-A":
            pass_in['train_ae'] = True
        elif opt in ("-h", "--help"):
            usage()
        else:
            assert False, "unhandled option"
    print "Passed arguments: ", pass_in
    train_NN(**pass_in)
Example No. 13
    # 'okruzenje' stores all the cells around the robot at this moment, so the next cell the robot will move to can be computed. Format: {cell ID: cell weight}
    okruzenje = dict()

    cursor.execute("SELECT d_tezina FROM polje WHERE poljeID = (SELECT poljeID FROM stanje); ")
    t_tezina = cursor.fetchone()

    # Loop for scanning the surroundings, 1 - 4 for each side
    for i in range(4):
        # Scan the cell ahead with the sensors
        # - manual entry of the sensor values
        print "Unesi vrijednosti za polje ispred"
        SB = input("SB: ")
        UZ = input("UZ: ")

        # - determine the concept from the values the NN returns
        if NNlib.neuro(SB, UZ) == 1:
            koncept = 'zid'
        else:
            koncept = 'prazan prostor'

        # - load the chosen concept into a session variable
        sql = "SET @koncept:='%s';" % koncept
        cursor.execute(sql)
        conn.commit()

        # Add the values and coordinates to the database
        # - load the current direction to compute the coordinates of the cell ahead
        cursor.execute("SELECT smjer, poljeID FROM stanje;")
        smjer, t_polje = cursor.fetchone()
        # - compute and store the values of the cell ahead
        cursor.execute("CALL spremi_polje_ispred(%s)", smjer)
Example No. 14
def train(model, data, functions, params):
    """Generic routine to perform training on the GPU using Theano-compiled
    functions and common parameters.

    This will run through a specified number of 'epochs', each consisting of
    a full pass through the training data. The epochs are broken into batches
    as normal for Stochastic Gradient Descent.

    functions: A dictionary containing all of the necessary functions for
        training. It will at least have 'momentum', 'update', and 'train_E'
        functions. 'momentum' updates the delta for each parameter, 'update'
        applies the current delta, and 'train_E' gets the current training
        cost. For supervised training, 'val_E' will usually be included
        so you can keep track of your progress on the validation set.
    params: Necessary training params: 'LR', 't_batches', 'n_epochs', 'verb',
        'v_batches', 'rho', 'LRsched', 'logfile', and 'error' (links to where
        the best error is tracked).
    """
    LR = params['LR']
    Nb = 0
    for chunk_i in range(len(data.b_samples)):
        Nb += params['t_batches'][chunk_i]

    print "Training {} epochs at LR = {} rho = {}".format(
        params['n_epochs'], LR, params['rho'])
    print "Using schedule:", sorted(params['LRsched'].items())

    # reference augmentation for checking error (centered, no flip)
    T_aug = model.ref_aug

    # Main training loop
    start_time = time.clock()
    for epoch in range(params['n_epochs']):
        ct = 0

        for chunk_i in range(len(data.b_samples)):
            data.T[0].set_value(data.raw[chunk_i])
            data.T[1].set_value(
                np.asarray(data.labels[chunk_i], dtype=data.ltype))

            for batch_i in range(params['t_batches'][chunk_i]):

                functions['momentum'](batch_i, LR, model.gen_aug())
                functions['update']()

                if params['verb'] and (ct + batch_i + 1) % int(Nb / 5) == 0:
                    print '.',

            ct += params['t_batches'][chunk_i]

        # check the weight distribution
        model.param_status(epoch, output=open("wlog", 'a'))

        # compute error on test and validation set
        c_train_error = [
            functions['train_E'](i, T_aug)
            for i in xrange(params['t_batches'][-1])
        ]

        if epoch in params['LRsched']:
            LR = params['LRsched'][epoch]

        err_train = np.mean(c_train_error)
        if 'val_E' in functions:
            c_val_error = [
                functions['val_E'](i, T_aug)
                for i in xrange(params['v_batches'])
            ]
            err_val = np.mean(c_val_error)
        else:
            err_val = err_train

        # if we achieved a new best validation score
        # save the model and best validation score
        if err_val < getattr(params['error'], "best_error"):
            if params['verb']:
                print 'S',
            setattr(params['error'], "best_error", err_val)
            model.save_model()

        else:
            print ' ',

        curr_time = NNl.nice_time(time.clock() - start_time)

        if 'val_E' in functions:
            if params['verb']:
                print(
                    "{} | epoch {: >4}, LR={:.4f}, train: {:.5f}, val: {:.5f}".
                    format(curr_time, epoch, LR, err_train, err_val))
            else:
                print '.',
            params['logfile'].write("{} {: >4} {:.6f} {:.8f} {:.8f}\n".format(
                curr_time, epoch, LR, err_train, err_val))
        else:
            if params['verb']:
                print("{} | epoch {: >4}, LR={:.5f}, train: {:.6f}".format(
                    curr_time, epoch, LR, err_train))
            params['logfile'].write("{} {: >4} {:.6f} {:.8f}\n".format(
                curr_time, epoch, LR, err_train))
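The checkpointing rule in train() is simply 'strictly better validation error than the best seen so far', tracked on whatever object was passed as params['error'] (the model for backprop, the layer for autoencoder pretraining). A stripped-down sketch of that bookkeeping:

import numpy as np

class ErrorTracker(object):
    """Stand-in for the model or layer whose best_error attribute train() updates."""
    def __init__(self):
        self.best_error = np.inf

tracker = ErrorTracker()
for err_val in [0.90, 0.75, 0.80, 0.60]:
    if err_val < tracker.best_error:
        tracker.best_error = err_val   # in train() this is where save_model() runs
print tracker.best_error               # 0.6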
Example No. 15
def create_functions(model, data, rho, profmode):
    """Creates Theano functions for SGD backprop.

    Args:
        model: Model instance providing the cost, symbolic variables, and layers.
        data: Dataset instance on which to train.
        rho: momentum parameter
        profmode: used only for profiling
    """

    print 'Compiling functions...',

    # allocate symbolic variables for the data
    index = T.lscalar('index')  # index to a [mini]batch
    lrate = T.fscalar('lrate')  # learning rate
    aug = T.ivector('aug')  # data augmentation (jitter, flip)
    in_dims = model.x_shape[2:]
    x = model.x
    y = model.y

    # Functions to calculate our training and validation error while we run
    functions = {}
    f_input = [index, aug]

    functions['train_E'] = theano.function(
        f_input,
        model.val_error,
        givens={
            x: NNl.get_batch(data.T[0], index, data.batch_n, in_dims, aug),
            y: NNl.get_batch_0(data.T[1], index, data.batch_n)
        },
        mode=profmode)

    if not data.test:
        functions['val_E'] = theano.function(
            f_input,
            model.val_error,
            givens={
                x: NNl.get_batch(data.V[0], index, data.batch_n, in_dims, aug),
                y: NNl.get_batch_0(data.V[1], index, data.batch_n)
            },
            mode=profmode)

    train_input = [index, lrate, aug]

    # Functions to update the model, via momentum-based SGD
    mom_updates = []
    p_updates = []
    for layer in model.layers[-1:0:-1]:
        grads = T.grad(model.cost, layer.params)

        for grad_i, param_i in zip(grads, layer.params):
            delta_i = theano.shared(param_i.get_value() * 0.)
            c_update = (delta_i, rho * delta_i - lrate * grad_i -
                        lrate * layer.l2decay * param_i)
            mom_updates.append(c_update)
            p_updates.append((param_i, param_i + delta_i))
    functions['momentum'] = theano.function(
        train_input,
        model.cost,
        updates=mom_updates,
        givens={
            x: NNl.get_batch(data.T[0], index, data.batch_n, in_dims, aug),
            y: NNl.get_batch_0(data.T[1], index, data.batch_n)
        },
        mode=profmode)
    functions['update'] = theano.function([], updates=p_updates, mode=profmode)

    print 'done'

    return functions
Example No. 16
def train(model, data, functions, params):
    """Generic routine to perform training on the GPU using Theano-compiled
    functions and common parameters.

    This will run through a specified number of 'epochs', each consisting of
    a full pass through the training data. The epochs are broken into batches
    as normal for Stochastic Gradient Descent.

    functions: A dictionary containing all of the necessary functions for
        training. It will at least have 'momentum', 'update', and 'train_E'
        functions. 'momentum' updates the delta for each parameter, 'update'
        applies the current delta, and 'train_E' gets the current training
        cost. For supervised training, 'val_E' will usually be included
        so you can keep track of your progress on the validation set.
    params: Necessary training params: 'LR', 't_batches', 'n_epochs', 'verb',
        'v_batches', 'rho', 'LRsched', 'logfile', and 'error' (links to where
        the best error is tracked).
    """
    LR = params['LR']
    Nb = 0
    for chunk_i in range(len(data.b_samples)):
        Nb += params['t_batches'][chunk_i]

    print "Training {} epochs at LR = {} rho = {}".format(
            params['n_epochs'], LR, params['rho'])
    print "Using schedule:", sorted(params['LRsched'].items())

    # reference augmentation for checking error (centered, no flip)
    T_aug = model.ref_aug

    # Main training loop
    start_time = time.clock()
    for epoch in range(params['n_epochs']):
        ct = 0

        for chunk_i in range(len(data.b_samples)):
            data.T[0].set_value(data.raw[chunk_i])
            data.T[1].set_value(np.asarray(data.labels[chunk_i], dtype=data.ltype))

            for batch_i in range(params['t_batches'][chunk_i]):

                functions['momentum'](batch_i, LR, model.gen_aug())
                functions['update']()

                if params['verb'] and (ct + batch_i + 1) % int(Nb / 5) == 0:
                    print '.',

            ct += params['t_batches'][chunk_i]

        # check the weight distribution
        model.param_status(epoch, output=open("wlog", 'a'))

        # compute error on test and validation set
        c_train_error = [functions['train_E'](i, T_aug) for i in xrange(
                params['t_batches'][-1])]

        if epoch in params['LRsched']:
            LR = params['LRsched'][epoch]

        err_train = np.mean(c_train_error)
        if 'val_E' in functions:
            c_val_error = [functions['val_E'](i, T_aug)
                    for i in xrange(params['v_batches'])]
            err_val = np.mean(c_val_error)
        else:
            err_val = err_train

        # if we achieved a new best validation score
        # save the model and best validation score
        if err_val < getattr(params['error'], "best_error"):
            if params['verb']:
                print 'S',
            setattr(params['error'], "best_error", err_val)
            model.save_model()

        else:
            print ' ',

        curr_time = NNl.nice_time(time.clock() - start_time)

        if 'val_E' in functions:
            if params['verb']:
                print("{} | epoch {: >4}, LR={:.4f}, train: {:.5f}, val: {:.5f}"
                        .format(curr_time, epoch, LR, err_train, err_val))
            else:
                print '.',
            params['logfile'].write("{} {: >4} {:.6f} {:.8f} {:.8f}\n".format(
                    curr_time, epoch, LR, err_train, err_val))
        else:
            if params['verb']:
                print("{} | epoch {: >4}, LR={:.5f}, train: {:.6f}".format(
                    curr_time, epoch, LR, err_train))
            params['logfile'].write("{} {: >4} {:.6f} {:.8f}\n".format(
                    curr_time, epoch, LR, err_train))
Example No. 17
def create_functions_ae(layer, training, activation, batch_size,
        rho, x, in_dims, profmode):
    """Creates the Theano functions necessary to train as an autoencoder.

    Args:
        layer: The layer (FC) on which training will occur.
        training: The set of training data (no labels, just features).
        activation: The function to use for 'reconstruction' activation. This
            will generally align with the distribution found in the input you
            are trying to reconstruct. For example, for mean-std normalized
            input data, you might use a Tanh function in your 1st layer. If
            that first layer has a ReLU neuronal activation, then your 2nd
            hidden layer would probably use the SoftReLU reconstruction
            activation.
        batch_size: size of the batches used for autoencoder training.
        rho: momentum parameter
        x: Theano variable input to the current layer.
        in_dims: shape of the input piped through 'x'.
        profmode: Flag for profiling

    Returns:
        Dictionary of functions for autoencoder training: cost, and 2 updates
    """

    print 'Compiling autoencoder functions...',

    # allocate symbolic variables for the data
    index = T.lscalar('index')  # index to a [mini]batch
    lrate = T.fscalar('lrate')  # learning rate
    aug = T.ivector('aug')      # data augmentation (jitter, flip)

    # Functions to calculate our training and validation error while we run
    functions = {}

    cost = layer.reconstruct_mse(activation)

    f_input = [index, aug]

    functions['train_E'] = theano.function(f_input, cost, givens={
            x: NNl.get_batch(training[0], index, batch_size, in_dims, aug)},
            mode=profmode)

    # create a list of gradients for all model parameters
    grads = T.grad(cost, layer.pt_params)

    train_input = [index, lrate, aug]

    # Functions to update the model, via momentum-based SGD
    mom_updates = []
    p_updates = []
    for grad_i, param_i in zip(grads, layer.pt_params):
        delta_i = theano.shared(param_i.get_value()*0.)
        c_update = (delta_i, rho * delta_i - lrate * grad_i)
        mom_updates.append(c_update)
        p_updates.append((param_i, param_i + delta_i))
    functions['momentum'] = theano.function(train_input, cost,
            updates=mom_updates, givens={x: NNl.get_batch(training[0],
                index, batch_size, in_dims, aug)}, mode=profmode)
    functions['update'] = theano.function([], updates=p_updates,
            mode=profmode)

    print 'done'

    return functions