Example #1
def __init__(self):
    self.dt = 1
    self.xdim = 1
    self.udim = 1
    # State x and control u as float32 column matrices (T.fcol).
    self.x = T.fcol()
    self.u = T.fcol()
    # Stack state and control into one flat vector, then restore a
    # trailing broadcastable column axis.
    self.xu_flat = T.flatten(T.concatenate([self.x, self.u]))
    self.xu = self.xu_flat.dimshuffle(0, 'x')
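A minimal standalone sketch (not from the repo above) of what this pattern
does: T.fcol declares a float32 matrix with one broadcastable column, so
concatenating two fcols along axis 0 and flattening yields a flat float32
vector, and dimshuffle(0, 'x') restores the trailing column axis.

import numpy as np
import theano
import theano.tensor as T

x = T.fcol('x')
u = T.fcol('u')
xu_flat = T.flatten(T.concatenate([x, u]))   # (xdim + udim,) vector
xu = xu_flat.dimshuffle(0, 'x')              # back to a column matrix
f = theano.function([x, u], [xu_flat, xu])
flat, col = f(np.ones((2, 1), dtype='float32'),
              np.zeros((1, 1), dtype='float32'))
# flat.shape == (3,), col.shape == (3, 1)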
Example #2
    def test_ndim_mismatch(self):
        data = self.rng.rand(5).astype('float32')
        x = f32sc(data)                  # float32 variable built from `data`
        y = tensor.fcol('y')             # float32 column matrix (ndim=2)
        cond = theano.tensor.iscalar('cond')

        # ifelse requires both branches to share dtype and ndim, so mixing
        # x and y must raise TypeError in either argument order.
        self.assertRaises(TypeError, ifelse, cond, x, y)
        self.assertRaises(TypeError, ifelse, cond, y, x)
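For contrast, a hedged sketch of the case that does compile: ifelse accepts
branches that share both dtype and ndim, which is exactly the constraint the
TypeError above enforces.

import theano
import theano.tensor as T
from theano.ifelse import ifelse

cond = T.iscalar('cond')
a = T.fcol('a')
b = T.fcol('b')                      # same dtype and ndim as `a`
z = ifelse(cond, a, b)
f = theano.function([cond, a, b], z)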
Example #3
    def test_theano_grad(self):
        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            for sparse in [True, False]:
                batch_size, dim = self.rng.random_integers(2000, size=2)
                if sparse:
                    true_labels = np.zeros((batch_size, dim), np.float32)
                    for k, j in enumerate(self.rng.randint(dim, size=batch_size)):
                        true_labels[k, j] = 1.0
                else:
                    true_labels = self.rng.randint(dim, size=(batch_size, 1)).astype(np.int32)
                x = self.rng.randn(batch_size, dim).astype(np.float32)
                mask = (self.rng.rand(batch_size, 1) < 0.8).astype(np.float32)
                device_id = 0
                for with_mask in [False, True]:
                    # Theano model
                    th_x = T.fmatrix()
                    th_mask = T.fcol()
                    th_true_labels = T.fmatrix() if sparse else T.ivector()
                    if with_mask:
                        probs = T.nnet.softmax(th_mask * th_x)
                    else:
                        probs = T.nnet.softmax(th_x)
                    loss = T.mean(T.nnet.categorical_crossentropy(probs, th_true_labels))
                    if with_mask:
                        get_theano_grads = theano.function([th_x, th_true_labels, th_mask], T.grad(loss, wrt=th_x))
                        th_dL_dx = get_theano_grads(x, true_labels if sparse else true_labels[:, 0], mask)
                    else:
                        get_theano_grads = theano.function([th_x, th_true_labels], T.grad(loss, wrt=th_x))
                        th_dL_dx = get_theano_grads(x, true_labels if sparse else true_labels[:, 0])

                    # quagga model
                    x_gpu = Connector(Matrix.from_npa(x), device_id)
                    true_labels_gpu = Connector(Matrix.from_npa(true_labels))
                    mask_gpu = Connector(Matrix.from_npa(mask)) if with_mask else None
                    softmax_ce_block = SoftmaxCeBlock(x_gpu, true_labels_gpu, mask_gpu)
                    x_gpu.fprop()
                    true_labels_gpu.fprop()
                    if with_mask:
                        mask_gpu.fprop()
                    softmax_ce_block.fprop()
                    softmax_ce_block.bprop()
                    q_dL_dx = x_gpu.backward_matrix.to_host()

                    r.append(np.allclose(th_dL_dx, q_dL_dx))

        self.assertEqual(sum(r), len(r))
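A standalone sketch (illustrative, not part of the test) of the two label
encodings the test exercises: T.nnet.categorical_crossentropy accepts either
a one-hot float32 matrix or an int32 class vector for the same loss.

import theano
import theano.tensor as T

x = T.fmatrix('x')
probs = T.nnet.softmax(x)

y_int = T.ivector('y_int')           # dense integer class labels
loss_int = T.mean(T.nnet.categorical_crossentropy(probs, y_int))

y_hot = T.fmatrix('y_hot')           # one-hot float32 labels
loss_hot = T.mean(T.nnet.categorical_crossentropy(probs, y_hot))

f = theano.function([x, y_int, y_hot], [loss_int, loss_hot])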
Example #4
def run_networklg(train_wins, train_wins_labels,
			test_wins, test_wins_labels,
			mode = "same_random",
			batch_size = None,
			weights = None):

	input_var = T.tensor3('inputs')
	target_var = T.fcol('targets')

	if batch_size is None:
		batch_size = len(train_wins_labels)

	#print("Will create Lasagne network")
	netlg = create_networklg(input_var, sliding_window_length,
				mode = mode, weights = weights)

	#print("Will train network")
	validation_function = train_networklg(netlg, input_var, target_var,
				train_wins, train_wins_labels, batch_size)

	test_loss, predictions = test_networklg(netlg, input_var, target_var,
			validation_function, test_wins, test_wins_labels, batch_size)

	return test_loss, predictions
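A hedged note on the T.fcol target above: losses computed against an fcol
expect labels shaped (batch, 1) in float32, so 1-D label arrays are usually
reshaped before being fed in, e.g. (variable name illustrative):

labels_col = train_wins_labels.reshape(-1, 1).astype('float32')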
Example #5
def main(
        num_epochs=1,
        n_songs_train=1,
        n_songs_val=1,
        n_songs_test=1,
        batch_size=256,
        learning_rate=1e-4
    ):
    """
    Main function
    """

    # Theano config
    theano.config.floatX = 'float32'

    train, val, test = None, None, None
    try:
        train, val, test = use_preparsed_data(
            outputdir='/zap/tsob/audio/',
            )
    except:  # fall back to parsing from scratch if no preparsed data exists
        train, val, test = get_data(
            n_songs_train=n_songs_train,
            n_songs_val=n_songs_val,
            n_songs_test=n_songs_test,
            outputdir='/zap/tsob/audio/',
            seed=None
            )

    # Save the returned metadata
    np.savez('/zap/tsob/audio/metadata', train, val, test)

    # Print the dimensions
    print "Data dimensions:"
    for datapt in [train['Xshape'], train['yshape'],
                   val['Xshape'], val['yshape'],
                   test['Xshape'], test['yshape']]:
        print datapt

    # Parse dimensions
    n_train = train['yshape'][0]
    n_val = val['yshape'][0]
    n_test = test['yshape'][0]
    n_chan = train['Xshape'][1]
    n_feats = train['Xshape'][2]
    n_frames = train['Xshape'][3]

    print "n_train  = {0}".format(n_train)
    print "n_val    = {0}".format(n_val)
    print "n_test   = {0}".format(n_test)
    print "n_chan   = {0}".format(n_chan)
    print "n_feats  = {0}".format(n_feats)
    print "n_frames = {0}".format(n_frames)

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4(name='inputs')
    target_var = T.fcol(name='targets')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions..."),
    network = build_cnn(input_var)
    print("Done.")

    # Create a loss expression for training, i.e., a scalar objective we want to minimize
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.binary_hinge_loss(prediction, target_var)
    loss = loss.mean()

    # Create update expressions for training
    # Here, we'll use adam
    params  = lasagne.layers.get_all_params(
        network,
        trainable=True
    )
    updates = lasagne.updates.adam(
        loss,
        params,
        learning_rate=learning_rate,
        beta1=0.95,
        beta2=0.999,
        epsilon=1e-08
    )

    # Create a loss expression for validation/testing.
    # The crucial difference here is that we do a deterministic forward pass
    # through the network, disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)

    test_loss = lasagne.objectives.binary_hinge_loss(
        test_prediction,
        target_var
        )
    test_loss = test_loss.mean()

    test_pred_fn = theano.function(
        [input_var],
        test_prediction,
        allow_input_downcast=True
        )

    # As a bonus, also create an expression for the classification accuracy.
    # NB: with a single output column, T.argmax(..., axis=1) is always 0, so
    # this accuracy is only meaningful for multi-column predictions.
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function(
        [input_var, target_var],
        loss,
        updates=updates,
        mode=NanGuardMode(                                          #TODO remove
            nan_is_error=True, inf_is_error=True, big_is_error=True #TODO remove
            ),                                                      #TODO remove
        allow_input_downcast=True
    )

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function(
        [input_var, target_var],
        [test_loss, test_acc],
        allow_input_downcast=True
    )

    # Finally, launch the training loop.
    print("Starting training...")

    train_error_hist = []

    # We iterate over epochs:
    for epoch in range(num_epochs):

        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()

        for batch in iterate_minibatches(
            train, batch_size, shuffle=True
            ):
            inputs, targets = batch
            train_err_increment = train_fn(inputs, targets)
            train_err += train_err_increment
            train_error_hist.append(train_err_increment)
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(val, batch_size, shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.8f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.8f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))
    print("Done training.")

    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    test_predictions = []
    for batch in iterate_minibatches(test, batch_size, shuffle=False):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        test_predictions.append( test_pred_fn(inputs) )
        test_err += err
        test_acc += acc
        test_batches += 1
    print("Final results:")
    print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print("  test accuracy:\t\t{:.2f} %".format(
        test_acc / test_batches * 100))

    # Optionally, you could now dump the network weights to a file like this:
    timestr = str(time.time())
    np.savez('/zap/tsob/audio/model'+timestr+'.npz', *lasagne.layers.get_all_param_values(network))
    np.save('/zap/tsob/audio/train_error_hist'+timestr+'.npy', train_error_hist)
    np.save('/zap/tsob/audio/test_predictions'+timestr+'.npy', test_predictions)
    print "Wrote model to {0}, test error histogram to {1}, and test predictions to {2}".format(
        'model'+timestr+'.npz',
        'train_error_hist'+timestr+'.npy',
        'test_predictions'+timestr+'.npy'
        )
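The iterate_minibatches helper used above is not shown in this snippet; a
plausible minimal stand-in over aligned arrays (the real one also slices the
train/val/test dicts) could look like:

import numpy as np

def iterate_minibatches_arrays(X, y, batch_size, shuffle=False):
    # Yield (inputs, targets) slices of batch_size rows each.
    idx = np.arange(len(X))
    if shuffle:
        np.random.shuffle(idx)
    for start in range(0, len(X) - batch_size + 1, batch_size):
        sel = idx[start:start + batch_size]
        yield X[sel], y[sel]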
Example #6
    def buildModel(self):
        print(' -- Building...')
        x_init = sparse.csr_matrix('x', dtype='float32')
        y_init = T.imatrix('y')
        g_init = T.imatrix('g')
        ind_init = T.ivector('ind')
        sub_path_init = T.imatrix('subPathsBatch')
        mask_init = T.fmatrix('subMask')

        # step train
        x_input = lgl.InputLayer(shape=(None, self.x.shape[1]),
                                 input_var=x_init)
        g_input = lgl.InputLayer(shape=(None, 2), input_var=g_init)
        ind_input = lgl.InputLayer(shape=(None, ), input_var=ind_init)
        pair_second = lgl.SliceLayer(g_input, indices=1, axis=1)
        pair_first = lgl.SliceLayer(g_input, indices=0, axis=1)
        pair_first_emd = lgl.EmbeddingLayer(pair_first,
                                            input_size=self.num_ver,
                                            output_size=self.embedding_size)
        emd_to_numver = layers.DenseLayer(
            pair_first_emd,
            self.num_ver,
            nonlinearity=lg.nonlinearities.softmax)
        index_emd = lgl.EmbeddingLayer(ind_input,
                                       input_size=self.num_ver,
                                       output_size=self.embedding_size,
                                       W=pair_first_emd.W)
        x_to_ydim = layers.SparseLayer(x_input,
                                       self.y.shape[1],
                                       nonlinearity=lg.nonlinearities.softmax)
        index_emd = layers.DenseLayer(index_emd,
                                      self.y.shape[1],
                                      nonlinearity=lg.nonlinearities.softmax)
        concat_two = lgl.ConcatLayer([x_to_ydim, index_emd], axis=1)
        concat_two = layers.DenseLayer(concat_two,
                                       self.y.shape[1],
                                       nonlinearity=lg.nonlinearities.softmax)
        concat_two_output = lgl.get_output(concat_two)
        step_loss = lgo.categorical_crossentropy(concat_two_output,
                                                 y_init).mean()
        hid_loss = lgl.get_output(x_to_ydim)
        step_loss += lgo.categorical_crossentropy(hid_loss, y_init).mean()
        emd_loss = lgl.get_output(index_emd)
        step_loss += lgo.categorical_crossentropy(emd_loss, y_init).mean()
        step_params = [
            index_emd.W, index_emd.b, x_to_ydim.W, x_to_ydim.b, concat_two.W,
            concat_two.b
        ]
        step_updates = lg.updates.sgd(step_loss,
                                      step_params,
                                      learning_rate=self.step_learning_rate)
        self.step_train = theano.function([x_init, y_init, ind_init],
                                          step_loss,
                                          updates=step_updates,
                                          on_unused_input='ignore')
        self.test_fn = theano.function([x_init, ind_init],
                                       concat_two_output,
                                       on_unused_input='ignore')

        # supervised train
        fc_output = lgl.get_output(emd_to_numver)
        pair_second_output = lgl.get_output(pair_second)
        sup_loss = lgo.categorical_crossentropy(fc_output,
                                                pair_second_output).sum()
        sup_params = lgl.get_all_params(emd_to_numver, trainable=True)
        sup_updates = lg.updates.sgd(sup_loss,
                                     sup_params,
                                     learning_rate=self.sup_learning_rate)
        self.sup_train = theano.function([g_init],
                                         sup_loss,
                                         updates=sup_updates,
                                         on_unused_input='ignore')

        cross_entropy = lgo.categorical_crossentropy(fc_output,
                                                     pair_second_output)
        cross_entropy = T.reshape(cross_entropy, (1, self.unsup_batch_size),
                                  ndim=None)

        mask_input = lgl.InputLayer(shape=(None, self.window_size + 1),
                                    input_var=mask_init)
        subPath_in = lgl.InputLayer(shape=(None, self.window_size + 1),
                                    input_var=sub_path_init)
        sub_path_emd = lgl.EmbeddingLayer(subPath_in,
                                          input_size=self.num_ver,
                                          output_size=self.embedding_size,
                                          W=pair_first_emd.W)

        lstm_layer = lgl.LSTMLayer(sub_path_emd,
                                   self.lstm_hidden_units,
                                   grad_clipping=3,
                                   mask_input=mask_input)

        # handle path weight
        max1 = T.mean(lgl.get_output(lstm_layer), axis=1)
        max2 = T.mean(max1, axis=1)
        # NB: this fcol is immediately overwritten by the reshape below and is
        # never used; max2_init ends up as a computed (subpath_num, 1) tensor.
        max2_init = T.fcol('max2')
        max2_init = T.reshape(max2, (self.subpath_num, 1))
        max2_input = lgl.InputLayer(shape=(self.subpath_num, 1),
                                    input_var=max2_init)
        max2_input = lgl.BatchNormLayer(max2_input)
        path_weight = lgl.get_output(max2_input)
        path_weight = lg.nonlinearities.sigmoid(path_weight)
        path_weight = 1 + 0.3 * path_weight

        # unsupervised train
        reweight_loss = T.dot(cross_entropy, path_weight)[0][0]
        lstm_params_all = lgl.get_all_params(lstm_layer, trainable=True)
        lstm_params = list(set(lstm_params_all).difference(set(sup_params)))
        lstm_updates = lg.updates.sgd(reweight_loss,
                                      lstm_params,
                                      learning_rate=0.01)
        self.lstm_fn = theano.function([sub_path_init, g_init, mask_init],
                                       reweight_loss,
                                       updates=lstm_updates,
                                       on_unused_input='ignore')
        alpha_updates = lg.updates.sgd(reweight_loss,
                                       sup_params,
                                       learning_rate=0.001)
        self.alpha_fn = theano.function([sub_path_init, g_init, mask_init],
                                        reweight_loss,
                                        updates=alpha_updates,
                                        on_unused_input='ignore')
        print(' -- Done!')
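One technique in this snippet is worth isolating as a hedged sketch: Lasagne
accepts an arbitrary Theano expression as an InputLayer's input_var, which is
how the LSTM mean above flows through BatchNormLayer without a separately
compiled input.

import theano.tensor as T
import lasagne.layers as lgl

raw = T.ftensor3('raw')                        # e.g. an LSTM output
expr = T.mean(T.mean(raw, axis=1), axis=1)     # collapse to (batch,)
expr = T.reshape(expr, (expr.shape[0], 1))     # (batch, 1) column
inp = lgl.InputLayer(shape=(None, 1), input_var=expr)
out = lgl.get_output(lgl.BatchNormLayer(inp))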
Example #7
def main(num_epochs=100, num_points=1200, compute_flag='cpu'):
    # Arguments passed as string need to be converted to int
    num_epochs = int(num_epochs)
    num_points = int(num_points)
    # Define name of output files
    results_file_name = 'exp_' + str(num_epochs) + '_' + str(
        num_points) + '_' + compute_flag + '.csv'
    network_file_name = 'network_' + str(num_epochs) + '_' + str(
        num_points) + '_' + compute_flag
    print 'Saving file to: %s' % results_file_name
    print 'Number of points: %d ' % num_points
    print 'Compute Flag: %s ' % compute_flag
    save_file(results_file_name)
    Deep_learner = DCNN_network.DCNN_network()
    # Define the input tensor
    input_var = T.tensor4('inputs')
    # Define the output tensor (in this case it is a real value or reflectivity)
    if compute_flag == 'gpu3_softmax':
        output_var = T.ivector('targets')
    else:
        output_var = T.fcol('targets')
    # User input to decide which experiment to run, cpu runs were performed
    # to check if the network was working correctly
    if compute_flag == 'cpu':
        network, l_hidden1 = Deep_learner.build_CNN(input_var)
    elif compute_flag == 'cpu2':
        network, l_hidden1 = Deep_learner.build_CNN_2(input_var)
    elif compute_flag == 'cpu3':
        network, l_hidden1 = Deep_learner.build_CNN_3(input_var)
    elif compute_flag == 'gpu2':
        print('gpu2 experiment')
        network, l_hidden1 = Deep_learner.build_DCNN_2(input_var)
    elif compute_flag == 'gpu3':
        print('gpu3 experiment')
        network, l_hidden1 = Deep_learner.build_DCNN_3(input_var)
    elif compute_flag == 'deep':
        network, l_hidden1 = Deep_learner.build_DCNN_deep(input_var)
    elif compute_flag == 'gpu3_softmax':
        network, l_hidden1 = Deep_learner.build_DCNN_3_softmax(input_var)
    else:
        network, l_hidden1 = Deep_learner.build_DCNN(input_var)

    train_prediction = lasagne.layers.get_output(network)
    test_prediction = lasagne.layers.get_output(network)
    if compute_flag == 'gpu3_softmax':
        loss = lasagne.objectives.categorical_crossentropy(
            train_prediction, output_var)
        loss = loss.mean()
    else:

        # Define the mean square error objective function
        loss = T.mean(
            lasagne.objectives.squared_error(train_prediction, output_var))

        test_loss = T.mean(
            lasagne.objectives.squared_error(test_prediction, output_var))
        # Add a l1 regulerization on the fully connected dense layer
        l1_penalty = regularize_layer_params(l_hidden1, l1)

        loss = loss + l1_penalty

        test_loss = loss + l1_penalty

    params = lasagne.layers.get_all_params(network, trainable=True)

    updates = lasagne.updates.nesterov_momentum(loss,
                                                params,
                                                learning_rate=0.0000001,
                                                momentum=0.9)

    # Symbolic training accuracy, baked into train_fn below. (Note the Python
    # name train_acc is later reused as a plain accumulator in the loop.)
    train_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), output_var),
                       dtype=theano.config.floatX)
    # Define theano function which generates and compiles C code for the optimization problem
    train_fn = theano.function([input_var, output_var], [loss, train_acc],
                               updates=updates)

    #    test_fn = theano.function([input_var, output_var],test_loss, updates=updates)

    base_path = '/home/an67a/deep_nowcaster/data/dataset2/'
    training_set_list = os.listdir(base_path)
    training_set_list = filter(lambda x: x[-4:] == '.pkl' and 'val' not in x,
                               training_set_list)
    validation_set_list = os.listdir(base_path)
    validation_set_list = filter(lambda x: x[-4:] == '.pkl' and 'val' in x,
                                 validation_set_list)
    experiment_start_time = time.time()
    # Load Data Set
    DataSet = []
    print('Loading data set...')
    for file_name in training_set_list[:3]:
        print file_name
        temp_file = file(base_path + file_name, 'rb')
        X_train, Y_train = cPickle.load(temp_file)
        temp_file.close()
        Y_train = Y_train.reshape(-1, ).astype('uint8')
        DataSet.append((X_train, Y_train))

    print('Start training...')
    for epoch in range(num_epochs):
        print('Epoch number : %d ' % epoch)
        train_err = 0
        train_batches = 0
        train_acc = 0
        start_time = time.time()
        for data in DataSet:
            #        for file_name in training_set_list:
            #            print file_name
            #            temp_file = file(base_path + file_name,'rb')
            #            X_train,Y_train = cPickle.load(temp_file)
            #            Y_train = Y_train.astype('uint8')
            #            temp_file.close()
            for batch in iterate_minibatches(data[0],
                                             data[1],
                                             1059,
                                             shuffle=False):
                inputs, targets = batch
                err, acc = train_fn(inputs, targets)
                train_err += err
                train_acc += acc
                train_batches += 1
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(train_acc /
                                                          train_batches * 100))
        append_file(results_file_name, epoch + 1,
                    round(train_err / train_batches, 2),
                    round((train_acc / train_batches) * 100, 2))

        # Dump the network file every 100 epochs
        if (epoch + 1) % 100 == 0:
            print('creating network file')
            network_file = file(
                '/home/an67a/deep_nowcaster/output/' + network_file_name +
                '_' + str(epoch + 1) + '.pkl', 'wb')
            cPickle.dump(network,
                         network_file,
                         protocol=cPickle.HIGHEST_PROTOCOL)
            network_file.close()
    time_taken = round(time.time() - experiment_start_time, 2)
    print('The experiment took {:.3f}s'.format(time.time() -
                                               experiment_start_time))
    append_file(results_file_name, 'The experiment took', time_taken, 0)
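save_file and append_file are repo helpers not shown here; hypothetical
minimal CSV stand-ins consistent with how they are called above:

import csv

def save_file(path):
    # Create the results file (the real helper may also write a header).
    with open(path, 'w') as f:
        csv.writer(f).writerow(['epoch', 'train_loss', 'train_acc'])

def append_file(path, epoch, loss, acc):
    # Append one result row per epoch.
    with open(path, 'a') as f:
        csv.writer(f).writerow([epoch, loss, acc])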
Example #8
    def buildModel(self):
        print(' -- Building...')
        x_init = sparse.csr_matrix('x', dtype='float32')
        y_init = T.imatrix('y')
        gx_init = sparse.csr_matrix('gx', dtype='float32')
        gy_init = T.ivector('gy')
        gz_init = T.vector('gz')
        mask_init = T.fmatrix('subMask')

        # step train
        x_input = lgl.InputLayer(shape=(None, self.x.shape[1]),
                                 input_var=x_init)
        x_to_label = layers.SparseLayer(x_input, self.y.shape[1],
                                        nonlinearity=lg.nonlinearities.softmax)
        x_to_emd = layers.SparseLayer(x_input, self.embedding_size)
        W = x_to_emd.W
        x_to_emd = layers.DenseLayer(x_to_emd, self.y.shape[1],
                                     nonlinearity=lg.nonlinearities.softmax)
        x_concat = lgl.ConcatLayer([x_to_label, x_to_emd], axis=1)
        x_concat = layers.DenseLayer(x_concat, self.y.shape[1],
                                     nonlinearity=lg.nonlinearities.softmax)
        pred = lgl.get_output(x_concat)
        step_loss = lgo.categorical_crossentropy(pred, y_init).mean()
        hid_loss = lgl.get_output(x_to_label)
        step_loss += lgo.categorical_crossentropy(hid_loss, y_init).mean()
        emd_loss = lgl.get_output(x_to_emd)
        step_loss += lgo.categorical_crossentropy(emd_loss, y_init).mean()
        step_params = lgl.get_all_params(x_concat)
        step_updates = lg.updates.sgd(step_loss, step_params,
                                      learning_rate=self.step_learning_rate)
        self.step_train = theano.function([x_init, y_init], step_loss,
                                          updates=step_updates)
        self.test_fn = theano.function([x_init], pred)

        # supervised train
        gx_input = lgl.InputLayer(shape=(None, self.x.shape[1]),
                                  input_var=gx_init)
        gx_to_emd = layers.SparseLayer(gx_input, self.embedding_size, W=W)
        gx_to_emd = lgl.DenseLayer(gx_to_emd, self.num_ver,
                                   nonlinearity=lg.nonlinearities.softmax)
        gx_pred = lgl.get_output(gx_to_emd)
        g_loss = lgo.categorical_crossentropy(gx_pred, gy_init).sum()
        sup_params = lgl.get_all_params(gx_to_emd)
        sup_updates = lg.updates.sgd(g_loss, sup_params,
                                     learning_rate=self.sup_learning_rate)
        self.sup_train = theano.function([gx_init, gy_init, gz_init], g_loss,
                                         updates=sup_updates,
                                         on_unused_input='ignore')

        # handle lstm input
        cross_entropy = lgo.categorical_crossentropy(gx_pred, gy_init)
        cross_entropy = T.reshape(cross_entropy, (1, self.subpath_num), ndim=None)
        mask_input = lgl.InputLayer(shape=(None, self.window_size + 1),
                                    input_var=mask_init)
        sub_path_batch1 = sparse.csr_matrix('x', dtype='float32')
        sub_path_input1 = lgl.InputLayer(shape=(None, self.x.shape[1]),
                                         input_var=sub_path_batch1)
        sub_path_batch2 = sparse.csr_matrix('x', dtype='float32')
        sub_path_input2 = lgl.InputLayer(shape=(None, self.x.shape[1]),
                                         input_var=sub_path_batch2)
        sub_path_batch3 = sparse.csr_matrix('x', dtype='float32')
        sub_path_input3 = lgl.InputLayer(shape=(None, self.x.shape[1]),
                                         input_var=sub_path_batch3)
        sub_path_batch4 = sparse.csr_matrix('x', dtype='float32')
        sub_path_input4 = lgl.InputLayer(shape=(None, self.x.shape[1]),
                                         input_var=sub_path_batch4)
        sub_path_emd1 = layers.SparseLayer(sub_path_input1, self.embedding_size,
                                           W=W)
        sub_path_emd1 = T.reshape(lgl.get_output(sub_path_emd1),
                                  (self.subpath_num, 1, self.embedding_size))
        sub_path_emd2 = layers.SparseLayer(sub_path_input2,
                                           self.embedding_size, W=W)
        sub_path_emd2 = T.reshape(lgl.get_output(sub_path_emd2),
                                  (self.subpath_num, 1, self.embedding_size))
        sub_path_emd3 = layers.SparseLayer(sub_path_input3, self.embedding_size,
                                           W=W)
        sub_path_emd3 = T.reshape(lgl.get_output(sub_path_emd3),
                                  (self.subpath_num, 1, self.embedding_size))
        sub_path_emd4 = layers.SparseLayer(sub_path_input4, self.embedding_size,
                                           W=W)
        sub_path_emd4 = T.reshape(lgl.get_output(sub_path_emd4),
                                  (self.subpath_num, 1, self.embedding_size))
        sub_path_concat = T.concatenate([sub_path_emd1, sub_path_emd2,
                                         sub_path_emd3, sub_path_emd4], axis=1)
        sub_path_concat_layer = lgl.InputLayer(shape=(None, self.window_size + 1,
                                                      self.embedding_size),
                                               input_var=sub_path_concat)

        # lstm layer
        lstm_layer = lgl.LSTMLayer(sub_path_concat_layer,
                                   self.lstm_hidden_units,
                                   grad_clipping=3,
                                   mask_input=mask_input)

        # handle path weight
        max1 = T.mean(lgl.get_output(lstm_layer), axis=1)
        max2 = T.mean(max1, axis=1)
        # NB: this fcol is immediately overwritten by the reshape below and is
        # never used; max2_init ends up as a computed (subpath_num, 1) tensor.
        max2_init = T.fcol('max2')
        max2_init = T.reshape(max2, (self.subpath_num, 1))
        max2_input = lgl.InputLayer(shape=(self.subpath_num, 1),
                                    input_var=max2_init)
        max2_input = lgl.BatchNormLayer(max2_input)
        path_weight = lgl.get_output(max2_input)
        path_weight = lg.nonlinearities.sigmoid(path_weight)
        path_weight = 1 + 0.3 * path_weight

        # unsupervised train
        reweight_loss = T.dot(cross_entropy, path_weight)[0][0]
        lstm_params = lgl.get_all_params(lstm_layer, trainable=True)
        lstm_updates = lg.updates.sgd(reweight_loss, lstm_params,
                                      learning_rate=0.01)
        self.lstm_fn = theano.function([gx_init, gy_init, gz_init,
                                        sub_path_batch1, sub_path_batch2,
                                        sub_path_batch3, sub_path_batch4,
                                        mask_init],
                                       reweight_loss,
                                       updates=lstm_updates,
                                       on_unused_input='ignore')
        alpha_updates = lg.updates.sgd(reweight_loss, sup_params,
                                       learning_rate=0.001)
        self.alpha_fn = theano.function([gx_init, gy_init, gz_init,
                                         sub_path_batch1, sub_path_batch2,
                                         sub_path_batch3, sub_path_batch4,
                                         mask_init],
                                        reweight_loss,
                                        updates=alpha_updates,
                                        on_unused_input='ignore')

        print(' -- Done!')
Example #9
# NOTE: this snippet begins mid-statement. A plausible reconstruction of the
# truncated weight initialisation, assuming layer-size names from context:
W2 = theano.shared(np.random.normal(0, 1,
                                    size=(hiddenLayerSize,
                                          outputLayerSize)).astype("float32"),
                   name="W2")
b2 = theano.shared(np.zeros(outputLayerSize).astype("float32"), name="b2")
#%%
# Forward propagation
X = T.matrix("X")
z1 = T.dot(X, W1) + b1
a1 = T.nnet.sigmoid(z1)
z2 = T.dot(a1, W2) + b2
# using ReLU improves the results a lot
y_hat = T.nnet.relu(z2)
forward = theano.function([X], y_hat)
#%%
# cost function, gradient and optimizer
epsilon = 0.01
y = T.fcol("y")
loss = 0.5 * ((y - y_hat)**2).sum()
calloss = theano.function([X, y], loss)
# gradient
dW1, dW2 = T.grad(loss, [W1, W2])
db1, db2 = T.grad(loss, [b1, b2])
# optimizer
#%%
train = theano.function(inputs=[X, y],
                        outputs=[y_hat, loss],
                        updates=[[W2, W2 - epsilon * dW2],
                                 [W1, W1 - epsilon * dW1],
                                 [b2, b2 - epsilon * db2],
                                 [b1, b1 - epsilon * db1]])
#%%
cost = []
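A hedged usage sketch for the network above (toy data; assumes
theano.config.floatX == 'float32' so that X = T.matrix("X") is float32, and
infers the input size from W1, which is defined before this excerpt):

X_data = np.random.rand(100, W1.get_value().shape[0]).astype("float32")
y_data = np.random.rand(100, 1).astype("float32")   # fcol target: (N, 1)
for epoch in range(50):
    y_pred, l = train(X_data, y_data)
    cost.append(l)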
Example #10
def conv_net(tr_block, val_block, num_epochs, exp_no,
             load_model_weights=False, model_file_name=''):
    #------------------------------------------
    # Model
    
    input_var_ipw = T.tensor4('inputs')
    
    input_var_refl = T.tensor4('inputs')
    
    target_var = T.fcol('targets')
    
    net,l1_hidden = build_DCNN_softmax_mod_special_refl(input_var_refl)
    
#    net,l1_hidden = build_2DCNN_softmax_special(input_var_ipw,input_var_refl)
    
    l2_penalty = regularize_layer_params(l1_hidden, l2)
    
    prediction = lasagne.layers.get_output(net)
    
    loss = lasagne.objectives.squared_error(prediction,target_var)
    
#    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    
    loss = loss.mean() + l2_penalty
    
    if load_model_weights:
        print 'Loading existing model parameters...'
        model_file_name = '../data/1CNNneural_network_refl_' + str(exp_no) + '_100.pkl'
        model_file = file(model_file_name,'rb')
        model_weights = pkl.load(model_file)
        model_file.close()
        lasagne.layers.set_all_param_values(net,model_weights)
    
    params = lasagne.layers.get_all_params(net, trainable=True)
    
    updates = lasagne.updates.adadelta(loss, params)
    
#    updates = lasagne.updates.nesterov_momentum(
#            loss, params, learning_rate=0.0001, momentum=0.9)

    test_prediction = lasagne.layers.get_output(net, deterministic=True)
    
    test_loss = lasagne.objectives.squared_error(test_prediction,target_var)
    
#    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
#                                                            target_var)
    
    test_loss = test_loss.mean()
    
#    test_accuracy = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
#                      dtype=theano.config.floatX)
                
#    train_fn = theano.function([input_var_ipw,input_var_refl, target_var], loss, updates=updates)
    train_fn = theano.function([input_var_refl, target_var], loss, updates=updates)
    
#    val_fn = theano.function([input_var_ipw,input_var_refl, target_var], [test_loss])
    val_fn = theano.function([input_var_refl, target_var], [test_loss])
    #------------------------------------------
    performance_metrics = {}
    base_path = '/project/uma_michael_zink/deep_nowcaster/data/TrainTest/points_regression/' #'../data/TrainTest/points/'
    point_files = filter(lambda x: x[-4:] == '.pkl',os.listdir(base_path))
    val_indices = determine_indices(base_path + point_files[8],val_block)
    first_pass = True
#    val_batches = []
#    X_train_full_data = []
    for ep in range(num_epochs):
        performance_metrics[ep + 1] = []
        print 'Train Model for epoch: %d'%(ep)
        print '-'*50
        train_err = 0.
        train_batches = 0
        if first_pass:
            print 'Loading the entire data to memory!!!!'
            X_train, Y_train, X_val, Y_val = load_data_to_memory(point_files,val_indices)
            first_pass = False
            
#        X_mean_list = []
#        for ea_point in point_files[:2]:
#            temp_matrix = np.load(base_path + ea_point)
#            # Add this index for reflectivity features alone [:,4:,...]
#            X_train = np.vstack(map(lambda x: temp_matrix[x][1][:,4:,...],[i for i in range(len(temp_matrix)) if i not in val_indices])).astype('float')
#            Y_train = np.vstack(map(lambda x: temp_matrix[x][2],[i for i in range(len(temp_matrix)) if i not in val_indices]))
#            X_mean = X_train.mean(axis = 0)
#            X_train -= X_mean
#            if not first_pass:
#                X_val = np.vstack(map(lambda x: temp_matrix[x][1][:,4:,...],val_indices))
#                Y_val = np.vstack(map(lambda x: temp_matrix[x][2],val_indices))
#                val_batches.append((X_val,Y_val,X_mean))
#            # over here we are going to kill 1 sample at random and divide the 
#            # training examples to 4 batches
#            X_train_full_data.append((X_train,Y_train))
        
        
        for X_,Y_ in iterate_minibatches(X_train,Y_train, 250, shuffle = True):
#            train_err += train_fn(X_[:,:4,...].astype('float32'),X_[:,4:,...].astype('float32'),Y_.reshape(-1,1).astype('float32'))
            train_err += train_fn(X_[:,4:,...].astype('float32'),Y_.reshape(-1,1).astype('float32'))
            train_batches += 1
        print 'Number of batches %d '%train_batches
        print 'Training loss = %.6f'%(train_err/train_batches)
        val_loss = 0.
        
        val_batches_ctr = 0
#        if not first_pass:
#            
#            x_batch = np.vstack(map(lambda x: x[0] ,val_batches))
#            y_batch = np.vstack(map(lambda x: x[1],val_batches))
##            x_batch_means = np.vstack(map(lambda x: x[2],val_batches))
#            del val_batches
#        x_batch -= X_mean
        
        for x_val_batch,y_val_batch in iterate_minibatches(X_val,Y_val,250,shuffle = True):
#            x_val_batch = x_val_batch.astype('float')
#            temp = val_fn(x_val_batch[:,:4,...].astype('float32'),x_val_batch[:,4:,...].astype('float32'),y_val_batch.reshape(-1,1).astype('float32'))
            temp = val_fn(x_val_batch[:,4:,...].astype('float32'),y_val_batch.reshape(-1,1).astype('float32'))
#            val_acc += temp[0]
            val_loss += temp[0]
            val_batches_ctr+=1

        print 'Number of validation batches %d'%val_batches_ctr
        
#        print 'Validation accuracy for epoch %d = %.6f'%(ep,val_acc/val_batches_ctr)
        print 'Validation loss for epoch %d = %.6f'%(ep,val_loss/val_batches_ctr)
        
        performance_metrics[ep + 1].append({'val_loss': val_loss / val_batches_ctr,
#                                            'val_acc' : val_acc / val_batches_ctr,
                                            'train_loss' : train_err / train_batches})
        
        if (ep+ 1) % 10 == 0:
            params = convert_gpu_cpu(net)
            network_file_name = '1CNN_0maxpool_2048neural_network_p20_mod_special_refl8_regression_adadelta'
            network_file = file('../output/'+ network_file_name + '_' + str(exp_no) +'_' + str(ep + 1) + '.pkl','wb')
            pkl.dump(params,network_file,protocol = pkl.HIGHEST_PROTOCOL)
            network_file.close()
            f1 = file('../output/performance_metrics_' + network_file_name + '_' + str(exp_no) + '.pkl','wb')
            pkl.dump(performance_metrics,f1,protocol = pkl.HIGHEST_PROTOCOL)
            f1.close()
Example #11
    def __init__(self, stateSize, actionSize, numFrames, batchSize, discount,
                 rho, momentum, learningRate, rmsEpsilon, rng, updateRule,
                 batchAccumulator, freezeInterval):
        self.stateSize = stateSize
        self.actionSize = actionSize
        self.numFrames = numFrames
        self.batchSize = batchSize
        self.discount = discount
        self.rho = rho
        self.momentum = momentum
        self.learningRate = learningRate
        self.rmsEpsilon = rmsEpsilon
        self.rng = rng
        self.updateRule = updateRule
        self.batchAccumulator = batchAccumulator
        self.freezeInterval = freezeInterval

        lasagne.random.set_rng(self.rng)

        self.updateCounter = 0

        self.lOut = self.buildNetwork(self.stateSize, self.actionSize,
                                      self.numFrames, self.batchSize)

        if self.freezeInterval > 0:
            self.nextLOut = self.buildNetwork(self.stateSize, self.actionSize,
                                              self.numFrames, self.batchSize)
            self.resetQHat()

        states = T.ftensor3('states')
        nextStates = T.ftensor3('nextStates')
        rewards = T.fcol('rewards')
        actions = T.icol('actions')
        terminals = T.icol('terminals')

        # Shared variables for teaching from a minibatch of replayed
        # state transitions, each consisting of num_frames + 1 (due to
        # overlap) states, along with the chosen action and resulting
        # reward and terminal status.
        self.states_shared = theano.shared(
            numpy.zeros((self.batchSize, self.numFrames + 1, self.stateSize),
                        dtype=theano.config.floatX))
        self.rewards_shared = theano.shared(numpy.zeros(
            (self.batchSize, 1), dtype=theano.config.floatX),
                                            broadcastable=(False, True))
        self.actions_shared = theano.shared(numpy.zeros((self.batchSize, 1),
                                                        dtype='int32'),
                                            broadcastable=(False, True))
        self.terminals_shared = theano.shared(numpy.zeros((self.batchSize, 1),
                                                          dtype='int32'),
                                              broadcastable=(False, True))

        # Shared variable for a single state, to calculate qVals
        self.state_shared = theano.shared(
            numpy.zeros((self.numFrames, self.stateSize),
                        dtype=theano.config.floatX))

        qVals = lasagne.layers.get_output(self.lOut, states)

        if self.freezeInterval > 0:
            nextQVals = lasagne.layers.get_output(self.nextLOut, nextStates)
        else:
            nextQVals = lasagne.layers.get_output(self.lOut, nextStates)
            nextQVals = theano.gradient.disconnected_grad(nextQVals)

        # Cast terminals to floatX
        terminalsX = terminals.astype(theano.config.floatX)
        # T.eq(a, b) returns a variable representing the logical
        # equality (a == b)
        actionmask = T.eq(
            T.arange(self.actionSize).reshape((1, -1)), actions.reshape(
                (-1, 1))).astype(theano.config.floatX)

        target = (rewards + (T.ones_like(terminalsX) - terminalsX) *
                  self.discount * T.max(nextQVals, axis=1, keepdims=True))
        output = (qVals * actionmask).sum(axis=1).reshape((-1, 1))
        diff = target - output

        # no clip-delta branch here, since clip_delta = 0

        loss = (diff**2)

        if self.batchAccumulator == 'sum':
            loss = T.sum(loss)
        elif self.batchAccumulator == 'mean':
            loss = T.mean(loss)
        else:
            raise ValueError('Bad accumulator: {}'.format(self.batchAccumulator))

        params = lasagne.layers.helper.get_all_params(self.lOut)
        train_givens = {
            states: self.states_shared[:, :-1],
            nextStates: self.states_shared[:, 1:],
            rewards: self.rewards_shared,
            actions: self.actions_shared,
            terminals: self.terminals_shared
        }

        if self.updateRule == 'rmsprop':
            updates = lasagne.updates.rmsprop(loss, params, self.learningRate,
                                              self.rho, self.rmsEpsilon)

        elif self.updateRule == 'deepmind_rmsprop':
            updates = deepmind_rmsprop(loss, params, self.learningRate,
                                       self.rho, self.rmsEpsilon)
        else:
            raise ValueError('Unrecognized update: {}'.format(updateRule))

        if self.momentum > 0:
            updates = lasagne.updates.apply_momentum(updates, None,
                                                     self.momentum)

        self._train = theano.function([], [loss],
                                      updates=updates,
                                      givens=train_givens)
        q_givens = {
            states: self.state_shared.reshape(
                (1, self.numFrames, self.stateSize))
        }

        # self._q_vals=theano.function([],qVals[0], givens=q_givens)
        self._q_vals = theano.function([], qVals[0], givens=q_givens)
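One pattern above is worth a standalone, hedged sketch: the shared minibatch
buffers are declared broadcastable in their second dimension so their type
matches the T.fcol/T.icol placeholders they substitute via givens.

import numpy
import theano
import theano.tensor as T

batchSize = 32
rewards = T.fcol('rewards')                  # broadcastable (False, True)
rewards_shared = theano.shared(
    numpy.zeros((batchSize, 1), dtype='float32'),
    broadcastable=(False, True))             # must match to substitute
loss = T.sum(rewards ** 2)
f = theano.function([], loss, givens={rewards: rewards_shared})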
Example #12
def causalNeuralNetwork(data_dim, hidden_dim, output_dim, parameters):
    X = T.fcol('X')
    Y = T.fcol('Y')
    global XWh, Xbh, hWY, hbY
    XWh = theano.shared(np.random.uniform(-np.sqrt(1. / data_dim),
                                          np.sqrt(1. / data_dim),
                                          (data_dim, hidden_dim)),
                        name='XWh')
    Xbh = theano.shared(np.random.uniform(-np.sqrt(1. / hidden_dim),
                                          np.sqrt(1. / hidden_dim),
                                          (hidden_dim)),
                        name='Xbh')
    hWY = theano.shared(np.random.uniform(-np.sqrt(1. / hidden_dim),
                                          np.sqrt(1. / hidden_dim),
                                          (hidden_dim, output_dim)),
                        name='hWY')
    hbY = theano.shared(np.random.uniform(-np.sqrt(1. / output_dim),
                                          np.sqrt(1. / output_dim),
                                          (output_dim)),
                        name='hbY')

    # Shift into a new variable so X itself remains a valid theano.function
    # input below (reassigning X to a computed expression would break the
    # compiled functions at the end).
    if (parameters['input_shift_to_tanh']):
        X_shifted = (X - .5) * 2.
    else:
        X_shifted = X

    hidden = X_shifted.dot(XWh)
    if (parameters['use_bias']):
        hidden = hidden + Xbh
    if (parameters['hidden_activation'] == 1):
        hidden = T.tanh(hidden)
    elif (parameters['hidden_activation'] == 2):
        # ReLU fits poorly here: negative values carry meaning of their own
        # rather than being mere inactive units.
        hidden = T.maximum(T.zeros_like(hidden), hidden)

    output = hidden.dot(hWY)
    if (parameters['use_bias']):
        output = output + hbY
    if (parameters['hidden_activation'] == 1):
        output = T.tanh(output)

    if (parameters['output_shift_to_prob']):
        output = (output / 2.) + .5

    weights_sum = (T.sum(abs(XWh)) + T.sum(abs(Xbh)) + T.sum(abs(hWY)) +
                   T.sum(abs(hbY)))
    factor = 2.
    causal = causal_loss(XWh, factor) + causal_loss(Xbh, factor) + causal_loss(
        hWY, factor) + causal_loss(hbY, factor)

    softOutput = T.minimum(T.ones_like(output) * 0.999999999999, output)
    softOutput = T.maximum(T.ones_like(output) * 0.000000000001, softOutput)

    # Predictions
    hidden_prediction = hidden > 0.
    if (parameters['loss_function'] == 0):
        loss = -T.sum(Y * T.log(softOutput) + (1. - Y) *
                      (T.log(1. - softOutput)))
    elif (parameters['loss_function'] == 1):
        loss = T.mean(T.sqr(Y - output))
    else:
        loss = -T.mean(Y * T.log(softOutput) + (1. - Y) *
                       (T.log(1. - softOutput)))

    if (parameters['loss_weights_sum']):
        loss += weights_sum
    if (parameters['loss_causal_linear']):
        loss += causal

    if (parameters['use_bias']):
        var_list = [XWh, Xbh, hWY, hbY]
    else:
        var_list = [XWh, hWY]

    gradients = T.grad(loss, var_list)
    #     updates = lasagne.updates.rmsprop(gradients, var_list, learning_rate=learning_rate);
    updates = lasagne.updates.nesterov_momentum(gradients,
                                                var_list,
                                                learning_rate=0.01)

    sgd = theano.function([X, Y], [loss], updates=updates)
    predict = theano.function([X], [output, weights_sum])
    graph = theano.function([X],
                            [output, hidden_prediction, hidden, weights_sum])

    return sgd, predict, graph
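A hedged usage sketch of the closures returned above (made-up data and flags;
assumes the repo's causal_loss helper is importable, since the graph calls it
unconditionally):

import numpy as np

parameters = {'input_shift_to_tanh': False, 'use_bias': True,
              'hidden_activation': 1, 'output_shift_to_prob': False,
              'loss_function': 1, 'loss_weights_sum': False,
              'loss_causal_linear': False}
sgd, predict, graph = causalNeuralNetwork(1, 8, 1, parameters)
x = np.random.rand(16, 1).astype('float32')   # fcol input: (N, 1)
y = np.random.rand(16, 1).astype('float32')
(loss_value,) = sgd(x, y)
output, weights_sum = predict(x)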
Example #13
def conv_net(tr_block,
             val_block,
             num_epochs,
             exp_no,
             load_model_weights=False,
             model_file_name=''):
    #------------------------------------------
    # Model

    input_var_ipw = T.tensor4('inputs')

    input_var_refl = T.tensor4('inputs')

    target_var = T.fcol('targets')

    net, l1_hidden = build_DCNN_softmax_mod_special_refl(input_var_refl)

    #    net,l1_hidden = build_2DCNN_softmax_special(input_var_ipw,input_var_refl)

    l2_penalty = regularize_layer_params(l1_hidden, l2)

    prediction = lasagne.layers.get_output(net)

    loss = lasagne.objectives.squared_error(prediction, target_var)

    #    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)

    loss = loss.mean() + l2_penalty

    if load_model_weights:
        print 'Loading existing model parameters...'
        model_file_name = '../data/1CNNneural_network_refl_' + str(
            exp_no) + '_100.pkl'
        model_file = file(model_file_name, 'rb')
        model_weights = pkl.load(model_file)
        model_file.close()
        lasagne.layers.set_all_param_values(net, model_weights)

    params = lasagne.layers.get_all_params(net, trainable=True)

    updates = lasagne.updates.adadelta(loss, params)

    #    updates = lasagne.updates.nesterov_momentum(
    #            loss, params, learning_rate=0.0001, momentum=0.9)

    test_prediction = lasagne.layers.get_output(net, deterministic=True)

    test_loss = lasagne.objectives.squared_error(test_prediction, target_var)

    #    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
    #                                                            target_var)

    test_loss = test_loss.mean()

    #    test_accuracy = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
    #                      dtype=theano.config.floatX)

    #    train_fn = theano.function([input_var_ipw,input_var_refl, target_var], loss, updates=updates)
    train_fn = theano.function([input_var_refl, target_var],
                               loss,
                               updates=updates)

    #    val_fn = theano.function([input_var_ipw,input_var_refl, target_var], [test_loss])
    val_fn = theano.function([input_var_refl, target_var], [test_loss])
    #------------------------------------------
    performance_metrics = {}
    base_path = '/project/uma_michael_zink/deep_nowcaster/data/TrainTest/points_regression/'  #'../data/TrainTest/points/'
    point_files = filter(lambda x: x[-4:] == '.pkl', os.listdir(base_path))
    val_indices = determine_indices(base_path + point_files[8], val_block)
    first_pass = True
    #    val_batches = []
    #    X_train_full_data = []
    for ep in range(num_epochs):
        performance_metrics[ep + 1] = []
        print 'Train Model for epoch: %d' % (ep)
        print '-' * 50
        train_err = 0.
        train_batches = 0
        if first_pass:
            print 'Loading the entire data to memory!!!!'
            X_train, Y_train, X_val, Y_val = load_data_to_memory(
                point_files, val_indices)
            first_pass = False


#        X_mean_list = []
#        for ea_point in point_files[:2]:
#            temp_matrix = np.load(base_path + ea_point)
#            # Add this index for reflectivity features alone [:,4:,...]
#            X_train = np.vstack(map(lambda x: temp_matrix[x][1][:,4:,...],[i for i in range(len(temp_matrix)) if i not in val_indices])).astype('float')
#            Y_train = np.vstack(map(lambda x: temp_matrix[x][2],[i for i in range(len(temp_matrix)) if i not in val_indices]))
#            X_mean = X_train.mean(axis = 0)
#            X_train -= X_mean
#            if not first_pass:
#                X_val = np.vstack(map(lambda x: temp_matrix[x][1][:,4:,...],val_indices))
#                Y_val = np.vstack(map(lambda x: temp_matrix[x][2],val_indices))
#                val_batches.append((X_val,Y_val,X_mean))
#            # over here we are going to kill 1 sample at random and divide the
#            # training examples to 4 batches
#            X_train_full_data.append((X_train,Y_train))

        for X_, Y_ in iterate_minibatches(X_train, Y_train, 250, shuffle=True):
            #            train_err += train_fn(X_[:,:4,...].astype('float32'),X_[:,4:,...].astype('float32'),Y_.reshape(-1,1).astype('float32'))
            train_err += train_fn(X_[:, 4:, ...].astype('float32'),
                                  Y_.reshape(-1, 1).astype('float32'))
            train_batches += 1
        print 'Number of batches %d ' % train_batches
        print 'Training loss = %.6f' % (train_err / train_batches)
        val_loss = 0.

        val_batches_ctr = 0
        #        if not first_pass:
        #
        #            x_batch = np.vstack(map(lambda x: x[0] ,val_batches))
        #            y_batch = np.vstack(map(lambda x: x[1],val_batches))
        ##            x_batch_means = np.vstack(map(lambda x: x[2],val_batches))
        #            del val_batches
        #        x_batch -= X_mean

        for x_val_batch, y_val_batch in iterate_minibatches(X_val,
                                                            Y_val,
                                                            250,
                                                            shuffle=True):
            #            x_val_batch = x_val_batch.astype('float')
            #            temp = val_fn(x_val_batch[:,:4,...].astype('float32'),x_val_batch[:,4:,...].astype('float32'),y_val_batch.reshape(-1,1).astype('float32'))
            temp = val_fn(x_val_batch[:, 4:, ...].astype('float32'),
                          y_val_batch.reshape(-1, 1).astype('float32'))
            #            val_acc += temp[0]
            val_loss += temp[0]
            val_batches_ctr += 1

        print 'Number of validation batches %d' % val_batches_ctr

        #        print 'Validation accuracy for epoch %d = %.6f'%(ep,val_acc/val_batches_ctr)
        print 'Validation loss for epoch %d = %.6f' % (ep, val_loss /
                                                       val_batches_ctr)

        performance_metrics[ep + 1].append({
            'val_loss':
            val_loss / val_batches_ctr,
            #                                            'val_acc' : val_acc / val_batches_ctr,
            'train_loss':
            train_err / train_batches
        })

        if (ep + 1) % 10 == 0:
            params = convert_gpu_cpu(net)
            network_file_name = '1CNN_0maxpool_2048neural_network_p20_mod_special_refl8_regression_adadelta'
            network_file = file(
                '../output/' + network_file_name + '_' + str(exp_no) + '_' +
                str(ep + 1) + '.pkl', 'wb')
            pkl.dump(params, network_file, protocol=pkl.HIGHEST_PROTOCOL)
            network_file.close()
            f1 = file(
                '../output/performance_metrics_' + network_file_name + '_' +
                str(exp_no) + '.pkl', 'wb')
            pkl.dump(performance_metrics, f1, protocol=pkl.HIGHEST_PROTOCOL)
            f1.close()
Example #14
	def __init__(self,x_train,dim_z=10,batch_size = 10,filter_no = [5.,5.,5.],filter_l = [10.,10.,10.],
		pooling_d=3,pooling_s=2,learning_rate = 0.0008,dim_y=None,y_train=None,diff=None,magic=5000):
		####################################### SETTINGS ###################################
		self.x_train = x_train
		self.y_train = y_train
		if y_train is not None:
			self.dim_y = dim_y
		self.diff=diff
		self.batch_size = batch_size
		self.learning_rate = theano.shared(np.float32(learning_rate))
		self.performance = {"train":[]}
		self.inpt = T.ftensor4(name='input')
		self.Y = T.fcol(name= 'label')
		self.df = T.fmatrix(name='differential')
		self.dim_z = dim_z
		self.magic =magic
		self.pooling_d = pooling_d
		self.pooling_s = pooling_s
		self.generative_z = theano.shared(np.float32(np.zeros([1,dim_z])))
		self.generative_hid = theano.shared(np.float32(np.zeros([1,magic])))
		self.activation =relu
		self.out_distribution=False
		self.in_filters = filter_l
		self.filter_lengths = filter_no
		self.params = []


		self.d_o_prob = theano.shared(np.float32(0.0))
		####################################### LAYERS ######################################
		# LAYER 1 ##############################
		self.conv1 = one_d_conv_layer(self.inpt,self.in_filters[0],1,self.filter_lengths[0],param_names = ["W1",'b1']) 
		self.params+=self.conv1.params
		self.bn1 = batchnorm(self.conv1.output)
		self.nl1 = self.activation(self.bn1.X)
		self.maxpool1 = ds.max_pool_2d(self.nl1,[self.pooling_d,1],st=[self.pooling_s,1],ignore_border = False).astype(theano.config.floatX)
		self.layer1_out = dropout(self.maxpool1,self.d_o_prob)
		self.flattened = T.flatten(self.layer1_out,outdim = 2)
		# Conditional +variational layer layer #####################
		if y_train is not None:
			self.c_enc =hidden_layer(self.Y,1,self.dim_y)
			self.c_dec = hidden_layer(self.Y,1,self.dim_y,param_names = ["W10",'b10'])
			self.params+=self.c_enc.params
			self.params+=self.c_dec.params
			self.c_nl = self.activation(self.c_enc.output)
			self.c_nl_dec = self.activation(self.c_dec.output)
			self.concatenated = T.concatenate((self.flattened,self.c_nl),axis = 1)
			self.latent_layer = variational_gauss_layer(self.concatenated,self.magic+self.dim_y,dim_z)
		else:
			self.latent_layer = variational_gauss_layer(self.flattened,self.magic,dim_z)
		self.params+=self.latent_layer.params
		self.latent_out = self.latent_layer.output
		# Hidden Layer #########################
		if y_train is not None:
			self.dec_concat = T.concatenate((self.latent_out,self.c_nl_dec),axis = 1)
			self.hidden_layer = hidden_layer(self.dec_concat,self.dim_z+self.dim_y,self.magic)
		else:
			self.hidden_layer = hidden_layer(self.latent_out,dim_z,self.magic)
		self.params+=self.hidden_layer.params
		self.hid_out = dropout(self.activation(self.hidden_layer.output).reshape((self.inpt.shape[0],self.in_filters[-1],int(self.magic/self.in_filters[-1]),1)),self.d_o_prob)
		# Deconvolutional 1 ######################
		self.deconv1 = one_d_deconv_layer(self.hid_out,1,self.in_filters[2],self.filter_lengths[2],pool=self.pooling_d,param_names = ["W3",'b3'],distribution=False)
		self.params+=self.deconv1.params
		#self.nl_deconv1 = dropout(self.activation(self.deconv1.output),self.dropout_symbolic)
		self.tanh_out = self.deconv1.output
		self.last_layer = self.deconv1

		if self.out_distribution:
			self.trunk_sigma = self.last_layer.log_sigma[:,:,:self.inpt.shape[2],:]
		self.trunc_output = self.tanh_out[:,:,:self.inpt.shape[2],:]
		self.cost = self.MSE()
		self.mse = self.MSE()
		#self.likelihood = self.log_px_z()
		#self.get_cost = theano.function([self.inpt],[self.cost,self.mse])

		#self.get_likelihood = theano.function([self.layer1.inpt],[self.likelihood])
		self.derivatives = T.grad(self.cost,self.params)
		#self.get_gradients = theano.function([self.inpt],self.derivatives)
		self.updates = adam(self.params, self.derivatives, self.learning_rate)
		
		################################### FUNCTIONS ######################################################
		#self.prior_debug = theano.function([self.inpt],[self.latent_out,self.latent_layer.mu_encoder,self.latent_layer.log_sigma_encoder,self.latent_layer.prior])
		#self.get_prior = theano.function([self.inpt],self.latent_layer.prior)
		#self.convolve1 = theano.function([self.inpt],self.layer1_out)
		#self.convolve2 = theano.function([self.inpt],self.layer2_out)
		#self.deconvolve1 = theano.function([self.inpt],self.deconv1.output)
		#self.deconvolve2 = theano.function([self.inpt],self.deconv2.output)
		#self.sig_out = theano.function([self.inpt],T.flatten(self.trunk_sigma,outdim=2))
		#self.output = theano.function([self.inpt],self.trunc_output,givens=[[self.dropout_symbolic,self.dropout_prob]])
		#self.generate_from_z = theano.function([self.inpt],self.trunc_output,givens = [[self.latent_out,self.generative_z]])
		#self.get_cost = theano.function([self.inpt],[self.cost,self.mse])
		#self.get_likelihood = theano.function([self.layer1.inpt],[self.likelihood])
		#self.get_gradients = theano.function([self.inpt],self.derivatives)

		self.generate_from_hid = theano.function([self.inpt],self.trunc_output,givens = [[self.hidden_layer.output,self.generative_hid]])
		self.get_flattened = theano.function([self.inpt],self.flattened)
		if self.y_train is not None:
			self.generate_from_z = theano.function([self.inpt,self.Y],self.trunc_output,givens = [[self.latent_out,self.generative_z]])
			self.train_model = theano.function(inputs = [self.inpt,self.df,self.Y],outputs = self.cost,updates = self.updates)
			self.get_latent_states = theano.function([self.inpt,self.Y],self.latent_out)
			self.get_c_enc = theano.function([self.Y],self.c_enc.output)
			self.output = theano.function([self.inpt,self.Y],self.trunc_output)
			self.get_concat = theano.function([self.inpt,self.Y],self.concatenated)
		else:
			self.generate_from_z = theano.function([self.inpt],self.trunc_output,givens = [[self.latent_out,self.generative_z]])
			self.train_model = theano.function(inputs = [self.inpt,self.df],outputs = self.cost,updates = self.updates)
			self.output = theano.function([self.inpt],self.trunc_output)
			self.get_latent_states = theano.function([self.inpt],self.latent_out)
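
A usage sketch for the constructor above; the enclosing class name is not shown, so `ConvVAE` is a stand-in, and the expected shape of the `df` input depends on the MSE() cost, which is also not shown:

import numpy as np

# Hypothetical usage of the variational conv net defined by the __init__ above.
x_train = np.random.randn(100, 1, 128, 1).astype('float32')  # (N, channels, length, 1)
model = ConvVAE(x_train, dim_z=10, batch_size=10)            # stand-in class name
for i in xrange(x_train.shape[0] // model.batch_size):
    batch = x_train[i * model.batch_size:(i + 1) * model.batch_size]
    df = np.zeros((model.batch_size, 1), dtype='float32')    # placeholder differential
    cost = model.train_model(batch, df)
    model.performance["train"].append(cost)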
Example #15
def main(num_epochs=1,
         n_songs_train=1,
         n_songs_val=1,
         n_songs_test=1,
         batch_size=256,
         learning_rate=1e-4):
    """
    Main function
    """

    # Theano config
    theano.config.floatX = 'float32'

    train, val, test = None, None, None
    try:
        train, val, test = use_preparsed_data(outputdir='/zap/tsob/audio/')
    except Exception:
        # Fall back to parsing the audio from scratch if no preparsed data exists
        train, val, test = get_data(n_songs_train=n_songs_train,
                                    n_songs_val=n_songs_val,
                                    n_songs_test=n_songs_test,
                                    outputdir='/zap/tsob/audio/',
                                    seed=None)

    # Save the returned metadata
    np.savez('/zap/tsob/audio/metadata', train, val, test)

    # Print the dimensions
    print "Data dimensions:"
    for datapt in [
            train['Xshape'], train['yshape'], val['Xshape'], val['yshape'],
            test['Xshape'], test['yshape']
    ]:
        print datapt

    # Parse dimensions
    n_train = train['yshape'][0]
    n_val = val['yshape'][0]
    n_test = test['yshape'][0]
    n_chan = train['Xshape'][1]
    n_feats = train['Xshape'][2]
    n_frames = train['Xshape'][3]

    print "n_train  = {0}".format(n_train)
    print "n_val    = {0}".format(n_val)
    print "n_test   = {0}".format(n_test)
    print "n_chan   = {0}".format(n_chan)
    print "n_feats  = {0}".format(n_feats)
    print "n_frames = {0}".format(n_frames)

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4(name='inputs')
    target_var = T.fcol(name='targets')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions..."),
    network = build_cnn(input_var)
    print("Done.")

    # Create a loss expression for training, i.e., a scalar objective we want to minimize
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.binary_hinge_loss(prediction, target_var)
    loss = loss.mean()

    # Create update expressions for training
    # Here, we'll use adam
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.adam(loss,
                                   params,
                                   learning_rate=learning_rate,
                                   beta1=0.95,
                                   beta2=0.999,
                                   epsilon=1e-08)

    # Create a loss expression for validation/testing.
    # The crucial difference here is that we do a deterministic forward pass
    # through the network, disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)

    test_loss = lasagne.objectives.binary_hinge_loss(test_prediction,
                                                     target_var)
    test_loss = test_loss.mean()

    test_pred_fn = theano.function([input_var],
                                   test_prediction,
                                   allow_input_downcast=True)

    # As a bonus, also create an expression for the classification accuracy:
    # The hinge-loss network emits one raw score per example, so threshold the
    # score at zero instead of taking an argmax over a single-column output.
    test_acc = T.mean(T.eq(T.gt(test_prediction, 0), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function(
        [input_var, target_var],
        loss,
        updates=updates,
        mode=NanGuardMode(  #TODO remove
            nan_is_error=True,
            inf_is_error=True,
            big_is_error=True  #TODO remove
        ),  #TODO remove
        allow_input_downcast=True)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc],
                             allow_input_downcast=True)

    # Finally, launch the training loop.
    print("Starting training...")

    train_error_hist = []

    # We iterate over epochs:
    for epoch in range(num_epochs):

        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()

        for batch in iterate_minibatches(train, batch_size, shuffle=True):
            inputs, targets = batch
            train_err_increment = train_fn(inputs, targets)
            train_err += train_err_increment
            train_error_hist.append(train_err_increment)
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(val, batch_size, shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time))
        print("  training loss:\t\t{:.8f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.8f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(val_acc /
                                                          val_batches * 100))
    print("Done training.")

    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    test_predictions = []
    for batch in iterate_minibatches(test, batch_size, shuffle=False):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        test_predictions.append(test_pred_fn(inputs))
        test_err += err
        test_acc += acc
        test_batches += 1
    print("Final results:")
    print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print("  test accuracy:\t\t{:.2f} %".format(test_acc / test_batches * 100))

    # Optionally, you could now dump the network weights to a file like this:
    timestr = str(time.time())
    np.savez('/zap/tsob/audio/model' + timestr + '.npz',
             *lasagne.layers.get_all_param_values(network))
    np.save('/zap/tsob/audio/train_error_hist' + timestr + '.npy',
            train_error_hist)
    np.save('/zap/tsob/audio/test_predictions' + timestr + '.npy',
            test_predictions)
    print "Wrote model to {0}, test error histogram to {1}, and test predictions to {2}".format(
        'model' + timestr + '.npz', 'train_error_hist' + timestr + '.npy',
        'test_predictions' + timestr + '.npy')
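
Both training loops in this example call an `iterate_minibatches` helper that is not shown. A minimal sketch of the classic Lasagne-tutorial generator matching the (inputs, targets, batchsize) call used below; the main() above passes a dataset dict instead, so its variant would unpack X and y from that dict first:

import numpy as np

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    # Yield successive (inputs, targets) mini-batches, optionally shuffled.
    assert len(inputs) == len(targets)
    indices = np.arange(len(inputs))
    if shuffle:
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        excerpt = indices[start_idx:start_idx + batchsize]
        yield inputs[excerpt], targets[excerpt]
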
def main(num_epochs = 100,num_points = 1200,compute_flag='cpu'):
    # Arguments passed as string need to be converted to int    
    num_epochs = int(num_epochs)
    num_points = int(num_points)
    # Define name of output files
    results_file_name = 'exp_' + str(num_epochs) + '_' + str(num_points) + '_' + compute_flag + '.csv'
    network_file_name = 'network_' + str(num_epochs) + '_' + str(num_points) + '_' + compute_flag 
    print 'Saving file to: %s' % results_file_name
    print 'Number of points: %d ' % num_points
    print 'Compute Flag: %s ' % compute_flag
    save_file(results_file_name)  
    Deep_learner = DCNN_network.DCNN_network()
    # Define the input tensor
    input_var = T.tensor4('inputs')
    # Define the output tensor (in this case it is a real value or reflectivity)
    if compute_flag == 'gpu3_softmax':
        output_var = T.ivector('targets')
    else:
        output_var = T.fcol('targets')
    # User input to decide which experiment to run, cpu runs were performed
    # to check if the network was working correctly
    if compute_flag =='cpu': 
        network,l_hidden1 = Deep_learner.build_CNN(input_var)
    elif compute_flag == 'cpu2':
        network,l_hidden1 = Deep_learner.build_CNN_2(input_var)
    elif compute_flag == 'cpu3':
        network,l_hidden1 = Deep_learner.build_CNN_3(input_var)
    elif compute_flag == 'gpu2':
        print('gpu2 experiment')
        network,l_hidden1 = Deep_learner.build_DCNN_2(input_var)
    elif compute_flag == 'gpu3':
        print('gpu3 experiment')
        network,l_hidden1 = Deep_learner.build_DCNN_3(input_var)
    elif compute_flag == 'deep':
        network,l_hidden1 = Deep_learner.build_DCNN_deep(input_var)
    elif compute_flag == 'gpu3_softmax':
        network,l_hidden1 = Deep_learner.build_DCNN_3_softmax(input_var)
    else:
        network,l_hidden1 = Deep_learner.build_DCNN(input_var)
    
    train_prediction = lasagne.layers.get_output(network)
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    if compute_flag == 'gpu3_softmax':
        loss = lasagne.objectives.categorical_crossentropy(train_prediction, output_var)
        loss = loss.mean()
    else:
    
        # Define the mean square error objective function
        loss = T.mean(lasagne.objectives.squared_error(train_prediction,output_var))
    
        test_loss = T.mean(lasagne.objectives.squared_error(test_prediction,output_var))
        # Add an L1 regularization penalty on the fully connected dense layer
        l1_penalty = regularize_layer_params(l_hidden1, l1)
    
        loss = loss + l1_penalty
    
        test_loss = test_loss + l1_penalty
    
    params = lasagne.layers.get_all_params(network, trainable=True)
    
    updates = lasagne.updates.nesterov_momentum(
            loss, params, learning_rate=0.0000001, momentum=0.9)
    
    # Accuracy via argmax is only meaningful for the 'gpu3_softmax' experiment;
    # for regression targets (fcol) it is merely a placeholder metric.
    train_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), output_var),
                      dtype=theano.config.floatX)
    # Define theano function which generates and compiles C code for the optimization problem
    train_fn = theano.function([input_var, output_var], [loss,train_acc], updates=updates)
    

    
#    test_fn = theano.function([input_var, output_var],test_loss, updates=updates)
    
    base_path = '/home/an67a/deep_nowcaster/data/dataset2/'
    training_set_list = os.listdir(base_path)
    training_set_list = filter(lambda x: x[-4:] == '.pkl' and 'val' not in x, training_set_list)
    validation_set_list = os.listdir(base_path)
    validation_set_list = filter(lambda x: x[-4:] == '.pkl' and 'val' in x, validation_set_list)
    experiment_start_time = time.time()
    # Load Data Set
    DataSet = []
    print('Loading data set...')
    for file_name in training_set_list[:3]:
        print file_name
        temp_file = open(base_path + file_name, 'rb')
        X_train,Y_train = cPickle.load(temp_file)
        temp_file.close()
        # NB: flat uint8 labels suit the ivector target of 'gpu3_softmax'; the
        # regression (fcol) target would need a float32 column instead.
        Y_train = Y_train.reshape(-1,).astype('uint8')
        DataSet.append((X_train,Y_train))
    
    print('Start training...')
    for epoch in range(num_epochs):
        print('Epoch number : %d '%epoch)
        train_err = 0
        train_batches = 0
        train_acc = 0
        start_time = time.time()
        for data in DataSet:
#        for file_name in training_set_list:
#            print file_name
#            temp_file = file(base_path + file_name,'rb')
#            X_train,Y_train = cPickle.load(temp_file)
#            Y_train = Y_train.astype('uint8')
#            temp_file.close()
            for batch in iterate_minibatches(data[0], data[1], 1059, shuffle=False):
                inputs, targets = batch
                err,acc = train_fn(inputs, targets)
                train_err += err
                train_acc += acc
                train_batches += 1
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(
            train_acc / train_batches * 100))
        append_file(results_file_name, epoch + 1,
                    round(train_err / train_batches, 2),
                    round((train_acc / train_batches) * 100, 2))
        
        # Dump the network file every 100 epochs
        if (epoch + 1) % 100 == 0:
            print('creating network file')
            network_file = open('/home/an67a/deep_nowcaster/output/' + network_file_name + '_' + str(epoch + 1) + '.pkl', 'wb')
            cPickle.dump(network, network_file, protocol=cPickle.HIGHEST_PROTOCOL)
            network_file.close()
    time_taken = round(time.time() - experiment_start_time,2)
    print('The experiment took {:.2f}s'.format(time_taken))
    append_file(results_file_name,'The experiment took',time_taken,0)
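
`save_file` and `append_file` are not shown; hypothetical CSV helpers consistent with how they are called above:

import csv

def save_file(file_name):
    # Create the CSV and write a header row (column names are assumptions).
    with open(file_name, 'wb') as f:
        csv.writer(f).writerow(['epoch', 'training_loss', 'training_accuracy'])

def append_file(file_name, epoch, loss, acc):
    # Append one row of per-epoch metrics.
    with open(file_name, 'ab') as f:
        csv.writer(f).writerow([epoch, loss, acc])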
def UnitTest_OnestepAttend():
	N = 2            # number of samples
	D = 5            # dimension of the input
	H = 4            # dimension of the hidden state
	T_new = 1        # sequence length per sample
	context_dim = 3  # dimension of the image context vectors
	K = 5            # dimension of the attention projection

	x = np.linspace(-0.4, 0.6, num=N*T_new*D, dtype=theano.config.floatX).reshape(T_new, N, D)
	h0 = np.linspace(-0.4, 0.8, num=N*H, dtype=theano.config.floatX).reshape(N, H)
	Wx = np.linspace(-0.2, 0.9, num=4*D*H, dtype=theano.config.floatX).reshape(D, 4*H)
	Wh = np.linspace(-0.3, 0.6, num=4*H*H, dtype=theano.config.floatX).reshape(H, 4*H)
	b = np.linspace(0.0, 0.0, num=4*H, dtype=theano.config.floatX)
	Wz = np.linspace(-0.3, 0.6, num=4*H*context_dim, dtype=theano.config.floatX).reshape(context_dim, 4*H)
	Hcontext = np.linspace(-0.2, 0.6, num=H*K, dtype=theano.config.floatX).reshape(H, K)
	Zcontext = np.linspace(-0.2, 0.5, num=context_dim*K, dtype=theano.config.floatX).reshape(context_dim, K)
	Va = np.linspace(0.1, 0.4, num=K, dtype=theano.config.floatX)
	Va_reshape = Va.reshape(K, 1)

	image_feature_3D = np.linspace(-0.2, 0.5, num=10*N*context_dim, dtype = theano.config.floatX).reshape(N,10, context_dim)

	h0_theano = h0.reshape(1, N, H)
	# h0_symb   = theano.tensor.ftensor3("h_symb")
	# lstm_theano_layer.h_m1.set_value(h0_theano)

	c0_theano = np.zeros((1, N, H), dtype = theano.config.floatX)
	# c0_symb   = theano.tensor.ftensor3("c_symb")
	# lstm_theano_layer.c_m1.set_value(c0_theano)

	z0_theano = np.zeros((1, N, context_dim), dtype = theano.config.floatX)

	x_theano = x.reshape(T_new, N, D, 1)
	image_feature_input = image_feature_3D

	weight_y_in_value = np.zeros((10, context_dim), dtype=theano.config.floatX)
	b_theano = b.reshape(1, 1, 4*H)
	pdb.set_trace()

	#symbolic variables
	initial_h0_layer_out = theano.tensor.tensor3(name='h0_initial', dtype=theano.config.floatX)
	initial_c0_layer_out = theano.tensor.tensor3(name='c0_initial', dtype=theano.config.floatX)
	initial_z0 = T.tensor3(name='z0_initial', dtype=theano.config.floatX)
	weight_y_in = theano.tensor.fmatrix("weight_y")
	input_data = theano.tensor.tensor3(name='x', dtype=theano.config.floatX)
	image_feature_region = theano.tensor.tensor3(name='feature_region', dtype=theano.config.floatX)

	Wi_sym, Wf_sym, Wc_sym, Wo_sym, Ui_sym, Uf_sym, Uc_sym, Uo_sym, Zi_sym, Zf_sym, Zc_sym, Zo_sym = T.fmatrices(12)
	Zcontext_sym, Hcontext_sym = T.fmatrices(2)
	bi  = T.ftensor3("bi")
	bf  = T.ftensor3("bf")
	bc  = T.ftensor3("bc")
	bo  = T.ftensor3("bo")
	Va_sym = T.fcol("Va")


	out_sym = onestep_attend_tell(input_data, initial_h0_layer_out, initial_c0_layer_out, initial_z0, 
		Wi_sym, Wf_sym, Wc_sym, Wo_sym, Ui_sym, Uf_sym, Uc_sym, Uo_sym, Zi_sym, Zf_sym, Zc_sym, Zo_sym,
		Zcontext_sym, Hcontext_sym, Va_sym,
		bi, bf, bc, bo, image_feature_region, weight_y_in)

	onestep_func = theano.function([input_data, initial_h0_layer_out, initial_c0_layer_out, initial_z0, 
		Wi_sym, Wf_sym, Wc_sym, Wo_sym, Ui_sym, Uf_sym, Uc_sym, Uo_sym, Zi_sym, Zf_sym, Zc_sym, Zo_sym,
		Zcontext_sym, Hcontext_sym, Va_sym,
		bi, bf, bc, bo, image_feature_region, weight_y_in], out_sym)

	list_output = onestep_func(x, h0_theano, c0_theano, z0_theano,
		Wx[:, :H], Wx[:, H:2*H], Wx[:, 2*H:3*H], Wx[:, 3*H:],
		Wh[:, :H], Wh[:, H:2*H], Wh[:, 2*H:3*H], Wh[:, 3*H:],
		Wz[:, :H], Wz[:, H:2*H], Wz[:, 2*H:3*H], Wz[:, 3*H:],
		Zcontext,Hcontext,
		Va_reshape,
		b_theano[:,: , :H], b_theano[:, :, H:2*H], b_theano[:, :, 2*H:3*H], b_theano[:, :, 3*H:], 
		image_feature_input, weight_y_in_value)


	pdb.set_trace()

	print(list_output[0].shape)
	print(list_output[1].shape)
	print(list_output[2].shape)

	pdb.set_trace()
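
Since every example on this page declares targets, labels, or attention vectors with T.fcol, a minimal standalone sketch of what that type provides: a float32 matrix constrained to exactly one column (ndim 2, with the second dimension broadcastable):

import numpy as np
import theano
import theano.tensor as T

v = T.fcol('v')                    # float32, shape (?, 1)
f = theano.function([v], v.sum())
print f(np.ones((4, 1), dtype='float32'))  # 4.0
print v.broadcastable                      # (False, True)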