Example #1
import pickle

EMO = 'valence'
# EMO='arousal'
do_regularize = False

# fold_id = 2

all_fold_pred = list()
all_fold_y_test = list()
all_fold_id_test = list()

for fold_id in range(1):
    print '... loading FOLD %d' % fold_id
    fold = pickle.load(open(DATADIR + '/pkl/fold%d_normed.pkl' % fold_id, "rb"))

    X_train, y_train, id_train = load_X_from_fold(fold, 'train')
    X_test, y_test, id_test = load_X_from_fold(fold, 'test')

    print id_test.shape

    # X_train = X_train[:,[10,12,13,17,19,82,83,84,85,89,90,91,103,140,142,146,148,212,214,218,220]]
    # X_test = X_test[:,[10,12,13,17,19,82,83,84,85,89,90,91,103,140,142,146,148,212,214,218,220]]
    # X_train = X_train[:,[13,85,103,142,214]]
    # X_test = X_test[:,[13,85,103,142,214]]

    # one dimension at a time
    # 0: arousal, 1: valence
    if EMO == 'valence':
        print '... emotion: valence'
        y_train = y_train[:, 0]
        y_test = y_test[:, 0]
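
This fragment calls load_X_from_fold, a project helper that is not shown on this page (DATADIR is likewise defined elsewhere; examples #3 and #4 below set DATADIR = './train/'). A minimal sketch of what the helper plausibly does, assuming each fold pickle maps a split name to 'X', 'y' and 'song_id' arrays; the key names are guesses inferred from how the return values are used:

import numpy as np

def load_X_from_fold(fold, split):
    # Hypothetical reconstruction: pull features, targets and song ids
    # for one split ('train' or 'test') out of a fold dictionary.
    X = np.asarray(fold[split]['X'])              # (n_samples, n_features)
    y = np.asarray(fold[split]['y'])              # (n_samples, 2) emotion targets
    song_id = np.asarray(fold[split]['song_id'])  # (n_samples,)
    return X, y, song_id
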
Example #2
import pickle

EMO = 'valence'
# EMO='arousal'
do_regularize = False

# fold_id = 2

all_fold_pred = list()
all_fold_y_test = list()
all_fold_id_test = list()

for fold_id in range(10):
    print '... loading FOLD %d' % fold_id
    fold = pickle.load(
        open(DATADIR + '/pkl/fold%d_normed.pkl' % (fold_id), "rb"))

    X_train, y_train, id_train = load_X_from_fold(fold, 'train')
    X_test, y_test, id_test = load_X_from_fold(fold, 'test')

    print id_test.shape

    # X_train = X_train[:,[10,12,13,17,19,82,83,84,85,89,90,91,103,140,142,146,148,212,214,218,220]]
    # X_test = X_test[:,[10,12,13,17,19,82,83,84,85,89,90,91,103,140,142,146,148,212,214,218,220]]
    # X_train = X_train[:,[13,85,103,142,214]]
    # X_test = X_test[:,[13,85,103,142,214]]

    # one dimension at a time
    # 0: arousal, 1: valence
    if EMO == 'valence':
        print '... emotion: valence'
        y_train = y_train[:, 0]
        y_test = y_test[:, 0]
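
The excerpt breaks off after the valence branch. Given the commented-out EMO='arousal' option at the top and the '0: arousal, 1: valence' note, the selection presumably continues with a symmetric branch. A hypothetical sketch follows; note that the column comment and the index actually used for valence disagree in the original, so the column index below is a guess:

    elif EMO == 'arousal':
        print '... emotion: arousal'
        y_train = y_train[:, 1]  # assumed column; verify against the fold file
        y_test = y_test[:, 1]
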
Example #3
import os
import sys
import timeit
import pickle

import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from sklearn import preprocessing

# dA, load_X_from_fold and shared_dataset are project-local helpers


def test_dA(learning_rate=0.1, training_epochs=100, batch_size=60, output_folder="dA_plots/"):
    """
    Train a denoising autoencoder on each cross-validation fold and pickle
    the learned hidden-layer activations.

    :type learning_rate: float
    :param learning_rate: learning rate for the denoising autoencoder

    :type training_epochs: int
    :param training_epochs: number of training epochs

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type output_folder: string
    :param output_folder: directory where filter plots would be written
    """
    cost_type = "MSE"
    noise_type = "gaussian"
    corruption_level = 0.3
    n_hidden = 500

    NUM_FRAMES = 60
    # DATADIR = '/baie/corpus/emoMusic/train/'
    DATADIR = "./train/"
    fold_id = 0
    for fold_id in range(0, 10):
        print "... loading FOLD %d" % fold_id
        fold = pickle.load(open(DATADIR + "/pkl/fold%d_normed.pkl" % (fold_id), "rb"))

        X_train, y_train, id_train = load_X_from_fold(fold, "train")
        # scale data to [0, 1]
        min_max_scaler = preprocessing.MinMaxScaler()
        X_train = min_max_scaler.fit_transform(X_train)
        # X_train = X_train[:, 0:256]
        train_set_x, train_set_y = shared_dataset(X_train, y_train)

        X_test, y_test, id_test = load_X_from_fold(fold, "test")
        X_test = min_max_scaler.transform(X_test)
        # X_test = X_test[:, 0:256]
        print X_test.shape
        test_set_x, test_set_y = shared_dataset(X_test, y_test)

        # compute number of minibatches for training, validation and testing
        n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        print "batch_size: %d, n_train_batches: %d" % (batch_size, n_train_batches)

        # start-snippet-2
        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch
        x = T.matrix("x")  # the data is presented as rasterized images
        # end-snippet-2

        if not os.path.isdir(output_folder):
            os.makedirs(output_folder)
        # os.chdir(output_folder)

        ######################
        # BUILDING THE MODEL #
        ######################

        rng = numpy.random.RandomState(123)
        theano_rng = RandomStreams(rng.randint(2 ** 30))

        da = dA(
            numpy_rng=rng,
            theano_rng=theano_rng,
            input=x,
            n_visible=260,
            #        n_visible=256,
            n_hidden=n_hidden,
            act_enc="sigmoid",
            act_dec="sigmoid",
        )

        cost, updates = da.get_cost_updates(
            corruption_level=corruption_level,
            learning_rate=learning_rate,
            cost=cost_type,
            noise=noise_type,
            # noise = 'binomial'
        )

        train_da = theano.function(
            [index], cost, updates=updates, givens={x: train_set_x[index * batch_size : (index + 1) * batch_size]}
        )

        start_time = timeit.default_timer()

        ############
        # TRAINING #
        ############

        # go through training epochs
        for epoch in xrange(training_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(train_da(batch_index))

            print "Training epoch %d, cost " % epoch, numpy.mean(c)

        end_time = timeit.default_timer()

        training_time = end_time - start_time

        print >> sys.stderr, (
            "The training code for file "
            + os.path.split(__file__)[1]
            + " ran for %.2fm" % (training_time / 60.0)
        )
        # image = Image.fromarray(
        #     tile_raster_images(X=da.W.get_value(borrow=True).T,
        #                        img_shape=(16, 16), tile_shape=(10, 10),
        #                        tile_spacing=(1, 1)))
        # image.save(output_folder + 'filters_corruption_0.png')

        # if save_dir:
        #     da.save(save_dir)

        denoising_error = da.get_denoising_error(test_set_x, cost_type, noise_type, corruption_level)
        print "Training complete in %f (min) with final denoising error in test: %f" % (
            training_time / 60.0,
            denoising_error,
        )

        # hidden_features = numpy(da.get_hidden_values(test_set_x))
        # print 'hidden_features: '
        # print hidden_features

        # theano functions to get representations of the dataset learned by the model
        index = T.lscalar()  # index to a [mini]batch
        x = theano.tensor.matrix("input")

        # act from the test dataset
        # need to get a T.matrix instead of a shared dataset to be able to use the get_hidden_values function
        tilde_x = da.get_corrupted_input(test_set_x, corruption_level)

        get_rep_test = theano.function(
            [], da.get_hidden_values(x), updates={}, givens={x: tilde_x}, name="get_rep_test"
        )
        test_act = get_rep_test()
        # print type(test_act)
        # print test_act.shape

        # act from the training dataset
        tilde_x = da.get_corrupted_input(train_set_x, corruption_level)
        get_rep_train = theano.function(
            [], da.get_hidden_values(x), updates={}, givens={x: tilde_x}, name="get_rep_train"
        )
        train_act = get_rep_train()

        output = dict()
        output["train"] = dict()
        output["train"]["X"] = train_act
        output["train"]["y"] = y_train
        output["train"]["song_id"] = id_train
        output["test"] = dict()
        output["test"]["X"] = test_act
        output["test"]["y"] = y_test
        output["test"]["song_id"] = id_test

        act_dir = "AE/activations/"
        nom = act_dir + "fold%d_cost%s_noise%s_level%.1f_nh%d_it%d.pkl" % (
            fold_id,
            cost_type,
            noise_type,
            corruption_level,
            n_hidden,
            training_epochs,
        )
        pickle.dump(output, open(nom, "wb"))
        print "activation (dict) saved in " + nom
Example #4
import os
import sys
import timeit
import pickle

import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from sklearn import preprocessing

# dA, load_X_from_fold and shared_dataset are project-local helpers


def test_dA(learning_rate=0.1,
            training_epochs=100,
            batch_size=60,
            output_folder='dA_plots/'):
    """
    Train a denoising autoencoder on each cross-validation fold and pickle
    the learned hidden-layer activations.

    :type learning_rate: float
    :param learning_rate: learning rate for the denoising autoencoder

    :type training_epochs: int
    :param training_epochs: number of training epochs

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type output_folder: string
    :param output_folder: directory where filter plots would be written
    """
    cost_type = 'MSE'
    noise_type = 'gaussian'
    corruption_level = 0.3
    n_hidden = 500

    NUM_FRAMES = 60
    # DATADIR = '/baie/corpus/emoMusic/train/'
    DATADIR = './train/'
    fold_id = 0
    for fold_id in range(0, 10):
        print '... loading FOLD %d' % fold_id
        fold = pickle.load(
            open(DATADIR + '/pkl/fold%d_normed.pkl' % (fold_id), "rb"))

        X_train, y_train, id_train = load_X_from_fold(fold, 'train')
        # scale data to [0, 1]
        min_max_scaler = preprocessing.MinMaxScaler()
        X_train = min_max_scaler.fit_transform(X_train)
        # X_train = X_train[:, 0:256]
        train_set_x, train_set_y = shared_dataset(X_train, y_train)

        X_test, y_test, id_test = load_X_from_fold(fold, 'test')
        X_test = min_max_scaler.transform(X_test)
        # X_test = X_test[:, 0:256]
        print X_test.shape
        test_set_x, test_set_y = shared_dataset(X_test, y_test)

        # compute number of minibatches for training, validation and testing
        n_train_batches = train_set_x.get_value(
            borrow=True).shape[0] / batch_size
        print 'batch_size: %d, n_train_batches: %d' % (batch_size,
                                                       n_train_batches)

        # start-snippet-2
        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch
        x = T.matrix('x')  # each row is one 260-dimensional feature vector
        # end-snippet-2

        if not os.path.isdir(output_folder):
            os.makedirs(output_folder)
        # os.chdir(output_folder)

        ######################
        # BUILDING THE MODEL #
        ######################

        rng = numpy.random.RandomState(123)
        theano_rng = RandomStreams(rng.randint(2**30))

        da = dA(
            numpy_rng=rng,
            theano_rng=theano_rng,
            input=x,
            n_visible=260,
            #        n_visible=256,
            n_hidden=n_hidden,
            act_enc='sigmoid',
            act_dec='sigmoid')

        cost, updates = da.get_cost_updates(
            corruption_level=corruption_level,
            learning_rate=learning_rate,
            cost=cost_type,
            noise=noise_type,
            # noise = 'binomial'
        )

        train_da = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size]
            })

        start_time = timeit.default_timer()

        ############
        # TRAINING #
        ############

        # go through training epochs
        for epoch in xrange(training_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(train_da(batch_index))

            print 'Training epoch %d, cost ' % epoch, numpy.mean(c)

        end_time = timeit.default_timer()

        training_time = (end_time - start_time)

        print >> sys.stderr, ('The training code for file ' +
                              os.path.split(__file__)[1] + ' ran for %.2fm' %
                              (training_time / 60.))
        # image = Image.fromarray(
        #     tile_raster_images(X=da.W.get_value(borrow=True).T,
        #                        img_shape=(16, 16), tile_shape=(10, 10),
        #                        tile_spacing=(1, 1)))
        # image.save(output_folder + 'filters_corruption_0.png')

        # if save_dir:
        #     da.save(save_dir)

        denoising_error = da.get_denoising_error(test_set_x, cost_type,
                                                 noise_type, corruption_level)
        print 'Training complete in %f (min) with final denoising error in test: %f' \
            % (training_time / 60., denoising_error)

        # hidden_features = numpy(da.get_hidden_values(test_set_x))
        # print 'hidden_features: '
        # print hidden_features

        # theano functions to get representations of the dataset learned by the model
        index = T.lscalar()  # index to a [mini]batch
        x = theano.tensor.matrix('input')

        # act from the test dataset
        # need to get a T.matrix instead of a shared dataset to be able to use the get_hidden_values function
        tilde_x = da.get_corrupted_input(test_set_x, corruption_level)

        get_rep_test = theano.function([],
                                       da.get_hidden_values(x),
                                       updates={},
                                       givens={x: tilde_x},
                                       name='get_rep_test')
        test_act = get_rep_test()
        # print type(test_act)
        # print test_act.shape

        # act from the training dataset
        tilde_x = da.get_corrupted_input(train_set_x, corruption_level)
        get_rep_train = theano.function([],
                                        da.get_hidden_values(x),
                                        updates={},
                                        givens={x: tilde_x},
                                        name='get_rep_train')
        train_act = get_rep_train()

        output = dict()
        output['train'] = dict()
        output['train']['X'] = train_act
        output['train']['y'] = y_train
        output['train']['song_id'] = id_train
        output['test'] = dict()
        output['test']['X'] = test_act
        output['test']['y'] = y_test
        output['test']['song_id'] = id_test

        act_dir = 'AE/activations/'
        if not os.path.isdir(act_dir):
            os.makedirs(act_dir)
        nom = act_dir + 'fold%d_cost%s_noise%s_level%.1f_nh%d_it%d.pkl' % (
            fold_id, cost_type, noise_type, corruption_level, n_hidden,
            training_epochs)
        pickle.dump(output, open(nom, "wb"))
        print 'activation (dict) saved in ' + nom
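
The pickled activation dict can then feed any downstream regressor. A short usage sketch; the ridge regression model is illustrative, not part of the original code, and the filename simply instantiates the format string above with this function's default parameters:

import pickle
from sklearn import linear_model

nom = 'AE/activations/fold0_costMSE_noisegaussian_level0.3_nh500_it100.pkl'
data = pickle.load(open(nom, 'rb'))
X_train, y_train = data['train']['X'], data['train']['y']
X_test, y_test = data['test']['X'], data['test']['y']

# fit an illustrative model on the autoencoder activations
reg = linear_model.Ridge(alpha=1.0)
reg.fit(X_train, y_train)
print 'test R^2: %f' % reg.score(X_test, y_test)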