Example #1
 def get_matrices(self, n=3, ratios=[.7, .15, .15]):
     X = []
     y = []
     for ngr, w in self.iterate_ngram_training(n):
         X.append(ngr)
         y.append([w])
     X = numpy.array(X)
     y = numpy.array(y)
     total = len(y)
     training = round(total * ratios[0])
     valid = training + round(total * ratios[1])
     #test = total - training - valid
     labels = len(self.vocab)
     training_data = DenseDesignMatrix(X=X[:training, :],
                                       y=y[:training],
                                       X_labels=labels,
                                       y_labels=labels)
     valid_data = DenseDesignMatrix(X=X[training:valid, :],
                                    y=y[training:valid],
                                    X_labels=labels,
                                    y_labels=labels)
     test_data = DenseDesignMatrix(X=X[valid:, :],
                                   y=y[valid:],
                                   X_labels=labels,
                                   y_labels=labels)
     return training_data, valid_data, test_data
Example #2
def test_sgd_sup():

    # tests that we can run the sgd algorithm
    # on a supervised cost.
    # does not test for correctness at all, just
    # that the algorithm runs without dying

    dim = 3
    m = 10

    rng = np.random.RandomState([25, 9, 2012])

    X = rng.randn(m, dim)

    idx = rng.randint(0, dim, (m, ))
    Y = np.zeros((m, dim))
    for i in xrange(m):
        Y[i, idx[i]] = 1

    dataset = DenseDesignMatrix(X=X, y=Y)

    m = 15
    X = rng.randn(m, dim)

    idx = rng.randint(0, dim, (m,))
    Y = np.zeros((m, dim))
    for i in xrange(m):
        Y[i, idx[i]] = 1

    # Including a monitoring dataset lets us test that
    # the monitor works with supervised data
    monitoring_dataset = DenseDesignMatrix(X=X, y=Y)

    model = SoftmaxModel(dim)

    learning_rate = 1e-3
    batch_size = 5

    cost = SupervisedDummyCost()

    # We need to include this so the test actually stops running at some point
    termination_criterion = EpochCounter(5)

    algorithm = SGD(learning_rate, cost,
                    batch_size=batch_size,
                    monitoring_batches=3,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    train = Train(dataset,
                  model,
                  algorithm,
                  save_path=None,
                  save_freq=0,
                  extensions=None)

    train.main_loop()
Example #3
def test_from_dataset():
    """
    Tests that from_dataset supports integer labels.
    """
    rng = np.random.RandomState([1, 2, 3])
    topo_view = rng.randn(12, 2, 3, 3)
    y = rng.randint(0, 5, (12, 1))

    # without y:
    d1 = DenseDesignMatrix(topo_view=topo_view)
    slice_d = from_dataset(d1, 5)
    assert slice_d.X.shape[1] == d1.X.shape[1]
    assert slice_d.X.shape[0] == 5

    # with y:
    d2 = DenseDesignMatrix(topo_view=topo_view, y=y)
    slice_d = from_dataset(d2, 5)
    assert slice_d.X.shape[1] == d2.X.shape[1]
    assert slice_d.X.shape[0] == 5
    assert slice_d.y.shape[0] == 5

    # without topo_view:
    x = topo_view.reshape(12, 18)
    d3 = DenseDesignMatrix(X=x, y=y)
    slice_d = from_dataset(d3, 5)
    assert slice_d.X.shape[1] == d3.X.shape[1]
    assert slice_d.X.shape[0] == 5
    assert slice_d.y.shape[0] == 5
Example #4
 def create_batch_matrices(self, ratios=[.7, .15, .15]):
     res = self.read_batch()
     if res is None:
         return None
     X, y = res
     num_labels = len(self.needed) + 1  # for filtered words
     X = numpy.array(X)
     y = numpy.array(y)
     total = len(y)
     indices = range(total)
     shuffle(indices)
     training = int(round(total * ratios[0]))
     valid = int(round(total * ratios[1]))
     training_indices = indices[:training]
     valid_indices = indices[training:training + valid]
     test_indices = indices[training + valid:]
     training_data = DenseDesignMatrix(X=X[training_indices, :],
                                       y=y[training_indices],
                                       X_labels=num_labels,
                                       y_labels=num_labels)
     valid_data = DenseDesignMatrix(X=X[valid_indices, :],
                                    y=y[valid_indices],
                                    X_labels=num_labels,
                                    y_labels=num_labels)
     test_data = DenseDesignMatrix(X=X[test_indices, :],
                                   y=y[test_indices],
                                   X_labels=num_labels,
                                   y_labels=num_labels)
     return training_data, valid_data, test_data
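A minimal standalone sketch of the shuffled three-way split used above (Python 2 semantics: range returns a list and round() rounds halves away from zero; the numbers are illustrative):

from random import shuffle

total = 10
indices = range(total)  # Python 2: a list
shuffle(indices)
training = int(round(total * .7))   # 7 examples
valid = int(round(total * .15))     # 2 examples (1.5 rounds up)
training_indices = indices[:training]
valid_indices = indices[training:training + valid]
test_indices = indices[training + valid:]  # the remaining example
assert len(training_indices) + len(valid_indices) + len(test_indices) == total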
Example #5
def test_multiple_monitoring_datasets():
    # tests that DefaultTrainingAlgorithm can take multiple
    # monitoring datasets.

    BATCH_SIZE = 2
    BATCHES = 3
    NUM_FEATURES = 4
    dim = 3
    m = 10

    rng = np.random.RandomState([2014, 02, 25])
    X = rng.randn(m, dim)
    Y = rng.randn(m, dim)

    train = DenseDesignMatrix(X=X)
    test = DenseDesignMatrix(X=Y)

    algorithm = DefaultTrainingAlgorithm(
        batch_size=BATCH_SIZE,
        batches_per_iter=BATCHES,
        monitoring_dataset={'train': train, 'test': test})

    model = S3C(nvis=NUM_FEATURES, nhid=1,
                irange=.01, init_bias_hid=0., init_B=1.,
                min_B=1., max_B=1., init_alpha=1.,
                min_alpha=1., max_alpha=1., init_mu=0.,
                m_step=Grad_M_Step(learning_rate=0.),
                e_step=E_Step(h_new_coeff_schedule=[1.]))

    algorithm.setup(model=model, dataset=train)
    algorithm.train(dataset=train)
Example #6
def test_init_bias_target_marginals():
    """
    Test `Softmax` layer instantiation with `init_bias_target_marginals`.
    """
    batch_size = 5
    n_features = 5
    n_classes = 3
    n_targets = 3
    irange = 0.1
    learning_rate = 0.1

    X_data = np.random.random(size=(batch_size, n_features))

    Y_categorical = np.asarray([[0], [1], [1], [2], [2]])
    class_frequencies = np.asarray([.2, .4, .4])
    categorical_dataset = DenseDesignMatrix(X_data,
                                            y=Y_categorical,
                                            y_labels=n_classes)

    Y_continuous = np.random.random(size=(batch_size, n_targets))
    Y_means = np.mean(Y_continuous, axis=0)
    continuous_dataset = DenseDesignMatrix(X_data,
                                           y=Y_continuous)

    Y_multiclass = np.random.randint(n_classes,
                                     size=(batch_size, n_targets))
    multiclass_dataset = DenseDesignMatrix(X_data,
                                           y=Y_multiclass,
                                           y_labels=n_classes)

    def softmax_layer(dataset):
        return Softmax(n_classes, 'h0', irange=irange,
                       init_bias_target_marginals=dataset)

    valid_categorical_mlp = MLP(
        layers=[softmax_layer(categorical_dataset)],
        nvis=n_features
    )

    actual = valid_categorical_mlp.layers[0].b.get_value()
    expected = pseudoinverse_softmax_numpy(class_frequencies)
    assert np.allclose(actual, expected)

    valid_continuous_mlp = MLP(
        layers=[softmax_layer(continuous_dataset)],
        nvis=n_features
    )

    actual = valid_continuous_mlp.layers[0].b.get_value()
    expected = pseudoinverse_softmax_numpy(Y_means)
    assert np.allclose(actual, expected)

    def invalid_multiclass_mlp():
        return MLP(
            layers=[softmax_layer(multiclass_dataset)],
            nvis=n_features
        )
    assert_raises(AssertionError, invalid_multiclass_mlp)
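The expected biases above come from inverting the softmax up to an additive constant. A sketch of the relationship (assuming pseudoinverse_softmax_numpy returns mean-centered log probabilities, which softmax maps back onto the marginals):

import numpy as np

p = np.asarray([.2, .4, .4])            # class marginals
b = np.log(p) - np.log(p).mean()        # a pseudoinverse of softmax
softmax = np.exp(b) / np.exp(b).sum()   # softmax is shift-invariant
assert np.allclose(softmax, p)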
Example #7
def test_bgd_unsup():

    # tests that we can run the bgd algorithm
    # on an unsupervised cost.
    # does not test for correctness at all, just
    # that the algorithm runs without dying

    dim = 3
    m = 10

    rng = np.random.RandomState([25, 9, 2012])

    X = rng.randn(m, dim)

    dataset = DenseDesignMatrix(X=X)

    m = 15
    X = rng.randn(m, dim)

    # Including a monitoring dataset lets us test that
    # the monitor works with unsupervised data
    monitoring_dataset = DenseDesignMatrix(X=X)

    model = SoftmaxModel(dim)

    learning_rate = 1e-3
    batch_size = 5

    class DummyCost(Cost):
        def expr(self, model, data):
            self.get_data_specs(model)[0].validate(data)
            X = data
            return T.square(model(X) - X).mean()

        def get_data_specs(self, model):
            return (model.get_input_space(), model.get_input_source())

    cost = DummyCost()

    # We need to include this so the test actually stops running at some point
    termination_criterion = EpochCounter(5)

    algorithm = BGD(cost,
                    batch_size=5,
                    monitoring_batches=2,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion)

    train = Train(dataset,
                  model,
                  algorithm,
                  save_path=None,
                  save_freq=0,
                  extensions=None)

    train.main_loop()
Example #8
def testing_multiple_datasets_with_specified_dataset_in_monitor_based_lr():
    # tests that the class MonitorBasedLRAdjuster in sgd.py can properly use
    # the specified dataset_name in the constructor when multiple datasets
    # exist.

    dim = 3
    m = 10

    rng = np.random.RandomState([06, 02, 2014])

    X = rng.randn(m, dim)
    Y = rng.randn(m, dim)

    learning_rate = 1e-2
    batch_size = 5

    # We need to include this so the test actually stops running at some point
    epoch_num = 1

    # including a monitoring datasets lets us test that
    # the monitor works with supervised data
    monitoring_train = DenseDesignMatrix(X=X)
    monitoring_test = DenseDesignMatrix(X=Y)

    cost = DummyCost()

    model = SoftmaxModel(dim)

    dataset = DenseDesignMatrix(X=X)

    termination_criterion = EpochCounter(epoch_num)

    monitoring_dataset = {'train': monitoring_train, 'test': monitoring_test}

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=batch_size,
                    monitoring_batches=2,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    dataset_name = monitoring_dataset.keys()[0]
    monitor_lr = MonitorBasedLRAdjuster(dataset_name=dataset_name)

    train = Train(dataset,
                  model,
                  algorithm,
                  save_path=None,
                  save_freq=0,
                  extensions=[monitor_lr])

    train.main_loop()
Example #9
def create_dense_design_matrix(x, y=None, num_classes=None):
    if y is None:
        return DenseDesignMatrix(X=x)

    if num_classes is None:
        return DenseDesignMatrix(X=x, y=y)

    y = y.reshape((-1, ))
    one_hot = np.zeros((y.shape[0], num_classes), dtype='float32')
    for i in xrange(y.shape[0]):
        one_hot[i, y[i]] = 1.
    return DenseDesignMatrix(X=x, y=one_hot)
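A usage sketch for create_dense_design_matrix (the shapes and values are illustrative):

import numpy as np

x = np.random.randn(100, 20).astype('float32')
y = np.random.randint(0, 10, size=(100, 1))

ds_plain = create_dense_design_matrix(x)                       # X only
ds_labeled = create_dense_design_matrix(x, y)                  # raw labels
ds_one_hot = create_dense_design_matrix(x, y, num_classes=10)  # one-hot labels
assert ds_one_hot.y.shape == (100, 10)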
Example #10
def testing_multiple_datasets_in_monitor_based_lr():
    # tests that the class MonitorBasedLRAdjuster in sgd.py refuses to take
    # multiple datasets in which multiple channels ending in '_objective'
    # exist. This case happens when the user has specified neither
    # channel_name nor dataset_name in the constructor

    dim = 3
    m = 10

    rng = np.random.RandomState([06, 02, 2014])

    X = rng.randn(m, dim)
    Y = rng.randn(m, dim)

    learning_rate = 1e-2
    batch_size = 5

    # We need to include this so the test actually stops running at some point
    epoch_num = 1

    # including a monitoring datasets lets us test that
    # the monitor works with supervised data
    monitoring_train = DenseDesignMatrix(X=X)
    monitoring_test = DenseDesignMatrix(X=Y)

    cost = DummyCost()

    model = SoftmaxModel(dim)

    dataset = DenseDesignMatrix(X=X)

    termination_criterion = EpochCounter(epoch_num)

    algorithm = SGD(learning_rate, cost, batch_size=5,
                    monitoring_batches=2,
                    monitoring_dataset={'train': monitoring_train,
                                        'test': monitoring_test},
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None, set_batch_size=False)

    monitor_lr = MonitorBasedLRAdjuster()

    train = Train(dataset, model, algorithm, save_path=None,
                  save_freq=0, extensions=[monitor_lr])

    try:
        train.main_loop()
    except ValueError:
        return
        
    raise AssertionError("MonitorBasedLRAdjuster accepted multiple datasets "
                         "in which more than one '_objective' channel exists, "
                         "even though neither channel_name nor dataset_name "
                         "was specified in the constructor to disambiguate.")
Example #11
def get_feats_from_cnn(rows, model=None):
    """
    fprop rows using best trained model and returns activations of the
    penultimate layer
    """
    conf = utils.get_config()
    patch_size = conf['patch_size']
    region_size = conf['region_size']
    batch_size = None
    preds = utils.get_predictor(model=model, return_all=True)
    y = np.zeros(len(rows))
    samples = np.zeros((len(rows), region_size, region_size, 1),
                       dtype=np.float32)
    for i, row in enumerate(rows):
        print 'processing %i-th image: %s' % (i, row['image_filename'])
        try:
            samples[i] = utils.get_samples_from_image(row, False)[0]
        except ValueError as e:
            print '{1} Value error: {0}'.format(str(e), row['image_filename'])
        y[i] = utils.is_positive(row)
    ds = DenseDesignMatrix(topo_view=samples)
    pipeline = utils.get_pipeline(ds.X_topo_space.shape, patch_size,
                                  batch_size)
    pipeline.apply(ds)
    return preds[-2](ds.get_topological_view()), y
Example #12
def load(start, stop, datadir='data/CK'):
    im_list = glob.glob(os.path.join(datadir, 'faces_aligned/*.png'))[start:]
    if not im_list:
        msg = ('No image files found in: %s' %
               os.path.realpath(os.path.join(datadir, 'faces_aligned')))
        log.error(msg)
        raise RuntimeError(msg)
    X = []
    y = []
    more_to_read = stop - start
    for im_file in im_list:
        if more_to_read <= 0:
            break
        label_base_pat = os.path.basename(im_file)[:9] + '*_emotion.txt'
        maybe_label_file = glob.glob(
            os.path.join(datadir, 'labels', label_base_pat))
        if maybe_label_file:
            y.append(read_label(maybe_label_file[0]))
            imdata = imread(im_file, False)
            imdata = cv2.resize(imdata, (32, 32))
            imdata = imdata.flatten().astype(np.float32) / 255
            X.append(imdata)
            more_to_read -= 1
    return DenseDesignMatrix(X=np.asarray(X),
                             y=np.asarray(y).reshape(-1, 1),
                             view_converter=DefaultViewConverter(
                                 (32, 32, 1), axes=('b', 0, 1, 'c')))
Example #13
    def __init__(self,
                 raw,
                 transformer,
                 cpu_only=False,
                 space_preserving=False,
                 block_length=1):
        """
            .. todo::

                WRITEME properly

            Parameters
            ----------
            raw : pylearn2 Dataset
                Provides raw data
            transformer : pylearn2 Block
                To transform the data
            block_length : int
                Number of timeseries elements per block
        """
        assert block_length >= 1

        if block_length != 1:
            timeseries = Timeseries(X=raw, block_length=block_length)
            super(TimeseriesTransformerDataset,
                  self).__init__(timeseries, transformer, cpu_only,
                                 space_preserving)
        else:
            raw = DenseDesignMatrix(X=raw)
            super(TimeseriesTransformerDataset,
                  self).__init__(raw, transformer, cpu_only, space_preserving)
Example #14
def random_dense_design_matrix(rng, num_examples, dim, num_classes):
    """
    Creates a random dense design matrix that has class labels.

    Parameters
    ----------
    rng : numpy.random.RandomState
        The random number generator used to generate the dataset.
    num_examples : int
        The number of examples to create.
    dim : int
        The number of features in each example.
    num_classes : int
        The number of classes to assign the examples to.
        0 indicates that no class labels will be generated.
    """
    X = rng.randn(num_examples, dim)

    if num_classes:
        Y = rng.randint(0, num_classes, (num_examples, 1))
        y_labels = num_classes
    else:
        Y = None
        y_labels = None

    return DenseDesignMatrix(X=X, y=Y, y_labels=y_labels)
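A usage sketch (the values are illustrative; passing num_classes=0 exercises the unlabeled branch):

import numpy as np

rng = np.random.RandomState([2015, 1, 1])
ds = random_dense_design_matrix(rng, num_examples=20, dim=5, num_classes=4)
assert ds.X.shape == (20, 5)
assert ds.y.shape == (20, 1)

unlabeled = random_dense_design_matrix(rng, 20, 5, num_classes=0)
assert unlabeled.y is None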
Example #15
    def test_unit_norm(self):
        """ Test that using std_bias = 0.0 and use_norm = True
            results in vectors having unit norm """

        tol = 1e-5

        num_examples = 5
        num_features = 10

        rng = np.random.RandomState([1, 2, 3])

        X = as_floatX(rng.randn(num_examples, num_features))

        dataset = DenseDesignMatrix(X=X)

        # the setting of subtract_mean is not relevant to the test
        # the test only applies when std_bias = 0.0 and use_std = False
        preprocessor = GlobalContrastNormalization(subtract_mean=False,
                                                   sqrt_bias=0.0,
                                                   use_std=False)

        dataset.apply_preprocessor(preprocessor)

        result = dataset.get_design_matrix()

        norms = np.sqrt(np.square(result).sum(axis=1))

        max_norm_error = np.abs(norms - 1.).max()

        tol = 3e-5

        assert max_norm_error < tol
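What the preprocessor is expected to do in this configuration, written out directly (a sketch assuming division by the per-row L2 norm, which is what sqrt_bias=0.0 and use_std=False amount to):

import numpy as np

X = np.random.randn(5, 10)
normalized = X / np.sqrt((X ** 2).sum(axis=1, keepdims=True))
assert np.allclose(np.sqrt((normalized ** 2).sum(axis=1)), 1.)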
Example #16
def test_execution_order():

    # ensure save is called directly after monitoring by checking
    # parameter values in `on_monitor` and `on_save`.

    model = MLP(layers=[Softmax(layer_name='y', n_classes=2, irange=0.)],
                nvis=3)

    dataset = DenseDesignMatrix(X=np.random.normal(size=(6, 3)),
                                y=np.random.normal(size=(6, 2)))

    epoch_counter = EpochCounter(max_epochs=1)

    algorithm = SGD(batch_size=2,
                    learning_rate=0.1,
                    termination_criterion=epoch_counter)

    extension = ParamMonitor()

    train = Train(dataset=dataset,
                  model=model,
                  algorithm=algorithm,
                  extensions=[extension],
                  save_freq=1,
                  save_path="save.pkl")

    # mock save
    train.save = MethodType(only_run_extensions, train)

    train.main_loop()
Example #17
def test_convert_to_one_hot():
    rng = np.random.RandomState([2013, 11, 14])
    m = 11
    d = DenseDesignMatrix(
        X=rng.randn(m, 4),
        y=rng.randint(low=0, high=10, size=(m,)))
    d.convert_to_one_hot()
Example #18
def test_init_with_X_or_topo():
    # tests that constructing with topo_view works
    # tests that construction with design matrix works
    # tests that conversion from topo_view to design matrix and back works
    # tests that conversion the other way works too
    rng = np.random.RandomState([1, 2, 3])
    topo_view = rng.randn(5, 2, 2, 3)
    d1 = DenseDesignMatrix(topo_view=topo_view)
    X = d1.get_design_matrix()
    d2 = DenseDesignMatrix(X=X, view_converter=d1.view_converter)
    topo_view_2 = d2.get_topological_view()
    assert np.allclose(topo_view, topo_view_2)
    X = rng.randn(*X.shape)
    topo_view_3 = d2.get_topological_view(X)
    X2 = d2.get_design_matrix(topo_view_3)
    assert np.allclose(X, X2)
Example #19
def load_data(start, stop):
    # Loads the 1 million images into X and creates a DenseDesignMatrix
    # for use in a Denoising Autoencoder which is later used in a sDAE.
    # Returns: dataset: DenseDesignMatrix(start, stop)
    #dataset_location = "~/catkin_ws/src/athomesoftware/datasets/tinyimages/"
    dataset_location = "~/catkin_ws/src/athomesoftware/datasets/cifar10/"
    X = []

    print("Loading images from " + dataset_location)
    # os.walk yields (dirpath, dirnames, filenames) tuples
    for dirpath, dirnames, filenames in os.walk(
            os.path.expanduser(dataset_location)):
        for filename in filenames:
            if filename.endswith('.png'):
                im = Image.open(os.path.join(dirpath, filename))
                X.append(list(im.getdata()))

    print("Images loaded from " + dataset_location)
    X = np.asarray(X, dtype=np.float32)
    X = X[start:stop, :]

    print("Creating design matrix " + dataset_location)
    # the images are unlabeled, so only X is passed
    return DenseDesignMatrix(X=X)
Example #20
def monary_load(start=0, stop=-1, find_args=None, species_to_retrieve=()):
    # avoid mutable default arguments: find_args is mutated below
    if find_args is None:
        find_args = {}
    if not species_to_retrieve:
        species_to_retrieve = species
    else:
        species_to_retrieve = [s for s in species_to_retrieve if s in species]
    query = {}
    for s in species_to_retrieve:
        query[s] = {"$gt": 0}
    find_args["$or"] = [{k: query[k]} for k in query.keys()]
    with Monary("127.0.0.1") as monary:
        out = monary.query(
            "creeval",
            collection,
            find_args,
            num_metadata + cat_metadata + species_to_retrieve, ["float32"] *
            (len(num_metadata) + len(cat_metadata) + len(species_to_retrieve)),
            limit=(stop - start),
            offset=start)
    for i, col in enumerate(out[0:len(num_metadata + cat_metadata)]):
        out[i] = np.ma.filled(col, np.ma.mean(col))
        #if any(np.isnan(col)):
        #	print col
    out = np.ma.row_stack(out).T
    X = out[:, 0:len(num_metadata + cat_metadata)]
    y = out[:, len(num_metadata + cat_metadata):]
    y = (y > 0).astype(int)

    scaler = StandardScaler().fit(X)
    X = scaler.transform(X)
    pickle.dump(scaler, open(collection + "_scaler.pkl", "wb"))
    y = np.asarray(y)

    return DenseDesignMatrix(X=X, y=y)
Example #21
def test_extract_reassemble():
    """ Tests that ExtractGridPatches and ReassembleGridPatches are
    inverse of each other """

    rng = np.random.RandomState([1, 3, 7])

    topo = rng.randn(4, 3 * 5, 3 * 7, 2)

    dataset = DenseDesignMatrix(topo_view=topo)

    patch_shape = (3, 7)

    extractor = ExtractGridPatches(patch_shape, patch_shape)
    reassemblor = ReassembleGridPatches(patch_shape=patch_shape,
                                        orig_shape=topo.shape[1:3])

    dataset.apply_preprocessor(extractor)
    dataset.apply_preprocessor(reassemblor)

    new_topo = dataset.get_topological_view()

    assert new_topo.shape == topo.shape

    if not np.all(new_topo == topo):
        assert False
Example #22
def test_serialization_guard():

    # tests that Train refuses to serialize the dataset

    dim = 2
    m = 11

    rng = np.random.RandomState([28, 9, 2012])
    X = rng.randn(m, dim)
    dataset = DenseDesignMatrix(X=X)

    model = DummyModel(dim)
    # make the dataset part of the model, so it will get
    # serialized
    model.dataset = dataset

    Monitor.get_monitor(model)

    algorithm = DummyAlgorithm()

    train = Train(dataset,
                  model,
                  algorithm,
                  save_path='_tmp_unit_test.pkl',
                  save_freq=1,
                  callbacks=None)

    try:
        train.main_loop()
    except RuntimeError:
        return
    assert False  # train did not complain, this is a bug
Example #23
    def next(self):
        next_index = self._subset_iterator.next()

        # convert to boolean selection
        sel = np.zeros(self.num_examples, dtype=bool)
        sel[next_index] = True
        next_index = sel

        rval = []
        for data, fn in safe_izip(self._raw_data, self._convert):
            try:
                this_data = data[next_index]
            except TypeError:
                this_data = data[next_index, :]
            if fn:
                this_data = fn(this_data)
            if self._preprocessor is not None:
                d = DenseDesignMatrix(X=this_data)
                self._preprocessor.apply(d)
                this_data = d.get_design_matrix()
            assert not np.any(np.isnan(this_data))
            rval.append(this_data)
        rval = tuple(rval)
        if not self._return_tuple and len(rval) == 1:
            rval, = rval
        return rval
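The index-to-boolean conversion above makes the subsequent fancy indexing uniform across containers. A sketch of the equivalence (shapes are illustrative):

import numpy as np

num_examples = 6
next_index = np.asarray([1, 4])           # what the subset iterator yields
sel = np.zeros(num_examples, dtype=bool)  # convert to a boolean mask
sel[next_index] = True

data = np.arange(12).reshape(6, 2)
assert np.array_equal(data[sel], data[next_index])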
Example #24
def array_to_ds(X):
    """
    Build a DenseDesignMatrix with topo_view using X.
    X: a nsamples x pixels numpy array, or a list of linearized images
    """
    if type(X) is list:
        X = np.asarray(X)
    return DenseDesignMatrix(topo_view=X.reshape(X.shape + (1,)))
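A usage sketch: the helper just appends a trailing channel axis, so a batch of 2-D grayscale images yields a standard ('b', 0, 1, 'c') topological view (shapes are illustrative):

import numpy as np

images = np.random.rand(10, 8, 8).astype('float32')  # ten 8x8 images
ds = array_to_ds(images)
assert ds.get_topological_view().shape == (10, 8, 8, 1)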
Example #25
    def load_dataset(self):
        # TODO: we might need other variables for identifying what kind of
        # extra preprocessing was done such as features product and number
        # of features kept based on MI.
        #base_path = get_data_path(self.state)
        #self.base_path = base_path

        #import pdb
        #pdb.set_trace()
        
        if self.state.dataset == 'mnist':
            self.test_ddm = MNIST(which_set='test', one_hot=True)

            dataset = MNIST(which_set='train', shuffle=True, one_hot=True)
            train_X, valid_X = np.split(dataset.X, [50000])
            train_y, valid_y = np.split(dataset.y, [50000])
            self.train_ddm = DenseDesignMatrix(X=train_X, y=train_y)
            self.valid_ddm = DenseDesignMatrix(X=valid_X, y=valid_y)
            
        elif self.state.dataset == 'svhn':
            self.train_ddm = SVHN(which_set='splitted_train')
            self.test_ddm = SVHN(which_set='test')
            self.valid_ddm = SVHN(which_set='valid')

        elif self.state.dataset == 'cifar10':

            self.train_ddm = My_CIFAR10(which_set='train', one_hot=True)
            self.test_ddm = None
            self.valid_ddm = My_CIFAR10(which_set='test', one_hot=True)

        
        if self.train_ddm is not None:
            self.nvis = self.train_ddm.X.shape[1]
            self.nout = self.train_ddm.y.shape[1]
            print "nvis, nout :", self.nvis, self.nout
            self.ntrain = self.train_ddm.X.shape[0]
            print "ntrain :", self.ntrain
        
        if self.valid_ddm is not None:
            self.nvalid = self.valid_ddm.X.shape[0]
            print "nvalid :", self.nvalid
        
        if self.test_ddm is not None:
            self.ntest = self.test_ddm.X.shape[0]
            print "ntest :", self.ntest
Example #26
def random_dense_design_matrix(rng, num_examples, dim, num_classes):
    X = rng.randn(num_examples, dim)

    if num_classes:
        Y = rng.randint(0, num_classes, (num_examples, 1))
    else:
        Y = None

    return DenseDesignMatrix(X=X, y=Y)
Example #27
def random_one_hot_dense_design_matrix(rng, num_examples, dim, num_classes):
    X = rng.randn(num_examples, dim)

    idx = rng.randint(0, num_classes, (num_examples, ))
    Y = np.zeros((num_examples, num_classes))
    for i in xrange(num_examples):
        Y[i, idx[i]] = 1

    return DenseDesignMatrix(X=X, y=Y)
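A usage sketch (the values are illustrative):

import numpy as np

rng = np.random.RandomState([2015, 1, 1])
ds = random_one_hot_dense_design_matrix(rng, num_examples=6, dim=4,
                                        num_classes=3)
assert ds.y.shape == (6, 3)
assert np.all(ds.y.sum(axis=1) == 1)  # exactly one active class per row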
Example #28
def load_xy_data(npy_fn_x,
                 npy_fn_y,
                 start=0,
                 stop=None,
                 strip_dims=None,
                 reverse=False):
    """
    Load the data from `npy_fn_x` and `npy_fn_y`, pair them, and keep
    the rows from `start` (inclusive) to `stop` (exclusive).

    Parameters
    ----------
    npy_fn_x : str
    npy_fn_y : str
    start : int
    stop : int
        Useful for only using a part of the dataset. For data with a frame
        every 10 ms, 360000 frames would give 1 hour of data.
    strip_dims : int
        Only keep this many dimensions of each row (useful for stripping off
        deltas).
    reverse : bool
        If set, load the data by first treating `npy_fn_x` as input and
        `npy_fn_y` as output, and then the reverse.

    Returns
    -------
    ddm : DenseDesignMatrix
    """

    X = np.load(npy_fn_x)
    X = X[start:stop, :strip_dims]

    Y = np.load(npy_fn_y)
    Y = Y[start:stop, :strip_dims]

    d_frame = X.shape[1]  # single frame dimension

    view_converter = DefaultViewConverter((d_frame, X.shape[1] / d_frame, 1))

    if not reverse:
        return DenseDesignMatrix(X=X, y=Y, view_converter=view_converter)
    else:
        return DenseDesignMatrix(X=np.vstack([X, Y]), y=np.vstack([Y, X]))
Example #29
def train(input_dir, output_dir, activation_function, num_hidden_layers,
          num_hidden_nodes_per_layer, learning_rate, minibatch_size, stdev,
          dropout, useX1andX2, gaussian, valid_size, random_seed):

    np.random.seed(random_seed)

    print 'Loading data'
    Xtr, Ytr, Xte = load_data(input_dir, useX1andX2)
    print 'Scaling data'
    s = scale_data(Xtr, Xte)

    Xtr, Xva, Ytr, Yva = train_test_split(Xtr, Ytr, test_size=valid_size)

    dataset_tr = DenseDesignMatrix(X=Xtr, y=Ytr.reshape(len(Ytr), 1))
    dataset_va = DenseDesignMatrix(X=Xva, y=Yva.reshape(len(Yva), 1))

    _, num_features = Xtr.shape

    trainer, model = initialize_dnn(dataset_tr, dataset_va, output_dir,
                                    activation_function, num_features,
                                    num_hidden_layers,
                                    num_hidden_nodes_per_layer, learning_rate,
                                    minibatch_size, stdev, dropout, gaussian)
    trainer.main_loop()

    best_model = pylearn2.utils.serial.load(output_dir + "/NNModel_best.pkl")

    Ytr_pred = predict(Xtr, best_model)
    Yva_pred = predict(Xva, best_model)
    Yte_pred = predict(Xte, best_model)

    J_tr = mean_squared_error(Ytr_pred[:, 0], Ytr)
    J_va = mean_squared_error(Yva_pred[:, 0], Yva)

    print "Training MSE:", J_tr
    print "Validation MSE:", J_va

    print "Outputting predictions on test set"
    test_predictions_file = open(output_dir + "/predictions.csv", "w")
    test_predictions_file.write('ID,Prediction\n')
    for i in xrange(len(Yte_pred)):
        test_predictions_file.write(
            str(i + 1) + "," + str(max(0, Yte_pred[i, 0])) + "\n")
    test_predictions_file.close()
Example #30
    def make_dataset(num_batches):
        m = num_batches * batch_size
        X = rng.randn(m, num_features)
        y = rng.randn(m, num_features)

        rval = DenseDesignMatrix(X=X, y=y)

        rval.yaml_src = ""  # suppress no yaml_src warning

        return rval
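make_dataset above closes over rng, batch_size and num_features from its enclosing test. A self-contained sketch of the same idea (the surrounding names and values are assumptions):

import numpy as np
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix

rng = np.random.RandomState([2014, 1, 1])
batch_size, num_features = 5, 3

def make_dataset(num_batches):
    m = num_batches * batch_size
    X = rng.randn(m, num_features)
    y = rng.randn(m, num_features)
    rval = DenseDesignMatrix(X=X, y=y)
    rval.yaml_src = ""  # suppress the no-yaml_src warning
    return rval

ds = make_dataset(num_batches=2)
assert ds.X.shape == (10, 3)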