Exemplo n.º 1
0
    def __init__(self, array, batch_size=None):
        """Load one MNIST partition and upload it to the GPU.

        The selected image vectors are converted to ``float32`` and divided
        by 255; the labels are expanded into a one-hot ``(N, 10)``
        ``float32`` matrix. Both are transferred with ``gpuarray.to_gpu``
        using ``memory_pool.allocate`` as allocator.

        :param array: {'train', 'val', 'test'}
            Which split to load; mapped to skdata's ``fit_idxs``,
            ``val_idxs`` and ``tst_idxs`` respectively.
        :param batch_size: The size of mini-batches. ``None`` (the default)
            uses the size of the selected partition.
        :raises ValueError: If ``array`` is not a known partition name.
        """
        # Try ``skdata.mnist.view`` first and fall back to
        # ``skdata.mnist.views`` when that import fails.
        try:
            from skdata.mnist.view import OfficialVectorClassification
        except ImportError:
            from skdata.mnist.views import OfficialVectorClassification
        self.mnist = OfficialVectorClassification()

        self.train_idx = self.mnist.fit_idxs
        self.val_idx = self.mnist.val_idxs
        self.test_idx = self.mnist.tst_idxs

        self.N_train = self.train_idx.shape[0]
        self.N_val = self.val_idx.shape[0]
        self.N_test = self.test_idx.shape[0]
        self.D = self.mnist.all_vectors.shape[1]

        if array == 'train':
            idx = self.train_idx
        elif array == 'val':
            idx = self.val_idx
        elif array == 'test':
            idx = self.test_idx
        else:
            raise ValueError('Unknown partition "%s"' % array)

        # Size everything from the selected partition itself. The previous
        # per-branch code sized the 'val' one-hot matrix with N_train, which
        # left the targets longer than the data and padded with zero rows.
        self.N = idx.shape[0]

        vectors = self.mnist.all_vectors[idx].astype(np.float32) / 255.
        self.data = gpuarray.to_gpu(vectors, allocator=memory_pool.allocate)

        labels = self.mnist.all_labels[idx]
        labels_soft = np.zeros((self.N, 10), dtype=np.float32)
        # One 1.0 per row, in the column given by that row's label.
        labels_soft[np.arange(self.N), labels] = 1.
        self.targets = gpuarray.to_gpu(labels_soft,
                                       allocator=memory_pool.allocate)

        self.batch_size = batch_size if batch_size is not None else self.N
        self.i = 0
        self._make_batches()
Exemplo n.º 2
0
                                           **input_shapes)
# The above executor computes gradients. When evaluating test data we don't need this.
# We want this executor to share weights with the above one, so we will use bind
# (instead of simple_bind) and use the other executor's arguments.
# grad_req='null' requests no gradient computation for this second executor.
executor_test = cost_classification.bind(ctx=mx.cpu(0),
                                         grad_req='null',
                                         args=executor.arg_arrays)

# initialize the weights
# Every array in executor.arg_arrays is overwritten in place with standard-normal
# samples scaled by 0.02.
# NOTE(review): this loop touches all argument arrays, not only weights; any
# input/label arrays in the list are presumably overwritten later -- confirm.
for r in executor.arg_arrays:
    r[:] = np.random.randn(*r.shape) * 0.02

# Using skdata to get mnist data. This is for portability. Can sub in any data loading you like.
from skdata.mnist.view import OfficialVectorClassification

data = OfficialVectorClassification()
# NOTE(review): if sel_idxs is a NumPy array, `[:]` yields a view rather than a
# copy, so the in-place shuffle below would also reorder data.sel_idxs -- confirm.
trIdx = data.sel_idxs[:]
# NOTE(review): the name suggests "test" indices, but the validation indices
# (val_idxs) are what is taken here -- confirm this is intended.
teIdx = data.val_idxs[:]
# Train for 20 epochs, visiting the training indices in a new random order each time.
for epoch in range(20):
    np.random.shuffle(trIdx)

    # Walk the shuffled indices in steps of batch_size.
    for x in range(0, len(trIdx), batch_size):
        # extract a batch from mnist
        batchX = data.all_vectors[trIdx[x:x + batch_size]]
        batchY = data.all_labels[trIdx[x:x + batch_size]]

        # our executor was bound to 128 size. Throw out non matching batches.
        # (In practice this skips a trailing batch shorter than batch_size.)
        if batchX.shape[0] != batch_size:
            continue
        # Store batch in executor 'data'
        # The batch is divided by 255 before being copied into the bound array.
        executor.arg_dict['data'][:] = batchX / 255.
Exemplo n.º 3
0
class MNISTDataProvider(MiniBatchDataProvider):
    """``DataProvider`` that automatically provides data from the
    `MNIST <http://yann.lecun.com/exdb/mnist/>`_ data set of
    hand-written digits.

    Depends on the `skdata <http://jaberg.github.io/skdata/>`_ package.

    :param array: {'train', 'val', 'test'}
        Whether to use the official training, validation, or test data split of MNIST.
    :param batch_size: The size of mini-batches.
    """

    # The data set is loaded once, when the class body executes, and is
    # shared by every instance through the ``mnist`` class attribute.
    try:
        from skdata.mnist.view import OfficialVectorClassification
    except ImportError:
        from skdata.mnist.views import OfficialVectorClassification
    mnist = OfficialVectorClassification()

    def __init__(self, array, batch_size=None):
        """Select one MNIST partition and upload it to the GPU.

        The selected image vectors are converted to ``float32`` and divided
        by 255; the labels are expanded into a one-hot ``(N, 10)``
        ``float32`` matrix. Both are transferred with ``gpuarray.to_gpu``.

        :param array: {'train', 'val', 'test'}
            Which split to load; mapped to skdata's ``fit_idxs``,
            ``val_idxs`` and ``tst_idxs`` respectively.
        :param batch_size: The size of mini-batches. ``None`` (the default)
            uses the size of the selected partition.
        :raises ValueError: If ``array`` is not a known partition name.
        """
        self.train_idx = self.mnist.fit_idxs
        self.val_idx = self.mnist.val_idxs
        self.test_idx = self.mnist.tst_idxs

        self.N_train = self.train_idx.shape[0]
        self.N_val = self.val_idx.shape[0]
        self.N_test = self.test_idx.shape[0]
        self.D = self.mnist.all_vectors.shape[1]

        if array == 'train':
            idx = self.train_idx
        elif array == 'val':
            idx = self.val_idx
        elif array == 'test':
            idx = self.test_idx
        else:
            raise ValueError('Unknown partition "%s"' % array)

        # Size everything from the selected partition itself. The previous
        # per-branch code sized the 'val' one-hot matrix with N_train, which
        # left the targets longer than the data and padded with zero rows.
        self.N = idx.shape[0]

        vectors = self.mnist.all_vectors[idx].astype(np.float32) / 255.
        self.data = gpuarray.to_gpu(vectors)

        labels = self.mnist.all_labels[idx]
        labels_soft = np.zeros((self.N, 10), dtype=np.float32)
        # One 1.0 per row, in the column given by that row's label.
        labels_soft[np.arange(self.N), labels] = 1.
        self.targets = gpuarray.to_gpu(labels_soft)

        self.batch_size = batch_size if batch_size is not None else self.N
        self.i = 0
        self._make_batches()
Exemplo n.º 4
0
class MNISTDataProvider(DataProvider):
    """``DataProvider`` that automatically provides data from the
    `MNIST <http://yann.lecun.com/exdb/mnist/>`_ data set of
    hand-written digits.

    Depends on the `skdata <http://jaberg.github.io/skdata/>`_ package.

    :param array: {'train', 'val', 'test'}
        Whether to use the official training, validation, or test data split of MNIST.
    :param batch_size: The size of mini-batches.
    """

    # The data set is loaded once, when the class body executes, and is
    # shared by every instance through the ``mnist`` class attribute.
    from skdata.mnist.view import OfficialVectorClassification
    mnist = OfficialVectorClassification()

    def __init__(self, array, batch_size=None):
        """Select one MNIST partition and upload it to the GPU.

        The selected image vectors are converted to ``float32`` and divided
        by 255; the labels are expanded into a one-hot ``(N, 10)``
        ``float32`` matrix. Both are transferred with ``gpuarray.to_gpu``.

        :param array: {'train', 'val', 'test'}
            Which split to load; mapped to skdata's ``fit_idxs``,
            ``val_idxs`` and ``tst_idxs`` respectively.
        :param batch_size: The size of mini-batches. ``None`` (the default)
            uses the size of the selected partition.
        :raises ValueError: If ``array`` is not a known partition name.
        """
        self.train_idx = self.mnist.fit_idxs
        self.val_idx = self.mnist.val_idxs
        self.test_idx = self.mnist.tst_idxs

        self.N_train = self.train_idx.shape[0]
        self.N_val = self.val_idx.shape[0]
        self.N_test = self.test_idx.shape[0]
        self.D = self.mnist.all_vectors.shape[1]

        if array == 'train':
            idx = self.train_idx
        elif array == 'val':
            idx = self.val_idx
        elif array == 'test':
            idx = self.test_idx
        else:
            raise ValueError('Unknown partition "%s"' % array)

        # Size everything from the selected partition itself. The previous
        # per-branch code sized the 'val' one-hot matrix with N_train, which
        # left the targets longer than the data and padded with zero rows.
        self.N = idx.shape[0]

        vectors = self.mnist.all_vectors[idx].astype(np.float32) / 255.
        self.data = gpuarray.to_gpu(vectors)

        labels = self.mnist.all_labels[idx]
        labels_soft = np.zeros((self.N, 10), dtype=np.float32)
        # One 1.0 per row, in the column given by that row's label.
        labels_soft[np.arange(self.N), labels] = 1.
        self.targets = gpuarray.to_gpu(labels_soft)

        self.batch_size = batch_size if batch_size is not None else self.N
        self.i = 0

    def __getitem__(self, batch_idx):
        """Return the ``(data, targets)`` pair for mini-batch ``batch_idx``.

        :param batch_idx: Zero-based index of the mini-batch.
        :raises ValueError: If ``batch_size`` is ``None`` and ``batch_idx``
            is not 0.
        """
        # __init__ never leaves batch_size as None; this branch only applies
        # if the attribute is reset to None after construction.
        if self.batch_size is None:
            if batch_idx == 0:
                return self.data, self.targets
            else:
                raise ValueError("batch_idx out of bounds")
        else:
            start = batch_idx * self.batch_size
            stop = (batch_idx + 1) * self.batch_size
            minibatch_data = self.data[start:stop]
            minibatch_targets = self.targets[start:stop]
            return minibatch_data, minibatch_targets

    def next(self):
        """Return the next ``(data, targets)`` mini-batch.

        Once the cursor ``self.i`` has reached ``self.N`` it is reset to 0
        and ``StopIteration`` is raised, so the provider can be iterated
        again afterwards.

        :raises StopIteration: When the cursor has reached ``self.N``.
        """
        if self.i >= self.N:
            self.i = 0
            raise StopIteration

        # See __getitem__: only reachable if batch_size was reset to None.
        if self.batch_size is None:
            self.i += self.N
            return self.data, self.targets
        else:
            start = self.i
            stop = self.i + self.batch_size
            minibatch_data = self.data[start:stop]
            minibatch_targets = self.targets[start:stop]
            self.i += self.batch_size
            return minibatch_data, minibatch_targets