Example No. 1
def test_in_memory():
    skip_if_not_available(datasets=['mnist.hdf5'])
    # Load MNIST and get two batches
    mnist = MNIST(('train',), load_in_memory=True)
    data_stream = DataStream(mnist, iteration_scheme=SequentialScheme(
        examples=mnist.num_examples, batch_size=256))
    epoch = data_stream.get_epoch_iterator()
    for i, (features, targets) in enumerate(epoch):
        if i == 1:
            break
    handle = mnist.open()
    known_features, _ = mnist.get_data(handle, slice(256, 512))
    mnist.close(handle)
    assert numpy.all(features == known_features)

    # Pickle the epoch and make sure that the data wasn't dumped
    with tempfile.NamedTemporaryFile(delete=False) as f:
        filename = f.name
        cPickle.dump(epoch, f)
    assert os.path.getsize(filename) < 1024 * 1024  # Less than 1MB

    # Reload the epoch and make sure that the state was maintained
    del epoch
    with open(filename, 'rb') as f:
        epoch = cPickle.load(f)
    features, targets = next(epoch)
    handle = mnist.open()
    known_features, _ = mnist.get_data(handle, slice(512, 768))
    mnist.close(handle)
    assert numpy.all(features == known_features)
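
# Not part of the original test: a minimal sketch of the same batched-iteration
# pattern with shuffling, assuming fuel's ShuffledScheme is importable alongside
# the SequentialScheme used above.
def iterate_shuffled_mnist():
    from fuel.schemes import ShuffledScheme
    mnist = MNIST(('train',), load_in_memory=True)
    stream = DataStream(mnist, iteration_scheme=ShuffledScheme(
        examples=mnist.num_examples, batch_size=256))
    for features, targets in stream.get_epoch_iterator():
        pass  # each batch is a (features, targets) pair of up to 256 examples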
Example No. 2
def test_in_memory():
    skip_if_not_available(datasets=['mnist.hdf5'])
    # Load MNIST and get two batches
    mnist = MNIST('train', load_in_memory=True)
    data_stream = DataStream(mnist,
                             iteration_scheme=SequentialScheme(
                                 examples=mnist.num_examples, batch_size=256))
    epoch = data_stream.get_epoch_iterator()
    for i, (features, targets) in enumerate(epoch):
        if i == 1:
            break
    handle = mnist.open()
    known_features, _ = mnist.get_data(handle, slice(256, 512))
    mnist.close(handle)
    assert numpy.all(features == known_features)

    # Pickle the epoch and make sure that the data wasn't dumped
    with tempfile.NamedTemporaryFile(delete=False) as f:
        filename = f.name
        cPickle.dump(epoch, f)
    assert os.path.getsize(filename) < 1024 * 1024  # Less than 1MB

    # Reload the epoch and make sure that the state was maintained
    del epoch
    with open(filename, 'rb') as f:
        epoch = cPickle.load(f)
    features, targets = next(epoch)
    handle = mnist.open()
    known_features, _ = mnist.get_data(handle, slice(512, 768))
    mnist.close(handle)
    assert numpy.all(features == known_features)
Example No. 3
def test_mnist():
    skip_if_not_available(datasets=['mnist'])
    mnist_train = MNIST('train', start=20000)
    assert len(mnist_train.features) == 40000
    assert len(mnist_train.targets) == 40000
    assert mnist_train.num_examples == 40000
    mnist_test = MNIST('test', sources=('targets',))
    assert len(mnist_test.targets) == 10000
    assert mnist_test.num_examples == 10000

    first_feature, first_target = mnist_train.get_data(request=[0])
    assert first_feature.shape == (1, 784)
    assert first_feature.dtype.kind == 'f'
    assert first_target.shape == (1, 1)
    assert first_target.dtype is numpy.dtype('uint8')

    first_target, = mnist_test.get_data(request=[0, 1])
    assert first_target.shape == (2, 1)

    binary_mnist = MNIST('test', binary=True, sources=('features',))
    first_feature, = binary_mnist.get_data(request=[0])
    assert first_feature.dtype.kind == 'b'
    assert_raises(ValueError, MNIST, 'valid')

    mnist_train = cPickle.loads(cPickle.dumps(mnist_train))
    assert len(mnist_train.features) == 40000

    mnist_test_unflattened = MNIST('test', flatten=False)
    assert mnist_test_unflattened.features.shape == (10000, 28, 28)
Example No. 4
def test_mnist():
    skip_if_not_available(datasets=['mnist'])
    mnist_train = MNIST('train', start=20000)
    assert len(mnist_train.features) == 40000
    assert len(mnist_train.targets) == 40000
    assert mnist_train.num_examples == 40000
    mnist_test = MNIST('test', sources=('targets', ))
    assert len(mnist_test.targets) == 10000
    assert mnist_test.num_examples == 10000

    first_feature, first_target = mnist_train.get_data(request=[0])
    assert first_feature.shape == (1, 784)
    assert first_feature.dtype.kind == 'f'
    assert first_target.shape == (1, 1)
    assert first_target.dtype is numpy.dtype('uint8')

    first_target, = mnist_test.get_data(request=[0, 1])
    assert first_target.shape == (2, 1)

    binary_mnist = MNIST('test', binary=True, sources=('features', ))
    first_feature, = binary_mnist.get_data(request=[0])
    assert first_feature.dtype.kind == 'b'
    assert_raises(ValueError, MNIST, 'valid')

    mnist_train = cPickle.loads(cPickle.dumps(mnist_train))
    assert len(mnist_train.features) == 40000

    mnist_test_unflattened = MNIST('test', flatten=False)
    assert mnist_test_unflattened.features.shape == (10000, 28, 28)
Example No. 5
def test_mnist_test():
    skip_if_not_available(datasets=["mnist.hdf5"])

    dataset = MNIST(("test",), load_in_memory=False)
    handle = dataset.open()
    data, labels = dataset.get_data(handle, slice(0, 10))
    assert data.dtype == "uint8"
    assert data.shape == (10, 1, 28, 28)
    assert labels.shape == (10, 1)
    known = numpy.array([0, 0, 0, 0, 0, 0, 84, 185, 159, 151, 60, 36, 0, 0,
                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    assert_allclose(data[0][0][7], known)
    assert labels[0][0] == 7
    assert dataset.num_examples == 10000
    dataset.close(handle)

    stream = DataStream.default_stream(dataset, iteration_scheme=SequentialScheme(10, 10))
    data = next(stream.get_epoch_iterator())[0]
    assert data.min() >= 0.0 and data.max() <= 1.0
    assert data.dtype == config.floatX
Example No. 6
def test_mnist_train():
    skip_if_not_available(datasets=['mnist.hdf5'])

    dataset = MNIST('train', load_in_memory=False)
    handle = dataset.open()
    data, labels = dataset.get_data(handle, slice(0, 10))
    assert data.dtype == 'uint8'
    assert data.shape == (10, 1, 28, 28)
    assert labels.shape == (10, 1)
    known = numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 30, 36, 94, 154, 170, 253,
                         253, 253, 253, 253, 225, 172, 253, 242, 195,  64, 0,
                         0, 0, 0])
    assert_allclose(data[0][0][6], known)
    assert labels[0][0] == 5
    assert dataset.num_examples == 60000
    dataset.close(handle)

    stream = DataStream.default_stream(
        dataset, iteration_scheme=SequentialScheme(10, 10))
    data = next(stream.get_epoch_iterator())[0]
    assert data.min() >= 0.0 and data.max() <= 1.0
    assert data.dtype == config.floatX
Example No. 7
def test_mnist_test():
    skip_if_not_available(datasets=['mnist.hdf5'])

    dataset = MNIST('test', load_in_memory=False)
    handle = dataset.open()
    data, labels = dataset.get_data(handle, slice(0, 10))
    assert data.dtype == 'uint8'
    assert data.shape == (10, 1, 28, 28)
    assert labels.shape == (10, 1)
    known = numpy.array([
        0, 0, 0, 0, 0, 0, 84, 185, 159, 151, 60, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0
    ])
    assert_allclose(data[0][0][7], known)
    assert labels[0][0] == 7
    assert dataset.num_examples == 10000
    dataset.close(handle)

    stream = DataStream.default_stream(dataset,
                                       iteration_scheme=SequentialScheme(
                                           10, 10))
    data = next(stream.get_epoch_iterator())[0]
    assert data.min() >= 0.0 and data.max() <= 1.0
    assert data.dtype == config.floatX
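
# Not part of the original test: a rough equivalent of what default_stream provides
# for MNIST, assuming fuel.transformers.ScaleAndShift and Cast are available: the
# raw uint8 pixels are rescaled into [0, 1] and cast to the configured float type.
def manually_scaled_stream(dataset):
    from fuel.transformers import Cast, ScaleAndShift
    plain = DataStream(dataset, iteration_scheme=SequentialScheme(10, 10))
    scaled = ScaleAndShift(plain, scale=1 / 255.0, shift=0,
                           which_sources=('features',))
    return Cast(scaled, dtype=config.floatX, which_sources=('features',))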
Example No. 8
import numpy as np
import theano

from fuel.datasets import MNIST
from fuel.schemes import SequentialScheme
from fuel.transformers import Mapping, Flatten
from blocks.graph import ComputationGraph
from blocks.monitoring import aggregation
from blocks.extensions import FinishAfter, Timing, Printing
from blocks.extensions.monitoring import (DataStreamMonitoring,
                                          TrainingDataMonitoring)
from blocks.main_loop import MainLoop

from blocks_contrib.extensions import DataStreamMonitoringAndSaving
floatX = theano.config.floatX


mnist = MNIST('train', sources=['features'])
handle = mnist.open()
data = mnist.get_data(handle, slice(0, 50000))[0]
means = data.reshape((50000, 784)).mean(axis=0)


def autocorrentropy2(X, ksize=np.inf):
    b, t, d = X.shape
    V = np.zeros((b, t, d))
    for i in range(b):
        for j in range(t):
            if ksize in (np.inf, 0., np.nan):
                V[i, j, :] = (X[i, :(t-j), :] * X[i, j:, :]).sum(axis=0) / (t-j)
            else:
                V[i, j, :] = np.exp((-ksize * (X[i, :(t-j), :]-X[i, j:, :])**2)).sum(axis=0) / (t-j)
    return V
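
# Not part of the original excerpt: a usage sketch for autocorrentropy2. It maps a
# (batch, time, dim) array to a (batch, time, dim) array of per-lag statistics; with
# the default ksize=np.inf the lag-0 entry is the mean of X**2 over time, while a
# finite ksize gives Gaussian-kernel correntropy values (1.0 at lag 0).
X_demo = np.random.randn(4, 20, 3)
V_corr = autocorrentropy2(X_demo)              # autocorrelation-style statistics
V_kern = autocorrentropy2(X_demo, ksize=1.0)   # kernelized (correntropy) statistics
assert V_corr.shape == V_kern.shape == (4, 20, 3)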

Example No. 9
class BucketVisualizer:
    def __init__(self, save_to, act_table):
        self.mnist_test = MNIST(("test", ), sources=['features', 'targets'])
        self.table = self.load_act_table(save_to, act_table)

    def all_match(self, index, the_set, positive):
        if the_set is None or len(the_set) == 0:
            return True
        selected = self.table[index, the_set]
        if positive:
            matched = selected > 0
        else:
            matched = selected <= 0
        return matched.sum() == len(the_set)

    def activations_for_sample(self, index):
        return self.table[index, :]

    def positive_for_sample(self, index):
        return numpy.where(self.activations_for_sample(index) > 0)[0]

    def negative_for_sample(self, index):
        return numpy.where(self.activations_for_sample(index) <= 0)[0]

    def prediction_for_sample(self, index):
        return self.table[index, :10].argmax()

    def label_for_sample(self, index):
        return self.mnist_test.get_data(request=index)[1][0]

    def filter_image_bytes(self,
                           positive_set=None,
                           negative_set=None,
                           sort_by=None,
                           columns=100,
                           limit=None,
                           ulimit=None,
                           descending=False):
        include_indexes = [
            ind for ind in range(self.table.shape[0])
            if (self.all_match(ind, positive_set, True)
                and self.all_match(ind, negative_set, False))
        ]
        if sort_by:
            include_indexes.sort(key=lambda x: self.table[x, sort_by].sum())
        if descending:
            include_indexes.reverse()
        if (limit or ulimit) and not (limit and ulimit and
                                      limit + ulimit >= len(include_indexes)):
            lower = include_indexes[:limit] if limit else []
            upper = include_indexes[-ulimit:] if ulimit else []
            include_indexes = lower + upper
        count = max(1, len(include_indexes))
        grid_shape = (((count - 1) // columns + 1), min(columns, count))

        filmstrip = Filmstrip(image_shape=(28, 28), grid_shape=grid_shape)
        for i, index in enumerate(include_indexes):
            filmstrip.set_image((i // columns, i % columns),
                                self.mnist_test.get_data(request=index)[0])
        return filmstrip.save_bytes()

    def example_count(self):
        return self.table.shape[0]

    def unit_count(self):
        return self.table.shape[1]

    def load_act_table(self, save_to, act_table):
        try:
            return pickle.load(open(act_table, 'rb'))
        except FileNotFoundError:
            return self.create_act_table(save_to, act_table)

    def create_act_table(self, save_to, act_table):
        batch_size = 500
        image_size = (28, 28)
        output_size = 10
        convnet = create_lenet_5()
        layers = convnet.layers

        x = tensor.tensor4('features')
        y = tensor.lmatrix('targets')

        # Normalize input and apply the convnet
        probs = convnet.apply(x)
        cg = ComputationGraph([probs])

        def full_brick_name(brick):
            return '/'.join([''] + [b.name for b in brick.get_unique_path()])

        # Find layer outputs to probe
        outmap = OrderedDict(
            (full_brick_name(get_brick(out)), out) for out in VariableFilter(
                roles=[OUTPUT], bricks=[Convolutional, Linear])(cg.variables))
        # Generate pics for biases
        biases = VariableFilter(roles=[BIAS])(cg.parameters)

        # Generate parallel array, in the same order, for outputs
        outs = [outmap[full_brick_name(get_brick(b))] for b in biases]

        # Figure work count
        error_rate = (MisclassificationRate().apply(
            y.flatten(), probs).copy(name='error_rate'))
        max_activation_table = (MaxActivationTable().apply(outs).copy(
            name='max_activation_table'))
        max_activation_table.tag.aggregation_scheme = (
            Concatenate(max_activation_table))

        model = Model([error_rate, max_activation_table])

        # Load it with trained parameters
        params = load_parameters(open(save_to, 'rb'))
        model.set_parameter_values(params)

        mnist_test_stream = DataStream.default_stream(
            self.mnist_test,
            iteration_scheme=SequentialScheme(self.mnist_test.num_examples,
                                              batch_size))

        evaluator = DatasetEvaluator([error_rate, max_activation_table])
        results = evaluator.evaluate(mnist_test_stream)
        table = results['max_activation_table']
        pickle.dump(table, open(act_table, 'wb'))
        return table
Example No. 10
        train(x, y)
        
print(w.get_value()) #something around 2


#https://raw.githubusercontent.com/Newmu/Theano-Tutorials/master/2_logistic_regression.py

import theano
from theano import tensor as T
import numpy as np
from fuel.datasets import MNIST
from matplotlib import pyplot, cm

dataset = MNIST(('train',), sources=('features',))
state = dataset.open()
image, = dataset.get_data(state=state, request=[1234])
pyplot.imshow(image.reshape((28, 28)), cmap=cm.Greys_r, interpolation='nearest')
pyplot.show()
dataset.close(state)

def floatX(X):
    return np.asarray(X, dtype=theano.config.floatX)

def init_weights(shape):
    return theano.shared(floatX(np.random.randn(*shape) * 0.01))

def model(X, w):
    return T.nnet.softmax(T.dot(X, w))
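
# Not from the original excerpt: a minimal sketch of how the helpers above are wired
# together in the referenced logistic-regression tutorial, i.e. a softmax model over
# flattened 784-pixel MNIST images trained by plain gradient descent.
X = T.fmatrix()
Y = T.fmatrix()
w = init_weights((784, 10))
py_x = model(X, w)
cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
update = [[w, w - T.grad(cost=cost, wrt=w) * 0.05]]
train = theano.function(inputs=[X, Y], outputs=cost, updates=update,
                        allow_input_downcast=True)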

trX, teX, trY, teY = mnist(onehot=True)

# In[3]:

mnist.num_examples


# In[4]:

mnist.sources


# In[5]:

handle = mnist.open()
data_sample = mnist.get_data(handle, [0, 1, 2])  # (ndarray, ndarray)


# In[6]:

data_sample[0].shape  # features


# In[7]:

data_sample[1].shape  # targets


# ## DataStream

# In[8]:
Example No. 12
class BucketVisualizer:
    def __init__(self, save_to, act_table):
        self.mnist_test = MNIST(("test",), sources=['features', 'targets'])
        self.table = self.load_act_table(save_to, act_table)

    def all_match(self, index, the_set, positive):
        if the_set is None or len(the_set) == 0:
            return True
        selected = self.table[index, the_set]
        if positive:
            matched = selected > 0
        else:
            matched = selected <= 0
        return matched.sum() == len(the_set)

    def activations_for_sample(self, index):
        return self.table[index, :]

    def positive_for_sample(self, index):
        return numpy.where(self.activations_for_sample(index) > 0)[0]

    def negative_for_sample(self, index):
        return numpy.where(self.activations_for_sample(index) <= 0)[0]

    def prediction_for_sample(self, index):
        return self.table[index, :10].argmax()

    def label_for_sample(self, index):
        return self.mnist_test.get_data(request=index)[1][0]

    def filter_image_bytes(self,
            positive_set=None, negative_set=None, sort_by=None,
            columns=100, limit=None, ulimit=None, descending=False):
        include_indexes = [ind for ind in range(self.table.shape[0])
                if (self.all_match(ind, positive_set, True) and
                    self.all_match(ind, negative_set, False))]
        if sort_by:
            include_indexes.sort(key=lambda x: self.table[x, sort_by].sum())
        if descending:
            include_indexes.reverse()
        if (limit or ulimit) and not (
                limit and ulimit and limit + ulimit >= len(include_indexes)):
            lower = include_indexes[:limit] if limit else []
            upper = include_indexes[-ulimit:] if ulimit else []
            include_indexes = lower + upper
        count = max(1, len(include_indexes))
        grid_shape = (((count - 1) // columns + 1), min(columns, count))

        filmstrip = Filmstrip(image_shape=(28, 28), grid_shape=grid_shape)
        for i, index in enumerate(include_indexes):
            filmstrip.set_image((i // columns, i % columns),
                        self.mnist_test.get_data(request=index)[0])
        return filmstrip.save_bytes()

    def example_count(self):
        return self.table.shape[0]

    def unit_count(self):
        return self.table.shape[1]

    def load_act_table(self, save_to, act_table):
        try:
            return pickle.load(open(act_table, 'rb'))
        except FileNotFoundError:
            return self.create_act_table(save_to, act_table)

    def create_act_table(self, save_to, act_table):
        batch_size = 500
        image_size = (28, 28)
        output_size = 10
        convnet = create_lenet_5()
        layers = convnet.layers

        x = tensor.tensor4('features')
        y = tensor.lmatrix('targets')

        # Normalize input and apply the convnet
        probs = convnet.apply(x)
        cg = ComputationGraph([probs])

        def full_brick_name(brick):
            return '/'.join([''] + [b.name for b in brick.get_unique_path()])

        # Find layer outputs to probe
        outmap = OrderedDict((full_brick_name(get_brick(out)), out)
                for out in VariableFilter(
                    roles=[OUTPUT], bricks=[Convolutional, Linear])(
                        cg.variables))
        # Generate pics for biases
        biases = VariableFilter(roles=[BIAS])(cg.parameters)

        # Generate parallel array, in the same order, for outputs
        outs = [outmap[full_brick_name(get_brick(b))] for b in biases]

        # Figure work count
        error_rate = (MisclassificationRate().apply(y.flatten(), probs)
                      .copy(name='error_rate'))
        max_activation_table = (MaxActivationTable().apply(
                outs).copy(name='max_activation_table'))
        max_activation_table.tag.aggregation_scheme = (
                Concatenate(max_activation_table))

        model = Model([
            error_rate,
            max_activation_table])

        # Load it with trained parameters
        params = load_parameters(open(save_to, 'rb'))
        model.set_parameter_values(params)

        mnist_test_stream = DataStream.default_stream(
            self.mnist_test,
            iteration_scheme=SequentialScheme(
                self.mnist_test.num_examples, batch_size))

        evaluator = DatasetEvaluator([
            error_rate,
            max_activation_table
            ])
        results = evaluator.evaluate(mnist_test_stream)
        table = results['max_activation_table']
        pickle.dump(table, open(act_table, 'wb'))
        return table