Example #1
    def __init__(self,
                 preprocessed_dataset,
                 preprocessor,
                 convert_to_one_hot=True,
                 start=None,
                 stop=None,
                 axes=['b', 0, 1, 'c']):

        self.args = locals()

        self.preprocessed_dataset = preprocessed_dataset
        self.preprocessor = preprocessor
        self.rng = self.preprocessed_dataset.rng
        self.data_specs = preprocessed_dataset.data_specs
        self.X_space = preprocessed_dataset.X_space
        self.X_topo_space = preprocessed_dataset.X_topo_space
        self.view_converter = preprocessed_dataset.view_converter

        self.y = preprocessed_dataset.y
        if convert_to_one_hot:
            if not (self.y.min() == 0):
                raise AssertionError("Expected y.min == 0 but y.min == %g" %
                                     self.y.min())
            nclass = self.y.max() + 1
            y = np.zeros((self.y.shape[0], nclass), dtype='float32')
            for i in xrange(self.y.shape[0]):
                y[i, self.y[i]] = 1.
            self.y = y
            assert self.y is not None
            space, source = self.data_specs
            space.components[source.index('targets')].dim = nclass

        if control.get_load_data():
            if start is not None:
                self.X = preprocessed_dataset.X[start:stop, :]
                if self.y is not None:
                    self.y = self.y[start:stop, :]
                assert self.X.shape[0] == stop-start
            else:
                self.X = preprocessed_dataset.X
        else:
            self.X = None
        if self.X is not None:
            if self.y is not None:
                assert self.y.shape[0] == self.X.shape[0]

        #self.mn = self.X.min()
        #self.mx = self.X.max()

        if getattr(preprocessor, "inv_P_", None) is None:
            warnings.warn("ZCA preprocessor.inv_P_ was none. Computing "
                          "inverse of preprocessor.P_ now. This will take "
                          "some time. For efficiency, it is recommended that "
                          "in the future you compute the inverse in ZCA.fit() "
                          "instead, by passing it compute_inverse=True.")
            logger.info('inverting...')
            preprocessor.inv_P_ = np.linalg.inv(preprocessor.P_)
            logger.info('...done inverting')

        self.view_converter.set_axes(axes)
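
A minimal usage sketch for the constructor above, assuming a dataset and its fitted ZCA preprocessor were pickled by an earlier preprocessing script (both file names below are hypothetical):

from pylearn2.utils import serial
from pylearn2.datasets.zca_dataset import ZCA_Dataset

# Hypothetical paths: stand-ins for files saved by your own preprocessing run.
preprocessed = serial.load('train_preprocessed.pkl')  # a DenseDesignMatrix
zca = serial.load('zca_preprocessor.pkl')             # a fitted ZCA instance

# Wrap the first 40000 examples; labels become one-hot float32 rows.
train = ZCA_Dataset(preprocessed_dataset=preprocessed,
                    preprocessor=zca,
                    convert_to_one_hot=True,
                    start=0, stop=40000)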
Example #2
    def __setstate__(self, d):
        """
        .. todo::

            WRITEME
        """

        if d['design_loc'] is not None:
            if control.get_load_data():
                d['X'] = np.load(d['design_loc'])
            else:
                d['X'] = None

        if d['compress']:
            X = d['X']
            mx = d['compress_max']
            mn = d['compress_min']
            del d['compress_max']
            del d['compress_min']
            d['X'] = 0
            self.__dict__.update(d)
            if X is not None:
                self.X = np.cast['float32'](X) * mx / 255. + mn
            else:
                self.X = None
        else:
            self.__dict__.update(d)

        # To be able to unpickle older data after the addition of
        # the data_specs mechanism
        if not all(m in d for m in ('data_specs', 'X_space',
                                    '_iter_data_specs', 'X_topo_space')):
            X_space = VectorSpace(dim=self.X.shape[1])
            X_source = 'features'
            if self.y is None:
                space = X_space
                source = X_source
            else:
                y_space = VectorSpace(dim=self.y.shape[-1])
                y_source = 'targets'

                space = CompositeSpace((X_space, y_space))
                source = (X_source, y_source)

            self.data_specs = (space, source)
            self.X_space = X_space
            self._iter_data_specs = (X_space, X_source)

            view_converter = d.get('view_converter', None)
            if view_converter is not None:
                # Get the topo_space from the view_converter
                if not hasattr(view_converter, 'topo_space'):
                    raise NotImplementedError("Not able to get a topo_space "
                                              "from this converter: %s"
                                              % view_converter)

                # self.X_topo_space stores a "default" topological space that
                # will be used only when self.iterator is called without a
                # data_specs, and with "topo=True", which is deprecated.
                self.X_topo_space = view_converter.topo_space
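
For reference, a standalone sketch of the 8-bit compression scheme this __setstate__ reverses; the scale-factor convention is inferred from the X * mx / 255. + mn recovery line above, and the variable names are illustrative:

import numpy as np

X = np.random.rand(4, 3).astype('float32')    # original design matrix
mn = X.min()
mx = (X - mn).max()
stored = np.round((X - mn) * 255. / mx).astype('uint8')  # what gets pickled

recovered = stored.astype('float32') * mx / 255. + mn
assert abs(recovered - X).max() <= mx / 255.  # lossy, one 8-bit step at most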
Example #3
def from_dataset(dataset, num_examples):
    """
    .. todo::

        WRITEME
    """
    try:

        V, y = dataset.get_batch_topo(num_examples, True)

    except:

        # This patches a case where control.get_load_data() is False, so
        # dataset.X is None. This logic should be removed whenever we
        # implement lazy loading.

        if isinstance(dataset, DenseDesignMatrix) and \
           dataset.X is None and \
           not control.get_load_data():
            warnings.warn("from_dataset wasn't able to make subset of "
                          "dataset, using the whole thing")
            return DenseDesignMatrix(X=None,
                                     view_converter=dataset.view_converter)
        raise

    rval = DenseDesignMatrix(topo_view=V, y=y)
    rval.adjust_for_viewer = dataset.adjust_for_viewer

    return rval
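
A short usage sketch, assuming the MNIST data is installed under PYLEARN2_DATA_PATH and that from_dataset is imported from pylearn2.datasets.dense_design_matrix, where it lives:

from pylearn2.datasets.dense_design_matrix import from_dataset
from pylearn2.datasets.mnist import MNIST

full = MNIST(which_set='train')
subset = from_dataset(full, num_examples=100)  # random 100-example subset
print(subset.X.shape)                          # (100, 784)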
Example #4
    def __setstate__(self, d):
        """
        .. todo::

            WRITEME
        """

        if d['design_loc'] is not None:
            if control.get_load_data():
                d['X'] = np.load(d['design_loc'])
            else:
                d['X'] = None

        if d['compress']:
            X = d['X']
            mx = d['compress_max']
            mn = d['compress_min']
            del d['compress_max']
            del d['compress_min']
            d['X'] = 0
            self.__dict__.update(d)
            if X is not None:
                self.X = np.cast['float32'](X) * mx / 255. + mn
            else:
                self.X = None
        else:
            self.__dict__.update(d)

        # To be able to unpickle older data after the addition of
        # the data_specs mechanism
        if not all(m in d for m in ('data_specs', 'X_space',
                                    '_iter_data_specs', 'X_topo_space')):
            X_space = VectorSpace(dim=self.X.shape[1])
            X_source = 'features'
            if self.y is None:
                space = X_space
                source = X_source
            else:
                y_space = VectorSpace(dim=self.y.shape[-1])
                y_source = 'targets'

                space = CompositeSpace((X_space, y_space))
                source = (X_source, y_source)

            self.data_specs = (space, source)
            self.X_space = X_space
            self._iter_data_specs = (X_space, X_source)

            view_converter = d.get('view_converter', None)
            if view_converter is not None:
                # Get the topo_space from the view_converter
                if not hasattr(view_converter, 'topo_space'):
                    raise NotImplementedError("Not able to get a topo_space "
                            "from this converter: %s"
                            % view_converter)

                # self.X_topo_space stores a "default" topological space that
                # will be used only when self.iterator is called without a
                # data_specs, and with "topo=True", which is deprecated.
                self.X_topo_space = view_converter.topo_space
Example #5
    def __init__(self,
                 preprocessed_dataset,
                 preprocessor,
                 convert_to_one_hot=True,
                 start=None,
                 stop=None,
                 axes=['b', 0, 1, 'c']):

        self.args = locals()

        self.preprocessed_dataset = preprocessed_dataset
        self.preprocessor = preprocessor
        self.rng = self.preprocessed_dataset.rng
        self.data_specs = preprocessed_dataset.data_specs
        self.X_space = preprocessed_dataset.X_space
        self.X_topo_space = preprocessed_dataset.X_topo_space
        self.view_converter = preprocessed_dataset.view_converter

        self.y = preprocessed_dataset.y
        if convert_to_one_hot:
            if not (self.y.min() == 0):
                raise AssertionError("Expected y.min == 0 but y.min == %g" %
                                     self.y.min())
            nclass = self.y.max() + 1
            y = np.zeros((self.y.shape[0], nclass), dtype='float32')
            for i in xrange(self.y.shape[0]):
                y[i, self.y[i]] = 1.
            self.y = y
            assert self.y is not None
            space, source = self.data_specs
            space.components[source.index('targets')].dim = nclass

        if control.get_load_data():
            if start is not None:
                self.X = preprocessed_dataset.X[start:stop, :]
                if self.y is not None:
                    self.y = self.y[start:stop, :]
                assert self.X.shape[0] == stop - start
            else:
                self.X = preprocessed_dataset.X
        else:
            self.X = None
        if self.X is not None:
            if self.y is not None:
                assert self.y.shape[0] == self.X.shape[0]

        #self.mn = self.X.min()
        #self.mx = self.X.max()

        if preprocessor.inv_P_ is None:
            warnings.warn("ZCA preprocessor.inv_P_ was none. Computing "
                          "inverse of preprocessor.P_ now. This will take "
                          "some time. For efficiency, it is recommended that "
                          "in the future you compute the inverse in ZCA.fit() "
                          "instead, by passing it compute_inverse=True.")
            logger.info('inverting...')
            preprocessor.inv_P_ = np.linalg.inv(preprocessor.P_)
            logger.info('...done inverting')

        self.view_converter.set_axes(axes)
Example #6
def from_dataset(dataset, num_examples):
    """
    .. todo::

        WRITEME
    """
    try:

        V, y = dataset.get_batch_topo(num_examples, True)

    except:

        # This patches a case where control.get_load_data() is False, so
        # dataset.X is None. This logic should be removed whenever we
        # implement lazy loading.

        if isinstance(dataset, DenseDesignMatrix) and \
           dataset.X is None and \
           not control.get_load_data():
            warnings.warn("from_dataset wasn't able to make subset of "
                          "dataset, using the whole thing")
            return DenseDesignMatrix(X=None,
                                     view_converter=dataset.view_converter)
        raise

    rval = DenseDesignMatrix(topo_view=V, y=y)
    rval.adjust_for_viewer = dataset.adjust_for_viewer

    return rval
Example #7
    def __init__(self,
                 preprocessed_dataset,
                 preprocessor,
                 start=None,
                 stop=None,
                 axes=None):

        if axes is not None:
            warnings.warn("The axes argument to ZCA_Dataset no longer has "
                          "any effect. Its role is now carried out by the "
                          "Space you pass to Dataset.iterator. You should "
                          "remove 'axes' arguments from calls to "
                          "ZCA_Dataset. This argument may be removed from "
                          "the library after 2015-05-05.")
        self.args = locals()

        self.preprocessed_dataset = preprocessed_dataset
        self.preprocessor = preprocessor
        self.rng = self.preprocessed_dataset.rng
        self.data_specs = preprocessed_dataset.data_specs
        self.X_space = preprocessed_dataset.X_space
        self.X_topo_space = preprocessed_dataset.X_topo_space
        self.view_converter = preprocessed_dataset.view_converter

        self.y = preprocessed_dataset.y
        self.y_labels = preprocessed_dataset.y_labels

        # Defined up here because PEP8 requires excessive indenting if defined
        # where it is used.
        msg = ("Expected self.y to have dim 2, but it has %d. Maybe you are "
               "loading from an outdated pickle file?")
        if control.get_load_data():
            if start is not None:
                self.X = preprocessed_dataset.X[start:stop, :]
                if self.y is not None:
                    if self.y.ndim != 2:
                        raise ValueError(msg % self.y.ndim)
                    self.y = self.y[start:stop, :]
                assert self.X.shape[0] == stop - start
            else:
                self.X = preprocessed_dataset.X
        else:
            self.X = None
        if self.X is not None:
            if self.y is not None:
                assert self.y.shape[0] == self.X.shape[0]

        if getattr(preprocessor, "inv_P_", None) is None:
            warnings.warn("ZCA preprocessor.inv_P_ was none. Computing "
                          "inverse of preprocessor.P_ now. This will take "
                          "some time. For efficiency, it is recommended that "
                          "in the future you compute the inverse in ZCA.fit() "
                          "instead, by passing it compute_inverse=True.")
            logger.info('inverting...')
            preprocessor.inv_P_ = np.linalg.inv(preprocessor.P_)
            logger.info('...done inverting')
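
A hedged sketch of the Space-based replacement for the removed axes argument: request topological batches in the order you want through the iterator. Here zca_dataset is a stand-in for a ZCA_Dataset built as in Example #1, assumed to hold 32x32 RGB images:

from pylearn2.space import Conv2DSpace

space = Conv2DSpace(shape=[32, 32], num_channels=3, axes=('c', 0, 1, 'b'))
it = zca_dataset.iterator(mode='sequential', batch_size=128,
                          data_specs=(space, 'features'))
batch = next(it)   # shaped ('c', 0, 1, 'b'), i.e. (3, 32, 32, 128)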
Example #8
    def __init__(self,
                 preprocessed_dataset,
                 preprocessor,
                 convert_to_one_hot=True,
                 start=None,
                 stop=None,
                 axes=['b', 0, 1, 'c']):

        self.args = locals()

        self.preprocessed_dataset = preprocessed_dataset
        self.preprocessor = preprocessor
        self.rng = self.preprocessed_dataset.rng
        self.data_specs = preprocessed_dataset.data_specs
        self.X_space = preprocessed_dataset.X_space
        self.X_topo_space = preprocessed_dataset.X_topo_space
        self.view_converter = preprocessed_dataset.view_converter

        self.y = preprocessed_dataset.y
        if convert_to_one_hot:
            if not (self.y.min() == 0):
                raise AssertionError("Expected y.min == 0 but y.min == " +
                                     str(self.y.min()))
            nclass = self.y.max() + 1
            y = np.zeros((self.y.shape[0], nclass), dtype='float32')
            for i in xrange(self.y.shape[0]):
                y[i, self.y[i]] = 1.
            self.y = y
            assert self.y is not None
            space, source = self.data_specs
            space.components[source.index('targets')].dim = nclass

        if control.get_load_data():
            if start is not None:
                self.X = preprocessed_dataset.X[start:stop, :]
                if self.y is not None:
                    self.y = self.y[start:stop, :]
                assert self.X.shape[0] == stop - start
            else:
                self.X = preprocessed_dataset.X
        else:
            self.X = None
        if self.X is not None:
            if self.y is not None:
                assert self.y.shape[0] == self.X.shape[0]

        #self.mn = self.X.min()
        #self.mx = self.X.max()
        print 'inverting...'
        preprocessor.invert()
        print '...done inverting'

        self.view_converter.axes = axes
Example #9
    def __init__(self,
                 preprocessed_dataset,
                 preprocessor,
                 convert_to_one_hot=True,
                 start=None,
                 stop=None,
                 axes=['b', 0, 1, 'c']):

        self.args = locals()

        self.preprocessed_dataset = preprocessed_dataset
        self.preprocessor = preprocessor
        self.rng = self.preprocessed_dataset.rng
        self.data_specs = preprocessed_dataset.data_specs
        self.X_space = preprocessed_dataset.X_space
        self.X_topo_space = preprocessed_dataset.X_topo_space
        self.view_converter = preprocessed_dataset.view_converter

        self.y = preprocessed_dataset.y
        if convert_to_one_hot:
            if not (self.y.min() == 0):
                raise AssertionError("Expected y.min == 0 but y.min == " +
                                     str(self.y.min()))
            nclass = self.y.max() + 1
            y = np.zeros((self.y.shape[0], nclass), dtype='float32')
            for i in xrange(self.y.shape[0]):
                y[i, self.y[i]] = 1.
            self.y = y
            assert self.y is not None
            space, source = self.data_specs
            space.components[source.index('targets')].dim = nclass

        if control.get_load_data():
            if start is not None:
                self.X = preprocessed_dataset.X[start:stop, :]
                if self.y is not None:
                    self.y = self.y[start:stop, :]
                assert self.X.shape[0] == stop - start
            else:
                self.X = preprocessed_dataset.X
        else:
            self.X = None
        if self.X is not None:
            if self.y is not None:
                assert self.y.shape[0] == self.X.shape[0]


        #self.mn = self.X.min()
        #self.mx = self.X.max()
        print 'inverting...'
        preprocessor.invert()
        print '...done inverting'

        self.view_converter.axes = axes
Example #10
    def __init__(self,
                 preprocessed_dataset,
                 preprocessor,
                 start=None,
                 stop=None,
                 axes=['b', 0, 1, 'c']):
        """
        .. todo::

            WRITEME
        """
        self.args = locals()

        self.preprocessed_dataset = preprocessed_dataset
        self.preprocessor = preprocessor
        self.rng = self.preprocessed_dataset.rng
        self.data_specs = preprocessed_dataset.data_specs
        self.X_space = preprocessed_dataset.X_space
        self.X_topo_space = preprocessed_dataset.X_topo_space
        self.view_converter = preprocessed_dataset.view_converter

        self.y = preprocessed_dataset.y
        self.y_labels = preprocessed_dataset.y_labels

        if control.get_load_data():
            if start is not None:
                self.X = preprocessed_dataset.X[start:stop, :]
                if self.y is not None:
                    self.y = self.y[start:stop, :]
                assert self.X.shape[0] == stop - start
            else:
                self.X = preprocessed_dataset.X
        else:
            self.X = None
        if self.X is not None:
            if self.y is not None:
                assert self.y.shape[0] == self.X.shape[0]

        # self.mn = self.X.min()
        # self.mx = self.X.max()

        if getattr(preprocessor, "inv_P_", None) is None:
            warnings.warn("ZCA preprocessor.inv_P_ was none. Computing "
                          "inverse of preprocessor.P_ now. This will take "
                          "some time. For efficiency, it is recommended that "
                          "in the future you compute the inverse in ZCA.fit() "
                          "instead, by passing it compute_inverse=True.")
            logger.info('inverting...')
            preprocessor.inv_P_ = np.linalg.inv(preprocessor.P_)
            logger.info('...done inverting')

        self.view_converter.set_axes(axes)
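
A hedged sketch of how to avoid the warning above: precompute the inverse once, when the preprocessor is fitted and saved, rather than at dataset-wrapping time. The np.linalg.inv fallback mirrors the constructor's own code; the stand-in data and output path are assumptions:

import numpy as np
from pylearn2.datasets.preprocessing import ZCA
from pylearn2.utils import serial

design_matrix = np.random.rand(1000, 784).astype('float32')  # stand-in data

zca = ZCA()
zca.fit(design_matrix)
if getattr(zca, 'inv_P_', None) is None:
    zca.inv_P_ = np.linalg.inv(zca.P_)   # same fallback as the constructor
serial.save('zca.pkl', zca)              # hypothetical output path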
Example #11
    def __init__(self, which_set='train', center=False, start=None, stop=None,
                 axes=['b', 'c', 0, 1], preprocessor=None,
                 fit_preprocessor=False, fit_test_preprocessor=False):
        self.shape = (8, 35, 57)
        self.size = {'train': 2849, 'valid': 2849, 'test': 2849}
        self.range = (-10, 10)
        self.path = "${PYLEARN2_DATA_PATH}/ecmwf/"
        self.set_path = {'train': 'ecmwf.train', 'valid': 'ecmwf.val',
                         'test': 'ecmwf.test'}
        self.args = locals()
        if which_set not in ['train', 'valid', 'test']:
            raise ValueError(
                'Unrecognized which_set value "%s".' % (which_set,) +
                ' Valid values are ["train","valid","test"].')
        path = self.path + self.set_path[which_set]
        if control.get_load_data():
            path = serial.preprocess(path)
            datasetCache = cache.datasetCache
            path = datasetCache.cache_file(path)
            X, topo_view, y = self._read_ecmwf(path, which_set)
        else:
            X = np.random.rand(self.size[which_set], np.prod(self.shape))
            # The random stand-in must be 4-D so the unpacking below works.
            topo_view = np.random.rand(self.size[which_set], *self.shape)
            y = np.random.randint(self.range[0], self.range[1],
                                  (self.size[which_set], 1))
        (m, v, r, c) = topo_view.shape
        if center:
            topo_view -= topo_view.mean(axis=0)
        super(ECMWF, self).__init__(X=X, topo_view=topo_view, y=y, axes=axes)
        assert not np.any(np.isnan(self.X))
        if start is not None:
            assert start >= 0
            if stop > self.X.shape[0]:
                raise ValueError('stop=' + str(stop) + '>' +
                                 'm=' + str(self.X.shape[0]))
            assert stop > start
            self.X = self.X[start:stop, :]
            if self.X.shape[0] != stop - start:
                raise ValueError("X.shape[0]: %d. start: %d stop: %d"
                                 % (self.X.shape[0], start, stop))
            if len(self.y.shape) > 1:
                self.y = self.y[start:stop, :]
            else:
                self.y = self.y[start:stop]
            assert self.y.shape[0] == stop - start
        if which_set == 'test':
            assert fit_test_preprocessor is None or \
                (fit_preprocessor == fit_test_preprocessor)
        if self.X is not None and preprocessor:
            preprocessor.apply(self, fit_preprocessor)
Example #12
    def __init__(self, which_set, center=False,
                 one_hot=False, binarize=False,
                 axes=['b', 0, 1, 'c'],
                 preprocessor=ZCA(),
                 fit_preprocessor=False,
                 fit_test_preprocessor=False):

        self.args = locals()

        def dimshuffle(b01c):
            default = ('b', 0, 1, 'c')
            return b01c.transpose(*[default.index(axis) for axis in axes])

        if control.get_load_data():
            path = os.environ['PYLEARN2_DATA_PATH'] + '/faceEmo/'

            if which_set == 'train':
                X = np.load(path + 'train_X.npy').astype('float32')
                y = np.load(path + 'train_y.npy').astype('float32')
            else:
                assert which_set == 'test'
                X = np.load(path + 'test_X.npy').astype('float32')
                y = np.load(path + 'test_y.npy').astype('float32')

            if binarize:
                X = (X > 0.5).astype('float32')

            self.one_hot = one_hot
            if one_hot:
                one_hot = np.zeros((y.shape[0], 3), dtype='float32')
                for i in xrange(y.shape[0]):
                    one_hot[i, y[i]] = 1.
                y = one_hot

            if center:
                X -= X.mean(axis=0)

            super(FaceEmo, self).__init__(X=X, y=y)
        else:
            # Data loading is disabled; make sure self.X exists for the
            # preprocessor check below.
            self.X = None

        if which_set == 'test':
            assert fit_test_preprocessor is None or \
                (fit_preprocessor == fit_test_preprocessor)

        if self.X is not None and preprocessor:
            preprocessor.fit(self.X)
            preprocessor.apply(self, fit_preprocessor)
Example #13
    def __init__(self,
                 preprocessed_dataset,
                 preprocessor,
                 start=None,
                 stop=None,
                 axes=None):

        if axes is not None:
            warnings.warn("The axes argument to ZCA_Dataset no longer has "
                          "any effect. Its role is now carried out by the "
                          "Space you pass to Dataset.iterator. You should "
                          "remove 'axes' arguments from calls to "
                          "ZCA_Dataset. This argument may be removed from "
                          "the library after 2015-05-05.")
        self.args = locals()

        self.preprocessed_dataset = preprocessed_dataset
        self.preprocessor = preprocessor
        self.rng = self.preprocessed_dataset.rng
        self.data_specs = preprocessed_dataset.data_specs
        self.X_space = preprocessed_dataset.X_space
        self.X_topo_space = preprocessed_dataset.X_topo_space
        self.view_converter = preprocessed_dataset.view_converter

        self.y = preprocessed_dataset.y
        self.y_labels = preprocessed_dataset.y_labels

        # Defined up here because PEP8 requires excessive indenting if defined
        # where it is used.
        msg = ("Expected self.y to have dim 2, but it has %d. Maybe you are "
               "loading from an outdated pickle file?")
        if control.get_load_data():
            if start is not None:
                self.X = preprocessed_dataset.X[start:stop, :]
                if self.y is not None:
                    if self.y.ndim != 2:
                        raise ValueError(msg % self.y.ndim)
                    self.y = self.y[start:stop, :]
                assert self.X.shape[0] == stop - start
            else:
                self.X = preprocessed_dataset.X
        else:
            self.X = None
        if self.X is not None:
            if self.y is not None:
                assert self.y.shape[0] == self.X.shape[0]
Example #14
def from_dataset(dataset, num_examples):
    # This function does not support the tags attribute.
    try:

        V, y = dataset.get_batch_topo(num_examples, True)

    except:

        # This patches a case where control.get_load_data() is False, so
        # dataset.X is None. This logic should be removed whenever we
        # implement lazy loading.
        if isinstance(dataset, DenseDesignMatrix) and \
           dataset.X is None and \
           not control.get_load_data():
            warnings.warn("from_dataset wasn't able to make subset of "
                          "dataset, using the whole thing")
            return DenseDesignMatrix(X=None,
                                     view_converter=dataset.view_converter)
        raise

    rval = DenseDesignMatrix(topo_view=V, y=y)
    rval.adjust_for_viewer = dataset.adjust_for_viewer

    return rval
Example #15
    def _load_path(self, which_set, which_targets, word2vec_dict={}):
        if which_targets not in ['fine', 'coarse']:
            raise ValueError(
                'Unrecognized which_targets value "%s".' % (which_targets,) +
                ' Valid values are ["fine","coarse"].')

        if which_set not in ['train', 'test']:
            raise ValueError(
                'Unrecognized which_set value "%s".' % (which_set,) +
                ' Valid values are ["train","test"].')

        if control.get_load_data():
            path = "${PYLEARN2_DATA_PATH}/TREC_question_type_data/"
            if which_set == 'train':
                data_path = path + 'trecqc.train_5500.label.txt'
            else:
                assert which_set == 'test'
                data_path = path + 'trecqc.test_500.label.txt'
            data_path = serial.preprocess(data_path)
            self.path = path
            # path/data_path are only defined when data loading is enabled.
            return data_path
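
A small sketch of the ${...} substitution serial.preprocess performs on the path above; the directory value is an assumption for the demo:

import os
from pylearn2.utils import serial

os.environ['PYLEARN2_DATA_PATH'] = '/tmp/pylearn2_data'  # demo value
print(serial.preprocess("${PYLEARN2_DATA_PATH}/TREC_question_type_data/"))
# -> /tmp/pylearn2_data/TREC_question_type_data/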
Example #16
    def __setstate__(self, d):

        if d["design_loc"] is not None:
            if control.get_load_data():
                d["X"] = N.load(d["design_loc"])
            else:
                d["X"] = None

        if d["compress"]:
            X = d["X"]
            mx = d["compress_max"]
            mn = d["compress_min"]
            del d["compress_max"]
            del d["compress_min"]
            d["X"] = 0
            self.__dict__.update(d)
            if X is not None:
                self.X = N.cast["float32"](X) * mx / 255.0 + mn
            else:
                self.X = None
        else:
            self.__dict__.update(d)
Example #17
    def __setstate__(self, d):

        if d['design_loc'] is not None:
            if control.get_load_data():
                d['X'] = N.load(d['design_loc'])
            else:
                d['X'] = None

        if d['compress']:
            X = d['X']
            mx = d['compress_max']
            mn = d['compress_min']
            del d['compress_max']
            del d['compress_min']
            d['X'] = 0
            self.__dict__.update(d)
            if X is not None:
                self.X = N.cast['float32'](X) * mx / 255. + mn
            else:
                self.X = None
        else:
            self.__dict__.update(d)
Example #18
    def __setstate__(self, d):

        if d['design_loc'] is not None:
            if control.get_load_data():
                d['X'] = N.load(d['design_loc'])
            else:
                d['X'] = None

        if d['compress']:
            X = d['X']
            mx = d['compress_max']
            mn = d['compress_min']
            del d['compress_max']
            del d['compress_min']
            d['X'] = 0
            self.__dict__.update(d)
            if X is not None:
                self.X = N.cast['float32'](X) * mx / 255. + mn
            else:
                self.X = None
        else:
            self.__dict__.update(d)
Example #19
    def __init__(self, which_set, start=None, stop=None):

        self.args = locals()

        if which_set not in ['train', 'test']:
            raise ValueError('Unrecognized which_set value "%s".' %
                             (which_set, ) +
                             ' Valid values are ["train","test"].')

        size = TEST_SIZE if which_set == 'test' else TRAIN_SIZE
        if control.get_load_data():
            topo_view, y = read_images(IM_GLOB, size, TEST_TRAIN_RANDOM_SEED,
                                       False if which_set == 'train' else True)
        else:
            topo_view = np.random.rand(size, 32, 32)
            y = np.random.randint(0, 10, (size, 1))

        super(PlanktonDDM, self).__init__(topo_view=topo_view,
                                          y=y,
                                          axes=['b', 0, 1, 'c'],
                                          y_labels=NUM_CLASSES)

        if start is not None:
            assert start >= 0
            if stop > self.X.shape[0]:
                raise ValueError('stop=' + str(stop) + '>' + 'm=' +
                                 str(self.X.shape[0]))
            assert stop > start
            self.X = self.X[start:stop, :]
            if self.X.shape[0] != stop - start:
                raise ValueError("X.shape[0]: %d. start: %d stop: %d" %
                                 (self.X.shape[0], start, stop))
            if len(self.y.shape) > 1:
                self.y = self.y[start:stop, :]
            else:
                self.y = self.y[start:stop]
            assert self.y.shape[0] == stop - start
Example #20
    def __init__(
        self,
        which_set,
        center=False,
        shuffle=False,
        one_hot=False,
        binarize=False,
        start=None,
        stop=None,
        axes=["b", 0, 1, "c"],
        preprocessor=None,
        fit_preprocessor=False,
        fit_test_preprocessor=False,
    ):

        self.args = locals()

        if which_set not in ["train", "test"]:
            if which_set == "valid":
                raise ValueError(
                    "There is no such thing as the MNIST validation set. MNIST"
                    "consists of 60,000 train examples and 10,000 test"
                    "examples. If you wish to use a validation set you should"
                    "divide the train set yourself. The pylearn2 dataset"
                    "implements and will only ever implement the standard"
                    "train / test split used in the literature."
                )
            raise ValueError(
                'Unrecognized which_set value "%s".' % (which_set,) + '". Valid values are ["train","test"].'
            )

        def dimshuffle(b01c):
            default = ("b", 0, 1, "c")
            return b01c.transpose(*[default.index(axis) for axis in axes])

        if control.get_load_data():
            path = "${VIDTIMIT}/data/"
            if which_set == "train":
                im_path = path + "train.npy"
                label_path = path + "train-labels.npy"
            else:
                assert which_set == "test"
                im_path = path + "test.npy"
                label_path = path + "test-labels.npy"
            # Path substitution done here in order to make the lower-level
            # mnist_ubyte.py as stand-alone as possible (for reuse in, e.g.,
            # the Deep Learning Tutorials, or in another package).
            im_path = serial.preprocess(im_path)
            label_path = serial.preprocess(label_path)
            topo_view = np.load(im_path)
            y = np.load(label_path)

            if binarize:
                topo_view = (topo_view > 0.5).astype("float32")

            self.one_hot = one_hot
            if one_hot:
                one_hot = N.zeros((y.shape[0], 36), dtype="float32")
                for i in xrange(y.shape[0]):
                    one_hot[i, y[i]] = 1.0
                y = one_hot
                max_labels = None
            else:
                max_labels = 36

            m, r, c = topo_view.shape
            assert r == 32
            assert c == 32
            topo_view = topo_view.reshape(m, r, c, 1)

            if which_set == "train":
                assert m == 27280
            elif which_set == "test":
                assert m == 10929
            else:
                assert False

            if center:
                topo_view -= topo_view.mean(axis=0)

            if shuffle:
                self.shuffle_rng = make_np_rng(None, [1, 2, 3], which_method="shuffle")
                for i in xrange(topo_view.shape[0]):
                    j = self.shuffle_rng.randint(m)
                    # Copy ensures that memory is not aliased.
                    tmp = topo_view[i, :, :, :].copy()
                    topo_view[i, :, :, :] = topo_view[j, :, :, :]
                    topo_view[j, :, :, :] = tmp
                    # Note: slicing with i:i+1 works for one_hot=True/False
                    tmp = y[i : i + 1].copy()
                    y[i] = y[j]
                    y[j] = tmp

            super(VidTIMIT, self).__init__(topo_view=dimshuffle(topo_view), y=y, axes=axes, max_labels=max_labels)

            assert not N.any(N.isnan(self.X))

            if start is not None:
                assert start >= 0
                if stop > self.X.shape[0]:
                    raise ValueError("stop=" + str(stop) + ">" + "m=" + str(self.X.shape[0]))
                assert stop > start
                self.X = self.X[start:stop, :]
                if self.X.shape[0] != stop - start:
                    raise ValueError("X.shape[0]: %d. start: %d stop: %d" % (self.X.shape[0], start, stop))
                if len(self.y.shape) > 1:
                    self.y = self.y[start:stop, :]
                else:
                    self.y = self.y[start:stop]
                assert self.y.shape[0] == stop - start
        else:
            # data loading is disabled, just make something that defines the
            # right topology
            topo = dimshuffle(np.zeros((1, 32, 32, 1)))
            super(VidTIMIT, self).__init__(topo_view=topo, axes=axes)
            self.X = None

        if which_set == "test":
            assert fit_test_preprocessor is None or (fit_preprocessor == fit_test_preprocessor)

        if self.X is not None and preprocessor:
            preprocessor.apply(self, fit_preprocessor)
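
A standalone sketch of the dimshuffle helper used above: it permutes a batch from the default ('b', 0, 1, 'c') layout (batch, rows, cols, channels) to the requested axis order:

import numpy as np

axes = ('b', 'c', 0, 1)
default = ('b', 0, 1, 'c')

batch = np.zeros((10, 32, 32, 1))   # ('b', 0, 1, 'c') layout
shuffled = batch.transpose(*[default.index(axis) for axis in axes])
print(shuffled.shape)               # (10, 1, 32, 32)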
Example #21
    def __init__(self, which_set, center=False, shuffle=False, one_hot=False, binarize=False, start=None, stop=None):

        self.args = locals()

        if which_set not in ["train", "test"]:
            if which_set == "valid":
                raise ValueError(
                    "There is no such thing as the MNIST "
                    "validation set. MNIST consists of 60,000 train examples and 10,000 test"
                    " examples. If you wish to use a validation set you should divide the train "
                    "set yourself. The pylearn2 dataset implements and will only ever implement "
                    "the standard train / test split used in the literature."
                )
            raise ValueError(
                'Unrecognized which_set value "%s".' % (which_set,) + ' Valid values are ["train","test"].'
            )

        if control.get_load_data():
            path = "${PYLEARN2_DATA_PATH}/mnist/"
            if which_set == "train":
                im_path = path + "train-images-idx3-ubyte"
                label_path = path + "train-labels-idx1-ubyte"
            else:
                assert which_set == "test"
                im_path = path + "t10k-images-idx3-ubyte"
                label_path = path + "t10k-labels-idx1-ubyte"
            # Path substitution done here in order to make the lower-level
            # mnist_ubyte.py as stand-alone as possible (for reuse in, e.g.,
            # the Deep Learning Tutorials, or in another package).
            im_path = serial.preprocess(im_path)
            label_path = serial.preprocess(label_path)
            topo_view = read_mnist_images(im_path, dtype="float32")
            y = read_mnist_labels(label_path)

            if binarize:
                topo_view = (topo_view > 0.5).astype("float32")

            self.one_hot = one_hot
            if one_hot:
                one_hot = N.zeros((y.shape[0], 10), dtype="float32")
                for i in xrange(y.shape[0]):
                    one_hot[i, y[i]] = 1.0
                y = one_hot

            m, r, c = topo_view.shape
            assert r == 28
            assert c == 28
            topo_view = topo_view.reshape(m, r, c, 1)

            if which_set == "train":
                assert m == 60000
            elif which_set == "test":
                assert m == 10000
            else:
                assert False

            if center:
                topo_view -= topo_view.mean(axis=0)

            if shuffle:
                self.shuffle_rng = np.random.RandomState([1, 2, 3])
                for i in xrange(topo_view.shape[0]):
                    j = self.shuffle_rng.randint(m)
                    # Copy ensures that memory is not aliased.
                    tmp = topo_view[i, :, :, :].copy()
                    topo_view[i, :, :, :] = topo_view[j, :, :, :]
                    topo_view[j, :, :, :] = tmp
                    # Note: slicing with i:i+1 works for both one_hot=True/False.
                    tmp = y[i : i + 1].copy()
                    y[i] = y[j]
                    y[j] = tmp

            view_converter = dense_design_matrix.DefaultViewConverter((28, 28, 1))

            super(MNIST, self).__init__(topo_view=topo_view, y=y)

            assert not N.any(N.isnan(self.X))

            if start is not None:
                assert start >= 0
                if stop > self.X.shape[0]:
                    raise ValueError("stop=" + str(stop) + ">" + "m=" + str(self.X.shape[0]))
                assert stop > start
                self.X = self.X[start:stop, :]
                if self.X.shape[0] != stop - start:
                    raise ValueError("X.shape[0]: %d. start: %d stop: %d" % (self.X.shape[0], start, stop))
                if len(self.y.shape) > 1:
                    self.y = self.y[start:stop, :]
                else:
                    self.y = self.y[start:stop]
                assert self.y.shape[0] == stop - start
        else:
            # data loading is disabled, just make something that defines the right topology
            topo = np.zeros((1, 28, 28, 1))
            super(MNIST, self).__init__(topo_view=topo)
            self.X = None
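
For reference, the raw ubyte readers used above can also be called directly; the pylearn2.utils.mnist_ubyte module path is an assumption here:

from pylearn2.utils import serial
from pylearn2.utils.mnist_ubyte import read_mnist_images, read_mnist_labels

im_path = serial.preprocess(
    "${PYLEARN2_DATA_PATH}/mnist/train-images-idx3-ubyte")
label_path = serial.preprocess(
    "${PYLEARN2_DATA_PATH}/mnist/train-labels-idx1-ubyte")
images = read_mnist_images(im_path, dtype='float32')  # (60000, 28, 28)
labels = read_mnist_labels(label_path)                # (60000,)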
Example #22
    def __init__(self, which_set, center=False, shuffle=False,
                 binarize=False, start=None, stop=None,
                 axes=['b', 0, 1, 'c'],
                 preprocessor=None,
                 fit_preprocessor=False,
                 fit_test_preprocessor=False):
        self.args = locals()

        if which_set not in ['train', 'test']:
            if which_set == 'valid':
                raise ValueError(
                    "There is no such thing as the MNIST validation set. "
                    "MNIST consists of 60,000 train examples and 10,000 "
                    "test examples. If you wish to use a validation set "
                    "you should divide the train set yourself. The "
                    "pylearn2 dataset implements and will only ever "
                    "implement the standard train / test split used in "
                    "the literature.")
            raise ValueError(
                'Unrecognized which_set value "%s".' % (which_set,) +
                ' Valid values are ["train","test"].')

        def dimshuffle(b01c):
            """
            .. todo::

                WRITEME
            """
            default = ('b', 0, 1, 'c')
            return b01c.transpose(*[default.index(axis) for axis in axes])

        if control.get_load_data():
            path = "${PYLEARN2_DATA_PATH}/sign24/"
            if which_set == 'train':
                im_path = path + 'train-images-idx3-ubyte'
                label_path = path + 'train-labels-idx1-ubyte'
            else:
                assert which_set == 'test'
                im_path = path + 't10k-images-idx3-ubyte'
                label_path = path + 't10k-labels-idx1-ubyte'
            # Path substitution done here in order to make the lower-level
            # mnist_ubyte.py as stand-alone as possible (for reuse in, e.g.,
            # the Deep Learning Tutorials, or in another package).
            im_path = serial.preprocess(im_path)
            label_path = serial.preprocess(label_path)

            # Locally cache the files before reading them
            datasetCache = cache.datasetCache
            im_path = datasetCache.cache_file(im_path)
            label_path = datasetCache.cache_file(label_path)

            topo_view = read_mnist_images(im_path, dtype='float32')
            y = np.atleast_2d(read_mnist_labels(label_path)).T
        else:
            # Sizes must match the m == 3576 / m == 1176 asserts below.
            if which_set == 'train':
                size = 3576
            elif which_set == 'test':
                size = 1176
            else:
                raise ValueError(
                    'Unrecognized which_set value "%s".' % (which_set,) +
                    ' Valid values are ["train","test"].')
            topo_view = np.random.rand(size, 28, 28)
            y = np.random.randint(0, 10, (size, 1))

        if binarize:
            topo_view = (topo_view > 0.5).astype('float32')

        y_labels = 24

        m, r, c = topo_view.shape
        assert r == 28
        assert c == 28
        topo_view = topo_view.reshape(m, r, c, 1)

        if which_set == 'train':
            assert m == 3576
        elif which_set == 'test':
            assert m == 1176
        else:
            assert False

        if center:
            topo_view -= topo_view.mean(axis=0)

        if shuffle:
            self.shuffle_rng = make_np_rng(
                None, [1, 2, 3], which_method="shuffle")
            for i in xrange(topo_view.shape[0]):
                j = self.shuffle_rng.randint(m)
                # Copy ensures that memory is not aliased.
                tmp = topo_view[i, :, :, :].copy()
                topo_view[i, :, :, :] = topo_view[j, :, :, :]
                topo_view[j, :, :, :] = tmp

                tmp = y[i:i + 1].copy()
                y[i] = y[j]
                y[j] = tmp

        super(MNIST, self).__init__(topo_view=dimshuffle(topo_view), y=y,
                                    axes=axes, y_labels=y_labels)

        assert not N.any(N.isnan(self.X))

        if start is not None:
            assert start >= 0
            if stop > self.X.shape[0]:
                raise ValueError('stop=' + str(stop) + '>' +
                                 'm=' + str(self.X.shape[0]))
            assert stop > start
            self.X = self.X[start:stop, :]
            if self.X.shape[0] != stop - start:
                raise ValueError("X.shape[0]: %d. start: %d stop: %d"
                                 % (self.X.shape[0], start, stop))
            if len(self.y.shape) > 1:
                self.y = self.y[start:stop, :]
            else:
                self.y = self.y[start:stop]
            assert self.y.shape[0] == stop - start

        if which_set == 'test':
            assert fit_test_preprocessor is None or \
                (fit_preprocessor == fit_test_preprocessor)

        if self.X is not None and preprocessor:
            preprocessor.apply(self, fit_preprocessor)
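
A standalone sketch of the seeded shuffle above: random pairwise swaps applied identically to images and labels so the two stay aligned:

import numpy as np

rng = np.random.RandomState([1, 2, 3])
topo = np.arange(20, dtype='float32').reshape(5, 2, 2)  # toy image batch
y = np.arange(5).reshape(5, 1)                          # matching labels
for i in xrange(topo.shape[0]):
    j = rng.randint(topo.shape[0])
    topo[[i, j]] = topo[[j, i]]   # fancy indexing copies, so no aliasing
    y[[i, j]] = y[[j, i]]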
Example #23
    def __init__(self, which_set, center=False, shuffle=False,
                 one_hot=None, binarize=False, start=None,
                 stop=None, axes=['b', 0, 1, 'c'],
                 preprocessor=None,
                 fit_preprocessor=False,
                 fit_test_preprocessor=False):
        self.args = locals()

        if which_set not in ['train', 'test']:
            if which_set == 'valid':
                raise ValueError(
                    "There is no such thing as the MNIST validation set. "
                    "MNIST consists of 60,000 train examples and 10,000 "
                    "test examples. If you wish to use a validation set "
                    "you should divide the train set yourself. The "
                    "pylearn2 dataset implements and will only ever "
                    "implement the standard train / test split used in "
                    "the literature.")
            raise ValueError(
                'Unrecognized which_set value "%s".' % (which_set,) +
                ' Valid values are ["train","test"].')

        def dimshuffle(b01c):
            """
            .. todo::

                WRITEME
            """
            default = ('b', 0, 1, 'c')
            return b01c.transpose(*[default.index(axis) for axis in axes])

        if control.get_load_data():
            path = "${PYLEARN2_DATA_PATH}/mnist/"
            if which_set == 'train':
                im_path = path + 'train-images-idx3-ubyte'
                label_path = path + 'train-labels-idx1-ubyte'
            else:
                assert which_set == 'test'
                im_path = path + 't10k-images-idx3-ubyte'
                label_path = path + 't10k-labels-idx1-ubyte'
            # Path substitution done here in order to make the lower-level
            # mnist_ubyte.py as stand-alone as possible (for reuse in, e.g.,
            # the Deep Learning Tutorials, or in another package).
            im_path = serial.preprocess(im_path)
            label_path = serial.preprocess(label_path)

            # Locally cache the files before reading them
            datasetCache = cache.datasetCache
            im_path = datasetCache.cache_file(im_path)
            label_path = datasetCache.cache_file(label_path)

            topo_view = read_mnist_images(im_path, dtype='float32')
            y = np.atleast_2d(read_mnist_labels(label_path)).T
        else:
            if which_set == 'train':
                size = 60000
            elif which_set == 'test':
                size = 10000
            else:
                raise ValueError(
                    'Unrecognized which_set value "%s".' % (which_set,) +
                    ' Valid values are ["train","test"].')
            topo_view = np.random.rand(size, 28, 28)
            y = np.random.randint(0, 10, (size, 1))

        if binarize:
            topo_view = (topo_view > 0.5).astype('float32')

        max_labels = 10
        if one_hot is not None:
            warnings.warn("the `one_hot` parameter is deprecated. To get "
                          "one-hot encoded targets, request that they "
                          "live in `VectorSpace` through the `data_specs` "
                          "parameter of MNIST's iterator method. "
                          "`one_hot` will be removed on or after "
                          "September 20, 2014.", stacklevel=2)

        m, r, c = topo_view.shape
        assert r == 28
        assert c == 28
        topo_view = topo_view.reshape(m, r, c, 1)

        if which_set == 'train':
            assert m == 60000
        elif which_set == 'test':
            assert m == 10000
        else:
            assert False

        if center:
            topo_view -= topo_view.mean(axis=0)

        if shuffle:
            self.shuffle_rng = make_np_rng(None, [1, 2, 3], which_method="shuffle")
            for i in xrange(topo_view.shape[0]):
                j = self.shuffle_rng.randint(m)
                # Copy ensures that memory is not aliased.
                tmp = topo_view[i, :, :, :].copy()
                topo_view[i, :, :, :] = topo_view[j, :, :, :]
                topo_view[j, :, :, :] = tmp
                # Note: slicing with i:i+1 works for one_hot=True/False
                tmp = y[i:i+1].copy()
                y[i] = y[j]
                y[j] = tmp

        super(MNIST, self).__init__(topo_view=dimshuffle(topo_view), y=y,
                                    axes=axes, y_labels=max_labels)

        assert not N.any(N.isnan(self.X))

        if start is not None:
            assert start >= 0
            if stop > self.X.shape[0]:
                raise ValueError('stop=' + str(stop) + '>' +
                                 'm=' + str(self.X.shape[0]))
            assert stop > start
            self.X = self.X[start:stop, :]
            if self.X.shape[0] != stop - start:
                raise ValueError("X.shape[0]: %d. start: %d stop: %d"
                                 % (self.X.shape[0], start, stop))
            if len(self.y.shape) > 1:
                self.y = self.y[start:stop, :]
            else:
                self.y = self.y[start:stop]
            assert self.y.shape[0] == stop - start

        if which_set == 'test':
            assert fit_test_preprocessor is None or \
                (fit_preprocessor == fit_test_preprocessor)

        if self.X is not None and preprocessor:
            preprocessor.apply(self, fit_preprocessor)
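
A hedged sketch of the data_specs route that the deprecation warning above recommends for one-hot targets, replacing the old one_hot flag (exact conversion behavior depends on the installed pylearn2 version):

from pylearn2.datasets.mnist import MNIST
from pylearn2.space import CompositeSpace, VectorSpace

dataset = MNIST(which_set='train')
specs = (CompositeSpace([VectorSpace(dim=784), VectorSpace(dim=10)]),
         ('features', 'targets'))
it = dataset.iterator(mode='sequential', batch_size=100, data_specs=specs)
X_batch, y_onehot = next(it)   # y_onehot: a (100, 10) one-hot float batch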
Example #24
    def __init__(self,
                 which_set,
                 start=None,
                 stop=None,
                 axes=['b', 0, 1, 'c']):
        self.args = locals()

        def dimshuffle(b01c):
            default = ('b', 0, 1, 'c')
            return b01c.transpose(*[default.index(axis) for axis in axes])

        if control.get_load_data():
            path = "/local/Pylearn2/data/ADNI/"

            im_path = path + 'ADNI_X_down'
            label_path = path + 'ADNI_y_down'
            #im_path = serial.preprocess(im_path)
            #label_path = serial.preprocess(label_path)

            #Locally cache the files before reading them
            datasetCache = cache.datasetCache
            im_path = datasetCache.cache_file(im_path)
            label_path = datasetCache.cache_file(label_path)

            with open_if_filename(im_path, 'rb') as f:
                #magic, number, rows, cols = struct.unpack('>iiii', f.read(16))
                #im_array = N.fromfile(f, dtype='float64')
                im_array = pickle.load(f)

            with open_if_filename(label_path, 'rb') as f:
                #label_array = N.fromfile(f, dtype='uint8')
                label_array = pickle.load(f)

            topo_view = im_array
            y = label_array
        else:

            if which_set == 'train':
                size = 397
            else:
                size = 50
            topo_view = np.random.rand(size, 64, 64, 41)
            y = np.random.randint(0, 3, (size, 1))

        y_labels = 3

        m, r, c, d = topo_view.shape
        assert r == 64
        assert c == 64
        topo_view = topo_view.reshape(m, r, c, 41)

        if which_set == 'train':
            assert m == 397
        else:
            assert m == 50

        super(MRI, self).__init__(topo_view=dimshuffle(topo_view),
                                  y=y,
                                  axes=axes,
                                  y_labels=y_labels)

        assert not N.any(N.isnan(self.X))

        if start is not None:
            assert start >= 0
            #if stop > self.X.shape[0]:
            #    raise ValueError('stop=' + str(stop) + '>' +
            #                     'm=' + str(self.X.shape[0]))

            assert stop > start
            self.X = self.X[start:stop, :]
            if self.X.shape[0] != stop - start:
                raise ValueError("X.shape[0]: %d. start: %d stop: %d" %
                                 (self.X.shape[0], start, stop))
            if len(self.y.shape) > 1:
                self.y = self.y[start:stop, :]
            else:
                self.y = self.y[start:stop]
            assert self.y.shape[0] == stop - start
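
The dimshuffle helper defined above maps the stored ('b', 0, 1, 'c') layout
onto whatever axis order the caller requests. A small self-contained
illustration (array contents are made up):

import numpy as np

def dimshuffle(b01c, axes):
    default = ('b', 0, 1, 'c')
    return b01c.transpose(*[default.index(axis) for axis in axes])

batch = np.zeros((397, 64, 64, 41))               # ('b', 0, 1, 'c')
print(dimshuffle(batch, ('c', 0, 1, 'b')).shape)  # (41, 64, 64, 397)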
Exemplo n.º 27
0
    def __init__(self,
                 which_set,
                 center=False,
                 shuffle=False,
                 one_hot=False,
                 binarize=False,
                 start=None,
                 stop=None,
                 axes=['b', 0, 1, 'c'],
                 preprocessor=None,
                 fit_preprocessor=False,
                 fit_test_preprocessor=False):

        self.args = locals()

        if which_set not in ['train', 'test']:
            if which_set == 'valid':
                raise ValueError(
                    "There is no such thing as the MNIST validation set. "
                    "MNIST consists of 60,000 train examples and 10,000 test "
                    "examples. If you wish to use a validation set you should "
                    "divide the train set yourself. The pylearn2 dataset "
                    "implements and will only ever implement the standard "
                    "train / test split used in the literature.")
            raise ValueError('Unrecognized which_set value "%s". ' %
                             (which_set,) +
                             'Valid values are ["train", "test"].')

        def dimshuffle(b01c):
            default = ('b', 0, 1, 'c')
            return b01c.transpose(*[default.index(axis) for axis in axes])

        if control.get_load_data():
            path = "${PYLEARN2_DATA_PATH}/mnist/"
            if which_set == 'train':
                im_path = path + 'train-images-idx3-ubyte'
                label_path = path + 'train-labels-idx1-ubyte'
            else:
                assert which_set == 'test'
                im_path = path + 't10k-images-idx3-ubyte'
                label_path = path + 't10k-labels-idx1-ubyte'
            # Path substitution done here in order to make the lower-level
            # mnist_ubyte.py as stand-alone as possible (for reuse in, e.g.,
            # the Deep Learning Tutorials, or in another package).
            im_path = serial.preprocess(im_path)
            label_path = serial.preprocess(label_path)
            topo_view = read_mnist_images(im_path, dtype='float32')
            y = read_mnist_labels(label_path)

            if binarize:
                topo_view = (topo_view > 0.5).astype('float32')

            self.one_hot = one_hot
            if one_hot:
                one_hot = N.zeros((y.shape[0], 10), dtype='float32')
                for i in xrange(y.shape[0]):
                    one_hot[i, y[i]] = 1.
                y = one_hot
                max_labels = None
            else:
                max_labels = 10

            m, r, c = topo_view.shape
            assert r == 28
            assert c == 28
            topo_view = topo_view.reshape(m, r, c, 1)

            if which_set == 'train':
                assert m == 60000
            elif which_set == 'test':
                assert m == 10000
            else:
                assert False

            if center:
                topo_view -= topo_view.mean(axis=0)

            if shuffle:
                self.shuffle_rng = make_np_rng(None, [1, 2, 3],
                                               which_method="shuffle")
                for i in xrange(topo_view.shape[0]):
                    j = self.shuffle_rng.randint(m)
                    # Copy ensures that memory is not aliased.
                    tmp = topo_view[i, :, :, :].copy()
                    topo_view[i, :, :, :] = topo_view[j, :, :, :]
                    topo_view[j, :, :, :] = tmp
                    # Note: slicing with i:i+1 works for both one_hot=True/False.
                    tmp = y[i:i + 1].copy()
                    y[i] = y[j]
                    y[j] = tmp

            super(MNIST, self).__init__(topo_view=dimshuffle(topo_view),
                                        y=y,
                                        axes=axes,
                                        max_labels=max_labels)

            assert not N.any(N.isnan(self.X))

            if start is not None:
                assert start >= 0
                if stop > self.X.shape[0]:
                    raise ValueError('stop=' + str(stop) + '>' + 'm=' +
                                     str(self.X.shape[0]))
                assert stop > start
                self.X = self.X[start:stop, :]
                if self.X.shape[0] != stop - start:
                    raise ValueError("X.shape[0]: %d. start: %d stop: %d" %
                                     (self.X.shape[0], start, stop))
                if len(self.y.shape) > 1:
                    self.y = self.y[start:stop, :]
                else:
                    self.y = self.y[start:stop]
                assert self.y.shape[0] == stop - start
        else:
            # data loading is disabled, just make something that defines the
            # right topology
            topo = dimshuffle(np.zeros((1, 28, 28, 1)))
            super(MNIST, self).__init__(topo_view=topo, axes=axes)
            self.X = None

        if which_set == 'test':
            assert fit_test_preprocessor is None or \
                (fit_preprocessor == fit_test_preprocessor)

        if self.X is not None and preprocessor:
            preprocessor.apply(self, fit_preprocessor)
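
The per-example loop that builds the one_hot matrix above can be written as a
single vectorized indexing operation; a sketch with made-up labels:

import numpy as np

y = np.array([3, 0, 7])                    # hypothetical integer labels
one_hot = np.eye(10, dtype='float32')[y]   # same result as the loop above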
Exemplo n.º 28
0
    def __init__(self,
                 preprocessed_dataset,
                 preprocessor,
                 convert_to_one_hot=None,
                 start=None,
                 stop=None,
                 axes=['b', 0, 1, 'c']):
        """
        .. todo::

            WRITEME
        """
        self.args = locals()

        self.preprocessed_dataset = preprocessed_dataset
        self.preprocessor = preprocessor
        self.rng = self.preprocessed_dataset.rng
        self.data_specs = preprocessed_dataset.data_specs
        self.X_space = preprocessed_dataset.X_space
        self.X_topo_space = preprocessed_dataset.X_topo_space
        self.view_converter = preprocessed_dataset.view_converter

        self.y = preprocessed_dataset.y
        self.y_labels = preprocessed_dataset.y_labels

        if convert_to_one_hot is not None:
            warnings.warn(
                "the `convert_to_one_hot` parameter is deprecated. To get "
                "one-hot encoded targets, request that they "
                "live in `VectorSpace` through the `data_specs` "
                "parameter of dataset iterator method. "
                "`convert_to_one_hot` will be removed on or after "
                "September 20, 2014.",
                stacklevel=2)

        if control.get_load_data():
            if start is not None:
                self.X = preprocessed_dataset.X[start:stop, :]
                if self.y is not None:
                    self.y = self.y[start:stop, :]
                assert self.X.shape[0] == stop - start
            else:
                self.X = preprocessed_dataset.X
        else:
            self.X = None
        if self.X is not None:
            if self.y is not None:
                assert self.y.shape[0] == self.X.shape[0]

        #self.mn = self.X.min()
        #self.mx = self.X.max()

        if getattr(preprocessor, "inv_P_", None) is None:
            warnings.warn("ZCA preprocessor.inv_P_ was none. Computing "
                          "inverse of preprocessor.P_ now. This will take "
                          "some time. For efficiency, it is recommended that "
                          "in the future you compute the inverse in ZCA.fit() "
                          "instead, by passing it compute_inverse=True.")
            logger.info('inverting...')
            preprocessor.inv_P_ = np.linalg.inv(preprocessor.P_)
            logger.info('...done inverting')

        self.view_converter.set_axes(axes)
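
The warning above exists because inverting P_ after the fact is slow. Under
the usual ZCA formulation (a sketch, not necessarily pylearn2's exact fit
code), the whitening matrix and its inverse fall out of the same
eigendecomposition, so computing the inverse during fit is nearly free:

import numpy as np

def zca_fit(X, eps=1e-2):
    # Returns (P, inv_P); P whitens centered rows of X, inv_P undoes it.
    Xc = X - X.mean(axis=0)
    cov = np.dot(Xc.T, Xc) / Xc.shape[0]
    s, U = np.linalg.eigh(cov)
    P = U.dot(np.diag(1.0 / np.sqrt(s + eps))).dot(U.T)
    inv_P = U.dot(np.diag(np.sqrt(s + eps))).dot(U.T)  # no np.linalg.inv
    return P, inv_P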
Exemplo n.º 29
0
    def __init__(self,
                 which_set,
                 center=False,
                 shuffle=False,
                 one_hot=False,
                 binarize=False,
                 start=None,
                 stop=None):

        self.args = locals()

        if which_set not in ['train', 'test']:
            if which_set == 'valid':
                raise ValueError(
                    "There is no such thing as the MNIST "
                    "validation set. MNIST consists of 60,000 train examples and 10,000 test"
                    " examples. If you wish to use a validation set you should divide the train "
                    "set yourself. The pylearn2 dataset implements and will only ever implement "
                    "the standard train / test split used in the literature.")
            raise ValueError('Unrecognized which_set value "%s". ' %
                             (which_set,) +
                             'Valid values are ["train", "test"].')

        if control.get_load_data():
            path = "${PYLEARN2_DATA_PATH}/mnist/"
            if which_set == 'train':
                im_path = path + 'train-images-idx3-ubyte'
                label_path = path + 'train-labels-idx1-ubyte'
            else:
                assert which_set == 'test'
                im_path = path + 't10k-images-idx3-ubyte'
                label_path = path + 't10k-labels-idx1-ubyte'

            topo_view = read_mnist_images(im_path, dtype='float32')
            y = read_mnist_labels(label_path)

            if binarize:
                topo_view = (topo_view > 0.5).astype('float32')

            self.one_hot = one_hot
            if one_hot:
                one_hot = N.zeros((y.shape[0], 10), dtype='float32')
                for i in xrange(y.shape[0]):
                    one_hot[i, y[i]] = 1.
                y = one_hot

            m, r, c = topo_view.shape
            assert r == 28
            assert c == 28
            topo_view = topo_view.reshape(m, r, c, 1)

            if which_set == 'train':
                assert m == 60000
            elif which_set == 'test':
                assert m == 10000
            else:
                assert False

            if center:
                topo_view -= topo_view.mean(axis=0)

            if shuffle:
                self.shuffle_rng = np.random.RandomState([1, 2, 3])
                for i in xrange(topo_view.shape[0]):
                    j = self.shuffle_rng.randint(m)
                    # Copy ensures that memory is not aliased.
                    tmp = topo_view[i, :, :, :].copy()
                    topo_view[i, :, :, :] = topo_view[j, :, :, :]
                    topo_view[j, :, :, :] = tmp
                    # Note: slicing with i:i+1 works for both one_hot=True/False.
                    tmp = y[i:i + 1].copy()
                    y[i] = y[j]
                    y[j] = tmp

            view_converter = dense_design_matrix.DefaultViewConverter(
                (28, 28, 1))

            super(MNIST, self).__init__(topo_view=topo_view, y=y)

            assert not N.any(N.isnan(self.X))

            if start is not None:
                assert start >= 0
                if stop > self.X.shape[0]:
                    raise ValueError('stop=' + str(stop) + '>' + 'm=' +
                                     str(self.X.shape[0]))
                assert stop > start
                self.X = self.X[start:stop, :]
                if self.X.shape[0] != stop - start:
                    raise ValueError("X.shape[0]: %d. start: %d stop: %d" %
                                     (self.X.shape[0], start, stop))
                if len(self.y.shape) > 1:
                    self.y = self.y[start:stop, :]
                else:
                    self.y = self.y[start:stop]
                assert self.y.shape[0] == stop - start
        else:
            #data loading is disabled, just make something that defines the right topology
            topo = np.zeros((1, 28, 28, 1))
            super(MNIST, self).__init__(topo_view=topo)
            self.X = None
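
The else branch above lets tests construct the dataset without the real files
on disk. Assuming this control module matches pylearn2's
pylearn2.datasets.control (an assumption, not confirmed by this snippet), the
no-load path can be exercised like this:

from pylearn2.datasets import control

control.push_load_data(False)      # assumed API: temporarily disable loading
try:
    ds = MNIST(which_set='train')  # builds the topology only; ds.X is None
    assert ds.X is None
finally:
    control.pop_load_data()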
Exemplo n.º 30
0
    def __setstate__(self, d):

        if d['design_loc'] is not None:
            if control.get_load_data():
                d['X'] = N.load(d['design_loc'])
            else:
                d['X'] = None

        if d['compress']:
            X = d['X']
            mx = d['compress_max']
            mn = d['compress_min']
            del d['compress_max']
            del d['compress_min']
            d['X'] = 0
            self.__dict__.update(d)
            if X is not None:
                self.X = N.cast['float32'](X) * mx / 255. + mn
            else:
                self.X = None
        else:
            self.__dict__.update(d)

        # To be able to unpickle older data after the addition of
        # the data_specs mechanism
        if not all(m in d for m in ('data_specs', 'X_space',
                                    '_iter_data_specs', 'X_topo_space')):
            X_space = VectorSpace(dim=self.X.shape[1])
            X_source = 'features'
            if self.y is None:
                space = X_space
                source = X_source
            else:
                y_space = VectorSpace(dim=self.y.shape[-1])
                y_source = 'targets'

                space = CompositeSpace((X_space, y_space))
                source = (X_source, y_source)

            self.data_specs = (space, source)
            self.X_space = X_space
            self._iter_data_specs = (X_space, X_source)

            view_converter = d.get('view_converter', None)
            if view_converter is not None:
                # Build a Conv2DSpace from the view_converter
                if not (isinstance(view_converter, DefaultViewConverter)
                        and len(view_converter.shape) == 3):
                    raise NotImplementedError(
                            "Not able to build a Conv2DSpace "
                            "corresponding to this converter: %s"
                            % view_converter)

                axes = view_converter.axes
                rows, cols, channels = view_converter.shape

                # self.X_topo_space stores a "default" topological space that
                # will be used only when self.iterator is called without a
                # data_specs, and with "topo=True", which is deprecated.
                self.X_topo_space = Conv2DSpace(
                        shape=(rows, cols), num_channels=channels, axes=axes)
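
The compress branch above reconstructs float32 data from uint8 storage via
X * mx / 255. + mn. The matching compression step is not shown in this
snippet; a round-trip sketch reconstructed from that formula:

import numpy as np

X = np.random.rand(5, 4).astype('float32') * 7 - 3        # made-up data
mn = X.min()
mx = (X - mn).max()
stored = np.round((X - mn) * 255. / mx).astype('uint8')   # what gets saved

recovered = np.cast['float32'](stored) * mx / 255. + mn   # the line above
assert np.allclose(X, recovered, atol=mx / 255.)          # lossy, ~1/255 step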
Exemplo n.º 31
0
    def __init__(self, which_set, shuffle=False,
                 start=None, stop=None, axes=['b', 0, 1, 'c'],
                 preprocessor=None, fit_preprocessor=False,
                 fit_test_preprocessor=False):
        self.args = locals()

        if which_set not in ['train', 'valid', 'test']:
            raise ValueError('Unrecognized which_set value "%s". ' %
                             (which_set,) + 'Valid values are ' +
                             '["train", "valid", "test"].')

        def dimshuffle(b01c):
            default = ('b', 0, 1, 'c')
            return b01c.transpose(*[default.index(axis) for axis in axes])

        if control.get_load_data():
            path = "${PYLEARN2_DATA_PATH}/binarized_mnist/binarized_mnist_" + \
                   which_set + ".npy"
            im_path = serial.preprocess(path)

            # Locally cache the files before reading them
            datasetCache = cache.datasetCache
            im_path = datasetCache.cache_file(im_path)

            try:
                X = serial.load(im_path)
            except IOError:
                raise NotInstalledError("BinarizedMNIST data files cannot be "
                                        "found in ${PYLEARN2_DATA_PATH}. Run "
                                        "pylearn2/scripts/datasets/"
                                        "download_binarized_mnist.py to get "
                                        "the data")
        else:
            if which_set == 'train':
                size = 50000
            else:
                size = 10000
            X = numpy.random.binomial(n=1, p=0.5, size=(size, 28 ** 2))

        m, d = X.shape
        assert d == 28 ** 2
        if which_set == 'train':
            assert m == 50000
        else:
            assert m == 10000

        if shuffle:
            self.shuffle_rng = make_np_rng(None, [1, 2, 3],
                                           which_method="shuffle")
            for i in xrange(X.shape[0]):
                j = self.shuffle_rng.randint(m)
                # Copy ensures that memory is not aliased.
                tmp = X[i, :].copy()
                X[i, :] = X[j, :]
                X[j, :] = tmp

        super(BinarizedMNIST, self).__init__(
            X=X,
            view_converter=DefaultViewConverter(shape=(28, 28, 1))
        )

        assert not numpy.any(numpy.isnan(self.X))

        if start is not None:
            assert start >= 0
            if stop > self.X.shape[0]:
                raise ValueError('stop=' + str(stop) + '>' +
                                 'm=' + str(self.X.shape[0]))
            assert stop > start
            self.X = self.X[start:stop, :]
            if self.X.shape[0] != stop - start:
                raise ValueError("X.shape[0]: %d. start: %d stop: %d"
                                 % (self.X.shape[0], start, stop))

        if which_set == 'test':
            assert fit_test_preprocessor is None or \
                (fit_preprocessor == fit_test_preprocessor)

        if self.X is not None and preprocessor:
            preprocessor.apply(self, fit_preprocessor)
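
For the default ('b', 0, 1, 'c') axis order, DefaultViewConverter's mapping
between 784-wide design-matrix rows and 28x28x1 images reduces to a plain
reshape; a sketch with stand-in data:

import numpy as np

X = np.random.binomial(n=1, p=0.5, size=(10, 28 * 28))  # design matrix rows
topo = X.reshape(10, 28, 28, 1)    # topological ('b', 0, 1, 'c') view
back = topo.reshape(10, 28 * 28)   # and back again, losslessly
assert np.array_equal(X, back)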
Exemplo n.º 32
0
    def __init__(self, which_set, center = False, shuffle = False,
            one_hot = False, binarize = False):

        if which_set not in ['train','test']:
            if which_set == 'valid':
                raise ValueError(
                    "There is no such thing as the MNIST validation set. "
                    "MNIST consists of 60,000 train examples and 10,000 test "
                    "examples. If you wish to use a validation set you should "
                    "divide the train set yourself. The pylearn2 dataset "
                    "implements and will only ever implement the standard "
                    "train / test split used in the literature.")
            raise ValueError('Unrecognized which_set value "%s". ' %
                             (which_set,) +
                             'Valid values are ["train", "test"].')


        if control.get_load_data():
            path = "${PYLEARN2_DATA_PATH}/mnist/"
            if which_set == 'train':
                im_path = path + 'train-images-idx3-ubyte'
                label_path = path + 'train-labels-idx1-ubyte'
            else:
                assert which_set == 'test'
                im_path = path + 't10k-images-idx3-ubyte'
                label_path = path + 't10k-labels-idx1-ubyte'

            topo_view = read_mnist_images(im_path, dtype='float32')
            y = read_mnist_labels(label_path)

            if binarize:
                topo_view = ( topo_view > 0.5).astype('float32')

            self.one_hot = one_hot
            if one_hot:
                one_hot = N.zeros((y.shape[0],10),dtype='float32')
                for i in xrange(y.shape[0]):
                    one_hot[i,y[i]] = 1.
                y = one_hot

            m, r, c = topo_view.shape
            assert r == 28
            assert c == 28
            topo_view = topo_view.reshape(m,r,c,1)

            if which_set == 'train':
                assert m == 60000
            elif which_set == 'test':
                assert m == 10000
            else:
                assert False


            if center:
                topo_view -= topo_view.mean(axis=0)

            if shuffle:
                self.shuffle_rng = np.random.RandomState([1,2,3])
                for i in xrange(topo_view.shape[0]):
                    j = self.shuffle_rng.randint(m)
                    # Copy ensures that memory is not aliased.
                    tmp = topo_view[i,:,:,:].copy()
                    topo_view[i,:,:,:] = topo_view[j,:,:,:]
                    topo_view[j,:,:,:] = tmp
                    # Note: slicing with i:i+1 works for both one_hot=True/False.
                    tmp = y[i:i+1].copy()
                    y[i] = y[j]
                    y[j] = tmp

            view_converter = dense_design_matrix.DefaultViewConverter((28,28,1))

            super(MNIST,self).__init__(topo_view = topo_view , y = y)

            assert not N.any(N.isnan(self.X))
        else:
            #data loading is disabled, just make something that defines the right topology
            topo = np.zeros((1,28,28,1))
            super(MNIST,self).__init__(topo_view = topo)
            self.X = None
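
The .copy() calls in the shuffle above matter because basic numpy slicing
returns a view of the same memory, so swapping through a view silently loses
one of the rows. A tiny demonstration:

import numpy as np

a = np.arange(6).reshape(3, 2)   # [[0, 1], [2, 3], [4, 5]]
tmp = a[0]                       # a view, not a copy
a[0] = a[2]
a[2] = tmp                       # tmp now reads the overwritten row
print(a[2])                      # [4 5], not the original [0 1]

tmp = a[0].copy()                # with .copy() the swap works as intended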
Exemplo n.º 33
0
    def __init__(self,
                 preprocessed_dataset,
                 preprocessor,
                 convert_to_one_hot=None,
                 start=None,
                 stop=None,
                 axes=['b', 0, 1, 'c']):
        """
        .. todo::

            WRITEME
        """
        self.args = locals()

        self.preprocessed_dataset = preprocessed_dataset
        self.preprocessor = preprocessor
        self.rng = self.preprocessed_dataset.rng
        self.data_specs = preprocessed_dataset.data_specs
        self.X_space = preprocessed_dataset.X_space
        self.X_topo_space = preprocessed_dataset.X_topo_space
        self.view_converter = preprocessed_dataset.view_converter

        self.y = preprocessed_dataset.y
        self.y_labels = preprocessed_dataset.y_labels

        if convert_to_one_hot is not None:
            warnings.warn("the `convert_to_one_hot` parameter is deprecated. To get "
                          "one-hot encoded targets, request that they "
                          "live in `VectorSpace` through the `data_specs` "
                          "parameter of dataset iterator method. "
                          "`convert_to_one_hot` will be removed on or after "
                          "September 20, 2014.", stacklevel=2)

        if control.get_load_data():
            if start is not None:
                self.X = preprocessed_dataset.X[start:stop, :]
                if self.y is not None:
                    self.y = self.y[start:stop, :]
                assert self.X.shape[0] == stop-start
            else:
                self.X = preprocessed_dataset.X
        else:
            self.X = None
        if self.X is not None:
            if self.y is not None:
                assert self.y.shape[0] == self.X.shape[0]

        #self.mn = self.X.min()
        #self.mx = self.X.max()

        if getattr(preprocessor, "inv_P_", None) is None:
            warnings.warn("ZCA preprocessor.inv_P_ was none. Computing "
                          "inverse of preprocessor.P_ now. This will take "
                          "some time. For efficiency, it is recommended that "
                          "in the future you compute the inverse in ZCA.fit() "
                          "instead, by passing it compute_inverse=True.")
            logger.info('inverting...')
            preprocessor.inv_P_ = np.linalg.inv(preprocessor.P_)
            logger.info('...done inverting')

        self.view_converter.set_axes(axes)
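
Per the deprecation warning above, one-hot targets are now requested through
data_specs instead of convert_to_one_hot. A sketch of such a request, assuming
the usual pylearn2 spaces/iterator API, where `dataset` stands for one of the
dataset objects defined above with 784 features and 10 classes (both numbers
are assumptions here):

from pylearn2.space import CompositeSpace, VectorSpace

data_specs = (CompositeSpace([VectorSpace(dim=784), VectorSpace(dim=10)]),
              ('features', 'targets'))
it = dataset.iterator(mode='sequential', batch_size=100,
                      data_specs=data_specs)
for X_batch, y_batch in it:
    pass  # y_batch arrives one-hot encoded in the 10-dim VectorSpace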
Exemplo n.º 34
0
    def __setstate__(self, d):

        if d['design_loc'] is not None:
            if control.get_load_data():
                d['X'] = N.load(d['design_loc'])
            else:
                d['X'] = None

        if d['compress']:
            X = d['X']
            mx = d['compress_max']
            mn = d['compress_min']
            del d['compress_max']
            del d['compress_min']
            d['X'] = 0
            self.__dict__.update(d)
            if X is not None:
                self.X = N.cast['float32'](X) * mx / 255. + mn
            else:
                self.X = None
        else:
            self.__dict__.update(d)

        # To be able to unpickle older data after the addition of
        # the data_specs mechanism
        if not all(m in d for m in ('data_specs', 'X_space',
                                    '_iter_data_specs', 'X_topo_space')):
            X_space = VectorSpace(dim=self.X.shape[1])
            X_source = 'features'
            if self.y is None:
                space = X_space
                source = X_source
            else:
                y_space = VectorSpace(dim=self.y.shape[-1])
                y_source = 'targets'

                space = CompositeSpace((X_space, y_space))
                source = (X_source, y_source)

            self.data_specs = (space, source)
            self.X_space = X_space
            self._iter_data_specs = (X_space, X_source)

            view_converter = d.get('view_converter', None)
            if view_converter is not None:
                # Build a Conv2DSpace from the view_converter
                if not (isinstance(view_converter, DefaultViewConverter)
                        and len(view_converter.shape) == 3):
                    raise NotImplementedError(
                        "Not able to build a Conv2DSpace "
                        "corresponding to this converter: %s" % view_converter)

                axes = view_converter.axes
                rows, cols, channels = view_converter.shape

                # self.X_topo_space stores a "default" topological space that
                # will be used only when self.iterator is called without a
                # data_specs, and with "topo=True", which is deprecated.
                self.X_topo_space = Conv2DSpace(shape=(rows, cols),
                                                num_channels=channels,
                                                axes=axes)
Exemplo n.º 35
0
    def __init__(self, which_set, imgd=65, zd=1, ds=1, center = False, shuffle = False,
            one_hot = False, binarize = False, start = None,
            stop = None, axes=['b', 0, 1, 'c'],
            preprocessor = None,
            fit_preprocessor = False,
            fit_test_preprocessor = False):

        self.args = locals()

        if which_set not in ['train','valid','test']:
            raise ValueError('Unrecognized which_set value "%s". ' %
                    (which_set,) + 'Valid values are ["train", "valid", "test"].')

        def dimshuffle(b01c):
            default = ('b', 0, 1, 'c')
            return b01c.transpose(*[default.index(axis) for axis in axes])

        if control.get_load_data():

            path = "${PYLEARN2_DATA_PATH}/lgn/"
            path = path + "LGN1_MembraneSamples_65x65x1_mp0.50_train50000_valid10000_test10000_seed11.pkl.gz"
            path = serial.preprocess(path)

            f = gzip.open(path, 'rb')
            train_set, valid_set, test_set = cPickle.load(f)
            f.close()

            if which_set == 'train':
                data = train_set
            elif which_set == 'valid':
                data = valid_set
            else:
                data = test_set

            input_shape = (imgd, imgd, zd)

            # f = h5py.File(path, 'r')
            # input_shape = f['input_shape'][...]

            # if which_set == 'train':
            #     data = f['/train_set'][...]
            # elif which_set == 'valid':
            #     data = f['/valid_set'][...]
            # else:
            #     data = f['/test_set'][...]

            # Convert images to float 0-1
            topo_view = data[0].astype(np.float32) / 255.0
            y = data[1]

            self.one_hot = one_hot
            if one_hot:
                one_hot = N.zeros((y.shape[0],2),dtype='float32')
                for i in xrange(y.shape[0]):
                    one_hot[i,y[i]] = 1.
                y = one_hot

            m = topo_view.shape[0]
            rows, cols, slices = input_shape
            topo_view = topo_view.reshape(m, rows, cols, slices)

            if center:
                topo_view -= topo_view.mean(axis=0)

            if shuffle:
                self.shuffle_rng = np.random.RandomState([1,2,3])
                for i in xrange(topo_view.shape[0]):
                    j = self.shuffle_rng.randint(m)
                    # Copy ensures that memory is not aliased.
                    tmp = topo_view[i,:,:,:].copy()
                    topo_view[i,:,:,:] = topo_view[j,:,:,:]
                    topo_view[j,:,:,:] = tmp
                    # Note: slicing with i:i+1 works for both one_hot=True/False.
                    tmp = y[i:i+1].copy()
                    y[i] = y[j]
                    y[j] = tmp

            super(LGN,self).__init__(topo_view = dimshuffle(topo_view), y = y, axes=axes)

            assert not N.any(N.isnan(self.X))

            if start is not None:
                assert start >= 0
                if stop > self.X.shape[0]:
                    raise ValueError('stop='+str(stop)+'>'+'m='+str(self.X.shape[0]))
                assert stop > start
                self.X = self.X[start:stop,:]
                if self.X.shape[0] != stop - start:
                    raise ValueError("X.shape[0]: %d. start: %d stop: %d" % (self.X.shape[0], start, stop))
                if len(self.y.shape) > 1:
                    self.y = self.y[start:stop,:]
                else:
                    self.y = self.y[start:stop]
                assert self.y.shape[0] == stop - start
        else:
            #data loading is disabled, just make something that defines the right topology
            topo = dimshuffle(np.zeros((1,65,65,1)))
            super(LGN,self).__init__(topo_view = topo, axes=axes)
            self.X = None

        if which_set == 'test':
            assert fit_test_preprocessor is None or (fit_preprocessor == fit_test_preprocessor)

        if self.X is not None and preprocessor:
            preprocessor.apply(self, fit_preprocessor)
Exemplo n.º 36
0
    def __init__(self, which_set, imgd=49, zd=1, ds=1, center = False, shuffle = False,
            one_hot = False, binarize = False, start = None,
            stop = None, axes=['b', 0, 1, 'c'],
            preprocessor = None,
            fit_preprocessor = False,
            fit_test_preprocessor = False):

        self.args = locals()

        if which_set not in ['train','valid','test']:
            raise ValueError('Unrecognized which_set value "%s". ' %
                    (which_set,) + 'Valid values are ["train", "valid", "test"].')

        def dimshuffle(b01c):
            default = ('b', 0, 1, 'c')
            return b01c.transpose(*[default.index(axis) for axis in axes])

        if control.get_load_data():

            path = "${PYLEARN2_DATA_PATH}/ISBI_2012/"
            path = path + "ISBI_MembraneSamples_49x49_mp0.50_train50000_valid5000_test5000_seed11_ds2b.pkl.gz"
            path = serial.preprocess(path)

            f = gzip.open(path, 'rb')
            train_set, valid_set, test_set = cPickle.load(f)
            f.close()

            if which_set == 'train':
                data = train_set
            elif which_set == 'valid':
                data = valid_set
            else:
                data = test_set

            input_shape = (imgd, imgd, zd)

            # f = h5py.File(path, 'r')
            # input_shape = f['input_shape'][...]

            # if which_set == 'train':
            #     data = f['/train_set'][...]
            # elif which_set == 'valid':
            #     data = f['/valid_set'][...]
            # else:
            #     data = f['/test_set'][...]

            # Convert images to float 0-1
            topo_view = data[0].astype(np.float32) / 255.0
            y = data[1]

            self.one_hot = one_hot
            if one_hot:
                one_hot = N.zeros((y.shape[0],2),dtype='float32')
                for i in xrange(y.shape[0]):
                    one_hot[i,y[i]] = 1.
                y = one_hot

            m = topo_view.shape[0]
            rows, cols, slices = input_shape
            topo_view = topo_view.reshape(m, rows, cols, slices)

            if center:
                topo_view -= topo_view.mean(axis=0)

            if shuffle:
                self.shuffle_rng = np.random.RandomState([1,2,3])
                for i in xrange(topo_view.shape[0]):
                    j = self.shuffle_rng.randint(m)
                    # Copy ensures that memory is not aliased.
                    tmp = topo_view[i,:,:,:].copy()
                    topo_view[i,:,:,:] = topo_view[j,:,:,:]
                    topo_view[j,:,:,:] = tmp
                    # Note: slicing with i:i+1 works for both one_hot=True/False.
                    tmp = y[i:i+1].copy()
                    y[i] = y[j]
                    y[j] = tmp

            super(ISBI,self).__init__(topo_view = dimshuffle(topo_view), y = y, axes=axes)

            assert not N.any(N.isnan(self.X))

            if start is not None:
                assert start >= 0
                if stop > self.X.shape[0]:
                    raise ValueError('stop='+str(stop)+'>'+'m='+str(self.X.shape[0]))
                assert stop > start
                self.X = self.X[start:stop,:]
                if self.X.shape[0] != stop - start:
                    raise ValueError("X.shape[0]: %d. start: %d stop: %d" % (self.X.shape[0], start, stop))
                if len(self.y.shape) > 1:
                    self.y = self.y[start:stop,:]
                else:
                    self.y = self.y[start:stop]
                assert self.y.shape[0] == stop - start
        else:
            #data loading is disabled, just make something that defines the right topology
            topo = dimshuffle(np.zeros((1,49,49,1)))
            super(ISBI,self).__init__(topo_view = topo, axes=axes)
            self.X = None

        if which_set == 'test':
            assert fit_test_preprocessor is None or (fit_preprocessor == fit_test_preprocessor)

        if self.X is not None and preprocessor:
            preprocessor.apply(self, fit_preprocessor)
Exemplo n.º 37
0
    def __init__(self, which_set, start=None, stop=None, axes=['b', 0, 1, 'c']):
        self.args = locals()

        def dimshuffle(b01c):
            default = ('b', 0, 1, 'c')
            return b01c.transpose(*[default.index(axis) for axis in axes])

        if control.get_load_data():
            path = "/local/Pylearn2/data/ADNI/"

            im_path = path + 'ADNI_X_down'
            label_path = path + 'ADNI_y_down'
            #im_path = serial.preprocess(im_path)
            #label_path = serial.preprocess(label_path)

            #Locally cache the files before reading them
            datasetCache = cache.datasetCache
            im_path = datasetCache.cache_file(im_path)
            label_path = datasetCache.cache_file(label_path)

            with open_if_filename(im_path, 'rb') as f:
                #magic, number, rows, cols = struct.unpack('>iiii', f.read(16))
                #im_array = N.fromfile(f, dtype='float64')
                im_array = pickle.load(f)

            with open_if_filename(label_path, 'rb') as f:
                #label_array = N.fromfile(f, dtype='uint8')
                label_array = pickle.load(f)

            topo_view = im_array
            y = label_array
        else:

            if which_set == 'train':
                size = 397
            else:
                size = 50
            topo_view = np.random.rand(size, 64, 64, 41)
            y = np.random.randint(0, 3, (size, 1))

        y_labels = 3

        m, r, c, d = topo_view.shape
        assert r == 64
        assert c == 64
        topo_view = topo_view.reshape(m, r, c, 41)

        if which_set == 'train':
            assert m == 397
        else:
            assert m == 50

        super(MRI, self).__init__(topo_view=dimshuffle(topo_view), y=y,
                axes=axes, y_labels=y_labels)

        assert not N.any(N.isnan(self.X))

        if start is not None:
            assert start >= 0
            #if stop > self.X.shape[0]:
            #    raise ValueError('stop=' + str(stop) + '>' +
            #                     'm=' + str(self.X.shape[0]))

            assert stop > start
            self.X = self.X[start:stop, :]
            if self.X.shape[0] != stop - start:
                raise ValueError("X.shape[0]: %d. start: %d stop: %d"
                            % (self.X.shape[0], start, stop))
            if len(self.y.shape) > 1:
                self.y = self.y[start:stop, :]
            else:
                self.y = self.y[start:stop]
            assert self.y.shape[0] == stop - start
Exemplo n.º 38
0
    def __init__(self, which_set, start=None, stop=None):

        self.args = locals()

        if which_set not in ['train', 'valid', 'test']:
            # The original check only raised for 'valid' (dead code) and let
            # other bad values fall through silently; raise unconditionally.
            raise ValueError(
                'Unrecognized which_set value "%s". ' % (which_set,) +
                'Valid values are ["train", "valid", "test"].')

        if control.get_load_data():
            path = "${PYLEARN2_DATA_PATH}/milleniumSAMs/"
            if which_set == 'train':
                data_path = path + 'milliTrain.pickle.gz'
            elif which_set == 'valid':
                data_path = path + 'milliValid.pickle.gz'
            else:
                assert which_set == 'test'
                data_path = path + 'milliTest.pickle.gz'

            # Path substitution done here in order to make the lower-level
            # mnist_ubyte.py as stand-alone as possible (for reuse in, e.g.,
            # the Deep Learning Tutorials, or in another package).
            data_path = serial.preprocess(data_path)

            # Locally cache the files before reading them
            #Not sure if it's necessary, but why not?
            datasetCache = cache.datasetCache
            data_path = datasetCache.cache_file(data_path)

            X, y = pickle.load(gzip.open(data_path))
        else:
            # Data loading is disabled; generate random placeholder data with
            # the same shape as the real design matrix (193 features per row,
            # asserted below). The split sizes are placeholders as well.
            if which_set == 'train':
                size = 60000
            else:
                size = 10000
            X = np.random.rand(size, 193)
            y = np.random.rand(size, 1)

        m, r = X.shape
        assert r == 193

        # Shuffle used to be here, which I don't think is terrifically
        # necessary.
        # X = dimshuffle(X)
        super(MILLI_SAM, self).__init__(X=X, y=y)

        assert not np.any(np.isnan(self.X))

        if start is not None:
            assert start >= 0
            if stop > self.X.shape[0]:
                raise ValueError('stop=' + str(stop) + '>' +
                                 'm=' + str(self.X.shape[0]))
            assert stop > start
            self.X = self.X[start:stop, :]
            if self.X.shape[0] != stop - start:
                raise ValueError("X.shape[0]: %d. start: %d stop: %d"
                                 % (self.X.shape[0], start, stop))
            if len(self.y.shape) > 1:
                self.y = self.y[start:stop, :]
            else:
                self.y = self.y[start:stop]
            assert self.y.shape[0] == stop - start
Exemplo n.º 39
0
    def __init__(self, which_set, center = False, shuffle = False,
            one_hot = False, binarize = False, start = None,
            stop = None, axes=['b', 0, 1, 'c'],
            preprocessor = None,
            fit_preprocessor = False,
            fit_test_preprocessor = False):

        self.args = locals()


        if which_set not in ['train','test']:
            if which_set == 'valid':
                raise ValueError(
                    "There is no such thing as the MNIST validation set. "
                    "MNIST consists of 60,000 train examples and 10,000 test "
                    "examples. If you wish to use a validation set you should "
                    "divide the train set yourself. The pylearn2 dataset "
                    "implements and will only ever implement the standard "
                    "train / test split used in the literature.")
            raise ValueError('Unrecognized which_set value "%s". ' %
                             (which_set,) +
                             'Valid values are ["train", "test"].')

        def dimshuffle(b01c):
            default = ('b', 0, 1, 'c')
            return b01c.transpose(*[default.index(axis) for axis in axes])

        if control.get_load_data():
            path = "${PYLEARN2_DATA_PATH}/mnist/"
            if which_set == 'train':
                im_path = path + 'train-images-idx3-ubyte'
                label_path = path + 'train-labels-idx1-ubyte'
            else:
                assert which_set == 'test'
                im_path = path + 't10k-images-idx3-ubyte'
                label_path = path + 't10k-labels-idx1-ubyte'
            # Path substitution done here in order to make the lower-level
            # mnist_ubyte.py as stand-alone as possible (for reuse in, e.g.,
            # the Deep Learning Tutorials, or in another package).
            im_path = serial.preprocess(im_path)
            label_path = serial.preprocess(label_path)
            topo_view = read_mnist_images(im_path, dtype='float32')
            y = read_mnist_labels(label_path)

            if binarize:
                topo_view = ( topo_view > 0.5).astype('float32')

            self.one_hot = one_hot
            if one_hot:
                one_hot = N.zeros((y.shape[0],10),dtype='float32')
                for i in xrange(y.shape[0]):
                    one_hot[i,y[i]] = 1.
                y = one_hot

            m, r, c = topo_view.shape
            assert r == 28
            assert c == 28
            topo_view = topo_view.reshape(m,r,c,1)

            if which_set == 'train':
                assert m == 60000
            elif which_set == 'test':
                assert m == 10000
            else:
                assert False


            if center:
                topo_view -= topo_view.mean(axis=0)

            if shuffle:
                self.shuffle_rng = np.random.RandomState([1,2,3])
                for i in xrange(topo_view.shape[0]):
                    j = self.shuffle_rng.randint(m)
                    # Copy ensures that memory is not aliased.
                    tmp = topo_view[i,:,:,:].copy()
                    topo_view[i,:,:,:] = topo_view[j,:,:,:]
                    topo_view[j,:,:,:] = tmp
                    # Note: slicing with i:i+1 works for both one_hot=True/False.
                    tmp = y[i:i+1].copy()
                    y[i] = y[j]
                    y[j] = tmp


            super(MNIST,self).__init__(topo_view = dimshuffle(topo_view), y = y, axes=axes)

            assert not N.any(N.isnan(self.X))

            if start is not None:
                assert start >= 0
                if stop > self.X.shape[0]:
                    raise ValueError('stop='+str(stop)+'>'+'m='+str(self.X.shape[0]))
                assert stop > start
                self.X = self.X[start:stop,:]
                if self.X.shape[0] != stop - start:
                    raise ValueError("X.shape[0]: %d. start: %d stop: %d" % (self.X.shape[0], start, stop))
                if len(self.y.shape) > 1:
                    self.y = self.y[start:stop,:]
                else:
                    self.y = self.y[start:stop]
                assert self.y.shape[0] == stop - start
        else:
            # data loading is disabled, just make something that defines the
            # right topology
            topo = dimshuffle(np.zeros((1,28,28,1)))
            super(MNIST,self).__init__(topo_view = topo, axes=axes)
            self.X = None

        if which_set == 'test':
            assert fit_test_preprocessor is None or (fit_preprocessor == fit_test_preprocessor)

        if self.X is not None and preprocessor:
            preprocessor.apply(self, fit_preprocessor)
Exemplo n.º 40
0
    def __init__(self, which_set, start=None, stop=None, nParticles = None):

        self.args = locals()

        if which_set not in ['train', 'valid', 'test']:
            raise ValueError(
                'Unrecognized which_set value "%s". ' % (which_set,) +
                'Valid values are ["train", "valid", "test"].')
        if control.get_load_data():

            path = "${PYLEARN2_DATA_PATH}/nanoParticle/"
            #have to load in the whole array from the start anyway
            import h5py
            import numpy as np
            totParticles = 262144
            if nParticles is None:
                nParticles = totParticles

            trainingFrac = .8
            validFrac = .1

            np.random.seed(0)

            idxs = np.random.choice(totParticles, nParticles)

            #if which_set == 'train':
            slice = np.s_[idxs, :]

            #elif which_set == 'valid':
            #    slice = np.s_[int(totParticles*trainingFrac):int(totParticles*trainingFrac)+nParticles, :]
            #else:
            #    assert which_set == 'test'
            #    slice = np.s_[int(totParticles*(trainingFrac+validFrac)):int(totParticles*(trainingFrac+validFrac))+nParticles, :]

            X = np.zeros((100, nParticles*6))
            y = np.zeros((100, nParticles*3))

            colors = ['b', 'g','r','y','k','m']

            def getFilename(i):
                # Snapshot indices are zero-padded to three digits.
                out = path + 'snapshot_%03d.hdf5' % i
                return serial.preprocess(out)

            # Running bounds for min-max scaling; assumes velocities straddle 0.
            absMinVel, absMaxVal = 0, 0
            maxCoord = 10000  # particles live in a 0-10000 cube
            for i in xrange(101):
                fname = getFilename(i)
                f = h5py.File(fname, 'r')
                ids = f['PartType1']['ParticleIDs'][()]
                sorter = ids.argsort()

                coords = f['PartType1']['Coordinates'][()]
                coords = coords[sorter]#sort by ids

                #normalize
                #coordinates are all >=0, so just divide by max
                coords/=maxCoord

                #from matplotlib import pyplot as plt
                #plt.scatter(coords[0, 0], coords[0,1], c = colors[i%len(colors)])

                coords = coords[slice]

                #if i == 100:
                #    print which_set
                #    plt.show()

                if i!=0:
                    y[i-1,:] = coords.flatten()
                #if i == 100:
                #    continue
                #y[i,:] = coords.flatten()
                if i!=100:
                    vels = f['PartType1']['Velocities'][()]
                    vels = vels[sorter]

                    minVel, maxVel = vels.min(), vels.max()
                    if minVel < absMinVel:
                        absMinVel = minVel

                    if maxVel > absMaxVal:
                        absMaxVal = maxVel

                    vels = vels[slice]
                    data = np.concatenate((coords, vels), axis = 1).flatten()

                    X[i,:] = data
                    del data

                del coords
                f.close()

            #normalize the velocity columns
            for n in xrange(nParticles):
                for col in xrange(3):
                    X[:, n*6+3+col] = (X[:, n*6+3+col]-absMinVel)/(absMaxVal-absMinVel)

        else:
            # Data loading is disabled; generate random placeholder data with
            # the same shapes as the real arrays (100 snapshot transitions,
            # asserted below).
            if nParticles is None:
                nParticles = 1
            X = np.random.rand(100, nParticles * 6)
            y = np.random.rand(100, nParticles * 3)

        m, r = X.shape
        assert m == 100

        n, s = y.shape
        assert n == 100

        # Shuffle used to be here, which I don't think is terrifically
        # necessary.
        # X = dimshuffle(X)
        super(NANO_PARTICLE, self).__init__(X=X, y=y)
        del X
        del y

        assert not np.any(np.isnan(self.X))

        if start is not None:
            assert start >= 0
            if stop > self.X.shape[0]:
                raise ValueError('stop=' + str(stop) + '>' +
                                 'm=' + str(self.X.shape[0]))
            assert stop > start
            self.X = self.X[start:stop, :]
            if self.X.shape[0] != stop - start:
                raise ValueError("X.shape[0]: %d. start: %d stop: %d"
                                 % (self.X.shape[0], start, stop))
            if len(self.y.shape) > 1:
                self.y = self.y[start:stop, :]
            else:
                self.y = self.y[start:stop]
            assert self.y.shape[0] == stop - start
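
The velocity-normalization double loop above can be collapsed by viewing X as
(frames, particles, 6) and scaling the three velocity components in one shot;
a sketch with stand-in values:

import numpy as np

nParticles = 4
X = np.random.rand(100, nParticles * 6)    # stand-in for the array above
absMinVel, absMaxVal = -2.0, 2.0           # stand-in velocity bounds

X3 = X.reshape(100, nParticles, 6)         # (frame, particle, [xyz, vxyz])
X3[:, :, 3:] = (X3[:, :, 3:] - absMinVel) / (absMaxVal - absMinVel)
# X3 is a view of X, so X itself is updated in place, matching the loop.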