def _gcn(self, X):
    if not self.specs:
        X = global_contrast_normalize(X, scale=True)
        return X
    pre = 0
    # feature_ns is assumed (defined elsewhere in this class) to hold the
    # cumulative feature-group boundaries; each group is normalized separately
    for n in feature_ns:
        if n <= X.shape[1]:
            X[:, pre:n] = global_contrast_normalize(X[:, pre:n], scale=True)
            pre = n
    return X
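A minimal standalone sketch of the same group-wise normalization, assuming feature_ns holds cumulative column boundaries; the values [850, 1406] below are hypothetical, chosen to match the feature850/feature556 split that appears in later examples on this page:

import numpy as np
from pylearn2.expr.preprocessing import global_contrast_normalize

def gcn_by_groups(X, feature_ns):
    # normalize each column group [pre:n) independently
    pre = 0
    for n in feature_ns:
        if n <= X.shape[1]:
            X[:, pre:n] = global_contrast_normalize(X[:, pre:n], scale=True)
            pre = n
    return X

X = gcn_by_groups(np.random.RandomState(0).randn(4, 1406), [850, 1406])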
Example #2
    def __init__(self,
                 which_set,
                 data_path=None,
                 center=True,
                 rescale=True,
                 gcn=True):
        self.class_name = ['neg', 'pos']
        # load data
        path = "${PYLEARN2_DATA_PATH}/cin/"
        #datapath = path + 'feature850-2-1.pkl'
        if data_path is None:
            data_path = path + 'feature850-2-1.pkl'
        else:
            data_path = path + data_path
        data_path = serial.preprocess(data_path)
        with open(data_path, 'rb') as f:
            train_set, valid_set, test_set = cPickle.load(f)

        self.train_set = train_set
        self.valid_set = valid_set
        self.test_set = test_set
        if which_set == 'train':
            X, Y = self.train_set
        elif which_set == 'valid':
            X, Y = self.valid_set
        else:
            X, Y = self.test_set

        X = X.astype(float)
        axis = 0
        _max = np.max(X, axis=axis)
        _min = np.min(X, axis=axis)
        _mean = np.mean(X, axis=axis)
        _std = np.std(X, axis=axis)
        _scale = _max - _min


        # print _max
        # print _min
        # print _mean
        # print _std

        if gcn:
            X = global_contrast_normalize(X, scale=gcn)
        else:
            if center:
                X -= _mean
            if rescale:
                X /= _scale

        # topo_view = X.reshape(X.shape[0], X.shape[1], 1, 1)
        # y = np.reshape(Y, (Y.shape[0], 1))
        # y = np.atleast_2d(Y).T
        y = np.zeros((Y.shape[0], 2))
        y[:, 0] = Y
        y[:, 1] = 1 - Y
        print X.shape, y.shape
        super(CIN_FEATURE2, self).__init__(X=X, y=y)
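The two-column label matrix built above stacks the binary labels and their complement; an equivalent NumPy one-liner, shown purely as an illustration:

import numpy as np

Y = np.array([0, 1, 1, 0])
y = np.column_stack([Y, 1 - Y]).astype('float32')
# column 0 holds the original label, column 1 its complement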
Example #3
    def apply(self, dataset, can_fit=False):
        if self.skip:
            return

        if self._batch_size is None:
            X = global_contrast_normalize(dataset.get_design_matrix(),
                                          scale=self._scale,
                                          subtract_mean=self._subtract_mean,
                                          use_std=self._use_std,
                                          sqrt_bias=self._sqrt_bias,
                                          min_divisor=self._min_divisor)
            dataset.set_design_matrix(X)
        else:
            X = dataset.get_design_matrix()
            data_size = X.shape[0]
            last = (np.floor(data_size / float(self._batch_size)) *
                    self._batch_size)
            for i in xrange(0, data_size, self._batch_size):
                if i >= last:
                    stop = i + np.mod(data_size, self._batch_size)
                else:
                    stop = i + self._batch_size
                log.info("GCN processing data from %d to %d" % (i, stop))
                data = self.transform(X[i:stop])
                dataset.set_design_matrix(data, start=i)
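The batched branch above visits the design matrix in fixed-size chunks, with a shorter final chunk when batch_size does not divide data_size evenly; a small self-contained sketch of the same loop bounds:

import numpy as np

def batch_bounds(data_size, batch_size):
    # yield (start, stop) row ranges: full batches first, then the remainder
    last = int(np.floor(data_size / float(batch_size)) * batch_size)
    for i in range(0, data_size, batch_size):
        stop = i + data_size % batch_size if i >= last else i + batch_size
        yield i, stop

# list(batch_bounds(10, 4)) -> [(0, 4), (4, 8), (8, 10)]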
Example #4
    def apply(self, dataset, can_fit=False):

        #check if we have already flattened patches
        if self.normalized_data_key in dataset.keys():
            print "skipping normalization, this has already been run"
            return
        else:
            print "normalizing patches"

        in_data = dataset[self.data_to_normalize_key]
        data_size = in_data.shape[0]

        dataset.create_dataset(self.normalized_data_key, in_data.shape,
                               chunks=(self.batch_size,) + in_data.shape[1:])

        out_data = dataset[self.normalized_data_key]

        #iterate over patches
        for patch_index in range(data_size):
            if patch_index % 2000 == 0:
                print str(patch_index) + '/' + str(data_size)

            #iterate over rgbd so they are all normalized separately at this point
            for channel in range(4):
                out_data[patch_index, :, :, channel] = global_contrast_normalize(
                    in_data[patch_index, :, :, channel],
                    scale=self.scale,
                    subtract_mean=self.subtract_mean,
                    use_std=self.use_std,
                    sqrt_bias=self.sqrt_bias,
                    min_divisor=self.min_divisor)
Example #5
def test_subtract_mean_false():
    rng = numpy.random.RandomState(0)
    X = abs(rng.randn(50, 70))
    Y = global_contrast_normalize(X, subtract_mean=False, scale=5)
    numpy.testing.assert_allclose(numpy.sqrt((Y ** 2).sum(axis=1)), 5)
    numpy.testing.assert_raises(AssertionError,
                                numpy.testing.assert_allclose,
                                Y.mean(axis=1), 0, atol=1e-10)
Example #6
    def __init__(self,
                 which_set='full',
                 path='train.mat',
                 one_hot=False,
                 colorspace='none',
                 step=1,
                 start=None,
                 stop=None,
                 center=False,
                 rescale=False,
                 gcn=None,
                 toronto_prepro=False,
                 axes=('b', 0, 1, 'c')):

        self.__dict__.update(locals())
        del self.self

        #
        #self.one_hot = one_hot
        #self.colorspace = colorspace
        #self.step = step
        #self.which_set = which_set
        
        self.view_converter = None

        self.path = preprocess(self.path)
        X, y = self._load_data()

        if center:
            X -= 127.5
        #self.center = center

        if rescale:
            X /= 127.5
        #self.rescale = rescale
        
        if toronto_prepro:
            assert not center
            assert not gcn
            X = X / 255.
            if which_set == 'test':
                other = MATDATA(which_set='train')
                oX = other.X
                oX /= 255.
                X = X - oX.mean(axis=0)
            else:
                X = X - X.mean(axis=0)
        #self.toronto_prepro = toronto_prepro

        #self.gcn = gcn
        if gcn is not None:
            gcn = float(gcn)
            X = global_contrast_normalize(X, scale=gcn, min_divisor=1e-8)
	    
        view_converter = DefaultViewConverter(
            (self.windowSize, self.windowSize, self.channels), axes)
        
        super(MATDATA, self).__init__(X=X, y=y, view_converter=view_converter)
Example #7
    def __init__(self,
                 which_set='full',
                 path='train.mat',
                 one_hot=False,
                 colorspace='none',
                 step=1,
                 start=None,
                 stop=None,
                 center=False,
                 rescale=False,
                 gcn=None,
                 toronto_prepro=False,
                 axes=('b', 0, 1, 'c')):

        self.__dict__.update(locals())
        del self.self

        #
        #self.one_hot = one_hot
        #self.colorspace = colorspace
        #self.step=step
        #self.which_set=which_set

        self.view_converter = None

        self.path = preprocess(self.path)
        X, y = self._load_data()

        if center:
            X -= 127.5
        #self.center = center

        if rescale:
            X /= 127.5
        #self.rescale = rescale

        if toronto_prepro:
            assert not center
            assert not gcn
            X = X / 255.
            if which_set == 'test':
                other = MATDATA(which_set='train')
                oX = other.X
                oX /= 255.
                X = X - oX.mean(axis=0)
            else:
                X = X - X.mean(axis=0)
        #self.toronto_prepro = toronto_prepro

        #self.gcn = gcn
        if gcn is not None:
            gcn = float(gcn)
            X = global_contrast_normalize(X, scale=gcn, min_divisor=1e-8)

        view_converter = DefaultViewConverter(
            (self.windowSize, self.windowSize, self.channels), axes)

        super(MATDATA, self).__init__(X=X, y=y, view_converter=view_converter)
Example #8
def normalize(img, prep, img_shape):
    # this requires zca from pylearn 2 for all functions prep.
    img = prep.inverse(img.reshape(1, -1))[0]
    img /= np.abs(img).max()
    img = np.clip(img, -1., 1.)
    img = (img + 1.) / 2.
    img = global_contrast_normalize(img.reshape(1, -1) * 255, scale=55.)
    img = prep._gpu_matrix_dot(img - prep.mean_, prep.P_)
    return img.reshape(img_shape)
Example #9
def test_subtract_mean_false():
    rng = numpy.random.RandomState(0)
    X = abs(rng.randn(50, 70))
    Y = global_contrast_normalize(X, subtract_mean=False, scale=5)
    numpy.testing.assert_allclose(numpy.sqrt((Y**2).sum(axis=1)), 5)
    numpy.testing.assert_raises(AssertionError,
                                numpy.testing.assert_allclose,
                                Y.mean(axis=1),
                                0,
                                atol=1e-10)
Example #10
    def transform(self, X):

        if self.flag_gcn:
            X = global_contrast_normalize(X)

        if self.flag_lcn:
            X = self.lcn_transform(X)

        if self.flag_zca:
            X, _ = centersphere(X, method='ZCA', A=self.zca_mat)

        return X
Example #11
def real_time_prediction():

    ### loading new images for classification starts here
    fo = open(save_path, 'rb')  # batch path
    batch1 = pickle.load(fo)
    fo.close()

    xarr = np.array(batch1['data'], dtype='float32')
    xarr = global_contrast_normalize(xarr, scale=55.)

    no_of_row = len(batch1['data'])

    xdat = np.array(
        xarr.reshape((no_of_row, 3, 32, 32)),
        dtype='float32')  #reshape first parameter = batch matrix no. of row
    xdat = np.transpose(xdat[:, :, :, :], (1, 2, 3, 0))

    x = dense_design_matrix.DenseDesignMatrix(topo_view=xdat,
                                              axes=['c', 0, 1, 'b'])
    x.apply_preprocessor(my_pca_preprocessor, can_fit=False)
    tarr = x.get_topological_view()
    #print tarr
    y = f(tarr)

    ###########searching max in matrix##################################################
    #j = no. of row in prediction_batch
    #i = no. of classes (0-9)
    #result=('airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck')
    result = ('bottle', 'book', 'toy', 'pen', 'chair', 'coin', 'phone', 'hand',
              'note', 'head')
    resultString = ''

    for j in range(0, no_of_row):
        max_index = 0
        max_no = y[j][0]
        #print max_no
        for i in range(0, 10):
            if y[j][i] > max_no:
                max_no = y[j][i]
                max_index = i
        # print max_index
        print "======================"
        print 'Photo', j + 1, ' max=', result[max_index]

        if j > 0:
            resultString += ','

        resultString += result[max_index]
    #print 'y =', y
    ###################################################################################

    return resultString
Example #12
    def __init__(self, which_set=None, file=None, center=False, rescale=False,
                 gcn=None, one_hot=False, start=None, stop=None,
                 axes=('b', 0, 1, 'c'), toronto_prepro=False,
                 preprocessor=None):
        # note: there is no such thing as the cifar10 validation set;
        # pylearn1 defined one but really it should be user-configurable
        # (as it is here)

        self.axes = axes

        # we define here:
        dtype  = 'uint8'
        ntrain = 0
        nvalid = 0  # artefact, we won't use it
        ntest  = 300000

        # we also expose the following details:
        self.img_shape = (3, 32, 32)
        self.img_size = N.prod(self.img_shape)
        #self.n_classes = 10
        #self.label_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
                            #'dog', 'frog','horse','ship','truck']
        # prepare loading
        #fnames = ['data_batch_%i' % i for i in range(1,6)]
        #lenx = N.ceil((ntrain + nvalid) / 10000.)*10000
        #x = N.zeros((lenx,self.img_size), dtype=dtype)
        #y = N.zeros(lenx, dtype=dtype)
        X = np.load(file).astype(np.float32)
        # load train data

        if center:
            X -= 127.5
        self.center = center

        if rescale:
            X /= 127.5
        self.rescale = rescale

        self.toronto_prepro = toronto_prepro

        self.gcn = gcn
        if gcn is not None:
            gcn = float(gcn)
            X = global_contrast_normalize(X, scale=gcn)

        view_converter = dense_design_matrix.DefaultViewConverter(
            (32, 32, 3), axes)

        super(CIFAR10_TEST, self).__init__(X=X, view_converter=view_converter)

        assert not np.any(np.isnan(self.X))

        if preprocessor:
            preprocessor.apply(self)
Example #13
def real_time_prediction():

    ### loading new images for classification starts here
    fo = open(save_path, 'rb')  # batch path
    batch1 = pickle.load(fo)
    fo.close()

    xarr = np.array(batch1['data'], dtype='float32')
    xarr = global_contrast_normalize(xarr, scale=55.)

    no_of_row = len(batch1['data'])

    # reshape first parameter = batch matrix no. of row
    xdat = np.array(xarr.reshape((no_of_row, 3, 32, 32)), dtype='float32')
    xdat = np.transpose(xdat[:, :, :, :], (1, 2, 3, 0))

    x = dense_design_matrix.DenseDesignMatrix(topo_view=xdat,
                                              axes=['c', 0, 1, 'b'])
    x.apply_preprocessor(my_pca_preprocessor, can_fit=False)
    tarr = x.get_topological_view()
    #print tarr
    y = f(tarr)

    ###########searching max in matrix##################################################
    #j = no. of row in prediction_batch
    #i = no. of classes (0-9)
    #result=('airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck')
    result = ('bottle', 'book', 'toy', 'pen', 'chair', 'coin', 'phone', 'hand',
              'note', 'head')
    resultString = ''

    for j in range(0, no_of_row):
        max_index = 0
        max_no = y[j][0]
        #print max_no
        for i in range(0, 10):
            if y[j][i] > max_no:
                max_no = y[j][i]
                max_index = i
        # print max_index
        print "======================"
        print 'Photo', j + 1, ' max=', result[max_index]

        if j > 0:
            resultString += ','

        resultString += result[max_index]
    #print 'y =', y
    ###################################################################################

    return resultString
Example #14
    def __init__(self, source_directory, axes=('b', 0, 1, 'c'),
                 remove_misfits=False):

        self.axes = axes

        # we define here:
        dtype = 'uint8'
        files = self.file_list_of_source(source_directory)

        # we also expose the following details:
        # this is rather dangerous if the first file is not
        # representative of the remainder
        self.img_shape = self.determine_shape(files[0])
        self.img_size = np.prod(self.img_shape)

        # prepare loading
        x = np.zeros((len(files), self.img_size), dtype=dtype)

        # load train data
        X = self.flatten_images(x, files)
        X = global_contrast_normalize(X)

        view_converter = dense_design_matrix.DefaultViewConverter(
            self.img_shape, self.axes)

        super(LocalImages, self).__init__(X=X, view_converter=view_converter)
Example #15
    def apply(self, dataset, can_fit=False):
        if self._batch_size is None:
            X = global_contrast_normalize(dataset.get_design_matrix(),
                                          scale=self._scale,
                                          subtract_mean=self._subtract_mean,
                                          use_std=self._use_std,
                                          sqrt_bias=self._sqrt_bias,
                                          min_divisor=self._min_divisor)
            dataset.set_design_matrix(X)
        else:
            X = dataset.get_design_matrix()
            data_size = X.shape[0]
            last = (np.floor(data_size / float(self._batch_size)) *
                    self._batch_size)
            for i in xrange(0, data_size, self._batch_size):
                if i >= last:
                    stop = i + np.mod(data_size, self._batch_size)
                else:
                    stop = i + self._batch_size
                log.info("GCN processing data from {} to {}".format(i, stop))
                data = self.transform(X[i:stop])
                dataset.set_design_matrix(data, start=i)
Example #16
    def __init__(self,
                 which_set,
                 data_path=None,
                 center=True,
                 rescale=True,
                 gcn=True,
                 specs=False):
        self.class_name = ['neg', 'pos']
        # load data
        path = "${PYLEARN2_DATA_PATH}/cin/"
        #datapath = path + 'feature850-2-1.pkl'
        if data_path is None:
            data_path = path + 'feature1406-2-1.pkl'
        else:
            data_path = path + data_path
        data_path = serial.preprocess(data_path)
        with open(data_path, 'rb') as f:
            train_set, valid_set, test_set = cPickle.load(f)

        self.train_set = train_set
        self.valid_set = valid_set
        self.test_set = test_set
        self.specs = specs
        if which_set == 'train':
            X, Y = self.train_set
        elif which_set == 'valid':
            X, Y = self.valid_set
        else:
            X, Y = self.test_set

        X = X.astype(float)
        axis = 0
        _max = np.max(X, axis=axis)
        _min = np.min(X, axis=axis)
        _mean = np.mean(X, axis=axis)
        _std = np.std(X, axis=axis)
        _scale = _max - _min

        def features_map_fn(indexes):
            rval = []
            for sequence_index, example_index in self._fetch_index(indexes):
                rval.append(self.samples_sequences[sequence_index][
                    example_index:example_index + self.frames_per_example
                ].ravel())
            return rval

        def targets_map_fn(indexes):
            rval = []
            for sequence_index, example_index in self._fetch_index(indexes):
                rval.append(self.samples_sequences[sequence_index][
                    example_index + self.frames_per_example].ravel())
            return rval

        map_fn_components = [features_map_fn, targets_map_fn]
        self.map_functions = tuple(map_fn_components)
        self.cumulative_example_indexes = X.shape[0]

        # print _max
        # print _min
        # print _mean
        # print _std

        if gcn:
            X = global_contrast_normalize(X, scale=gcn)
        else:
            if center:
                X -= _mean
            if rescale:
                X /= _scale

        # topo_view = X.reshape(X.shape[0], X.shape[1], 1, 1)
        # y = np.reshape(Y, (Y.shape[0], 1))
        # y = np.atleast_2d(Y).T
        y = np.zeros((Y.shape[0], 2))
        y[:, 0] = Y
        y[:, 1] = 1 - Y
        print X.shape, y.shape
        super(CIN_FEATURE2, self).__init__(X=X, y=y)
        # super(CIN_FEATURE2, self).__init__(topo_view=topo_view, y=y, y_labels=2)

        if specs:
            assert X.shape[1] == (850 + 656)
            self.init_data_specs()
            self.feature850 = X[:, 0:850]
            self.feature656 = X[:, 850:]
            self.y = y
Example #17
def test_scale():
    rng = numpy.random.RandomState(0)
    X = abs(rng.randn(50, 70))
    Y = global_contrast_normalize(X, scale=5)
    numpy.testing.assert_allclose(numpy.sqrt((Y**2).sum(axis=1)), 5)
    numpy.testing.assert_allclose(Y.mean(axis=1), 0, atol=1e-10)
Example #18
    def __init__(self,
                 which_set,
                 center=False,
                 rescale=False,
                 gcn=None,
                 one_hot=False,
                 start=None,
                 stop=None,
                 axes=('b', 0, 1, 'c'),
                 toronto_prepro=False,
                 preprocessor=None):
        # note: there is no such thing as the cifar10 validation set;
        # pylearn1 defined one but really it should be user-configurable
        # (as it is here)

        self.axes = axes

        # we define here:
        dtype = 'uint8'
        ntrain = 50000
        nvalid = 0  # artefact, we won't use it
        ntest = 10000

        # we also expose the following details:
        self.img_shape = (3, 32, 32)
        self.img_size = N.prod(self.img_shape)
        self.n_classes = 10
        self.label_names = [
            'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog',
            'horse', 'ship', 'truck'
        ]

        # prepare loading
        fnames = ['data_batch_%i' % i for i in range(1, 6)]
        lenx = N.ceil((ntrain + nvalid) / 10000.) * 10000
        x = N.zeros((lenx, self.img_size), dtype=dtype)
        y = N.zeros(lenx, dtype=dtype)

        # load train data
        nloaded = 0
        for i, fname in enumerate(fnames):
            data = CIFAR10._unpickle(fname)
            x[i * 10000:(i + 1) * 10000, :] = data['data']
            y[i * 10000:(i + 1) * 10000] = data['labels']
            nloaded += 10000
            if nloaded >= ntrain + nvalid + ntest:
                break

        # load test data
        data = CIFAR10._unpickle('test_batch')

        # process this data
        Xs = {'train': x[0:ntrain], 'test': data['data'][0:ntest]}

        Ys = {'train': y[0:ntrain], 'test': data['labels'][0:ntest]}

        X = N.cast['float32'](Xs[which_set])
        y = Ys[which_set]

        if isinstance(y, list):
            y = np.asarray(y)

        if which_set == 'test':
            assert y.shape[0] == 10000

        self.one_hot = one_hot
        if one_hot:
            one_hot = np.zeros((y.shape[0], 10), dtype='float32')
            for i in xrange(y.shape[0]):
                one_hot[i, y[i]] = 1.
            y = one_hot

        if center:
            X -= 127.5
        self.center = center

        if rescale:
            X /= 127.5
        self.rescale = rescale

        if toronto_prepro:
            assert not center
            assert not gcn
            X = X / 255.
            if which_set == 'test':
                other = CIFAR10(which_set='train')
                oX = other.X
                oX /= 255.
                X = X - oX.mean(axis=0)
            else:
                X = X - X.mean(axis=0)
        self.toronto_prepro = toronto_prepro

        self.gcn = gcn
        if gcn is not None:
            gcn = float(gcn)
            X = global_contrast_normalize(X, scale=gcn)

        if start is not None:
            # This needs to come after the prepro so that it doesn't change the pixel
            # means computed above for toronto_prepro
            assert start >= 0
            assert stop > start
            assert stop <= X.shape[0]
            X = X[start:stop, :]
            y = y[start:stop]
            assert X.shape[0] == y.shape[0]

        if which_set == 'test':
            assert X.shape[0] == 10000

        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                                  axes)

        super(CIFAR10, self).__init__(X=X, y=y, view_converter=view_converter)

        assert not np.any(np.isnan(self.X))

        if preprocessor:
            preprocessor.apply(self)
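For reference, a hypothetical construction of the class defined above; gcn=55. mirrors the scale=55. used by other examples on this page, and the 45000/5000 split is only illustrative:

train = CIFAR10(which_set='train', gcn=55., start=0, stop=45000)
valid = CIFAR10(which_set='train', gcn=55., start=45000, stop=50000)
test = CIFAR10(which_set='test', gcn=55.)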
Example #19
    def __init__(self,
                 which_set,
                 data_path=None,
                 center=True,
                 rescale=True,
                 gcn=True,
                 specs=True):
        self.class_name = ['neg', 'pos']
        # load data
        path = "${PYLEARN2_DATA_PATH}/cin/"
        #datapath = path + 'feature850-2-1.pkl'
        if data_path is None:
            data_path = path + 'feature1406-2-1.pkl'
        else:
            data_path = path + data_path
        data_path = serial.preprocess(data_path)
        with open(data_path, 'rb') as f:
            train_set, valid_set, test_set = cPickle.load(f)

        self.train_set = train_set
        self.valid_set = valid_set
        self.test_set = test_set
        self.specs = specs
        if which_set == 'train':
            X, Y = self.train_set
        elif which_set == 'valid':
            X, Y = self.valid_set
        else:
            X, Y = self.test_set


        X = X.astype(float)
        axis = 0
        _max = np.max(X, axis=axis)
        _min = np.min(X, axis=axis)
        _mean = np.mean(X, axis=axis)
        _std = np.std(X, axis=axis)
        _scale = _max - _min

        if gcn:
            X = global_contrast_normalize(X, scale=gcn)
        else:
            if center:
                X[:, ] -= _mean
            if rescale:
                X[:, ] /= _scale


        # topo_view = X.reshape(X.shape[0], X.shape[1], 1, 1)
        # y = np.reshape(Y, (Y.shape[0], 1))
        # y = np.atleast_2d(Y).T
        self.raw_X = X
        self.raw_y = Y
        y = np.zeros((Y.shape[0], 2))
        y[:, 0] = Y
        y[:, 1] = 1 - Y
        # print "Load CIN_FEATURE2 data: {}, with size X:{}, y:{}".format(data_path, X.shape, y.shape)
        super(CIN_FEATURE2, self).__init__(X=X, y=y)
        # super(CIN_FEATURE2, self).__init__(topo_view=topo_view, y=y, y_labels=2)

        if specs:
            assert X.shape[1] == (850 + 556)
            self.init_data_specs()
            self.feature850 = X[:, 0:850]
            self.feature556 = X[:, 850:]
            self.y = y
Example #20
def test_min_divisor():
    rng = numpy.random.RandomState(0)
    X = abs(rng.randn(50, 70))
    X[0] *= 1e-15
    Y = global_contrast_normalize(X, subtract_mean=False, use_std=True)
    numpy.testing.assert_array_equal(X[0], Y[0])
Example #21
def test_basic():
    rng = numpy.random.RandomState(0)
    X = abs(rng.randn(50, 70))
    Y = global_contrast_normalize(X)
    numpy.testing.assert_allclose((Y**2).sum(axis=1), 1)
    numpy.testing.assert_allclose(Y.mean(axis=1), 0, atol=1e-10)
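The tests above pin down what global_contrast_normalize computes: subtract each row's mean (unless subtract_mean=False), then divide by the row's L2 norm, or by its standard deviation when use_std=True, rescaled by scale, with rows whose normalizer falls below min_divisor left untouched. A NumPy sketch of that contract (an illustration consistent with these tests, not pylearn2's actual source):

import numpy as np

def gcn_sketch(X, scale=1., subtract_mean=True, use_std=False,
               sqrt_bias=0., min_divisor=1e-8):
    X = np.asarray(X, dtype=float)
    if subtract_mean:
        X = X - X.mean(axis=1, keepdims=True)
    if use_std:
        normalizers = np.sqrt(sqrt_bias + X.var(axis=1, ddof=1)) / scale
    else:
        normalizers = np.sqrt(sqrt_bias + (X ** 2).sum(axis=1)) / scale
    normalizers[normalizers < min_divisor] = 1.  # leave near-zero rows alone
    return X / normalizers[:, np.newaxis]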
Example #22
    def __init__(self,
                 which_set,
                 center=False,
                 rescale=False,
                 gcn=None,
                 start=None,
                 stop=None,
                 axes=('b', 0, 1, 'c'),
                 toronto_prepro=False,
                 preprocessor=None):
        # note: there is no such thing as the cifar10 validation set;
        # pylearn1 defined one but really it should be user-configurable
        # (as it is here)

        self.axes = axes

        # we define here:
        dtype = 'uint8'
        ntrain = 50000
        nvalid = 0  # artefact, we won't use it
        ntest = 10000

        # we also expose the following details:
        self.img_shape = (3, 32, 32)
        self.img_size = numpy.prod(self.img_shape)
        self.n_classes = 10
        self.label_names = [
            'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog',
            'horse', 'ship', 'truck'
        ]

        # prepare loading
        fnames = ['data_batch_%i' % i for i in range(1, 6)]
        datasets = {}
        datapath = os.path.join(
            string_utils.preprocess('${PYLEARN2_DATA_PATH}'), 'cifar10',
            'cifar-10-batches-py')
        for name in fnames + ['test_batch']:
            fname = os.path.join(datapath, name)
            if not os.path.exists(fname):
                raise IOError(fname + " was not found. You probably need to "
                              "download the CIFAR-10 dataset by using the "
                              "download script in "
                              "pylearn2/scripts/datasets/download_cifar10.sh "
                              "or manually from "
                              "http://www.cs.utoronto.ca/~kriz/cifar.html")
            datasets[name] = cache.datasetCache.cache_file(fname)

        lenx = int(numpy.ceil((ntrain + nvalid) / 10000.) * 10000)
        x = numpy.zeros((lenx, self.img_size), dtype=dtype)
        y = numpy.zeros((lenx, 1), dtype=dtype)

        # load train data
        nloaded = 0
        for i, fname in enumerate(fnames):
            _logger.info('loading file %s' % datasets[fname])
            data = serial.load(datasets[fname])
            x[i * 10000:(i + 1) * 10000, :] = data['data']
            y[i * 10000:(i + 1) * 10000, 0] = data['labels']
            nloaded += 10000
            if nloaded >= ntrain + nvalid + ntest:
                break

        # load test data
        _logger.info('loading file %s' % datasets['test_batch'])
        data = serial.load(datasets['test_batch'])

        # process this data
        Xs = {'train': x[0:ntrain], 'test': data['data'][0:ntest]}

        Ys = {'train': y[0:ntrain], 'test': data['labels'][0:ntest]}

        X = numpy.cast['float32'](Xs[which_set])
        y = Ys[which_set]

        if isinstance(y, list):
            y = numpy.asarray(y).astype(dtype)

        if which_set == 'test':
            assert y.shape[0] == 10000
            y = y.reshape((y.shape[0], 1))

        if center:
            X -= 127.5
        self.center = center

        if rescale:
            X /= 127.5
        self.rescale = rescale

        if toronto_prepro:
            assert not center
            assert not gcn
            X = X / 255.
            if which_set == 'test':
                other = CIFAR10(which_set='train')
                oX = other.X
                oX /= 255.
                X = X - oX.mean(axis=0)
            else:
                X = X - X.mean(axis=0)
        self.toronto_prepro = toronto_prepro

        self.gcn = gcn
        if gcn is not None:
            gcn = float(gcn)
            X = global_contrast_normalize(X, scale=gcn)

        if start is not None:
            # This needs to come after the prepro so that it doesn't
            # change the pixel means computed above for toronto_prepro
            assert start >= 0
            assert stop > start
            assert stop <= X.shape[0]
            X = X[start:stop, :]
            y = y[start:stop, :]
            assert X.shape[0] == y.shape[0]

        if which_set == 'test':
            assert X.shape[0] == 10000

        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                                  axes)

        super(CIFAR10, self).__init__(X=X,
                                      y=y,
                                      view_converter=view_converter,
                                      y_labels=self.n_classes)

        assert not contains_nan(self.X)

        if preprocessor:
            preprocessor.apply(self)
Example #23
    def __init__(
        self,
        lfw_path,
        filelist_path,
        embedding_file=None,
        center=False,
        scale=False,
        start=None,
        stop=None,
        gcn=None,
        shuffle=False,
        rng=None,
        seed=132987,
        axes=("b", 0, 1, "c"),
        img_shape=(3, 250, 250),
    ):
        self.axes = axes

        self.img_shape = img_shape
        C, H, W = img_shape
        self.img_size = np.prod(self.img_shape)

        files = []
        with open(filelist_path, "r") as filelist_f:
            files = [line.strip() for line in filelist_f]

        # Load raw pixel integer values
        dtype = "uint8"
        X = np.zeros((len(files), W, H, C), dtype=dtype)
        img_ids = []

        for i, line in enumerate(files):
            if "\t" in line:
                # New format: contains image IDs
                img_path, img_id = line.strip().split()
                img_ids.append(int(img_id))
            else:
                img_path = line.strip()

            full_path = os.path.join(lfw_path, img_path)
            im = image.load(full_path, rescale_image=False, dtype=dtype)

            # Handle grayscale images which may not have RGB channels
            if len(im.shape) == 2:
                W, H = im.shape

                # Repeat image 3 times across axis 2
                im = im.reshape(W, H, 1).repeat(3, 2)

            # Swap color channel to front
            X[i] = im

        # Cast to float32, center / scale if necessary
        X = np.cast["float32"](X)

        # Create dense design matrix from topological view
        X = X.reshape(X.shape[0], -1)

        # Prepare img_ids
        if embedding_file is not None:
            if len(img_ids) != len(files):
                raise ValueError("You must provide a filelist with indexes "
                                 "into the embedding array for each image.")
        img_ids = np.array(img_ids, dtype="uint32")

        if center and scale:
            X[:] -= 127.5
            X[:] /= 127.5
        elif center:
            X[:] -= 127.5
        elif scale:
            X[:] /= 255.0

        self.gcn = gcn
        if gcn is not None:
            gcn = float(gcn)
            X = global_contrast_normalize(X, scale=gcn)

        if shuffle:
            rng = make_np_rng(rng, seed, which_method="permutation")
            rand_idx = rng.permutation(len(X))

            X = X[rand_idx]
            img_ids = img_ids[rand_idx]

        if start is not None:
            assert start >= 0
            assert stop > start
            assert stop <= X.shape[0]

            X = X[start:stop]

            if len(img_ids) > 0:
                img_ids = img_ids[start:stop]

        # Load embeddings if provided
        Y = None
        if embedding_file is not None:
            embeddings = np.load(embedding_file)["arr_0"]
            assert embeddings.shape[0] >= len(files)

            Y = embeddings[img_ids].astype(theano.config.floatX)

        # create view converting for retrieving topological view
        self.view_converter = dense_design_matrix.DefaultViewConverter((W, H, C), axes)

        # init super class
        super(LFW, self).__init__(X=X, y=Y)

        assert not contains_nan(self.X)

        # Another hack: rename 'targets' to match model expectations
        if embedding_file is not None:
            space, (X_source, y_source) = self.data_specs
            self.data_specs = (space, (X_source, "condition"))
Example #24
def test_basic():
    rng = numpy.random.RandomState(0)
    X = abs(rng.randn(50, 70))
    Y = global_contrast_normalize(X)
    numpy.testing.assert_allclose((Y ** 2).sum(axis=1), 1)
    numpy.testing.assert_allclose(Y.mean(axis=1), 0, atol=1e-10)
Example #25
def test_min_divisor():
    rng = numpy.random.RandomState(0)
    X = abs(rng.randn(50, 70))
    X[0] *= 1e-15
    Y = global_contrast_normalize(X, subtract_mean=False, use_std=True)
    numpy.testing.assert_array_equal(X[0], Y[0])
Example #26
def test_std_norm():
    rng = numpy.random.RandomState(0)
    X = abs(rng.randn(50, 70))
    Y = global_contrast_normalize(X, use_std=True, scale=5)
    numpy.testing.assert_allclose(Y.std(axis=1, ddof=1), 5)
Example #27
print batch1_data.shape
print batch1_labels.shape
print batch2_data.shape
print batch2_labels.shape
print batch3_data.shape
print batch3_labels.shape

image = paramgraphics.mat_to_img(batch1_data[:100, :].T, dim_input,
                                 colorImg=colorImg, scale=True)
image.save(saveDir + 'svhn_train.png', 'PNG')
image = paramgraphics.mat_to_img(batch2_data[:100, :].T, dim_input,
                                 colorImg=colorImg, scale=True)
image.save(saveDir + 'svhn_valid.png', 'PNG')
image = paramgraphics.mat_to_img(batch3_data[:100, :].T, dim_input,
                                 colorImg=colorImg, scale=True)
image.save(saveDir + 'svhn_test.png', 'PNG')

if preprocessing == 'gcn_var':
    batch1_data = pypp.global_contrast_normalize(batch1_data, subtract_mean=True, use_std=True)
    batch2_data = pypp.global_contrast_normalize(batch2_data, subtract_mean=True, use_std=True)
    batch3_data = pypp.global_contrast_normalize(batch3_data, subtract_mean=True, use_std=True)
elif preprocessing == 'gcn_norm':
    batch1_data = pypp.global_contrast_normalize(batch1_data, subtract_mean=True)
    batch2_data = pypp.global_contrast_normalize(batch2_data, subtract_mean=True)
    batch3_data = pypp.global_contrast_normalize(batch3_data, subtract_mean=True)
    print batch1_data.shape
    print batch1_data.max()
    print batch1_data.min()
    print batch2_data.shape
    print batch2_data.max()
    print batch2_data.min()
    print batch3_data.shape
    print batch3_data.max()
    print batch3_data.min()
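The only difference between the 'gcn_var' and 'gcn_norm' branches above is the use_std flag: with it, each row ends up with unit (ddof=1) standard deviation; without it, unit L2 norm. A quick check, assuming pypp is pylearn2.expr.preprocessing as in the snippet above:

import numpy as np
from pylearn2.expr import preprocessing as pypp

X = np.random.RandomState(0).randn(5, 100)
Y_var = pypp.global_contrast_normalize(X, subtract_mean=True, use_std=True)
Y_norm = pypp.global_contrast_normalize(X, subtract_mean=True)
print np.allclose(Y_var.std(axis=1, ddof=1), 1.0)             # gcn_var: unit std
print np.allclose(np.sqrt((Y_norm ** 2).sum(axis=1)), 1.0)    # gcn_norm: unit norm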
Example #28
def test_scale():
    rng = numpy.random.RandomState(0)
    X = abs(rng.randn(50, 70))
    Y = global_contrast_normalize(X, scale=5)
    numpy.testing.assert_allclose(numpy.sqrt((Y ** 2).sum(axis=1)), 5)
    numpy.testing.assert_allclose(Y.mean(axis=1), 0, atol=1e-10)
Example #29
    def __init__(self,
                 which_set,
                 center=False,
                 rescale=False,
                 gcn=None,
                 one_hot=None,
                 start=None,
                 stop=None,
                 axes=('b', 0, 1, 'c'),
                 toronto_prepro=False,
                 preprocessor=None,
                 two_image=False):
        # note: there is no such thing as the cifar10 validation set;
        # pylearn1 defined one but really it should be user-configurable
        # (as it is here)

        self.axes = axes

        # we define here:
        dtype = 'uint8'
        ntrain = 50000
        nvalid = 0  # artefact, we won't use it
        ntest = 10000

        # we also expose the following details:
        self.img_shape = (3, 32, 32)
        self.img_shape2 = (32, 32, 3)
        self.img_size = N.prod(self.img_shape)
        self.n_classes = 10
        self.label_names = [
            'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog',
            'horse', 'ship', 'truck'
        ]

        # prepare loading
        fnames = ['data_batch_%i' % i for i in range(1, 6)]
        lenx = N.ceil((ntrain + nvalid) / 10000.) * 10000
        x = N.zeros((lenx, self.img_size), dtype=dtype)
        y = N.zeros((lenx, 1), dtype=dtype)

        # load train data
        nloaded = 0
        for i, fname in enumerate(fnames):
            data = CIFAR10._unpickle(fname)
            x[i * 10000:(i + 1) * 10000, :] = data['data']
            y[i * 10000:(i + 1) * 10000, 0] = data['labels']
            nloaded += 10000
            if nloaded >= ntrain + nvalid + ntest:
                break

        # load test data
        data = CIFAR10._unpickle('test_batch')

        # 2value image
        # can not use other option when you use two_image option
        print x.shape

        if two_image:
            from PIL import Image
            two_value_x = []
            self.img_shape = (1, 32, 32)
            self.img_shape2 = (32, 32, 1)

            for i, pixel in enumerate(x.reshape(50000, 3, 32, 32)):
                if i % 1000 == 0:
                    print i
                pixel = np.transpose(pixel, (1, 2, 0))
                test_img = Image.new("RGB", (32, 32), (255, 0, 0))
                test_img.putdata(
                    [tuple(px.tolist()) for px in pixel.reshape(1024, 3)])
                two_value_x.append(
                    list(test_img.convert("1").getdata()))
            x = np.asarray(two_value_x)

        # process this data
        Xs = {'train': x[0:ntrain], 'test': data['data'][0:ntest]}

        Ys = {'train': y[0:ntrain], 'test': data['labels'][0:ntest]}

        X = N.cast['float32'](Xs[which_set])
        y = Ys[which_set]

        if isinstance(y, list):
            y = np.asarray(y).astype(dtype)

        if which_set == 'test':
            assert y.shape[0] == 10000
            y = y.reshape((y.shape[0], 1))

        max_labels = 10
        if one_hot is not None:
            warnings.warn(
                "the `one_hot` parameter is deprecated. To get "
                "one-hot encoded targets, request that they "
                "live in `VectorSpace` through the `data_specs` "
                "parameter of MNIST's iterator method. "
                "`one_hot` will be removed on or after "
                "September 20, 2014.",
                stacklevel=2)

        if center:
            X -= 127.5
        self.center = center

        if rescale:
            X /= 127.5
        self.rescale = rescale

        if toronto_prepro:
            assert not center
            assert not gcn
            X = X / 255.
            if which_set == 'test':
                other = CIFAR10(which_set='train')
                oX = other.X
                oX /= 255.
                X = X - oX.mean(axis=0)
            else:
                X = X - X.mean(axis=0)
        self.toronto_prepro = toronto_prepro

        self.gcn = gcn
        if gcn is not None:
            gcn = float(gcn)
            X = global_contrast_normalize(X, scale=gcn)

        if start is not None:
            # This needs to come after the prepro so that it doesn't
            # change the pixel means computed above for toronto_prepro
            assert start >= 0
            assert stop > start
            assert stop <= X.shape[0]
            X = X[start:stop, :]
            y = y[start:stop, :]
            assert X.shape[0] == y.shape[0]

        if which_set == 'test':
            assert X.shape[0] == 10000

        # view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
        #                                                           axes)
        view_converter = dense_design_matrix.DefaultViewConverter(
            self.img_shape2, axes)

        super(CIFAR10, self).__init__(X=X,
                                      y=y,
                                      view_converter=view_converter,
                                      y_labels=self.n_classes)

        assert not contains_nan(self.X)

        if preprocessor:
            preprocessor.apply(self)
Example #30
    def __init__(self,
                 which_set,
                 center=False,
                 rescale=False,
                 gcn=None,
                 one_hot=False,
                 start=None,
                 stop=None,
                 axes=('b', 0, 1, 'c'),
                 toronto_prepro=False,
                 preprocessor=None):

        # note: there is no such thing as the cifar10 validation set;
        # pylearn1 defined one but really it should be user-configurable
        # (as it is here)

        self.axes = axes

        # we define here:
        dtype = 'uint8'
        ntrain = 50000
        nvalid = 0  # artefact, we won't use it
        ntest = 10000

        # we also expose the following details:
        self.img_shape = (3, 32, 32)
        self.img_size = np.prod(self.img_shape)
        self.n_classes = 10
        self.label_names = [
            'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog',
            'horse', 'ship', 'truck'
        ]

        #         # prepare loading
        #         fnames = ['data_batch_%i' % i for i in range(1,6)]
        #         lenx = np.ceil((ntrain + nvalid) / 10000.)*10000
        #         x = np.zeros((lenx,self.img_size), dtype=dtype)
        #         y = np.zeros(lenx, dtype=dtype)
        #
        #         # load train data
        #         nloaded = 0
        #         for i, fname in enumerate(fnames):
        #             data = CIFAR10._unpickle(fname)
        #             x[i*10000:(i+1)*10000, :] = data['data']
        #             y[i*10000:(i+1)*10000] = data['labels']
        #             nloaded += 10000
        #             if nloaded >= ntrain + nvalid + ntest: break;
        #
        #         # load test data
        #         data = CIFAR10._unpickle('test_batch')
        #
        #         # process this data
        #         Xs = {
        #                 'train' : x[0:ntrain],
        #                 'test'  : data['data'][0:ntest]
        #             }
        #
        #         Ys = {
        #                 'train' : y[0:ntrain],
        #                 'test'  : data['labels'][0:ntest]
        #             }

        if which_set == 'train':

            #             pkl = self._unpickle(os.environ['PYLEARN2_DATA_PATH']+
            #                                  'cifar10/pylearn2_gcn_whitened/train.pkl')
            #pkl = self._unpickle(os.environ['PYLEARN2_DATA_PATH']+
            #         'cifar10/pylearn2_gcn_whitened/test.pkl')
            #X = pkl.X
            #y = pkl.y

            X = np.load(os.environ['PYLEARN2_DATA_PATH'] +
                        '/cifar10/train_X.npy')
            y = np.load(os.environ['PYLEARN2_DATA_PATH'] +
                        '/cifar10/train_y.npy')
            X = np.cast['float32'](X)
            y = np.cast['float32'](y)

        elif which_set == 'test':
            #             pkl = self._unpickle(os.environ['PYLEARN2_DATA_PATH']+
            #                                  'cifar10/pylearn2_gcn_whitened/test.pkl')
            #             X = pkl.X
            #             y = pkl.y
            X = np.load(os.environ['PYLEARN2_DATA_PATH'] +
                        '/cifar10/test_X.npy')
            y = np.load(os.environ['PYLEARN2_DATA_PATH'] +
                        '/cifar10/test_y.npy')
            X = np.cast['float32'](X)
            y = np.cast['float32'](y)


#         X = np.cast['float32'](Xs[which_set])
#         y = Ys[which_set]

        if which_set == 'test':
            assert X.shape[0] == 10000

        if isinstance(y, list):
            y = np.asarray(y)

        if center:
            X -= 127.5
        self.center = center

        if rescale:
            X /= 127.5
        self.rescale = rescale

        if toronto_prepro:
            assert not center
            assert not gcn
            X = X / 255.
            if which_set == 'test':
                other = CIFAR10(which_set='train')
                oX = other.X
                oX /= 255.
                X = X - oX.mean(axis=0)
            else:
                X = X - X.mean(axis=0)
        self.toronto_prepro = toronto_prepro

        self.gcn = gcn
        if gcn is not None:
            gcn = float(gcn)
            X = global_contrast_normalize(X, scale=gcn)

        if start is not None:
            # This needs to come after the prepro so that it doesn't change the pixel
            # means computed above for toronto_prepro
            assert start >= 0
            assert stop > start
            assert stop <= X.shape[0]
            X = X[start:stop, :]
            y = y[start:stop]
            assert X.shape[0] == y.shape[0]

        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                                  axes)

        if which_set == 'train':
            length = X.shape[0]

            def search_right_label(desired_label, i):
                for idx in xrange(i, length):
                    if y[idx] == desired_label:
                        return idx

            def swap_ele(index, i):
                x_tmp = X[i]
                X[i] = X[index]
                X[index] = x_tmp

                y_tmp = y[i]
                y[i] = y[index]
                y[index] = y_tmp

            desired_label = 0
            for i in xrange(length):
                desired_label = i % 10
                if y[i] != desired_label:
                    index = search_right_label(desired_label, i)
                    swap_ele(index, i)

            for i in xrange(length - 100, length):
                print y[i]

        self.one_hot = one_hot
        if one_hot:
            one_hot = np.zeros((y.shape[0], 10), dtype='float32')
            for i in xrange(y.shape[0]):
                one_hot[i, y[i]] = 1.
            y = one_hot

        super(My_CIFAR10, self).__init__(X=X,
                                         y=y,
                                         view_converter=view_converter)

        assert not np.any(np.isnan(self.X))

        if preprocessor:
            preprocessor.apply(self)
Example #31
    def __init__(self, which_set, center=False, rescale=False, gcn=None,
                 start=None, stop=None, axes=('b', 0, 1, 'c'),
                 toronto_prepro=False, preprocessor=None):
        # note: there is no such thing as the cifar10 validation set;
        # pylearn1 defined one but really it should be user-configurable
        # (as it is here)

        self.axes = axes

        # we define here:
        dtype = 'uint8'
        ntrain = 50000
        nvalid = 0  # artefact, we won't use it
        ntest = 10000

        # we also expose the following details:
        self.img_shape = (3, 32, 32)
        self.img_size = numpy.prod(self.img_shape)
        self.n_classes = 10
        self.label_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
                            'dog', 'frog', 'horse', 'ship', 'truck']

        # prepare loading
        fnames = ['data_batch_%i' % i for i in range(1, 6)]
        datasets = {}
        datapath = os.path.join(
            string_utils.preprocess('${PYLEARN2_DATA_PATH}'),
            'cifar10', 'cifar-10-batches-py')
        for name in fnames + ['test_batch']:
            fname = os.path.join(datapath, name)
            if not os.path.exists(fname):
                raise IOError(fname + " was not found. You probably need to "
                              "download the CIFAR-10 dataset by using the "
                              "download script in "
                              "pylearn2/scripts/datasets/download_cifar10.sh "
                              "or manually from "
                              "http://www.cs.utoronto.ca/~kriz/cifar.html")
            datasets[name] = cache.datasetCache.cache_file(fname)

        lenx = numpy.ceil((ntrain + nvalid) / 10000.) * 10000
        x = numpy.zeros((lenx, self.img_size), dtype=dtype)
        y = numpy.zeros((lenx, 1), dtype=dtype)

        # load train data
        nloaded = 0
        for i, fname in enumerate(fnames):
            _logger.info('loading file %s' % datasets[fname])
            data = serial.load(datasets[fname])
            x[i * 10000:(i + 1) * 10000, :] = data['data']
            y[i * 10000:(i + 1) * 10000, 0] = data['labels']
            nloaded += 10000
            if nloaded >= ntrain + nvalid + ntest:
                break

        # load test data
        _logger.info('loading file %s' % datasets['test_batch'])
        data = serial.load(datasets['test_batch'])

        # process this data
        Xs = {'train': x[0:ntrain],
              'test': data['data'][0:ntest]}

        Ys = {'train': y[0:ntrain],
              'test': data['labels'][0:ntest]}

        X = numpy.cast['float32'](Xs[which_set])
        y = Ys[which_set]

        if isinstance(y, list):
            y = numpy.asarray(y).astype(dtype)

        if which_set == 'test':
            assert y.shape[0] == 10000
            y = y.reshape((y.shape[0], 1))

        if center:
            X -= 127.5
        self.center = center

        if rescale:
            X /= 127.5
        self.rescale = rescale

        if toronto_prepro:
            assert not center
            assert not gcn
            X = X / 255.
            if which_set == 'test':
                other = CIFAR10(which_set='train')
                oX = other.X
                oX /= 255.
                X = X - oX.mean(axis=0)
            else:
                X = X - X.mean(axis=0)
        self.toronto_prepro = toronto_prepro

        self.gcn = gcn
        if gcn is not None:
            gcn = float(gcn)
            X = global_contrast_normalize(X, scale=gcn)

        if start is not None:
            # This needs to come after the prepro so that it doesn't
            # change the pixel means computed above for toronto_prepro
            assert start >= 0
            assert stop > start
            assert stop <= X.shape[0]
            X = X[start:stop, :]
            y = y[start:stop, :]
            assert X.shape[0] == y.shape[0]

        if which_set == 'test':
            assert X.shape[0] == 10000

        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                                  axes)

        super(CIFAR10, self).__init__(X=X, y=y, view_converter=view_converter,
                                      y_labels=self.n_classes)

        assert not contains_nan(self.X)

        if preprocessor:
            preprocessor.apply(self)
Example #32
    def __init__(self, which_set, center=False, rescale=False, gcn=None,
                 start=None, stop=None, axes=('b', 0, 1, 'c'),
                 toronto_prepro=False, preprocessor=None):
        # note: there is no such thing as the cifar10 validation set;
        # pylearn1 defined one but really it should be user-configurable
        # (as it is here)

        self.axes = axes

        # we define here:
        dtype = 'uint8'
        ntrain = 50000
        nvalid = 0  # artefact, we won't use it
        ntest = 10000

        # we also expose the following details:
        self.img_shape = (3, 32, 32)
        self.img_size = numpy.prod(self.img_shape)
        self.n_classes = 100
        # make sure that this is working (we can also copy it from meta file)
        self.label_names = range(1900, 2000)

        import cPickle
        fo = open('datasets/data_batch', 'rb')
        data_dict = cPickle.load(fo)
        fo.close()
        
        lenx = numpy.ceil((ntrain + nvalid) / 10000.) * 10000
        x = numpy.zeros((lenx, self.img_size), dtype=dtype)
        y = numpy.zeros((lenx, 1), dtype=dtype)

        # load train data
        x[0:8305, :] = data_dict['data']
        y[0:8305, 0] = data_dict['labels']
        
        # load test data
        #_logger.info('loading file %s' % datasets['test_batch'])
        #data = serial.load(datasets['test_batch'])

        # process this data
        #Xs = {'train': x[0:ntrain],
        #      'test': data['data'][0:ntest]}

        #Ys = {'train': y[0:ntrain],
        #      'test': data['labels'][0:ntest]}

        X = numpy.cast['float32'](x[0:8305])
        y = y[0:8305]

        if isinstance(y, list):
            y = numpy.asarray(y).astype(dtype)

        self.center = center

        self.rescale = rescale

        self.toronto_prepro = toronto_prepro

        self.gcn = gcn
        if gcn is not None:
            gcn = float(gcn)
            X = global_contrast_normalize(X, scale=gcn)

        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                                  axes)

        super(Timeliner, self).__init__(X=X, y=y,
                                        view_converter=view_converter,
                                        y_labels=self.n_classes)

        assert not contains_nan(self.X)

        if preprocessor:
            preprocessor.apply(self)
Example #33
if __name__ == "__main__":

    #Load dataset
    # 7:3 ratio for train:test
    train_x, test_x, train_y, test_y = unpack_facedataset()

    # preprocess images with the GCN and ZCA objects:
    # normalize, then apply ZCA whitening
    # (normalized data is only used for inversion, not for training)
    try:
        zca = load("faces/zca.data")
    except Exception as e:
        print("Failed to load preprocessed data from disk, computing zca")
        train_x_normalized = global_contrast_normalize(train_x * 255,
                                                       scale=55.)
        zca = ZCA()
        zca.fit(train_x_normalized)
        save("faces/zca.data", zca)

    x = tf.compat.v1.placeholder(tf.float32, shape=[None, 112 * 92])
    y_ = tf.compat.v1.placeholder(tf.float32, shape=[None, 40])
    model = Model(x, y_)
    session = tf.compat.v1.InteractiveSession()
    session.run(tf.compat.v1.global_variables_initializer())
    #print(f"test {test_y.shape} t: {type(test_y)} ; train {train_y.shape} t: {type(train_y)}")
    model.train(train_x, train_y, session, test_x, test_y, 250)

    perform_inversion(zca, test_x[0::3], model, session)
    # perform_inversion(train_x, test_x[0::3], model, session)
Example #34
    def __init__(self,
                 lfw_path,
                 filelist_path,
                 embedding_file=None,
                 center=False,
                 scale=False,
                 start=None,
                 stop=None,
                 gcn=None,
                 shuffle=False,
                 rng=None,
                 seed=132987,
                 axes=('b', 0, 1, 'c'),
                 img_shape=(3, 250, 250)):
        self.axes = axes

        self.img_shape = img_shape
        C, H, W = img_shape
        self.img_size = np.prod(self.img_shape)

        files = []
        with open(filelist_path, 'r') as filelist_f:
            files = [line.strip() for line in filelist_f]

        # Load raw pixel integer values
        dtype = 'uint8'
        X = np.zeros((len(files), W, H, C), dtype=dtype)
        img_ids = []

        for i, line in enumerate(files):
            if '\t' in line:
                # New format: contains image IDs
                img_path, img_id = line.strip().split('\t')
                img_ids.append(int(img_id))
            else:
                img_path = line.strip()

            full_path = os.path.join(lfw_path, img_path)
            im = image.load(full_path, rescale_image=False, dtype=dtype)

            # Handle grayscale images which may not have RGB channels
            if len(im.shape) == 2:
                W, H = im.shape

                # Repeat image 3 times across axis 2
                im = im.reshape(W, H, 1).repeat(3, 2)

            # Store the image in the (W, H, C) buffer
            X[i] = im

        # Cast to float32, center / scale if necessary
        X = np.cast['float32'](X)

        # Create dense design matrix from topological view
        X = X.reshape(X.shape[0], -1)

        # Prepare img_ids
        if embedding_file is not None:
            if len(img_ids) != len(files):
                raise ValueError("You must provide a filelist with indexes "
                                 "into the embedding array for each image.")
        img_ids = np.array(img_ids, dtype='uint32')

        if center and scale:
            X[:] -= 127.5
            X[:] /= 127.5
        elif center:
            X[:] -= 127.5
        elif scale:
            X[:] /= 255.

        self.gcn = gcn
        if gcn is not None:
            gcn = float(gcn)
            X = global_contrast_normalize(X, scale=gcn)

        if shuffle:
            rng = make_np_rng(rng, seed, which_method='permutation')
            rand_idx = rng.permutation(len(X))

            X = X[rand_idx]
            img_ids = img_ids[rand_idx]

        if start is not None:
            assert start >= 0
            assert stop > start
            assert stop <= X.shape[0]

            X = X[start:stop]

            if len(img_ids) > 0:
                img_ids = img_ids[start:stop]

        # Load embeddings if provided
        Y = None
        if embedding_file is not None:
            embeddings = np.load(embedding_file)['arr_0']
            assert embeddings.shape[0] >= len(files)

            Y = embeddings[img_ids].astype(theano.config.floatX)

        # Create a view converter for retrieving the topological view
        self.view_converter = dense_design_matrix.DefaultViewConverter(
            (W, H, C), axes)

        # init super class
        super(LFW, self).__init__(X=X, y=Y)

        assert not contains_nan(self.X)

        # Another hack: rename 'targets' to match model expectations
        if embedding_file is not None:
            space, (X_source, y_source) = self.data_specs
            self.data_specs = (space, (X_source, 'condition'))
Example #35
def test_std_norm():
    rng = numpy.random.RandomState(0)
    X = abs(rng.randn(50, 70))
    Y = global_contrast_normalize(X, use_std=True, scale=5)
    numpy.testing.assert_allclose(Y.std(axis=1, ddof=1), 5)
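A companion check (a sketch, not from the source): with use_std left at its
default of False, `scale` fixes each row's L2 norm rather than its standard
deviation. Assuming the same imports as the test above:

def test_norm_scale_sketch():
    rng = numpy.random.RandomState(0)
    X = rng.randn(50, 70)
    Y = global_contrast_normalize(X, scale=5)
    numpy.testing.assert_allclose(numpy.sqrt((Y ** 2).sum(axis=1)), 5)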
Example #36
    for i in xrange(0, data_size, batch_size):
        stop = i + numpy.mod(data_size, batch_size) if i >= last else i + batch_size
        data[i:stop, :,:,0] = lecun_lcn(data[i:stop,:,:,0].astype('float32'), img_shape, kernel_size)


    return data
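The fragment above begins mid-function; here is a sketch of the enclosing
batching helper it appears to come from. The name apply_lcn and its signature
are inferred from the call site below, and lecun_lcn is assumed to already be
in scope, so treat this as a reconstruction, not the original:

import numpy

def apply_lcn(data, img_shape=[48, 48], kernel_size=5, batch_size=128):
    # Walk the first axis in fixed-size batches; the `last` check lets the
    # final, possibly smaller, remainder batch be processed in one go.
    data_size = data.shape[0]
    last = int(numpy.floor(data_size / float(batch_size))) * batch_size
    for i in xrange(0, data_size, batch_size):
        stop = i + numpy.mod(data_size, batch_size) if i >= last else i + batch_size
        data[i:stop, :, :, 0] = lecun_lcn(
            data[i:stop, :, :, 0].astype('float32'), img_shape, kernel_size)
    return data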


preprocess = True
if preprocess:
    print "Pre-processing the data"
    features = []
    labels = []
    for item, y in zip(data_x, data_y):
        data_shape = item.shape
        item = item / 255.
        item = global_contrast_normalize(item.reshape((data_shape[0], 48*48))).reshape(data_shape)
        item = apply_lcn(item, img_shape = [48,48], kernel_size=5)
        features.append(item.astype('float32'))
        labels.append(y)

    print "Done pre-preprocessing"

data = {'data_x' : features, 'data_y' : labels, 'clip_ids' : clip_ids}

#save_path = "/data/lisa/data/faces/EmotiW/preproc/samira/KGL-AFEW/"
save_path = "/data/lisa/data/faces/EmotiW/preproc/samira/KGLIS-AFEWIS/"
serial.save(save_path + 'afew2_valid_prep.pkl', data)
#serial.save(save_path + 'afew2_train_prep.pkl', data)

Example #37
image = paramgraphics.mat_to_img(batch1_data[:100, :].T,
                                 dim_input,
                                 colorImg=colorImg,
                                 scale=True)
image.save(saveDir + 'svhn_train.png', 'PNG')
image = paramgraphics.mat_to_img(batch2_data[:100, :].T,
                                 dim_input,
                                 colorImg=colorImg,
                                 scale=True)
image.save(saveDir + 'svhn_valid.png', 'PNG')
image = paramgraphics.mat_to_img(batch3_data[:100, :].T,
                                 dim_input,
                                 colorImg=colorImg,
                                 scale=True)
image.save(saveDir + 'svhn_test.png', 'PNG')

if preprocessing == 'gcn_var':
    batch1_data = pypp.global_contrast_normalize(batch1_data,
                                                 subtract_mean=True,
                                                 use_std=True)
    batch2_data = pypp.global_contrast_normalize(batch2_data,
                                                 subtract_mean=True,
                                                 use_std=True)
    batch3_data = pypp.global_contrast_normalize(batch3_data,
                                                 subtract_mean=True,
                                                 use_std=True)
elif preprocessing == 'gcn_norm':
    batch1_data = pypp.global_contrast_normalize(batch1_data,
                                                 subtract_mean=True)
    batch2_data = pypp.global_contrast_normalize(batch2_data,
                                                 subtract_mean=True)
    batch3_data = pypp.global_contrast_normalize(batch3_data,
                                                 subtract_mean=True)
    print batch1_data.shape
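A hedged note on the two branches above: they differ only in the divisor.
'gcn_var' (use_std=True) scales each mean-centered row to unit standard
deviation, while 'gcn_norm' scales it to unit L2 norm. A small sketch with
stand-in data:

import numpy as np
from pylearn2.expr.preprocessing import global_contrast_normalize

X = np.random.randn(4, 100)
X_var = global_contrast_normalize(X, subtract_mean=True, use_std=True)
X_norm = global_contrast_normalize(X, subtract_mean=True)
print(X_var.std(axis=1, ddof=1))           # ~1 for every row
print(np.sqrt((X_norm ** 2).sum(axis=1)))  # ~1 for every row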
    def __init__(self,
                 which_set,
                 data_path=None,
                 center=True,
                 rescale=True,
                 gcn=True,
                 specs=True,
                 foldi=1,
                 foldn=10,
                 filestr="feature2086-5-{}.pkl"):
        self.class_name = ['neg', 'cin1','cin2','cin3','cancer']
        # load data
        self.specs = specs
        self.filestr = filestr

        if which_set == 'valid':
            i = (foldi) % foldn
            filepath = self.filestr.format(str(i + 1))
            filepath = self.dirpath + filepath
            filepath = serial.preprocess(filepath)
            X, Y = self.loadi(filepath)
        elif which_set == 'test':
            i = (foldi - 1) % foldn
            filepath = self.filestr.format(str(i + 1))
            filepath = self.dirpath + filepath
            filepath = serial.preprocess(filepath)
            X, Y = self.loadi(filepath)
        else:
            indexs = range(foldn)
            i = foldi % foldn
            indexs.pop(i)
            if i == 0:
                indexs.pop(-1)
            else:
                i = (foldi - 1) % foldn
                indexs.pop(i)
            Xs = []
            Ys = []
            for i in indexs:
                filepath = self.filestr.format(str(i + 1))
                filepath = self.dirpath + filepath
                filepath = serial.preprocess(filepath)
                X, Y = self.loadi(filepath)
                Xs.append(X)
                Ys.append(Y)
            X = np.vstack(Xs)
            Y = np.hstack(Ys)

        print X.shape, Y.shape
        # col0s = np.where(Y == 0)[0]
        # print len(col0s)

        X = X.astype(float)  # astype returns a copy; assign it back
        axis = 0
        _max = np.max(X, axis=axis)
        _min = np.min(X, axis=axis)
        _mean = np.mean(X, axis=axis)
        _std = np.std(X, axis=axis)
        _scale = _max - _min

        if gcn:
            X[:, :850] = global_contrast_normalize(X[:, :850], scale=gcn)
            X[:, 850:850+556] = global_contrast_normalize(X[:, 850:850 + 556], scale=gcn)
            X[:, 850+556:] = global_contrast_normalize(X[:, 850+556:], scale=gcn)
        # else:
        #     if center:
        #         X[:, ] -= _mean
        #     if rescale:
        #         X[:, ] /= _scale


        # topo_view = X.reshape(X.shape[0], X.shape[1], 1, 1)
        # y = np.reshape(Y, (Y.shape[0], 1))
        # y = np.atleast_2d(Y).T
        self.raw_X = X
        self.raw_y = Y
        y = np.zeros((Y.shape[0], 5))
        for i in range(Y.shape[0]):
            j = Y[i]
            y[i, j] = 1
        # print y[:, :]
        # y[:, 0] = Y
        # y[:, 1] = 1 - Y
        print "Load CIN_FEATURE2086_5 data: {}, with size X:{}, y:{}".format(data_path, X.shape, y.shape)
        super(CIN_FEATURE2086_5, self).__init__(X=X, y=y)
        # super(CIN_FEATURE2, self).__init__(topo_view=topo_view, y=y, y_labels=2)

        if specs:
            assert X.shape[1] == (850 + 556 + 680)
            self.init_data_specs()
            self.feature850 = X[:, 0:850]
            self.feature556 = X[:, 850:850 + 556]
            self.feature680 = X[:, 850 + 556:]
            self.y = y
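The element-wise one-hot loop above can be replaced by a single vectorized
indexing operation (a sketch with stand-in labels, assuming Y holds integers
in [0, 5)):

import numpy as np

Y = np.array([0, 3, 4, 1])  # stand-in labels
y = np.eye(5)[Y]            # one identity row per label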
Example #39
    def __init__(
        self,
        which_set,
        center=False,
        rescale=False,
        gcn=None,
        one_hot=False,
        start=None,
        stop=None,
        axes=("b", 0, 1, "c"),
        toronto_prepro=False,
        preprocessor=None,
    ):

        # note: there is no such thing as the cifar10 validation set;
        # pylearn1 defined one but really it should be user-configurable
        # (as it is here)

        self.axes = axes

        # we define here:
        dtype = "uint8"
        ntrain = 50000
        nvalid = 0  # artefact, we won't use it
        ntest = 10000

        # we also expose the following details:
        self.img_shape = (3, 32, 32)
        self.img_size = np.prod(self.img_shape)
        self.n_classes = 10
        self.label_names = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]

        if which_set == "train":

            #             pkl = self._unpickle(os.environ['PYLEARN2_DATA_PATH']+
            #                                  'cifar10/pylearn2_gcn_whitened/train.pkl')
            # pkl = self._unpickle(os.environ['PYLEARN2_DATA_PATH']+
            #         'cifar10/pylearn2_gcn_whitened/test.pkl')
            # X = pkl.X
            # y = pkl.y

            X = np.load(os.environ["PYLEARN2_DATA_PATH"] + "/cifar10/train_X.npy")
            y = np.load(os.environ["PYLEARN2_DATA_PATH"] + "/cifar10/train_y.npy")
            X = np.cast["float32"](X)
            y = np.cast["float32"](y)

        elif which_set == "test":
            #             pkl = self._unpickle(os.environ['PYLEARN2_DATA_PATH']+
            #                                  'cifar10/pylearn2_gcn_whitened/test.pkl')
            #             X = pkl.X
            #             y = pkl.y
            X = np.load(os.environ["PYLEARN2_DATA_PATH"] + "/cifar10/test_X.npy")
            y = np.load(os.environ["PYLEARN2_DATA_PATH"] + "/cifar10/test_y.npy")
            X = np.cast["float32"](X)
            y = np.cast["float32"](y)

        #         X = np.cast['float32'](Xs[which_set])
        #         y = Ys[which_set]

        if which_set == "test":
            assert X.shape[0] == 10000

        if isinstance(y, list):
            y = np.asarray(y)

        if center:
            X -= 127.5
        self.center = center

        if rescale:
            X /= 127.5
        self.rescale = rescale

        if toronto_prepro:
            assert not center
            assert not gcn
            X = X / 255.0
            if which_set == "test":
                other = CIFAR10(which_set="train")
                oX = other.X
                oX /= 255.0
                X = X - oX.mean(axis=0)
            else:
                X = X - X.mean(axis=0)
        self.toronto_prepro = toronto_prepro

        self.gcn = gcn
        if gcn is not None:
            gcn = float(gcn)
            X = global_contrast_normalize(X, scale=gcn)

        if start is not None:
            # This needs to come after the prepro so that it doesn't change the pixel
            # means computed above for toronto_prepro
            assert start >= 0
            assert stop > start
            assert stop <= X.shape[0]
            X = X[start:stop, :]
            y = y[start:stop]
            assert X.shape[0] == y.shape[0]

        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3), axes)

        if which_set == "train":
            length = X.shape[0]

            def search_right_label(desired_label, i):
                for idx in xrange(i, length):
                    if y[idx] == desired_label:
                        return idx

            def swap_ele(index, i):
                # Copy, don't alias: a bare X[i] is a view of the row about
                # to be overwritten, which would turn the swap into a
                # duplication of X[index].
                x_tmp = X[i].copy()
                X[i] = X[index]
                X[index] = x_tmp

                y_tmp = y[i]
                y[i] = y[index]
                y[index] = y_tmp

            desired_label = 0
            for i in xrange(length):
                desired_label = i % 10
                if y[i] != desired_label:
                    index = search_right_label(desired_label, i)
                    swap_ele(index, i)

            for i in xrange(length - 100, length):
                print y[i]

        self.one_hot = one_hot
        if one_hot:
            one_hot = np.zeros((y.shape[0], 10), dtype="float32")
            for i in xrange(y.shape[0]):
                one_hot[i, y[i]] = 1.0
            y = one_hot

        super(My_CIFAR10, self).__init__(X=X, y=y, view_converter=view_converter)

        assert not np.any(np.isnan(self.X))

        if preprocessor:
            preprocessor.apply(self)
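A hedged aside on the reordering loop above: for the balanced CIFAR-10
training set (exactly 5,000 examples per class), the same label-interleaved
order can be computed without the quadratic search, assuming y is still the
flat integer label vector at that point:

import numpy as np

# Stable-sort by label so each class forms one row of a (10, 5000) grid,
# then read the grid column-wise so labels cycle 0, 1, ..., 9, 0, 1, ...
order = np.argsort(y[:50000], kind="mergesort").reshape(10, 5000).T.ravel()
X, y = X[order], y[order]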
Example #40
    def __init__(self, which_set, center=False, rescale=False, gcn=None,
                 one_hot=None, start=None, stop=None, axes=('b', 0, 1, 'c'),
                 toronto_prepro = False, preprocessor = None, two_image=False):
        # note: there is no such thing as the cifar10 validation set;
        # pylearn1 defined one but really it should be user-configurable
        # (as it is here)

        self.axes = axes

        # we define here:
        dtype = 'uint8'
        ntrain = 50000
        nvalid = 0  # artefact, we won't use it
        ntest = 10000

        # we also expose the following details:
        self.img_shape  = (3, 32, 32)
        self.img_shape2 = (32, 32, 3)
        self.img_size = N.prod(self.img_shape)
        self.n_classes = 10
        self.label_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
                            'dog', 'frog', 'horse', 'ship', 'truck']

        # prepare loading
        fnames = ['data_batch_%i' % i for i in range(1, 6)]
        lenx = N.ceil((ntrain + nvalid) / 10000.)*10000
        x = N.zeros((lenx, self.img_size), dtype=dtype)
        y = N.zeros((lenx, 1), dtype=dtype)

        # load train data
        nloaded = 0
        for i, fname in enumerate(fnames):
            data = CIFAR10._unpickle(fname)
            x[i*10000:(i+1)*10000, :] = data['data']
            y[i*10000:(i+1)*10000, 0] = data['labels']
            nloaded += 10000
            if nloaded >= ntrain + nvalid + ntest:
                break

        # load test data
        data = CIFAR10._unpickle('test_batch')

        # Two-valued (binary) images.
        # Note: the other preprocessing options cannot be combined with two_image.
        print x.shape

        if two_image:
            from PIL import Image
            two_value_x = []
            self.img_shape  = (1, 32, 32)
            self.img_shape2 = (32, 32, 1)

            for i, pixel in enumerate(x.reshape(50000, 3, 32, 32)):
                if i % 1000 == 0:
                    print i
                pixel = np.transpose(pixel, (1, 2, 0))
                test_img = Image.new("RGB", (32, 32), (255, 0, 0))
                # Use `px`, not `x`: in Python 2 the comprehension variable
                # leaks into the enclosing scope and would shadow the design
                # matrix.
                test_img.putdata([tuple(px.tolist()) for px in pixel.reshape(1024, 3)])
                two_value_x.append([px for px in test_img.convert("1").getdata()])
            x = np.asarray(two_value_x)

        # process this data
        Xs = {'train': x[0:ntrain],
              'test': data['data'][0:ntest]}

        Ys = {'train': y[0:ntrain],
              'test': data['labels'][0:ntest]}

        X = N.cast['float32'](Xs[which_set])
        y = Ys[which_set]

        if isinstance(y, list):
            y = np.asarray(y).astype(dtype)

        if which_set == 'test':
            assert y.shape[0] == 10000
            y = y.reshape((y.shape[0], 1))

        max_labels = 10
        if one_hot is not None:
            warnings.warn("the `one_hot` parameter is deprecated. To get "
                          "one-hot encoded targets, request that they "
                          "live in `VectorSpace` through the `data_specs` "
                          "parameter of this dataset's iterator method. "
                          "`one_hot` will be removed on or after "
                          "September 20, 2014.", stacklevel=2)

        if center:
            X -= 127.5
        self.center = center

        if rescale:
            X /= 127.5
        self.rescale = rescale

        if toronto_prepro:
            assert not center
            assert not gcn
            X = X / 255.
            if which_set == 'test':
                other = CIFAR10(which_set='train')
                oX = other.X
                oX /= 255.
                X = X - oX.mean(axis=0)
            else:
                X = X - X.mean(axis=0)
        self.toronto_prepro = toronto_prepro

        self.gcn = gcn
        if gcn is not None:
            gcn = float(gcn)
            X = global_contrast_normalize(X, scale=gcn)

        if start is not None:
            # This needs to come after the prepro so that it doesn't
            # change the pixel means computed above for toronto_prepro
            assert start >= 0
            assert stop > start
            assert stop <= X.shape[0]
            X = X[start:stop, :]
            y = y[start:stop, :]
            assert X.shape[0] == y.shape[0]

        if which_set == 'test':
            assert X.shape[0] == 10000

        # view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
        #                                                           axes)
        view_converter = dense_design_matrix.DefaultViewConverter(self.img_shape2,
                                                                  axes)

        super(CIFAR10, self).__init__(X=X, y=y, view_converter=view_converter,
                                      y_labels=self.n_classes)

        assert not contains_nan(self.X)

        if preprocessor:
            preprocessor.apply(self)
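A hedged aside on the PIL round-trip above: convert("1") applies dithering.
When plain thresholding is acceptable, the same binarization can be
approximated in numpy alone (a sketch; `imgs` is an assumed uint8 array of
shape (N, 3, 32, 32)):

import numpy as np

def binarize(imgs, threshold=127):
    # ITU-R 601 luma, the same weights PIL uses for convert("L").
    luma = 0.299 * imgs[:, 0] + 0.587 * imgs[:, 1] + 0.114 * imgs[:, 2]
    return (luma > threshold).reshape(imgs.shape[0], -1).astype('float32')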
Example #41
    def __init__(
        self,
        which_set,
        center=False,
        rescale=False,
        gcn=None,
        one_hot=False,
        start=None,
        stop=None,
        axes=("b", 0, 1, "c"),
        toronto_prepro=False,
        preprocessor=None,
    ):
        """
        Parameters
        ----------
        which_set : str
            One of 'train', 'test'
        gcn : float, optional
            Multiplicative constant to use for global contrast normalization.
            If None, no global contrast normalization is applied.

        .. todo::

            WRITEME
        """

        # note: there is no such thing as the cifar10 validation set;
        # pylearn1 defined one but really it should be user-configurable
        # (as it is here)

        self.axes = axes

        # we define here:
        dtype = "uint8"
        ntrain = 50000
        nvalid = 0  # artefact, we won't use it
        ntest = 10000

        # we also expose the following details:
        self.img_shape = (3, 32, 32)
        self.img_size = N.prod(self.img_shape)
        self.n_classes = 10
        self.label_names = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]

        # prepare loading
        fnames = ["data_batch_%i" % i for i in range(1, 6)]
        lenx = N.ceil((ntrain + nvalid) / 10000.0) * 10000
        x = N.zeros((lenx, self.img_size), dtype=dtype)
        y = N.zeros(lenx, dtype=dtype)

        # load train data
        nloaded = 0
        for i, fname in enumerate(fnames):
            data = CIFAR10._unpickle(fname)
            x[i * 10000 : (i + 1) * 10000, :] = data["data"]
            y[i * 10000 : (i + 1) * 10000] = data["labels"]
            nloaded += 10000
            if nloaded >= ntrain + nvalid + ntest:
                break

        # load test data
        data = CIFAR10._unpickle("test_batch")

        # process this data
        Xs = {"train": x[0:ntrain], "test": data["data"][0:ntest]}

        Ys = {"train": y[0:ntrain], "test": data["labels"][0:ntest]}

        X = N.cast["float32"](Xs[which_set])
        y = Ys[which_set]

        if isinstance(y, list):
            y = np.asarray(y)

        if which_set == "test":
            assert y.shape[0] == 10000

        self.one_hot = one_hot
        if one_hot:
            one_hot = np.zeros((y.shape[0], 10), dtype="float32")
            for i in xrange(y.shape[0]):
                one_hot[i, y[i]] = 1.0
            y = one_hot

        if center:
            X -= 127.5
        self.center = center

        if rescale:
            X /= 127.5
        self.rescale = rescale

        if toronto_prepro:
            assert not center
            assert not gcn
            X = X / 255.0
            if which_set == "test":
                other = CIFAR10(which_set="train")
                oX = other.X
                oX /= 255.0
                X = X - oX.mean(axis=0)
            else:
                X = X - X.mean(axis=0)
        self.toronto_prepro = toronto_prepro

        self.gcn = gcn
        if gcn is not None:
            gcn = float(gcn)
            X = global_contrast_normalize(X, scale=gcn)

        if start is not None:
            # This needs to come after the prepro so that it doesn't change the pixel
            # means computed above for toronto_prepro
            assert start >= 0
            assert stop > start
            assert stop <= X.shape[0]
            X = X[start:stop, :]
            y = y[start:stop]
            assert X.shape[0] == y.shape[0]

        if which_set == "test":
            assert X.shape[0] == 10000

        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3), axes)

        super(CIFAR10, self).__init__(X=X, y=y, view_converter=view_converter)

        assert not np.any(np.isnan(self.X))

        if preprocessor:
            preprocessor.apply(self)
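A short usage sketch (the exact values are assumptions, not from this
source): gcn takes a float giving the target norm per example, and 55. is the
value commonly used for 32x32 image datasets in pylearn2 examples:

train = CIFAR10(which_set='train', gcn=55., start=0, stop=45000)
valid = CIFAR10(which_set='train', gcn=55., start=45000, stop=50000)
test = CIFAR10(which_set='test', gcn=55.)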
Example #42
    def __init__(self, which_set, center = False, rescale = False, gcn = None,
            one_hot = False, start = None, stop = None, axes=('b', 0, 1, 'c'),
            toronto_prepro = False, preprocessor = None):
        # note: there is no such thing as the cifar10 validation set;
        # pylearn1 defined one but really it should be user-configurable
        # (as it is here)

        self.axes = axes

        # we define here:
        dtype  = 'uint8'
        ntrain = 50000
        nvalid = 0  # artefact, we won't use it
        ntest  = 10000

        # we also expose the following details:
        self.img_shape = (3,32,32)
        self.img_size = N.prod(self.img_shape)
        self.n_classes = 10
        self.label_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
                            'dog', 'frog','horse','ship','truck']

        # prepare loading
        fnames = ['data_batch_%i' % i for i in range(1,6)]
        lenx = N.ceil((ntrain + nvalid) / 10000.)*10000
        x = N.zeros((lenx,self.img_size), dtype=dtype)
        y = N.zeros(lenx, dtype=dtype)

        # load train data
        nloaded = 0
        for i, fname in enumerate(fnames):
            data = CIFAR10._unpickle(fname)
            x[i*10000:(i+1)*10000, :] = data['data']
            y[i*10000:(i+1)*10000] = data['labels']
            nloaded += 10000
            if nloaded >= ntrain + nvalid + ntest:
                break

        # load test data
        data = CIFAR10._unpickle('test_batch')

        # process this data
        Xs = {
                'train' : x[0:ntrain],
                'test'  : data['data'][0:ntest]
            }

        Ys = {
                'train' : y[0:ntrain],
                'test'  : data['labels'][0:ntest]
            }

        X = N.cast['float32'](Xs[which_set])
        y = Ys[which_set]

        if isinstance(y,list):
            y = np.asarray(y)

        if which_set == 'test':
            assert y.shape[0] == 10000


        self.one_hot = one_hot
        if one_hot:
            one_hot = np.zeros((y.shape[0],10),dtype='float32')
            for i in xrange(y.shape[0]):
                one_hot[i,y[i]] = 1.
            y = one_hot

        if center:
            X -= 127.5
        self.center = center

        if rescale:
            X /= 127.5
        self.rescale = rescale

        if toronto_prepro:
            assert not center
            assert not gcn
            X = X / 255.
            if which_set == 'test':
                other = CIFAR10(which_set='train')
                oX = other.X
                oX /= 255.
                X = X - oX.mean(axis=0)
            else:
                X = X - X.mean(axis=0)
        self.toronto_prepro = toronto_prepro

        self.gcn = gcn
        if gcn is not None:
            gcn = float(gcn)
            X = global_contrast_normalize(X, scale=gcn)

        if start is not None:
            # This needs to come after the prepro so that it doesn't change the pixel
            # means computed above for toronto_prepro
            assert start >= 0
            assert stop > start
            assert stop <= X.shape[0]
            X = X[start:stop, :]
            y = y[start:stop]
            assert X.shape[0] == y.shape[0]

        if which_set == 'test':
            assert X.shape[0] == 10000

        view_converter = dense_design_matrix.DefaultViewConverter((32,32,3), axes)

        super(CIFAR10, self).__init__(X=X, y=y, view_converter=view_converter)

        assert not np.any(np.isnan(self.X))

        if preprocessor:
            preprocessor.apply(self)