def load_data(self, type='train', batch=0, verbose=2):
    """
    Will load the data from the file and will return the data. The important thing to note
    is that all the datasets in :mod:`yann` all require a ``y`` or a variable to predict.
    In case of auto-encoder for instance, the thing to predict is the image itself.
    Setup dataset thusly.

    Args:
        type: ``train``, ``test`` or ``valid``. default is ``train``
        batch: Supply an integer
        verbose: Similar to verbose in toolbox.

    Returns:
        numpy.ndarray: ``data_x, data_y``
    """
    if verbose >= 3:
        # single-argument parenthesized print works on both python 2 and python 3
        print("... loading " + type + " data batch " + str(batch))

    # 'with' guarantees the file handle is closed even if cPickle.load raises;
    # the original open()/close() pair leaked the handle on an unpickling error.
    with open(self.dataset + '/' + type + '/batch_' + str(batch) + '.pkl', 'rb') as f:
        data_x, data_y = cPickle.load(f)

    if verbose >= 3:
        print("... data is loaded")

    # Theano recommends storing on gpus only as floatX and casts them to ints during use.
    # I don't know why, but I am following their recommendations blindly.
    data_x = check_type(data_x, theano.config.floatX)
    data_y = check_type(data_y, theano.config.floatX)

    return data_x, data_y
def load_data(self, type='train', batch=0, verbose=2):
    """
    Will load the data from the file and will return the data. The important thing to note
    is that all the datasets in :mod:`yann` all require a ``y`` or a variable to predict.
    In case of auto-encoder for instance, the thing to predict is the image itself.
    Setup dataset thusly.

    Args:
        type: ``train``, ``test`` or ``valid``. default is ``train``
        batch: Supply an integer
        verbose: Similar to verbose in toolbox.

    Returns:
        numpy.ndarray: ``data_x, data_y``
    """
    if verbose >= 3:
        # single-argument parenthesized print works on both python 2 and python 3
        print("... loading " + type + " data batch " + str(batch))

    # 'with' guarantees the file handle is closed even if cPickle.load raises;
    # the original open()/close() pair leaked the handle on an unpickling error.
    with open(self.dataset + '/' + type + '/batch_' + str(batch) + '.pkl', 'rb') as f:
        data_x, data_y = cPickle.load(f)

    if verbose >= 3:
        print("... data is loaded")

    # Theano recommends storing on gpus only as floatX and casts them to ints during use.
    # I don't know why, but I am following their recommendations blindly.
    data_x = check_type(data_x, theano.config.floatX)
    data_y = check_type(data_y, theano.config.floatX)

    return data_x, data_y
def one_hot_labels(self, y, verbose=1):
    """
    Function takes in labels and returns a one-hot encoding. Used for max-margin loss.

    Args:
        y: Labels to be encoded (1-D array of integer class ids).
        verbose: Typical as in the rest of the toolbox.

    Notes:
        ``self.n_classes``: Number of unique classes in the labels. This could be
        found out using the following:

        .. code-block: python

            import numpy
            n_classes = len(numpy.unique(y))

        This might be potentially dangerous in case of cached dataset. Although this is
        the default if ``n_classes`` is not provided as input to this module, I
        discourage anyone from using this.

    Returns:
        numpy ndarray: one-hot encoded label list.
    """
    if self.n_classes is False:
        if verbose >= 3:
            # parenthesized print works on both python 2 and python 3
            print("... Making a decision to create n_classes variable, not a good idea.")
        # Dangerous with cached/batched datasets: one batch may not contain every class.
        self.n_classes = len(numpy.unique(y))

    # y may arrive as floatX (load_data casts labels to floatX for gpu storage), but
    # fancy indexing below requires integer indices — cast explicitly.
    y = numpy.asarray(y).astype('int64')

    # -1/+1 encoding (rather than 0/1) as required by the max-margin loss.
    # found this technique online somewhere, forgot where couldn't cite.
    y1 = -1 * numpy.ones((y.shape[0], self.n_classes))
    y1[numpy.arange(y.shape[0]), y] = 1
    y1 = check_type(y1, theano.config.floatX)
    return y1
def one_hot_labels(self, y, verbose=1):
    """
    Function takes in labels and returns a one-hot encoding. Used for max-margin loss.

    Args:
        y: Labels to be encoded (1-D array of integer class ids).
        verbose: Typical as in the rest of the toolbox.

    Notes:
        ``self.n_classes``: Number of unique classes in the labels. This could be
        found out using the following:

        .. code-block: python

            import numpy
            n_classes = len(numpy.unique(y))

        This might be potentially dangerous in case of cached dataset. Although this is
        the default if ``n_classes`` is not provided as input to this module, I
        discourage anyone from using this.

    Returns:
        numpy ndarray: one-hot encoded label list.
    """
    if self.n_classes is False:
        if verbose >= 3:
            # parenthesized print works on both python 2 and python 3
            print("... Making a decision to create n_classes variable, not a good idea.")
        # Dangerous with cached/batched datasets: one batch may not contain every class.
        self.n_classes = len(numpy.unique(y))

    # y may arrive as floatX (load_data casts labels to floatX for gpu storage), but
    # fancy indexing below requires integer indices — cast explicitly.
    y = numpy.asarray(y).astype('int64')

    # -1/+1 encoding (rather than 0/1) as required by the max-margin loss.
    # found this technique online somewhere, forgot where couldn't cite.
    y1 = -1 * numpy.ones((y.shape[0], self.n_classes))
    y1[numpy.arange(y.shape[0]), y] = 1
    y1 = check_type(y1, theano.config.floatX)
    return y1