Exemplo n.º 1
0
def load_data(conf):
    """
    Load the dataset named in ``conf`` and return its splits.

    Parameters
    ----------
    conf : dict
        Configuration mapping. Must contain 'dataset'; may also contain
        'sparse', 'normalize', 'normalize_on_the_fly' and any keyword
        accepted by the underlying loader.

    Returns
    -------
    list
        The dataset subsets: sparse matrices, raw ndarrays (when
        normalizing on the fly), or Theano shared variables.
    """
    logger.info('... loading dataset')

    # Sparse datasets take a dedicated loading path.
    if conf.get('sparse', False):
        accepted = inspect.getargspec(load_sparse_dataset)[0][1:]
        subsets = load_sparse_dataset(conf['dataset'],
                                      **subdict(conf, accepted))
        valid, test = subsets[1], subsets[2]

        # Sparse TERRY data on LISA servers contains an extra null first
        # row in valid and test subsets.
        if conf['dataset'] == 'terry':
            valid, test = valid[1:], test[1:]
            assert valid.shape[0] == test.shape[0] == 4096, \
                'Sparse TERRY data loaded has wrong number of examples'

        result = [subsets[0], valid, test]
        if len(subsets) > 3:
            result.append(subsets[3])
        return result

    # Dense path: load as the usual ndarray.
    accepted = inspect.getargspec(load_ndarray_dataset)[0][1:]
    subsets = load_ndarray_dataset(conf['dataset'], **subdict(conf, accepted))

    # On-the-fly normalization hands back the raw arrays untouched.
    if conf.get('normalize_on_the_fly', False):
        return subsets

    def shared_dataset(data_x):
        """Wrap a single subset in a Theano shared variable."""
        if conf.get('normalize', True):
            return sharedX(data_x, borrow=True)
        else:
            return theano.shared(theano._asarray(data_x), borrow=True)

    return map(shared_dataset, subsets)
Exemplo n.º 2
0
def load_data(conf):
    """
    Loads a specified dataset according to the parameters in the dictionary

    Parameters
    ----------
    conf : WRITEME

    Returns
    -------
    WRITEME
    """
    print '... loading dataset'

    # Special case for sparse format
    if conf.get('sparse', False):
        expected = inspect.getargspec(load_sparse_dataset)[0][1:]
        data = load_sparse_dataset(conf['dataset'], **subdict(conf, expected))
        valid, test = data[1:3]

        # Sparse TERRY data on LISA servers contains an extra null first row in
        # valid and test subsets.
        if conf['dataset'] == 'terry':
            valid = valid[1:]
            test = test[1:]
            assert valid.shape[0] == test.shape[0] == 4096, \
                'Sparse TERRY data loaded has wrong number of examples'

        if len(data) == 3:
            return [data[0], valid, test]
        else:
            return [data[0], valid, test, data[3]]

    # Load as the usual ndarray
    expected = inspect.getargspec(load_ndarray_dataset)[0][1:]
    data = load_ndarray_dataset(conf['dataset'], **subdict(conf, expected))

    # Special case for on-the-fly normalization
    if conf.get('normalize_on_the_fly', False):
        return data

    # Allocate shared variables
    def shared_dataset(data_x):
        """Function that loads the dataset into shared variables"""
        if conf.get('normalize', True):
            return sharedX(data_x, borrow=True)
        else:
            return theano.shared(theano._asarray(data_x), borrow=True)

    return map(shared_dataset, data)
Exemplo n.º 3
0
def create_submission(conf, transform_valid, transform_test=None, features=None):
    """
    Create a submission file given a configuration dictionary and a
    computation function.

    The datasets are always reloaded here so that valid & test are
    guaranteed not to be permuted.
    """
    # Fall back to the valid transform when no test transform is given.
    transform_test = transform_valid if transform_test is None else transform_test

    # Reload the dataset with permutation of valid/test disabled.
    load_conf = subdict(
        conf, ['dataset', 'normalize', 'normalize_on_the_fly', 'sparse'])
    load_conf['randomize_valid'] = False
    load_conf['randomize_test'] = False
    _, valid_set, test_set = load_data(load_conf)[:3]

    # Dense datasets come back as Theano shared vars; unwrap them.
    # Sparse ones are already plain matrices.
    if not conf.get('sparse', False):
        valid_set = valid_set.get_value(borrow=True)
        test_set = test_set.get_value(borrow=True)

    # Optionally keep only the requested feature columns.
    if features is not None:
        valid_set = valid_set[:, features]
        test_set = test_set[:, features]

    # Compute the representations and write them out.
    save_submission(conf,
                    transform_valid(valid_set),
                    transform_test(test_set))
Exemplo n.º 4
0
 def fromdict(cls, conf, **kwargs):
     """Alternative constructor: build a block from a configuration dict."""
     kwargs.update(conf)
     accepted = []
     # Walk the MRO (cls is assumed to be first, per getmro()) and
     # collect the argument names of each __init__ on the chain.
     for klass in inspect.getmro(cls):
         spec = inspect.getargspec(klass.__init__)
         accepted += spec[0]
         # Stop as soon as an __init__ does not accept **kwargs.
         if spec[2] is None:
             break
     # Instantiate using only the recognized arguments.
     return cls(**subdict(kwargs, accepted))
Exemplo n.º 5
0
 def fromdict(cls, conf, **kwargs):
     """Build a block from a dictionary of constructor arguments."""
     merged = kwargs
     merged.update(conf)
     known_args = []
     # Gather __init__ parameter names along the MRO; getmro() yields
     # cls itself first, then its ancestors.
     for ancestor in inspect.getmro(cls):
         argspec = inspect.getargspec(ancestor.__init__)
         known_args.extend(argspec[0])
         # An __init__ without a **kwargs catch-all ends the search.
         if argspec[2] is None:
             break
     # Pass through only the arguments some __init__ actually declares.
     return cls(**subdict(merged, known_args))
Exemplo n.º 6
0
def create_submission(conf,
                      transform_valid,
                      transform_test=None,
                      features=None):
    """
    Create a submission file given a configuration dictionary and a
    computation function.

    The datasets are always reloaded here so that valid & test are
    guaranteed not to be permuted.

    Parameters
    ----------
    transform_valid : callable
        Function applied to the valid set to produce its representation.
    transform_test : callable, optional
        Function applied to the test set; defaults to ``transform_valid``.
    features : array_like, optional
        Column indices to keep before transforming; ``None`` keeps all.
    """
    if transform_test is None:
        transform_test = transform_valid

    # Reload the dataset with valid/test permutation turned off.
    load_conf = subdict(
        conf, ['dataset', 'normalize', 'normalize_on_the_fly', 'sparse'])
    load_conf.update(randomize_valid=False, randomize_test=False)
    splits = load_data(load_conf)
    valid_set = splits[1]
    test_set = splits[2]

    # Dense datasets are Theano shared vars and must be unwrapped;
    # sparse ones are already plain matrices.
    if not conf.get('sparse', False):
        valid_set = valid_set.get_value(borrow=True)
        test_set = test_set.get_value(borrow=True)

    # Optionally restrict to a subset of feature columns.
    if features is not None:
        valid_set = valid_set[:, features]
        test_set = test_set[:, features]

    # Compute representations for both splits.
    valid_repr = transform_valid(valid_set)
    test_repr = transform_test(test_set)

    # Serialize the representations into the submission file.
    save_submission(conf, valid_repr, test_repr)