示例#1
0
def combine_cols(M, lambd, col_names):
    """Return a new column computed from existing columns of M.

    Parameters
    ----------
    M : structured array, or input accepted by utils.check_consistent
        Table whose columns are combined.
    lambd : (np.array, np.array, ...) -> np.array
        Function that receives the selected columns as separate
        positional arguments, in the order given by `col_names`, and
        produces a single column.
    col_names : list of str
        Names of columns to combine

    Returns
    -------
    The value produced by `lambd` (expected to be a single column).

    Raises
    ------
    Whatever utils.check_consistent raises when M and col_names do not
    agree.
    """
    # Work with the values handed back by check_consistent instead of the
    # raw arguments, so the columns are read from the checked table.
    # NOTE(review): assumes check_consistent returns [M, col_names] when
    # only those two are supplied -- confirm against utils.
    M, col_names = utils.check_consistent(M, col_names=col_names)

    return lambd(*[M[name] for name in col_names])
示例#2
0
def combine_cols(M, lambd, col_names):
    """Return a new column computed from existing columns of M.

    Parameters
    ----------
    M : structured array, or input accepted by utils.check_consistent
        Table whose columns are combined.
    lambd : (np.array, np.array, ...) -> np.array
        Function that receives the selected columns as separate
        positional arguments, in the order given by `col_names`, and
        produces a single column.
    col_names : list of str
        Names of columns to combine

    Returns
    -------
    The value produced by `lambd` (expected to be a single column).

    Raises
    ------
    Whatever utils.check_consistent raises when M and col_names do not
    agree.
    """
    # Work with the values handed back by check_consistent instead of the
    # raw arguments, so the columns are read from the checked table.
    # NOTE(review): assumes check_consistent returns [M, col_names] when
    # only those two are supplied -- confirm against utils.
    M, col_names = utils.check_consistent(M, col_names=col_names)

    return lambd(*[M[name] for name in col_names])
示例#3
0
 def test_check_consistent(self):
     """Exercise utils.check_consistent with valid and invalid arguments."""
     table_dtype = [('f0', int), ('f1', 'O'), ('f2', int)]
     M = np.array([(1, 'a', 100), (2, 'b', 200)], dtype=table_dtype)
     col = np.array([1.0, 2.0])
     col_names = ['f0', 'f1']
     check = utils.check_consistent

     # Consistent inputs: the supplied arguments come back as a list, in
     # the order M, col, col_names, with omitted ones left out.
     self.assertEqual(check(M, col, col_names), [M, col, col_names])
     self.assertEqual(check(M, col), [M, col])
     self.assertEqual(check(M, col_names=col_names), [M, col_names])
     self.assertEqual(
         check(M, col, col_names, n_rows=2, n_cols=3),
         [M, col, col_names])

     # Inconsistent inputs: each call below must raise ValueError.
     rejected_calls = (
         (({},), {}),
         ((M,), {'n_rows': 7}),
         ((M,), {'n_cols': 7}),
         ((M, col, {}), {}),
         ((M, np.array([1.0, 2.0, 3.0])), {}),
         ((M,), {'col_names': ['f0', 'not_a_col']}),
     )
     for args, kwargs in rejected_calls:
         self.assertRaises(ValueError, check, *args, **kwargs)
示例#4
0
 def __init__(self,
              M,
              labels,
              clfs=[{
                  'clf': RandomForestClassifier
              }],
              subsets=[{
                  'subset': s_i.SubsetNoSubset
              }],
              cvs=[{
                  'cv': KFold
              }],
              trials=None):
     """Validate the inputs and store the experiment configuration.

     Parameters
     ----------
     M : 2-dimensional np.ndarray, structured array, convertible input, or None
         Feature table. A plain ndarray is stored as-is; anything else is
         passed through utils.check_consistent and then cast with
         utils.cast_np_sa_to_nd. None means no data is attached.
     labels : column of labels, one per row of M
         Ignored when M is None.
     clfs : list of dict
         Classifier configurations; each 'clf' must be a subclass of
         BaseEstimator.
     subsets : list of dict
         Subset configurations; each 'subset' must be a subclass of
         s_i.BaseSubsetIter.
     cvs : list of dict
         Cross-validation configurations; each 'cv' must be a subclass
         of _PartitionIterator.
     trials : optional
         When given, clfs, subsets and cvs are stored without being
         validated.

     Raises
     ------
     ValueError
         If M is a plain ndarray that is not 2-dimensional.
     """
     # NOTE: the list defaults above are shared between calls; this
     # method never mutates them.
     if M is None:
         # Define every data attribute so later reads give None instead
         # of raising AttributeError.
         self.M = None
         self.labels = None
         self.col_names = None
     elif utils.is_nd(M) and not utils.is_sa(M):
         # nd_array, short circuit the usual type checking and coercion
         if M.ndim != 2:
             raise ValueError('Expected 2-dimensional array for M')
         self.M = M
         # range (not xrange) keeps this working on Python 2 and 3.
         self.col_names = ['f{}'.format(i) for i in range(M.shape[1])]
         self.labels = utils.check_col(labels,
                                       n_rows=M.shape[0],
                                       argument_name='labels')
     else:
         # M is either a structured array or something that should
         # be converted
         (M, self.labels) = utils.check_consistent(
             M, labels, col_argument_name='labels')
         self.col_names = M.dtype.names
         self.M = utils.cast_np_sa_to_nd(M)
     if trials is None:
         # Only validate the configuration lists when no trials were
         # supplied.
         clfs = utils.check_arguments(
             clfs, {'clf': lambda clf: issubclass(clf, BaseEstimator)},
             optional_keys_take_lists=True,
             argument_name='clfs')
         subsets = utils.check_arguments(
             subsets,
             {'subset': lambda subset: issubclass(subset,
                                                  s_i.BaseSubsetIter)},
             optional_keys_take_lists=True,
             argument_name='subsets')
         cvs = utils.check_arguments(
             cvs, {'cv': lambda cv: issubclass(cv, _PartitionIterator)},
             optional_keys_take_lists=True,
             argument_name='cvs')
     self.clfs = clfs
     self.subsets = subsets
     self.cvs = cvs
     self.trials = trials
示例#5
0
 def __init__(
         self,
         M,
         labels,
         clfs=[{'clf': RandomForestClassifier}],
         subsets=[{'subset': s_i.SubsetNoSubset}],
         cvs=[{'cv': KFold}],
         trials=None):
     """Validate the inputs and store the experiment configuration.

     Parameters
     ----------
     M : 2-dimensional np.ndarray, structured array, convertible input, or None
         Feature table. A plain ndarray is stored as-is; anything else is
         passed through utils.check_consistent and then cast with
         utils.cast_np_sa_to_nd. None means no data is attached.
     labels : column of labels, one per row of M
         Ignored when M is None.
     clfs : list of dict
         Classifier configurations; each 'clf' must be a subclass of
         BaseEstimator.
     subsets : list of dict
         Subset configurations; each 'subset' must be a subclass of
         s_i.BaseSubsetIter.
     cvs : list of dict
         Cross-validation configurations; each 'cv' must be a subclass
         of _PartitionIterator.
     trials : optional
         When given, clfs, subsets and cvs are stored without being
         validated.

     Raises
     ------
     ValueError
         If M is a plain ndarray that is not 2-dimensional.
     """
     # NOTE: the list defaults above are shared between calls; this
     # method never mutates them.
     if M is None:
         # Define every data attribute so later reads give None instead
         # of raising AttributeError.
         self.M = None
         self.labels = None
         self.col_names = None
     elif utils.is_nd(M) and not utils.is_sa(M):
         # nd_array, short circuit the usual type checking and coercion
         if M.ndim != 2:
             raise ValueError('Expected 2-dimensional array for M')
         self.M = M
         # range (not xrange) keeps this working on Python 2 and 3.
         self.col_names = ['f{}'.format(i) for i in range(M.shape[1])]
         self.labels = utils.check_col(
                 labels,
                 n_rows=M.shape[0],
                 argument_name='labels')
     else:
         # M is either a structured array or something that should
         # be converted
         (M, self.labels) = utils.check_consistent(
                 M,
                 labels,
                 col_argument_name='labels')
         self.col_names = M.dtype.names
         self.M = utils.cast_np_sa_to_nd(M)
     if trials is None:
         # Only validate the configuration lists when no trials were
         # supplied.
         clfs = utils.check_arguments(
                 clfs,
                 {'clf': lambda clf: issubclass(clf, BaseEstimator)},
                 optional_keys_take_lists=True,
                 argument_name='clfs')
         subsets = utils.check_arguments(
                 subsets,
                 {'subset': lambda subset: issubclass(subset, s_i.BaseSubsetIter)},
                 optional_keys_take_lists=True,
                 argument_name='subsets')
         cvs = utils.check_arguments(
                 cvs,
                 {'cv': lambda cv: issubclass(cv, _PartitionIterator)},
                 optional_keys_take_lists=True,
                 argument_name='cvs')
     self.clfs = clfs
     self.subsets = subsets
     self.cvs = cvs
     self.trials = trials