def __init__(
        self,
        M,
        labels,
        clfs=[{'clf': RandomForestClassifier}],
        subsets=[{'subset': s_i.SubsetNoSubset}],
        cvs=[{'cv': KFold}],
        trials=None):
    """Store the data set and the classifier/subset/cv configuration.

    Parameters
    ----------
    M : 2-dimensional ndarray, structured array, convertible object, or None
        Feature matrix. A plain (non-structured) ndarray is stored as-is
        and its columns are named 'f0', 'f1', ...; anything else is passed
        through utils.check_consistent and then utils.cast_np_sa_to_nd.
        When None, only self.col_names is assigned (to None); self.M and
        self.labels are left unassigned.
    labels : column-like
        Validated together with M (utils.check_col or
        utils.check_consistent). Not touched when M is None.
    clfs : list of dict
        Checked by utils.check_arguments; the 'clf' entry must be a
        subclass of BaseEstimator.
    subsets : list of dict
        Checked by utils.check_arguments; the 'subset' entry must be a
        subclass of s_i.BaseSubsetIter.
    cvs : list of dict
        Checked by utils.check_arguments; the 'cv' entry must be a
        subclass of _PartitionIterator.
    trials : object or None
        When not None, clfs, subsets and cvs are stored without being
        validated.

    Raises
    ------
    ValueError
        If M is a plain ndarray that is not 2-dimensional.
    """
    # NOTE(review): the list defaults above are created once and shared by
    # every call. This method only reads them, but they are stored on the
    # instance unchanged when trials is given -- confirm nothing downstream
    # (including utils.check_arguments) mutates them in place.

    def _is_subclass_of(candidate, base):
        # issubclass raises TypeError when its first argument is not a
        # class (for example an estimator *instance*). Report that as a
        # failed check so the validator decides how to reject it.
        try:
            return issubclass(candidate, base)
        except TypeError:
            return False

    if M is not None:
        if utils.is_nd(M) and not utils.is_sa(M):
            # nd_array, short circuit the usual type checking and coercion
            if M.ndim != 2:
                raise ValueError('Expected 2-dimensional array for M')
            self.M = M
            # range works on both Python 2 and 3; the column count is small.
            self.col_names = ['f{}'.format(i) for i in range(M.shape[1])]
            self.labels = utils.check_col(
                labels,
                n_rows=M.shape[0],
                argument_name='labels')
        else:
            # M is either a structured array or something that should
            # be converted
            (M, self.labels) = utils.check_consistent(
                M,
                labels,
                col_argument_name='labels')
            self.col_names = M.dtype.names
            self.M = utils.cast_np_sa_to_nd(M)
    else:
        self.col_names = None

    if trials is None:
        # Only validate the grid specification when no pre-built trials
        # were supplied.
        clfs = utils.check_arguments(
            clfs,
            {'clf': lambda clf: _is_subclass_of(clf, BaseEstimator)},
            optional_keys_take_lists=True,
            argument_name='clfs')
        subsets = utils.check_arguments(
            subsets,
            {'subset': lambda subset: _is_subclass_of(
                subset, s_i.BaseSubsetIter)},
            optional_keys_take_lists=True,
            argument_name='subsets')
        cvs = utils.check_arguments(
            cvs,
            {'cv': lambda cv: _is_subclass_of(cv, _PartitionIterator)},
            optional_keys_take_lists=True,
            argument_name='cvs')

    self.clfs = clfs
    self.subsets = subsets
    self.cvs = cvs
    self.trials = trials
def __init__(
        self,
        M,
        labels,
        clfs=[{'clf': RandomForestClassifier}],
        subsets=[{'subset': s_i.SubsetNoSubset}],
        cvs=[{'cv': KFold}],
        trials=None):
    """Store the data set and the classifier/subset/cv configuration.

    Parameters
    ----------
    M : 2-dimensional ndarray, structured array, convertible object, or None
        Feature matrix. A plain (non-structured) ndarray is stored as-is
        and its columns are named 'f0', 'f1', ...; anything else is passed
        through utils.check_consistent and then utils.cast_np_sa_to_nd.
        When None, only self.col_names is assigned (to None); self.M and
        self.labels are left unassigned.
    labels : column-like
        Validated together with M (utils.check_col or
        utils.check_consistent). Not touched when M is None.
    clfs : list of dict
        Checked by utils.check_arguments; the 'clf' entry must be a
        subclass of BaseEstimator.
    subsets : list of dict
        Checked by utils.check_arguments; the 'subset' entry must be a
        subclass of s_i.BaseSubsetIter.
    cvs : list of dict
        Checked by utils.check_arguments; the 'cv' entry must be a
        subclass of _PartitionIterator.
    trials : object or None
        When not None, clfs, subsets and cvs are stored without being
        validated.

    Raises
    ------
    ValueError
        If M is a plain ndarray that is not 2-dimensional.
    """
    # NOTE(review): the list defaults above are created once and shared by
    # every call. This method only reads them, but they are stored on the
    # instance unchanged when trials is given -- confirm nothing downstream
    # (including utils.check_arguments) mutates them in place.

    def _is_subclass_of(candidate, base):
        # issubclass raises TypeError when its first argument is not a
        # class (for example an estimator *instance*). Report that as a
        # failed check so the validator decides how to reject it.
        try:
            return issubclass(candidate, base)
        except TypeError:
            return False

    if M is not None:
        if utils.is_nd(M) and not utils.is_sa(M):
            # nd_array, short circuit the usual type checking and coercion
            if M.ndim != 2:
                raise ValueError('Expected 2-dimensional array for M')
            self.M = M
            # range works on both Python 2 and 3; the column count is small.
            self.col_names = ['f{}'.format(i) for i in range(M.shape[1])]
            self.labels = utils.check_col(
                labels,
                n_rows=M.shape[0],
                argument_name='labels')
        else:
            # M is either a structured array or something that should
            # be converted
            (M, self.labels) = utils.check_consistent(
                M,
                labels,
                col_argument_name='labels')
            self.col_names = M.dtype.names
            self.M = utils.cast_np_sa_to_nd(M)
    else:
        self.col_names = None

    if trials is None:
        # Only validate the grid specification when no pre-built trials
        # were supplied.
        clfs = utils.check_arguments(
            clfs,
            {'clf': lambda clf: _is_subclass_of(clf, BaseEstimator)},
            optional_keys_take_lists=True,
            argument_name='clfs')
        subsets = utils.check_arguments(
            subsets,
            {'subset': lambda subset: _is_subclass_of(
                subset, s_i.BaseSubsetIter)},
            optional_keys_take_lists=True,
            argument_name='subsets')
        cvs = utils.check_arguments(
            cvs,
            {'cv': lambda cv: _is_subclass_of(cv, _PartitionIterator)},
            optional_keys_take_lists=True,
            argument_name='cvs')

    self.clfs = clfs
    self.subsets = subsets
    self.cvs = cvs
    self.trials = trials
def __check_args_row_select(M, args, argument_name='arguments'):
    """Run utils.check_arguments on row-selection directives for M.

    Parameters
    ----------
    M : structured array
        Only M.dtype.names is read, and only when a 'col_name' value is
        actually checked.
    args : object
        The directives to validate; handed to utils.check_arguments as-is.
    argument_name : str
        Forwarded to utils.check_arguments by keyword.

    Returns
    -------
    Whatever utils.check_arguments returns.
    """
    # Requirements per key: 'func' goes through the shared selector-function
    # validator, 'col_name' must name a field of M, and 'vals' carries no
    # predicate (None) -- presumably "required but unchecked"; confirm
    # against utils.check_arguments.
    requirements = {
        'func': __valid_col_select_func,
        'vals': None,
        'col_name': lambda candidate: candidate in M.dtype.names,
    }
    return utils.check_arguments(
        args,
        requirements,
        argument_name=argument_name)
def test_check_arguments(self):
    """utils.check_arguments accepts well-formed directives and raises
    ValueError for each malformed variant exercised below."""

    def is_callable(candidate):
        return hasattr(candidate, '__call__')

    def is_estimator_class(candidate):
        # isinstance guard first: issubclass would raise on a non-class.
        return (isinstance(candidate, type) and
                issubclass(candidate, BaseEstimator))

    row_rules = {
        'func': is_callable,
        'vals': None,
        'col_name': lambda c: isinstance(c, str),
    }
    col_rules = {'func': is_callable, 'vals': None}
    clf_rules = {'clf': is_estimator_class}

    good_rows = [
        {'func': modify.row_val_eq, 'vals': 8, 'col_name': 'f0'},
        {'func': modify.row_val_between, 'vals': (1, 2), 'col_name': 'f1'},
    ]
    good_cols = [
        {'func': modify.col_random, 'vals': 7},
        {'func': modify.col_val_eq, 'vals': 2},
    ]
    good_clfs = [
        {'clf': RandomForestClassifier,
         'n_estimators': [1, 10],
         'max_features': [10, 100, 1000]},
        {'clf': SVC, 'kernel': ['linear', 'poly']},
    ]

    # Well-formed input must pass without raising.
    utils.check_arguments(good_rows, row_rules)
    utils.check_arguments(good_cols, col_rules)
    utils.check_arguments(good_clfs, clf_rules,
                          optional_keys_take_lists=True)

    bad_row_inputs = [
        # a list element that is not a dict
        [2, good_rows[0]],
        # a bare dict instead of a list of dicts
        good_rows[0],
        # 'func' is not callable
        [{'func': modify.row_val_eq, 'vals': 8, 'col_name': 'f0'},
         {'func': 7, 'vals': (1, 2), 'col_name': 'f1'}],
        # 'col_name' is missing from the second directive
        [{'func': modify.row_val_eq, 'vals': 8, 'col_name': 'f0'},
         {'func': modify.row_val_between, 'vals': (1, 2)}],
    ]
    for bad_rows in bad_row_inputs:
        with self.assertRaises(ValueError):
            utils.check_arguments(bad_rows, row_rules)

    bad_clf_inputs = [
        # 'clf' is an estimator instance rather than a class
        [{'clf': RandomForestClassifier(),
          'n_estimators': [1, 10],
          'max_features': [10, 100, 1000]}],
        # optional key 'n_estimators' holds a scalar, not a list
        [{'clf': RandomForestClassifier,
          'n_estimators': 1,
          'max_features': [10, 100, 1000]}],
    ]
    for bad_clfs in bad_clf_inputs:
        with self.assertRaises(ValueError):
            utils.check_arguments(bad_clfs, clf_rules,
                                  optional_keys_take_lists=True)
def __check_args_row_select(M, args, argument_name='arguments'):
    """Run utils.check_arguments on row-selection directives for M.

    Parameters
    ----------
    M : structured array
        Only M.dtype.names is read, and only when a 'col_name' value is
        actually checked.
    args : object
        The directives to validate; handed to utils.check_arguments as-is.
    argument_name : str
        Label forwarded to utils.check_arguments.

    Returns
    -------
    Whatever utils.check_arguments returns.
    """
    required_keys = {
        'func': __valid_col_select_func,
        # No predicate for 'vals' -- presumably "required but unchecked";
        # confirm against utils.check_arguments.
        'vals': None,
        'col_name': lambda name: name in M.dtype.names,
    }
    # Forward argument_name by keyword, never positionally: callers that
    # pass both options to utils.check_arguments put
    # optional_keys_take_lists ahead of argument_name, so a third
    # positional argument would most likely bind to
    # optional_keys_take_lists instead of the label.
    return utils.check_arguments(
        args,
        required_keys,
        argument_name=argument_name)
def __check_args_col_select(args, argument_name='arguments'):
    """Run utils.check_arguments on column-selection directives.

    Parameters
    ----------
    args : object
        The directives to validate; handed to utils.check_arguments as-is.
    argument_name : str
        Label forwarded to utils.check_arguments.

    Returns
    -------
    Whatever utils.check_arguments returns.
    """
    required_keys = {
        'func': __valid_col_select_func,
        # No predicate for 'vals' -- presumably "required but unchecked";
        # confirm against utils.check_arguments.
        'vals': None,
    }
    # Forward argument_name by keyword, never positionally: callers that
    # pass both options to utils.check_arguments put
    # optional_keys_take_lists ahead of argument_name, so a third
    # positional argument would most likely bind to
    # optional_keys_take_lists instead of the label.
    return utils.check_arguments(
        args,
        required_keys,
        argument_name=argument_name)
def __check_args_col_select(args, argument_name='arguments'):
    """Run utils.check_arguments on column-selection directives.

    Parameters
    ----------
    args : object
        The directives to validate; handed to utils.check_arguments as-is.
    argument_name : str
        Forwarded to utils.check_arguments by keyword.

    Returns
    -------
    Whatever utils.check_arguments returns.
    """
    # 'func' goes through the shared selector-function validator; 'vals'
    # carries no predicate (None) -- presumably "required but unchecked";
    # confirm against utils.check_arguments.
    requirements = {
        'func': __valid_col_select_func,
        'vals': None,
    }
    return utils.check_arguments(
        args,
        requirements,
        argument_name=argument_name)