Пример #1
0
 def test_make_csv(self):
     """Compare the CSV written by ``Experiment.make_csv`` with the reference CSV.

     An experiment is built from a random-forest grid, an SVC grid, one
     row-count sweep subset and a stratified k-fold grid. The file whose
     path ``make_csv`` returns must have exactly the same contents as
     ``make_csv.csv`` under ``REFERENCE_PKL_PATH``.
     """
     M, y = uft.generate_test_matrix(1000, 5, 2, random_state=0)
     forest_grid = {
         'clf': RandomForestClassifier,
         'n_estimators': [10, 100],
         'max_depth': [5, 25],
         'random_state': [0],
     }
     svc_grid = {
         'clf': SVC,
         'kernel': ['linear', 'rbf'],
         'probability': [True],
         'random_state': [0],
     }
     row_sweep = {
         'subset': per.SubsetSweepNumRows,
         'num_rows': [[100, 200]],
         'random_state': [0],
     }
     fold_grid = {'cv': StratifiedKFold, 'n_folds': [2, 3]}
     exp = per.Experiment(
         M, y,
         clfs=[forest_grid, svc_grid],
         subsets=[row_sweep],
         cvs=[fold_grid])
     result_path = exp.make_csv()
     ctrl_path = os.path.join(REFERENCE_PKL_PATH, 'make_csv.csv')
     # Both files are read in full and compared as whole strings.
     with open(result_path) as result, open(ctrl_path) as ctrl:
         self.assertEqual(result.read(), ctrl.read())
Пример #2
0
 def test_subsetting(self):
     """Run an experiment over four subset strategies and check it against a reference.

     The compared value maps ``str(trial)`` to a frozenset of ``str(run)``
     for every run of that trial. It is checked against the stored
     reference named ``'test_subsetting'``.
     """
     M, y = uft.generate_test_matrix(1000, 5, 2, random_state=0)
     even_rows = {
         'subset': per.SubsetRandomRowsEvenDistribution,
         'subset_size': [20],
         'random_state': [0],
     }
     actual_rows = {
         'subset': per.SubsetRandomRowsActualDistribution,
         'subset_size': [20],
         'random_state': [0],
     }
     row_sweep = {
         'subset': per.SubsetSweepNumRows,
         'num_rows': [[10, 20, 30]],
         'random_state': [0],
     }
     strat_sweep = {
         'subset': per.SubsetSweepVaryStratification,
         'proportions_positive': [[.5, .75, .9]],
         'subset_size': [10],
         'random_state': [0],
     }
     exp = per.Experiment(
         M, y, subsets=[even_rows, actual_rows, row_sweep, strat_sweep])
     exp.run()
     # String forms are used so the comparison does not depend on the
     # trial/run objects themselves.
     result = {}
     for trial in exp.trials:
         result[str(trial)] = frozenset(str(run) for run in trial.runs)
     self.__compare_to_ref_pkl(result, 'test_subsetting')
Пример #3
0
 def test_report_complex(self):
     """Build a report over a classifier/subset grid and attach it to ``self.report``.

     The experiment combines a random-forest grid, an SVC grid and one
     subset strategy swept over five subset sizes. ``make_report`` is
     called with ``dimension=per.CLF``, and the report object it returns
     is added to ``self.report`` under the heading
     ``'test_report_complex'``. No assertion is made on the report
     contents.
     """
     # Pass random_state=0 like the other generate_test_matrix calls so the
     # input data (presumably seeded by this argument) is the same on every run.
     M, y = uft.generate_test_matrix(100, 5, 2, random_state=0)
     clfs = [{
         'clf': RandomForestClassifier,
         'n_estimators': [10, 100],
         'max_depth': [1, 10],
         'random_state': [0]
     }, {
         'clf': SVC,
         'kernel': ['linear', 'rbf'],
         'probability': [True],
         'random_state': [0]
     }]
     subsets = [{
         'subset': per.SubsetRandomRowsActualDistribution,
         'subset_size': [20, 40, 60, 80, 100],
         'random_state': [0]
     }]
     cvs = [{'cv': StratifiedKFold}]
     exp = per.Experiment(M, y, clfs, subsets, cvs)
     # make_report returns a pair here; only the second element is used.
     _, rep = exp.make_report(dimension=per.CLF,
                              return_report_object=True,
                              verbose=False)
     self.report.add_heading('test_report_complex', 1)
     self.report.add_subreport(rep)
Пример #4
0
 def test_operate(self):
     """Compare an experiment's average scores with a stored reference.

     For each (label, classifier grid) pair, the key/value pairs of
     ``exp.average_score()`` are re-keyed by ``str(key)`` and compared
     against the reference named ``'test_operate_<label>'``.
     """
     M, y = uft.generate_test_matrix(100, 5, 2, random_state=0)
     cvs = [{'cv': StratifiedKFold}]
     for label, clfs in zip(('std',), (op.DBG_std_clfs,)):
         exp = per.Experiment(M, y, clfs=clfs, cvs=cvs)
         # .items() is available on both Python 2 and Python 3 mappings,
         # whereas .iteritems() exists only on Python 2.
         result = {str(key): val
                   for key, val in exp.average_score().items()}
         self.__compare_to_ref_pkl(
                 result,
                 'test_operate_{}'.format(label))
Пример #5
0
 def test_std_clfs(self):
     """Run the standard debug classifier grid and compare its trials with a reference.

     The compared value is the set of ``str(trial)`` for every trial of
     the experiment after ``run()``.
     """
     M, y = uft.generate_test_matrix(100, 5, 2, random_state=0)
     fold_grids = [{'cv': StratifiedKFold}]
     labelled_grids = [('std', per.DBG_std_clfs)]
     for label, clfs in labelled_grids:
         exp = per.Experiment(M, y, clfs=clfs, cvs=fold_grids)
         exp.run()
         result = set(str(trial) for trial in exp.trials)
         # NOTE(review): the reference name starts with 'test_operate_',
         # not 'test_std_clfs_' -- confirm this is the intended reference.
         ref_name = 'test_operate_{}'.format(label)
         self.__compare_to_ref_pkl(result, ref_name)
Пример #6
0
 def test_operate(self):
     """Compare an experiment's average scores with a stored reference.

     For each (label, classifier grid) pair, the key/value pairs of
     ``exp.average_score()`` are re-keyed by ``str(key)`` and compared
     against the reference named ``'test_operate_<label>'``.
     """
     M, y = uft.generate_test_matrix(100, 5, 2, random_state=0)
     cvs = [{'cv': StratifiedKFold}]
     for label, clfs in zip(('std', ), (op.DBG_std_clfs, )):
         exp = per.Experiment(M, y, clfs=clfs, cvs=cvs)
         # .items() is available on both Python 2 and Python 3 mappings,
         # whereas .iteritems() exists only on Python 2.
         result = {
             str(key): val
             for key, val in exp.average_score().items()
         }
         self.__compare_to_ref_pkl(result, 'test_operate_{}'.format(label))
Пример #7
0
 def test_report_simple(self):
     """Build a report for a single random-forest grid and attach it to ``self.report``.

     The report object returned by ``make_report`` is added under the
     heading ``'test_report_simple'``. No assertion is made on its
     contents.
     """
     M, y = uft.generate_test_matrix(100, 5, 2, random_state=0)
     forest_grid = {
         'clf': RandomForestClassifier,
         'n_estimators': [10, 100, 1000],
         'random_state': [0],
     }
     fold_grid = {'cv': StratifiedKFold}
     exp = per.Experiment(M, y, clfs=[forest_grid], cvs=[fold_grid])
     # make_report returns a pair here; only the second element is used.
     _, sub_report = exp.make_report(return_report_object=True,
                                     verbose=False)
     self.report.add_heading('test_report_simple', 1)
     self.report.add_subreport(sub_report)
Пример #8
0
 def test_report_simple(self):
     """Produce a report from one random-forest grid and add it to ``self.report``.

     Only the report object (the second element returned by
     ``make_report``) is used; it is attached beneath a
     ``'test_report_simple'`` heading. Nothing is asserted about it.
     """
     M, y = uft.generate_test_matrix(100, 5, 2, random_state=0)
     exp = per.Experiment(
         M,
         y,
         clfs=[{'clf': RandomForestClassifier,
                'n_estimators': [10, 100, 1000],
                'random_state': [0]}],
         cvs=[{'cv': StratifiedKFold}])
     report_pair = exp.make_report(return_report_object=True, verbose=False)
     # Unpacking keeps the requirement that exactly two values come back.
     _, sub_report = report_pair
     self.report.add_heading('test_report_simple', 1)
     self.report.add_subreport(sub_report)
Пример #9
0
 def test_make_csv(self):
     """Check that ``Experiment.make_csv`` runs and produces a file.

     The experiment is built from a random-forest grid, an SVC grid, one
     row-count sweep subset and a stratified k-fold grid.
     """
     # Local import so this method does not rely on a module-level import.
     import os

     M, y = uft.generate_test_matrix(1000, 5, 2, random_state=0)
     clfs = [{'clf': RandomForestClassifier, 
              'n_estimators': [10, 100], 
              'max_depth': [5, 25],
              'random_state': [0]},
             {'clf': SVC, 
              'kernel': ['linear', 'rbf'], 
              'probability': [True],
              'random_state': [0]}]        
     subsets = [{'subset': per.SubsetSweepNumRows, 
                 'num_rows': [[100, 200]],
                 'random_state': [0]}]
     cvs = [{'cv': StratifiedKFold, 
             'n_folds': [2, 3]}]
     exp = per.Experiment(M, y, clfs=clfs, subsets=subsets, cvs=cvs)
     result_path = exp.make_csv()
     # Previously the return value was discarded, so the test could only
     # fail on an exception. Assumes make_csv returns the path of the file
     # it wrote -- TODO confirm.
     self.assertTrue(os.path.isfile(result_path))
Пример #10
0
 def test_report_complex(self):
     """Build a report over a classifier/subset grid and attach it to ``self.report``.

     The experiment combines a random-forest grid, an SVC grid and one
     subset strategy swept over five subset sizes. ``make_report`` is
     called with ``dimension=per.CLF``, and the report object it returns
     is added to ``self.report`` under the heading
     ``'test_report_complex'``. No assertion is made on the report
     contents.
     """
     # Pass random_state=0 like the other generate_test_matrix calls so the
     # input data (presumably seeded by this argument) is the same on every run.
     M, y = uft.generate_test_matrix(100, 5, 2, random_state=0)
     clfs = [{'clf': RandomForestClassifier, 
              'n_estimators': [10, 100], 
              'max_depth': [1, 10],
              'random_state': [0]}, 
              {'clf': SVC, 
               'kernel': ['linear', 'rbf'], 
               'probability': [True],
               'random_state': [0]}]        
     subsets = [{'subset': per.SubsetRandomRowsActualDistribution, 
                 'subset_size': [20, 40, 60, 80, 100],
                 'random_state': [0]}]
     cvs = [{'cv': StratifiedKFold}]
     exp = per.Experiment(M, y, clfs, subsets, cvs)
     # make_report returns a pair here; only the second element is used.
     _, rep = exp.make_report(dimension=per.CLF, return_report_object=True, 
                              verbose=False)
     self.report.add_heading('test_report_complex', 1)
     self.report.add_subreport(rep)
Пример #11
0
 def test_subsetting(self):
     """Exercise four subset strategies and compare the resulting trials with a reference.

     Each trial contributes one entry: ``str(trial)`` mapped to a
     frozenset of the string forms of its runs. The mapping is checked
     against the stored reference named ``'test_subsetting'``.
     """
     M, y = uft.generate_test_matrix(1000, 5, 2, random_state=0)
     subsets = []
     # Two random-row strategies share the same single-size grid.
     for strategy in (per.SubsetRandomRowsEvenDistribution,
                      per.SubsetRandomRowsActualDistribution):
         subsets.append({
             'subset': strategy,
             'subset_size': [20],
             'random_state': [0],
         })
     subsets.append({
         'subset': per.SubsetSweepNumRows,
         'num_rows': [[10, 20, 30]],
         'random_state': [0],
     })
     subsets.append({
         'subset': per.SubsetSweepVaryStratification,
         'proportions_positive': [[.5, .75, .9]],
         'subset_size': [10],
         'random_state': [0],
     })
     exp = per.Experiment(M, y, subsets=subsets)
     exp.run()
     result = dict(
         (str(trial), frozenset(str(run) for run in trial.runs))
         for trial in exp.trials)
     self.__compare_to_ref_pkl(result, 'test_subsetting')
Пример #12
0
 def test_get_top_features(self):
     """Check ``comm.get_top_features`` against a hard-coded table of feature scores.

     A random forest (``random_state=0``) is fitted on the training part
     of a split of the generated matrix. The result of
     ``get_top_features`` must equal the ten expected
     ``(feat_name, score)`` rows according to ``uft.array_equal``.
     """
     M, labels = uft.generate_test_matrix(1000, 15, random_state=0)
     M = utils.cast_np_sa_to_nd(M)
     # Only the training halves of the split are needed.
     M_train, _, labels_train, _ = train_test_split(M, labels)
     clf = RandomForestClassifier(random_state=0)
     clf.fit(M_train, labels_train)
     expected_rows = [
         ('f5', 0.0773838526068),
         ('f13', 0.0769596713039),
         ('f8', 0.0751584839431),
         ('f6', 0.0730815879102),
         ('f11', 0.0684456133071),
         ('f9', 0.0666747414603),
         ('f10', 0.0659621889608),
         ('f7', 0.0657988099065),
         ('f2', 0.0634000069218),
         ('f0', 0.0632912268319),
     ]
     res = comm.get_top_features(clf, M, verbose=False)
     ctrl = utils.convert_to_sa(expected_rows,
                                col_names=('feat_name', 'score'))
     self.assertTrue(uft.array_equal(ctrl, res))
Пример #13
0
    def test_get_top_features(self):
        """Check ``dsp.get_top_features`` against a ranking derived from the classifier.

        The control table pairs the names ``f0`` .. ``f14`` with the fitted
        forest's ``feature_importances_`` and keeps the ten rows with the
        largest importances, largest first. ``get_top_features`` is
        checked twice: once given the matrix ``M`` and once given
        explicit ``col_names``. Both results must equal the control
        table according to ``uft.array_equal``.
        """
        M, labels = uft.generate_test_matrix(1000, 15, random_state=0)
        M = utils.cast_np_sa_to_nd(M)
        # Only the training halves of the split are needed.
        M_train, _, labels_train, _ = train_test_split(
                M, 
                labels)
        clf = RandomForestClassifier(random_state=0)
        clf.fit(M_train, labels_train)

        ctrl_feat_importances = clf.feature_importances_
        # range() rather than xrange() so this also runs on Python 3.
        ctrl_col_names = ['f{}'.format(i) for i in range(15)]
        # Indices of the ten largest importances, in descending order.
        ctrl_feat_ranks = np.argsort(ctrl_feat_importances)[::-1][:10]
        # list(...) hands convert_to_sa a concrete sequence even where
        # zip() is lazy (Python 3).
        ctrl = utils.convert_to_sa(
                list(zip(ctrl_col_names, ctrl_feat_importances)),
                col_names=('feat_name', 'score'))[ctrl_feat_ranks]

        res = dsp.get_top_features(clf, M, verbose=False)
        self.assertTrue(uft.array_equal(ctrl, res))

        # Reuse the names built above instead of regenerating the same list.
        res = dsp.get_top_features(
                clf,
                col_names=ctrl_col_names,
                verbose=False)
        self.assertTrue(uft.array_equal(ctrl, res))
Пример #14
0
 def test_make_csv(self):
     """Verify that ``Experiment.make_csv`` output matches the reference CSV byte for byte.

     The experiment covers a random-forest grid, an SVC grid, a
     row-count sweep and a stratified k-fold grid. The generated file
     is compared with ``make_csv.csv`` under ``REFERENCE_PKL_PATH``.
     """
     M, y = uft.generate_test_matrix(1000, 5, 2, random_state=0)
     clfs = [
         {
             'clf': RandomForestClassifier,
             'n_estimators': [10, 100],
             'max_depth': [5, 25],
             'random_state': [0],
         },
         {
             'clf': SVC,
             'kernel': ['linear', 'rbf'],
             'probability': [True],
             'random_state': [0],
         },
     ]
     subsets = [
         {
             'subset': per.SubsetSweepNumRows,
             'num_rows': [[100, 200]],
             'random_state': [0],
         },
     ]
     cvs = [{'cv': StratifiedKFold, 'n_folds': [2, 3]}]
     exp = per.Experiment(M, y, clfs=clfs, subsets=subsets, cvs=cvs)
     generated_path = exp.make_csv()
     reference_path = os.path.join(REFERENCE_PKL_PATH, 'make_csv.csv')
     with open(generated_path) as generated, open(reference_path) as reference:
         generated_text = generated.read()
         reference_text = reference.read()
         self.assertEqual(generated_text, reference_text)
Пример #15
0
 def test_get_top_features(self):
     """Compare ``comm.get_top_features`` output with ten hard-coded feature scores.

     A random forest (``random_state=0``) is fitted on the training part
     of a split of the generated matrix. The expected
     ``(feat_name, score)`` rows are assembled from parallel name and
     score lists and compared with the result via ``uft.array_equal``.
     """
     M, labels = uft.generate_test_matrix(1000, 15, random_state=0)
     M = utils.cast_np_sa_to_nd(M)
     # Only the training halves of the split are needed.
     M_train, _, labels_train, _ = train_test_split(M, labels)
     clf = RandomForestClassifier(random_state=0)
     clf.fit(M_train, labels_train)
     res = comm.get_top_features(clf, M, verbose=False)
     expected_names = ['f5', 'f13', 'f8', 'f6', 'f11',
                       'f9', 'f10', 'f7', 'f2', 'f0']
     expected_scores = [
         0.0773838526068,
         0.0769596713039,
         0.0751584839431,
         0.0730815879102,
         0.0684456133071,
         0.0666747414603,
         0.0659621889608,
         0.0657988099065,
         0.0634000069218,
         0.0632912268319,
     ]
     # list(zip(...)) yields the same list of (name, score) tuples as the
     # literal form.
     ctrl = utils.convert_to_sa(
         list(zip(expected_names, expected_scores)),
         col_names=('feat_name', 'score'))
     self.assertTrue(uft.array_equal(ctrl, res))
Пример #16
0
 def test_generate_matrix(self):
     """Smoke-test ``generate_test_matrix`` by generating a matrix and printing it.

     Makes no assertions; the test fails only if generation or printing
     raises.
     """
     M, y = utils_for_tests.generate_test_matrix(100, 5, 3, [float, str, int])
     # A parenthesized single argument is valid both as the Python 2 print
     # statement and as the Python 3 print function, with the same output.
     print(M)
     print(y)
Пример #17
0
 def test_generate_matrix(self):
     """Smoke-test ``generate_test_matrix`` by generating a matrix and printing it.

     Makes no assertions; the test fails only if generation or printing
     raises.
     """
     M, y = utils_for_tests.generate_test_matrix(100, 5, 3,
                                                 [float, str, int])
     # A parenthesized single argument is valid both as the Python 2 print
     # statement and as the Python 3 print function, with the same output.
     print(M)
     print(y)